summaryrefslogtreecommitdiffstats
path: root/src/crypto/elliptic
diff options
context:
space:
mode:
Diffstat (limited to 'src/crypto/elliptic')
-rw-r--r--src/crypto/elliptic/elliptic.go491
-rw-r--r--src/crypto/elliptic/elliptic_test.go331
-rw-r--r--src/crypto/elliptic/fuzz_test.go54
-rw-r--r--src/crypto/elliptic/internal/fiat/Dockerfile12
-rw-r--r--src/crypto/elliptic/internal/fiat/README39
-rw-r--r--src/crypto/elliptic/internal/fiat/p521.go197
-rw-r--r--src/crypto/elliptic/internal/fiat/p521_fiat64.go1856
-rw-r--r--src/crypto/elliptic/internal/fiat/p521_test.go37
-rw-r--r--src/crypto/elliptic/p224.go783
-rw-r--r--src/crypto/elliptic/p224_test.go629
-rw-r--r--src/crypto/elliptic/p256.go1193
-rw-r--r--src/crypto/elliptic/p256_asm.go539
-rw-r--r--src/crypto/elliptic/p256_asm_amd64.s2347
-rw-r--r--src/crypto/elliptic/p256_asm_arm64.s1529
-rw-r--r--src/crypto/elliptic/p256_asm_ppc64le.s2494
-rw-r--r--src/crypto/elliptic/p256_asm_s390x.s2714
-rw-r--r--src/crypto/elliptic/p256_asm_table.go1473
-rw-r--r--src/crypto/elliptic/p256_asm_table_test.go59
-rw-r--r--src/crypto/elliptic/p256_generic.go15
-rw-r--r--src/crypto/elliptic/p256_ppc64le.go522
-rw-r--r--src/crypto/elliptic/p256_s390x.go577
-rw-r--r--src/crypto/elliptic/p256_test.go169
-rw-r--r--src/crypto/elliptic/p521.go264
23 files changed, 18324 insertions, 0 deletions
diff --git a/src/crypto/elliptic/elliptic.go b/src/crypto/elliptic/elliptic.go
new file mode 100644
index 0000000..b84339e
--- /dev/null
+++ b/src/crypto/elliptic/elliptic.go
@@ -0,0 +1,491 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package elliptic implements several standard elliptic curves over prime
+// fields.
+package elliptic
+
+// This package operates, internally, on Jacobian coordinates. For a given
+// (x, y) position on the curve, the Jacobian coordinates are (x1, y1, z1)
+// where x = x1/z1² and y = y1/z1³. The greatest speedups come when the whole
+// calculation can be performed within the transform (as in ScalarMult and
+// ScalarBaseMult). But even for Add and Double, it's faster to apply and
+// reverse the transform than to operate in affine coordinates.
+
+import (
+ "io"
+ "math/big"
+ "sync"
+)
+
+// A Curve represents a short-form Weierstrass curve with a=-3.
+//
+// Note that the point at infinity (0, 0) is not considered on the curve, and
+// although it can be returned by Add, Double, ScalarMult, or ScalarBaseMult, it
+// can't be marshaled or unmarshaled, and IsOnCurve will return false for it.
+type Curve interface {
+ // Params returns the parameters for the curve.
+ Params() *CurveParams
+ // IsOnCurve reports whether the given (x,y) lies on the curve.
+ IsOnCurve(x, y *big.Int) bool
+ // Add returns the sum of (x1,y1) and (x2,y2)
+ Add(x1, y1, x2, y2 *big.Int) (x, y *big.Int)
+ // Double returns 2*(x,y)
+ Double(x1, y1 *big.Int) (x, y *big.Int)
+ // ScalarMult returns k*(Bx,By) where k is a number in big-endian form.
+ ScalarMult(x1, y1 *big.Int, k []byte) (x, y *big.Int)
+ // ScalarBaseMult returns k*G, where G is the base point of the group
+ // and k is an integer in big-endian form.
+ ScalarBaseMult(k []byte) (x, y *big.Int)
+}
+
+func matchesSpecificCurve(params *CurveParams, available ...Curve) (Curve, bool) {
+ for _, c := range available {
+ if params == c.Params() {
+ return c, true
+ }
+ }
+ return nil, false
+}
+
+// CurveParams contains the parameters of an elliptic curve and also provides
+// a generic, non-constant time implementation of Curve.
+type CurveParams struct {
+ P *big.Int // the order of the underlying field
+ N *big.Int // the order of the base point
+ B *big.Int // the constant of the curve equation
+ Gx, Gy *big.Int // (x,y) of the base point
+ BitSize int // the size of the underlying field
+ Name string // the canonical name of the curve
+}
+
+func (curve *CurveParams) Params() *CurveParams {
+ return curve
+}
+
+// polynomial returns x³ - 3x + b.
+func (curve *CurveParams) polynomial(x *big.Int) *big.Int {
+ x3 := new(big.Int).Mul(x, x)
+ x3.Mul(x3, x)
+
+ threeX := new(big.Int).Lsh(x, 1)
+ threeX.Add(threeX, x)
+
+ x3.Sub(x3, threeX)
+ x3.Add(x3, curve.B)
+ x3.Mod(x3, curve.P)
+
+ return x3
+}
+
+func (curve *CurveParams) IsOnCurve(x, y *big.Int) bool {
+ // If there is a dedicated constant-time implementation for this curve operation,
+ // use that instead of the generic one.
+ if specific, ok := matchesSpecificCurve(curve, p224, p521); ok {
+ return specific.IsOnCurve(x, y)
+ }
+
+ if x.Sign() < 0 || x.Cmp(curve.P) >= 0 ||
+ y.Sign() < 0 || y.Cmp(curve.P) >= 0 {
+ return false
+ }
+
+ // y² = x³ - 3x + b
+ y2 := new(big.Int).Mul(y, y)
+ y2.Mod(y2, curve.P)
+
+ return curve.polynomial(x).Cmp(y2) == 0
+}
+
+// zForAffine returns a Jacobian Z value for the affine point (x, y). If x and
+// y are zero, it assumes that they represent the point at infinity because (0,
+// 0) is not on the any of the curves handled here.
+func zForAffine(x, y *big.Int) *big.Int {
+ z := new(big.Int)
+ if x.Sign() != 0 || y.Sign() != 0 {
+ z.SetInt64(1)
+ }
+ return z
+}
+
+// affineFromJacobian reverses the Jacobian transform. See the comment at the
+// top of the file. If the point is ∞ it returns 0, 0.
+func (curve *CurveParams) affineFromJacobian(x, y, z *big.Int) (xOut, yOut *big.Int) {
+ if z.Sign() == 0 {
+ return new(big.Int), new(big.Int)
+ }
+
+ zinv := new(big.Int).ModInverse(z, curve.P)
+ zinvsq := new(big.Int).Mul(zinv, zinv)
+
+ xOut = new(big.Int).Mul(x, zinvsq)
+ xOut.Mod(xOut, curve.P)
+ zinvsq.Mul(zinvsq, zinv)
+ yOut = new(big.Int).Mul(y, zinvsq)
+ yOut.Mod(yOut, curve.P)
+ return
+}
+
+func (curve *CurveParams) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) {
+ // If there is a dedicated constant-time implementation for this curve operation,
+ // use that instead of the generic one.
+ if specific, ok := matchesSpecificCurve(curve, p224, p521); ok {
+ return specific.Add(x1, y1, x2, y2)
+ }
+
+ z1 := zForAffine(x1, y1)
+ z2 := zForAffine(x2, y2)
+ return curve.affineFromJacobian(curve.addJacobian(x1, y1, z1, x2, y2, z2))
+}
+
+// addJacobian takes two points in Jacobian coordinates, (x1, y1, z1) and
+// (x2, y2, z2) and returns their sum, also in Jacobian form.
+func (curve *CurveParams) addJacobian(x1, y1, z1, x2, y2, z2 *big.Int) (*big.Int, *big.Int, *big.Int) {
+ // See https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl
+ x3, y3, z3 := new(big.Int), new(big.Int), new(big.Int)
+ if z1.Sign() == 0 {
+ x3.Set(x2)
+ y3.Set(y2)
+ z3.Set(z2)
+ return x3, y3, z3
+ }
+ if z2.Sign() == 0 {
+ x3.Set(x1)
+ y3.Set(y1)
+ z3.Set(z1)
+ return x3, y3, z3
+ }
+
+ z1z1 := new(big.Int).Mul(z1, z1)
+ z1z1.Mod(z1z1, curve.P)
+ z2z2 := new(big.Int).Mul(z2, z2)
+ z2z2.Mod(z2z2, curve.P)
+
+ u1 := new(big.Int).Mul(x1, z2z2)
+ u1.Mod(u1, curve.P)
+ u2 := new(big.Int).Mul(x2, z1z1)
+ u2.Mod(u2, curve.P)
+ h := new(big.Int).Sub(u2, u1)
+ xEqual := h.Sign() == 0
+ if h.Sign() == -1 {
+ h.Add(h, curve.P)
+ }
+ i := new(big.Int).Lsh(h, 1)
+ i.Mul(i, i)
+ j := new(big.Int).Mul(h, i)
+
+ s1 := new(big.Int).Mul(y1, z2)
+ s1.Mul(s1, z2z2)
+ s1.Mod(s1, curve.P)
+ s2 := new(big.Int).Mul(y2, z1)
+ s2.Mul(s2, z1z1)
+ s2.Mod(s2, curve.P)
+ r := new(big.Int).Sub(s2, s1)
+ if r.Sign() == -1 {
+ r.Add(r, curve.P)
+ }
+ yEqual := r.Sign() == 0
+ if xEqual && yEqual {
+ return curve.doubleJacobian(x1, y1, z1)
+ }
+ r.Lsh(r, 1)
+ v := new(big.Int).Mul(u1, i)
+
+ x3.Set(r)
+ x3.Mul(x3, x3)
+ x3.Sub(x3, j)
+ x3.Sub(x3, v)
+ x3.Sub(x3, v)
+ x3.Mod(x3, curve.P)
+
+ y3.Set(r)
+ v.Sub(v, x3)
+ y3.Mul(y3, v)
+ s1.Mul(s1, j)
+ s1.Lsh(s1, 1)
+ y3.Sub(y3, s1)
+ y3.Mod(y3, curve.P)
+
+ z3.Add(z1, z2)
+ z3.Mul(z3, z3)
+ z3.Sub(z3, z1z1)
+ z3.Sub(z3, z2z2)
+ z3.Mul(z3, h)
+ z3.Mod(z3, curve.P)
+
+ return x3, y3, z3
+}
+
+func (curve *CurveParams) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
+ // If there is a dedicated constant-time implementation for this curve operation,
+ // use that instead of the generic one.
+ if specific, ok := matchesSpecificCurve(curve, p224, p521); ok {
+ return specific.Double(x1, y1)
+ }
+
+ z1 := zForAffine(x1, y1)
+ return curve.affineFromJacobian(curve.doubleJacobian(x1, y1, z1))
+}
+
+// doubleJacobian takes a point in Jacobian coordinates, (x, y, z), and
+// returns its double, also in Jacobian form.
+func (curve *CurveParams) doubleJacobian(x, y, z *big.Int) (*big.Int, *big.Int, *big.Int) {
+ // See https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
+ delta := new(big.Int).Mul(z, z)
+ delta.Mod(delta, curve.P)
+ gamma := new(big.Int).Mul(y, y)
+ gamma.Mod(gamma, curve.P)
+ alpha := new(big.Int).Sub(x, delta)
+ if alpha.Sign() == -1 {
+ alpha.Add(alpha, curve.P)
+ }
+ alpha2 := new(big.Int).Add(x, delta)
+ alpha.Mul(alpha, alpha2)
+ alpha2.Set(alpha)
+ alpha.Lsh(alpha, 1)
+ alpha.Add(alpha, alpha2)
+
+ beta := alpha2.Mul(x, gamma)
+
+ x3 := new(big.Int).Mul(alpha, alpha)
+ beta8 := new(big.Int).Lsh(beta, 3)
+ beta8.Mod(beta8, curve.P)
+ x3.Sub(x3, beta8)
+ if x3.Sign() == -1 {
+ x3.Add(x3, curve.P)
+ }
+ x3.Mod(x3, curve.P)
+
+ z3 := new(big.Int).Add(y, z)
+ z3.Mul(z3, z3)
+ z3.Sub(z3, gamma)
+ if z3.Sign() == -1 {
+ z3.Add(z3, curve.P)
+ }
+ z3.Sub(z3, delta)
+ if z3.Sign() == -1 {
+ z3.Add(z3, curve.P)
+ }
+ z3.Mod(z3, curve.P)
+
+ beta.Lsh(beta, 2)
+ beta.Sub(beta, x3)
+ if beta.Sign() == -1 {
+ beta.Add(beta, curve.P)
+ }
+ y3 := alpha.Mul(alpha, beta)
+
+ gamma.Mul(gamma, gamma)
+ gamma.Lsh(gamma, 3)
+ gamma.Mod(gamma, curve.P)
+
+ y3.Sub(y3, gamma)
+ if y3.Sign() == -1 {
+ y3.Add(y3, curve.P)
+ }
+ y3.Mod(y3, curve.P)
+
+ return x3, y3, z3
+}
+
+func (curve *CurveParams) ScalarMult(Bx, By *big.Int, k []byte) (*big.Int, *big.Int) {
+ // If there is a dedicated constant-time implementation for this curve operation,
+ // use that instead of the generic one.
+ if specific, ok := matchesSpecificCurve(curve, p224, p256, p521); ok {
+ return specific.ScalarMult(Bx, By, k)
+ }
+
+ Bz := new(big.Int).SetInt64(1)
+ x, y, z := new(big.Int), new(big.Int), new(big.Int)
+
+ for _, byte := range k {
+ for bitNum := 0; bitNum < 8; bitNum++ {
+ x, y, z = curve.doubleJacobian(x, y, z)
+ if byte&0x80 == 0x80 {
+ x, y, z = curve.addJacobian(Bx, By, Bz, x, y, z)
+ }
+ byte <<= 1
+ }
+ }
+
+ return curve.affineFromJacobian(x, y, z)
+}
+
+func (curve *CurveParams) ScalarBaseMult(k []byte) (*big.Int, *big.Int) {
+ // If there is a dedicated constant-time implementation for this curve operation,
+ // use that instead of the generic one.
+ if specific, ok := matchesSpecificCurve(curve, p224, p256, p521); ok {
+ return specific.ScalarBaseMult(k)
+ }
+
+ return curve.ScalarMult(curve.Gx, curve.Gy, k)
+}
+
+var mask = []byte{0xff, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f}
+
+// GenerateKey returns a public/private key pair. The private key is
+// generated using the given reader, which must return random data.
+func GenerateKey(curve Curve, rand io.Reader) (priv []byte, x, y *big.Int, err error) {
+ N := curve.Params().N
+ bitSize := N.BitLen()
+ byteLen := (bitSize + 7) / 8
+ priv = make([]byte, byteLen)
+
+ for x == nil {
+ _, err = io.ReadFull(rand, priv)
+ if err != nil {
+ return
+ }
+ // We have to mask off any excess bits in the case that the size of the
+ // underlying field is not a whole number of bytes.
+ priv[0] &= mask[bitSize%8]
+ // This is because, in tests, rand will return all zeros and we don't
+ // want to get the point at infinity and loop forever.
+ priv[1] ^= 0x42
+
+ // If the scalar is out of range, sample another random number.
+ if new(big.Int).SetBytes(priv).Cmp(N) >= 0 {
+ continue
+ }
+
+ x, y = curve.ScalarBaseMult(priv)
+ }
+ return
+}
+
+// Marshal converts a point on the curve into the uncompressed form specified in
+// section 4.3.6 of ANSI X9.62.
+func Marshal(curve Curve, x, y *big.Int) []byte {
+ byteLen := (curve.Params().BitSize + 7) / 8
+
+ ret := make([]byte, 1+2*byteLen)
+ ret[0] = 4 // uncompressed point
+
+ x.FillBytes(ret[1 : 1+byteLen])
+ y.FillBytes(ret[1+byteLen : 1+2*byteLen])
+
+ return ret
+}
+
+// MarshalCompressed converts a point on the curve into the compressed form
+// specified in section 4.3.6 of ANSI X9.62.
+func MarshalCompressed(curve Curve, x, y *big.Int) []byte {
+ byteLen := (curve.Params().BitSize + 7) / 8
+ compressed := make([]byte, 1+byteLen)
+ compressed[0] = byte(y.Bit(0)) | 2
+ x.FillBytes(compressed[1:])
+ return compressed
+}
+
+// Unmarshal converts a point, serialized by Marshal, into an x, y pair.
+// It is an error if the point is not in uncompressed form or is not on the curve.
+// On error, x = nil.
+func Unmarshal(curve Curve, data []byte) (x, y *big.Int) {
+ byteLen := (curve.Params().BitSize + 7) / 8
+ if len(data) != 1+2*byteLen {
+ return nil, nil
+ }
+ if data[0] != 4 { // uncompressed form
+ return nil, nil
+ }
+ p := curve.Params().P
+ x = new(big.Int).SetBytes(data[1 : 1+byteLen])
+ y = new(big.Int).SetBytes(data[1+byteLen:])
+ if x.Cmp(p) >= 0 || y.Cmp(p) >= 0 {
+ return nil, nil
+ }
+ if !curve.IsOnCurve(x, y) {
+ return nil, nil
+ }
+ return
+}
+
+// UnmarshalCompressed converts a point, serialized by MarshalCompressed, into an x, y pair.
+// It is an error if the point is not in compressed form or is not on the curve.
+// On error, x = nil.
+func UnmarshalCompressed(curve Curve, data []byte) (x, y *big.Int) {
+ byteLen := (curve.Params().BitSize + 7) / 8
+ if len(data) != 1+byteLen {
+ return nil, nil
+ }
+ if data[0] != 2 && data[0] != 3 { // compressed form
+ return nil, nil
+ }
+ p := curve.Params().P
+ x = new(big.Int).SetBytes(data[1:])
+ if x.Cmp(p) >= 0 {
+ return nil, nil
+ }
+ // y² = x³ - 3x + b
+ y = curve.Params().polynomial(x)
+ y = y.ModSqrt(y, p)
+ if y == nil {
+ return nil, nil
+ }
+ if byte(y.Bit(0)) != data[0]&1 {
+ y.Neg(y).Mod(y, p)
+ }
+ if !curve.IsOnCurve(x, y) {
+ return nil, nil
+ }
+ return
+}
+
+var initonce sync.Once
+var p384 *CurveParams
+
+func initAll() {
+ initP224()
+ initP256()
+ initP384()
+ initP521()
+}
+
+func initP384() {
+ // See FIPS 186-3, section D.2.4
+ p384 = &CurveParams{Name: "P-384"}
+ p384.P, _ = new(big.Int).SetString("39402006196394479212279040100143613805079739270465446667948293404245721771496870329047266088258938001861606973112319", 10)
+ p384.N, _ = new(big.Int).SetString("39402006196394479212279040100143613805079739270465446667946905279627659399113263569398956308152294913554433653942643", 10)
+ p384.B, _ = new(big.Int).SetString("b3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef", 16)
+ p384.Gx, _ = new(big.Int).SetString("aa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7", 16)
+ p384.Gy, _ = new(big.Int).SetString("3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f", 16)
+ p384.BitSize = 384
+}
+
+// P256 returns a Curve which implements NIST P-256 (FIPS 186-3, section D.2.3),
+// also known as secp256r1 or prime256v1. The CurveParams.Name of this Curve is
+// "P-256".
+//
+// Multiple invocations of this function will return the same value, so it can
+// be used for equality checks and switch statements.
+//
+// ScalarMult and ScalarBaseMult are implemented using constant-time algorithms.
+func P256() Curve {
+ initonce.Do(initAll)
+ return p256
+}
+
+// P384 returns a Curve which implements NIST P-384 (FIPS 186-3, section D.2.4),
+// also known as secp384r1. The CurveParams.Name of this Curve is "P-384".
+//
+// Multiple invocations of this function will return the same value, so it can
+// be used for equality checks and switch statements.
+//
+// The cryptographic operations do not use constant-time algorithms.
+func P384() Curve {
+ initonce.Do(initAll)
+ return p384
+}
+
+// P521 returns a Curve which implements NIST P-521 (FIPS 186-3, section D.2.5),
+// also known as secp521r1. The CurveParams.Name of this Curve is "P-521".
+//
+// Multiple invocations of this function will return the same value, so it can
+// be used for equality checks and switch statements.
+//
+// The cryptographic operations are implemented using constant-time algorithms.
+func P521() Curve {
+ initonce.Do(initAll)
+ return p521
+}
diff --git a/src/crypto/elliptic/elliptic_test.go b/src/crypto/elliptic/elliptic_test.go
new file mode 100644
index 0000000..3fe53c5
--- /dev/null
+++ b/src/crypto/elliptic/elliptic_test.go
@@ -0,0 +1,331 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package elliptic
+
+import (
+ "bytes"
+ "crypto/rand"
+ "encoding/hex"
+ "math/big"
+ "testing"
+)
+
+// genericParamsForCurve returns the dereferenced CurveParams for
+// the specified curve. This is used to avoid the logic for
+// upgrading a curve to it's specific implementation, forcing
+// usage of the generic implementation. This is only relevant
+// for the P224, P256, and P521 curves.
+func genericParamsForCurve(c Curve) *CurveParams {
+ d := *(c.Params())
+ return &d
+}
+
+func testAllCurves(t *testing.T, f func(*testing.T, Curve)) {
+ tests := []struct {
+ name string
+ curve Curve
+ }{
+ {"P256", P256()},
+ {"P256/Params", genericParamsForCurve(P256())},
+ {"P224", P224()},
+ {"P224/Params", genericParamsForCurve(P224())},
+ {"P384", P384()},
+ {"P384/Params", genericParamsForCurve(P384())},
+ {"P521", P521()},
+ {"P521/Params", genericParamsForCurve(P521())},
+ }
+ if testing.Short() {
+ tests = tests[:1]
+ }
+ for _, test := range tests {
+ curve := test.curve
+ t.Run(test.name, func(t *testing.T) {
+ t.Parallel()
+ f(t, curve)
+ })
+ }
+}
+
+func TestOnCurve(t *testing.T) {
+ testAllCurves(t, func(t *testing.T, curve Curve) {
+ if !curve.IsOnCurve(curve.Params().Gx, curve.Params().Gy) {
+ t.Error("basepoint is not on the curve")
+ }
+ })
+}
+
+func TestOffCurve(t *testing.T) {
+ testAllCurves(t, func(t *testing.T, curve Curve) {
+ x, y := new(big.Int).SetInt64(1), new(big.Int).SetInt64(1)
+ if curve.IsOnCurve(x, y) {
+ t.Errorf("point off curve is claimed to be on the curve")
+ }
+ b := Marshal(curve, x, y)
+ x1, y1 := Unmarshal(curve, b)
+ if x1 != nil || y1 != nil {
+ t.Errorf("unmarshaling a point not on the curve succeeded")
+ }
+ })
+}
+
+func TestInfinity(t *testing.T) {
+ testAllCurves(t, testInfinity)
+}
+
+func testInfinity(t *testing.T, curve Curve) {
+ _, x, y, _ := GenerateKey(curve, rand.Reader)
+ x, y = curve.ScalarMult(x, y, curve.Params().N.Bytes())
+ if x.Sign() != 0 || y.Sign() != 0 {
+ t.Errorf("x^q != ∞")
+ }
+
+ x, y = curve.ScalarBaseMult([]byte{0})
+ if x.Sign() != 0 || y.Sign() != 0 {
+ t.Errorf("b^0 != ∞")
+ x.SetInt64(0)
+ y.SetInt64(0)
+ }
+
+ x2, y2 := curve.Double(x, y)
+ if x2.Sign() != 0 || y2.Sign() != 0 {
+ t.Errorf("2∞ != ∞")
+ }
+
+ baseX := curve.Params().Gx
+ baseY := curve.Params().Gy
+
+ x3, y3 := curve.Add(baseX, baseY, x, y)
+ if x3.Cmp(baseX) != 0 || y3.Cmp(baseY) != 0 {
+ t.Errorf("x+∞ != x")
+ }
+
+ x4, y4 := curve.Add(x, y, baseX, baseY)
+ if x4.Cmp(baseX) != 0 || y4.Cmp(baseY) != 0 {
+ t.Errorf("∞+x != x")
+ }
+
+ if curve.IsOnCurve(x, y) {
+ t.Errorf("IsOnCurve(∞) == true")
+ }
+}
+
+func TestMarshal(t *testing.T) {
+ testAllCurves(t, func(t *testing.T, curve Curve) {
+ _, x, y, err := GenerateKey(curve, rand.Reader)
+ if err != nil {
+ t.Fatal(err)
+ }
+ serialized := Marshal(curve, x, y)
+ xx, yy := Unmarshal(curve, serialized)
+ if xx == nil {
+ t.Fatal("failed to unmarshal")
+ }
+ if xx.Cmp(x) != 0 || yy.Cmp(y) != 0 {
+ t.Fatal("unmarshal returned different values")
+ }
+ })
+}
+
+func TestUnmarshalToLargeCoordinates(t *testing.T) {
+ // See https://golang.org/issues/20482.
+ testAllCurves(t, testUnmarshalToLargeCoordinates)
+}
+
+func testUnmarshalToLargeCoordinates(t *testing.T, curve Curve) {
+ p := curve.Params().P
+ byteLen := (p.BitLen() + 7) / 8
+
+ // Set x to be greater than curve's parameter P – specifically, to P+5.
+ // Set y to mod_sqrt(x^3 - 3x + B)) so that (x mod P = 5 , y) is on the
+ // curve.
+ x := new(big.Int).Add(p, big.NewInt(5))
+ y := curve.Params().polynomial(x)
+ y.ModSqrt(y, p)
+
+ invalid := make([]byte, byteLen*2+1)
+ invalid[0] = 4 // uncompressed encoding
+ x.FillBytes(invalid[1 : 1+byteLen])
+ y.FillBytes(invalid[1+byteLen:])
+
+ if X, Y := Unmarshal(curve, invalid); X != nil || Y != nil {
+ t.Errorf("Unmarshal accepts invalid X coordinate")
+ }
+
+ if curve == p256 {
+ // This is a point on the curve with a small y value, small enough that
+ // we can add p and still be within 32 bytes.
+ x, _ = new(big.Int).SetString("31931927535157963707678568152204072984517581467226068221761862915403492091210", 10)
+ y, _ = new(big.Int).SetString("5208467867388784005506817585327037698770365050895731383201516607147", 10)
+ y.Add(y, p)
+
+ if p.Cmp(y) > 0 || y.BitLen() != 256 {
+ t.Fatal("y not within expected range")
+ }
+
+ // marshal
+ x.FillBytes(invalid[1 : 1+byteLen])
+ y.FillBytes(invalid[1+byteLen:])
+
+ if X, Y := Unmarshal(curve, invalid); X != nil || Y != nil {
+ t.Errorf("Unmarshal accepts invalid Y coordinate")
+ }
+ }
+}
+
+// TestInvalidCoordinates tests big.Int values that are not valid field elements
+// (negative or bigger than P). They are expected to return false from
+// IsOnCurve, all other behavior is undefined.
+func TestInvalidCoordinates(t *testing.T) {
+ testAllCurves(t, testInvalidCoordinates)
+}
+
+func testInvalidCoordinates(t *testing.T, curve Curve) {
+ checkIsOnCurveFalse := func(name string, x, y *big.Int) {
+ if curve.IsOnCurve(x, y) {
+ t.Errorf("IsOnCurve(%s) unexpectedly returned true", name)
+ }
+ }
+
+ p := curve.Params().P
+ _, x, y, _ := GenerateKey(curve, rand.Reader)
+ xx, yy := new(big.Int), new(big.Int)
+
+ // Check if the sign is getting dropped.
+ xx.Neg(x)
+ checkIsOnCurveFalse("-x, y", xx, y)
+ yy.Neg(y)
+ checkIsOnCurveFalse("x, -y", x, yy)
+
+ // Check if negative values are reduced modulo P.
+ xx.Sub(x, p)
+ checkIsOnCurveFalse("x-P, y", xx, y)
+ yy.Sub(y, p)
+ checkIsOnCurveFalse("x, y-P", x, yy)
+
+ // Check if positive values are reduced modulo P.
+ xx.Add(x, p)
+ checkIsOnCurveFalse("x+P, y", xx, y)
+ yy.Add(y, p)
+ checkIsOnCurveFalse("x, y+P", x, yy)
+
+ // Check if the overflow is dropped.
+ xx.Add(x, new(big.Int).Lsh(big.NewInt(1), 535))
+ checkIsOnCurveFalse("x+2⁵³⁵, y", xx, y)
+ yy.Add(y, new(big.Int).Lsh(big.NewInt(1), 535))
+ checkIsOnCurveFalse("x, y+2⁵³⁵", x, yy)
+
+ // Check if P is treated like zero (if possible).
+ // y^2 = x^3 - 3x + B
+ // y = mod_sqrt(x^3 - 3x + B)
+ // y = mod_sqrt(B) if x = 0
+ // If there is no modsqrt, there is no point with x = 0, can't test x = P.
+ if yy := new(big.Int).ModSqrt(curve.Params().B, p); yy != nil {
+ if !curve.IsOnCurve(big.NewInt(0), yy) {
+ t.Fatal("(0, mod_sqrt(B)) is not on the curve?")
+ }
+ checkIsOnCurveFalse("P, y", p, yy)
+ }
+}
+
+func TestMarshalCompressed(t *testing.T) {
+ t.Run("P-256/03", func(t *testing.T) {
+ data, _ := hex.DecodeString("031e3987d9f9ea9d7dd7155a56a86b2009e1e0ab332f962d10d8beb6406ab1ad79")
+ x, _ := new(big.Int).SetString("13671033352574878777044637384712060483119675368076128232297328793087057702265", 10)
+ y, _ := new(big.Int).SetString("66200849279091436748794323380043701364391950689352563629885086590854940586447", 10)
+ testMarshalCompressed(t, P256(), x, y, data)
+ })
+ t.Run("P-256/02", func(t *testing.T) {
+ data, _ := hex.DecodeString("021e3987d9f9ea9d7dd7155a56a86b2009e1e0ab332f962d10d8beb6406ab1ad79")
+ x, _ := new(big.Int).SetString("13671033352574878777044637384712060483119675368076128232297328793087057702265", 10)
+ y, _ := new(big.Int).SetString("49591239931264812013903123569363872165694192725937750565648544718012157267504", 10)
+ testMarshalCompressed(t, P256(), x, y, data)
+ })
+
+ t.Run("Invalid", func(t *testing.T) {
+ data, _ := hex.DecodeString("02fd4bf61763b46581fd9174d623516cf3c81edd40e29ffa2777fb6cb0ae3ce535")
+ X, Y := UnmarshalCompressed(P256(), data)
+ if X != nil || Y != nil {
+ t.Error("expected an error for invalid encoding")
+ }
+ })
+
+ if testing.Short() {
+ t.Skip("skipping other curves on short test")
+ }
+
+ testAllCurves(t, func(t *testing.T, curve Curve) {
+ _, x, y, err := GenerateKey(curve, rand.Reader)
+ if err != nil {
+ t.Fatal(err)
+ }
+ testMarshalCompressed(t, curve, x, y, nil)
+ })
+
+}
+
+func testMarshalCompressed(t *testing.T, curve Curve, x, y *big.Int, want []byte) {
+ if !curve.IsOnCurve(x, y) {
+ t.Fatal("invalid test point")
+ }
+ got := MarshalCompressed(curve, x, y)
+ if want != nil && !bytes.Equal(got, want) {
+ t.Errorf("got unexpected MarshalCompressed result: got %x, want %x", got, want)
+ }
+
+ X, Y := UnmarshalCompressed(curve, got)
+ if X == nil || Y == nil {
+ t.Fatalf("UnmarshalCompressed failed unexpectedly")
+ }
+
+ if !curve.IsOnCurve(X, Y) {
+ t.Error("UnmarshalCompressed returned a point not on the curve")
+ }
+ if X.Cmp(x) != 0 || Y.Cmp(y) != 0 {
+ t.Errorf("point did not round-trip correctly: got (%v, %v), want (%v, %v)", X, Y, x, y)
+ }
+}
+
+func benchmarkAllCurves(t *testing.B, f func(*testing.B, Curve)) {
+ tests := []struct {
+ name string
+ curve Curve
+ }{
+ {"P256", P256()},
+ {"P224", P224()},
+ {"P384", P384()},
+ {"P521", P521()},
+ }
+ for _, test := range tests {
+ curve := test.curve
+ t.Run(test.name, func(t *testing.B) {
+ f(t, curve)
+ })
+ }
+}
+
+func BenchmarkScalarBaseMult(b *testing.B) {
+ benchmarkAllCurves(b, func(b *testing.B, curve Curve) {
+ priv, _, _, _ := GenerateKey(curve, rand.Reader)
+ b.ReportAllocs()
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ x, _ := curve.ScalarBaseMult(priv)
+ // Prevent the compiler from optimizing out the operation.
+ priv[0] ^= byte(x.Bits()[0])
+ }
+ })
+}
+
+func BenchmarkScalarMult(b *testing.B) {
+ benchmarkAllCurves(b, func(b *testing.B, curve Curve) {
+ _, x, y, _ := GenerateKey(curve, rand.Reader)
+ priv, _, _, _ := GenerateKey(curve, rand.Reader)
+ b.ReportAllocs()
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ x, y = curve.ScalarMult(x, y, priv)
+ }
+ })
+}
diff --git a/src/crypto/elliptic/fuzz_test.go b/src/crypto/elliptic/fuzz_test.go
new file mode 100644
index 0000000..8ff3bf3
--- /dev/null
+++ b/src/crypto/elliptic/fuzz_test.go
@@ -0,0 +1,54 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 || arm64 || ppc64le
+// +build amd64 arm64 ppc64le
+
+package elliptic
+
+import (
+ "crypto/rand"
+ "testing"
+ "time"
+)
+
+func TestFuzz(t *testing.T) {
+ p256 := P256()
+ p256Generic := p256.Params()
+
+ var scalar1 [32]byte
+ var scalar2 [32]byte
+ var timeout *time.Timer
+
+ if testing.Short() {
+ timeout = time.NewTimer(10 * time.Millisecond)
+ } else {
+ timeout = time.NewTimer(2 * time.Second)
+ }
+
+ for {
+ select {
+ case <-timeout.C:
+ return
+ default:
+ }
+
+ rand.Read(scalar1[:])
+ rand.Read(scalar2[:])
+
+ x, y := p256.ScalarBaseMult(scalar1[:])
+ x2, y2 := p256Generic.ScalarBaseMult(scalar1[:])
+
+ xx, yy := p256.ScalarMult(x, y, scalar2[:])
+ xx2, yy2 := p256Generic.ScalarMult(x2, y2, scalar2[:])
+
+ if x.Cmp(x2) != 0 || y.Cmp(y2) != 0 {
+ t.Fatalf("ScalarBaseMult does not match reference result with scalar: %x, please report this error to security@golang.org", scalar1)
+ }
+
+ if xx.Cmp(xx2) != 0 || yy.Cmp(yy2) != 0 {
+ t.Fatalf("ScalarMult does not match reference result with scalars: %x and %x, please report this error to security@golang.org", scalar1, scalar2)
+ }
+ }
+}
diff --git a/src/crypto/elliptic/internal/fiat/Dockerfile b/src/crypto/elliptic/internal/fiat/Dockerfile
new file mode 100644
index 0000000..7b5ece0
--- /dev/null
+++ b/src/crypto/elliptic/internal/fiat/Dockerfile
@@ -0,0 +1,12 @@
+# Copyright 2021 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+FROM coqorg/coq:8.13.2
+
+RUN git clone https://github.com/mit-plv/fiat-crypto
+RUN cd fiat-crypto && git checkout c076f3550bea2bb7f4cb5766a32594b9e67694f2
+RUN cd fiat-crypto && git submodule update --init --recursive
+RUN cd fiat-crypto && eval $(opam env) && make -j4 standalone-ocaml SKIP_BEDROCK2=1
+
+ENTRYPOINT ["fiat-crypto/src/ExtractionOCaml/unsaturated_solinas"]
diff --git a/src/crypto/elliptic/internal/fiat/README b/src/crypto/elliptic/internal/fiat/README
new file mode 100644
index 0000000..171f57a
--- /dev/null
+++ b/src/crypto/elliptic/internal/fiat/README
@@ -0,0 +1,39 @@
+The code in this package was autogenerated by the fiat-crypto project
+at commit c076f3550 from a formally verified model.
+
+ docker build -t fiat-crypto:c076f3550 .
+ docker run fiat-crypto:c076f3550 --lang Go --no-wide-int --cmovznz-by-mul \
+ --internal-static --public-function-case camelCase --public-type-case camelCase \
+ --private-function-case camelCase --private-type-case camelCase \
+ --no-prefix-fiat --package-name fiat --doc-text-before-function-name '' \
+ --doc-prepend-header 'Code generated by Fiat Cryptography. DO NOT EDIT.' \
+ --doc-newline-before-package-declaration p521 64 9 '2^521 - 1' \
+ carry_mul carry_square carry add sub to_bytes from_bytes selectznz \
+ > p521_fiat64.go
+
+It comes under the following license.
+
+ Copyright (c) 2015-2020 The fiat-crypto Authors. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
+ Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+The authors are listed at
+
+ https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS
diff --git a/src/crypto/elliptic/internal/fiat/p521.go b/src/crypto/elliptic/internal/fiat/p521.go
new file mode 100644
index 0000000..dc67732
--- /dev/null
+++ b/src/crypto/elliptic/internal/fiat/p521.go
@@ -0,0 +1,197 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package fiat implements prime order fields using formally verified algorithms
+// from the Fiat Cryptography project.
+package fiat
+
+import (
+ "crypto/subtle"
+ "errors"
+)
+
+// P521Element is an integer modulo 2^521 - 1.
+//
+// The zero value is a valid zero element.
+type P521Element struct {
+ // This element has the following bounds, which are tighter than
+ // the output bounds of some operations. Those operations must be
+ // followed by a carry.
+ //
+ // [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000],
+ // [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000],
+ // [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]
+ x [9]uint64
+}
+
+// One sets e = 1, and returns e.
+func (e *P521Element) One() *P521Element {
+ *e = P521Element{}
+ e.x[0] = 1
+ return e
+}
+
+// Equal returns 1 if e == t, and zero otherwise.
+func (e *P521Element) Equal(t *P521Element) int {
+ eBytes := e.Bytes()
+ tBytes := t.Bytes()
+ return subtle.ConstantTimeCompare(eBytes, tBytes)
+}
+
+var p521ZeroEncoding = new(P521Element).Bytes()
+
+// IsZero returns 1 if e == 0, and zero otherwise.
+func (e *P521Element) IsZero() int {
+ eBytes := e.Bytes()
+ return subtle.ConstantTimeCompare(eBytes, p521ZeroEncoding)
+}
+
+// Set sets e = t, and returns e.
+func (e *P521Element) Set(t *P521Element) *P521Element {
+ e.x = t.x
+ return e
+}
+
+// Bytes returns the 66-byte little-endian encoding of e.
+func (e *P521Element) Bytes() []byte {
+ // This function must be inlined to move the allocation to the parent and
+ // save it from escaping to the heap.
+ var out [66]byte
+ p521ToBytes(&out, &e.x)
+ return out[:]
+}
+
+// SetBytes sets e = v, where v is a little-endian 66-byte encoding, and returns
+// e. If v is not 66 bytes or it encodes a value higher than 2^521 - 1, SetBytes
+// returns nil and an error, and e is unchanged.
+func (e *P521Element) SetBytes(v []byte) (*P521Element, error) {
+ if len(v) != 66 || v[65] > 1 {
+ return nil, errors.New("invalid P-521 field encoding")
+ }
+ var in [66]byte
+ copy(in[:], v)
+ p521FromBytes(&e.x, &in)
+ return e, nil
+}
+
+// Add sets e = t1 + t2, and returns e.
+func (e *P521Element) Add(t1, t2 *P521Element) *P521Element {
+ p521Add(&e.x, &t1.x, &t2.x)
+ p521Carry(&e.x, &e.x)
+ return e
+}
+
+// Sub sets e = t1 - t2, and returns e.
+func (e *P521Element) Sub(t1, t2 *P521Element) *P521Element {
+ p521Sub(&e.x, &t1.x, &t2.x)
+ p521Carry(&e.x, &e.x)
+ return e
+}
+
+// Mul sets e = t1 * t2, and returns e.
+func (e *P521Element) Mul(t1, t2 *P521Element) *P521Element {
+ p521CarryMul(&e.x, &t1.x, &t2.x)
+ return e
+}
+
+// Square sets e = t * t, and returns e.
+func (e *P521Element) Square(t *P521Element) *P521Element {
+ p521CarrySquare(&e.x, &t.x)
+ return e
+}
+
+// Select sets e to a if cond == 1, and to b if cond == 0.
+func (v *P521Element) Select(a, b *P521Element, cond int) *P521Element {
+ p521Selectznz(&v.x, p521Uint1(cond), &b.x, &a.x)
+ return v
+}
+
+// Invert sets e = 1/t, and returns e.
+//
+// If t == 0, Invert returns e = 0.
+func (e *P521Element) Invert(t *P521Element) *P521Element {
+ // Inversion is implemented as exponentiation with exponent p − 2.
+ // The sequence of multiplications and squarings was generated with
+ // github.com/mmcloughlin/addchain v0.2.0.
+
+ var t1, t2 = new(P521Element), new(P521Element)
+
+ // _10 = 2 * 1
+ t1.Square(t)
+
+ // _11 = 1 + _10
+ t1.Mul(t, t1)
+
+ // _1100 = _11 << 2
+ t2.Square(t1)
+ t2.Square(t2)
+
+ // _1111 = _11 + _1100
+ t1.Mul(t1, t2)
+
+ // _11110000 = _1111 << 4
+ t2.Square(t1)
+ for i := 0; i < 3; i++ {
+ t2.Square(t2)
+ }
+
+ // _11111111 = _1111 + _11110000
+ t1.Mul(t1, t2)
+
+ // x16 = _11111111<<8 + _11111111
+ t2.Square(t1)
+ for i := 0; i < 7; i++ {
+ t2.Square(t2)
+ }
+ t1.Mul(t1, t2)
+
+ // x32 = x16<<16 + x16
+ t2.Square(t1)
+ for i := 0; i < 15; i++ {
+ t2.Square(t2)
+ }
+ t1.Mul(t1, t2)
+
+ // x64 = x32<<32 + x32
+ t2.Square(t1)
+ for i := 0; i < 31; i++ {
+ t2.Square(t2)
+ }
+ t1.Mul(t1, t2)
+
+ // x65 = 2*x64 + 1
+ t2.Square(t1)
+ t2.Mul(t2, t)
+
+ // x129 = x65<<64 + x64
+ for i := 0; i < 64; i++ {
+ t2.Square(t2)
+ }
+ t1.Mul(t1, t2)
+
+ // x130 = 2*x129 + 1
+ t2.Square(t1)
+ t2.Mul(t2, t)
+
+ // x259 = x130<<129 + x129
+ for i := 0; i < 129; i++ {
+ t2.Square(t2)
+ }
+ t1.Mul(t1, t2)
+
+ // x260 = 2*x259 + 1
+ t2.Square(t1)
+ t2.Mul(t2, t)
+
+ // x519 = x260<<259 + x259
+ for i := 0; i < 259; i++ {
+ t2.Square(t2)
+ }
+ t1.Mul(t1, t2)
+
+ // return x519<<2 + 1
+ t1.Square(t1)
+ t1.Square(t1)
+ return e.Mul(t1, t)
+}
diff --git a/src/crypto/elliptic/internal/fiat/p521_fiat64.go b/src/crypto/elliptic/internal/fiat/p521_fiat64.go
new file mode 100644
index 0000000..f86283b
--- /dev/null
+++ b/src/crypto/elliptic/internal/fiat/p521_fiat64.go
@@ -0,0 +1,1856 @@
+// Code generated by Fiat Cryptography. DO NOT EDIT.
+//
+// Autogenerated: 'fiat-crypto/src/ExtractionOCaml/unsaturated_solinas' --lang Go --no-wide-int --cmovznz-by-mul --internal-static --public-function-case camelCase --public-type-case camelCase --private-function-case camelCase --private-type-case camelCase --no-prefix-fiat --package-name fiat --doc-text-before-function-name '' --doc-prepend-header 'Code generated by Fiat Cryptography. DO NOT EDIT.' --doc-newline-before-package-declaration p521 64 9 '2^521 - 1' carry_mul carry_square carry add sub to_bytes from_bytes selectznz
+//
+// curve description: p521
+//
+// machine_wordsize = 64 (from "64")
+//
+// requested operations: carry_mul, carry_square, carry, add, sub, to_bytes, from_bytes, selectznz
+//
+// n = 9 (from "9")
+//
+// s-c = 2^521 - [(1, 1)] (from "2^521 - 1")
+//
+// tight_bounds_multiplier = 1 (from "")
+//
+//
+//
+// Computed values:
+//
+// carry_chain = [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1]
+//
+// eval z = z[0] + (z[1] << 58) + (z[2] << 116) + (z[3] << 174) + (z[4] << 232) + (z[5] << 0x122) + (z[6] << 0x15c) + (z[7] << 0x196) + (z[8] << 0x1d0)
+//
+// bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) + (z[48] << 0x180) + (z[49] << 0x188) + (z[50] << 0x190) + (z[51] << 0x198) + (z[52] << 0x1a0) + (z[53] << 0x1a8) + (z[54] << 0x1b0) + (z[55] << 0x1b8) + (z[56] << 0x1c0) + (z[57] << 0x1c8) + (z[58] << 0x1d0) + (z[59] << 0x1d8) + (z[60] << 0x1e0) + (z[61] << 0x1e8) + (z[62] << 0x1f0) + (z[63] << 0x1f8) + (z[64] << 2^9) + (z[65] << 0x208)
+//
+// balance = [0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x3fffffffffffffe]
+
+package fiat
+
+import "math/bits"
+
+type p521Uint1 uint8
+type p521Int1 int8
+
+// p521AddcarryxU64 is a thin wrapper around bits.Add64 that uses p521Uint1 rather than uint64
+func p521AddcarryxU64(x uint64, y uint64, carry p521Uint1) (uint64, p521Uint1) {
+ sum, carryOut := bits.Add64(x, y, uint64(carry))
+ return sum, p521Uint1(carryOut)
+}
+
+// p521SubborrowxU64 is a thin wrapper around bits.Sub64 that uses p521Uint1 rather than uint64
+func p521SubborrowxU64(x uint64, y uint64, carry p521Uint1) (uint64, p521Uint1) {
+ sum, carryOut := bits.Sub64(x, y, uint64(carry))
+ return sum, p521Uint1(carryOut)
+}
+
+// p521AddcarryxU58 is an addition with carry.
+//
+// Postconditions:
+// out1 = (arg1 + arg2 + arg3) mod 2^58
+// out2 = ⌊(arg1 + arg2 + arg3) / 2^58⌋
+//
+// Input Bounds:
+// arg1: [0x0 ~> 0x1]
+// arg2: [0x0 ~> 0x3ffffffffffffff]
+// arg3: [0x0 ~> 0x3ffffffffffffff]
+// Output Bounds:
+// out1: [0x0 ~> 0x3ffffffffffffff]
+// out2: [0x0 ~> 0x1]
+func p521AddcarryxU58(out1 *uint64, out2 *p521Uint1, arg1 p521Uint1, arg2 uint64, arg3 uint64) {
+ x1 := ((uint64(arg1) + arg2) + arg3)
+ x2 := (x1 & 0x3ffffffffffffff)
+ x3 := p521Uint1((x1 >> 58))
+ *out1 = x2
+ *out2 = x3
+}
+
+// p521SubborrowxU58 is a subtraction with borrow.
+//
+// Postconditions:
+// out1 = (-arg1 + arg2 + -arg3) mod 2^58
+// out2 = -⌊(-arg1 + arg2 + -arg3) / 2^58⌋
+//
+// Input Bounds:
+// arg1: [0x0 ~> 0x1]
+// arg2: [0x0 ~> 0x3ffffffffffffff]
+// arg3: [0x0 ~> 0x3ffffffffffffff]
+// Output Bounds:
+// out1: [0x0 ~> 0x3ffffffffffffff]
+// out2: [0x0 ~> 0x1]
+func p521SubborrowxU58(out1 *uint64, out2 *p521Uint1, arg1 p521Uint1, arg2 uint64, arg3 uint64) {
+ x1 := ((int64(arg2) - int64(arg1)) - int64(arg3))
+ x2 := p521Int1((x1 >> 58))
+ x3 := (uint64(x1) & 0x3ffffffffffffff)
+ *out1 = x3
+ *out2 = (0x0 - p521Uint1(x2))
+}
+
+// p521AddcarryxU57 is an addition with carry.
+//
+// Postconditions:
+// out1 = (arg1 + arg2 + arg3) mod 2^57
+// out2 = ⌊(arg1 + arg2 + arg3) / 2^57⌋
+//
+// Input Bounds:
+// arg1: [0x0 ~> 0x1]
+// arg2: [0x0 ~> 0x1ffffffffffffff]
+// arg3: [0x0 ~> 0x1ffffffffffffff]
+// Output Bounds:
+// out1: [0x0 ~> 0x1ffffffffffffff]
+// out2: [0x0 ~> 0x1]
+func p521AddcarryxU57(out1 *uint64, out2 *p521Uint1, arg1 p521Uint1, arg2 uint64, arg3 uint64) {
+ x1 := ((uint64(arg1) + arg2) + arg3)
+ x2 := (x1 & 0x1ffffffffffffff)
+ x3 := p521Uint1((x1 >> 57))
+ *out1 = x2
+ *out2 = x3
+}
+
+// p521SubborrowxU57 is a subtraction with borrow.
+//
+// Postconditions:
+// out1 = (-arg1 + arg2 + -arg3) mod 2^57
+// out2 = -⌊(-arg1 + arg2 + -arg3) / 2^57⌋
+//
+// Input Bounds:
+// arg1: [0x0 ~> 0x1]
+// arg2: [0x0 ~> 0x1ffffffffffffff]
+// arg3: [0x0 ~> 0x1ffffffffffffff]
+// Output Bounds:
+// out1: [0x0 ~> 0x1ffffffffffffff]
+// out2: [0x0 ~> 0x1]
+func p521SubborrowxU57(out1 *uint64, out2 *p521Uint1, arg1 p521Uint1, arg2 uint64, arg3 uint64) {
+ x1 := ((int64(arg2) - int64(arg1)) - int64(arg3))
+ x2 := p521Int1((x1 >> 57))
+ x3 := (uint64(x1) & 0x1ffffffffffffff)
+ *out1 = x3
+ *out2 = (0x0 - p521Uint1(x2))
+}
+
+// p521CmovznzU64 is a single-word conditional move.
+//
+// Postconditions:
+// out1 = (if arg1 = 0 then arg2 else arg3)
+//
+// Input Bounds:
+// arg1: [0x0 ~> 0x1]
+// arg2: [0x0 ~> 0xffffffffffffffff]
+// arg3: [0x0 ~> 0xffffffffffffffff]
+// Output Bounds:
+// out1: [0x0 ~> 0xffffffffffffffff]
+func p521CmovznzU64(out1 *uint64, arg1 p521Uint1, arg2 uint64, arg3 uint64) {
+ x1 := (uint64(arg1) * 0xffffffffffffffff)
+ x2 := ((x1 & arg3) | ((^x1) & arg2))
+ *out1 = x2
+}
+
+// p521CarryMul multiplies two field elements and reduces the result.
+//
+// Postconditions:
+// eval out1 mod m = (eval arg1 * eval arg2) mod m
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]]
+// arg2: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]]
+// Output Bounds:
+// out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+func p521CarryMul(out1 *[9]uint64, arg1 *[9]uint64, arg2 *[9]uint64) {
+ var x1 uint64
+ var x2 uint64
+ x2, x1 = bits.Mul64(arg1[8], (arg2[8] * 0x2))
+ var x3 uint64
+ var x4 uint64
+ x4, x3 = bits.Mul64(arg1[8], (arg2[7] * 0x2))
+ var x5 uint64
+ var x6 uint64
+ x6, x5 = bits.Mul64(arg1[8], (arg2[6] * 0x2))
+ var x7 uint64
+ var x8 uint64
+ x8, x7 = bits.Mul64(arg1[8], (arg2[5] * 0x2))
+ var x9 uint64
+ var x10 uint64
+ x10, x9 = bits.Mul64(arg1[8], (arg2[4] * 0x2))
+ var x11 uint64
+ var x12 uint64
+ x12, x11 = bits.Mul64(arg1[8], (arg2[3] * 0x2))
+ var x13 uint64
+ var x14 uint64
+ x14, x13 = bits.Mul64(arg1[8], (arg2[2] * 0x2))
+ var x15 uint64
+ var x16 uint64
+ x16, x15 = bits.Mul64(arg1[8], (arg2[1] * 0x2))
+ var x17 uint64
+ var x18 uint64
+ x18, x17 = bits.Mul64(arg1[7], (arg2[8] * 0x2))
+ var x19 uint64
+ var x20 uint64
+ x20, x19 = bits.Mul64(arg1[7], (arg2[7] * 0x2))
+ var x21 uint64
+ var x22 uint64
+ x22, x21 = bits.Mul64(arg1[7], (arg2[6] * 0x2))
+ var x23 uint64
+ var x24 uint64
+ x24, x23 = bits.Mul64(arg1[7], (arg2[5] * 0x2))
+ var x25 uint64
+ var x26 uint64
+ x26, x25 = bits.Mul64(arg1[7], (arg2[4] * 0x2))
+ var x27 uint64
+ var x28 uint64
+ x28, x27 = bits.Mul64(arg1[7], (arg2[3] * 0x2))
+ var x29 uint64
+ var x30 uint64
+ x30, x29 = bits.Mul64(arg1[7], (arg2[2] * 0x2))
+ var x31 uint64
+ var x32 uint64
+ x32, x31 = bits.Mul64(arg1[6], (arg2[8] * 0x2))
+ var x33 uint64
+ var x34 uint64
+ x34, x33 = bits.Mul64(arg1[6], (arg2[7] * 0x2))
+ var x35 uint64
+ var x36 uint64
+ x36, x35 = bits.Mul64(arg1[6], (arg2[6] * 0x2))
+ var x37 uint64
+ var x38 uint64
+ x38, x37 = bits.Mul64(arg1[6], (arg2[5] * 0x2))
+ var x39 uint64
+ var x40 uint64
+ x40, x39 = bits.Mul64(arg1[6], (arg2[4] * 0x2))
+ var x41 uint64
+ var x42 uint64
+ x42, x41 = bits.Mul64(arg1[6], (arg2[3] * 0x2))
+ var x43 uint64
+ var x44 uint64
+ x44, x43 = bits.Mul64(arg1[5], (arg2[8] * 0x2))
+ var x45 uint64
+ var x46 uint64
+ x46, x45 = bits.Mul64(arg1[5], (arg2[7] * 0x2))
+ var x47 uint64
+ var x48 uint64
+ x48, x47 = bits.Mul64(arg1[5], (arg2[6] * 0x2))
+ var x49 uint64
+ var x50 uint64
+ x50, x49 = bits.Mul64(arg1[5], (arg2[5] * 0x2))
+ var x51 uint64
+ var x52 uint64
+ x52, x51 = bits.Mul64(arg1[5], (arg2[4] * 0x2))
+ var x53 uint64
+ var x54 uint64
+ x54, x53 = bits.Mul64(arg1[4], (arg2[8] * 0x2))
+ var x55 uint64
+ var x56 uint64
+ x56, x55 = bits.Mul64(arg1[4], (arg2[7] * 0x2))
+ var x57 uint64
+ var x58 uint64
+ x58, x57 = bits.Mul64(arg1[4], (arg2[6] * 0x2))
+ var x59 uint64
+ var x60 uint64
+ x60, x59 = bits.Mul64(arg1[4], (arg2[5] * 0x2))
+ var x61 uint64
+ var x62 uint64
+ x62, x61 = bits.Mul64(arg1[3], (arg2[8] * 0x2))
+ var x63 uint64
+ var x64 uint64
+ x64, x63 = bits.Mul64(arg1[3], (arg2[7] * 0x2))
+ var x65 uint64
+ var x66 uint64
+ x66, x65 = bits.Mul64(arg1[3], (arg2[6] * 0x2))
+ var x67 uint64
+ var x68 uint64
+ x68, x67 = bits.Mul64(arg1[2], (arg2[8] * 0x2))
+ var x69 uint64
+ var x70 uint64
+ x70, x69 = bits.Mul64(arg1[2], (arg2[7] * 0x2))
+ var x71 uint64
+ var x72 uint64
+ x72, x71 = bits.Mul64(arg1[1], (arg2[8] * 0x2))
+ var x73 uint64
+ var x74 uint64
+ x74, x73 = bits.Mul64(arg1[8], arg2[0])
+ var x75 uint64
+ var x76 uint64
+ x76, x75 = bits.Mul64(arg1[7], arg2[1])
+ var x77 uint64
+ var x78 uint64
+ x78, x77 = bits.Mul64(arg1[7], arg2[0])
+ var x79 uint64
+ var x80 uint64
+ x80, x79 = bits.Mul64(arg1[6], arg2[2])
+ var x81 uint64
+ var x82 uint64
+ x82, x81 = bits.Mul64(arg1[6], arg2[1])
+ var x83 uint64
+ var x84 uint64
+ x84, x83 = bits.Mul64(arg1[6], arg2[0])
+ var x85 uint64
+ var x86 uint64
+ x86, x85 = bits.Mul64(arg1[5], arg2[3])
+ var x87 uint64
+ var x88 uint64
+ x88, x87 = bits.Mul64(arg1[5], arg2[2])
+ var x89 uint64
+ var x90 uint64
+ x90, x89 = bits.Mul64(arg1[5], arg2[1])
+ var x91 uint64
+ var x92 uint64
+ x92, x91 = bits.Mul64(arg1[5], arg2[0])
+ var x93 uint64
+ var x94 uint64
+ x94, x93 = bits.Mul64(arg1[4], arg2[4])
+ var x95 uint64
+ var x96 uint64
+ x96, x95 = bits.Mul64(arg1[4], arg2[3])
+ var x97 uint64
+ var x98 uint64
+ x98, x97 = bits.Mul64(arg1[4], arg2[2])
+ var x99 uint64
+ var x100 uint64
+ x100, x99 = bits.Mul64(arg1[4], arg2[1])
+ var x101 uint64
+ var x102 uint64
+ x102, x101 = bits.Mul64(arg1[4], arg2[0])
+ var x103 uint64
+ var x104 uint64
+ x104, x103 = bits.Mul64(arg1[3], arg2[5])
+ var x105 uint64
+ var x106 uint64
+ x106, x105 = bits.Mul64(arg1[3], arg2[4])
+ var x107 uint64
+ var x108 uint64
+ x108, x107 = bits.Mul64(arg1[3], arg2[3])
+ var x109 uint64
+ var x110 uint64
+ x110, x109 = bits.Mul64(arg1[3], arg2[2])
+ var x111 uint64
+ var x112 uint64
+ x112, x111 = bits.Mul64(arg1[3], arg2[1])
+ var x113 uint64
+ var x114 uint64
+ x114, x113 = bits.Mul64(arg1[3], arg2[0])
+ var x115 uint64
+ var x116 uint64
+ x116, x115 = bits.Mul64(arg1[2], arg2[6])
+ var x117 uint64
+ var x118 uint64
+ x118, x117 = bits.Mul64(arg1[2], arg2[5])
+ var x119 uint64
+ var x120 uint64
+ x120, x119 = bits.Mul64(arg1[2], arg2[4])
+ var x121 uint64
+ var x122 uint64
+ x122, x121 = bits.Mul64(arg1[2], arg2[3])
+ var x123 uint64
+ var x124 uint64
+ x124, x123 = bits.Mul64(arg1[2], arg2[2])
+ var x125 uint64
+ var x126 uint64
+ x126, x125 = bits.Mul64(arg1[2], arg2[1])
+ var x127 uint64
+ var x128 uint64
+ x128, x127 = bits.Mul64(arg1[2], arg2[0])
+ var x129 uint64
+ var x130 uint64
+ x130, x129 = bits.Mul64(arg1[1], arg2[7])
+ var x131 uint64
+ var x132 uint64
+ x132, x131 = bits.Mul64(arg1[1], arg2[6])
+ var x133 uint64
+ var x134 uint64
+ x134, x133 = bits.Mul64(arg1[1], arg2[5])
+ var x135 uint64
+ var x136 uint64
+ x136, x135 = bits.Mul64(arg1[1], arg2[4])
+ var x137 uint64
+ var x138 uint64
+ x138, x137 = bits.Mul64(arg1[1], arg2[3])
+ var x139 uint64
+ var x140 uint64
+ x140, x139 = bits.Mul64(arg1[1], arg2[2])
+ var x141 uint64
+ var x142 uint64
+ x142, x141 = bits.Mul64(arg1[1], arg2[1])
+ var x143 uint64
+ var x144 uint64
+ x144, x143 = bits.Mul64(arg1[1], arg2[0])
+ var x145 uint64
+ var x146 uint64
+ x146, x145 = bits.Mul64(arg1[0], arg2[8])
+ var x147 uint64
+ var x148 uint64
+ x148, x147 = bits.Mul64(arg1[0], arg2[7])
+ var x149 uint64
+ var x150 uint64
+ x150, x149 = bits.Mul64(arg1[0], arg2[6])
+ var x151 uint64
+ var x152 uint64
+ x152, x151 = bits.Mul64(arg1[0], arg2[5])
+ var x153 uint64
+ var x154 uint64
+ x154, x153 = bits.Mul64(arg1[0], arg2[4])
+ var x155 uint64
+ var x156 uint64
+ x156, x155 = bits.Mul64(arg1[0], arg2[3])
+ var x157 uint64
+ var x158 uint64
+ x158, x157 = bits.Mul64(arg1[0], arg2[2])
+ var x159 uint64
+ var x160 uint64
+ x160, x159 = bits.Mul64(arg1[0], arg2[1])
+ var x161 uint64
+ var x162 uint64
+ x162, x161 = bits.Mul64(arg1[0], arg2[0])
+ var x163 uint64
+ var x164 p521Uint1
+ x163, x164 = p521AddcarryxU64(x29, x15, 0x0)
+ var x165 uint64
+ x165, _ = p521AddcarryxU64(x30, x16, x164)
+ var x167 uint64
+ var x168 p521Uint1
+ x167, x168 = p521AddcarryxU64(x41, x163, 0x0)
+ var x169 uint64
+ x169, _ = p521AddcarryxU64(x42, x165, x168)
+ var x171 uint64
+ var x172 p521Uint1
+ x171, x172 = p521AddcarryxU64(x51, x167, 0x0)
+ var x173 uint64
+ x173, _ = p521AddcarryxU64(x52, x169, x172)
+ var x175 uint64
+ var x176 p521Uint1
+ x175, x176 = p521AddcarryxU64(x59, x171, 0x0)
+ var x177 uint64
+ x177, _ = p521AddcarryxU64(x60, x173, x176)
+ var x179 uint64
+ var x180 p521Uint1
+ x179, x180 = p521AddcarryxU64(x65, x175, 0x0)
+ var x181 uint64
+ x181, _ = p521AddcarryxU64(x66, x177, x180)
+ var x183 uint64
+ var x184 p521Uint1
+ x183, x184 = p521AddcarryxU64(x69, x179, 0x0)
+ var x185 uint64
+ x185, _ = p521AddcarryxU64(x70, x181, x184)
+ var x187 uint64
+ var x188 p521Uint1
+ x187, x188 = p521AddcarryxU64(x71, x183, 0x0)
+ var x189 uint64
+ x189, _ = p521AddcarryxU64(x72, x185, x188)
+ var x191 uint64
+ var x192 p521Uint1
+ x191, x192 = p521AddcarryxU64(x161, x187, 0x0)
+ var x193 uint64
+ x193, _ = p521AddcarryxU64(x162, x189, x192)
+ x195 := ((x191 >> 58) | ((x193 << 6) & 0xffffffffffffffff))
+ x196 := (x193 >> 58)
+ x197 := (x191 & 0x3ffffffffffffff)
+ var x198 uint64
+ var x199 p521Uint1
+ x198, x199 = p521AddcarryxU64(x75, x73, 0x0)
+ var x200 uint64
+ x200, _ = p521AddcarryxU64(x76, x74, x199)
+ var x202 uint64
+ var x203 p521Uint1
+ x202, x203 = p521AddcarryxU64(x79, x198, 0x0)
+ var x204 uint64
+ x204, _ = p521AddcarryxU64(x80, x200, x203)
+ var x206 uint64
+ var x207 p521Uint1
+ x206, x207 = p521AddcarryxU64(x85, x202, 0x0)
+ var x208 uint64
+ x208, _ = p521AddcarryxU64(x86, x204, x207)
+ var x210 uint64
+ var x211 p521Uint1
+ x210, x211 = p521AddcarryxU64(x93, x206, 0x0)
+ var x212 uint64
+ x212, _ = p521AddcarryxU64(x94, x208, x211)
+ var x214 uint64
+ var x215 p521Uint1
+ x214, x215 = p521AddcarryxU64(x103, x210, 0x0)
+ var x216 uint64
+ x216, _ = p521AddcarryxU64(x104, x212, x215)
+ var x218 uint64
+ var x219 p521Uint1
+ x218, x219 = p521AddcarryxU64(x115, x214, 0x0)
+ var x220 uint64
+ x220, _ = p521AddcarryxU64(x116, x216, x219)
+ var x222 uint64
+ var x223 p521Uint1
+ x222, x223 = p521AddcarryxU64(x129, x218, 0x0)
+ var x224 uint64
+ x224, _ = p521AddcarryxU64(x130, x220, x223)
+ var x226 uint64
+ var x227 p521Uint1
+ x226, x227 = p521AddcarryxU64(x145, x222, 0x0)
+ var x228 uint64
+ x228, _ = p521AddcarryxU64(x146, x224, x227)
+ var x230 uint64
+ var x231 p521Uint1
+ x230, x231 = p521AddcarryxU64(x77, x1, 0x0)
+ var x232 uint64
+ x232, _ = p521AddcarryxU64(x78, x2, x231)
+ var x234 uint64
+ var x235 p521Uint1
+ x234, x235 = p521AddcarryxU64(x81, x230, 0x0)
+ var x236 uint64
+ x236, _ = p521AddcarryxU64(x82, x232, x235)
+ var x238 uint64
+ var x239 p521Uint1
+ x238, x239 = p521AddcarryxU64(x87, x234, 0x0)
+ var x240 uint64
+ x240, _ = p521AddcarryxU64(x88, x236, x239)
+ var x242 uint64
+ var x243 p521Uint1
+ x242, x243 = p521AddcarryxU64(x95, x238, 0x0)
+ var x244 uint64
+ x244, _ = p521AddcarryxU64(x96, x240, x243)
+ var x246 uint64
+ var x247 p521Uint1
+ x246, x247 = p521AddcarryxU64(x105, x242, 0x0)
+ var x248 uint64
+ x248, _ = p521AddcarryxU64(x106, x244, x247)
+ var x250 uint64
+ var x251 p521Uint1
+ x250, x251 = p521AddcarryxU64(x117, x246, 0x0)
+ var x252 uint64
+ x252, _ = p521AddcarryxU64(x118, x248, x251)
+ var x254 uint64
+ var x255 p521Uint1
+ x254, x255 = p521AddcarryxU64(x131, x250, 0x0)
+ var x256 uint64
+ x256, _ = p521AddcarryxU64(x132, x252, x255)
+ var x258 uint64
+ var x259 p521Uint1
+ x258, x259 = p521AddcarryxU64(x147, x254, 0x0)
+ var x260 uint64
+ x260, _ = p521AddcarryxU64(x148, x256, x259)
+ var x262 uint64
+ var x263 p521Uint1
+ x262, x263 = p521AddcarryxU64(x17, x3, 0x0)
+ var x264 uint64
+ x264, _ = p521AddcarryxU64(x18, x4, x263)
+ var x266 uint64
+ var x267 p521Uint1
+ x266, x267 = p521AddcarryxU64(x83, x262, 0x0)
+ var x268 uint64
+ x268, _ = p521AddcarryxU64(x84, x264, x267)
+ var x270 uint64
+ var x271 p521Uint1
+ x270, x271 = p521AddcarryxU64(x89, x266, 0x0)
+ var x272 uint64
+ x272, _ = p521AddcarryxU64(x90, x268, x271)
+ var x274 uint64
+ var x275 p521Uint1
+ x274, x275 = p521AddcarryxU64(x97, x270, 0x0)
+ var x276 uint64
+ x276, _ = p521AddcarryxU64(x98, x272, x275)
+ var x278 uint64
+ var x279 p521Uint1
+ x278, x279 = p521AddcarryxU64(x107, x274, 0x0)
+ var x280 uint64
+ x280, _ = p521AddcarryxU64(x108, x276, x279)
+ var x282 uint64
+ var x283 p521Uint1
+ x282, x283 = p521AddcarryxU64(x119, x278, 0x0)
+ var x284 uint64
+ x284, _ = p521AddcarryxU64(x120, x280, x283)
+ var x286 uint64
+ var x287 p521Uint1
+ x286, x287 = p521AddcarryxU64(x133, x282, 0x0)
+ var x288 uint64
+ x288, _ = p521AddcarryxU64(x134, x284, x287)
+ var x290 uint64
+ var x291 p521Uint1
+ x290, x291 = p521AddcarryxU64(x149, x286, 0x0)
+ var x292 uint64
+ x292, _ = p521AddcarryxU64(x150, x288, x291)
+ var x294 uint64
+ var x295 p521Uint1
+ x294, x295 = p521AddcarryxU64(x19, x5, 0x0)
+ var x296 uint64
+ x296, _ = p521AddcarryxU64(x20, x6, x295)
+ var x298 uint64
+ var x299 p521Uint1
+ x298, x299 = p521AddcarryxU64(x31, x294, 0x0)
+ var x300 uint64
+ x300, _ = p521AddcarryxU64(x32, x296, x299)
+ var x302 uint64
+ var x303 p521Uint1
+ x302, x303 = p521AddcarryxU64(x91, x298, 0x0)
+ var x304 uint64
+ x304, _ = p521AddcarryxU64(x92, x300, x303)
+ var x306 uint64
+ var x307 p521Uint1
+ x306, x307 = p521AddcarryxU64(x99, x302, 0x0)
+ var x308 uint64
+ x308, _ = p521AddcarryxU64(x100, x304, x307)
+ var x310 uint64
+ var x311 p521Uint1
+ x310, x311 = p521AddcarryxU64(x109, x306, 0x0)
+ var x312 uint64
+ x312, _ = p521AddcarryxU64(x110, x308, x311)
+ var x314 uint64
+ var x315 p521Uint1
+ x314, x315 = p521AddcarryxU64(x121, x310, 0x0)
+ var x316 uint64
+ x316, _ = p521AddcarryxU64(x122, x312, x315)
+ var x318 uint64
+ var x319 p521Uint1
+ x318, x319 = p521AddcarryxU64(x135, x314, 0x0)
+ var x320 uint64
+ x320, _ = p521AddcarryxU64(x136, x316, x319)
+ var x322 uint64
+ var x323 p521Uint1
+ x322, x323 = p521AddcarryxU64(x151, x318, 0x0)
+ var x324 uint64
+ x324, _ = p521AddcarryxU64(x152, x320, x323)
+ var x326 uint64
+ var x327 p521Uint1
+ x326, x327 = p521AddcarryxU64(x21, x7, 0x0)
+ var x328 uint64
+ x328, _ = p521AddcarryxU64(x22, x8, x327)
+ var x330 uint64
+ var x331 p521Uint1
+ x330, x331 = p521AddcarryxU64(x33, x326, 0x0)
+ var x332 uint64
+ x332, _ = p521AddcarryxU64(x34, x328, x331)
+ var x334 uint64
+ var x335 p521Uint1
+ x334, x335 = p521AddcarryxU64(x43, x330, 0x0)
+ var x336 uint64
+ x336, _ = p521AddcarryxU64(x44, x332, x335)
+ var x338 uint64
+ var x339 p521Uint1
+ x338, x339 = p521AddcarryxU64(x101, x334, 0x0)
+ var x340 uint64
+ x340, _ = p521AddcarryxU64(x102, x336, x339)
+ var x342 uint64
+ var x343 p521Uint1
+ x342, x343 = p521AddcarryxU64(x111, x338, 0x0)
+ var x344 uint64
+ x344, _ = p521AddcarryxU64(x112, x340, x343)
+ var x346 uint64
+ var x347 p521Uint1
+ x346, x347 = p521AddcarryxU64(x123, x342, 0x0)
+ var x348 uint64
+ x348, _ = p521AddcarryxU64(x124, x344, x347)
+ var x350 uint64
+ var x351 p521Uint1
+ x350, x351 = p521AddcarryxU64(x137, x346, 0x0)
+ var x352 uint64
+ x352, _ = p521AddcarryxU64(x138, x348, x351)
+ var x354 uint64
+ var x355 p521Uint1
+ x354, x355 = p521AddcarryxU64(x153, x350, 0x0)
+ var x356 uint64
+ x356, _ = p521AddcarryxU64(x154, x352, x355)
+ var x358 uint64
+ var x359 p521Uint1
+ x358, x359 = p521AddcarryxU64(x23, x9, 0x0)
+ var x360 uint64
+ x360, _ = p521AddcarryxU64(x24, x10, x359)
+ var x362 uint64
+ var x363 p521Uint1
+ x362, x363 = p521AddcarryxU64(x35, x358, 0x0)
+ var x364 uint64
+ x364, _ = p521AddcarryxU64(x36, x360, x363)
+ var x366 uint64
+ var x367 p521Uint1
+ x366, x367 = p521AddcarryxU64(x45, x362, 0x0)
+ var x368 uint64
+ x368, _ = p521AddcarryxU64(x46, x364, x367)
+ var x370 uint64
+ var x371 p521Uint1
+ x370, x371 = p521AddcarryxU64(x53, x366, 0x0)
+ var x372 uint64
+ x372, _ = p521AddcarryxU64(x54, x368, x371)
+ var x374 uint64
+ var x375 p521Uint1
+ x374, x375 = p521AddcarryxU64(x113, x370, 0x0)
+ var x376 uint64
+ x376, _ = p521AddcarryxU64(x114, x372, x375)
+ var x378 uint64
+ var x379 p521Uint1
+ x378, x379 = p521AddcarryxU64(x125, x374, 0x0)
+ var x380 uint64
+ x380, _ = p521AddcarryxU64(x126, x376, x379)
+ var x382 uint64
+ var x383 p521Uint1
+ x382, x383 = p521AddcarryxU64(x139, x378, 0x0)
+ var x384 uint64
+ x384, _ = p521AddcarryxU64(x140, x380, x383)
+ var x386 uint64
+ var x387 p521Uint1
+ x386, x387 = p521AddcarryxU64(x155, x382, 0x0)
+ var x388 uint64
+ x388, _ = p521AddcarryxU64(x156, x384, x387)
+ var x390 uint64
+ var x391 p521Uint1
+ x390, x391 = p521AddcarryxU64(x25, x11, 0x0)
+ var x392 uint64
+ x392, _ = p521AddcarryxU64(x26, x12, x391)
+ var x394 uint64
+ var x395 p521Uint1
+ x394, x395 = p521AddcarryxU64(x37, x390, 0x0)
+ var x396 uint64
+ x396, _ = p521AddcarryxU64(x38, x392, x395)
+ var x398 uint64
+ var x399 p521Uint1
+ x398, x399 = p521AddcarryxU64(x47, x394, 0x0)
+ var x400 uint64
+ x400, _ = p521AddcarryxU64(x48, x396, x399)
+ var x402 uint64
+ var x403 p521Uint1
+ x402, x403 = p521AddcarryxU64(x55, x398, 0x0)
+ var x404 uint64
+ x404, _ = p521AddcarryxU64(x56, x400, x403)
+ var x406 uint64
+ var x407 p521Uint1
+ x406, x407 = p521AddcarryxU64(x61, x402, 0x0)
+ var x408 uint64
+ x408, _ = p521AddcarryxU64(x62, x404, x407)
+ var x410 uint64
+ var x411 p521Uint1
+ x410, x411 = p521AddcarryxU64(x127, x406, 0x0)
+ var x412 uint64
+ x412, _ = p521AddcarryxU64(x128, x408, x411)
+ var x414 uint64
+ var x415 p521Uint1
+ x414, x415 = p521AddcarryxU64(x141, x410, 0x0)
+ var x416 uint64
+ x416, _ = p521AddcarryxU64(x142, x412, x415)
+ var x418 uint64
+ var x419 p521Uint1
+ x418, x419 = p521AddcarryxU64(x157, x414, 0x0)
+ var x420 uint64
+ x420, _ = p521AddcarryxU64(x158, x416, x419)
+ var x422 uint64
+ var x423 p521Uint1
+ x422, x423 = p521AddcarryxU64(x27, x13, 0x0)
+ var x424 uint64
+ x424, _ = p521AddcarryxU64(x28, x14, x423)
+ var x426 uint64
+ var x427 p521Uint1
+ x426, x427 = p521AddcarryxU64(x39, x422, 0x0)
+ var x428 uint64
+ x428, _ = p521AddcarryxU64(x40, x424, x427)
+ var x430 uint64
+ var x431 p521Uint1
+ x430, x431 = p521AddcarryxU64(x49, x426, 0x0)
+ var x432 uint64
+ x432, _ = p521AddcarryxU64(x50, x428, x431)
+ var x434 uint64
+ var x435 p521Uint1
+ x434, x435 = p521AddcarryxU64(x57, x430, 0x0)
+ var x436 uint64
+ x436, _ = p521AddcarryxU64(x58, x432, x435)
+ var x438 uint64
+ var x439 p521Uint1
+ x438, x439 = p521AddcarryxU64(x63, x434, 0x0)
+ var x440 uint64
+ x440, _ = p521AddcarryxU64(x64, x436, x439)
+ var x442 uint64
+ var x443 p521Uint1
+ x442, x443 = p521AddcarryxU64(x67, x438, 0x0)
+ var x444 uint64
+ x444, _ = p521AddcarryxU64(x68, x440, x443)
+ var x446 uint64
+ var x447 p521Uint1
+ x446, x447 = p521AddcarryxU64(x143, x442, 0x0)
+ var x448 uint64
+ x448, _ = p521AddcarryxU64(x144, x444, x447)
+ var x450 uint64
+ var x451 p521Uint1
+ x450, x451 = p521AddcarryxU64(x159, x446, 0x0)
+ var x452 uint64
+ x452, _ = p521AddcarryxU64(x160, x448, x451)
+ var x454 uint64
+ var x455 p521Uint1
+ x454, x455 = p521AddcarryxU64(x195, x450, 0x0)
+ var x456 uint64
+ x456, _ = p521AddcarryxU64(x196, x452, x455)
+ x458 := ((x454 >> 58) | ((x456 << 6) & 0xffffffffffffffff))
+ x459 := (x456 >> 58)
+ x460 := (x454 & 0x3ffffffffffffff)
+ var x461 uint64
+ var x462 p521Uint1
+ x461, x462 = p521AddcarryxU64(x458, x418, 0x0)
+ var x463 uint64
+ x463, _ = p521AddcarryxU64(x459, x420, x462)
+ x465 := ((x461 >> 58) | ((x463 << 6) & 0xffffffffffffffff))
+ x466 := (x463 >> 58)
+ x467 := (x461 & 0x3ffffffffffffff)
+ var x468 uint64
+ var x469 p521Uint1
+ x468, x469 = p521AddcarryxU64(x465, x386, 0x0)
+ var x470 uint64
+ x470, _ = p521AddcarryxU64(x466, x388, x469)
+ x472 := ((x468 >> 58) | ((x470 << 6) & 0xffffffffffffffff))
+ x473 := (x470 >> 58)
+ x474 := (x468 & 0x3ffffffffffffff)
+ var x475 uint64
+ var x476 p521Uint1
+ x475, x476 = p521AddcarryxU64(x472, x354, 0x0)
+ var x477 uint64
+ x477, _ = p521AddcarryxU64(x473, x356, x476)
+ x479 := ((x475 >> 58) | ((x477 << 6) & 0xffffffffffffffff))
+ x480 := (x477 >> 58)
+ x481 := (x475 & 0x3ffffffffffffff)
+ var x482 uint64
+ var x483 p521Uint1
+ x482, x483 = p521AddcarryxU64(x479, x322, 0x0)
+ var x484 uint64
+ x484, _ = p521AddcarryxU64(x480, x324, x483)
+ x486 := ((x482 >> 58) | ((x484 << 6) & 0xffffffffffffffff))
+ x487 := (x484 >> 58)
+ x488 := (x482 & 0x3ffffffffffffff)
+ var x489 uint64
+ var x490 p521Uint1
+ x489, x490 = p521AddcarryxU64(x486, x290, 0x0)
+ var x491 uint64
+ x491, _ = p521AddcarryxU64(x487, x292, x490)
+ x493 := ((x489 >> 58) | ((x491 << 6) & 0xffffffffffffffff))
+ x494 := (x491 >> 58)
+ x495 := (x489 & 0x3ffffffffffffff)
+ var x496 uint64
+ var x497 p521Uint1
+ x496, x497 = p521AddcarryxU64(x493, x258, 0x0)
+ var x498 uint64
+ x498, _ = p521AddcarryxU64(x494, x260, x497)
+ x500 := ((x496 >> 58) | ((x498 << 6) & 0xffffffffffffffff))
+ x501 := (x498 >> 58)
+ x502 := (x496 & 0x3ffffffffffffff)
+ var x503 uint64
+ var x504 p521Uint1
+ x503, x504 = p521AddcarryxU64(x500, x226, 0x0)
+ var x505 uint64
+ x505, _ = p521AddcarryxU64(x501, x228, x504)
+ x507 := ((x503 >> 57) | ((x505 << 7) & 0xffffffffffffffff))
+ x508 := (x505 >> 57)
+ x509 := (x503 & 0x1ffffffffffffff)
+ var x510 uint64
+ var x511 p521Uint1
+ x510, x511 = p521AddcarryxU64(x197, x507, 0x0)
+ x512 := (uint64(x511) + x508)
+ x513 := ((x510 >> 58) | ((x512 << 6) & 0xffffffffffffffff))
+ x514 := (x510 & 0x3ffffffffffffff)
+ x515 := (x513 + x460)
+ x516 := p521Uint1((x515 >> 58))
+ x517 := (x515 & 0x3ffffffffffffff)
+ x518 := (uint64(x516) + x467)
+ out1[0] = x514
+ out1[1] = x517
+ out1[2] = x518
+ out1[3] = x474
+ out1[4] = x481
+ out1[5] = x488
+ out1[6] = x495
+ out1[7] = x502
+ out1[8] = x509
+}
+
+// p521CarrySquare squares a field element and reduces the result.
+//
+// Postconditions:
+// eval out1 mod m = (eval arg1 * eval arg1) mod m
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]]
+// Output Bounds:
+// out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+func p521CarrySquare(out1 *[9]uint64, arg1 *[9]uint64) {
+ x1 := arg1[8]
+ x2 := (x1 * 0x2)
+ x3 := (arg1[8] * 0x2)
+ x4 := arg1[7]
+ x5 := (x4 * 0x2)
+ x6 := (arg1[7] * 0x2)
+ x7 := arg1[6]
+ x8 := (x7 * 0x2)
+ x9 := (arg1[6] * 0x2)
+ x10 := arg1[5]
+ x11 := (x10 * 0x2)
+ x12 := (arg1[5] * 0x2)
+ x13 := (arg1[4] * 0x2)
+ x14 := (arg1[3] * 0x2)
+ x15 := (arg1[2] * 0x2)
+ x16 := (arg1[1] * 0x2)
+ var x17 uint64
+ var x18 uint64
+ x18, x17 = bits.Mul64(arg1[8], (x1 * 0x2))
+ var x19 uint64
+ var x20 uint64
+ x20, x19 = bits.Mul64(arg1[7], (x2 * 0x2))
+ var x21 uint64
+ var x22 uint64
+ x22, x21 = bits.Mul64(arg1[7], (x4 * 0x2))
+ var x23 uint64
+ var x24 uint64
+ x24, x23 = bits.Mul64(arg1[6], (x2 * 0x2))
+ var x25 uint64
+ var x26 uint64
+ x26, x25 = bits.Mul64(arg1[6], (x5 * 0x2))
+ var x27 uint64
+ var x28 uint64
+ x28, x27 = bits.Mul64(arg1[6], (x7 * 0x2))
+ var x29 uint64
+ var x30 uint64
+ x30, x29 = bits.Mul64(arg1[5], (x2 * 0x2))
+ var x31 uint64
+ var x32 uint64
+ x32, x31 = bits.Mul64(arg1[5], (x5 * 0x2))
+ var x33 uint64
+ var x34 uint64
+ x34, x33 = bits.Mul64(arg1[5], (x8 * 0x2))
+ var x35 uint64
+ var x36 uint64
+ x36, x35 = bits.Mul64(arg1[5], (x10 * 0x2))
+ var x37 uint64
+ var x38 uint64
+ x38, x37 = bits.Mul64(arg1[4], (x2 * 0x2))
+ var x39 uint64
+ var x40 uint64
+ x40, x39 = bits.Mul64(arg1[4], (x5 * 0x2))
+ var x41 uint64
+ var x42 uint64
+ x42, x41 = bits.Mul64(arg1[4], (x8 * 0x2))
+ var x43 uint64
+ var x44 uint64
+ x44, x43 = bits.Mul64(arg1[4], (x11 * 0x2))
+ var x45 uint64
+ var x46 uint64
+ x46, x45 = bits.Mul64(arg1[4], arg1[4])
+ var x47 uint64
+ var x48 uint64
+ x48, x47 = bits.Mul64(arg1[3], (x2 * 0x2))
+ var x49 uint64
+ var x50 uint64
+ x50, x49 = bits.Mul64(arg1[3], (x5 * 0x2))
+ var x51 uint64
+ var x52 uint64
+ x52, x51 = bits.Mul64(arg1[3], (x8 * 0x2))
+ var x53 uint64
+ var x54 uint64
+ x54, x53 = bits.Mul64(arg1[3], x12)
+ var x55 uint64
+ var x56 uint64
+ x56, x55 = bits.Mul64(arg1[3], x13)
+ var x57 uint64
+ var x58 uint64
+ x58, x57 = bits.Mul64(arg1[3], arg1[3])
+ var x59 uint64
+ var x60 uint64
+ x60, x59 = bits.Mul64(arg1[2], (x2 * 0x2))
+ var x61 uint64
+ var x62 uint64
+ x62, x61 = bits.Mul64(arg1[2], (x5 * 0x2))
+ var x63 uint64
+ var x64 uint64
+ x64, x63 = bits.Mul64(arg1[2], x9)
+ var x65 uint64
+ var x66 uint64
+ x66, x65 = bits.Mul64(arg1[2], x12)
+ var x67 uint64
+ var x68 uint64
+ x68, x67 = bits.Mul64(arg1[2], x13)
+ var x69 uint64
+ var x70 uint64
+ x70, x69 = bits.Mul64(arg1[2], x14)
+ var x71 uint64
+ var x72 uint64
+ x72, x71 = bits.Mul64(arg1[2], arg1[2])
+ var x73 uint64
+ var x74 uint64
+ x74, x73 = bits.Mul64(arg1[1], (x2 * 0x2))
+ var x75 uint64
+ var x76 uint64
+ x76, x75 = bits.Mul64(arg1[1], x6)
+ var x77 uint64
+ var x78 uint64
+ x78, x77 = bits.Mul64(arg1[1], x9)
+ var x79 uint64
+ var x80 uint64
+ x80, x79 = bits.Mul64(arg1[1], x12)
+ var x81 uint64
+ var x82 uint64
+ x82, x81 = bits.Mul64(arg1[1], x13)
+ var x83 uint64
+ var x84 uint64
+ x84, x83 = bits.Mul64(arg1[1], x14)
+ var x85 uint64
+ var x86 uint64
+ x86, x85 = bits.Mul64(arg1[1], x15)
+ var x87 uint64
+ var x88 uint64
+ x88, x87 = bits.Mul64(arg1[1], arg1[1])
+ var x89 uint64
+ var x90 uint64
+ x90, x89 = bits.Mul64(arg1[0], x3)
+ var x91 uint64
+ var x92 uint64
+ x92, x91 = bits.Mul64(arg1[0], x6)
+ var x93 uint64
+ var x94 uint64
+ x94, x93 = bits.Mul64(arg1[0], x9)
+ var x95 uint64
+ var x96 uint64
+ x96, x95 = bits.Mul64(arg1[0], x12)
+ var x97 uint64
+ var x98 uint64
+ x98, x97 = bits.Mul64(arg1[0], x13)
+ var x99 uint64
+ var x100 uint64
+ x100, x99 = bits.Mul64(arg1[0], x14)
+ var x101 uint64
+ var x102 uint64
+ x102, x101 = bits.Mul64(arg1[0], x15)
+ var x103 uint64
+ var x104 uint64
+ x104, x103 = bits.Mul64(arg1[0], x16)
+ var x105 uint64
+ var x106 uint64
+ x106, x105 = bits.Mul64(arg1[0], arg1[0])
+ var x107 uint64
+ var x108 p521Uint1
+ x107, x108 = p521AddcarryxU64(x51, x43, 0x0)
+ var x109 uint64
+ x109, _ = p521AddcarryxU64(x52, x44, x108)
+ var x111 uint64
+ var x112 p521Uint1
+ x111, x112 = p521AddcarryxU64(x61, x107, 0x0)
+ var x113 uint64
+ x113, _ = p521AddcarryxU64(x62, x109, x112)
+ var x115 uint64
+ var x116 p521Uint1
+ x115, x116 = p521AddcarryxU64(x73, x111, 0x0)
+ var x117 uint64
+ x117, _ = p521AddcarryxU64(x74, x113, x116)
+ var x119 uint64
+ var x120 p521Uint1
+ x119, x120 = p521AddcarryxU64(x105, x115, 0x0)
+ var x121 uint64
+ x121, _ = p521AddcarryxU64(x106, x117, x120)
+ x123 := ((x119 >> 58) | ((x121 << 6) & 0xffffffffffffffff))
+ x124 := (x121 >> 58)
+ x125 := (x119 & 0x3ffffffffffffff)
+ var x126 uint64
+ var x127 p521Uint1
+ x126, x127 = p521AddcarryxU64(x53, x45, 0x0)
+ var x128 uint64
+ x128, _ = p521AddcarryxU64(x54, x46, x127)
+ var x130 uint64
+ var x131 p521Uint1
+ x130, x131 = p521AddcarryxU64(x63, x126, 0x0)
+ var x132 uint64
+ x132, _ = p521AddcarryxU64(x64, x128, x131)
+ var x134 uint64
+ var x135 p521Uint1
+ x134, x135 = p521AddcarryxU64(x75, x130, 0x0)
+ var x136 uint64
+ x136, _ = p521AddcarryxU64(x76, x132, x135)
+ var x138 uint64
+ var x139 p521Uint1
+ x138, x139 = p521AddcarryxU64(x89, x134, 0x0)
+ var x140 uint64
+ x140, _ = p521AddcarryxU64(x90, x136, x139)
+ var x142 uint64
+ var x143 p521Uint1
+ x142, x143 = p521AddcarryxU64(x55, x17, 0x0)
+ var x144 uint64
+ x144, _ = p521AddcarryxU64(x56, x18, x143)
+ var x146 uint64
+ var x147 p521Uint1
+ x146, x147 = p521AddcarryxU64(x65, x142, 0x0)
+ var x148 uint64
+ x148, _ = p521AddcarryxU64(x66, x144, x147)
+ var x150 uint64
+ var x151 p521Uint1
+ x150, x151 = p521AddcarryxU64(x77, x146, 0x0)
+ var x152 uint64
+ x152, _ = p521AddcarryxU64(x78, x148, x151)
+ var x154 uint64
+ var x155 p521Uint1
+ x154, x155 = p521AddcarryxU64(x91, x150, 0x0)
+ var x156 uint64
+ x156, _ = p521AddcarryxU64(x92, x152, x155)
+ var x158 uint64
+ var x159 p521Uint1
+ x158, x159 = p521AddcarryxU64(x57, x19, 0x0)
+ var x160 uint64
+ x160, _ = p521AddcarryxU64(x58, x20, x159)
+ var x162 uint64
+ var x163 p521Uint1
+ x162, x163 = p521AddcarryxU64(x67, x158, 0x0)
+ var x164 uint64
+ x164, _ = p521AddcarryxU64(x68, x160, x163)
+ var x166 uint64
+ var x167 p521Uint1
+ x166, x167 = p521AddcarryxU64(x79, x162, 0x0)
+ var x168 uint64
+ x168, _ = p521AddcarryxU64(x80, x164, x167)
+ var x170 uint64
+ var x171 p521Uint1
+ x170, x171 = p521AddcarryxU64(x93, x166, 0x0)
+ var x172 uint64
+ x172, _ = p521AddcarryxU64(x94, x168, x171)
+ var x174 uint64
+ var x175 p521Uint1
+ x174, x175 = p521AddcarryxU64(x23, x21, 0x0)
+ var x176 uint64
+ x176, _ = p521AddcarryxU64(x24, x22, x175)
+ var x178 uint64
+ var x179 p521Uint1
+ x178, x179 = p521AddcarryxU64(x69, x174, 0x0)
+ var x180 uint64
+ x180, _ = p521AddcarryxU64(x70, x176, x179)
+ var x182 uint64
+ var x183 p521Uint1
+ x182, x183 = p521AddcarryxU64(x81, x178, 0x0)
+ var x184 uint64
+ x184, _ = p521AddcarryxU64(x82, x180, x183)
+ var x186 uint64
+ var x187 p521Uint1
+ x186, x187 = p521AddcarryxU64(x95, x182, 0x0)
+ var x188 uint64
+ x188, _ = p521AddcarryxU64(x96, x184, x187)
+ var x190 uint64
+ var x191 p521Uint1
+ x190, x191 = p521AddcarryxU64(x29, x25, 0x0)
+ var x192 uint64
+ x192, _ = p521AddcarryxU64(x30, x26, x191)
+ var x194 uint64
+ var x195 p521Uint1
+ x194, x195 = p521AddcarryxU64(x71, x190, 0x0)
+ var x196 uint64
+ x196, _ = p521AddcarryxU64(x72, x192, x195)
+ var x198 uint64
+ var x199 p521Uint1
+ x198, x199 = p521AddcarryxU64(x83, x194, 0x0)
+ var x200 uint64
+ x200, _ = p521AddcarryxU64(x84, x196, x199)
+ var x202 uint64
+ var x203 p521Uint1
+ x202, x203 = p521AddcarryxU64(x97, x198, 0x0)
+ var x204 uint64
+ x204, _ = p521AddcarryxU64(x98, x200, x203)
+ var x206 uint64
+ var x207 p521Uint1
+ x206, x207 = p521AddcarryxU64(x31, x27, 0x0)
+ var x208 uint64
+ x208, _ = p521AddcarryxU64(x32, x28, x207)
+ var x210 uint64
+ var x211 p521Uint1
+ x210, x211 = p521AddcarryxU64(x37, x206, 0x0)
+ var x212 uint64
+ x212, _ = p521AddcarryxU64(x38, x208, x211)
+ var x214 uint64
+ var x215 p521Uint1
+ x214, x215 = p521AddcarryxU64(x85, x210, 0x0)
+ var x216 uint64
+ x216, _ = p521AddcarryxU64(x86, x212, x215)
+ var x218 uint64
+ var x219 p521Uint1
+ x218, x219 = p521AddcarryxU64(x99, x214, 0x0)
+ var x220 uint64
+ x220, _ = p521AddcarryxU64(x100, x216, x219)
+ var x222 uint64
+ var x223 p521Uint1
+ x222, x223 = p521AddcarryxU64(x39, x33, 0x0)
+ var x224 uint64
+ x224, _ = p521AddcarryxU64(x40, x34, x223)
+ var x226 uint64
+ var x227 p521Uint1
+ x226, x227 = p521AddcarryxU64(x47, x222, 0x0)
+ var x228 uint64
+ x228, _ = p521AddcarryxU64(x48, x224, x227)
+ var x230 uint64
+ var x231 p521Uint1
+ x230, x231 = p521AddcarryxU64(x87, x226, 0x0)
+ var x232 uint64
+ x232, _ = p521AddcarryxU64(x88, x228, x231)
+ var x234 uint64
+ var x235 p521Uint1
+ x234, x235 = p521AddcarryxU64(x101, x230, 0x0)
+ var x236 uint64
+ x236, _ = p521AddcarryxU64(x102, x232, x235)
+ var x238 uint64
+ var x239 p521Uint1
+ x238, x239 = p521AddcarryxU64(x41, x35, 0x0)
+ var x240 uint64
+ x240, _ = p521AddcarryxU64(x42, x36, x239)
+ var x242 uint64
+ var x243 p521Uint1
+ x242, x243 = p521AddcarryxU64(x49, x238, 0x0)
+ var x244 uint64
+ x244, _ = p521AddcarryxU64(x50, x240, x243)
+ var x246 uint64
+ var x247 p521Uint1
+ x246, x247 = p521AddcarryxU64(x59, x242, 0x0)
+ var x248 uint64
+ x248, _ = p521AddcarryxU64(x60, x244, x247)
+ var x250 uint64
+ var x251 p521Uint1
+ x250, x251 = p521AddcarryxU64(x103, x246, 0x0)
+ var x252 uint64
+ x252, _ = p521AddcarryxU64(x104, x248, x251)
+ var x254 uint64
+ var x255 p521Uint1
+ x254, x255 = p521AddcarryxU64(x123, x250, 0x0)
+ var x256 uint64
+ x256, _ = p521AddcarryxU64(x124, x252, x255)
+ x258 := ((x254 >> 58) | ((x256 << 6) & 0xffffffffffffffff))
+ x259 := (x256 >> 58)
+ x260 := (x254 & 0x3ffffffffffffff)
+ var x261 uint64
+ var x262 p521Uint1
+ x261, x262 = p521AddcarryxU64(x258, x234, 0x0)
+ var x263 uint64
+ x263, _ = p521AddcarryxU64(x259, x236, x262)
+ x265 := ((x261 >> 58) | ((x263 << 6) & 0xffffffffffffffff))
+ x266 := (x263 >> 58)
+ x267 := (x261 & 0x3ffffffffffffff)
+ var x268 uint64
+ var x269 p521Uint1
+ x268, x269 = p521AddcarryxU64(x265, x218, 0x0)
+ var x270 uint64
+ x270, _ = p521AddcarryxU64(x266, x220, x269)
+ x272 := ((x268 >> 58) | ((x270 << 6) & 0xffffffffffffffff))
+ x273 := (x270 >> 58)
+ x274 := (x268 & 0x3ffffffffffffff)
+ var x275 uint64
+ var x276 p521Uint1
+ x275, x276 = p521AddcarryxU64(x272, x202, 0x0)
+ var x277 uint64
+ x277, _ = p521AddcarryxU64(x273, x204, x276)
+ x279 := ((x275 >> 58) | ((x277 << 6) & 0xffffffffffffffff))
+ x280 := (x277 >> 58)
+ x281 := (x275 & 0x3ffffffffffffff)
+ var x282 uint64
+ var x283 p521Uint1
+ x282, x283 = p521AddcarryxU64(x279, x186, 0x0)
+ var x284 uint64
+ x284, _ = p521AddcarryxU64(x280, x188, x283)
+ x286 := ((x282 >> 58) | ((x284 << 6) & 0xffffffffffffffff))
+ x287 := (x284 >> 58)
+ x288 := (x282 & 0x3ffffffffffffff)
+ var x289 uint64
+ var x290 p521Uint1
+ x289, x290 = p521AddcarryxU64(x286, x170, 0x0)
+ var x291 uint64
+ x291, _ = p521AddcarryxU64(x287, x172, x290)
+ x293 := ((x289 >> 58) | ((x291 << 6) & 0xffffffffffffffff))
+ x294 := (x291 >> 58)
+ x295 := (x289 & 0x3ffffffffffffff)
+ var x296 uint64
+ var x297 p521Uint1
+ x296, x297 = p521AddcarryxU64(x293, x154, 0x0)
+ var x298 uint64
+ x298, _ = p521AddcarryxU64(x294, x156, x297)
+ x300 := ((x296 >> 58) | ((x298 << 6) & 0xffffffffffffffff))
+ x301 := (x298 >> 58)
+ x302 := (x296 & 0x3ffffffffffffff)
+ var x303 uint64
+ var x304 p521Uint1
+ x303, x304 = p521AddcarryxU64(x300, x138, 0x0)
+ var x305 uint64
+ x305, _ = p521AddcarryxU64(x301, x140, x304)
+ x307 := ((x303 >> 57) | ((x305 << 7) & 0xffffffffffffffff))
+ x308 := (x305 >> 57)
+ x309 := (x303 & 0x1ffffffffffffff)
+ var x310 uint64
+ var x311 p521Uint1
+ x310, x311 = p521AddcarryxU64(x125, x307, 0x0)
+ x312 := (uint64(x311) + x308)
+ x313 := ((x310 >> 58) | ((x312 << 6) & 0xffffffffffffffff))
+ x314 := (x310 & 0x3ffffffffffffff)
+ x315 := (x313 + x260)
+ x316 := p521Uint1((x315 >> 58))
+ x317 := (x315 & 0x3ffffffffffffff)
+ x318 := (uint64(x316) + x267)
+ out1[0] = x314
+ out1[1] = x317
+ out1[2] = x318
+ out1[3] = x274
+ out1[4] = x281
+ out1[5] = x288
+ out1[6] = x295
+ out1[7] = x302
+ out1[8] = x309
+}
+
+// p521Carry reduces a field element.
+//
+// Postconditions:
+// eval out1 mod m = eval arg1 mod m
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]]
+// Output Bounds:
+// out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+func p521Carry(out1 *[9]uint64, arg1 *[9]uint64) {
+ x1 := arg1[0]
+ x2 := ((x1 >> 58) + arg1[1])
+ x3 := ((x2 >> 58) + arg1[2])
+ x4 := ((x3 >> 58) + arg1[3])
+ x5 := ((x4 >> 58) + arg1[4])
+ x6 := ((x5 >> 58) + arg1[5])
+ x7 := ((x6 >> 58) + arg1[6])
+ x8 := ((x7 >> 58) + arg1[7])
+ x9 := ((x8 >> 58) + arg1[8])
+ x10 := ((x1 & 0x3ffffffffffffff) + (x9 >> 57))
+ x11 := (uint64(p521Uint1((x10 >> 58))) + (x2 & 0x3ffffffffffffff))
+ x12 := (x10 & 0x3ffffffffffffff)
+ x13 := (x11 & 0x3ffffffffffffff)
+ x14 := (uint64(p521Uint1((x11 >> 58))) + (x3 & 0x3ffffffffffffff))
+ x15 := (x4 & 0x3ffffffffffffff)
+ x16 := (x5 & 0x3ffffffffffffff)
+ x17 := (x6 & 0x3ffffffffffffff)
+ x18 := (x7 & 0x3ffffffffffffff)
+ x19 := (x8 & 0x3ffffffffffffff)
+ x20 := (x9 & 0x1ffffffffffffff)
+ out1[0] = x12
+ out1[1] = x13
+ out1[2] = x14
+ out1[3] = x15
+ out1[4] = x16
+ out1[5] = x17
+ out1[6] = x18
+ out1[7] = x19
+ out1[8] = x20
+}
+
+// p521Add adds two field elements.
+//
+// Postconditions:
+// eval out1 mod m = (eval arg1 + eval arg2) mod m
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+// arg2: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+// Output Bounds:
+// out1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]]
+func p521Add(out1 *[9]uint64, arg1 *[9]uint64, arg2 *[9]uint64) {
+ x1 := (arg1[0] + arg2[0])
+ x2 := (arg1[1] + arg2[1])
+ x3 := (arg1[2] + arg2[2])
+ x4 := (arg1[3] + arg2[3])
+ x5 := (arg1[4] + arg2[4])
+ x6 := (arg1[5] + arg2[5])
+ x7 := (arg1[6] + arg2[6])
+ x8 := (arg1[7] + arg2[7])
+ x9 := (arg1[8] + arg2[8])
+ out1[0] = x1
+ out1[1] = x2
+ out1[2] = x3
+ out1[3] = x4
+ out1[4] = x5
+ out1[5] = x6
+ out1[6] = x7
+ out1[7] = x8
+ out1[8] = x9
+}
+
+// p521Sub subtracts two field elements.
+//
+// Postconditions:
+// eval out1 mod m = (eval arg1 - eval arg2) mod m
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+// arg2: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+// Output Bounds:
+// out1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]]
+func p521Sub(out1 *[9]uint64, arg1 *[9]uint64, arg2 *[9]uint64) {
+ x1 := ((0x7fffffffffffffe + arg1[0]) - arg2[0])
+ x2 := ((0x7fffffffffffffe + arg1[1]) - arg2[1])
+ x3 := ((0x7fffffffffffffe + arg1[2]) - arg2[2])
+ x4 := ((0x7fffffffffffffe + arg1[3]) - arg2[3])
+ x5 := ((0x7fffffffffffffe + arg1[4]) - arg2[4])
+ x6 := ((0x7fffffffffffffe + arg1[5]) - arg2[5])
+ x7 := ((0x7fffffffffffffe + arg1[6]) - arg2[6])
+ x8 := ((0x7fffffffffffffe + arg1[7]) - arg2[7])
+ x9 := ((0x3fffffffffffffe + arg1[8]) - arg2[8])
+ out1[0] = x1
+ out1[1] = x2
+ out1[2] = x3
+ out1[3] = x4
+ out1[4] = x5
+ out1[5] = x6
+ out1[6] = x7
+ out1[7] = x8
+ out1[8] = x9
+}
+
+// p521ToBytes serializes a field element to bytes in little-endian order.
+//
+// Postconditions:
+// out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..65]
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+// Output Bounds:
+// out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]]
+func p521ToBytes(out1 *[66]uint8, arg1 *[9]uint64) {
+ var x1 uint64
+ var x2 p521Uint1
+ p521SubborrowxU58(&x1, &x2, 0x0, arg1[0], 0x3ffffffffffffff)
+ var x3 uint64
+ var x4 p521Uint1
+ p521SubborrowxU58(&x3, &x4, x2, arg1[1], 0x3ffffffffffffff)
+ var x5 uint64
+ var x6 p521Uint1
+ p521SubborrowxU58(&x5, &x6, x4, arg1[2], 0x3ffffffffffffff)
+ var x7 uint64
+ var x8 p521Uint1
+ p521SubborrowxU58(&x7, &x8, x6, arg1[3], 0x3ffffffffffffff)
+ var x9 uint64
+ var x10 p521Uint1
+ p521SubborrowxU58(&x9, &x10, x8, arg1[4], 0x3ffffffffffffff)
+ var x11 uint64
+ var x12 p521Uint1
+ p521SubborrowxU58(&x11, &x12, x10, arg1[5], 0x3ffffffffffffff)
+ var x13 uint64
+ var x14 p521Uint1
+ p521SubborrowxU58(&x13, &x14, x12, arg1[6], 0x3ffffffffffffff)
+ var x15 uint64
+ var x16 p521Uint1
+ p521SubborrowxU58(&x15, &x16, x14, arg1[7], 0x3ffffffffffffff)
+ var x17 uint64
+ var x18 p521Uint1
+ p521SubborrowxU57(&x17, &x18, x16, arg1[8], 0x1ffffffffffffff)
+ var x19 uint64
+ p521CmovznzU64(&x19, x18, uint64(0x0), 0xffffffffffffffff)
+ var x20 uint64
+ var x21 p521Uint1
+ p521AddcarryxU58(&x20, &x21, 0x0, x1, (x19 & 0x3ffffffffffffff))
+ var x22 uint64
+ var x23 p521Uint1
+ p521AddcarryxU58(&x22, &x23, x21, x3, (x19 & 0x3ffffffffffffff))
+ var x24 uint64
+ var x25 p521Uint1
+ p521AddcarryxU58(&x24, &x25, x23, x5, (x19 & 0x3ffffffffffffff))
+ var x26 uint64
+ var x27 p521Uint1
+ p521AddcarryxU58(&x26, &x27, x25, x7, (x19 & 0x3ffffffffffffff))
+ var x28 uint64
+ var x29 p521Uint1
+ p521AddcarryxU58(&x28, &x29, x27, x9, (x19 & 0x3ffffffffffffff))
+ var x30 uint64
+ var x31 p521Uint1
+ p521AddcarryxU58(&x30, &x31, x29, x11, (x19 & 0x3ffffffffffffff))
+ var x32 uint64
+ var x33 p521Uint1
+ p521AddcarryxU58(&x32, &x33, x31, x13, (x19 & 0x3ffffffffffffff))
+ var x34 uint64
+ var x35 p521Uint1
+ p521AddcarryxU58(&x34, &x35, x33, x15, (x19 & 0x3ffffffffffffff))
+ var x36 uint64
+ var x37 p521Uint1
+ p521AddcarryxU57(&x36, &x37, x35, x17, (x19 & 0x1ffffffffffffff))
+ x38 := (x34 << 6)
+ x39 := (x32 << 4)
+ x40 := (x30 << 2)
+ x41 := (x26 << 6)
+ x42 := (x24 << 4)
+ x43 := (x22 << 2)
+ x44 := (uint8(x20) & 0xff)
+ x45 := (x20 >> 8)
+ x46 := (uint8(x45) & 0xff)
+ x47 := (x45 >> 8)
+ x48 := (uint8(x47) & 0xff)
+ x49 := (x47 >> 8)
+ x50 := (uint8(x49) & 0xff)
+ x51 := (x49 >> 8)
+ x52 := (uint8(x51) & 0xff)
+ x53 := (x51 >> 8)
+ x54 := (uint8(x53) & 0xff)
+ x55 := (x53 >> 8)
+ x56 := (uint8(x55) & 0xff)
+ x57 := uint8((x55 >> 8))
+ x58 := (x43 + uint64(x57))
+ x59 := (uint8(x58) & 0xff)
+ x60 := (x58 >> 8)
+ x61 := (uint8(x60) & 0xff)
+ x62 := (x60 >> 8)
+ x63 := (uint8(x62) & 0xff)
+ x64 := (x62 >> 8)
+ x65 := (uint8(x64) & 0xff)
+ x66 := (x64 >> 8)
+ x67 := (uint8(x66) & 0xff)
+ x68 := (x66 >> 8)
+ x69 := (uint8(x68) & 0xff)
+ x70 := (x68 >> 8)
+ x71 := (uint8(x70) & 0xff)
+ x72 := uint8((x70 >> 8))
+ x73 := (x42 + uint64(x72))
+ x74 := (uint8(x73) & 0xff)
+ x75 := (x73 >> 8)
+ x76 := (uint8(x75) & 0xff)
+ x77 := (x75 >> 8)
+ x78 := (uint8(x77) & 0xff)
+ x79 := (x77 >> 8)
+ x80 := (uint8(x79) & 0xff)
+ x81 := (x79 >> 8)
+ x82 := (uint8(x81) & 0xff)
+ x83 := (x81 >> 8)
+ x84 := (uint8(x83) & 0xff)
+ x85 := (x83 >> 8)
+ x86 := (uint8(x85) & 0xff)
+ x87 := uint8((x85 >> 8))
+ x88 := (x41 + uint64(x87))
+ x89 := (uint8(x88) & 0xff)
+ x90 := (x88 >> 8)
+ x91 := (uint8(x90) & 0xff)
+ x92 := (x90 >> 8)
+ x93 := (uint8(x92) & 0xff)
+ x94 := (x92 >> 8)
+ x95 := (uint8(x94) & 0xff)
+ x96 := (x94 >> 8)
+ x97 := (uint8(x96) & 0xff)
+ x98 := (x96 >> 8)
+ x99 := (uint8(x98) & 0xff)
+ x100 := (x98 >> 8)
+ x101 := (uint8(x100) & 0xff)
+ x102 := uint8((x100 >> 8))
+ x103 := (uint8(x28) & 0xff)
+ x104 := (x28 >> 8)
+ x105 := (uint8(x104) & 0xff)
+ x106 := (x104 >> 8)
+ x107 := (uint8(x106) & 0xff)
+ x108 := (x106 >> 8)
+ x109 := (uint8(x108) & 0xff)
+ x110 := (x108 >> 8)
+ x111 := (uint8(x110) & 0xff)
+ x112 := (x110 >> 8)
+ x113 := (uint8(x112) & 0xff)
+ x114 := (x112 >> 8)
+ x115 := (uint8(x114) & 0xff)
+ x116 := uint8((x114 >> 8))
+ x117 := (x40 + uint64(x116))
+ x118 := (uint8(x117) & 0xff)
+ x119 := (x117 >> 8)
+ x120 := (uint8(x119) & 0xff)
+ x121 := (x119 >> 8)
+ x122 := (uint8(x121) & 0xff)
+ x123 := (x121 >> 8)
+ x124 := (uint8(x123) & 0xff)
+ x125 := (x123 >> 8)
+ x126 := (uint8(x125) & 0xff)
+ x127 := (x125 >> 8)
+ x128 := (uint8(x127) & 0xff)
+ x129 := (x127 >> 8)
+ x130 := (uint8(x129) & 0xff)
+ x131 := uint8((x129 >> 8))
+ x132 := (x39 + uint64(x131))
+ x133 := (uint8(x132) & 0xff)
+ x134 := (x132 >> 8)
+ x135 := (uint8(x134) & 0xff)
+ x136 := (x134 >> 8)
+ x137 := (uint8(x136) & 0xff)
+ x138 := (x136 >> 8)
+ x139 := (uint8(x138) & 0xff)
+ x140 := (x138 >> 8)
+ x141 := (uint8(x140) & 0xff)
+ x142 := (x140 >> 8)
+ x143 := (uint8(x142) & 0xff)
+ x144 := (x142 >> 8)
+ x145 := (uint8(x144) & 0xff)
+ x146 := uint8((x144 >> 8))
+ x147 := (x38 + uint64(x146))
+ x148 := (uint8(x147) & 0xff)
+ x149 := (x147 >> 8)
+ x150 := (uint8(x149) & 0xff)
+ x151 := (x149 >> 8)
+ x152 := (uint8(x151) & 0xff)
+ x153 := (x151 >> 8)
+ x154 := (uint8(x153) & 0xff)
+ x155 := (x153 >> 8)
+ x156 := (uint8(x155) & 0xff)
+ x157 := (x155 >> 8)
+ x158 := (uint8(x157) & 0xff)
+ x159 := (x157 >> 8)
+ x160 := (uint8(x159) & 0xff)
+ x161 := uint8((x159 >> 8))
+ x162 := (uint8(x36) & 0xff)
+ x163 := (x36 >> 8)
+ x164 := (uint8(x163) & 0xff)
+ x165 := (x163 >> 8)
+ x166 := (uint8(x165) & 0xff)
+ x167 := (x165 >> 8)
+ x168 := (uint8(x167) & 0xff)
+ x169 := (x167 >> 8)
+ x170 := (uint8(x169) & 0xff)
+ x171 := (x169 >> 8)
+ x172 := (uint8(x171) & 0xff)
+ x173 := (x171 >> 8)
+ x174 := (uint8(x173) & 0xff)
+ x175 := p521Uint1((x173 >> 8))
+ out1[0] = x44
+ out1[1] = x46
+ out1[2] = x48
+ out1[3] = x50
+ out1[4] = x52
+ out1[5] = x54
+ out1[6] = x56
+ out1[7] = x59
+ out1[8] = x61
+ out1[9] = x63
+ out1[10] = x65
+ out1[11] = x67
+ out1[12] = x69
+ out1[13] = x71
+ out1[14] = x74
+ out1[15] = x76
+ out1[16] = x78
+ out1[17] = x80
+ out1[18] = x82
+ out1[19] = x84
+ out1[20] = x86
+ out1[21] = x89
+ out1[22] = x91
+ out1[23] = x93
+ out1[24] = x95
+ out1[25] = x97
+ out1[26] = x99
+ out1[27] = x101
+ out1[28] = x102
+ out1[29] = x103
+ out1[30] = x105
+ out1[31] = x107
+ out1[32] = x109
+ out1[33] = x111
+ out1[34] = x113
+ out1[35] = x115
+ out1[36] = x118
+ out1[37] = x120
+ out1[38] = x122
+ out1[39] = x124
+ out1[40] = x126
+ out1[41] = x128
+ out1[42] = x130
+ out1[43] = x133
+ out1[44] = x135
+ out1[45] = x137
+ out1[46] = x139
+ out1[47] = x141
+ out1[48] = x143
+ out1[49] = x145
+ out1[50] = x148
+ out1[51] = x150
+ out1[52] = x152
+ out1[53] = x154
+ out1[54] = x156
+ out1[55] = x158
+ out1[56] = x160
+ out1[57] = x161
+ out1[58] = x162
+ out1[59] = x164
+ out1[60] = x166
+ out1[61] = x168
+ out1[62] = x170
+ out1[63] = x172
+ out1[64] = x174
+ out1[65] = uint8(x175)
+}
+
+// p521FromBytes deserializes a field element from bytes in little-endian order.
+//
+// Postconditions:
+// eval out1 mod m = bytes_eval arg1 mod m
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]]
+// Output Bounds:
+// out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+func p521FromBytes(out1 *[9]uint64, arg1 *[66]uint8) {
+ x1 := (uint64(p521Uint1(arg1[65])) << 56)
+ x2 := (uint64(arg1[64]) << 48)
+ x3 := (uint64(arg1[63]) << 40)
+ x4 := (uint64(arg1[62]) << 32)
+ x5 := (uint64(arg1[61]) << 24)
+ x6 := (uint64(arg1[60]) << 16)
+ x7 := (uint64(arg1[59]) << 8)
+ x8 := arg1[58]
+ x9 := (uint64(arg1[57]) << 50)
+ x10 := (uint64(arg1[56]) << 42)
+ x11 := (uint64(arg1[55]) << 34)
+ x12 := (uint64(arg1[54]) << 26)
+ x13 := (uint64(arg1[53]) << 18)
+ x14 := (uint64(arg1[52]) << 10)
+ x15 := (uint64(arg1[51]) << 2)
+ x16 := (uint64(arg1[50]) << 52)
+ x17 := (uint64(arg1[49]) << 44)
+ x18 := (uint64(arg1[48]) << 36)
+ x19 := (uint64(arg1[47]) << 28)
+ x20 := (uint64(arg1[46]) << 20)
+ x21 := (uint64(arg1[45]) << 12)
+ x22 := (uint64(arg1[44]) << 4)
+ x23 := (uint64(arg1[43]) << 54)
+ x24 := (uint64(arg1[42]) << 46)
+ x25 := (uint64(arg1[41]) << 38)
+ x26 := (uint64(arg1[40]) << 30)
+ x27 := (uint64(arg1[39]) << 22)
+ x28 := (uint64(arg1[38]) << 14)
+ x29 := (uint64(arg1[37]) << 6)
+ x30 := (uint64(arg1[36]) << 56)
+ x31 := (uint64(arg1[35]) << 48)
+ x32 := (uint64(arg1[34]) << 40)
+ x33 := (uint64(arg1[33]) << 32)
+ x34 := (uint64(arg1[32]) << 24)
+ x35 := (uint64(arg1[31]) << 16)
+ x36 := (uint64(arg1[30]) << 8)
+ x37 := arg1[29]
+ x38 := (uint64(arg1[28]) << 50)
+ x39 := (uint64(arg1[27]) << 42)
+ x40 := (uint64(arg1[26]) << 34)
+ x41 := (uint64(arg1[25]) << 26)
+ x42 := (uint64(arg1[24]) << 18)
+ x43 := (uint64(arg1[23]) << 10)
+ x44 := (uint64(arg1[22]) << 2)
+ x45 := (uint64(arg1[21]) << 52)
+ x46 := (uint64(arg1[20]) << 44)
+ x47 := (uint64(arg1[19]) << 36)
+ x48 := (uint64(arg1[18]) << 28)
+ x49 := (uint64(arg1[17]) << 20)
+ x50 := (uint64(arg1[16]) << 12)
+ x51 := (uint64(arg1[15]) << 4)
+ x52 := (uint64(arg1[14]) << 54)
+ x53 := (uint64(arg1[13]) << 46)
+ x54 := (uint64(arg1[12]) << 38)
+ x55 := (uint64(arg1[11]) << 30)
+ x56 := (uint64(arg1[10]) << 22)
+ x57 := (uint64(arg1[9]) << 14)
+ x58 := (uint64(arg1[8]) << 6)
+ x59 := (uint64(arg1[7]) << 56)
+ x60 := (uint64(arg1[6]) << 48)
+ x61 := (uint64(arg1[5]) << 40)
+ x62 := (uint64(arg1[4]) << 32)
+ x63 := (uint64(arg1[3]) << 24)
+ x64 := (uint64(arg1[2]) << 16)
+ x65 := (uint64(arg1[1]) << 8)
+ x66 := arg1[0]
+ x67 := (x65 + uint64(x66))
+ x68 := (x64 + x67)
+ x69 := (x63 + x68)
+ x70 := (x62 + x69)
+ x71 := (x61 + x70)
+ x72 := (x60 + x71)
+ x73 := (x59 + x72)
+ x74 := (x73 & 0x3ffffffffffffff)
+ x75 := uint8((x73 >> 58))
+ x76 := (x58 + uint64(x75))
+ x77 := (x57 + x76)
+ x78 := (x56 + x77)
+ x79 := (x55 + x78)
+ x80 := (x54 + x79)
+ x81 := (x53 + x80)
+ x82 := (x52 + x81)
+ x83 := (x82 & 0x3ffffffffffffff)
+ x84 := uint8((x82 >> 58))
+ x85 := (x51 + uint64(x84))
+ x86 := (x50 + x85)
+ x87 := (x49 + x86)
+ x88 := (x48 + x87)
+ x89 := (x47 + x88)
+ x90 := (x46 + x89)
+ x91 := (x45 + x90)
+ x92 := (x91 & 0x3ffffffffffffff)
+ x93 := uint8((x91 >> 58))
+ x94 := (x44 + uint64(x93))
+ x95 := (x43 + x94)
+ x96 := (x42 + x95)
+ x97 := (x41 + x96)
+ x98 := (x40 + x97)
+ x99 := (x39 + x98)
+ x100 := (x38 + x99)
+ x101 := (x36 + uint64(x37))
+ x102 := (x35 + x101)
+ x103 := (x34 + x102)
+ x104 := (x33 + x103)
+ x105 := (x32 + x104)
+ x106 := (x31 + x105)
+ x107 := (x30 + x106)
+ x108 := (x107 & 0x3ffffffffffffff)
+ x109 := uint8((x107 >> 58))
+ x110 := (x29 + uint64(x109))
+ x111 := (x28 + x110)
+ x112 := (x27 + x111)
+ x113 := (x26 + x112)
+ x114 := (x25 + x113)
+ x115 := (x24 + x114)
+ x116 := (x23 + x115)
+ x117 := (x116 & 0x3ffffffffffffff)
+ x118 := uint8((x116 >> 58))
+ x119 := (x22 + uint64(x118))
+ x120 := (x21 + x119)
+ x121 := (x20 + x120)
+ x122 := (x19 + x121)
+ x123 := (x18 + x122)
+ x124 := (x17 + x123)
+ x125 := (x16 + x124)
+ x126 := (x125 & 0x3ffffffffffffff)
+ x127 := uint8((x125 >> 58))
+ x128 := (x15 + uint64(x127))
+ x129 := (x14 + x128)
+ x130 := (x13 + x129)
+ x131 := (x12 + x130)
+ x132 := (x11 + x131)
+ x133 := (x10 + x132)
+ x134 := (x9 + x133)
+ x135 := (x7 + uint64(x8))
+ x136 := (x6 + x135)
+ x137 := (x5 + x136)
+ x138 := (x4 + x137)
+ x139 := (x3 + x138)
+ x140 := (x2 + x139)
+ x141 := (x1 + x140)
+ out1[0] = x74
+ out1[1] = x83
+ out1[2] = x92
+ out1[3] = x100
+ out1[4] = x108
+ out1[5] = x117
+ out1[6] = x126
+ out1[7] = x134
+ out1[8] = x141
+}
+
+// p521Selectznz is a multi-limb conditional select.
+//
+// Postconditions:
+// eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
+//
+// Input Bounds:
+// arg1: [0x0 ~> 0x1]
+// arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+// arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+// Output Bounds:
+// out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+func p521Selectznz(out1 *[9]uint64, arg1 p521Uint1, arg2 *[9]uint64, arg3 *[9]uint64) {
+ var x1 uint64
+ p521CmovznzU64(&x1, arg1, arg2[0], arg3[0])
+ var x2 uint64
+ p521CmovznzU64(&x2, arg1, arg2[1], arg3[1])
+ var x3 uint64
+ p521CmovznzU64(&x3, arg1, arg2[2], arg3[2])
+ var x4 uint64
+ p521CmovznzU64(&x4, arg1, arg2[3], arg3[3])
+ var x5 uint64
+ p521CmovznzU64(&x5, arg1, arg2[4], arg3[4])
+ var x6 uint64
+ p521CmovznzU64(&x6, arg1, arg2[5], arg3[5])
+ var x7 uint64
+ p521CmovznzU64(&x7, arg1, arg2[6], arg3[6])
+ var x8 uint64
+ p521CmovznzU64(&x8, arg1, arg2[7], arg3[7])
+ var x9 uint64
+ p521CmovznzU64(&x9, arg1, arg2[8], arg3[8])
+ out1[0] = x1
+ out1[1] = x2
+ out1[2] = x3
+ out1[3] = x4
+ out1[4] = x5
+ out1[5] = x6
+ out1[6] = x7
+ out1[7] = x8
+ out1[8] = x9
+}
diff --git a/src/crypto/elliptic/internal/fiat/p521_test.go b/src/crypto/elliptic/internal/fiat/p521_test.go
new file mode 100644
index 0000000..661bde3
--- /dev/null
+++ b/src/crypto/elliptic/internal/fiat/p521_test.go
@@ -0,0 +1,37 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package fiat_test
+
+import (
+ "crypto/elliptic/internal/fiat"
+ "crypto/rand"
+ "testing"
+)
+
+func p521Random(t *testing.T) *fiat.P521Element {
+ buf := make([]byte, 66)
+ if _, err := rand.Read(buf); err != nil {
+ t.Fatal(err)
+ }
+ buf[65] &= 1
+ e, err := new(fiat.P521Element).SetBytes(buf)
+ if err != nil {
+ t.Fatal(err)
+ }
+ return e
+}
+
+func TestP521Invert(t *testing.T) {
+ a := p521Random(t)
+ inv := new(fiat.P521Element).Invert(a)
+ one := new(fiat.P521Element).Mul(a, inv)
+ if new(fiat.P521Element).One().Equal(one) != 1 {
+ t.Errorf("a * 1/a != 1; got %x for %x", one.Bytes(), a.Bytes())
+ }
+ inv.Invert(new(fiat.P521Element))
+ if new(fiat.P521Element).Equal(inv) != 1 {
+ t.Errorf("1/0 != 0; got %x", inv.Bytes())
+ }
+}
diff --git a/src/crypto/elliptic/p224.go b/src/crypto/elliptic/p224.go
new file mode 100644
index 0000000..ff5c834
--- /dev/null
+++ b/src/crypto/elliptic/p224.go
@@ -0,0 +1,783 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package elliptic
+
+// This is a constant-time, 32-bit implementation of P224. See FIPS 186-3,
+// section D.2.2.
+//
+// See https://www.imperialviolet.org/2010/12/04/ecc.html ([1]) for background.
+
+import (
+ "math/big"
+)
+
+var p224 p224Curve
+
+type p224Curve struct {
+ *CurveParams
+ gx, gy, b p224FieldElement
+}
+
+func initP224() {
+ // See FIPS 186-3, section D.2.2
+ p224.CurveParams = &CurveParams{Name: "P-224"}
+ p224.P, _ = new(big.Int).SetString("26959946667150639794667015087019630673557916260026308143510066298881", 10)
+ p224.N, _ = new(big.Int).SetString("26959946667150639794667015087019625940457807714424391721682722368061", 10)
+ p224.B, _ = new(big.Int).SetString("b4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4", 16)
+ p224.Gx, _ = new(big.Int).SetString("b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", 16)
+ p224.Gy, _ = new(big.Int).SetString("bd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34", 16)
+ p224.BitSize = 224
+
+ p224FromBig(&p224.gx, p224.Gx)
+ p224FromBig(&p224.gy, p224.Gy)
+ p224FromBig(&p224.b, p224.B)
+}
+
+// P224 returns a Curve which implements P-224 (see FIPS 186-3, section D.2.2).
+//
+// The cryptographic operations are implemented using constant-time algorithms.
+func P224() Curve {
+ initonce.Do(initAll)
+ return p224
+}
+
+func (curve p224Curve) Params() *CurveParams {
+ return curve.CurveParams
+}
+
+func (curve p224Curve) IsOnCurve(bigX, bigY *big.Int) bool {
+ if bigX.Sign() < 0 || bigX.Cmp(curve.P) >= 0 ||
+ bigY.Sign() < 0 || bigY.Cmp(curve.P) >= 0 {
+ return false
+ }
+
+ var x, y p224FieldElement
+ p224FromBig(&x, bigX)
+ p224FromBig(&y, bigY)
+
+ // y² = x³ - 3x + b
+ var tmp p224LargeFieldElement
+ var x3 p224FieldElement
+ p224Square(&x3, &x, &tmp)
+ p224Mul(&x3, &x3, &x, &tmp)
+
+ for i := 0; i < 8; i++ {
+ x[i] *= 3
+ }
+ p224Sub(&x3, &x3, &x)
+ p224Reduce(&x3)
+ p224Add(&x3, &x3, &curve.b)
+ p224Contract(&x3, &x3)
+
+ p224Square(&y, &y, &tmp)
+ p224Contract(&y, &y)
+
+ for i := 0; i < 8; i++ {
+ if y[i] != x3[i] {
+ return false
+ }
+ }
+ return true
+}
+
+func (p224Curve) Add(bigX1, bigY1, bigX2, bigY2 *big.Int) (x, y *big.Int) {
+ var x1, y1, z1, x2, y2, z2, x3, y3, z3 p224FieldElement
+
+ p224FromBig(&x1, bigX1)
+ p224FromBig(&y1, bigY1)
+ if bigX1.Sign() != 0 || bigY1.Sign() != 0 {
+ z1[0] = 1
+ }
+ p224FromBig(&x2, bigX2)
+ p224FromBig(&y2, bigY2)
+ if bigX2.Sign() != 0 || bigY2.Sign() != 0 {
+ z2[0] = 1
+ }
+
+ p224AddJacobian(&x3, &y3, &z3, &x1, &y1, &z1, &x2, &y2, &z2)
+ return p224ToAffine(&x3, &y3, &z3)
+}
+
+func (p224Curve) Double(bigX1, bigY1 *big.Int) (x, y *big.Int) {
+ var x1, y1, z1, x2, y2, z2 p224FieldElement
+
+ p224FromBig(&x1, bigX1)
+ p224FromBig(&y1, bigY1)
+ z1[0] = 1
+
+ p224DoubleJacobian(&x2, &y2, &z2, &x1, &y1, &z1)
+ return p224ToAffine(&x2, &y2, &z2)
+}
+
+func (p224Curve) ScalarMult(bigX1, bigY1 *big.Int, scalar []byte) (x, y *big.Int) {
+ var x1, y1, z1, x2, y2, z2 p224FieldElement
+
+ p224FromBig(&x1, bigX1)
+ p224FromBig(&y1, bigY1)
+ z1[0] = 1
+
+ p224ScalarMult(&x2, &y2, &z2, &x1, &y1, &z1, scalar)
+ return p224ToAffine(&x2, &y2, &z2)
+}
+
+func (curve p224Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
+ var z1, x2, y2, z2 p224FieldElement
+
+ z1[0] = 1
+ p224ScalarMult(&x2, &y2, &z2, &curve.gx, &curve.gy, &z1, scalar)
+ return p224ToAffine(&x2, &y2, &z2)
+}
+
+// Field element functions.
+//
+// The field that we're dealing with is ℤ/pℤ where p = 2**224 - 2**96 + 1.
+//
+// Field elements are represented by a FieldElement, which is a typedef to an
+// array of 8 uint32's. The value of a FieldElement, a, is:
+// a[0] + 2**28·a[1] + 2**56·a[1] + ... + 2**196·a[7]
+//
+// Using 28-bit limbs means that there's only 4 bits of headroom, which is less
+// than we would really like. But it has the useful feature that we hit 2**224
+// exactly, making the reflections during a reduce much nicer.
+type p224FieldElement [8]uint32
+
+// p224P is the order of the field, represented as a p224FieldElement.
+var p224P = [8]uint32{1, 0, 0, 0xffff000, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff}
+
+// p224IsZero returns 1 if a == 0 mod p and 0 otherwise.
+//
+// a[i] < 2**29
+func p224IsZero(a *p224FieldElement) uint32 {
+ // Since a p224FieldElement contains 224 bits there are two possible
+ // representations of 0: 0 and p.
+ var minimal p224FieldElement
+ p224Contract(&minimal, a)
+
+ var isZero, isP uint32
+ for i, v := range minimal {
+ isZero |= v
+ isP |= v - p224P[i]
+ }
+
+ // If either isZero or isP is 0, then we should return 1.
+ isZero |= isZero >> 16
+ isZero |= isZero >> 8
+ isZero |= isZero >> 4
+ isZero |= isZero >> 2
+ isZero |= isZero >> 1
+
+ isP |= isP >> 16
+ isP |= isP >> 8
+ isP |= isP >> 4
+ isP |= isP >> 2
+ isP |= isP >> 1
+
+ // For isZero and isP, the LSB is 0 iff all the bits are zero.
+ result := isZero & isP
+ result = (^result) & 1
+
+ return result
+}
+
+// p224Add computes *out = a+b
+//
+// a[i] + b[i] < 2**32
+func p224Add(out, a, b *p224FieldElement) {
+ for i := 0; i < 8; i++ {
+ out[i] = a[i] + b[i]
+ }
+}
+
+const two31p3 = 1<<31 + 1<<3
+const two31m3 = 1<<31 - 1<<3
+const two31m15m3 = 1<<31 - 1<<15 - 1<<3
+
+// p224ZeroModP31 is 0 mod p where bit 31 is set in all limbs so that we can
+// subtract smaller amounts without underflow. See the section "Subtraction" in
+// [1] for reasoning.
+var p224ZeroModP31 = []uint32{two31p3, two31m3, two31m3, two31m15m3, two31m3, two31m3, two31m3, two31m3}
+
+// p224Sub computes *out = a-b
+//
+// a[i], b[i] < 2**30
+// out[i] < 2**32
+func p224Sub(out, a, b *p224FieldElement) {
+ for i := 0; i < 8; i++ {
+ out[i] = a[i] + p224ZeroModP31[i] - b[i]
+ }
+}
+
+// LargeFieldElement also represents an element of the field. The limbs are
+// still spaced 28-bits apart and in little-endian order. So the limbs are at
+// 0, 28, 56, ..., 392 bits, each 64-bits wide.
+type p224LargeFieldElement [15]uint64
+
+const two63p35 = 1<<63 + 1<<35
+const two63m35 = 1<<63 - 1<<35
+const two63m35m19 = 1<<63 - 1<<35 - 1<<19
+
+// p224ZeroModP63 is 0 mod p where bit 63 is set in all limbs. See the section
+// "Subtraction" in [1] for why.
+var p224ZeroModP63 = [8]uint64{two63p35, two63m35, two63m35, two63m35, two63m35m19, two63m35, two63m35, two63m35}
+
+const bottom12Bits = 0xfff
+const bottom28Bits = 0xfffffff
+
+// p224Mul computes *out = a*b
+//
+// a[i] < 2**29, b[i] < 2**30 (or vice versa)
+// out[i] < 2**29
+func p224Mul(out, a, b *p224FieldElement, tmp *p224LargeFieldElement) {
+ for i := 0; i < 15; i++ {
+ tmp[i] = 0
+ }
+
+ for i := 0; i < 8; i++ {
+ for j := 0; j < 8; j++ {
+ tmp[i+j] += uint64(a[i]) * uint64(b[j])
+ }
+ }
+
+ p224ReduceLarge(out, tmp)
+}
+
+// Square computes *out = a*a
+//
+// a[i] < 2**29
+// out[i] < 2**29
+func p224Square(out, a *p224FieldElement, tmp *p224LargeFieldElement) {
+ for i := 0; i < 15; i++ {
+ tmp[i] = 0
+ }
+
+ for i := 0; i < 8; i++ {
+ for j := 0; j <= i; j++ {
+ r := uint64(a[i]) * uint64(a[j])
+ if i == j {
+ tmp[i+j] += r
+ } else {
+ tmp[i+j] += r << 1
+ }
+ }
+ }
+
+ p224ReduceLarge(out, tmp)
+}
+
+// ReduceLarge converts a p224LargeFieldElement to a p224FieldElement.
+//
+// in[i] < 2**62
+func p224ReduceLarge(out *p224FieldElement, in *p224LargeFieldElement) {
+ for i := 0; i < 8; i++ {
+ in[i] += p224ZeroModP63[i]
+ }
+
+ // Eliminate the coefficients at 2**224 and greater.
+ for i := 14; i >= 8; i-- {
+ in[i-8] -= in[i]
+ in[i-5] += (in[i] & 0xffff) << 12
+ in[i-4] += in[i] >> 16
+ }
+ in[8] = 0
+ // in[0..8] < 2**64
+
+ // As the values become small enough, we start to store them in |out|
+ // and use 32-bit operations.
+ for i := 1; i < 8; i++ {
+ in[i+1] += in[i] >> 28
+ out[i] = uint32(in[i] & bottom28Bits)
+ }
+ in[0] -= in[8]
+ out[3] += uint32(in[8]&0xffff) << 12
+ out[4] += uint32(in[8] >> 16)
+ // in[0] < 2**64
+ // out[3] < 2**29
+ // out[4] < 2**29
+ // out[1,2,5..7] < 2**28
+
+ out[0] = uint32(in[0] & bottom28Bits)
+ out[1] += uint32((in[0] >> 28) & bottom28Bits)
+ out[2] += uint32(in[0] >> 56)
+ // out[0] < 2**28
+ // out[1..4] < 2**29
+ // out[5..7] < 2**28
+}
+
+// Reduce reduces the coefficients of a to smaller bounds.
+//
+// On entry: a[i] < 2**31 + 2**30
+// On exit: a[i] < 2**29
+func p224Reduce(a *p224FieldElement) {
+ for i := 0; i < 7; i++ {
+ a[i+1] += a[i] >> 28
+ a[i] &= bottom28Bits
+ }
+ top := a[7] >> 28
+ a[7] &= bottom28Bits
+
+ // top < 2**4
+ mask := top
+ mask |= mask >> 2
+ mask |= mask >> 1
+ mask <<= 31
+ mask = uint32(int32(mask) >> 31)
+ // Mask is all ones if top != 0, all zero otherwise
+
+ a[0] -= top
+ a[3] += top << 12
+
+ // We may have just made a[0] negative but, if we did, then we must
+ // have added something to a[3], this it's > 2**12. Therefore we can
+ // carry down to a[0].
+ a[3] -= 1 & mask
+ a[2] += mask & (1<<28 - 1)
+ a[1] += mask & (1<<28 - 1)
+ a[0] += mask & (1 << 28)
+}
+
+// p224Invert calculates *out = in**-1 by computing in**(2**224 - 2**96 - 1),
+// i.e. Fermat's little theorem.
+func p224Invert(out, in *p224FieldElement) {
+ var f1, f2, f3, f4 p224FieldElement
+ var c p224LargeFieldElement
+
+ p224Square(&f1, in, &c) // 2
+ p224Mul(&f1, &f1, in, &c) // 2**2 - 1
+ p224Square(&f1, &f1, &c) // 2**3 - 2
+ p224Mul(&f1, &f1, in, &c) // 2**3 - 1
+ p224Square(&f2, &f1, &c) // 2**4 - 2
+ p224Square(&f2, &f2, &c) // 2**5 - 4
+ p224Square(&f2, &f2, &c) // 2**6 - 8
+ p224Mul(&f1, &f1, &f2, &c) // 2**6 - 1
+ p224Square(&f2, &f1, &c) // 2**7 - 2
+ for i := 0; i < 5; i++ { // 2**12 - 2**6
+ p224Square(&f2, &f2, &c)
+ }
+ p224Mul(&f2, &f2, &f1, &c) // 2**12 - 1
+ p224Square(&f3, &f2, &c) // 2**13 - 2
+ for i := 0; i < 11; i++ { // 2**24 - 2**12
+ p224Square(&f3, &f3, &c)
+ }
+ p224Mul(&f2, &f3, &f2, &c) // 2**24 - 1
+ p224Square(&f3, &f2, &c) // 2**25 - 2
+ for i := 0; i < 23; i++ { // 2**48 - 2**24
+ p224Square(&f3, &f3, &c)
+ }
+ p224Mul(&f3, &f3, &f2, &c) // 2**48 - 1
+ p224Square(&f4, &f3, &c) // 2**49 - 2
+ for i := 0; i < 47; i++ { // 2**96 - 2**48
+ p224Square(&f4, &f4, &c)
+ }
+ p224Mul(&f3, &f3, &f4, &c) // 2**96 - 1
+ p224Square(&f4, &f3, &c) // 2**97 - 2
+ for i := 0; i < 23; i++ { // 2**120 - 2**24
+ p224Square(&f4, &f4, &c)
+ }
+ p224Mul(&f2, &f4, &f2, &c) // 2**120 - 1
+ for i := 0; i < 6; i++ { // 2**126 - 2**6
+ p224Square(&f2, &f2, &c)
+ }
+ p224Mul(&f1, &f1, &f2, &c) // 2**126 - 1
+ p224Square(&f1, &f1, &c) // 2**127 - 2
+ p224Mul(&f1, &f1, in, &c) // 2**127 - 1
+ for i := 0; i < 97; i++ { // 2**224 - 2**97
+ p224Square(&f1, &f1, &c)
+ }
+ p224Mul(out, &f1, &f3, &c) // 2**224 - 2**96 - 1
+}
+
+// p224Contract converts a FieldElement to its unique, minimal form.
+//
+// On entry, in[i] < 2**29
+// On exit, out[i] < 2**28 and out < p
+func p224Contract(out, in *p224FieldElement) {
+ copy(out[:], in[:])
+
+ // First, carry the bits above 28 to the higher limb.
+ for i := 0; i < 7; i++ {
+ out[i+1] += out[i] >> 28
+ out[i] &= bottom28Bits
+ }
+ top := out[7] >> 28
+ out[7] &= bottom28Bits
+
+ // Use the reduction identity to carry the overflow.
+ //
+ // a + top * 2²²⁴ = a + top * 2⁹⁶ - top
+ out[0] -= top
+ out[3] += top << 12
+
+ // We may just have made out[0] negative. So we carry down. If we made
+ // out[0] negative then we know that out[3] is sufficiently positive
+ // because we just added to it.
+ for i := 0; i < 3; i++ {
+ mask := uint32(int32(out[i]) >> 31)
+ out[i] += (1 << 28) & mask
+ out[i+1] -= 1 & mask
+ }
+
+ // We might have pushed out[3] over 2**28 so we perform another, partial,
+ // carry chain.
+ for i := 3; i < 7; i++ {
+ out[i+1] += out[i] >> 28
+ out[i] &= bottom28Bits
+ }
+ top = out[7] >> 28
+ out[7] &= bottom28Bits
+
+ // Eliminate top while maintaining the same value mod p.
+ out[0] -= top
+ out[3] += top << 12
+
+ // There are two cases to consider for out[3]:
+ // 1) The first time that we eliminated top, we didn't push out[3] over
+ // 2**28. In this case, the partial carry chain didn't change any values
+ // and top is now zero.
+ // 2) We did push out[3] over 2**28 the first time that we eliminated top.
+ // The first value of top was in [0..2], therefore, after overflowing
+ // and being reduced by the second carry chain, out[3] <= 2<<12 - 1.
+ // In both cases, out[3] cannot have overflowed when we eliminated top for
+ // the second time.
+
+ // Again, we may just have made out[0] negative, so do the same carry down.
+ // As before, if we made out[0] negative then we know that out[3] is
+ // sufficiently positive.
+ for i := 0; i < 3; i++ {
+ mask := uint32(int32(out[i]) >> 31)
+ out[i] += (1 << 28) & mask
+ out[i+1] -= 1 & mask
+ }
+
+ // Now we see if the value is >= p and, if so, subtract p.
+
+ // First we build a mask from the top four limbs, which must all be
+ // equal to bottom28Bits if the whole value is >= p. If top4AllOnes
+ // ends up with any zero bits in the bottom 28 bits, then this wasn't
+ // true.
+ top4AllOnes := uint32(0xffffffff)
+ for i := 4; i < 8; i++ {
+ top4AllOnes &= out[i]
+ }
+ top4AllOnes |= 0xf0000000
+ // Now we replicate any zero bits to all the bits in top4AllOnes.
+ top4AllOnes &= top4AllOnes >> 16
+ top4AllOnes &= top4AllOnes >> 8
+ top4AllOnes &= top4AllOnes >> 4
+ top4AllOnes &= top4AllOnes >> 2
+ top4AllOnes &= top4AllOnes >> 1
+ top4AllOnes = uint32(int32(top4AllOnes<<31) >> 31)
+
+ // Now we test whether the bottom three limbs are non-zero.
+ bottom3NonZero := out[0] | out[1] | out[2]
+ bottom3NonZero |= bottom3NonZero >> 16
+ bottom3NonZero |= bottom3NonZero >> 8
+ bottom3NonZero |= bottom3NonZero >> 4
+ bottom3NonZero |= bottom3NonZero >> 2
+ bottom3NonZero |= bottom3NonZero >> 1
+ bottom3NonZero = uint32(int32(bottom3NonZero<<31) >> 31)
+
+ // Assuming top4AllOnes != 0, everything depends on the value of out[3].
+ // If it's > 0xffff000 then the whole value is > p
+ // If it's = 0xffff000 and bottom3NonZero != 0, then the whole value is >= p
+ // If it's < 0xffff000, then the whole value is < p
+ n := 0xffff000 - out[3]
+ out3Equal := n
+ out3Equal |= out3Equal >> 16
+ out3Equal |= out3Equal >> 8
+ out3Equal |= out3Equal >> 4
+ out3Equal |= out3Equal >> 2
+ out3Equal |= out3Equal >> 1
+ out3Equal = ^uint32(int32(out3Equal<<31) >> 31)
+
+ // If out[3] > 0xffff000 then n's MSB will be one.
+ out3GT := uint32(int32(n) >> 31)
+
+ mask := top4AllOnes & ((out3Equal & bottom3NonZero) | out3GT)
+ out[0] -= 1 & mask
+ out[3] -= 0xffff000 & mask
+ out[4] -= 0xfffffff & mask
+ out[5] -= 0xfffffff & mask
+ out[6] -= 0xfffffff & mask
+ out[7] -= 0xfffffff & mask
+
+ // Do one final carry down, in case we made out[0] negative. One of
+ // out[0..3] needs to be positive and able to absorb the -1 or the value
+ // would have been < p, and the subtraction wouldn't have happened.
+ for i := 0; i < 3; i++ {
+ mask := uint32(int32(out[i]) >> 31)
+ out[i] += (1 << 28) & mask
+ out[i+1] -= 1 & mask
+ }
+}
+
+// Group element functions.
+//
+// These functions deal with group elements. The group is an elliptic curve
+// group with a = -3 defined in FIPS 186-3, section D.2.2.
+
+// p224AddJacobian computes *out = a+b where a != b.
+func p224AddJacobian(x3, y3, z3, x1, y1, z1, x2, y2, z2 *p224FieldElement) {
+ // See https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-p224Add-2007-bl
+ var z1z1, z2z2, u1, u2, s1, s2, h, i, j, r, v p224FieldElement
+ var c p224LargeFieldElement
+
+ z1IsZero := p224IsZero(z1)
+ z2IsZero := p224IsZero(z2)
+
+ // Z1Z1 = Z1²
+ p224Square(&z1z1, z1, &c)
+ // Z2Z2 = Z2²
+ p224Square(&z2z2, z2, &c)
+ // U1 = X1*Z2Z2
+ p224Mul(&u1, x1, &z2z2, &c)
+ // U2 = X2*Z1Z1
+ p224Mul(&u2, x2, &z1z1, &c)
+ // S1 = Y1*Z2*Z2Z2
+ p224Mul(&s1, z2, &z2z2, &c)
+ p224Mul(&s1, y1, &s1, &c)
+ // S2 = Y2*Z1*Z1Z1
+ p224Mul(&s2, z1, &z1z1, &c)
+ p224Mul(&s2, y2, &s2, &c)
+ // H = U2-U1
+ p224Sub(&h, &u2, &u1)
+ p224Reduce(&h)
+ xEqual := p224IsZero(&h)
+ // I = (2*H)²
+ for j := 0; j < 8; j++ {
+ i[j] = h[j] << 1
+ }
+ p224Reduce(&i)
+ p224Square(&i, &i, &c)
+ // J = H*I
+ p224Mul(&j, &h, &i, &c)
+ // r = 2*(S2-S1)
+ p224Sub(&r, &s2, &s1)
+ p224Reduce(&r)
+ yEqual := p224IsZero(&r)
+ if xEqual == 1 && yEqual == 1 && z1IsZero == 0 && z2IsZero == 0 {
+ p224DoubleJacobian(x3, y3, z3, x1, y1, z1)
+ return
+ }
+ for i := 0; i < 8; i++ {
+ r[i] <<= 1
+ }
+ p224Reduce(&r)
+ // V = U1*I
+ p224Mul(&v, &u1, &i, &c)
+ // Z3 = ((Z1+Z2)²-Z1Z1-Z2Z2)*H
+ p224Add(&z1z1, &z1z1, &z2z2)
+ p224Add(&z2z2, z1, z2)
+ p224Reduce(&z2z2)
+ p224Square(&z2z2, &z2z2, &c)
+ p224Sub(z3, &z2z2, &z1z1)
+ p224Reduce(z3)
+ p224Mul(z3, z3, &h, &c)
+ // X3 = r²-J-2*V
+ for i := 0; i < 8; i++ {
+ z1z1[i] = v[i] << 1
+ }
+ p224Add(&z1z1, &j, &z1z1)
+ p224Reduce(&z1z1)
+ p224Square(x3, &r, &c)
+ p224Sub(x3, x3, &z1z1)
+ p224Reduce(x3)
+ // Y3 = r*(V-X3)-2*S1*J
+ for i := 0; i < 8; i++ {
+ s1[i] <<= 1
+ }
+ p224Mul(&s1, &s1, &j, &c)
+ p224Sub(&z1z1, &v, x3)
+ p224Reduce(&z1z1)
+ p224Mul(&z1z1, &z1z1, &r, &c)
+ p224Sub(y3, &z1z1, &s1)
+ p224Reduce(y3)
+
+ p224CopyConditional(x3, x2, z1IsZero)
+ p224CopyConditional(x3, x1, z2IsZero)
+ p224CopyConditional(y3, y2, z1IsZero)
+ p224CopyConditional(y3, y1, z2IsZero)
+ p224CopyConditional(z3, z2, z1IsZero)
+ p224CopyConditional(z3, z1, z2IsZero)
+}
+
+// p224DoubleJacobian computes *out = a+a.
+func p224DoubleJacobian(x3, y3, z3, x1, y1, z1 *p224FieldElement) {
+ var delta, gamma, beta, alpha, t p224FieldElement
+ var c p224LargeFieldElement
+
+ p224Square(&delta, z1, &c)
+ p224Square(&gamma, y1, &c)
+ p224Mul(&beta, x1, &gamma, &c)
+
+ // alpha = 3*(X1-delta)*(X1+delta)
+ p224Add(&t, x1, &delta)
+ for i := 0; i < 8; i++ {
+ t[i] += t[i] << 1
+ }
+ p224Reduce(&t)
+ p224Sub(&alpha, x1, &delta)
+ p224Reduce(&alpha)
+ p224Mul(&alpha, &alpha, &t, &c)
+
+ // Z3 = (Y1+Z1)²-gamma-delta
+ p224Add(z3, y1, z1)
+ p224Reduce(z3)
+ p224Square(z3, z3, &c)
+ p224Sub(z3, z3, &gamma)
+ p224Reduce(z3)
+ p224Sub(z3, z3, &delta)
+ p224Reduce(z3)
+
+ // X3 = alpha²-8*beta
+ for i := 0; i < 8; i++ {
+ delta[i] = beta[i] << 3
+ }
+ p224Reduce(&delta)
+ p224Square(x3, &alpha, &c)
+ p224Sub(x3, x3, &delta)
+ p224Reduce(x3)
+
+ // Y3 = alpha*(4*beta-X3)-8*gamma²
+ for i := 0; i < 8; i++ {
+ beta[i] <<= 2
+ }
+ p224Sub(&beta, &beta, x3)
+ p224Reduce(&beta)
+ p224Square(&gamma, &gamma, &c)
+ for i := 0; i < 8; i++ {
+ gamma[i] <<= 3
+ }
+ p224Reduce(&gamma)
+ p224Mul(y3, &alpha, &beta, &c)
+ p224Sub(y3, y3, &gamma)
+ p224Reduce(y3)
+}
+
+// p224CopyConditional sets *out = *in iff the least-significant-bit of control
+// is true, and it runs in constant time.
+func p224CopyConditional(out, in *p224FieldElement, control uint32) {
+ control <<= 31
+ control = uint32(int32(control) >> 31)
+
+ for i := 0; i < 8; i++ {
+ out[i] ^= (out[i] ^ in[i]) & control
+ }
+}
+
+func p224ScalarMult(outX, outY, outZ, inX, inY, inZ *p224FieldElement, scalar []byte) {
+ var xx, yy, zz p224FieldElement
+ for i := 0; i < 8; i++ {
+ outX[i] = 0
+ outY[i] = 0
+ outZ[i] = 0
+ }
+
+ for _, byte := range scalar {
+ for bitNum := uint(0); bitNum < 8; bitNum++ {
+ p224DoubleJacobian(outX, outY, outZ, outX, outY, outZ)
+ bit := uint32((byte >> (7 - bitNum)) & 1)
+ p224AddJacobian(&xx, &yy, &zz, inX, inY, inZ, outX, outY, outZ)
+ p224CopyConditional(outX, &xx, bit)
+ p224CopyConditional(outY, &yy, bit)
+ p224CopyConditional(outZ, &zz, bit)
+ }
+ }
+}
+
+// p224ToAffine converts from Jacobian to affine form.
+func p224ToAffine(x, y, z *p224FieldElement) (*big.Int, *big.Int) {
+ var zinv, zinvsq, outx, outy p224FieldElement
+ var tmp p224LargeFieldElement
+
+ if isPointAtInfinity := p224IsZero(z); isPointAtInfinity == 1 {
+ return new(big.Int), new(big.Int)
+ }
+
+ p224Invert(&zinv, z)
+ p224Square(&zinvsq, &zinv, &tmp)
+ p224Mul(x, x, &zinvsq, &tmp)
+ p224Mul(&zinvsq, &zinvsq, &zinv, &tmp)
+ p224Mul(y, y, &zinvsq, &tmp)
+
+ p224Contract(&outx, x)
+ p224Contract(&outy, y)
+ return p224ToBig(&outx), p224ToBig(&outy)
+}
+
+// get28BitsFromEnd returns the least-significant 28 bits from buf>>shift,
+// where buf is interpreted as a big-endian number.
+func get28BitsFromEnd(buf []byte, shift uint) (uint32, []byte) {
+ var ret uint32
+
+ for i := uint(0); i < 4; i++ {
+ var b byte
+ if l := len(buf); l > 0 {
+ b = buf[l-1]
+ // We don't remove the byte if we're about to return and we're not
+ // reading all of it.
+ if i != 3 || shift == 4 {
+ buf = buf[:l-1]
+ }
+ }
+ ret |= uint32(b) << (8 * i) >> shift
+ }
+ ret &= bottom28Bits
+ return ret, buf
+}
+
+// p224FromBig sets *out = *in.
+func p224FromBig(out *p224FieldElement, in *big.Int) {
+ bytes := in.Bytes()
+ out[0], bytes = get28BitsFromEnd(bytes, 0)
+ out[1], bytes = get28BitsFromEnd(bytes, 4)
+ out[2], bytes = get28BitsFromEnd(bytes, 0)
+ out[3], bytes = get28BitsFromEnd(bytes, 4)
+ out[4], bytes = get28BitsFromEnd(bytes, 0)
+ out[5], bytes = get28BitsFromEnd(bytes, 4)
+ out[6], bytes = get28BitsFromEnd(bytes, 0)
+ out[7], bytes = get28BitsFromEnd(bytes, 4)
+}
+
+// p224ToBig returns in as a big.Int.
+func p224ToBig(in *p224FieldElement) *big.Int {
+ var buf [28]byte
+ buf[27] = byte(in[0])
+ buf[26] = byte(in[0] >> 8)
+ buf[25] = byte(in[0] >> 16)
+ buf[24] = byte(((in[0] >> 24) & 0x0f) | (in[1]<<4)&0xf0)
+
+ buf[23] = byte(in[1] >> 4)
+ buf[22] = byte(in[1] >> 12)
+ buf[21] = byte(in[1] >> 20)
+
+ buf[20] = byte(in[2])
+ buf[19] = byte(in[2] >> 8)
+ buf[18] = byte(in[2] >> 16)
+ buf[17] = byte(((in[2] >> 24) & 0x0f) | (in[3]<<4)&0xf0)
+
+ buf[16] = byte(in[3] >> 4)
+ buf[15] = byte(in[3] >> 12)
+ buf[14] = byte(in[3] >> 20)
+
+ buf[13] = byte(in[4])
+ buf[12] = byte(in[4] >> 8)
+ buf[11] = byte(in[4] >> 16)
+ buf[10] = byte(((in[4] >> 24) & 0x0f) | (in[5]<<4)&0xf0)
+
+ buf[9] = byte(in[5] >> 4)
+ buf[8] = byte(in[5] >> 12)
+ buf[7] = byte(in[5] >> 20)
+
+ buf[6] = byte(in[6])
+ buf[5] = byte(in[6] >> 8)
+ buf[4] = byte(in[6] >> 16)
+ buf[3] = byte(((in[6] >> 24) & 0x0f) | (in[7]<<4)&0xf0)
+
+ buf[2] = byte(in[7] >> 4)
+ buf[1] = byte(in[7] >> 12)
+ buf[0] = byte(in[7] >> 20)
+
+ return new(big.Int).SetBytes(buf[:])
+}
diff --git a/src/crypto/elliptic/p224_test.go b/src/crypto/elliptic/p224_test.go
new file mode 100644
index 0000000..b213b27
--- /dev/null
+++ b/src/crypto/elliptic/p224_test.go
@@ -0,0 +1,629 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package elliptic
+
+import (
+ "encoding/hex"
+ "fmt"
+ "math/big"
+ "math/bits"
+ "math/rand"
+ "reflect"
+ "testing"
+ "testing/quick"
+)
+
+var toFromBigTests = []string{
+ "0",
+ "1",
+ "23",
+ "b70e0cb46bb4bf7f321390b94a03c1d356c01122343280d6105c1d21",
+ "706a46d476dcb76798e6046d89474788d164c18032d268fd10704fa6",
+}
+
+func p224AlternativeToBig(in *p224FieldElement) *big.Int {
+ ret := new(big.Int)
+ tmp := new(big.Int)
+
+ for i := len(in) - 1; i >= 0; i-- {
+ ret.Lsh(ret, 28)
+ tmp.SetInt64(int64(in[i]))
+ ret.Add(ret, tmp)
+ }
+ ret.Mod(ret, P224().Params().P)
+ return ret
+}
+
+func TestP224ToFromBig(t *testing.T) {
+ for i, test := range toFromBigTests {
+ n, _ := new(big.Int).SetString(test, 16)
+ var x p224FieldElement
+ p224FromBig(&x, n)
+ m := p224ToBig(&x)
+ if n.Cmp(m) != 0 {
+ t.Errorf("#%d: %x != %x", i, n, m)
+ }
+ q := p224AlternativeToBig(&x)
+ if n.Cmp(q) != 0 {
+ t.Errorf("#%d: %x != %x (alternative)", i, n, q)
+ }
+ }
+}
+
+// quickCheckConfig32 will make each quickcheck test run (32 * -quickchecks)
+// times. The default value of -quickchecks is 100.
+var quickCheckConfig32 = &quick.Config{MaxCountScale: 32}
+
+// weirdLimbs can be combined to generate a range of edge-case field elements.
+var weirdLimbs = [...]uint32{
+ 0, 1, (1 << 29) - 1,
+ (1 << 12), (1 << 12) - 1,
+ (1 << 28), (1 << 28) - 1,
+}
+
+func generateLimb(rand *rand.Rand) uint32 {
+ const bottom29Bits = 0x1fffffff
+ n := rand.Intn(len(weirdLimbs) + 3)
+ switch n {
+ case len(weirdLimbs):
+ // Random value.
+ return uint32(rand.Int31n(1 << 29))
+ case len(weirdLimbs) + 1:
+ // Sum of two values.
+ k := generateLimb(rand) + generateLimb(rand)
+ return k & bottom29Bits
+ case len(weirdLimbs) + 2:
+ // Difference of two values.
+ k := generateLimb(rand) - generateLimb(rand)
+ return k & bottom29Bits
+ default:
+ return weirdLimbs[n]
+ }
+}
+
+func (p224FieldElement) Generate(rand *rand.Rand, size int) reflect.Value {
+ return reflect.ValueOf(p224FieldElement{
+ generateLimb(rand),
+ generateLimb(rand),
+ generateLimb(rand),
+ generateLimb(rand),
+ generateLimb(rand),
+ generateLimb(rand),
+ generateLimb(rand),
+ generateLimb(rand),
+ })
+}
+
+func isInBounds(x *p224FieldElement) bool {
+ return bits.Len32(x[0]) <= 29 &&
+ bits.Len32(x[1]) <= 29 &&
+ bits.Len32(x[2]) <= 29 &&
+ bits.Len32(x[3]) <= 29 &&
+ bits.Len32(x[4]) <= 29 &&
+ bits.Len32(x[5]) <= 29 &&
+ bits.Len32(x[6]) <= 29 &&
+ bits.Len32(x[7]) <= 29
+}
+
+func TestP224Mul(t *testing.T) {
+ mulMatchesBigInt := func(a, b, out p224FieldElement) bool {
+ var tmp p224LargeFieldElement
+ p224Mul(&out, &a, &b, &tmp)
+
+ exp := new(big.Int).Mul(p224AlternativeToBig(&a), p224AlternativeToBig(&b))
+ exp.Mod(exp, P224().Params().P)
+ got := p224AlternativeToBig(&out)
+ if exp.Cmp(got) != 0 || !isInBounds(&out) {
+ t.Logf("a = %x", a)
+ t.Logf("b = %x", b)
+ t.Logf("p224Mul(a, b) = %x = %v", out, got)
+ t.Logf("a * b = %v", exp)
+ return false
+ }
+
+ return true
+ }
+
+ a := p224FieldElement{0xfffffff, 0xfffffff, 0xf00ffff, 0x20f, 0x0, 0x0, 0x0, 0x0}
+ b := p224FieldElement{1, 0, 0, 0, 0, 0, 0, 0}
+ if !mulMatchesBigInt(a, b, p224FieldElement{}) {
+ t.Fail()
+ }
+
+ if err := quick.Check(mulMatchesBigInt, quickCheckConfig32); err != nil {
+ t.Error(err)
+ }
+}
+
+func TestP224Square(t *testing.T) {
+ squareMatchesBigInt := func(a, out p224FieldElement) bool {
+ var tmp p224LargeFieldElement
+ p224Square(&out, &a, &tmp)
+
+ exp := p224AlternativeToBig(&a)
+ exp.Mul(exp, exp)
+ exp.Mod(exp, P224().Params().P)
+ got := p224AlternativeToBig(&out)
+ if exp.Cmp(got) != 0 || !isInBounds(&out) {
+ t.Logf("a = %x", a)
+ t.Logf("p224Square(a, b) = %x = %v", out, got)
+ t.Logf("a * a = %v", exp)
+ return false
+ }
+
+ return true
+ }
+
+ if err := quick.Check(squareMatchesBigInt, quickCheckConfig32); err != nil {
+ t.Error(err)
+ }
+}
+
+func TestP224Add(t *testing.T) {
+ addMatchesBigInt := func(a, b, out p224FieldElement) bool {
+ p224Add(&out, &a, &b)
+
+ exp := new(big.Int).Add(p224AlternativeToBig(&a), p224AlternativeToBig(&b))
+ exp.Mod(exp, P224().Params().P)
+ got := p224AlternativeToBig(&out)
+ if exp.Cmp(got) != 0 {
+ t.Logf("a = %x", a)
+ t.Logf("b = %x", b)
+ t.Logf("p224Add(a, b) = %x = %v", out, got)
+ t.Logf("a + b = %v", exp)
+ return false
+ }
+
+ return true
+ }
+
+ if err := quick.Check(addMatchesBigInt, quickCheckConfig32); err != nil {
+ t.Error(err)
+ }
+}
+
+func TestP224Reduce(t *testing.T) {
+ reduceMatchesBigInt := func(a p224FieldElement) bool {
+ out := a
+ // TODO: generate higher values for functions like p224Reduce that are
+ // expected to work with higher input bounds.
+ p224Reduce(&out)
+
+ exp := p224AlternativeToBig(&a)
+ got := p224AlternativeToBig(&out)
+ if exp.Cmp(got) != 0 || !isInBounds(&out) {
+ t.Logf("a = %x = %v", a, exp)
+ t.Logf("p224Reduce(a) = %x = %v", out, got)
+ return false
+ }
+
+ return true
+ }
+
+ if err := quick.Check(reduceMatchesBigInt, quickCheckConfig32); err != nil {
+ t.Error(err)
+ }
+}
+
+func TestP224Contract(t *testing.T) {
+ contractMatchesBigInt := func(a, out p224FieldElement) bool {
+ p224Contract(&out, &a)
+
+ exp := p224AlternativeToBig(&a)
+ got := p224AlternativeToBig(&out)
+ if exp.Cmp(got) != 0 {
+ t.Logf("a = %x = %v", a, exp)
+ t.Logf("p224Contract(a) = %x = %v", out, got)
+ return false
+ }
+
+ // Check that out < P.
+ for i := range p224P {
+ k := 8 - i - 1
+ if out[k] > p224P[k] {
+ t.Logf("p224Contract(a) = %x", out)
+ return false
+ }
+ if out[k] < p224P[k] {
+ return true
+ }
+ }
+ t.Logf("p224Contract(a) = %x", out)
+ return false
+ }
+
+ if !contractMatchesBigInt(p224P, p224FieldElement{}) {
+ t.Error("p224Contract(p) is broken")
+ }
+ pMinus1 := p224FieldElement{0, 0, 0, 0xffff000, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff}
+ if !contractMatchesBigInt(pMinus1, p224FieldElement{}) {
+ t.Error("p224Contract(p - 1) is broken")
+ }
+ // Check that we can handle input above p, but lowest limb zero.
+ a := p224FieldElement{0, 1, 0, 0xffff000, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff}
+ if !contractMatchesBigInt(a, p224FieldElement{}) {
+ t.Error("p224Contract(p + 2²⁸) is broken")
+ }
+ // Check that we can handle input above p, but lowest three limbs zero.
+ b := p224FieldElement{0, 0, 0, 0xffff001, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff}
+ if !contractMatchesBigInt(b, p224FieldElement{}) {
+ t.Error("p224Contract(p + 2⁸⁴) is broken")
+ }
+
+ if err := quick.Check(contractMatchesBigInt, quickCheckConfig32); err != nil {
+ t.Error(err)
+ }
+}
+
+func TestP224IsZero(t *testing.T) {
+ if got := p224IsZero(&p224FieldElement{}); got != 1 {
+ t.Errorf("p224IsZero(0) = %d, expected 1", got)
+ }
+ if got := p224IsZero((*p224FieldElement)(&p224P)); got != 1 {
+ t.Errorf("p224IsZero(p) = %d, expected 1", got)
+ }
+ if got := p224IsZero(&p224FieldElement{1}); got != 0 {
+ t.Errorf("p224IsZero(1) = %d, expected 0", got)
+ }
+
+ isZeroMatchesBigInt := func(a p224FieldElement) bool {
+ isZero := p224IsZero(&a)
+
+ big := p224AlternativeToBig(&a)
+ if big.Sign() == 0 && isZero != 1 {
+ return false
+ }
+ if big.Sign() != 0 && isZero != 0 {
+ return false
+ }
+ return true
+ }
+
+ if err := quick.Check(isZeroMatchesBigInt, quickCheckConfig32); err != nil {
+ t.Error(err)
+ }
+}
+
+func TestP224Invert(t *testing.T) {
+ var out p224FieldElement
+
+ p224Invert(&out, &p224FieldElement{})
+ if got := p224IsZero(&out); got != 1 {
+ t.Errorf("p224Invert(0) = %x, expected 0", out)
+ }
+
+ p224Invert(&out, (*p224FieldElement)(&p224P))
+ if got := p224IsZero(&out); got != 1 {
+ t.Errorf("p224Invert(p) = %x, expected 0", out)
+ }
+
+ p224Invert(&out, &p224FieldElement{1})
+ p224Contract(&out, &out)
+ if out != (p224FieldElement{1}) {
+ t.Errorf("p224Invert(1) = %x, expected 1", out)
+ }
+
+ var tmp p224LargeFieldElement
+ a := p224FieldElement{1, 2, 3, 4, 5, 6, 7, 8}
+ p224Invert(&out, &a)
+ p224Mul(&out, &out, &a, &tmp)
+ p224Contract(&out, &out)
+ if out != (p224FieldElement{1}) {
+ t.Errorf("p224Invert(a) * a = %x, expected 1", out)
+ }
+}
+
+type baseMultTest struct {
+ k string
+ x, y string
+}
+
+var p224BaseMultTests = []baseMultTest{
+ {
+ "1",
+ "b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21",
+ "bd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34",
+ },
+ {
+ "2",
+ "706a46dc76dcb76798e60e6d89474788d16dc18032d268fd1a704fa6",
+ "1c2b76a7bc25e7702a704fa986892849fca629487acf3709d2e4e8bb",
+ },
+ {
+ "3",
+ "df1b1d66a551d0d31eff822558b9d2cc75c2180279fe0d08fd896d04",
+ "a3f7f03cadd0be444c0aa56830130ddf77d317344e1af3591981a925",
+ },
+ {
+ "4",
+ "ae99feebb5d26945b54892092a8aee02912930fa41cd114e40447301",
+ "482580a0ec5bc47e88bc8c378632cd196cb3fa058a7114eb03054c9",
+ },
+ {
+ "5",
+ "31c49ae75bce7807cdff22055d94ee9021fedbb5ab51c57526f011aa",
+ "27e8bff1745635ec5ba0c9f1c2ede15414c6507d29ffe37e790a079b",
+ },
+ {
+ "6",
+ "1f2483f82572251fca975fea40db821df8ad82a3c002ee6c57112408",
+ "89faf0ccb750d99b553c574fad7ecfb0438586eb3952af5b4b153c7e",
+ },
+ {
+ "7",
+ "db2f6be630e246a5cf7d99b85194b123d487e2d466b94b24a03c3e28",
+ "f3a30085497f2f611ee2517b163ef8c53b715d18bb4e4808d02b963",
+ },
+ {
+ "8",
+ "858e6f9cc6c12c31f5df124aa77767b05c8bc021bd683d2b55571550",
+ "46dcd3ea5c43898c5c5fc4fdac7db39c2f02ebee4e3541d1e78047a",
+ },
+ {
+ "9",
+ "2fdcccfee720a77ef6cb3bfbb447f9383117e3daa4a07e36ed15f78d",
+ "371732e4f41bf4f7883035e6a79fcedc0e196eb07b48171697517463",
+ },
+ {
+ "10",
+ "aea9e17a306517eb89152aa7096d2c381ec813c51aa880e7bee2c0fd",
+ "39bb30eab337e0a521b6cba1abe4b2b3a3e524c14a3fe3eb116b655f",
+ },
+ {
+ "11",
+ "ef53b6294aca431f0f3c22dc82eb9050324f1d88d377e716448e507c",
+ "20b510004092e96636cfb7e32efded8265c266dfb754fa6d6491a6da",
+ },
+ {
+ "12",
+ "6e31ee1dc137f81b056752e4deab1443a481033e9b4c93a3044f4f7a",
+ "207dddf0385bfdeab6e9acda8da06b3bbef224a93ab1e9e036109d13",
+ },
+ {
+ "13",
+ "34e8e17a430e43289793c383fac9774247b40e9ebd3366981fcfaeca",
+ "252819f71c7fb7fbcb159be337d37d3336d7feb963724fdfb0ecb767",
+ },
+ {
+ "14",
+ "a53640c83dc208603ded83e4ecf758f24c357d7cf48088b2ce01e9fa",
+ "d5814cd724199c4a5b974a43685fbf5b8bac69459c9469bc8f23ccaf",
+ },
+ {
+ "15",
+ "baa4d8635511a7d288aebeedd12ce529ff102c91f97f867e21916bf9",
+ "979a5f4759f80f4fb4ec2e34f5566d595680a11735e7b61046127989",
+ },
+ {
+ "16",
+ "b6ec4fe1777382404ef679997ba8d1cc5cd8e85349259f590c4c66d",
+ "3399d464345906b11b00e363ef429221f2ec720d2f665d7dead5b482",
+ },
+ {
+ "17",
+ "b8357c3a6ceef288310e17b8bfeff9200846ca8c1942497c484403bc",
+ "ff149efa6606a6bd20ef7d1b06bd92f6904639dce5174db6cc554a26",
+ },
+ {
+ "18",
+ "c9ff61b040874c0568479216824a15eab1a838a797d189746226e4cc",
+ "ea98d60e5ffc9b8fcf999fab1df7e7ef7084f20ddb61bb045a6ce002",
+ },
+ {
+ "19",
+ "a1e81c04f30ce201c7c9ace785ed44cc33b455a022f2acdbc6cae83c",
+ "dcf1f6c3db09c70acc25391d492fe25b4a180babd6cea356c04719cd",
+ },
+ {
+ "20",
+ "fcc7f2b45df1cd5a3c0c0731ca47a8af75cfb0347e8354eefe782455",
+ "d5d7110274cba7cdee90e1a8b0d394c376a5573db6be0bf2747f530",
+ },
+ {
+ "112233445566778899",
+ "61f077c6f62ed802dad7c2f38f5c67f2cc453601e61bd076bb46179e",
+ "2272f9e9f5933e70388ee652513443b5e289dd135dcc0d0299b225e4",
+ },
+ {
+ "112233445566778899112233445566778899",
+ "29895f0af496bfc62b6ef8d8a65c88c613949b03668aab4f0429e35",
+ "3ea6e53f9a841f2019ec24bde1a75677aa9b5902e61081c01064de93",
+ },
+ {
+ "6950511619965839450988900688150712778015737983940691968051900319680",
+ "ab689930bcae4a4aa5f5cb085e823e8ae30fd365eb1da4aba9cf0379",
+ "3345a121bbd233548af0d210654eb40bab788a03666419be6fbd34e7",
+ },
+ {
+ "13479972933410060327035789020509431695094902435494295338570602119423",
+ "bdb6a8817c1f89da1c2f3dd8e97feb4494f2ed302a4ce2bc7f5f4025",
+ "4c7020d57c00411889462d77a5438bb4e97d177700bf7243a07f1680",
+ },
+ {
+ "13479971751745682581351455311314208093898607229429740618390390702079",
+ "d58b61aa41c32dd5eba462647dba75c5d67c83606c0af2bd928446a9",
+ "d24ba6a837be0460dd107ae77725696d211446c5609b4595976b16bd",
+ },
+ {
+ "13479972931865328106486971546324465392952975980343228160962702868479",
+ "dc9fa77978a005510980e929a1485f63716df695d7a0c18bb518df03",
+ "ede2b016f2ddffc2a8c015b134928275ce09e5661b7ab14ce0d1d403",
+ },
+ {
+ "11795773708834916026404142434151065506931607341523388140225443265536",
+ "499d8b2829cfb879c901f7d85d357045edab55028824d0f05ba279ba",
+ "bf929537b06e4015919639d94f57838fa33fc3d952598dcdbb44d638",
+ },
+ {
+ "784254593043826236572847595991346435467177662189391577090",
+ "8246c999137186632c5f9eddf3b1b0e1764c5e8bd0e0d8a554b9cb77",
+ "e80ed8660bc1cb17ac7d845be40a7a022d3306f116ae9f81fea65947",
+ },
+ {
+ "13479767645505654746623887797783387853576174193480695826442858012671",
+ "6670c20afcceaea672c97f75e2e9dd5c8460e54bb38538ebb4bd30eb",
+ "f280d8008d07a4caf54271f993527d46ff3ff46fd1190a3f1faa4f74",
+ },
+ {
+ "205688069665150753842126177372015544874550518966168735589597183",
+ "eca934247425cfd949b795cb5ce1eff401550386e28d1a4c5a8eb",
+ "d4c01040dba19628931bc8855370317c722cbd9ca6156985f1c2e9ce",
+ },
+ {
+ "13479966930919337728895168462090683249159702977113823384618282123295",
+ "ef353bf5c73cd551b96d596fbc9a67f16d61dd9fe56af19de1fba9cd",
+ "21771b9cdce3e8430c09b3838be70b48c21e15bc09ee1f2d7945b91f",
+ },
+ {
+ "50210731791415612487756441341851895584393717453129007497216",
+ "4036052a3091eb481046ad3289c95d3ac905ca0023de2c03ecd451cf",
+ "d768165a38a2b96f812586a9d59d4136035d9c853a5bf2e1c86a4993",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368041",
+ "fcc7f2b45df1cd5a3c0c0731ca47a8af75cfb0347e8354eefe782455",
+ "f2a28eefd8b345832116f1e574f2c6b2c895aa8c24941f40d8b80ad1",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368042",
+ "a1e81c04f30ce201c7c9ace785ed44cc33b455a022f2acdbc6cae83c",
+ "230e093c24f638f533dac6e2b6d01da3b5e7f45429315ca93fb8e634",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368043",
+ "c9ff61b040874c0568479216824a15eab1a838a797d189746226e4cc",
+ "156729f1a003647030666054e208180f8f7b0df2249e44fba5931fff",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368044",
+ "b8357c3a6ceef288310e17b8bfeff9200846ca8c1942497c484403bc",
+ "eb610599f95942df1082e4f9426d086fb9c6231ae8b24933aab5db",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368045",
+ "b6ec4fe1777382404ef679997ba8d1cc5cd8e85349259f590c4c66d",
+ "cc662b9bcba6f94ee4ff1c9c10bd6ddd0d138df2d099a282152a4b7f",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368046",
+ "baa4d8635511a7d288aebeedd12ce529ff102c91f97f867e21916bf9",
+ "6865a0b8a607f0b04b13d1cb0aa992a5a97f5ee8ca1849efb9ed8678",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368047",
+ "a53640c83dc208603ded83e4ecf758f24c357d7cf48088b2ce01e9fa",
+ "2a7eb328dbe663b5a468b5bc97a040a3745396ba636b964370dc3352",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368048",
+ "34e8e17a430e43289793c383fac9774247b40e9ebd3366981fcfaeca",
+ "dad7e608e380480434ea641cc82c82cbc92801469c8db0204f13489a",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368049",
+ "6e31ee1dc137f81b056752e4deab1443a481033e9b4c93a3044f4f7a",
+ "df82220fc7a4021549165325725f94c3410ddb56c54e161fc9ef62ee",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368050",
+ "ef53b6294aca431f0f3c22dc82eb9050324f1d88d377e716448e507c",
+ "df4aefffbf6d1699c930481cd102127c9a3d992048ab05929b6e5927",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368051",
+ "aea9e17a306517eb89152aa7096d2c381ec813c51aa880e7bee2c0fd",
+ "c644cf154cc81f5ade49345e541b4d4b5c1adb3eb5c01c14ee949aa2",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368052",
+ "2fdcccfee720a77ef6cb3bfbb447f9383117e3daa4a07e36ed15f78d",
+ "c8e8cd1b0be40b0877cfca1958603122f1e6914f84b7e8e968ae8b9e",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368053",
+ "858e6f9cc6c12c31f5df124aa77767b05c8bc021bd683d2b55571550",
+ "fb9232c15a3bc7673a3a03b0253824c53d0fd1411b1cabe2e187fb87",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368054",
+ "db2f6be630e246a5cf7d99b85194b123d487e2d466b94b24a03c3e28",
+ "f0c5cff7ab680d09ee11dae84e9c1072ac48ea2e744b1b7f72fd469e",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368055",
+ "1f2483f82572251fca975fea40db821df8ad82a3c002ee6c57112408",
+ "76050f3348af2664aac3a8b05281304ebc7a7914c6ad50a4b4eac383",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368056",
+ "31c49ae75bce7807cdff22055d94ee9021fedbb5ab51c57526f011aa",
+ "d817400e8ba9ca13a45f360e3d121eaaeb39af82d6001c8186f5f866",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368057",
+ "ae99feebb5d26945b54892092a8aee02912930fa41cd114e40447301",
+ "fb7da7f5f13a43b81774373c879cd32d6934c05fa758eeb14fcfab38",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368058",
+ "df1b1d66a551d0d31eff822558b9d2cc75c2180279fe0d08fd896d04",
+ "5c080fc3522f41bbb3f55a97cfecf21f882ce8cbb1e50ca6e67e56dc",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368059",
+ "706a46dc76dcb76798e60e6d89474788d16dc18032d268fd1a704fa6",
+ "e3d4895843da188fd58fb0567976d7b50359d6b78530c8f62d1b1746",
+ },
+ {
+ "26959946667150639794667015087019625940457807714424391721682722368060",
+ "b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21",
+ "42c89c774a08dc04b3dd201932bc8a5ea5f8b89bbb2a7e667aff81cd",
+ },
+}
+
+func TestP224BaseMult(t *testing.T) {
+ p224 := P224()
+ for i, e := range p224BaseMultTests {
+ k, ok := new(big.Int).SetString(e.k, 10)
+ if !ok {
+ t.Errorf("%d: bad value for k: %s", i, e.k)
+ }
+ x, y := p224.ScalarBaseMult(k.Bytes())
+ if fmt.Sprintf("%x", x) != e.x || fmt.Sprintf("%x", y) != e.y {
+ t.Errorf("%d: bad output for k=%s: got (%x, %x), want (%s, %s)", i, e.k, x, y, e.x, e.y)
+ }
+ if testing.Short() && i > 5 {
+ break
+ }
+ }
+}
+
+func TestP224GenericBaseMult(t *testing.T) {
+ // We use the P224 CurveParams directly in order to test the generic implementation.
+ p224 := P224().Params()
+ for i, e := range p224BaseMultTests {
+ k, ok := new(big.Int).SetString(e.k, 10)
+ if !ok {
+ t.Errorf("%d: bad value for k: %s", i, e.k)
+ }
+ x, y := p224.ScalarBaseMult(k.Bytes())
+ if fmt.Sprintf("%x", x) != e.x || fmt.Sprintf("%x", y) != e.y {
+ t.Errorf("%d: bad output for k=%s: got (%x, %x), want (%s, %s)", i, e.k, x, y, e.x, e.y)
+ }
+ if testing.Short() && i > 5 {
+ break
+ }
+ }
+}
+
+func TestP224Overflow(t *testing.T) {
+ // This tests for a specific bug in the P224 implementation.
+ p224 := P224()
+ pointData, _ := hex.DecodeString("049B535B45FB0A2072398A6831834624C7E32CCFD5A4B933BCEAF77F1DD945E08BBE5178F5EDF5E733388F196D2A631D2E075BB16CBFEEA15B")
+ x, y := Unmarshal(p224, pointData)
+ if !p224.IsOnCurve(x, y) {
+ t.Error("P224 failed to validate a correct point")
+ }
+}
diff --git a/src/crypto/elliptic/p256.go b/src/crypto/elliptic/p256.go
new file mode 100644
index 0000000..da52837
--- /dev/null
+++ b/src/crypto/elliptic/p256.go
@@ -0,0 +1,1193 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !amd64 && !arm64
+// +build !amd64,!arm64
+
+package elliptic
+
+// This file contains a constant-time, 32-bit implementation of P256.
+
+import (
+ "math/big"
+)
+
+type p256Curve struct {
+ *CurveParams
+}
+
+var (
+ p256Params *CurveParams
+
+ // RInverse contains 1/R mod p - the inverse of the Montgomery constant
+ // (2**257).
+ p256RInverse *big.Int
+)
+
+func initP256() {
+ // See FIPS 186-3, section D.2.3
+ p256Params = &CurveParams{Name: "P-256"}
+ p256Params.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
+ p256Params.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
+ p256Params.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
+ p256Params.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
+ p256Params.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
+ p256Params.BitSize = 256
+
+ p256RInverse, _ = new(big.Int).SetString("7fffffff00000001fffffffe8000000100000000ffffffff0000000180000000", 16)
+
+ // Arch-specific initialization, i.e. let a platform dynamically pick a P256 implementation
+ initP256Arch()
+}
+
+func (curve p256Curve) Params() *CurveParams {
+ return curve.CurveParams
+}
+
+// p256GetScalar endian-swaps the big-endian scalar value from in and writes it
+// to out. If the scalar is equal or greater than the order of the group, it's
+// reduced modulo that order.
+func p256GetScalar(out *[32]byte, in []byte) {
+ n := new(big.Int).SetBytes(in)
+ var scalarBytes []byte
+
+ if n.Cmp(p256Params.N) >= 0 || len(in) > len(out) {
+ n.Mod(n, p256Params.N)
+ scalarBytes = n.Bytes()
+ } else {
+ scalarBytes = in
+ }
+
+ for i, v := range scalarBytes {
+ out[len(scalarBytes)-(1+i)] = v
+ }
+}
+
+func (p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
+ var scalarReversed [32]byte
+ p256GetScalar(&scalarReversed, scalar)
+
+ var x1, y1, z1 [p256Limbs]uint32
+ p256ScalarBaseMult(&x1, &y1, &z1, &scalarReversed)
+ return p256ToAffine(&x1, &y1, &z1)
+}
+
+func (p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
+ var scalarReversed [32]byte
+ p256GetScalar(&scalarReversed, scalar)
+
+ var px, py, x1, y1, z1 [p256Limbs]uint32
+ p256FromBig(&px, bigX)
+ p256FromBig(&py, bigY)
+ p256ScalarMult(&x1, &y1, &z1, &px, &py, &scalarReversed)
+ return p256ToAffine(&x1, &y1, &z1)
+}
+
+// Field elements are represented as nine, unsigned 32-bit words.
+//
+// The value of a field element is:
+// x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228)
+//
+// That is, each limb is alternately 29 or 28-bits wide in little-endian
+// order.
+//
+// This means that a field element hits 2**257, rather than 2**256 as we would
+// like. A 28, 29, ... pattern would cause us to hit 2**256, but that causes
+// problems when multiplying as terms end up one bit short of a limb which
+// would require much bit-shifting to correct.
+//
+// Finally, the values stored in a field element are in Montgomery form. So the
+// value |y| is stored as (y*R) mod p, where p is the P-256 prime and R is
+// 2**257.
+
+const (
+ p256Limbs = 9
+ bottom29Bits = 0x1fffffff
+)
+
+var (
+ // p256One is the number 1 as a field element.
+ p256One = [p256Limbs]uint32{2, 0, 0, 0xffff800, 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff, 0}
+ p256Zero = [p256Limbs]uint32{0, 0, 0, 0, 0, 0, 0, 0, 0}
+ // p256P is the prime modulus as a field element.
+ p256P = [p256Limbs]uint32{0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff, 0, 0, 0x200000, 0xf000000, 0xfffffff}
+ // p2562P is the twice prime modulus as a field element.
+ p2562P = [p256Limbs]uint32{0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff, 0, 0, 0x400000, 0xe000000, 0x1fffffff}
+)
+
+// p256Precomputed contains precomputed values to aid the calculation of scalar
+// multiples of the base point, G. It's actually two, equal length, tables
+// concatenated.
+//
+// The first table contains (x,y) field element pairs for 16 multiples of the
+// base point, G.
+//
+// Index | Index (binary) | Value
+// 0 | 0000 | 0G (all zeros, omitted)
+// 1 | 0001 | G
+// 2 | 0010 | 2**64G
+// 3 | 0011 | 2**64G + G
+// 4 | 0100 | 2**128G
+// 5 | 0101 | 2**128G + G
+// 6 | 0110 | 2**128G + 2**64G
+// 7 | 0111 | 2**128G + 2**64G + G
+// 8 | 1000 | 2**192G
+// 9 | 1001 | 2**192G + G
+// 10 | 1010 | 2**192G + 2**64G
+// 11 | 1011 | 2**192G + 2**64G + G
+// 12 | 1100 | 2**192G + 2**128G
+// 13 | 1101 | 2**192G + 2**128G + G
+// 14 | 1110 | 2**192G + 2**128G + 2**64G
+// 15 | 1111 | 2**192G + 2**128G + 2**64G + G
+//
+// The second table follows the same style, but the terms are 2**32G,
+// 2**96G, 2**160G, 2**224G.
+//
+// This is ~2KB of data.
+var p256Precomputed = [p256Limbs * 2 * 15 * 2]uint32{
+ 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7edc, 0xd4a6eab, 0x3120bee,
+ 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba21, 0x14b10bb, 0xae3fe3,
+ 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe49073, 0x3fa36cc, 0x5ebcd2c,
+ 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea12446, 0xe1ade1e, 0xec91f22,
+ 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109, 0xa267a00, 0xb57c050,
+ 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0x7d6dee7, 0x2976e4b,
+ 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a5a9, 0x843a649, 0xc3ab0fa,
+ 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11, 0x58c43df, 0xf423fc2,
+ 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db40f, 0x83e277d, 0xb0dd609,
+ 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5, 0xe10c9e, 0x33ab581,
+ 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f, 0x48764cd, 0x76dbcca,
+ 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b20, 0x4ba3173, 0xc168c33,
+ 0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0, 0x65dd7ff, 0x3a1e4f6,
+ 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f077, 0xa6add89, 0x4894acd,
+ 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0,
+ 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c, 0xda0cf5b, 0x812e881,
+ 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51, 0xc22be3e, 0xe35e65a,
+ 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9, 0x1c5a839, 0x47a1e26,
+ 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c502, 0x2f32042, 0xa17769b,
+ 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a02, 0x3fc93, 0x5620023,
+ 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c, 0x407f75c, 0xbaab133,
+ 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea7, 0x3293ac0, 0xcdc98aa,
+ 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29,
+ 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72, 0x73e1c35, 0xee70fbc,
+ 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85, 0x27de188, 0x66f70b8,
+ 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae914, 0x2f3ec51, 0x3826b59,
+ 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x823d9d2, 0x8213f39,
+ 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4a, 0xf5ddc3d, 0x3786689,
+ 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a729, 0x4be3499, 0x52b23aa,
+ 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb048035, 0xe31de66, 0xc6ecaa3,
+ 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a7529, 0xcb7beb1, 0xb2a78a1,
+ 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff658, 0xe3d6511, 0xc7d76f,
+ 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72,
+ 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d32411, 0xb04a838, 0xd760d2d,
+ 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11e, 0x20bca9a, 0x66f496b,
+ 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a,
+ 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56ff, 0x65ef930, 0x21dc4a,
+ 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15f, 0x624e62e, 0xa90ae2f,
+ 0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522b, 0xdc78583, 0x40eeabb,
+ 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef34, 0xae2a960, 0x91b8bdc,
+ 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0x2413c8e, 0x5425bf9,
+ 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633, 0x7c91952, 0xd806dce,
+ 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef73, 0x8956f34, 0xe4b5cf2,
+ 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7, 0x627b614, 0x7371cca,
+ 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc9, 0x9c19bf2, 0x5882229,
+ 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b3, 0xe85ff25, 0x408ef57,
+ 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113, 0xa4a1769, 0x11fbc6c,
+ 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b7, 0x4acbad9, 0x5efc5fa,
+ 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc, 0x7bf0fa9, 0x957651,
+ 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec,
+ 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c12d, 0xf20bd46, 0x1951fa7,
+ 0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74, 0x99bb618, 0x2db944c,
+ 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e74779, 0x576138, 0x9587927,
+ 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782d, 0xfc72e0b, 0x701b298,
+ 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5d8, 0xf858d3a, 0x942eea8,
+ 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a1, 0x8395659, 0x52ed4e2,
+ 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c0, 0x6bdf55a, 0x4e4457d,
+ 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747b, 0x878558d, 0x7d29aa4,
+ 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d7, 0xa5bef68, 0xb7b30d8,
+ 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f51951, 0x9d0c177, 0x1c49a78,
+}
+
+// Field element operations:
+
+// nonZeroToAllOnes returns:
+// 0xffffffff for 0 < x <= 2**31
+// 0 for x == 0 or x > 2**31.
+func nonZeroToAllOnes(x uint32) uint32 {
+ return ((x - 1) >> 31) - 1
+}
+
+// p256ReduceCarry adds a multiple of p in order to cancel |carry|,
+// which is a term at 2**257.
+//
+// On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28.
+// On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29.
+func p256ReduceCarry(inout *[p256Limbs]uint32, carry uint32) {
+ carry_mask := nonZeroToAllOnes(carry)
+
+ inout[0] += carry << 1
+ inout[3] += 0x10000000 & carry_mask
+ // carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the
+ // previous line therefore this doesn't underflow.
+ inout[3] -= carry << 11
+ inout[4] += (0x20000000 - 1) & carry_mask
+ inout[5] += (0x10000000 - 1) & carry_mask
+ inout[6] += (0x20000000 - 1) & carry_mask
+ inout[6] -= carry << 22
+ // This may underflow if carry is non-zero but, if so, we'll fix it in the
+ // next line.
+ inout[7] -= 1 & carry_mask
+ inout[7] += carry << 25
+}
+
+// p256Sum sets out = in+in2.
+//
+// On entry, in[i]+in2[i] must not overflow a 32-bit word.
+// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
+func p256Sum(out, in, in2 *[p256Limbs]uint32) {
+ carry := uint32(0)
+ for i := 0; ; i++ {
+ out[i] = in[i] + in2[i]
+ out[i] += carry
+ carry = out[i] >> 29
+ out[i] &= bottom29Bits
+
+ i++
+ if i == p256Limbs {
+ break
+ }
+
+ out[i] = in[i] + in2[i]
+ out[i] += carry
+ carry = out[i] >> 28
+ out[i] &= bottom28Bits
+ }
+
+ p256ReduceCarry(out, carry)
+}
+
+const (
+ two30m2 = 1<<30 - 1<<2
+ two30p13m2 = 1<<30 + 1<<13 - 1<<2
+ two31m2 = 1<<31 - 1<<2
+ two31p24m2 = 1<<31 + 1<<24 - 1<<2
+ two30m27m2 = 1<<30 - 1<<27 - 1<<2
+)
+
+// p256Zero31 is 0 mod p.
+var p256Zero31 = [p256Limbs]uint32{two31m3, two30m2, two31m2, two30p13m2, two31m2, two30m2, two31p24m2, two30m27m2, two31m2}
+
+// p256Diff sets out = in-in2.
+//
+// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
+// in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
+// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+func p256Diff(out, in, in2 *[p256Limbs]uint32) {
+ var carry uint32
+
+ for i := 0; ; i++ {
+ out[i] = in[i] - in2[i]
+ out[i] += p256Zero31[i]
+ out[i] += carry
+ carry = out[i] >> 29
+ out[i] &= bottom29Bits
+
+ i++
+ if i == p256Limbs {
+ break
+ }
+
+ out[i] = in[i] - in2[i]
+ out[i] += p256Zero31[i]
+ out[i] += carry
+ carry = out[i] >> 28
+ out[i] &= bottom28Bits
+ }
+
+ p256ReduceCarry(out, carry)
+}
+
+// p256ReduceDegree sets out = tmp/R mod p where tmp contains 64-bit words with
+// the same 29,28,... bit positions as a field element.
+//
+// The values in field elements are in Montgomery form: x*R mod p where R =
+// 2**257. Since we just multiplied two Montgomery values together, the result
+// is x*y*R*R mod p. We wish to divide by R in order for the result also to be
+// in Montgomery form.
+//
+// On entry: tmp[i] < 2**64
+// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
+func p256ReduceDegree(out *[p256Limbs]uint32, tmp [17]uint64) {
+ // The following table may be helpful when reading this code:
+ //
+ // Limb number: 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10...
+ // Width (bits): 29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29
+ // Start bit: 0 | 29| 57| 86|114|143|171|200|228|257|285
+ // (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285
+ var tmp2 [18]uint32
+ var carry, x, xMask uint32
+
+ // tmp contains 64-bit words with the same 29,28,29-bit positions as a
+ // field element. So the top of an element of tmp might overlap with
+ // another element two positions down. The following loop eliminates
+ // this overlap.
+ tmp2[0] = uint32(tmp[0]) & bottom29Bits
+
+ tmp2[1] = uint32(tmp[0]) >> 29
+ tmp2[1] |= (uint32(tmp[0]>>32) << 3) & bottom28Bits
+ tmp2[1] += uint32(tmp[1]) & bottom28Bits
+ carry = tmp2[1] >> 28
+ tmp2[1] &= bottom28Bits
+
+ for i := 2; i < 17; i++ {
+ tmp2[i] = (uint32(tmp[i-2] >> 32)) >> 25
+ tmp2[i] += (uint32(tmp[i-1])) >> 28
+ tmp2[i] += (uint32(tmp[i-1]>>32) << 4) & bottom29Bits
+ tmp2[i] += uint32(tmp[i]) & bottom29Bits
+ tmp2[i] += carry
+ carry = tmp2[i] >> 29
+ tmp2[i] &= bottom29Bits
+
+ i++
+ if i == 17 {
+ break
+ }
+ tmp2[i] = uint32(tmp[i-2]>>32) >> 25
+ tmp2[i] += uint32(tmp[i-1]) >> 29
+ tmp2[i] += ((uint32(tmp[i-1] >> 32)) << 3) & bottom28Bits
+ tmp2[i] += uint32(tmp[i]) & bottom28Bits
+ tmp2[i] += carry
+ carry = tmp2[i] >> 28
+ tmp2[i] &= bottom28Bits
+ }
+
+ tmp2[17] = uint32(tmp[15]>>32) >> 25
+ tmp2[17] += uint32(tmp[16]) >> 29
+ tmp2[17] += uint32(tmp[16]>>32) << 3
+ tmp2[17] += carry
+
+ // Montgomery elimination of terms:
+ //
+ // Since R is 2**257, we can divide by R with a bitwise shift if we can
+ // ensure that the right-most 257 bits are all zero. We can make that true
+ // by adding multiplies of p without affecting the value.
+ //
+ // So we eliminate limbs from right to left. Since the bottom 29 bits of p
+ // are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.
+ // We can do that for 8 further limbs and then right shift to eliminate the
+ // extra factor of R.
+ for i := 0; ; i += 2 {
+ tmp2[i+1] += tmp2[i] >> 29
+ x = tmp2[i] & bottom29Bits
+ xMask = nonZeroToAllOnes(x)
+ tmp2[i] = 0
+
+ // The bounds calculations for this loop are tricky. Each iteration of
+ // the loop eliminates two words by adding values to words to their
+ // right.
+ //
+ // The following table contains the amounts added to each word (as an
+ // offset from the value of i at the top of the loop). The amounts are
+ // accounted for from the first and second half of the loop separately
+ // and are written as, for example, 28 to mean a value <2**28.
+ //
+ // Word: 3 4 5 6 7 8 9 10
+ // Added in top half: 28 11 29 21 29 28
+ // 28 29
+ // 29
+ // Added in bottom half: 29 10 28 21 28 28
+ // 29
+ //
+ // The value that is currently offset 7 will be offset 5 for the next
+ // iteration and then offset 3 for the iteration after that. Therefore
+ // the total value added will be the values added at 7, 5 and 3.
+ //
+ // The following table accumulates these values. The sums at the bottom
+ // are written as, for example, 29+28, to mean a value < 2**29+2**28.
+ //
+ // Word: 3 4 5 6 7 8 9 10 11 12 13
+ // 28 11 10 29 21 29 28 28 28 28 28
+ // 29 28 11 28 29 28 29 28 29 28
+ // 29 28 21 21 29 21 29 21
+ // 10 29 28 21 28 21 28
+ // 28 29 28 29 28 29 28
+ // 11 10 29 10 29 10
+ // 29 28 11 28 11
+ // 29 29
+ // --------------------------------------------
+ // 30+ 31+ 30+ 31+ 30+
+ // 28+ 29+ 28+ 29+ 21+
+ // 21+ 28+ 21+ 28+ 10
+ // 10 21+ 10 21+
+ // 11 11
+ //
+ // So the greatest amount is added to tmp2[10] and tmp2[12]. If
+ // tmp2[10/12] has an initial value of <2**29, then the maximum value
+ // will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32,
+ // as required.
+ tmp2[i+3] += (x << 10) & bottom28Bits
+ tmp2[i+4] += (x >> 18)
+
+ tmp2[i+6] += (x << 21) & bottom29Bits
+ tmp2[i+7] += x >> 8
+
+ // At position 200, which is the starting bit position for word 7, we
+ // have a factor of 0xf000000 = 2**28 - 2**24.
+ tmp2[i+7] += 0x10000000 & xMask
+ tmp2[i+8] += (x - 1) & xMask
+ tmp2[i+7] -= (x << 24) & bottom28Bits
+ tmp2[i+8] -= x >> 4
+
+ tmp2[i+8] += 0x20000000 & xMask
+ tmp2[i+8] -= x
+ tmp2[i+8] += (x << 28) & bottom29Bits
+ tmp2[i+9] += ((x >> 1) - 1) & xMask
+
+ if i+1 == p256Limbs {
+ break
+ }
+ tmp2[i+2] += tmp2[i+1] >> 28
+ x = tmp2[i+1] & bottom28Bits
+ xMask = nonZeroToAllOnes(x)
+ tmp2[i+1] = 0
+
+ tmp2[i+4] += (x << 11) & bottom29Bits
+ tmp2[i+5] += (x >> 18)
+
+ tmp2[i+7] += (x << 21) & bottom28Bits
+ tmp2[i+8] += x >> 7
+
+ // At position 199, which is the starting bit of the 8th word when
+ // dealing with a context starting on an odd word, we have a factor of
+ // 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th
+ // word from i+1 is i+8.
+ tmp2[i+8] += 0x20000000 & xMask
+ tmp2[i+9] += (x - 1) & xMask
+ tmp2[i+8] -= (x << 25) & bottom29Bits
+ tmp2[i+9] -= x >> 4
+
+ tmp2[i+9] += 0x10000000 & xMask
+ tmp2[i+9] -= x
+ tmp2[i+10] += (x - 1) & xMask
+ }
+
+ // We merge the right shift with a carry chain. The words above 2**257 have
+ // widths of 28,29,... which we need to correct when copying them down.
+ carry = 0
+ for i := 0; i < 8; i++ {
+ // The maximum value of tmp2[i + 9] occurs on the first iteration and
+ // is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is
+ // therefore safe.
+ out[i] = tmp2[i+9]
+ out[i] += carry
+ out[i] += (tmp2[i+10] << 28) & bottom29Bits
+ carry = out[i] >> 29
+ out[i] &= bottom29Bits
+
+ i++
+ out[i] = tmp2[i+9] >> 1
+ out[i] += carry
+ carry = out[i] >> 28
+ out[i] &= bottom28Bits
+ }
+
+ out[8] = tmp2[17]
+ out[8] += carry
+ carry = out[8] >> 29
+ out[8] &= bottom29Bits
+
+ p256ReduceCarry(out, carry)
+}
+
+// p256Square sets out=in*in.
+//
+// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29.
+// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+func p256Square(out, in *[p256Limbs]uint32) {
+ var tmp [17]uint64
+
+ tmp[0] = uint64(in[0]) * uint64(in[0])
+ tmp[1] = uint64(in[0]) * (uint64(in[1]) << 1)
+ tmp[2] = uint64(in[0])*(uint64(in[2])<<1) +
+ uint64(in[1])*(uint64(in[1])<<1)
+ tmp[3] = uint64(in[0])*(uint64(in[3])<<1) +
+ uint64(in[1])*(uint64(in[2])<<1)
+ tmp[4] = uint64(in[0])*(uint64(in[4])<<1) +
+ uint64(in[1])*(uint64(in[3])<<2) +
+ uint64(in[2])*uint64(in[2])
+ tmp[5] = uint64(in[0])*(uint64(in[5])<<1) +
+ uint64(in[1])*(uint64(in[4])<<1) +
+ uint64(in[2])*(uint64(in[3])<<1)
+ tmp[6] = uint64(in[0])*(uint64(in[6])<<1) +
+ uint64(in[1])*(uint64(in[5])<<2) +
+ uint64(in[2])*(uint64(in[4])<<1) +
+ uint64(in[3])*(uint64(in[3])<<1)
+ tmp[7] = uint64(in[0])*(uint64(in[7])<<1) +
+ uint64(in[1])*(uint64(in[6])<<1) +
+ uint64(in[2])*(uint64(in[5])<<1) +
+ uint64(in[3])*(uint64(in[4])<<1)
+ // tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
+ // which is < 2**64 as required.
+ tmp[8] = uint64(in[0])*(uint64(in[8])<<1) +
+ uint64(in[1])*(uint64(in[7])<<2) +
+ uint64(in[2])*(uint64(in[6])<<1) +
+ uint64(in[3])*(uint64(in[5])<<2) +
+ uint64(in[4])*uint64(in[4])
+ tmp[9] = uint64(in[1])*(uint64(in[8])<<1) +
+ uint64(in[2])*(uint64(in[7])<<1) +
+ uint64(in[3])*(uint64(in[6])<<1) +
+ uint64(in[4])*(uint64(in[5])<<1)
+ tmp[10] = uint64(in[2])*(uint64(in[8])<<1) +
+ uint64(in[3])*(uint64(in[7])<<2) +
+ uint64(in[4])*(uint64(in[6])<<1) +
+ uint64(in[5])*(uint64(in[5])<<1)
+ tmp[11] = uint64(in[3])*(uint64(in[8])<<1) +
+ uint64(in[4])*(uint64(in[7])<<1) +
+ uint64(in[5])*(uint64(in[6])<<1)
+ tmp[12] = uint64(in[4])*(uint64(in[8])<<1) +
+ uint64(in[5])*(uint64(in[7])<<2) +
+ uint64(in[6])*uint64(in[6])
+ tmp[13] = uint64(in[5])*(uint64(in[8])<<1) +
+ uint64(in[6])*(uint64(in[7])<<1)
+ tmp[14] = uint64(in[6])*(uint64(in[8])<<1) +
+ uint64(in[7])*(uint64(in[7])<<1)
+ tmp[15] = uint64(in[7]) * (uint64(in[8]) << 1)
+ tmp[16] = uint64(in[8]) * uint64(in[8])
+
+ p256ReduceDegree(out, tmp)
+}
+
+// p256Mul sets out=in*in2.
+//
+// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
+// in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
+// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+func p256Mul(out, in, in2 *[p256Limbs]uint32) {
+ var tmp [17]uint64
+
+ tmp[0] = uint64(in[0]) * uint64(in2[0])
+ tmp[1] = uint64(in[0])*(uint64(in2[1])<<0) +
+ uint64(in[1])*(uint64(in2[0])<<0)
+ tmp[2] = uint64(in[0])*(uint64(in2[2])<<0) +
+ uint64(in[1])*(uint64(in2[1])<<1) +
+ uint64(in[2])*(uint64(in2[0])<<0)
+ tmp[3] = uint64(in[0])*(uint64(in2[3])<<0) +
+ uint64(in[1])*(uint64(in2[2])<<0) +
+ uint64(in[2])*(uint64(in2[1])<<0) +
+ uint64(in[3])*(uint64(in2[0])<<0)
+ tmp[4] = uint64(in[0])*(uint64(in2[4])<<0) +
+ uint64(in[1])*(uint64(in2[3])<<1) +
+ uint64(in[2])*(uint64(in2[2])<<0) +
+ uint64(in[3])*(uint64(in2[1])<<1) +
+ uint64(in[4])*(uint64(in2[0])<<0)
+ tmp[5] = uint64(in[0])*(uint64(in2[5])<<0) +
+ uint64(in[1])*(uint64(in2[4])<<0) +
+ uint64(in[2])*(uint64(in2[3])<<0) +
+ uint64(in[3])*(uint64(in2[2])<<0) +
+ uint64(in[4])*(uint64(in2[1])<<0) +
+ uint64(in[5])*(uint64(in2[0])<<0)
+ tmp[6] = uint64(in[0])*(uint64(in2[6])<<0) +
+ uint64(in[1])*(uint64(in2[5])<<1) +
+ uint64(in[2])*(uint64(in2[4])<<0) +
+ uint64(in[3])*(uint64(in2[3])<<1) +
+ uint64(in[4])*(uint64(in2[2])<<0) +
+ uint64(in[5])*(uint64(in2[1])<<1) +
+ uint64(in[6])*(uint64(in2[0])<<0)
+ tmp[7] = uint64(in[0])*(uint64(in2[7])<<0) +
+ uint64(in[1])*(uint64(in2[6])<<0) +
+ uint64(in[2])*(uint64(in2[5])<<0) +
+ uint64(in[3])*(uint64(in2[4])<<0) +
+ uint64(in[4])*(uint64(in2[3])<<0) +
+ uint64(in[5])*(uint64(in2[2])<<0) +
+ uint64(in[6])*(uint64(in2[1])<<0) +
+ uint64(in[7])*(uint64(in2[0])<<0)
+ // tmp[8] has the greatest value but doesn't overflow. See logic in
+ // p256Square.
+ tmp[8] = uint64(in[0])*(uint64(in2[8])<<0) +
+ uint64(in[1])*(uint64(in2[7])<<1) +
+ uint64(in[2])*(uint64(in2[6])<<0) +
+ uint64(in[3])*(uint64(in2[5])<<1) +
+ uint64(in[4])*(uint64(in2[4])<<0) +
+ uint64(in[5])*(uint64(in2[3])<<1) +
+ uint64(in[6])*(uint64(in2[2])<<0) +
+ uint64(in[7])*(uint64(in2[1])<<1) +
+ uint64(in[8])*(uint64(in2[0])<<0)
+ tmp[9] = uint64(in[1])*(uint64(in2[8])<<0) +
+ uint64(in[2])*(uint64(in2[7])<<0) +
+ uint64(in[3])*(uint64(in2[6])<<0) +
+ uint64(in[4])*(uint64(in2[5])<<0) +
+ uint64(in[5])*(uint64(in2[4])<<0) +
+ uint64(in[6])*(uint64(in2[3])<<0) +
+ uint64(in[7])*(uint64(in2[2])<<0) +
+ uint64(in[8])*(uint64(in2[1])<<0)
+ tmp[10] = uint64(in[2])*(uint64(in2[8])<<0) +
+ uint64(in[3])*(uint64(in2[7])<<1) +
+ uint64(in[4])*(uint64(in2[6])<<0) +
+ uint64(in[5])*(uint64(in2[5])<<1) +
+ uint64(in[6])*(uint64(in2[4])<<0) +
+ uint64(in[7])*(uint64(in2[3])<<1) +
+ uint64(in[8])*(uint64(in2[2])<<0)
+ tmp[11] = uint64(in[3])*(uint64(in2[8])<<0) +
+ uint64(in[4])*(uint64(in2[7])<<0) +
+ uint64(in[5])*(uint64(in2[6])<<0) +
+ uint64(in[6])*(uint64(in2[5])<<0) +
+ uint64(in[7])*(uint64(in2[4])<<0) +
+ uint64(in[8])*(uint64(in2[3])<<0)
+ tmp[12] = uint64(in[4])*(uint64(in2[8])<<0) +
+ uint64(in[5])*(uint64(in2[7])<<1) +
+ uint64(in[6])*(uint64(in2[6])<<0) +
+ uint64(in[7])*(uint64(in2[5])<<1) +
+ uint64(in[8])*(uint64(in2[4])<<0)
+ tmp[13] = uint64(in[5])*(uint64(in2[8])<<0) +
+ uint64(in[6])*(uint64(in2[7])<<0) +
+ uint64(in[7])*(uint64(in2[6])<<0) +
+ uint64(in[8])*(uint64(in2[5])<<0)
+ tmp[14] = uint64(in[6])*(uint64(in2[8])<<0) +
+ uint64(in[7])*(uint64(in2[7])<<1) +
+ uint64(in[8])*(uint64(in2[6])<<0)
+ tmp[15] = uint64(in[7])*(uint64(in2[8])<<0) +
+ uint64(in[8])*(uint64(in2[7])<<0)
+ tmp[16] = uint64(in[8]) * (uint64(in2[8]) << 0)
+
+ p256ReduceDegree(out, tmp)
+}
+
+func p256Assign(out, in *[p256Limbs]uint32) {
+ *out = *in
+}
+
+// p256Invert calculates |out| = |in|^{-1}
+//
+// Based on Fermat's Little Theorem:
+// a^p = a (mod p)
+// a^{p-1} = 1 (mod p)
+// a^{p-2} = a^{-1} (mod p)
+func p256Invert(out, in *[p256Limbs]uint32) {
+ var ftmp, ftmp2 [p256Limbs]uint32
+
+ // each e_I will hold |in|^{2^I - 1}
+ var e2, e4, e8, e16, e32, e64 [p256Limbs]uint32
+
+ p256Square(&ftmp, in) // 2^1
+ p256Mul(&ftmp, in, &ftmp) // 2^2 - 2^0
+ p256Assign(&e2, &ftmp)
+ p256Square(&ftmp, &ftmp) // 2^3 - 2^1
+ p256Square(&ftmp, &ftmp) // 2^4 - 2^2
+ p256Mul(&ftmp, &ftmp, &e2) // 2^4 - 2^0
+ p256Assign(&e4, &ftmp)
+ p256Square(&ftmp, &ftmp) // 2^5 - 2^1
+ p256Square(&ftmp, &ftmp) // 2^6 - 2^2
+ p256Square(&ftmp, &ftmp) // 2^7 - 2^3
+ p256Square(&ftmp, &ftmp) // 2^8 - 2^4
+ p256Mul(&ftmp, &ftmp, &e4) // 2^8 - 2^0
+ p256Assign(&e8, &ftmp)
+ for i := 0; i < 8; i++ {
+ p256Square(&ftmp, &ftmp)
+ } // 2^16 - 2^8
+ p256Mul(&ftmp, &ftmp, &e8) // 2^16 - 2^0
+ p256Assign(&e16, &ftmp)
+ for i := 0; i < 16; i++ {
+ p256Square(&ftmp, &ftmp)
+ } // 2^32 - 2^16
+ p256Mul(&ftmp, &ftmp, &e16) // 2^32 - 2^0
+ p256Assign(&e32, &ftmp)
+ for i := 0; i < 32; i++ {
+ p256Square(&ftmp, &ftmp)
+ } // 2^64 - 2^32
+ p256Assign(&e64, &ftmp)
+ p256Mul(&ftmp, &ftmp, in) // 2^64 - 2^32 + 2^0
+ for i := 0; i < 192; i++ {
+ p256Square(&ftmp, &ftmp)
+ } // 2^256 - 2^224 + 2^192
+
+ p256Mul(&ftmp2, &e64, &e32) // 2^64 - 2^0
+ for i := 0; i < 16; i++ {
+ p256Square(&ftmp2, &ftmp2)
+ } // 2^80 - 2^16
+ p256Mul(&ftmp2, &ftmp2, &e16) // 2^80 - 2^0
+ for i := 0; i < 8; i++ {
+ p256Square(&ftmp2, &ftmp2)
+ } // 2^88 - 2^8
+ p256Mul(&ftmp2, &ftmp2, &e8) // 2^88 - 2^0
+ for i := 0; i < 4; i++ {
+ p256Square(&ftmp2, &ftmp2)
+ } // 2^92 - 2^4
+ p256Mul(&ftmp2, &ftmp2, &e4) // 2^92 - 2^0
+ p256Square(&ftmp2, &ftmp2) // 2^93 - 2^1
+ p256Square(&ftmp2, &ftmp2) // 2^94 - 2^2
+ p256Mul(&ftmp2, &ftmp2, &e2) // 2^94 - 2^0
+ p256Square(&ftmp2, &ftmp2) // 2^95 - 2^1
+ p256Square(&ftmp2, &ftmp2) // 2^96 - 2^2
+ p256Mul(&ftmp2, &ftmp2, in) // 2^96 - 3
+
+ p256Mul(out, &ftmp2, &ftmp) // 2^256 - 2^224 + 2^192 + 2^96 - 3
+}
+
+// p256Scalar3 sets out=3*out.
+//
+// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+func p256Scalar3(out *[p256Limbs]uint32) {
+ var carry uint32
+
+ for i := 0; ; i++ {
+ out[i] *= 3
+ out[i] += carry
+ carry = out[i] >> 29
+ out[i] &= bottom29Bits
+
+ i++
+ if i == p256Limbs {
+ break
+ }
+
+ out[i] *= 3
+ out[i] += carry
+ carry = out[i] >> 28
+ out[i] &= bottom28Bits
+ }
+
+ p256ReduceCarry(out, carry)
+}
+
+// p256Scalar4 sets out=4*out.
+//
+// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+func p256Scalar4(out *[p256Limbs]uint32) {
+ var carry, nextCarry uint32
+
+ for i := 0; ; i++ {
+ nextCarry = out[i] >> 27
+ out[i] <<= 2
+ out[i] &= bottom29Bits
+ out[i] += carry
+ carry = nextCarry + (out[i] >> 29)
+ out[i] &= bottom29Bits
+
+ i++
+ if i == p256Limbs {
+ break
+ }
+ nextCarry = out[i] >> 26
+ out[i] <<= 2
+ out[i] &= bottom28Bits
+ out[i] += carry
+ carry = nextCarry + (out[i] >> 28)
+ out[i] &= bottom28Bits
+ }
+
+ p256ReduceCarry(out, carry)
+}
+
+// p256Scalar8 sets out=8*out.
+//
+// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+func p256Scalar8(out *[p256Limbs]uint32) {
+ var carry, nextCarry uint32
+
+ for i := 0; ; i++ {
+ nextCarry = out[i] >> 26
+ out[i] <<= 3
+ out[i] &= bottom29Bits
+ out[i] += carry
+ carry = nextCarry + (out[i] >> 29)
+ out[i] &= bottom29Bits
+
+ i++
+ if i == p256Limbs {
+ break
+ }
+ nextCarry = out[i] >> 25
+ out[i] <<= 3
+ out[i] &= bottom28Bits
+ out[i] += carry
+ carry = nextCarry + (out[i] >> 28)
+ out[i] &= bottom28Bits
+ }
+
+ p256ReduceCarry(out, carry)
+}
+
+// Group operations:
+//
+// Elements of the elliptic curve group are represented in Jacobian
+// coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in
+// Jacobian form.
+
+// p256PointDouble sets {xOut,yOut,zOut} = 2*{x,y,z}.
+//
+// See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l
+func p256PointDouble(xOut, yOut, zOut, x, y, z *[p256Limbs]uint32) {
+ var delta, gamma, alpha, beta, tmp, tmp2 [p256Limbs]uint32
+
+ p256Square(&delta, z)
+ p256Square(&gamma, y)
+ p256Mul(&beta, x, &gamma)
+
+ p256Sum(&tmp, x, &delta)
+ p256Diff(&tmp2, x, &delta)
+ p256Mul(&alpha, &tmp, &tmp2)
+ p256Scalar3(&alpha)
+
+ p256Sum(&tmp, y, z)
+ p256Square(&tmp, &tmp)
+ p256Diff(&tmp, &tmp, &gamma)
+ p256Diff(zOut, &tmp, &delta)
+
+ p256Scalar4(&beta)
+ p256Square(xOut, &alpha)
+ p256Diff(xOut, xOut, &beta)
+ p256Diff(xOut, xOut, &beta)
+
+ p256Diff(&tmp, &beta, xOut)
+ p256Mul(&tmp, &alpha, &tmp)
+ p256Square(&tmp2, &gamma)
+ p256Scalar8(&tmp2)
+ p256Diff(yOut, &tmp, &tmp2)
+}
+
+// p256PointAddMixed sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,1}.
+// (i.e. the second point is affine.)
+//
+// See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+//
+// Note that this function does not handle P+P, infinity+P nor P+infinity
+// correctly.
+func p256PointAddMixed(xOut, yOut, zOut, x1, y1, z1, x2, y2 *[p256Limbs]uint32) {
+ var z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp [p256Limbs]uint32
+
+ p256Square(&z1z1, z1)
+ p256Sum(&tmp, z1, z1)
+
+ p256Mul(&u2, x2, &z1z1)
+ p256Mul(&z1z1z1, z1, &z1z1)
+ p256Mul(&s2, y2, &z1z1z1)
+ p256Diff(&h, &u2, x1)
+ p256Sum(&i, &h, &h)
+ p256Square(&i, &i)
+ p256Mul(&j, &h, &i)
+ p256Diff(&r, &s2, y1)
+ p256Sum(&r, &r, &r)
+ p256Mul(&v, x1, &i)
+
+ p256Mul(zOut, &tmp, &h)
+ p256Square(&rr, &r)
+ p256Diff(xOut, &rr, &j)
+ p256Diff(xOut, xOut, &v)
+ p256Diff(xOut, xOut, &v)
+
+ p256Diff(&tmp, &v, xOut)
+ p256Mul(yOut, &tmp, &r)
+ p256Mul(&tmp, y1, &j)
+ p256Diff(yOut, yOut, &tmp)
+ p256Diff(yOut, yOut, &tmp)
+}
+
+// p256PointAdd sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,z2}.
+//
+// See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+//
+// Note that this function does not handle P+P, infinity+P nor P+infinity
+// correctly.
+func p256PointAdd(xOut, yOut, zOut, x1, y1, z1, x2, y2, z2 *[p256Limbs]uint32) {
+ var z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp [p256Limbs]uint32
+
+ p256Square(&z1z1, z1)
+ p256Square(&z2z2, z2)
+ p256Mul(&u1, x1, &z2z2)
+
+ p256Sum(&tmp, z1, z2)
+ p256Square(&tmp, &tmp)
+ p256Diff(&tmp, &tmp, &z1z1)
+ p256Diff(&tmp, &tmp, &z2z2)
+
+ p256Mul(&z2z2z2, z2, &z2z2)
+ p256Mul(&s1, y1, &z2z2z2)
+
+ p256Mul(&u2, x2, &z1z1)
+ p256Mul(&z1z1z1, z1, &z1z1)
+ p256Mul(&s2, y2, &z1z1z1)
+ p256Diff(&h, &u2, &u1)
+ p256Sum(&i, &h, &h)
+ p256Square(&i, &i)
+ p256Mul(&j, &h, &i)
+ p256Diff(&r, &s2, &s1)
+ p256Sum(&r, &r, &r)
+ p256Mul(&v, &u1, &i)
+
+ p256Mul(zOut, &tmp, &h)
+ p256Square(&rr, &r)
+ p256Diff(xOut, &rr, &j)
+ p256Diff(xOut, xOut, &v)
+ p256Diff(xOut, xOut, &v)
+
+ p256Diff(&tmp, &v, xOut)
+ p256Mul(yOut, &tmp, &r)
+ p256Mul(&tmp, &s1, &j)
+ p256Diff(yOut, yOut, &tmp)
+ p256Diff(yOut, yOut, &tmp)
+}
+
+// p256CopyConditional sets out=in if mask = 0xffffffff in constant time.
+//
+// On entry: mask is either 0 or 0xffffffff.
+func p256CopyConditional(out, in *[p256Limbs]uint32, mask uint32) {
+ for i := 0; i < p256Limbs; i++ {
+ tmp := mask & (in[i] ^ out[i])
+ out[i] ^= tmp
+ }
+}
+
+// p256SelectAffinePoint sets {out_x,out_y} to the index'th entry of table.
+// On entry: index < 16, table[0] must be zero.
+func p256SelectAffinePoint(xOut, yOut *[p256Limbs]uint32, table []uint32, index uint32) {
+ for i := range xOut {
+ xOut[i] = 0
+ }
+ for i := range yOut {
+ yOut[i] = 0
+ }
+
+ for i := uint32(1); i < 16; i++ {
+ mask := i ^ index
+ mask |= mask >> 2
+ mask |= mask >> 1
+ mask &= 1
+ mask--
+ for j := range xOut {
+ xOut[j] |= table[0] & mask
+ table = table[1:]
+ }
+ for j := range yOut {
+ yOut[j] |= table[0] & mask
+ table = table[1:]
+ }
+ }
+}
+
+// p256SelectJacobianPoint sets {out_x,out_y,out_z} to the index'th entry of
+// table.
+// On entry: index < 16, table[0] must be zero.
+func p256SelectJacobianPoint(xOut, yOut, zOut *[p256Limbs]uint32, table *[16][3][p256Limbs]uint32, index uint32) {
+ for i := range xOut {
+ xOut[i] = 0
+ }
+ for i := range yOut {
+ yOut[i] = 0
+ }
+ for i := range zOut {
+ zOut[i] = 0
+ }
+
+ // The implicit value at index 0 is all zero. We don't need to perform that
+ // iteration of the loop because we already set out_* to zero.
+ for i := uint32(1); i < 16; i++ {
+ mask := i ^ index
+ mask |= mask >> 2
+ mask |= mask >> 1
+ mask &= 1
+ mask--
+ for j := range xOut {
+ xOut[j] |= table[i][0][j] & mask
+ }
+ for j := range yOut {
+ yOut[j] |= table[i][1][j] & mask
+ }
+ for j := range zOut {
+ zOut[j] |= table[i][2][j] & mask
+ }
+ }
+}
+
+// p256GetBit returns the bit'th bit of scalar.
+func p256GetBit(scalar *[32]uint8, bit uint) uint32 {
+ return uint32(((scalar[bit>>3]) >> (bit & 7)) & 1)
+}
+
+// p256ScalarBaseMult sets {xOut,yOut,zOut} = scalar*G where scalar is a
+// little-endian number. Note that the value of scalar must be less than the
+// order of the group.
+func p256ScalarBaseMult(xOut, yOut, zOut *[p256Limbs]uint32, scalar *[32]uint8) {
+ nIsInfinityMask := ^uint32(0)
+ var pIsNoninfiniteMask, mask, tableOffset uint32
+ var px, py, tx, ty, tz [p256Limbs]uint32
+
+ for i := range xOut {
+ xOut[i] = 0
+ }
+ for i := range yOut {
+ yOut[i] = 0
+ }
+ for i := range zOut {
+ zOut[i] = 0
+ }
+
+ // The loop adds bits at positions 0, 64, 128 and 192, followed by
+ // positions 32,96,160 and 224 and does this 32 times.
+ for i := uint(0); i < 32; i++ {
+ if i != 0 {
+ p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
+ }
+ tableOffset = 0
+ for j := uint(0); j <= 32; j += 32 {
+ bit0 := p256GetBit(scalar, 31-i+j)
+ bit1 := p256GetBit(scalar, 95-i+j)
+ bit2 := p256GetBit(scalar, 159-i+j)
+ bit3 := p256GetBit(scalar, 223-i+j)
+ index := bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3)
+
+ p256SelectAffinePoint(&px, &py, p256Precomputed[tableOffset:], index)
+ tableOffset += 30 * p256Limbs
+
+ // Since scalar is less than the order of the group, we know that
+ // {xOut,yOut,zOut} != {px,py,1}, unless both are zero, which we handle
+ // below.
+ p256PointAddMixed(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py)
+ // The result of pointAddMixed is incorrect if {xOut,yOut,zOut} is zero
+ // (a.k.a. the point at infinity). We handle that situation by
+ // copying the point from the table.
+ p256CopyConditional(xOut, &px, nIsInfinityMask)
+ p256CopyConditional(yOut, &py, nIsInfinityMask)
+ p256CopyConditional(zOut, &p256One, nIsInfinityMask)
+
+ // Equally, the result is also wrong if the point from the table is
+ // zero, which happens when the index is zero. We handle that by
+ // only copying from {tx,ty,tz} to {xOut,yOut,zOut} if index != 0.
+ pIsNoninfiniteMask = nonZeroToAllOnes(index)
+ mask = pIsNoninfiniteMask & ^nIsInfinityMask
+ p256CopyConditional(xOut, &tx, mask)
+ p256CopyConditional(yOut, &ty, mask)
+ p256CopyConditional(zOut, &tz, mask)
+ // If p was not zero, then n is now non-zero.
+ nIsInfinityMask &^= pIsNoninfiniteMask
+ }
+ }
+}
+
+// p256PointToAffine converts a Jacobian point to an affine point. If the input
+// is the point at infinity then it returns (0, 0) in constant time.
+func p256PointToAffine(xOut, yOut, x, y, z *[p256Limbs]uint32) {
+ var zInv, zInvSq [p256Limbs]uint32
+
+ p256Invert(&zInv, z)
+ p256Square(&zInvSq, &zInv)
+ p256Mul(xOut, x, &zInvSq)
+ p256Mul(&zInv, &zInv, &zInvSq)
+ p256Mul(yOut, y, &zInv)
+}
+
+// p256ToAffine returns a pair of *big.Int containing the affine representation
+// of {x,y,z}.
+func p256ToAffine(x, y, z *[p256Limbs]uint32) (xOut, yOut *big.Int) {
+ var xx, yy [p256Limbs]uint32
+ p256PointToAffine(&xx, &yy, x, y, z)
+ return p256ToBig(&xx), p256ToBig(&yy)
+}
+
+// p256ScalarMult sets {xOut,yOut,zOut} = scalar*{x,y}.
+func p256ScalarMult(xOut, yOut, zOut, x, y *[p256Limbs]uint32, scalar *[32]uint8) {
+ var px, py, pz, tx, ty, tz [p256Limbs]uint32
+ var precomp [16][3][p256Limbs]uint32
+ var nIsInfinityMask, index, pIsNoninfiniteMask, mask uint32
+
+ // We precompute 0,1,2,... times {x,y}.
+ precomp[1][0] = *x
+ precomp[1][1] = *y
+ precomp[1][2] = p256One
+
+ for i := 2; i < 16; i += 2 {
+ p256PointDouble(&precomp[i][0], &precomp[i][1], &precomp[i][2], &precomp[i/2][0], &precomp[i/2][1], &precomp[i/2][2])
+ p256PointAddMixed(&precomp[i+1][0], &precomp[i+1][1], &precomp[i+1][2], &precomp[i][0], &precomp[i][1], &precomp[i][2], x, y)
+ }
+
+ for i := range xOut {
+ xOut[i] = 0
+ }
+ for i := range yOut {
+ yOut[i] = 0
+ }
+ for i := range zOut {
+ zOut[i] = 0
+ }
+ nIsInfinityMask = ^uint32(0)
+
+ // We add in a window of four bits each iteration and do this 64 times.
+ for i := 0; i < 64; i++ {
+ if i != 0 {
+ p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
+ p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
+ p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
+ p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
+ }
+
+ index = uint32(scalar[31-i/2])
+ if (i & 1) == 1 {
+ index &= 15
+ } else {
+ index >>= 4
+ }
+
+ // See the comments in scalarBaseMult about handling infinities.
+ p256SelectJacobianPoint(&px, &py, &pz, &precomp, index)
+ p256PointAdd(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py, &pz)
+ p256CopyConditional(xOut, &px, nIsInfinityMask)
+ p256CopyConditional(yOut, &py, nIsInfinityMask)
+ p256CopyConditional(zOut, &pz, nIsInfinityMask)
+
+ pIsNoninfiniteMask = nonZeroToAllOnes(index)
+ mask = pIsNoninfiniteMask & ^nIsInfinityMask
+ p256CopyConditional(xOut, &tx, mask)
+ p256CopyConditional(yOut, &ty, mask)
+ p256CopyConditional(zOut, &tz, mask)
+ nIsInfinityMask &^= pIsNoninfiniteMask
+ }
+}
+
+// p256FromBig sets out = R*in.
+func p256FromBig(out *[p256Limbs]uint32, in *big.Int) {
+ tmp := new(big.Int).Lsh(in, 257)
+ tmp.Mod(tmp, p256Params.P)
+
+ for i := 0; i < p256Limbs; i++ {
+ if bits := tmp.Bits(); len(bits) > 0 {
+ out[i] = uint32(bits[0]) & bottom29Bits
+ } else {
+ out[i] = 0
+ }
+ tmp.Rsh(tmp, 29)
+
+ i++
+ if i == p256Limbs {
+ break
+ }
+
+ if bits := tmp.Bits(); len(bits) > 0 {
+ out[i] = uint32(bits[0]) & bottom28Bits
+ } else {
+ out[i] = 0
+ }
+ tmp.Rsh(tmp, 28)
+ }
+}
+
+// p256ToBig returns a *big.Int containing the value of in.
+func p256ToBig(in *[p256Limbs]uint32) *big.Int {
+ result, tmp := new(big.Int), new(big.Int)
+
+ result.SetInt64(int64(in[p256Limbs-1]))
+ for i := p256Limbs - 2; i >= 0; i-- {
+ if (i & 1) == 0 {
+ result.Lsh(result, 29)
+ } else {
+ result.Lsh(result, 28)
+ }
+ tmp.SetInt64(int64(in[i]))
+ result.Add(result, tmp)
+ }
+
+ result.Mul(result, p256RInverse)
+ result.Mod(result, p256Params.P)
+ return result
+}
diff --git a/src/crypto/elliptic/p256_asm.go b/src/crypto/elliptic/p256_asm.go
new file mode 100644
index 0000000..9a808f2
--- /dev/null
+++ b/src/crypto/elliptic/p256_asm.go
@@ -0,0 +1,539 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file contains the Go wrapper for the constant-time, 64-bit assembly
+// implementation of P256. The optimizations performed here are described in
+// detail in:
+// S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
+// 256-bit primes"
+// https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
+// https://eprint.iacr.org/2013/816.pdf
+
+//go:build amd64 || arm64
+// +build amd64 arm64
+
+package elliptic
+
+import (
+ "math/big"
+)
+
+type (
+ p256Curve struct {
+ *CurveParams
+ }
+
+ p256Point struct {
+ xyz [12]uint64
+ }
+)
+
+var p256 p256Curve
+
+func initP256() {
+ // See FIPS 186-3, section D.2.3
+ p256.CurveParams = &CurveParams{Name: "P-256"}
+ p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
+ p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
+ p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
+ p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
+ p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
+ p256.BitSize = 256
+}
+
+func (curve p256Curve) Params() *CurveParams {
+ return curve.CurveParams
+}
+
+// Functions implemented in p256_asm_*64.s
+// Montgomery multiplication modulo P256
+//go:noescape
+func p256Mul(res, in1, in2 []uint64)
+
+// Montgomery square modulo P256, repeated n times (n >= 1)
+//go:noescape
+func p256Sqr(res, in []uint64, n int)
+
+// Montgomery multiplication by 1
+//go:noescape
+func p256FromMont(res, in []uint64)
+
+// iff cond == 1 val <- -val
+//go:noescape
+func p256NegCond(val []uint64, cond int)
+
+// if cond == 0 res <- b; else res <- a
+//go:noescape
+func p256MovCond(res, a, b []uint64, cond int)
+
+// Endianness swap
+//go:noescape
+func p256BigToLittle(res []uint64, in []byte)
+
+//go:noescape
+func p256LittleToBig(res []byte, in []uint64)
+
+// Constant time table access
+//go:noescape
+func p256Select(point, table []uint64, idx int)
+
+//go:noescape
+func p256SelectBase(point, table []uint64, idx int)
+
+// Montgomery multiplication modulo Ord(G)
+//go:noescape
+func p256OrdMul(res, in1, in2 []uint64)
+
+// Montgomery square modulo Ord(G), repeated n times
+//go:noescape
+func p256OrdSqr(res, in []uint64, n int)
+
+// Point add with in2 being affine point
+// If sign == 1 -> in2 = -in2
+// If sel == 0 -> res = in1
+// if zero == 0 -> res = in2
+//go:noescape
+func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
+
+// Point add. Returns one if the two input points were equal and zero
+// otherwise. (Note that, due to the way that the equations work out, some
+// representations of ∞ are considered equal to everything by this function.)
+//go:noescape
+func p256PointAddAsm(res, in1, in2 []uint64) int
+
+// Point double
+//go:noescape
+func p256PointDoubleAsm(res, in []uint64)
+
+func (curve p256Curve) Inverse(k *big.Int) *big.Int {
+ if k.Sign() < 0 {
+ // This should never happen.
+ k = new(big.Int).Neg(k)
+ }
+
+ if k.Cmp(p256.N) >= 0 {
+ // This should never happen.
+ k = new(big.Int).Mod(k, p256.N)
+ }
+
+ // table will store precomputed powers of x.
+ var table [4 * 9]uint64
+ var (
+ _1 = table[4*0 : 4*1]
+ _11 = table[4*1 : 4*2]
+ _101 = table[4*2 : 4*3]
+ _111 = table[4*3 : 4*4]
+ _1111 = table[4*4 : 4*5]
+ _10101 = table[4*5 : 4*6]
+ _101111 = table[4*6 : 4*7]
+ x = table[4*7 : 4*8]
+ t = table[4*8 : 4*9]
+ )
+
+ fromBig(x[:], k)
+ // This code operates in the Montgomery domain where R = 2^256 mod n
+ // and n is the order of the scalar field. (See initP256 for the
+ // value.) Elements in the Montgomery domain take the form a×R and
+ // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
+ // is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
+ // i.e. converts x into the Montgomery domain.
+ // Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
+ RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
+ p256OrdMul(_1, x, RR) // _1
+ p256OrdSqr(x, _1, 1) // _10
+ p256OrdMul(_11, x, _1) // _11
+ p256OrdMul(_101, x, _11) // _101
+ p256OrdMul(_111, x, _101) // _111
+ p256OrdSqr(x, _101, 1) // _1010
+ p256OrdMul(_1111, _101, x) // _1111
+
+ p256OrdSqr(t, x, 1) // _10100
+ p256OrdMul(_10101, t, _1) // _10101
+ p256OrdSqr(x, _10101, 1) // _101010
+ p256OrdMul(_101111, _101, x) // _101111
+ p256OrdMul(x, _10101, x) // _111111 = x6
+ p256OrdSqr(t, x, 2) // _11111100
+ p256OrdMul(t, t, _11) // _11111111 = x8
+ p256OrdSqr(x, t, 8) // _ff00
+ p256OrdMul(x, x, t) // _ffff = x16
+ p256OrdSqr(t, x, 16) // _ffff0000
+ p256OrdMul(t, t, x) // _ffffffff = x32
+
+ p256OrdSqr(x, t, 64)
+ p256OrdMul(x, x, t)
+ p256OrdSqr(x, x, 32)
+ p256OrdMul(x, x, t)
+
+ sqrs := []uint8{
+ 6, 5, 4, 5, 5,
+ 4, 3, 3, 5, 9,
+ 6, 2, 5, 6, 5,
+ 4, 5, 5, 3, 10,
+ 2, 5, 5, 3, 7, 6}
+ muls := [][]uint64{
+ _101111, _111, _11, _1111, _10101,
+ _101, _101, _101, _111, _101111,
+ _1111, _1, _1, _1111, _111,
+ _111, _111, _101, _11, _101111,
+ _11, _11, _11, _1, _10101, _1111}
+
+ for i, s := range sqrs {
+ p256OrdSqr(x, x, int(s))
+ p256OrdMul(x, x, muls[i])
+ }
+
+ // Multiplying by one in the Montgomery domain converts a Montgomery
+ // value out of the domain.
+ one := []uint64{1, 0, 0, 0}
+ p256OrdMul(x, x, one)
+
+ xOut := make([]byte, 32)
+ p256LittleToBig(xOut, x)
+ return new(big.Int).SetBytes(xOut)
+}
+
+// fromBig converts a *big.Int into a format used by this code.
+func fromBig(out []uint64, big *big.Int) {
+ for i := range out {
+ out[i] = 0
+ }
+
+ for i, v := range big.Bits() {
+ out[i] = uint64(v)
+ }
+}
+
+// p256GetScalar endian-swaps the big-endian scalar value from in and writes it
+// to out. If the scalar is equal or greater than the order of the group, it's
+// reduced modulo that order.
+func p256GetScalar(out []uint64, in []byte) {
+ n := new(big.Int).SetBytes(in)
+
+ if n.Cmp(p256.N) >= 0 {
+ n.Mod(n, p256.N)
+ }
+ fromBig(out, n)
+}
+
+// p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
+// underlying field of the curve. (See initP256 for the value.) Thus rr here is
+// R×R mod p. See comment in Inverse about how this is used.
+var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
+
+func maybeReduceModP(in *big.Int) *big.Int {
+ if in.Cmp(p256.P) < 0 {
+ return in
+ }
+ return new(big.Int).Mod(in, p256.P)
+}
+
+func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
+ scalarReversed := make([]uint64, 4)
+ var r1, r2 p256Point
+ p256GetScalar(scalarReversed, baseScalar)
+ r1IsInfinity := scalarIsZero(scalarReversed)
+ r1.p256BaseMult(scalarReversed)
+
+ p256GetScalar(scalarReversed, scalar)
+ r2IsInfinity := scalarIsZero(scalarReversed)
+ fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
+ fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
+ p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
+ p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
+
+ // This sets r2's Z value to 1, in the Montgomery domain.
+ r2.xyz[8] = 0x0000000000000001
+ r2.xyz[9] = 0xffffffff00000000
+ r2.xyz[10] = 0xffffffffffffffff
+ r2.xyz[11] = 0x00000000fffffffe
+
+ r2.p256ScalarMult(scalarReversed)
+
+ var sum, double p256Point
+ pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:])
+ p256PointDoubleAsm(double.xyz[:], r1.xyz[:])
+ sum.CopyConditional(&double, pointsEqual)
+ sum.CopyConditional(&r1, r2IsInfinity)
+ sum.CopyConditional(&r2, r1IsInfinity)
+
+ return sum.p256PointToAffine()
+}
+
+func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
+ scalarReversed := make([]uint64, 4)
+ p256GetScalar(scalarReversed, scalar)
+
+ var r p256Point
+ r.p256BaseMult(scalarReversed)
+ return r.p256PointToAffine()
+}
+
+func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
+ scalarReversed := make([]uint64, 4)
+ p256GetScalar(scalarReversed, scalar)
+
+ var r p256Point
+ fromBig(r.xyz[0:4], maybeReduceModP(bigX))
+ fromBig(r.xyz[4:8], maybeReduceModP(bigY))
+ p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
+ p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
+ // This sets r2's Z value to 1, in the Montgomery domain.
+ r.xyz[8] = 0x0000000000000001
+ r.xyz[9] = 0xffffffff00000000
+ r.xyz[10] = 0xffffffffffffffff
+ r.xyz[11] = 0x00000000fffffffe
+
+ r.p256ScalarMult(scalarReversed)
+ return r.p256PointToAffine()
+}
+
+// uint64IsZero returns 1 if x is zero and zero otherwise.
+func uint64IsZero(x uint64) int {
+ x = ^x
+ x &= x >> 32
+ x &= x >> 16
+ x &= x >> 8
+ x &= x >> 4
+ x &= x >> 2
+ x &= x >> 1
+ return int(x & 1)
+}
+
+// scalarIsZero returns 1 if scalar represents the zero value, and zero
+// otherwise.
+func scalarIsZero(scalar []uint64) int {
+ return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3])
+}
+
+func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
+ zInv := make([]uint64, 4)
+ zInvSq := make([]uint64, 4)
+ p256Inverse(zInv, p.xyz[8:12])
+ p256Sqr(zInvSq, zInv, 1)
+ p256Mul(zInv, zInv, zInvSq)
+
+ p256Mul(zInvSq, p.xyz[0:4], zInvSq)
+ p256Mul(zInv, p.xyz[4:8], zInv)
+
+ p256FromMont(zInvSq, zInvSq)
+ p256FromMont(zInv, zInv)
+
+ xOut := make([]byte, 32)
+ yOut := make([]byte, 32)
+ p256LittleToBig(xOut, zInvSq)
+ p256LittleToBig(yOut, zInv)
+
+ return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
+}
+
+// CopyConditional copies overwrites p with src if v == 1, and leaves p
+// unchanged if v == 0.
+func (p *p256Point) CopyConditional(src *p256Point, v int) {
+ pMask := uint64(v) - 1
+ srcMask := ^pMask
+
+ for i, n := range p.xyz {
+ p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask)
+ }
+}
+
+// p256Inverse sets out to in^-1 mod p.
+func p256Inverse(out, in []uint64) {
+ var stack [6 * 4]uint64
+ p2 := stack[4*0 : 4*0+4]
+ p4 := stack[4*1 : 4*1+4]
+ p8 := stack[4*2 : 4*2+4]
+ p16 := stack[4*3 : 4*3+4]
+ p32 := stack[4*4 : 4*4+4]
+
+ p256Sqr(out, in, 1)
+ p256Mul(p2, out, in) // 3*p
+
+ p256Sqr(out, p2, 2)
+ p256Mul(p4, out, p2) // f*p
+
+ p256Sqr(out, p4, 4)
+ p256Mul(p8, out, p4) // ff*p
+
+ p256Sqr(out, p8, 8)
+ p256Mul(p16, out, p8) // ffff*p
+
+ p256Sqr(out, p16, 16)
+ p256Mul(p32, out, p16) // ffffffff*p
+
+ p256Sqr(out, p32, 32)
+ p256Mul(out, out, in)
+
+ p256Sqr(out, out, 128)
+ p256Mul(out, out, p32)
+
+ p256Sqr(out, out, 32)
+ p256Mul(out, out, p32)
+
+ p256Sqr(out, out, 16)
+ p256Mul(out, out, p16)
+
+ p256Sqr(out, out, 8)
+ p256Mul(out, out, p8)
+
+ p256Sqr(out, out, 4)
+ p256Mul(out, out, p4)
+
+ p256Sqr(out, out, 2)
+ p256Mul(out, out, p2)
+
+ p256Sqr(out, out, 2)
+ p256Mul(out, out, in)
+}
+
+func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
+ copy(r[index*12:], p.xyz[:])
+}
+
+func boothW5(in uint) (int, int) {
+ var s uint = ^((in >> 5) - 1)
+ var d uint = (1 << 6) - in - 1
+ d = (d & s) | (in & (^s))
+ d = (d >> 1) + (d & 1)
+ return int(d), int(s & 1)
+}
+
+func boothW6(in uint) (int, int) {
+ var s uint = ^((in >> 6) - 1)
+ var d uint = (1 << 7) - in - 1
+ d = (d & s) | (in & (^s))
+ d = (d >> 1) + (d & 1)
+ return int(d), int(s & 1)
+}
+
+func (p *p256Point) p256BaseMult(scalar []uint64) {
+ wvalue := (scalar[0] << 1) & 0x7f
+ sel, sign := boothW6(uint(wvalue))
+ p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
+ p256NegCond(p.xyz[4:8], sign)
+
+ // (This is one, in the Montgomery domain.)
+ p.xyz[8] = 0x0000000000000001
+ p.xyz[9] = 0xffffffff00000000
+ p.xyz[10] = 0xffffffffffffffff
+ p.xyz[11] = 0x00000000fffffffe
+
+ var t0 p256Point
+ // (This is one, in the Montgomery domain.)
+ t0.xyz[8] = 0x0000000000000001
+ t0.xyz[9] = 0xffffffff00000000
+ t0.xyz[10] = 0xffffffffffffffff
+ t0.xyz[11] = 0x00000000fffffffe
+
+ index := uint(5)
+ zero := sel
+
+ for i := 1; i < 43; i++ {
+ if index < 192 {
+ wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
+ } else {
+ wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
+ }
+ index += 6
+ sel, sign = boothW6(uint(wvalue))
+ p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
+ p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
+ zero |= sel
+ }
+}
+
+func (p *p256Point) p256ScalarMult(scalar []uint64) {
+ // precomp is a table of precomputed points that stores powers of p
+ // from p^1 to p^16.
+ var precomp [16 * 4 * 3]uint64
+ var t0, t1, t2, t3 p256Point
+
+ // Prepare the table
+ p.p256StorePoint(&precomp, 0) // 1
+
+ p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
+ p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
+ p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
+ p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
+ t0.p256StorePoint(&precomp, 1) // 2
+ t1.p256StorePoint(&precomp, 3) // 4
+ t2.p256StorePoint(&precomp, 7) // 8
+ t3.p256StorePoint(&precomp, 15) // 16
+
+ p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
+ p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
+ p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
+ t0.p256StorePoint(&precomp, 2) // 3
+ t1.p256StorePoint(&precomp, 4) // 5
+ t2.p256StorePoint(&precomp, 8) // 9
+
+ p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
+ p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
+ t0.p256StorePoint(&precomp, 5) // 6
+ t1.p256StorePoint(&precomp, 9) // 10
+
+ p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
+ p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
+ t2.p256StorePoint(&precomp, 6) // 7
+ t1.p256StorePoint(&precomp, 10) // 11
+
+ p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
+ p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
+ t0.p256StorePoint(&precomp, 11) // 12
+ t2.p256StorePoint(&precomp, 13) // 14
+
+ p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
+ p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
+ t0.p256StorePoint(&precomp, 12) // 13
+ t2.p256StorePoint(&precomp, 14) // 15
+
+ // Start scanning the window from top bit
+ index := uint(254)
+ var sel, sign int
+
+ wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
+ sel, _ = boothW5(uint(wvalue))
+
+ p256Select(p.xyz[0:12], precomp[0:], sel)
+ zero := sel
+
+ for index > 4 {
+ index -= 5
+ p256PointDoubleAsm(p.xyz[:], p.xyz[:])
+ p256PointDoubleAsm(p.xyz[:], p.xyz[:])
+ p256PointDoubleAsm(p.xyz[:], p.xyz[:])
+ p256PointDoubleAsm(p.xyz[:], p.xyz[:])
+ p256PointDoubleAsm(p.xyz[:], p.xyz[:])
+
+ if index < 192 {
+ wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
+ } else {
+ wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
+ }
+
+ sel, sign = boothW5(uint(wvalue))
+
+ p256Select(t0.xyz[0:], precomp[0:], sel)
+ p256NegCond(t0.xyz[4:8], sign)
+ p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
+ p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
+ p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
+ zero |= sel
+ }
+
+ p256PointDoubleAsm(p.xyz[:], p.xyz[:])
+ p256PointDoubleAsm(p.xyz[:], p.xyz[:])
+ p256PointDoubleAsm(p.xyz[:], p.xyz[:])
+ p256PointDoubleAsm(p.xyz[:], p.xyz[:])
+ p256PointDoubleAsm(p.xyz[:], p.xyz[:])
+
+ wvalue = (scalar[0] << 1) & 0x3f
+ sel, sign = boothW5(uint(wvalue))
+
+ p256Select(t0.xyz[0:], precomp[0:], sel)
+ p256NegCond(t0.xyz[4:8], sign)
+ p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
+ p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
+ p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
+}
diff --git a/src/crypto/elliptic/p256_asm_amd64.s b/src/crypto/elliptic/p256_asm_amd64.s
new file mode 100644
index 0000000..c77b11b
--- /dev/null
+++ b/src/crypto/elliptic/p256_asm_amd64.s
@@ -0,0 +1,2347 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file contains constant-time, 64-bit assembly implementation of
+// P256. The optimizations performed here are described in detail in:
+// S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
+// 256-bit primes"
+// https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
+// https://eprint.iacr.org/2013/816.pdf
+
+#include "textflag.h"
+
+#define res_ptr DI
+#define x_ptr SI
+#define y_ptr CX
+
+#define acc0 R8
+#define acc1 R9
+#define acc2 R10
+#define acc3 R11
+#define acc4 R12
+#define acc5 R13
+#define t0 R14
+#define t1 R15
+
+DATA p256const0<>+0x00(SB)/8, $0x00000000ffffffff
+DATA p256const1<>+0x00(SB)/8, $0xffffffff00000001
+DATA p256ordK0<>+0x00(SB)/8, $0xccd1c8aaee00bc4f
+DATA p256ord<>+0x00(SB)/8, $0xf3b9cac2fc632551
+DATA p256ord<>+0x08(SB)/8, $0xbce6faada7179e84
+DATA p256ord<>+0x10(SB)/8, $0xffffffffffffffff
+DATA p256ord<>+0x18(SB)/8, $0xffffffff00000000
+DATA p256one<>+0x00(SB)/8, $0x0000000000000001
+DATA p256one<>+0x08(SB)/8, $0xffffffff00000000
+DATA p256one<>+0x10(SB)/8, $0xffffffffffffffff
+DATA p256one<>+0x18(SB)/8, $0x00000000fffffffe
+GLOBL p256const0<>(SB), 8, $8
+GLOBL p256const1<>(SB), 8, $8
+GLOBL p256ordK0<>(SB), 8, $8
+GLOBL p256ord<>(SB), 8, $32
+GLOBL p256one<>(SB), 8, $32
+
+/* ---------------------------------------*/
+// func p256LittleToBig(res []byte, in []uint64)
+TEXT ·p256LittleToBig(SB),NOSPLIT,$0
+ JMP ·p256BigToLittle(SB)
+/* ---------------------------------------*/
+// func p256BigToLittle(res []uint64, in []byte)
+TEXT ·p256BigToLittle(SB),NOSPLIT,$0
+ MOVQ res+0(FP), res_ptr
+ MOVQ in+24(FP), x_ptr
+
+ MOVQ (8*0)(x_ptr), acc0
+ MOVQ (8*1)(x_ptr), acc1
+ MOVQ (8*2)(x_ptr), acc2
+ MOVQ (8*3)(x_ptr), acc3
+
+ BSWAPQ acc0
+ BSWAPQ acc1
+ BSWAPQ acc2
+ BSWAPQ acc3
+
+ MOVQ acc3, (8*0)(res_ptr)
+ MOVQ acc2, (8*1)(res_ptr)
+ MOVQ acc1, (8*2)(res_ptr)
+ MOVQ acc0, (8*3)(res_ptr)
+
+ RET
+/* ---------------------------------------*/
+// func p256MovCond(res, a, b []uint64, cond int)
+// If cond == 0 res=b, else res=a
+TEXT ·p256MovCond(SB),NOSPLIT,$0
+ MOVQ res+0(FP), res_ptr
+ MOVQ a+24(FP), x_ptr
+ MOVQ b+48(FP), y_ptr
+ MOVQ cond+72(FP), X12
+
+ PXOR X13, X13
+ PSHUFD $0, X12, X12
+ PCMPEQL X13, X12
+
+ MOVOU X12, X0
+ MOVOU (16*0)(x_ptr), X6
+ PANDN X6, X0
+ MOVOU X12, X1
+ MOVOU (16*1)(x_ptr), X7
+ PANDN X7, X1
+ MOVOU X12, X2
+ MOVOU (16*2)(x_ptr), X8
+ PANDN X8, X2
+ MOVOU X12, X3
+ MOVOU (16*3)(x_ptr), X9
+ PANDN X9, X3
+ MOVOU X12, X4
+ MOVOU (16*4)(x_ptr), X10
+ PANDN X10, X4
+ MOVOU X12, X5
+ MOVOU (16*5)(x_ptr), X11
+ PANDN X11, X5
+
+ MOVOU (16*0)(y_ptr), X6
+ MOVOU (16*1)(y_ptr), X7
+ MOVOU (16*2)(y_ptr), X8
+ MOVOU (16*3)(y_ptr), X9
+ MOVOU (16*4)(y_ptr), X10
+ MOVOU (16*5)(y_ptr), X11
+
+ PAND X12, X6
+ PAND X12, X7
+ PAND X12, X8
+ PAND X12, X9
+ PAND X12, X10
+ PAND X12, X11
+
+ PXOR X6, X0
+ PXOR X7, X1
+ PXOR X8, X2
+ PXOR X9, X3
+ PXOR X10, X4
+ PXOR X11, X5
+
+ MOVOU X0, (16*0)(res_ptr)
+ MOVOU X1, (16*1)(res_ptr)
+ MOVOU X2, (16*2)(res_ptr)
+ MOVOU X3, (16*3)(res_ptr)
+ MOVOU X4, (16*4)(res_ptr)
+ MOVOU X5, (16*5)(res_ptr)
+
+ RET
+/* ---------------------------------------*/
+// func p256NegCond(val []uint64, cond int)
+TEXT ·p256NegCond(SB),NOSPLIT,$0
+ MOVQ val+0(FP), res_ptr
+ MOVQ cond+24(FP), t0
+ // acc = poly
+ MOVQ $-1, acc0
+ MOVQ p256const0<>(SB), acc1
+ MOVQ $0, acc2
+ MOVQ p256const1<>(SB), acc3
+ // Load the original value
+ MOVQ (8*0)(res_ptr), acc5
+ MOVQ (8*1)(res_ptr), x_ptr
+ MOVQ (8*2)(res_ptr), y_ptr
+ MOVQ (8*3)(res_ptr), t1
+ // Speculatively subtract
+ SUBQ acc5, acc0
+ SBBQ x_ptr, acc1
+ SBBQ y_ptr, acc2
+ SBBQ t1, acc3
+ // If condition is 0, keep original value
+ TESTQ t0, t0
+ CMOVQEQ acc5, acc0
+ CMOVQEQ x_ptr, acc1
+ CMOVQEQ y_ptr, acc2
+ CMOVQEQ t1, acc3
+ // Store result
+ MOVQ acc0, (8*0)(res_ptr)
+ MOVQ acc1, (8*1)(res_ptr)
+ MOVQ acc2, (8*2)(res_ptr)
+ MOVQ acc3, (8*3)(res_ptr)
+
+ RET
+/* ---------------------------------------*/
+// func p256Sqr(res, in []uint64, n int)
+TEXT ·p256Sqr(SB),NOSPLIT,$0
+ MOVQ res+0(FP), res_ptr
+ MOVQ in+24(FP), x_ptr
+ MOVQ n+48(FP), BX
+
+sqrLoop:
+
+ // y[1:] * y[0]
+ MOVQ (8*0)(x_ptr), t0
+
+ MOVQ (8*1)(x_ptr), AX
+ MULQ t0
+ MOVQ AX, acc1
+ MOVQ DX, acc2
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, acc3
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, acc4
+ // y[2:] * y[1]
+ MOVQ (8*1)(x_ptr), t0
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc4
+ ADCQ $0, DX
+ ADDQ AX, acc4
+ ADCQ $0, DX
+ MOVQ DX, acc5
+ // y[3] * y[2]
+ MOVQ (8*2)(x_ptr), t0
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc5
+ ADCQ $0, DX
+ MOVQ DX, y_ptr
+ XORQ t1, t1
+ // *2
+ ADDQ acc1, acc1
+ ADCQ acc2, acc2
+ ADCQ acc3, acc3
+ ADCQ acc4, acc4
+ ADCQ acc5, acc5
+ ADCQ y_ptr, y_ptr
+ ADCQ $0, t1
+ // Missing products
+ MOVQ (8*0)(x_ptr), AX
+ MULQ AX
+ MOVQ AX, acc0
+ MOVQ DX, t0
+
+ MOVQ (8*1)(x_ptr), AX
+ MULQ AX
+ ADDQ t0, acc1
+ ADCQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, t0
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ AX
+ ADDQ t0, acc3
+ ADCQ AX, acc4
+ ADCQ $0, DX
+ MOVQ DX, t0
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ AX
+ ADDQ t0, acc5
+ ADCQ AX, y_ptr
+ ADCQ DX, t1
+ MOVQ t1, x_ptr
+ // First reduction step
+ MOVQ acc0, AX
+ MOVQ acc0, t1
+ SHLQ $32, acc0
+ MULQ p256const1<>(SB)
+ SHRQ $32, t1
+ ADDQ acc0, acc1
+ ADCQ t1, acc2
+ ADCQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, acc0
+ // Second reduction step
+ MOVQ acc1, AX
+ MOVQ acc1, t1
+ SHLQ $32, acc1
+ MULQ p256const1<>(SB)
+ SHRQ $32, t1
+ ADDQ acc1, acc2
+ ADCQ t1, acc3
+ ADCQ AX, acc0
+ ADCQ $0, DX
+ MOVQ DX, acc1
+ // Third reduction step
+ MOVQ acc2, AX
+ MOVQ acc2, t1
+ SHLQ $32, acc2
+ MULQ p256const1<>(SB)
+ SHRQ $32, t1
+ ADDQ acc2, acc3
+ ADCQ t1, acc0
+ ADCQ AX, acc1
+ ADCQ $0, DX
+ MOVQ DX, acc2
+ // Last reduction step
+ XORQ t0, t0
+ MOVQ acc3, AX
+ MOVQ acc3, t1
+ SHLQ $32, acc3
+ MULQ p256const1<>(SB)
+ SHRQ $32, t1
+ ADDQ acc3, acc0
+ ADCQ t1, acc1
+ ADCQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, acc3
+ // Add bits [511:256] of the sqr result
+ ADCQ acc4, acc0
+ ADCQ acc5, acc1
+ ADCQ y_ptr, acc2
+ ADCQ x_ptr, acc3
+ ADCQ $0, t0
+
+ MOVQ acc0, acc4
+ MOVQ acc1, acc5
+ MOVQ acc2, y_ptr
+ MOVQ acc3, t1
+ // Subtract p256
+ SUBQ $-1, acc0
+ SBBQ p256const0<>(SB) ,acc1
+ SBBQ $0, acc2
+ SBBQ p256const1<>(SB), acc3
+ SBBQ $0, t0
+
+ CMOVQCS acc4, acc0
+ CMOVQCS acc5, acc1
+ CMOVQCS y_ptr, acc2
+ CMOVQCS t1, acc3
+
+ MOVQ acc0, (8*0)(res_ptr)
+ MOVQ acc1, (8*1)(res_ptr)
+ MOVQ acc2, (8*2)(res_ptr)
+ MOVQ acc3, (8*3)(res_ptr)
+ MOVQ res_ptr, x_ptr
+ DECQ BX
+ JNE sqrLoop
+
+ RET
+/* ---------------------------------------*/
+// func p256Mul(res, in1, in2 []uint64)
+TEXT ·p256Mul(SB),NOSPLIT,$0
+ MOVQ res+0(FP), res_ptr
+ MOVQ in1+24(FP), x_ptr
+ MOVQ in2+48(FP), y_ptr
+ // x * y[0]
+ MOVQ (8*0)(y_ptr), t0
+
+ MOVQ (8*0)(x_ptr), AX
+ MULQ t0
+ MOVQ AX, acc0
+ MOVQ DX, acc1
+
+ MOVQ (8*1)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc1
+ ADCQ $0, DX
+ MOVQ DX, acc2
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, acc3
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, acc4
+ XORQ acc5, acc5
+ // First reduction step
+ MOVQ acc0, AX
+ MOVQ acc0, t1
+ SHLQ $32, acc0
+ MULQ p256const1<>(SB)
+ SHRQ $32, t1
+ ADDQ acc0, acc1
+ ADCQ t1, acc2
+ ADCQ AX, acc3
+ ADCQ DX, acc4
+ ADCQ $0, acc5
+ XORQ acc0, acc0
+ // x * y[1]
+ MOVQ (8*1)(y_ptr), t0
+
+ MOVQ (8*0)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc1
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*1)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc2
+ ADCQ $0, DX
+ ADDQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc3
+ ADCQ $0, DX
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc4
+ ADCQ $0, DX
+ ADDQ AX, acc4
+ ADCQ DX, acc5
+ ADCQ $0, acc0
+ // Second reduction step
+ MOVQ acc1, AX
+ MOVQ acc1, t1
+ SHLQ $32, acc1
+ MULQ p256const1<>(SB)
+ SHRQ $32, t1
+ ADDQ acc1, acc2
+ ADCQ t1, acc3
+ ADCQ AX, acc4
+ ADCQ DX, acc5
+ ADCQ $0, acc0
+ XORQ acc1, acc1
+ // x * y[2]
+ MOVQ (8*2)(y_ptr), t0
+
+ MOVQ (8*0)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*1)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc3
+ ADCQ $0, DX
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc4
+ ADCQ $0, DX
+ ADDQ AX, acc4
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc5
+ ADCQ $0, DX
+ ADDQ AX, acc5
+ ADCQ DX, acc0
+ ADCQ $0, acc1
+ // Third reduction step
+ MOVQ acc2, AX
+ MOVQ acc2, t1
+ SHLQ $32, acc2
+ MULQ p256const1<>(SB)
+ SHRQ $32, t1
+ ADDQ acc2, acc3
+ ADCQ t1, acc4
+ ADCQ AX, acc5
+ ADCQ DX, acc0
+ ADCQ $0, acc1
+ XORQ acc2, acc2
+ // x * y[3]
+ MOVQ (8*3)(y_ptr), t0
+
+ MOVQ (8*0)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*1)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc4
+ ADCQ $0, DX
+ ADDQ AX, acc4
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc5
+ ADCQ $0, DX
+ ADDQ AX, acc5
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc0
+ ADCQ $0, DX
+ ADDQ AX, acc0
+ ADCQ DX, acc1
+ ADCQ $0, acc2
+ // Last reduction step
+ MOVQ acc3, AX
+ MOVQ acc3, t1
+ SHLQ $32, acc3
+ MULQ p256const1<>(SB)
+ SHRQ $32, t1
+ ADDQ acc3, acc4
+ ADCQ t1, acc5
+ ADCQ AX, acc0
+ ADCQ DX, acc1
+ ADCQ $0, acc2
+ // Copy result [255:0]
+ MOVQ acc4, x_ptr
+ MOVQ acc5, acc3
+ MOVQ acc0, t0
+ MOVQ acc1, t1
+ // Subtract p256
+ SUBQ $-1, acc4
+ SBBQ p256const0<>(SB) ,acc5
+ SBBQ $0, acc0
+ SBBQ p256const1<>(SB), acc1
+ SBBQ $0, acc2
+
+ CMOVQCS x_ptr, acc4
+ CMOVQCS acc3, acc5
+ CMOVQCS t0, acc0
+ CMOVQCS t1, acc1
+
+ MOVQ acc4, (8*0)(res_ptr)
+ MOVQ acc5, (8*1)(res_ptr)
+ MOVQ acc0, (8*2)(res_ptr)
+ MOVQ acc1, (8*3)(res_ptr)
+
+ RET
+/* ---------------------------------------*/
+// func p256FromMont(res, in []uint64)
+TEXT ·p256FromMont(SB),NOSPLIT,$0
+ MOVQ res+0(FP), res_ptr
+ MOVQ in+24(FP), x_ptr
+
+ MOVQ (8*0)(x_ptr), acc0
+ MOVQ (8*1)(x_ptr), acc1
+ MOVQ (8*2)(x_ptr), acc2
+ MOVQ (8*3)(x_ptr), acc3
+ XORQ acc4, acc4
+
+ // Only reduce, no multiplications are needed
+ // First stage
+ MOVQ acc0, AX
+ MOVQ acc0, t1
+ SHLQ $32, acc0
+ MULQ p256const1<>(SB)
+ SHRQ $32, t1
+ ADDQ acc0, acc1
+ ADCQ t1, acc2
+ ADCQ AX, acc3
+ ADCQ DX, acc4
+ XORQ acc5, acc5
+ // Second stage
+ MOVQ acc1, AX
+ MOVQ acc1, t1
+ SHLQ $32, acc1
+ MULQ p256const1<>(SB)
+ SHRQ $32, t1
+ ADDQ acc1, acc2
+ ADCQ t1, acc3
+ ADCQ AX, acc4
+ ADCQ DX, acc5
+ XORQ acc0, acc0
+ // Third stage
+ MOVQ acc2, AX
+ MOVQ acc2, t1
+ SHLQ $32, acc2
+ MULQ p256const1<>(SB)
+ SHRQ $32, t1
+ ADDQ acc2, acc3
+ ADCQ t1, acc4
+ ADCQ AX, acc5
+ ADCQ DX, acc0
+ XORQ acc1, acc1
+ // Last stage
+ MOVQ acc3, AX
+ MOVQ acc3, t1
+ SHLQ $32, acc3
+ MULQ p256const1<>(SB)
+ SHRQ $32, t1
+ ADDQ acc3, acc4
+ ADCQ t1, acc5
+ ADCQ AX, acc0
+ ADCQ DX, acc1
+
+ MOVQ acc4, x_ptr
+ MOVQ acc5, acc3
+ MOVQ acc0, t0
+ MOVQ acc1, t1
+
+ SUBQ $-1, acc4
+ SBBQ p256const0<>(SB), acc5
+ SBBQ $0, acc0
+ SBBQ p256const1<>(SB), acc1
+
+ CMOVQCS x_ptr, acc4
+ CMOVQCS acc3, acc5
+ CMOVQCS t0, acc0
+ CMOVQCS t1, acc1
+
+ MOVQ acc4, (8*0)(res_ptr)
+ MOVQ acc5, (8*1)(res_ptr)
+ MOVQ acc0, (8*2)(res_ptr)
+ MOVQ acc1, (8*3)(res_ptr)
+
+ RET
+/* ---------------------------------------*/
+// Constant time point access to arbitrary point table.
+// Indexed from 1 to 15, with -1 offset
+// (index 0 is implicitly point at infinity)
+// func p256Select(point, table []uint64, idx int)
+TEXT ·p256Select(SB),NOSPLIT,$0
+ MOVQ idx+48(FP),AX
+ MOVQ table+24(FP),DI
+ MOVQ point+0(FP),DX
+
+ PXOR X15, X15 // X15 = 0
+ PCMPEQL X14, X14 // X14 = -1
+ PSUBL X14, X15 // X15 = 1
+ MOVL AX, X14
+ PSHUFD $0, X14, X14
+
+ PXOR X0, X0
+ PXOR X1, X1
+ PXOR X2, X2
+ PXOR X3, X3
+ PXOR X4, X4
+ PXOR X5, X5
+ MOVQ $16, AX
+
+ MOVOU X15, X13
+
+loop_select:
+
+ MOVOU X13, X12
+ PADDL X15, X13
+ PCMPEQL X14, X12
+
+ MOVOU (16*0)(DI), X6
+ MOVOU (16*1)(DI), X7
+ MOVOU (16*2)(DI), X8
+ MOVOU (16*3)(DI), X9
+ MOVOU (16*4)(DI), X10
+ MOVOU (16*5)(DI), X11
+ ADDQ $(16*6), DI
+
+ PAND X12, X6
+ PAND X12, X7
+ PAND X12, X8
+ PAND X12, X9
+ PAND X12, X10
+ PAND X12, X11
+
+ PXOR X6, X0
+ PXOR X7, X1
+ PXOR X8, X2
+ PXOR X9, X3
+ PXOR X10, X4
+ PXOR X11, X5
+
+ DECQ AX
+ JNE loop_select
+
+ MOVOU X0, (16*0)(DX)
+ MOVOU X1, (16*1)(DX)
+ MOVOU X2, (16*2)(DX)
+ MOVOU X3, (16*3)(DX)
+ MOVOU X4, (16*4)(DX)
+ MOVOU X5, (16*5)(DX)
+
+ RET
+/* ---------------------------------------*/
+// Constant time point access to base point table.
+// func p256SelectBase(point, table []uint64, idx int)
+TEXT ·p256SelectBase(SB),NOSPLIT,$0
+ MOVQ idx+48(FP),AX
+ MOVQ table+24(FP),DI
+ MOVQ point+0(FP),DX
+
+ PXOR X15, X15 // X15 = 0
+ PCMPEQL X14, X14 // X14 = -1
+ PSUBL X14, X15 // X15 = 1
+ MOVL AX, X14
+ PSHUFD $0, X14, X14
+
+ PXOR X0, X0
+ PXOR X1, X1
+ PXOR X2, X2
+ PXOR X3, X3
+ MOVQ $16, AX
+
+ MOVOU X15, X13
+
+loop_select_base:
+
+ MOVOU X13, X12
+ PADDL X15, X13
+ PCMPEQL X14, X12
+
+ MOVOU (16*0)(DI), X4
+ MOVOU (16*1)(DI), X5
+ MOVOU (16*2)(DI), X6
+ MOVOU (16*3)(DI), X7
+
+ MOVOU (16*4)(DI), X8
+ MOVOU (16*5)(DI), X9
+ MOVOU (16*6)(DI), X10
+ MOVOU (16*7)(DI), X11
+
+ ADDQ $(16*8), DI
+
+ PAND X12, X4
+ PAND X12, X5
+ PAND X12, X6
+ PAND X12, X7
+
+ MOVOU X13, X12
+ PADDL X15, X13
+ PCMPEQL X14, X12
+
+ PAND X12, X8
+ PAND X12, X9
+ PAND X12, X10
+ PAND X12, X11
+
+ PXOR X4, X0
+ PXOR X5, X1
+ PXOR X6, X2
+ PXOR X7, X3
+
+ PXOR X8, X0
+ PXOR X9, X1
+ PXOR X10, X2
+ PXOR X11, X3
+
+ DECQ AX
+ JNE loop_select_base
+
+ MOVOU X0, (16*0)(DX)
+ MOVOU X1, (16*1)(DX)
+ MOVOU X2, (16*2)(DX)
+ MOVOU X3, (16*3)(DX)
+
+ RET
+/* ---------------------------------------*/
+// func p256OrdMul(res, in1, in2 []uint64)
+TEXT ·p256OrdMul(SB),NOSPLIT,$0
+ MOVQ res+0(FP), res_ptr
+ MOVQ in1+24(FP), x_ptr
+ MOVQ in2+48(FP), y_ptr
+ // x * y[0]
+ MOVQ (8*0)(y_ptr), t0
+
+ MOVQ (8*0)(x_ptr), AX
+ MULQ t0
+ MOVQ AX, acc0
+ MOVQ DX, acc1
+
+ MOVQ (8*1)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc1
+ ADCQ $0, DX
+ MOVQ DX, acc2
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, acc3
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, acc4
+ XORQ acc5, acc5
+ // First reduction step
+ MOVQ acc0, AX
+ MULQ p256ordK0<>(SB)
+ MOVQ AX, t0
+
+ MOVQ p256ord<>+0x00(SB), AX
+ MULQ t0
+ ADDQ AX, acc0
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x08(SB), AX
+ MULQ t0
+ ADDQ t1, acc1
+ ADCQ $0, DX
+ ADDQ AX, acc1
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x10(SB), AX
+ MULQ t0
+ ADDQ t1, acc2
+ ADCQ $0, DX
+ ADDQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x18(SB), AX
+ MULQ t0
+ ADDQ t1, acc3
+ ADCQ $0, DX
+ ADDQ AX, acc3
+ ADCQ DX, acc4
+ ADCQ $0, acc5
+ // x * y[1]
+ MOVQ (8*1)(y_ptr), t0
+
+ MOVQ (8*0)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc1
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*1)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc2
+ ADCQ $0, DX
+ ADDQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc3
+ ADCQ $0, DX
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc4
+ ADCQ $0, DX
+ ADDQ AX, acc4
+ ADCQ DX, acc5
+ ADCQ $0, acc0
+ // Second reduction step
+ MOVQ acc1, AX
+ MULQ p256ordK0<>(SB)
+ MOVQ AX, t0
+
+ MOVQ p256ord<>+0x00(SB), AX
+ MULQ t0
+ ADDQ AX, acc1
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x08(SB), AX
+ MULQ t0
+ ADDQ t1, acc2
+ ADCQ $0, DX
+ ADDQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x10(SB), AX
+ MULQ t0
+ ADDQ t1, acc3
+ ADCQ $0, DX
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x18(SB), AX
+ MULQ t0
+ ADDQ t1, acc4
+ ADCQ $0, DX
+ ADDQ AX, acc4
+ ADCQ DX, acc5
+ ADCQ $0, acc0
+ // x * y[2]
+ MOVQ (8*2)(y_ptr), t0
+
+ MOVQ (8*0)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*1)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc3
+ ADCQ $0, DX
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc4
+ ADCQ $0, DX
+ ADDQ AX, acc4
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc5
+ ADCQ $0, DX
+ ADDQ AX, acc5
+ ADCQ DX, acc0
+ ADCQ $0, acc1
+ // Third reduction step
+ MOVQ acc2, AX
+ MULQ p256ordK0<>(SB)
+ MOVQ AX, t0
+
+ MOVQ p256ord<>+0x00(SB), AX
+ MULQ t0
+ ADDQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x08(SB), AX
+ MULQ t0
+ ADDQ t1, acc3
+ ADCQ $0, DX
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x10(SB), AX
+ MULQ t0
+ ADDQ t1, acc4
+ ADCQ $0, DX
+ ADDQ AX, acc4
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x18(SB), AX
+ MULQ t0
+ ADDQ t1, acc5
+ ADCQ $0, DX
+ ADDQ AX, acc5
+ ADCQ DX, acc0
+ ADCQ $0, acc1
+ // x * y[3]
+ MOVQ (8*3)(y_ptr), t0
+
+ MOVQ (8*0)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*1)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc4
+ ADCQ $0, DX
+ ADDQ AX, acc4
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc5
+ ADCQ $0, DX
+ ADDQ AX, acc5
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc0
+ ADCQ $0, DX
+ ADDQ AX, acc0
+ ADCQ DX, acc1
+ ADCQ $0, acc2
+ // Last reduction step
+ MOVQ acc3, AX
+ MULQ p256ordK0<>(SB)
+ MOVQ AX, t0
+
+ MOVQ p256ord<>+0x00(SB), AX
+ MULQ t0
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x08(SB), AX
+ MULQ t0
+ ADDQ t1, acc4
+ ADCQ $0, DX
+ ADDQ AX, acc4
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x10(SB), AX
+ MULQ t0
+ ADDQ t1, acc5
+ ADCQ $0, DX
+ ADDQ AX, acc5
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x18(SB), AX
+ MULQ t0
+ ADDQ t1, acc0
+ ADCQ $0, DX
+ ADDQ AX, acc0
+ ADCQ DX, acc1
+ ADCQ $0, acc2
+ // Copy result [255:0]
+ MOVQ acc4, x_ptr
+ MOVQ acc5, acc3
+ MOVQ acc0, t0
+ MOVQ acc1, t1
+ // Subtract p256
+ SUBQ p256ord<>+0x00(SB), acc4
+ SBBQ p256ord<>+0x08(SB) ,acc5
+ SBBQ p256ord<>+0x10(SB), acc0
+ SBBQ p256ord<>+0x18(SB), acc1
+ SBBQ $0, acc2
+
+ CMOVQCS x_ptr, acc4
+ CMOVQCS acc3, acc5
+ CMOVQCS t0, acc0
+ CMOVQCS t1, acc1
+
+ MOVQ acc4, (8*0)(res_ptr)
+ MOVQ acc5, (8*1)(res_ptr)
+ MOVQ acc0, (8*2)(res_ptr)
+ MOVQ acc1, (8*3)(res_ptr)
+
+ RET
+/* ---------------------------------------*/
+// func p256OrdSqr(res, in []uint64, n int)
+TEXT ·p256OrdSqr(SB),NOSPLIT,$0
+ MOVQ res+0(FP), res_ptr
+ MOVQ in+24(FP), x_ptr
+ MOVQ n+48(FP), BX
+
+ordSqrLoop:
+
+ // y[1:] * y[0]
+ MOVQ (8*0)(x_ptr), t0
+
+ MOVQ (8*1)(x_ptr), AX
+ MULQ t0
+ MOVQ AX, acc1
+ MOVQ DX, acc2
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, acc3
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, acc4
+ // y[2:] * y[1]
+ MOVQ (8*1)(x_ptr), t0
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ t1, acc4
+ ADCQ $0, DX
+ ADDQ AX, acc4
+ ADCQ $0, DX
+ MOVQ DX, acc5
+ // y[3] * y[2]
+ MOVQ (8*2)(x_ptr), t0
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ t0
+ ADDQ AX, acc5
+ ADCQ $0, DX
+ MOVQ DX, y_ptr
+ XORQ t1, t1
+ // *2
+ ADDQ acc1, acc1
+ ADCQ acc2, acc2
+ ADCQ acc3, acc3
+ ADCQ acc4, acc4
+ ADCQ acc5, acc5
+ ADCQ y_ptr, y_ptr
+ ADCQ $0, t1
+ // Missing products
+ MOVQ (8*0)(x_ptr), AX
+ MULQ AX
+ MOVQ AX, acc0
+ MOVQ DX, t0
+
+ MOVQ (8*1)(x_ptr), AX
+ MULQ AX
+ ADDQ t0, acc1
+ ADCQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, t0
+
+ MOVQ (8*2)(x_ptr), AX
+ MULQ AX
+ ADDQ t0, acc3
+ ADCQ AX, acc4
+ ADCQ $0, DX
+ MOVQ DX, t0
+
+ MOVQ (8*3)(x_ptr), AX
+ MULQ AX
+ ADDQ t0, acc5
+ ADCQ AX, y_ptr
+ ADCQ DX, t1
+ MOVQ t1, x_ptr
+ // First reduction step
+ MOVQ acc0, AX
+ MULQ p256ordK0<>(SB)
+ MOVQ AX, t0
+
+ MOVQ p256ord<>+0x00(SB), AX
+ MULQ t0
+ ADDQ AX, acc0
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x08(SB), AX
+ MULQ t0
+ ADDQ t1, acc1
+ ADCQ $0, DX
+ ADDQ AX, acc1
+
+ MOVQ t0, t1
+ ADCQ DX, acc2
+ ADCQ $0, t1
+ SUBQ t0, acc2
+ SBBQ $0, t1
+
+ MOVQ t0, AX
+ MOVQ t0, DX
+ MOVQ t0, acc0
+ SHLQ $32, AX
+ SHRQ $32, DX
+
+ ADDQ t1, acc3
+ ADCQ $0, acc0
+ SUBQ AX, acc3
+ SBBQ DX, acc0
+ // Second reduction step
+ MOVQ acc1, AX
+ MULQ p256ordK0<>(SB)
+ MOVQ AX, t0
+
+ MOVQ p256ord<>+0x00(SB), AX
+ MULQ t0
+ ADDQ AX, acc1
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x08(SB), AX
+ MULQ t0
+ ADDQ t1, acc2
+ ADCQ $0, DX
+ ADDQ AX, acc2
+
+ MOVQ t0, t1
+ ADCQ DX, acc3
+ ADCQ $0, t1
+ SUBQ t0, acc3
+ SBBQ $0, t1
+
+ MOVQ t0, AX
+ MOVQ t0, DX
+ MOVQ t0, acc1
+ SHLQ $32, AX
+ SHRQ $32, DX
+
+ ADDQ t1, acc0
+ ADCQ $0, acc1
+ SUBQ AX, acc0
+ SBBQ DX, acc1
+ // Third reduction step
+ MOVQ acc2, AX
+ MULQ p256ordK0<>(SB)
+ MOVQ AX, t0
+
+ MOVQ p256ord<>+0x00(SB), AX
+ MULQ t0
+ ADDQ AX, acc2
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x08(SB), AX
+ MULQ t0
+ ADDQ t1, acc3
+ ADCQ $0, DX
+ ADDQ AX, acc3
+
+ MOVQ t0, t1
+ ADCQ DX, acc0
+ ADCQ $0, t1
+ SUBQ t0, acc0
+ SBBQ $0, t1
+
+ MOVQ t0, AX
+ MOVQ t0, DX
+ MOVQ t0, acc2
+ SHLQ $32, AX
+ SHRQ $32, DX
+
+ ADDQ t1, acc1
+ ADCQ $0, acc2
+ SUBQ AX, acc1
+ SBBQ DX, acc2
+ // Last reduction step
+ MOVQ acc3, AX
+ MULQ p256ordK0<>(SB)
+ MOVQ AX, t0
+
+ MOVQ p256ord<>+0x00(SB), AX
+ MULQ t0
+ ADDQ AX, acc3
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ p256ord<>+0x08(SB), AX
+ MULQ t0
+ ADDQ t1, acc0
+ ADCQ $0, DX
+ ADDQ AX, acc0
+ ADCQ $0, DX
+ MOVQ DX, t1
+
+ MOVQ t0, t1
+ ADCQ DX, acc1
+ ADCQ $0, t1
+ SUBQ t0, acc1
+ SBBQ $0, t1
+
+ MOVQ t0, AX
+ MOVQ t0, DX
+ MOVQ t0, acc3
+ SHLQ $32, AX
+ SHRQ $32, DX
+
+ ADDQ t1, acc2
+ ADCQ $0, acc3
+ SUBQ AX, acc2
+ SBBQ DX, acc3
+ XORQ t0, t0
+ // Add bits [511:256] of the sqr result
+ ADCQ acc4, acc0
+ ADCQ acc5, acc1
+ ADCQ y_ptr, acc2
+ ADCQ x_ptr, acc3
+ ADCQ $0, t0
+
+ MOVQ acc0, acc4
+ MOVQ acc1, acc5
+ MOVQ acc2, y_ptr
+ MOVQ acc3, t1
+ // Subtract p256
+ SUBQ p256ord<>+0x00(SB), acc0
+ SBBQ p256ord<>+0x08(SB) ,acc1
+ SBBQ p256ord<>+0x10(SB), acc2
+ SBBQ p256ord<>+0x18(SB), acc3
+ SBBQ $0, t0
+
+ CMOVQCS acc4, acc0
+ CMOVQCS acc5, acc1
+ CMOVQCS y_ptr, acc2
+ CMOVQCS t1, acc3
+
+ MOVQ acc0, (8*0)(res_ptr)
+ MOVQ acc1, (8*1)(res_ptr)
+ MOVQ acc2, (8*2)(res_ptr)
+ MOVQ acc3, (8*3)(res_ptr)
+ MOVQ res_ptr, x_ptr
+ DECQ BX
+ JNE ordSqrLoop
+
+ RET
+/* ---------------------------------------*/
+#undef res_ptr
+#undef x_ptr
+#undef y_ptr
+
+#undef acc0
+#undef acc1
+#undef acc2
+#undef acc3
+#undef acc4
+#undef acc5
+#undef t0
+#undef t1
+/* ---------------------------------------*/
+#define mul0 AX
+#define mul1 DX
+#define acc0 BX
+#define acc1 CX
+#define acc2 R8
+#define acc3 R9
+#define acc4 R10
+#define acc5 R11
+#define acc6 R12
+#define acc7 R13
+#define t0 R14
+#define t1 R15
+#define t2 DI
+#define t3 SI
+#define hlp BP
+/* ---------------------------------------*/
+TEXT p256SubInternal(SB),NOSPLIT,$0
+ XORQ mul0, mul0
+ SUBQ t0, acc4
+ SBBQ t1, acc5
+ SBBQ t2, acc6
+ SBBQ t3, acc7
+ SBBQ $0, mul0
+
+ MOVQ acc4, acc0
+ MOVQ acc5, acc1
+ MOVQ acc6, acc2
+ MOVQ acc7, acc3
+
+ ADDQ $-1, acc4
+ ADCQ p256const0<>(SB), acc5
+ ADCQ $0, acc6
+ ADCQ p256const1<>(SB), acc7
+ ANDQ $1, mul0
+
+ CMOVQEQ acc0, acc4
+ CMOVQEQ acc1, acc5
+ CMOVQEQ acc2, acc6
+ CMOVQEQ acc3, acc7
+
+ RET
+/* ---------------------------------------*/
+TEXT p256MulInternal(SB),NOSPLIT,$8
+ MOVQ acc4, mul0
+ MULQ t0
+ MOVQ mul0, acc0
+ MOVQ mul1, acc1
+
+ MOVQ acc4, mul0
+ MULQ t1
+ ADDQ mul0, acc1
+ ADCQ $0, mul1
+ MOVQ mul1, acc2
+
+ MOVQ acc4, mul0
+ MULQ t2
+ ADDQ mul0, acc2
+ ADCQ $0, mul1
+ MOVQ mul1, acc3
+
+ MOVQ acc4, mul0
+ MULQ t3
+ ADDQ mul0, acc3
+ ADCQ $0, mul1
+ MOVQ mul1, acc4
+
+ MOVQ acc5, mul0
+ MULQ t0
+ ADDQ mul0, acc1
+ ADCQ $0, mul1
+ MOVQ mul1, hlp
+
+ MOVQ acc5, mul0
+ MULQ t1
+ ADDQ hlp, acc2
+ ADCQ $0, mul1
+ ADDQ mul0, acc2
+ ADCQ $0, mul1
+ MOVQ mul1, hlp
+
+ MOVQ acc5, mul0
+ MULQ t2
+ ADDQ hlp, acc3
+ ADCQ $0, mul1
+ ADDQ mul0, acc3
+ ADCQ $0, mul1
+ MOVQ mul1, hlp
+
+ MOVQ acc5, mul0
+ MULQ t3
+ ADDQ hlp, acc4
+ ADCQ $0, mul1
+ ADDQ mul0, acc4
+ ADCQ $0, mul1
+ MOVQ mul1, acc5
+
+ MOVQ acc6, mul0
+ MULQ t0
+ ADDQ mul0, acc2
+ ADCQ $0, mul1
+ MOVQ mul1, hlp
+
+ MOVQ acc6, mul0
+ MULQ t1
+ ADDQ hlp, acc3
+ ADCQ $0, mul1
+ ADDQ mul0, acc3
+ ADCQ $0, mul1
+ MOVQ mul1, hlp
+
+ MOVQ acc6, mul0
+ MULQ t2
+ ADDQ hlp, acc4
+ ADCQ $0, mul1
+ ADDQ mul0, acc4
+ ADCQ $0, mul1
+ MOVQ mul1, hlp
+
+ MOVQ acc6, mul0
+ MULQ t3
+ ADDQ hlp, acc5
+ ADCQ $0, mul1
+ ADDQ mul0, acc5
+ ADCQ $0, mul1
+ MOVQ mul1, acc6
+
+ MOVQ acc7, mul0
+ MULQ t0
+ ADDQ mul0, acc3
+ ADCQ $0, mul1
+ MOVQ mul1, hlp
+
+ MOVQ acc7, mul0
+ MULQ t1
+ ADDQ hlp, acc4
+ ADCQ $0, mul1
+ ADDQ mul0, acc4
+ ADCQ $0, mul1
+ MOVQ mul1, hlp
+
+ MOVQ acc7, mul0
+ MULQ t2
+ ADDQ hlp, acc5
+ ADCQ $0, mul1
+ ADDQ mul0, acc5
+ ADCQ $0, mul1
+ MOVQ mul1, hlp
+
+ MOVQ acc7, mul0
+ MULQ t3
+ ADDQ hlp, acc6
+ ADCQ $0, mul1
+ ADDQ mul0, acc6
+ ADCQ $0, mul1
+ MOVQ mul1, acc7
+ // First reduction step
+ MOVQ acc0, mul0
+ MOVQ acc0, hlp
+ SHLQ $32, acc0
+ MULQ p256const1<>(SB)
+ SHRQ $32, hlp
+ ADDQ acc0, acc1
+ ADCQ hlp, acc2
+ ADCQ mul0, acc3
+ ADCQ $0, mul1
+ MOVQ mul1, acc0
+ // Second reduction step
+ MOVQ acc1, mul0
+ MOVQ acc1, hlp
+ SHLQ $32, acc1
+ MULQ p256const1<>(SB)
+ SHRQ $32, hlp
+ ADDQ acc1, acc2
+ ADCQ hlp, acc3
+ ADCQ mul0, acc0
+ ADCQ $0, mul1
+ MOVQ mul1, acc1
+ // Third reduction step
+ MOVQ acc2, mul0
+ MOVQ acc2, hlp
+ SHLQ $32, acc2
+ MULQ p256const1<>(SB)
+ SHRQ $32, hlp
+ ADDQ acc2, acc3
+ ADCQ hlp, acc0
+ ADCQ mul0, acc1
+ ADCQ $0, mul1
+ MOVQ mul1, acc2
+ // Last reduction step
+ MOVQ acc3, mul0
+ MOVQ acc3, hlp
+ SHLQ $32, acc3
+ MULQ p256const1<>(SB)
+ SHRQ $32, hlp
+ ADDQ acc3, acc0
+ ADCQ hlp, acc1
+ ADCQ mul0, acc2
+ ADCQ $0, mul1
+ MOVQ mul1, acc3
+ MOVQ $0, BP
+ // Add bits [511:256] of the result
+ ADCQ acc0, acc4
+ ADCQ acc1, acc5
+ ADCQ acc2, acc6
+ ADCQ acc3, acc7
+ ADCQ $0, hlp
+ // Copy result
+ MOVQ acc4, acc0
+ MOVQ acc5, acc1
+ MOVQ acc6, acc2
+ MOVQ acc7, acc3
+ // Subtract p256
+ SUBQ $-1, acc4
+ SBBQ p256const0<>(SB) ,acc5
+ SBBQ $0, acc6
+ SBBQ p256const1<>(SB), acc7
+ SBBQ $0, hlp
+ // If the result of the subtraction is negative, restore the previous result
+ CMOVQCS acc0, acc4
+ CMOVQCS acc1, acc5
+ CMOVQCS acc2, acc6
+ CMOVQCS acc3, acc7
+
+ RET
+/* ---------------------------------------*/
+TEXT p256SqrInternal(SB),NOSPLIT,$8
+
+ MOVQ acc4, mul0
+ MULQ acc5
+ MOVQ mul0, acc1
+ MOVQ mul1, acc2
+
+ MOVQ acc4, mul0
+ MULQ acc6
+ ADDQ mul0, acc2
+ ADCQ $0, mul1
+ MOVQ mul1, acc3
+
+ MOVQ acc4, mul0
+ MULQ acc7
+ ADDQ mul0, acc3
+ ADCQ $0, mul1
+ MOVQ mul1, t0
+
+ MOVQ acc5, mul0
+ MULQ acc6
+ ADDQ mul0, acc3
+ ADCQ $0, mul1
+ MOVQ mul1, hlp
+
+ MOVQ acc5, mul0
+ MULQ acc7
+ ADDQ hlp, t0
+ ADCQ $0, mul1
+ ADDQ mul0, t0
+ ADCQ $0, mul1
+ MOVQ mul1, t1
+
+ MOVQ acc6, mul0
+ MULQ acc7
+ ADDQ mul0, t1
+ ADCQ $0, mul1
+ MOVQ mul1, t2
+ XORQ t3, t3
+ // *2
+ ADDQ acc1, acc1
+ ADCQ acc2, acc2
+ ADCQ acc3, acc3
+ ADCQ t0, t0
+ ADCQ t1, t1
+ ADCQ t2, t2
+ ADCQ $0, t3
+ // Missing products
+ MOVQ acc4, mul0
+ MULQ mul0
+ MOVQ mul0, acc0
+ MOVQ DX, acc4
+
+ MOVQ acc5, mul0
+ MULQ mul0
+ ADDQ acc4, acc1
+ ADCQ mul0, acc2
+ ADCQ $0, DX
+ MOVQ DX, acc4
+
+ MOVQ acc6, mul0
+ MULQ mul0
+ ADDQ acc4, acc3
+ ADCQ mul0, t0
+ ADCQ $0, DX
+ MOVQ DX, acc4
+
+ MOVQ acc7, mul0
+ MULQ mul0
+ ADDQ acc4, t1
+ ADCQ mul0, t2
+ ADCQ DX, t3
+ // First reduction step
+ MOVQ acc0, mul0
+ MOVQ acc0, hlp
+ SHLQ $32, acc0
+ MULQ p256const1<>(SB)
+ SHRQ $32, hlp
+ ADDQ acc0, acc1
+ ADCQ hlp, acc2
+ ADCQ mul0, acc3
+ ADCQ $0, mul1
+ MOVQ mul1, acc0
+ // Second reduction step
+ MOVQ acc1, mul0
+ MOVQ acc1, hlp
+ SHLQ $32, acc1
+ MULQ p256const1<>(SB)
+ SHRQ $32, hlp
+ ADDQ acc1, acc2
+ ADCQ hlp, acc3
+ ADCQ mul0, acc0
+ ADCQ $0, mul1
+ MOVQ mul1, acc1
+ // Third reduction step
+ MOVQ acc2, mul0
+ MOVQ acc2, hlp
+ SHLQ $32, acc2
+ MULQ p256const1<>(SB)
+ SHRQ $32, hlp
+ ADDQ acc2, acc3
+ ADCQ hlp, acc0
+ ADCQ mul0, acc1
+ ADCQ $0, mul1
+ MOVQ mul1, acc2
+ // Last reduction step
+ MOVQ acc3, mul0
+ MOVQ acc3, hlp
+ SHLQ $32, acc3
+ MULQ p256const1<>(SB)
+ SHRQ $32, hlp
+ ADDQ acc3, acc0
+ ADCQ hlp, acc1
+ ADCQ mul0, acc2
+ ADCQ $0, mul1
+ MOVQ mul1, acc3
+ MOVQ $0, BP
+ // Add bits [511:256] of the result
+ ADCQ acc0, t0
+ ADCQ acc1, t1
+ ADCQ acc2, t2
+ ADCQ acc3, t3
+ ADCQ $0, hlp
+ // Copy result
+ MOVQ t0, acc4
+ MOVQ t1, acc5
+ MOVQ t2, acc6
+ MOVQ t3, acc7
+ // Subtract p256
+ SUBQ $-1, acc4
+ SBBQ p256const0<>(SB) ,acc5
+ SBBQ $0, acc6
+ SBBQ p256const1<>(SB), acc7
+ SBBQ $0, hlp
+ // If the result of the subtraction is negative, restore the previous result
+ CMOVQCS t0, acc4
+ CMOVQCS t1, acc5
+ CMOVQCS t2, acc6
+ CMOVQCS t3, acc7
+
+ RET
+/* ---------------------------------------*/
+#define p256MulBy2Inline\
+ XORQ mul0, mul0;\
+ ADDQ acc4, acc4;\
+ ADCQ acc5, acc5;\
+ ADCQ acc6, acc6;\
+ ADCQ acc7, acc7;\
+ ADCQ $0, mul0;\
+ MOVQ acc4, t0;\
+ MOVQ acc5, t1;\
+ MOVQ acc6, t2;\
+ MOVQ acc7, t3;\
+ SUBQ $-1, t0;\
+ SBBQ p256const0<>(SB), t1;\
+ SBBQ $0, t2;\
+ SBBQ p256const1<>(SB), t3;\
+ SBBQ $0, mul0;\
+ CMOVQCS acc4, t0;\
+ CMOVQCS acc5, t1;\
+ CMOVQCS acc6, t2;\
+ CMOVQCS acc7, t3;
+/* ---------------------------------------*/
+#define p256AddInline \
+ XORQ mul0, mul0;\
+ ADDQ t0, acc4;\
+ ADCQ t1, acc5;\
+ ADCQ t2, acc6;\
+ ADCQ t3, acc7;\
+ ADCQ $0, mul0;\
+ MOVQ acc4, t0;\
+ MOVQ acc5, t1;\
+ MOVQ acc6, t2;\
+ MOVQ acc7, t3;\
+ SUBQ $-1, t0;\
+ SBBQ p256const0<>(SB), t1;\
+ SBBQ $0, t2;\
+ SBBQ p256const1<>(SB), t3;\
+ SBBQ $0, mul0;\
+ CMOVQCS acc4, t0;\
+ CMOVQCS acc5, t1;\
+ CMOVQCS acc6, t2;\
+ CMOVQCS acc7, t3;
+/* ---------------------------------------*/
+#define LDacc(src) MOVQ src(8*0), acc4; MOVQ src(8*1), acc5; MOVQ src(8*2), acc6; MOVQ src(8*3), acc7
+#define LDt(src) MOVQ src(8*0), t0; MOVQ src(8*1), t1; MOVQ src(8*2), t2; MOVQ src(8*3), t3
+#define ST(dst) MOVQ acc4, dst(8*0); MOVQ acc5, dst(8*1); MOVQ acc6, dst(8*2); MOVQ acc7, dst(8*3)
+#define STt(dst) MOVQ t0, dst(8*0); MOVQ t1, dst(8*1); MOVQ t2, dst(8*2); MOVQ t3, dst(8*3)
+#define acc2t MOVQ acc4, t0; MOVQ acc5, t1; MOVQ acc6, t2; MOVQ acc7, t3
+#define t2acc MOVQ t0, acc4; MOVQ t1, acc5; MOVQ t2, acc6; MOVQ t3, acc7
+/* ---------------------------------------*/
+#define x1in(off) (32*0 + off)(SP)
+#define y1in(off) (32*1 + off)(SP)
+#define z1in(off) (32*2 + off)(SP)
+#define x2in(off) (32*3 + off)(SP)
+#define y2in(off) (32*4 + off)(SP)
+#define xout(off) (32*5 + off)(SP)
+#define yout(off) (32*6 + off)(SP)
+#define zout(off) (32*7 + off)(SP)
+#define s2(off) (32*8 + off)(SP)
+#define z1sqr(off) (32*9 + off)(SP)
+#define h(off) (32*10 + off)(SP)
+#define r(off) (32*11 + off)(SP)
+#define hsqr(off) (32*12 + off)(SP)
+#define rsqr(off) (32*13 + off)(SP)
+#define hcub(off) (32*14 + off)(SP)
+#define rptr (32*15)(SP)
+#define sel_save (32*15 + 8)(SP)
+#define zero_save (32*15 + 8 + 4)(SP)
+
+// func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
+TEXT ·p256PointAddAffineAsm(SB),0,$512-96
+ // Move input to stack in order to free registers
+ MOVQ res+0(FP), AX
+ MOVQ in1+24(FP), BX
+ MOVQ in2+48(FP), CX
+ MOVQ sign+72(FP), DX
+ MOVQ sel+80(FP), t1
+ MOVQ zero+88(FP), t2
+
+ MOVOU (16*0)(BX), X0
+ MOVOU (16*1)(BX), X1
+ MOVOU (16*2)(BX), X2
+ MOVOU (16*3)(BX), X3
+ MOVOU (16*4)(BX), X4
+ MOVOU (16*5)(BX), X5
+
+ MOVOU X0, x1in(16*0)
+ MOVOU X1, x1in(16*1)
+ MOVOU X2, y1in(16*0)
+ MOVOU X3, y1in(16*1)
+ MOVOU X4, z1in(16*0)
+ MOVOU X5, z1in(16*1)
+
+ MOVOU (16*0)(CX), X0
+ MOVOU (16*1)(CX), X1
+
+ MOVOU X0, x2in(16*0)
+ MOVOU X1, x2in(16*1)
+ // Store pointer to result
+ MOVQ mul0, rptr
+ MOVL t1, sel_save
+ MOVL t2, zero_save
+ // Negate y2in based on sign
+ MOVQ (16*2 + 8*0)(CX), acc4
+ MOVQ (16*2 + 8*1)(CX), acc5
+ MOVQ (16*2 + 8*2)(CX), acc6
+ MOVQ (16*2 + 8*3)(CX), acc7
+ MOVQ $-1, acc0
+ MOVQ p256const0<>(SB), acc1
+ MOVQ $0, acc2
+ MOVQ p256const1<>(SB), acc3
+ XORQ mul0, mul0
+ // Speculatively subtract
+ SUBQ acc4, acc0
+ SBBQ acc5, acc1
+ SBBQ acc6, acc2
+ SBBQ acc7, acc3
+ SBBQ $0, mul0
+ MOVQ acc0, t0
+ MOVQ acc1, t1
+ MOVQ acc2, t2
+ MOVQ acc3, t3
+ // Add in case the operand was > p256
+ ADDQ $-1, acc0
+ ADCQ p256const0<>(SB), acc1
+ ADCQ $0, acc2
+ ADCQ p256const1<>(SB), acc3
+ ADCQ $0, mul0
+ CMOVQNE t0, acc0
+ CMOVQNE t1, acc1
+ CMOVQNE t2, acc2
+ CMOVQNE t3, acc3
+ // If condition is 0, keep original value
+ TESTQ DX, DX
+ CMOVQEQ acc4, acc0
+ CMOVQEQ acc5, acc1
+ CMOVQEQ acc6, acc2
+ CMOVQEQ acc7, acc3
+ // Store result
+ MOVQ acc0, y2in(8*0)
+ MOVQ acc1, y2in(8*1)
+ MOVQ acc2, y2in(8*2)
+ MOVQ acc3, y2in(8*3)
+ // Begin point add
+ LDacc (z1in)
+ CALL p256SqrInternal(SB) // z1ˆ2
+ ST (z1sqr)
+
+ LDt (x2in)
+ CALL p256MulInternal(SB) // x2 * z1ˆ2
+
+ LDt (x1in)
+ CALL p256SubInternal(SB) // h = u2 - u1
+ ST (h)
+
+ LDt (z1in)
+ CALL p256MulInternal(SB) // z3 = h * z1
+ ST (zout)
+
+ LDacc (z1sqr)
+ CALL p256MulInternal(SB) // z1ˆ3
+
+ LDt (y2in)
+ CALL p256MulInternal(SB) // s2 = y2 * z1ˆ3
+ ST (s2)
+
+ LDt (y1in)
+ CALL p256SubInternal(SB) // r = s2 - s1
+ ST (r)
+
+ CALL p256SqrInternal(SB) // rsqr = rˆ2
+ ST (rsqr)
+
+ LDacc (h)
+ CALL p256SqrInternal(SB) // hsqr = hˆ2
+ ST (hsqr)
+
+ LDt (h)
+ CALL p256MulInternal(SB) // hcub = hˆ3
+ ST (hcub)
+
+ LDt (y1in)
+ CALL p256MulInternal(SB) // y1 * hˆ3
+ ST (s2)
+
+ LDacc (x1in)
+ LDt (hsqr)
+ CALL p256MulInternal(SB) // u1 * hˆ2
+ ST (h)
+
+ p256MulBy2Inline // u1 * hˆ2 * 2, inline
+ LDacc (rsqr)
+ CALL p256SubInternal(SB) // rˆ2 - u1 * hˆ2 * 2
+
+ LDt (hcub)
+ CALL p256SubInternal(SB)
+ ST (xout)
+
+ MOVQ acc4, t0
+ MOVQ acc5, t1
+ MOVQ acc6, t2
+ MOVQ acc7, t3
+ LDacc (h)
+ CALL p256SubInternal(SB)
+
+ LDt (r)
+ CALL p256MulInternal(SB)
+
+ LDt (s2)
+ CALL p256SubInternal(SB)
+ ST (yout)
+ // Load stored values from stack
+ MOVQ rptr, AX
+ MOVL sel_save, BX
+ MOVL zero_save, CX
+ // The result is not valid if (sel == 0), conditional choose
+ MOVOU xout(16*0), X0
+ MOVOU xout(16*1), X1
+ MOVOU yout(16*0), X2
+ MOVOU yout(16*1), X3
+ MOVOU zout(16*0), X4
+ MOVOU zout(16*1), X5
+
+ MOVL BX, X6
+ MOVL CX, X7
+
+ PXOR X8, X8
+ PCMPEQL X9, X9
+
+ PSHUFD $0, X6, X6
+ PSHUFD $0, X7, X7
+
+ PCMPEQL X8, X6
+ PCMPEQL X8, X7
+
+ MOVOU X6, X15
+ PANDN X9, X15
+
+ MOVOU x1in(16*0), X9
+ MOVOU x1in(16*1), X10
+ MOVOU y1in(16*0), X11
+ MOVOU y1in(16*1), X12
+ MOVOU z1in(16*0), X13
+ MOVOU z1in(16*1), X14
+
+ PAND X15, X0
+ PAND X15, X1
+ PAND X15, X2
+ PAND X15, X3
+ PAND X15, X4
+ PAND X15, X5
+
+ PAND X6, X9
+ PAND X6, X10
+ PAND X6, X11
+ PAND X6, X12
+ PAND X6, X13
+ PAND X6, X14
+
+ PXOR X9, X0
+ PXOR X10, X1
+ PXOR X11, X2
+ PXOR X12, X3
+ PXOR X13, X4
+ PXOR X14, X5
+ // Similarly if zero == 0
+ PCMPEQL X9, X9
+ MOVOU X7, X15
+ PANDN X9, X15
+
+ MOVOU x2in(16*0), X9
+ MOVOU x2in(16*1), X10
+ MOVOU y2in(16*0), X11
+ MOVOU y2in(16*1), X12
+ MOVOU p256one<>+0x00(SB), X13
+ MOVOU p256one<>+0x10(SB), X14
+
+ PAND X15, X0
+ PAND X15, X1
+ PAND X15, X2
+ PAND X15, X3
+ PAND X15, X4
+ PAND X15, X5
+
+ PAND X7, X9
+ PAND X7, X10
+ PAND X7, X11
+ PAND X7, X12
+ PAND X7, X13
+ PAND X7, X14
+
+ PXOR X9, X0
+ PXOR X10, X1
+ PXOR X11, X2
+ PXOR X12, X3
+ PXOR X13, X4
+ PXOR X14, X5
+ // Finally output the result
+ MOVOU X0, (16*0)(AX)
+ MOVOU X1, (16*1)(AX)
+ MOVOU X2, (16*2)(AX)
+ MOVOU X3, (16*3)(AX)
+ MOVOU X4, (16*4)(AX)
+ MOVOU X5, (16*5)(AX)
+ MOVQ $0, rptr
+
+ RET
+#undef x1in
+#undef y1in
+#undef z1in
+#undef x2in
+#undef y2in
+#undef xout
+#undef yout
+#undef zout
+#undef s2
+#undef z1sqr
+#undef h
+#undef r
+#undef hsqr
+#undef rsqr
+#undef hcub
+#undef rptr
+#undef sel_save
+#undef zero_save
+
+// p256IsZero returns 1 in AX if [acc4..acc7] represents zero and zero
+// otherwise. It writes to [acc4..acc7], t0 and t1.
+TEXT p256IsZero(SB),NOSPLIT,$0
+ // AX contains a flag that is set if the input is zero.
+ XORQ AX, AX
+ MOVQ $1, t1
+
+ // Check whether [acc4..acc7] are all zero.
+ MOVQ acc4, t0
+ ORQ acc5, t0
+ ORQ acc6, t0
+ ORQ acc7, t0
+
+ // Set the zero flag if so. (CMOV of a constant to a register doesn't
+ // appear to be supported in Go. Thus t1 = 1.)
+ CMOVQEQ t1, AX
+
+ // XOR [acc4..acc7] with P and compare with zero again.
+ XORQ $-1, acc4
+ XORQ p256const0<>(SB), acc5
+ XORQ p256const1<>(SB), acc7
+ ORQ acc5, acc4
+ ORQ acc6, acc4
+ ORQ acc7, acc4
+
+ // Set the zero flag if so.
+ CMOVQEQ t1, AX
+ RET
+
+/* ---------------------------------------*/
+#define x1in(off) (32*0 + off)(SP)
+#define y1in(off) (32*1 + off)(SP)
+#define z1in(off) (32*2 + off)(SP)
+#define x2in(off) (32*3 + off)(SP)
+#define y2in(off) (32*4 + off)(SP)
+#define z2in(off) (32*5 + off)(SP)
+
+#define xout(off) (32*6 + off)(SP)
+#define yout(off) (32*7 + off)(SP)
+#define zout(off) (32*8 + off)(SP)
+
+#define u1(off) (32*9 + off)(SP)
+#define u2(off) (32*10 + off)(SP)
+#define s1(off) (32*11 + off)(SP)
+#define s2(off) (32*12 + off)(SP)
+#define z1sqr(off) (32*13 + off)(SP)
+#define z2sqr(off) (32*14 + off)(SP)
+#define h(off) (32*15 + off)(SP)
+#define r(off) (32*16 + off)(SP)
+#define hsqr(off) (32*17 + off)(SP)
+#define rsqr(off) (32*18 + off)(SP)
+#define hcub(off) (32*19 + off)(SP)
+#define rptr (32*20)(SP)
+#define points_eq (32*20+8)(SP)
+
+//func p256PointAddAsm(res, in1, in2 []uint64) int
+TEXT ·p256PointAddAsm(SB),0,$680-80
+ // See https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl
+ // Move input to stack in order to free registers
+ MOVQ res+0(FP), AX
+ MOVQ in1+24(FP), BX
+ MOVQ in2+48(FP), CX
+
+ MOVOU (16*0)(BX), X0
+ MOVOU (16*1)(BX), X1
+ MOVOU (16*2)(BX), X2
+ MOVOU (16*3)(BX), X3
+ MOVOU (16*4)(BX), X4
+ MOVOU (16*5)(BX), X5
+
+ MOVOU X0, x1in(16*0)
+ MOVOU X1, x1in(16*1)
+ MOVOU X2, y1in(16*0)
+ MOVOU X3, y1in(16*1)
+ MOVOU X4, z1in(16*0)
+ MOVOU X5, z1in(16*1)
+
+ MOVOU (16*0)(CX), X0
+ MOVOU (16*1)(CX), X1
+ MOVOU (16*2)(CX), X2
+ MOVOU (16*3)(CX), X3
+ MOVOU (16*4)(CX), X4
+ MOVOU (16*5)(CX), X5
+
+ MOVOU X0, x2in(16*0)
+ MOVOU X1, x2in(16*1)
+ MOVOU X2, y2in(16*0)
+ MOVOU X3, y2in(16*1)
+ MOVOU X4, z2in(16*0)
+ MOVOU X5, z2in(16*1)
+ // Store pointer to result
+ MOVQ AX, rptr
+ // Begin point add
+ LDacc (z2in)
+ CALL p256SqrInternal(SB) // z2ˆ2
+ ST (z2sqr)
+ LDt (z2in)
+ CALL p256MulInternal(SB) // z2ˆ3
+ LDt (y1in)
+ CALL p256MulInternal(SB) // s1 = z2ˆ3*y1
+ ST (s1)
+
+ LDacc (z1in)
+ CALL p256SqrInternal(SB) // z1ˆ2
+ ST (z1sqr)
+ LDt (z1in)
+ CALL p256MulInternal(SB) // z1ˆ3
+ LDt (y2in)
+ CALL p256MulInternal(SB) // s2 = z1ˆ3*y2
+ ST (s2)
+
+ LDt (s1)
+ CALL p256SubInternal(SB) // r = s2 - s1
+ ST (r)
+ CALL p256IsZero(SB)
+ MOVQ AX, points_eq
+
+ LDacc (z2sqr)
+ LDt (x1in)
+ CALL p256MulInternal(SB) // u1 = x1 * z2ˆ2
+ ST (u1)
+ LDacc (z1sqr)
+ LDt (x2in)
+ CALL p256MulInternal(SB) // u2 = x2 * z1ˆ2
+ ST (u2)
+
+ LDt (u1)
+ CALL p256SubInternal(SB) // h = u2 - u1
+ ST (h)
+ CALL p256IsZero(SB)
+ ANDQ points_eq, AX
+ MOVQ AX, points_eq
+
+ LDacc (r)
+ CALL p256SqrInternal(SB) // rsqr = rˆ2
+ ST (rsqr)
+
+ LDacc (h)
+ CALL p256SqrInternal(SB) // hsqr = hˆ2
+ ST (hsqr)
+
+ LDt (h)
+ CALL p256MulInternal(SB) // hcub = hˆ3
+ ST (hcub)
+
+ LDt (s1)
+ CALL p256MulInternal(SB)
+ ST (s2)
+
+ LDacc (z1in)
+ LDt (z2in)
+ CALL p256MulInternal(SB) // z1 * z2
+ LDt (h)
+ CALL p256MulInternal(SB) // z1 * z2 * h
+ ST (zout)
+
+ LDacc (hsqr)
+ LDt (u1)
+ CALL p256MulInternal(SB) // hˆ2 * u1
+ ST (u2)
+
+ p256MulBy2Inline // u1 * hˆ2 * 2, inline
+ LDacc (rsqr)
+ CALL p256SubInternal(SB) // rˆ2 - u1 * hˆ2 * 2
+
+ LDt (hcub)
+ CALL p256SubInternal(SB)
+ ST (xout)
+
+ MOVQ acc4, t0
+ MOVQ acc5, t1
+ MOVQ acc6, t2
+ MOVQ acc7, t3
+ LDacc (u2)
+ CALL p256SubInternal(SB)
+
+ LDt (r)
+ CALL p256MulInternal(SB)
+
+ LDt (s2)
+ CALL p256SubInternal(SB)
+ ST (yout)
+
+ MOVOU xout(16*0), X0
+ MOVOU xout(16*1), X1
+ MOVOU yout(16*0), X2
+ MOVOU yout(16*1), X3
+ MOVOU zout(16*0), X4
+ MOVOU zout(16*1), X5
+ // Finally output the result
+ MOVQ rptr, AX
+ MOVQ $0, rptr
+ MOVOU X0, (16*0)(AX)
+ MOVOU X1, (16*1)(AX)
+ MOVOU X2, (16*2)(AX)
+ MOVOU X3, (16*3)(AX)
+ MOVOU X4, (16*4)(AX)
+ MOVOU X5, (16*5)(AX)
+
+ MOVQ points_eq, AX
+ MOVQ AX, ret+72(FP)
+
+ RET
+#undef x1in
+#undef y1in
+#undef z1in
+#undef x2in
+#undef y2in
+#undef z2in
+#undef xout
+#undef yout
+#undef zout
+#undef s1
+#undef s2
+#undef u1
+#undef u2
+#undef z1sqr
+#undef z2sqr
+#undef h
+#undef r
+#undef hsqr
+#undef rsqr
+#undef hcub
+#undef rptr
+/* ---------------------------------------*/
+#define x(off) (32*0 + off)(SP)
+#define y(off) (32*1 + off)(SP)
+#define z(off) (32*2 + off)(SP)
+
+#define s(off) (32*3 + off)(SP)
+#define m(off) (32*4 + off)(SP)
+#define zsqr(off) (32*5 + off)(SP)
+#define tmp(off) (32*6 + off)(SP)
+#define rptr (32*7)(SP)
+
+//func p256PointDoubleAsm(res, in []uint64)
+TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$256-48
+ // Move input to stack in order to free registers
+ MOVQ res+0(FP), AX
+ MOVQ in+24(FP), BX
+
+ MOVOU (16*0)(BX), X0
+ MOVOU (16*1)(BX), X1
+ MOVOU (16*2)(BX), X2
+ MOVOU (16*3)(BX), X3
+ MOVOU (16*4)(BX), X4
+ MOVOU (16*5)(BX), X5
+
+ MOVOU X0, x(16*0)
+ MOVOU X1, x(16*1)
+ MOVOU X2, y(16*0)
+ MOVOU X3, y(16*1)
+ MOVOU X4, z(16*0)
+ MOVOU X5, z(16*1)
+ // Store pointer to result
+ MOVQ AX, rptr
+ // Begin point double
+ LDacc (z)
+ CALL p256SqrInternal(SB)
+ ST (zsqr)
+
+ LDt (x)
+ p256AddInline
+ STt (m)
+
+ LDacc (z)
+ LDt (y)
+ CALL p256MulInternal(SB)
+ p256MulBy2Inline
+ MOVQ rptr, AX
+ // Store z
+ MOVQ t0, (16*4 + 8*0)(AX)
+ MOVQ t1, (16*4 + 8*1)(AX)
+ MOVQ t2, (16*4 + 8*2)(AX)
+ MOVQ t3, (16*4 + 8*3)(AX)
+
+ LDacc (x)
+ LDt (zsqr)
+ CALL p256SubInternal(SB)
+ LDt (m)
+ CALL p256MulInternal(SB)
+ ST (m)
+ // Multiply by 3
+ p256MulBy2Inline
+ LDacc (m)
+ p256AddInline
+ STt (m)
+ ////////////////////////
+ LDacc (y)
+ p256MulBy2Inline
+ t2acc
+ CALL p256SqrInternal(SB)
+ ST (s)
+ CALL p256SqrInternal(SB)
+ // Divide by 2
+ XORQ mul0, mul0
+ MOVQ acc4, t0
+ MOVQ acc5, t1
+ MOVQ acc6, t2
+ MOVQ acc7, t3
+
+ ADDQ $-1, acc4
+ ADCQ p256const0<>(SB), acc5
+ ADCQ $0, acc6
+ ADCQ p256const1<>(SB), acc7
+ ADCQ $0, mul0
+ TESTQ $1, t0
+
+ CMOVQEQ t0, acc4
+ CMOVQEQ t1, acc5
+ CMOVQEQ t2, acc6
+ CMOVQEQ t3, acc7
+ ANDQ t0, mul0
+
+ SHRQ $1, acc5, acc4
+ SHRQ $1, acc6, acc5
+ SHRQ $1, acc7, acc6
+ SHRQ $1, mul0, acc7
+ ST (y)
+ /////////////////////////
+ LDacc (x)
+ LDt (s)
+ CALL p256MulInternal(SB)
+ ST (s)
+ p256MulBy2Inline
+ STt (tmp)
+
+ LDacc (m)
+ CALL p256SqrInternal(SB)
+ LDt (tmp)
+ CALL p256SubInternal(SB)
+
+ MOVQ rptr, AX
+ // Store x
+ MOVQ acc4, (16*0 + 8*0)(AX)
+ MOVQ acc5, (16*0 + 8*1)(AX)
+ MOVQ acc6, (16*0 + 8*2)(AX)
+ MOVQ acc7, (16*0 + 8*3)(AX)
+
+ acc2t
+ LDacc (s)
+ CALL p256SubInternal(SB)
+
+ LDt (m)
+ CALL p256MulInternal(SB)
+
+ LDt (y)
+ CALL p256SubInternal(SB)
+ MOVQ rptr, AX
+ // Store y
+ MOVQ acc4, (16*2 + 8*0)(AX)
+ MOVQ acc5, (16*2 + 8*1)(AX)
+ MOVQ acc6, (16*2 + 8*2)(AX)
+ MOVQ acc7, (16*2 + 8*3)(AX)
+ ///////////////////////
+ MOVQ $0, rptr
+
+ RET
+/* ---------------------------------------*/
diff --git a/src/crypto/elliptic/p256_asm_arm64.s b/src/crypto/elliptic/p256_asm_arm64.s
new file mode 100644
index 0000000..f571c50
--- /dev/null
+++ b/src/crypto/elliptic/p256_asm_arm64.s
@@ -0,0 +1,1529 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file contains constant-time, 64-bit assembly implementation of
+// P256. The optimizations performed here are described in detail in:
+// S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
+// 256-bit primes"
+// http://link.springer.com/article/10.1007%2Fs13389-014-0090-x
+// https://eprint.iacr.org/2013/816.pdf
+
+#include "textflag.h"
+
+#define res_ptr R0
+#define a_ptr R1
+#define b_ptr R2
+
+#define acc0 R3
+#define acc1 R4
+#define acc2 R5
+#define acc3 R6
+
+#define acc4 R7
+#define acc5 R8
+#define acc6 R9
+#define acc7 R10
+#define t0 R11
+#define t1 R12
+#define t2 R13
+#define t3 R14
+#define const0 R15
+#define const1 R16
+
+#define hlp0 R17
+#define hlp1 res_ptr
+
+#define x0 R19
+#define x1 R20
+#define x2 R21
+#define x3 R22
+#define y0 R23
+#define y1 R24
+#define y2 R25
+#define y3 R26
+
+#define const2 t2
+#define const3 t3
+
+DATA p256const0<>+0x00(SB)/8, $0x00000000ffffffff
+DATA p256const1<>+0x00(SB)/8, $0xffffffff00000001
+DATA p256ordK0<>+0x00(SB)/8, $0xccd1c8aaee00bc4f
+DATA p256ord<>+0x00(SB)/8, $0xf3b9cac2fc632551
+DATA p256ord<>+0x08(SB)/8, $0xbce6faada7179e84
+DATA p256ord<>+0x10(SB)/8, $0xffffffffffffffff
+DATA p256ord<>+0x18(SB)/8, $0xffffffff00000000
+DATA p256one<>+0x00(SB)/8, $0x0000000000000001
+DATA p256one<>+0x08(SB)/8, $0xffffffff00000000
+DATA p256one<>+0x10(SB)/8, $0xffffffffffffffff
+DATA p256one<>+0x18(SB)/8, $0x00000000fffffffe
+GLOBL p256const0<>(SB), 8, $8
+GLOBL p256const1<>(SB), 8, $8
+GLOBL p256ordK0<>(SB), 8, $8
+GLOBL p256ord<>(SB), 8, $32
+GLOBL p256one<>(SB), 8, $32
+
+/* ---------------------------------------*/
+// func p256LittleToBig(res []byte, in []uint64)
+TEXT ·p256LittleToBig(SB),NOSPLIT,$0
+ JMP ·p256BigToLittle(SB)
+/* ---------------------------------------*/
+// func p256BigToLittle(res []uint64, in []byte)
+TEXT ·p256BigToLittle(SB),NOSPLIT,$0
+ MOVD res+0(FP), res_ptr
+ MOVD in+24(FP), a_ptr
+
+ LDP 0*16(a_ptr), (acc0, acc1)
+ LDP 1*16(a_ptr), (acc2, acc3)
+
+ REV acc0, acc0
+ REV acc1, acc1
+ REV acc2, acc2
+ REV acc3, acc3
+
+ STP (acc3, acc2), 0*16(res_ptr)
+ STP (acc1, acc0), 1*16(res_ptr)
+ RET
+/* ---------------------------------------*/
+// func p256MovCond(res, a, b []uint64, cond int)
+// If cond == 0 res=b, else res=a
+TEXT ·p256MovCond(SB),NOSPLIT,$0
+ MOVD res+0(FP), res_ptr
+ MOVD a+24(FP), a_ptr
+ MOVD b+48(FP), b_ptr
+ MOVD cond+72(FP), R3
+
+ CMP $0, R3
+ // Two remarks:
+ // 1) Will want to revisit NEON, when support is better
+ // 2) CSEL might not be constant time on all ARM processors
+ LDP 0*16(a_ptr), (R4, R5)
+ LDP 1*16(a_ptr), (R6, R7)
+ LDP 2*16(a_ptr), (R8, R9)
+ LDP 0*16(b_ptr), (R16, R17)
+ LDP 1*16(b_ptr), (R19, R20)
+ LDP 2*16(b_ptr), (R21, R22)
+ CSEL EQ, R16, R4, R4
+ CSEL EQ, R17, R5, R5
+ CSEL EQ, R19, R6, R6
+ CSEL EQ, R20, R7, R7
+ CSEL EQ, R21, R8, R8
+ CSEL EQ, R22, R9, R9
+ STP (R4, R5), 0*16(res_ptr)
+ STP (R6, R7), 1*16(res_ptr)
+ STP (R8, R9), 2*16(res_ptr)
+
+ LDP 3*16(a_ptr), (R4, R5)
+ LDP 4*16(a_ptr), (R6, R7)
+ LDP 5*16(a_ptr), (R8, R9)
+ LDP 3*16(b_ptr), (R16, R17)
+ LDP 4*16(b_ptr), (R19, R20)
+ LDP 5*16(b_ptr), (R21, R22)
+ CSEL EQ, R16, R4, R4
+ CSEL EQ, R17, R5, R5
+ CSEL EQ, R19, R6, R6
+ CSEL EQ, R20, R7, R7
+ CSEL EQ, R21, R8, R8
+ CSEL EQ, R22, R9, R9
+ STP (R4, R5), 3*16(res_ptr)
+ STP (R6, R7), 4*16(res_ptr)
+ STP (R8, R9), 5*16(res_ptr)
+
+ RET
+/* ---------------------------------------*/
+// func p256NegCond(val []uint64, cond int)
+TEXT ·p256NegCond(SB),NOSPLIT,$0
+ MOVD val+0(FP), a_ptr
+ MOVD cond+24(FP), hlp0
+ MOVD a_ptr, res_ptr
+ // acc = poly
+ MOVD $-1, acc0
+ MOVD p256const0<>(SB), acc1
+ MOVD $0, acc2
+ MOVD p256const1<>(SB), acc3
+ // Load the original value
+ LDP 0*16(a_ptr), (t0, t1)
+ LDP 1*16(a_ptr), (t2, t3)
+ // Speculatively subtract
+ SUBS t0, acc0
+ SBCS t1, acc1
+ SBCS t2, acc2
+ SBC t3, acc3
+ // If condition is 0, keep original value
+ CMP $0, hlp0
+ CSEL EQ, t0, acc0, acc0
+ CSEL EQ, t1, acc1, acc1
+ CSEL EQ, t2, acc2, acc2
+ CSEL EQ, t3, acc3, acc3
+ // Store result
+ STP (acc0, acc1), 0*16(res_ptr)
+ STP (acc2, acc3), 1*16(res_ptr)
+
+ RET
+/* ---------------------------------------*/
+// func p256Sqr(res, in []uint64, n int)
+TEXT ·p256Sqr(SB),NOSPLIT,$0
+ MOVD res+0(FP), res_ptr
+ MOVD in+24(FP), a_ptr
+ MOVD n+48(FP), b_ptr
+
+ MOVD p256const0<>(SB), const0
+ MOVD p256const1<>(SB), const1
+
+ LDP 0*16(a_ptr), (x0, x1)
+ LDP 1*16(a_ptr), (x2, x3)
+
+sqrLoop:
+ SUB $1, b_ptr
+ CALL p256SqrInternal<>(SB)
+ MOVD y0, x0
+ MOVD y1, x1
+ MOVD y2, x2
+ MOVD y3, x3
+ CBNZ b_ptr, sqrLoop
+
+ STP (y0, y1), 0*16(res_ptr)
+ STP (y2, y3), 1*16(res_ptr)
+ RET
+/* ---------------------------------------*/
+// func p256Mul(res, in1, in2 []uint64)
+TEXT ·p256Mul(SB),NOSPLIT,$0
+ MOVD res+0(FP), res_ptr
+ MOVD in1+24(FP), a_ptr
+ MOVD in2+48(FP), b_ptr
+
+ MOVD p256const0<>(SB), const0
+ MOVD p256const1<>(SB), const1
+
+ LDP 0*16(a_ptr), (x0, x1)
+ LDP 1*16(a_ptr), (x2, x3)
+
+ LDP 0*16(b_ptr), (y0, y1)
+ LDP 1*16(b_ptr), (y2, y3)
+
+ CALL p256MulInternal<>(SB)
+
+ STP (y0, y1), 0*16(res_ptr)
+ STP (y2, y3), 1*16(res_ptr)
+ RET
+/* ---------------------------------------*/
+// func p256FromMont(res, in []uint64)
+TEXT ·p256FromMont(SB),NOSPLIT,$0
+ MOVD res+0(FP), res_ptr
+ MOVD in+24(FP), a_ptr
+
+ MOVD p256const0<>(SB), const0
+ MOVD p256const1<>(SB), const1
+
+ LDP 0*16(a_ptr), (acc0, acc1)
+ LDP 1*16(a_ptr), (acc2, acc3)
+ // Only reduce, no multiplications are needed
+ // First reduction step
+ ADDS acc0<<32, acc1, acc1
+ LSR $32, acc0, t0
+ MUL acc0, const1, t1
+ UMULH acc0, const1, acc0
+ ADCS t0, acc2
+ ADCS t1, acc3
+ ADC $0, acc0
+ // Second reduction step
+ ADDS acc1<<32, acc2, acc2
+ LSR $32, acc1, t0
+ MUL acc1, const1, t1
+ UMULH acc1, const1, acc1
+ ADCS t0, acc3
+ ADCS t1, acc0
+ ADC $0, acc1
+ // Third reduction step
+ ADDS acc2<<32, acc3, acc3
+ LSR $32, acc2, t0
+ MUL acc2, const1, t1
+ UMULH acc2, const1, acc2
+ ADCS t0, acc0
+ ADCS t1, acc1
+ ADC $0, acc2
+ // Last reduction step
+ ADDS acc3<<32, acc0, acc0
+ LSR $32, acc3, t0
+ MUL acc3, const1, t1
+ UMULH acc3, const1, acc3
+ ADCS t0, acc1
+ ADCS t1, acc2
+ ADC $0, acc3
+
+ SUBS $-1, acc0, t0
+ SBCS const0, acc1, t1
+ SBCS $0, acc2, t2
+ SBCS const1, acc3, t3
+
+ CSEL CS, t0, acc0, acc0
+ CSEL CS, t1, acc1, acc1
+ CSEL CS, t2, acc2, acc2
+ CSEL CS, t3, acc3, acc3
+
+ STP (acc0, acc1), 0*16(res_ptr)
+ STP (acc2, acc3), 1*16(res_ptr)
+
+ RET
+/* ---------------------------------------*/
+// Constant time point access to arbitrary point table.
+// Indexed from 1 to 15, with -1 offset
+// (index 0 is implicitly point at infinity)
+// func p256Select(point, table []uint64, idx int)
+TEXT ·p256Select(SB),NOSPLIT,$0
+ MOVD idx+48(FP), const0
+ MOVD table+24(FP), b_ptr
+ MOVD point+0(FP), res_ptr
+
+ EOR x0, x0, x0
+ EOR x1, x1, x1
+ EOR x2, x2, x2
+ EOR x3, x3, x3
+ EOR y0, y0, y0
+ EOR y1, y1, y1
+ EOR y2, y2, y2
+ EOR y3, y3, y3
+ EOR t0, t0, t0
+ EOR t1, t1, t1
+ EOR t2, t2, t2
+ EOR t3, t3, t3
+
+ MOVD $0, const1
+
+loop_select:
+ ADD $1, const1
+ CMP const0, const1
+ LDP.P 16(b_ptr), (acc0, acc1)
+ CSEL EQ, acc0, x0, x0
+ CSEL EQ, acc1, x1, x1
+ LDP.P 16(b_ptr), (acc2, acc3)
+ CSEL EQ, acc2, x2, x2
+ CSEL EQ, acc3, x3, x3
+ LDP.P 16(b_ptr), (acc4, acc5)
+ CSEL EQ, acc4, y0, y0
+ CSEL EQ, acc5, y1, y1
+ LDP.P 16(b_ptr), (acc6, acc7)
+ CSEL EQ, acc6, y2, y2
+ CSEL EQ, acc7, y3, y3
+ LDP.P 16(b_ptr), (acc0, acc1)
+ CSEL EQ, acc0, t0, t0
+ CSEL EQ, acc1, t1, t1
+ LDP.P 16(b_ptr), (acc2, acc3)
+ CSEL EQ, acc2, t2, t2
+ CSEL EQ, acc3, t3, t3
+
+ CMP $16, const1
+ BNE loop_select
+
+ STP (x0, x1), 0*16(res_ptr)
+ STP (x2, x3), 1*16(res_ptr)
+ STP (y0, y1), 2*16(res_ptr)
+ STP (y2, y3), 3*16(res_ptr)
+ STP (t0, t1), 4*16(res_ptr)
+ STP (t2, t3), 5*16(res_ptr)
+ RET
+/* ---------------------------------------*/
+// Constant time point access to base point table.
+// func p256SelectBase(point, table []uint64, idx int)
+TEXT ·p256SelectBase(SB),NOSPLIT,$0
+ MOVD idx+48(FP), t0
+ MOVD table+24(FP), t1
+ MOVD point+0(FP), res_ptr
+
+ EOR x0, x0, x0
+ EOR x1, x1, x1
+ EOR x2, x2, x2
+ EOR x3, x3, x3
+ EOR y0, y0, y0
+ EOR y1, y1, y1
+ EOR y2, y2, y2
+ EOR y3, y3, y3
+
+ MOVD $0, t2
+
+loop_select:
+ ADD $1, t2
+ CMP t0, t2
+ LDP.P 16(t1), (acc0, acc1)
+ CSEL EQ, acc0, x0, x0
+ CSEL EQ, acc1, x1, x1
+ LDP.P 16(t1), (acc2, acc3)
+ CSEL EQ, acc2, x2, x2
+ CSEL EQ, acc3, x3, x3
+ LDP.P 16(t1), (acc4, acc5)
+ CSEL EQ, acc4, y0, y0
+ CSEL EQ, acc5, y1, y1
+ LDP.P 16(t1), (acc6, acc7)
+ CSEL EQ, acc6, y2, y2
+ CSEL EQ, acc7, y3, y3
+
+ CMP $32, t2
+ BNE loop_select
+
+ STP (x0, x1), 0*16(res_ptr)
+ STP (x2, x3), 1*16(res_ptr)
+ STP (y0, y1), 2*16(res_ptr)
+ STP (y2, y3), 3*16(res_ptr)
+ RET
+/* ---------------------------------------*/
+// func p256OrdSqr(res, in []uint64, n int)
+TEXT ·p256OrdSqr(SB),NOSPLIT,$0
+ MOVD in+24(FP), a_ptr
+ MOVD n+48(FP), b_ptr
+
+ MOVD p256ordK0<>(SB), hlp1
+ LDP p256ord<>+0x00(SB), (const0, const1)
+ LDP p256ord<>+0x10(SB), (const2, const3)
+
+ LDP 0*16(a_ptr), (x0, x1)
+ LDP 1*16(a_ptr), (x2, x3)
+
+ordSqrLoop:
+ SUB $1, b_ptr
+
+ // x[1:] * x[0]
+ MUL x0, x1, acc1
+ UMULH x0, x1, acc2
+
+ MUL x0, x2, t0
+ ADDS t0, acc2, acc2
+ UMULH x0, x2, acc3
+
+ MUL x0, x3, t0
+ ADCS t0, acc3, acc3
+ UMULH x0, x3, acc4
+ ADC $0, acc4, acc4
+ // x[2:] * x[1]
+ MUL x1, x2, t0
+ ADDS t0, acc3
+ UMULH x1, x2, t1
+ ADCS t1, acc4
+ ADC $0, ZR, acc5
+
+ MUL x1, x3, t0
+ ADDS t0, acc4
+ UMULH x1, x3, t1
+ ADC t1, acc5
+ // x[3] * x[2]
+ MUL x2, x3, t0
+ ADDS t0, acc5
+ UMULH x2, x3, acc6
+ ADC $0, acc6
+
+ MOVD $0, acc7
+ // *2
+ ADDS acc1, acc1
+ ADCS acc2, acc2
+ ADCS acc3, acc3
+ ADCS acc4, acc4
+ ADCS acc5, acc5
+ ADCS acc6, acc6
+ ADC $0, acc7
+ // Missing products
+ MUL x0, x0, acc0
+ UMULH x0, x0, t0
+ ADDS t0, acc1, acc1
+
+ MUL x1, x1, t0
+ ADCS t0, acc2, acc2
+ UMULH x1, x1, t1
+ ADCS t1, acc3, acc3
+
+ MUL x2, x2, t0
+ ADCS t0, acc4, acc4
+ UMULH x2, x2, t1
+ ADCS t1, acc5, acc5
+
+ MUL x3, x3, t0
+ ADCS t0, acc6, acc6
+ UMULH x3, x3, t1
+ ADC t1, acc7, acc7
+ // First reduction step
+ MUL acc0, hlp1, hlp0
+
+ MUL const0, hlp1, t0
+ ADDS t0, acc0, acc0
+ UMULH const0, hlp0, t1
+
+ MUL const1, hlp0, t0
+ ADCS t0, acc1, acc1
+ UMULH const1, hlp0, y0
+
+ MUL const2, hlp0, t0
+ ADCS t0, acc2, acc2
+ UMULH const2, hlp0, acc0
+
+ MUL const3, hlp0, t0
+ ADCS t0, acc3, acc3
+
+ UMULH const3, hlp0, hlp0
+ ADC $0, hlp0
+
+ ADDS t1, acc1, acc1
+ ADCS y0, acc2, acc2
+ ADCS acc0, acc3, acc3
+ ADC $0, hlp0, acc0
+ // Second reduction step
+ MUL acc1, hlp1, hlp0
+
+ MUL const0, hlp1, t0
+ ADDS t0, acc1, acc1
+ UMULH const0, hlp0, t1
+
+ MUL const1, hlp0, t0
+ ADCS t0, acc2, acc2
+ UMULH const1, hlp0, y0
+
+ MUL const2, hlp0, t0
+ ADCS t0, acc3, acc3
+ UMULH const2, hlp0, acc1
+
+ MUL const3, hlp0, t0
+ ADCS t0, acc0, acc0
+
+ UMULH const3, hlp0, hlp0
+ ADC $0, hlp0
+
+ ADDS t1, acc2, acc2
+ ADCS y0, acc3, acc3
+ ADCS acc1, acc0, acc0
+ ADC $0, hlp0, acc1
+ // Third reduction step
+ MUL acc2, hlp1, hlp0
+
+ MUL const0, hlp1, t0
+ ADDS t0, acc2, acc2
+ UMULH const0, hlp0, t1
+
+ MUL const1, hlp0, t0
+ ADCS t0, acc3, acc3
+ UMULH const1, hlp0, y0
+
+ MUL const2, hlp0, t0
+ ADCS t0, acc0, acc0
+ UMULH const2, hlp0, acc2
+
+ MUL const3, hlp0, t0
+ ADCS t0, acc1, acc1
+
+ UMULH const3, hlp0, hlp0
+ ADC $0, hlp0
+
+ ADDS t1, acc3, acc3
+ ADCS y0, acc0, acc0
+ ADCS acc2, acc1, acc1
+ ADC $0, hlp0, acc2
+
+ // Last reduction step
+ MUL acc3, hlp1, hlp0
+
+ MUL const0, hlp1, t0
+ ADDS t0, acc3, acc3
+ UMULH const0, hlp0, t1
+
+ MUL const1, hlp0, t0
+ ADCS t0, acc0, acc0
+ UMULH const1, hlp0, y0
+
+ MUL const2, hlp0, t0
+ ADCS t0, acc1, acc1
+ UMULH const2, hlp0, acc3
+
+ MUL const3, hlp0, t0
+ ADCS t0, acc2, acc2
+
+ UMULH const3, hlp0, hlp0
+ ADC $0, acc7
+
+ ADDS t1, acc0, acc0
+ ADCS y0, acc1, acc1
+ ADCS acc3, acc2, acc2
+ ADC $0, hlp0, acc3
+
+ ADDS acc4, acc0, acc0
+ ADCS acc5, acc1, acc1
+ ADCS acc6, acc2, acc2
+ ADCS acc7, acc3, acc3
+ ADC $0, ZR, acc4
+
+ SUBS const0, acc0, y0
+ SBCS const1, acc1, y1
+ SBCS const2, acc2, y2
+ SBCS const3, acc3, y3
+ SBCS $0, acc4, acc4
+
+ CSEL CS, y0, acc0, x0
+ CSEL CS, y1, acc1, x1
+ CSEL CS, y2, acc2, x2
+ CSEL CS, y3, acc3, x3
+
+ CBNZ b_ptr, ordSqrLoop
+
+ MOVD res+0(FP), res_ptr
+ STP (x0, x1), 0*16(res_ptr)
+ STP (x2, x3), 1*16(res_ptr)
+
+ RET
+/* ---------------------------------------*/
+// func p256OrdMul(res, in1, in2 []uint64)
+TEXT ·p256OrdMul(SB),NOSPLIT,$0
+ MOVD in1+24(FP), a_ptr
+ MOVD in2+48(FP), b_ptr
+
+ MOVD p256ordK0<>(SB), hlp1
+ LDP p256ord<>+0x00(SB), (const0, const1)
+ LDP p256ord<>+0x10(SB), (const2, const3)
+
+ LDP 0*16(a_ptr), (x0, x1)
+ LDP 1*16(a_ptr), (x2, x3)
+ LDP 0*16(b_ptr), (y0, y1)
+ LDP 1*16(b_ptr), (y2, y3)
+
+ // y[0] * x
+ MUL y0, x0, acc0
+ UMULH y0, x0, acc1
+
+ MUL y0, x1, t0
+ ADDS t0, acc1
+ UMULH y0, x1, acc2
+
+ MUL y0, x2, t0
+ ADCS t0, acc2
+ UMULH y0, x2, acc3
+
+ MUL y0, x3, t0
+ ADCS t0, acc3
+ UMULH y0, x3, acc4
+ ADC $0, acc4
+ // First reduction step
+ MUL acc0, hlp1, hlp0
+
+ MUL const0, hlp1, t0
+ ADDS t0, acc0, acc0
+ UMULH const0, hlp0, t1
+
+ MUL const1, hlp0, t0
+ ADCS t0, acc1, acc1
+ UMULH const1, hlp0, y0
+
+ MUL const2, hlp0, t0
+ ADCS t0, acc2, acc2
+ UMULH const2, hlp0, acc0
+
+ MUL const3, hlp0, t0
+ ADCS t0, acc3, acc3
+
+ UMULH const3, hlp0, hlp0
+ ADC $0, acc4
+
+ ADDS t1, acc1, acc1
+ ADCS y0, acc2, acc2
+ ADCS acc0, acc3, acc3
+ ADC $0, hlp0, acc0
+ // y[1] * x
+ MUL y1, x0, t0
+ ADDS t0, acc1
+ UMULH y1, x0, t1
+
+ MUL y1, x1, t0
+ ADCS t0, acc2
+ UMULH y1, x1, hlp0
+
+ MUL y1, x2, t0
+ ADCS t0, acc3
+ UMULH y1, x2, y0
+
+ MUL y1, x3, t0
+ ADCS t0, acc4
+ UMULH y1, x3, y1
+ ADC $0, ZR, acc5
+
+ ADDS t1, acc2
+ ADCS hlp0, acc3
+ ADCS y0, acc4
+ ADC y1, acc5
+ // Second reduction step
+ MUL acc1, hlp1, hlp0
+
+ MUL const0, hlp1, t0
+ ADDS t0, acc1, acc1
+ UMULH const0, hlp0, t1
+
+ MUL const1, hlp0, t0
+ ADCS t0, acc2, acc2
+ UMULH const1, hlp0, y0
+
+ MUL const2, hlp0, t0
+ ADCS t0, acc3, acc3
+ UMULH const2, hlp0, acc1
+
+ MUL const3, hlp0, t0
+ ADCS t0, acc0, acc0
+
+ UMULH const3, hlp0, hlp0
+ ADC $0, acc5
+
+ ADDS t1, acc2, acc2
+ ADCS y0, acc3, acc3
+ ADCS acc1, acc0, acc0
+ ADC $0, hlp0, acc1
+ // y[2] * x
+ MUL y2, x0, t0
+ ADDS t0, acc2
+ UMULH y2, x0, t1
+
+ MUL y2, x1, t0
+ ADCS t0, acc3
+ UMULH y2, x1, hlp0
+
+ MUL y2, x2, t0
+ ADCS t0, acc4
+ UMULH y2, x2, y0
+
+ MUL y2, x3, t0
+ ADCS t0, acc5
+ UMULH y2, x3, y1
+ ADC $0, ZR, acc6
+
+ ADDS t1, acc3
+ ADCS hlp0, acc4
+ ADCS y0, acc5
+ ADC y1, acc6
+ // Third reduction step
+ MUL acc2, hlp1, hlp0
+
+ MUL const0, hlp1, t0
+ ADDS t0, acc2, acc2
+ UMULH const0, hlp0, t1
+
+ MUL const1, hlp0, t0
+ ADCS t0, acc3, acc3
+ UMULH const1, hlp0, y0
+
+ MUL const2, hlp0, t0
+ ADCS t0, acc0, acc0
+ UMULH const2, hlp0, acc2
+
+ MUL const3, hlp0, t0
+ ADCS t0, acc1, acc1
+
+ UMULH const3, hlp0, hlp0
+ ADC $0, acc6
+
+ ADDS t1, acc3, acc3
+ ADCS y0, acc0, acc0
+ ADCS acc2, acc1, acc1
+ ADC $0, hlp0, acc2
+ // y[3] * x
+ MUL y3, x0, t0
+ ADDS t0, acc3
+ UMULH y3, x0, t1
+
+ MUL y3, x1, t0
+ ADCS t0, acc4
+ UMULH y3, x1, hlp0
+
+ MUL y3, x2, t0
+ ADCS t0, acc5
+ UMULH y3, x2, y0
+
+ MUL y3, x3, t0
+ ADCS t0, acc6
+ UMULH y3, x3, y1
+ ADC $0, ZR, acc7
+
+ ADDS t1, acc4
+ ADCS hlp0, acc5
+ ADCS y0, acc6
+ ADC y1, acc7
+ // Last reduction step
+ MUL acc3, hlp1, hlp0
+
+ MUL const0, hlp1, t0
+ ADDS t0, acc3, acc3
+ UMULH const0, hlp0, t1
+
+ MUL const1, hlp0, t0
+ ADCS t0, acc0, acc0
+ UMULH const1, hlp0, y0
+
+ MUL const2, hlp0, t0
+ ADCS t0, acc1, acc1
+ UMULH const2, hlp0, acc3
+
+ MUL const3, hlp0, t0
+ ADCS t0, acc2, acc2
+
+ UMULH const3, hlp0, hlp0
+ ADC $0, acc7
+
+ ADDS t1, acc0, acc0
+ ADCS y0, acc1, acc1
+ ADCS acc3, acc2, acc2
+ ADC $0, hlp0, acc3
+
+ ADDS acc4, acc0, acc0
+ ADCS acc5, acc1, acc1
+ ADCS acc6, acc2, acc2
+ ADCS acc7, acc3, acc3
+ ADC $0, ZR, acc4
+
+ SUBS const0, acc0, t0
+ SBCS const1, acc1, t1
+ SBCS const2, acc2, t2
+ SBCS const3, acc3, t3
+ SBCS $0, acc4, acc4
+
+ CSEL CS, t0, acc0, acc0
+ CSEL CS, t1, acc1, acc1
+ CSEL CS, t2, acc2, acc2
+ CSEL CS, t3, acc3, acc3
+
+ MOVD res+0(FP), res_ptr
+ STP (acc0, acc1), 0*16(res_ptr)
+ STP (acc2, acc3), 1*16(res_ptr)
+
+ RET
+/* ---------------------------------------*/
+TEXT p256SubInternal<>(SB),NOSPLIT,$0
+ SUBS x0, y0, acc0
+ SBCS x1, y1, acc1
+ SBCS x2, y2, acc2
+ SBCS x3, y3, acc3
+ SBC $0, ZR, t0
+
+ ADDS $-1, acc0, acc4
+ ADCS const0, acc1, acc5
+ ADCS $0, acc2, acc6
+ ADC const1, acc3, acc7
+
+ ANDS $1, t0
+ CSEL EQ, acc0, acc4, x0
+ CSEL EQ, acc1, acc5, x1
+ CSEL EQ, acc2, acc6, x2
+ CSEL EQ, acc3, acc7, x3
+
+ RET
+/* ---------------------------------------*/
+TEXT p256SqrInternal<>(SB),NOSPLIT,$0
+ // x[1:] * x[0]
+ MUL x0, x1, acc1
+ UMULH x0, x1, acc2
+
+ MUL x0, x2, t0
+ ADDS t0, acc2, acc2
+ UMULH x0, x2, acc3
+
+ MUL x0, x3, t0
+ ADCS t0, acc3, acc3
+ UMULH x0, x3, acc4
+ ADC $0, acc4, acc4
+ // x[2:] * x[1]
+ MUL x1, x2, t0
+ ADDS t0, acc3
+ UMULH x1, x2, t1
+ ADCS t1, acc4
+ ADC $0, ZR, acc5
+
+ MUL x1, x3, t0
+ ADDS t0, acc4
+ UMULH x1, x3, t1
+ ADC t1, acc5
+ // x[3] * x[2]
+ MUL x2, x3, t0
+ ADDS t0, acc5
+ UMULH x2, x3, acc6
+ ADC $0, acc6
+
+ MOVD $0, acc7
+ // *2
+ ADDS acc1, acc1
+ ADCS acc2, acc2
+ ADCS acc3, acc3
+ ADCS acc4, acc4
+ ADCS acc5, acc5
+ ADCS acc6, acc6
+ ADC $0, acc7
+ // Missing products
+ MUL x0, x0, acc0
+ UMULH x0, x0, t0
+ ADDS t0, acc1, acc1
+
+ MUL x1, x1, t0
+ ADCS t0, acc2, acc2
+ UMULH x1, x1, t1
+ ADCS t1, acc3, acc3
+
+ MUL x2, x2, t0
+ ADCS t0, acc4, acc4
+ UMULH x2, x2, t1
+ ADCS t1, acc5, acc5
+
+ MUL x3, x3, t0
+ ADCS t0, acc6, acc6
+ UMULH x3, x3, t1
+ ADCS t1, acc7, acc7
+ // First reduction step
+ ADDS acc0<<32, acc1, acc1
+ LSR $32, acc0, t0
+ MUL acc0, const1, t1
+ UMULH acc0, const1, acc0
+ ADCS t0, acc2, acc2
+ ADCS t1, acc3, acc3
+ ADC $0, acc0, acc0
+ // Second reduction step
+ ADDS acc1<<32, acc2, acc2
+ LSR $32, acc1, t0
+ MUL acc1, const1, t1
+ UMULH acc1, const1, acc1
+ ADCS t0, acc3, acc3
+ ADCS t1, acc0, acc0
+ ADC $0, acc1, acc1
+ // Third reduction step
+ ADDS acc2<<32, acc3, acc3
+ LSR $32, acc2, t0
+ MUL acc2, const1, t1
+ UMULH acc2, const1, acc2
+ ADCS t0, acc0, acc0
+ ADCS t1, acc1, acc1
+ ADC $0, acc2, acc2
+ // Last reduction step
+ ADDS acc3<<32, acc0, acc0
+ LSR $32, acc3, t0
+ MUL acc3, const1, t1
+ UMULH acc3, const1, acc3
+ ADCS t0, acc1, acc1
+ ADCS t1, acc2, acc2
+ ADC $0, acc3, acc3
+ // Add bits [511:256] of the sqr result
+ ADDS acc4, acc0, acc0
+ ADCS acc5, acc1, acc1
+ ADCS acc6, acc2, acc2
+ ADCS acc7, acc3, acc3
+ ADC $0, ZR, acc4
+
+ SUBS $-1, acc0, t0
+ SBCS const0, acc1, t1
+ SBCS $0, acc2, t2
+ SBCS const1, acc3, t3
+ SBCS $0, acc4, acc4
+
+ CSEL CS, t0, acc0, y0
+ CSEL CS, t1, acc1, y1
+ CSEL CS, t2, acc2, y2
+ CSEL CS, t3, acc3, y3
+ RET
+/* ---------------------------------------*/
+TEXT p256MulInternal<>(SB),NOSPLIT,$0
+ // y[0] * x
+ MUL y0, x0, acc0
+ UMULH y0, x0, acc1
+
+ MUL y0, x1, t0
+ ADDS t0, acc1
+ UMULH y0, x1, acc2
+
+ MUL y0, x2, t0
+ ADCS t0, acc2
+ UMULH y0, x2, acc3
+
+ MUL y0, x3, t0
+ ADCS t0, acc3
+ UMULH y0, x3, acc4
+ ADC $0, acc4
+ // First reduction step
+ ADDS acc0<<32, acc1, acc1
+ LSR $32, acc0, t0
+ MUL acc0, const1, t1
+ UMULH acc0, const1, acc0
+ ADCS t0, acc2
+ ADCS t1, acc3
+ ADC $0, acc0
+ // y[1] * x
+ MUL y1, x0, t0
+ ADDS t0, acc1
+ UMULH y1, x0, t1
+
+ MUL y1, x1, t0
+ ADCS t0, acc2
+ UMULH y1, x1, t2
+
+ MUL y1, x2, t0
+ ADCS t0, acc3
+ UMULH y1, x2, t3
+
+ MUL y1, x3, t0
+ ADCS t0, acc4
+ UMULH y1, x3, hlp0
+ ADC $0, ZR, acc5
+
+ ADDS t1, acc2
+ ADCS t2, acc3
+ ADCS t3, acc4
+ ADC hlp0, acc5
+ // Second reduction step
+ ADDS acc1<<32, acc2, acc2
+ LSR $32, acc1, t0
+ MUL acc1, const1, t1
+ UMULH acc1, const1, acc1
+ ADCS t0, acc3
+ ADCS t1, acc0
+ ADC $0, acc1
+ // y[2] * x
+ MUL y2, x0, t0
+ ADDS t0, acc2
+ UMULH y2, x0, t1
+
+ MUL y2, x1, t0
+ ADCS t0, acc3
+ UMULH y2, x1, t2
+
+ MUL y2, x2, t0
+ ADCS t0, acc4
+ UMULH y2, x2, t3
+
+ MUL y2, x3, t0
+ ADCS t0, acc5
+ UMULH y2, x3, hlp0
+ ADC $0, ZR, acc6
+
+ ADDS t1, acc3
+ ADCS t2, acc4
+ ADCS t3, acc5
+ ADC hlp0, acc6
+ // Third reduction step
+ ADDS acc2<<32, acc3, acc3
+ LSR $32, acc2, t0
+ MUL acc2, const1, t1
+ UMULH acc2, const1, acc2
+ ADCS t0, acc0
+ ADCS t1, acc1
+ ADC $0, acc2
+ // y[3] * x
+ MUL y3, x0, t0
+ ADDS t0, acc3
+ UMULH y3, x0, t1
+
+ MUL y3, x1, t0
+ ADCS t0, acc4
+ UMULH y3, x1, t2
+
+ MUL y3, x2, t0
+ ADCS t0, acc5
+ UMULH y3, x2, t3
+
+ MUL y3, x3, t0
+ ADCS t0, acc6
+ UMULH y3, x3, hlp0
+ ADC $0, ZR, acc7
+
+ ADDS t1, acc4
+ ADCS t2, acc5
+ ADCS t3, acc6
+ ADC hlp0, acc7
+ // Last reduction step
+ ADDS acc3<<32, acc0, acc0
+ LSR $32, acc3, t0
+ MUL acc3, const1, t1
+ UMULH acc3, const1, acc3
+ ADCS t0, acc1
+ ADCS t1, acc2
+ ADC $0, acc3
+ // Add bits [511:256] of the mul result
+ ADDS acc4, acc0, acc0
+ ADCS acc5, acc1, acc1
+ ADCS acc6, acc2, acc2
+ ADCS acc7, acc3, acc3
+ ADC $0, ZR, acc4
+
+ SUBS $-1, acc0, t0
+ SBCS const0, acc1, t1
+ SBCS $0, acc2, t2
+ SBCS const1, acc3, t3
+ SBCS $0, acc4, acc4
+
+ CSEL CS, t0, acc0, y0
+ CSEL CS, t1, acc1, y1
+ CSEL CS, t2, acc2, y2
+ CSEL CS, t3, acc3, y3
+ RET
+/* ---------------------------------------*/
+#define p256MulBy2Inline \
+ ADDS y0, y0, x0; \
+ ADCS y1, y1, x1; \
+ ADCS y2, y2, x2; \
+ ADCS y3, y3, x3; \
+ ADC $0, ZR, hlp0; \
+ SUBS $-1, x0, t0; \
+ SBCS const0, x1, t1;\
+ SBCS $0, x2, t2; \
+ SBCS const1, x3, t3;\
+ SBCS $0, hlp0, hlp0;\
+ CSEL CC, x0, t0, x0;\
+ CSEL CC, x1, t1, x1;\
+ CSEL CC, x2, t2, x2;\
+ CSEL CC, x3, t3, x3;
+/* ---------------------------------------*/
+#define x1in(off) (off)(a_ptr)
+#define y1in(off) (off + 32)(a_ptr)
+#define z1in(off) (off + 64)(a_ptr)
+#define x2in(off) (off)(b_ptr)
+#define z2in(off) (off + 64)(b_ptr)
+#define x3out(off) (off)(res_ptr)
+#define y3out(off) (off + 32)(res_ptr)
+#define z3out(off) (off + 64)(res_ptr)
+#define LDx(src) LDP src(0), (x0, x1); LDP src(16), (x2, x3)
+#define LDy(src) LDP src(0), (y0, y1); LDP src(16), (y2, y3)
+#define STx(src) STP (x0, x1), src(0); STP (x2, x3), src(16)
+#define STy(src) STP (y0, y1), src(0); STP (y2, y3), src(16)
+/* ---------------------------------------*/
+#define y2in(off) (32*0 + 8 + off)(RSP)
+#define s2(off) (32*1 + 8 + off)(RSP)
+#define z1sqr(off) (32*2 + 8 + off)(RSP)
+#define h(off) (32*3 + 8 + off)(RSP)
+#define r(off) (32*4 + 8 + off)(RSP)
+#define hsqr(off) (32*5 + 8 + off)(RSP)
+#define rsqr(off) (32*6 + 8 + off)(RSP)
+#define hcub(off) (32*7 + 8 + off)(RSP)
+
+#define z2sqr(off) (32*8 + 8 + off)(RSP)
+#define s1(off) (32*9 + 8 + off)(RSP)
+#define u1(off) (32*10 + 8 + off)(RSP)
+#define u2(off) (32*11 + 8 + off)(RSP)
+
+// func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
+TEXT ·p256PointAddAffineAsm(SB),0,$264-96
+ MOVD in1+24(FP), a_ptr
+ MOVD in2+48(FP), b_ptr
+ MOVD sign+72(FP), hlp0
+ MOVD sel+80(FP), hlp1
+ MOVD zero+88(FP), t2
+
+ MOVD $1, t0
+ CMP $0, t2
+ CSEL EQ, ZR, t0, t2
+ CMP $0, hlp1
+ CSEL EQ, ZR, t0, hlp1
+
+ MOVD p256const0<>(SB), const0
+ MOVD p256const1<>(SB), const1
+ EOR t2<<1, hlp1
+
+ // Negate y2in based on sign
+ LDP 2*16(b_ptr), (y0, y1)
+ LDP 3*16(b_ptr), (y2, y3)
+ MOVD $-1, acc0
+
+ SUBS y0, acc0, acc0
+ SBCS y1, const0, acc1
+ SBCS y2, ZR, acc2
+ SBCS y3, const1, acc3
+ SBC $0, ZR, t0
+
+ ADDS $-1, acc0, acc4
+ ADCS const0, acc1, acc5
+ ADCS $0, acc2, acc6
+ ADCS const1, acc3, acc7
+ ADC $0, t0, t0
+
+ CMP $0, t0
+ CSEL EQ, acc4, acc0, acc0
+ CSEL EQ, acc5, acc1, acc1
+ CSEL EQ, acc6, acc2, acc2
+ CSEL EQ, acc7, acc3, acc3
+ // If condition is 0, keep original value
+ CMP $0, hlp0
+ CSEL EQ, y0, acc0, y0
+ CSEL EQ, y1, acc1, y1
+ CSEL EQ, y2, acc2, y2
+ CSEL EQ, y3, acc3, y3
+ // Store result
+ STy(y2in)
+ // Begin point add
+ LDx(z1in)
+ CALL p256SqrInternal<>(SB) // z1ˆ2
+ STy(z1sqr)
+
+ LDx(x2in)
+ CALL p256MulInternal<>(SB) // x2 * z1ˆ2
+
+ LDx(x1in)
+ CALL p256SubInternal<>(SB) // h = u2 - u1
+ STx(h)
+
+ LDy(z1in)
+ CALL p256MulInternal<>(SB) // z3 = h * z1
+
+ LDP 4*16(a_ptr), (acc0, acc1)// iff select[0] == 0, z3 = z1
+ LDP 5*16(a_ptr), (acc2, acc3)
+ ANDS $1, hlp1, ZR
+ CSEL EQ, acc0, y0, y0
+ CSEL EQ, acc1, y1, y1
+ CSEL EQ, acc2, y2, y2
+ CSEL EQ, acc3, y3, y3
+ LDP p256one<>+0x00(SB), (acc0, acc1)
+ LDP p256one<>+0x10(SB), (acc2, acc3)
+ ANDS $2, hlp1, ZR // iff select[1] == 0, z3 = 1
+ CSEL EQ, acc0, y0, y0
+ CSEL EQ, acc1, y1, y1
+ CSEL EQ, acc2, y2, y2
+ CSEL EQ, acc3, y3, y3
+ LDx(z1in)
+ MOVD res+0(FP), t0
+ STP (y0, y1), 4*16(t0)
+ STP (y2, y3), 5*16(t0)
+
+ LDy(z1sqr)
+ CALL p256MulInternal<>(SB) // z1 ^ 3
+
+ LDx(y2in)
+ CALL p256MulInternal<>(SB) // s2 = y2 * z1ˆ3
+ STy(s2)
+
+ LDx(y1in)
+ CALL p256SubInternal<>(SB) // r = s2 - s1
+ STx(r)
+
+ CALL p256SqrInternal<>(SB) // rsqr = rˆ2
+ STy (rsqr)
+
+ LDx(h)
+ CALL p256SqrInternal<>(SB) // hsqr = hˆ2
+ STy(hsqr)
+
+ CALL p256MulInternal<>(SB) // hcub = hˆ3
+ STy(hcub)
+
+ LDx(y1in)
+ CALL p256MulInternal<>(SB) // y1 * hˆ3
+ STy(s2)
+
+ LDP hsqr(0*8), (x0, x1)
+ LDP hsqr(2*8), (x2, x3)
+ LDP 0*16(a_ptr), (y0, y1)
+ LDP 1*16(a_ptr), (y2, y3)
+ CALL p256MulInternal<>(SB) // u1 * hˆ2
+ STP (y0, y1), h(0*8)
+ STP (y2, y3), h(2*8)
+
+ p256MulBy2Inline // u1 * hˆ2 * 2, inline
+
+ LDy(rsqr)
+ CALL p256SubInternal<>(SB) // rˆ2 - u1 * hˆ2 * 2
+
+ MOVD x0, y0
+ MOVD x1, y1
+ MOVD x2, y2
+ MOVD x3, y3
+ LDx(hcub)
+ CALL p256SubInternal<>(SB)
+
+ LDP 0*16(a_ptr), (acc0, acc1)
+ LDP 1*16(a_ptr), (acc2, acc3)
+ ANDS $1, hlp1, ZR // iff select[0] == 0, x3 = x1
+ CSEL EQ, acc0, x0, x0
+ CSEL EQ, acc1, x1, x1
+ CSEL EQ, acc2, x2, x2
+ CSEL EQ, acc3, x3, x3
+ LDP 0*16(b_ptr), (acc0, acc1)
+ LDP 1*16(b_ptr), (acc2, acc3)
+ ANDS $2, hlp1, ZR // iff select[1] == 0, x3 = x2
+ CSEL EQ, acc0, x0, x0
+ CSEL EQ, acc1, x1, x1
+ CSEL EQ, acc2, x2, x2
+ CSEL EQ, acc3, x3, x3
+ MOVD res+0(FP), t0
+ STP (x0, x1), 0*16(t0)
+ STP (x2, x3), 1*16(t0)
+
+ LDP h(0*8), (y0, y1)
+ LDP h(2*8), (y2, y3)
+ CALL p256SubInternal<>(SB)
+
+ LDP r(0*8), (y0, y1)
+ LDP r(2*8), (y2, y3)
+ CALL p256MulInternal<>(SB)
+
+ LDP s2(0*8), (x0, x1)
+ LDP s2(2*8), (x2, x3)
+ CALL p256SubInternal<>(SB)
+ LDP 2*16(a_ptr), (acc0, acc1)
+ LDP 3*16(a_ptr), (acc2, acc3)
+ ANDS $1, hlp1, ZR // iff select[0] == 0, y3 = y1
+ CSEL EQ, acc0, x0, x0
+ CSEL EQ, acc1, x1, x1
+ CSEL EQ, acc2, x2, x2
+ CSEL EQ, acc3, x3, x3
+ LDP y2in(0*8), (acc0, acc1)
+ LDP y2in(2*8), (acc2, acc3)
+ ANDS $2, hlp1, ZR // iff select[1] == 0, y3 = y2
+ CSEL EQ, acc0, x0, x0
+ CSEL EQ, acc1, x1, x1
+ CSEL EQ, acc2, x2, x2
+ CSEL EQ, acc3, x3, x3
+ MOVD res+0(FP), t0
+ STP (x0, x1), 2*16(t0)
+ STP (x2, x3), 3*16(t0)
+
+ RET
+
+#define p256AddInline \
+ ADDS y0, x0, x0; \
+ ADCS y1, x1, x1; \
+ ADCS y2, x2, x2; \
+ ADCS y3, x3, x3; \
+ ADC $0, ZR, hlp0; \
+ SUBS $-1, x0, t0; \
+ SBCS const0, x1, t1;\
+ SBCS $0, x2, t2; \
+ SBCS const1, x3, t3;\
+ SBCS $0, hlp0, hlp0;\
+ CSEL CC, x0, t0, x0;\
+ CSEL CC, x1, t1, x1;\
+ CSEL CC, x2, t2, x2;\
+ CSEL CC, x3, t3, x3;
+
+#define s(off) (32*0 + 8 + off)(RSP)
+#define m(off) (32*1 + 8 + off)(RSP)
+#define zsqr(off) (32*2 + 8 + off)(RSP)
+#define tmp(off) (32*3 + 8 + off)(RSP)
+
+//func p256PointDoubleAsm(res, in []uint64)
+TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$136-48
+ MOVD res+0(FP), res_ptr
+ MOVD in+24(FP), a_ptr
+
+ MOVD p256const0<>(SB), const0
+ MOVD p256const1<>(SB), const1
+
+ // Begin point double
+ LDP 4*16(a_ptr), (x0, x1)
+ LDP 5*16(a_ptr), (x2, x3)
+ CALL p256SqrInternal<>(SB)
+ STP (y0, y1), zsqr(0*8)
+ STP (y2, y3), zsqr(2*8)
+
+ LDP 0*16(a_ptr), (x0, x1)
+ LDP 1*16(a_ptr), (x2, x3)
+ p256AddInline
+ STx(m)
+
+ LDx(z1in)
+ LDy(y1in)
+ CALL p256MulInternal<>(SB)
+ p256MulBy2Inline
+ STx(z3out)
+
+ LDy(x1in)
+ LDx(zsqr)
+ CALL p256SubInternal<>(SB)
+ LDy(m)
+ CALL p256MulInternal<>(SB)
+
+ // Multiply by 3
+ p256MulBy2Inline
+ p256AddInline
+ STx(m)
+
+ LDy(y1in)
+ p256MulBy2Inline
+ CALL p256SqrInternal<>(SB)
+ STy(s)
+ MOVD y0, x0
+ MOVD y1, x1
+ MOVD y2, x2
+ MOVD y3, x3
+ CALL p256SqrInternal<>(SB)
+
+ // Divide by 2
+ ADDS $-1, y0, t0
+ ADCS const0, y1, t1
+ ADCS $0, y2, t2
+ ADCS const1, y3, t3
+ ADC $0, ZR, hlp0
+
+ ANDS $1, y0, ZR
+ CSEL EQ, y0, t0, t0
+ CSEL EQ, y1, t1, t1
+ CSEL EQ, y2, t2, t2
+ CSEL EQ, y3, t3, t3
+ AND y0, hlp0, hlp0
+
+ EXTR $1, t0, t1, y0
+ EXTR $1, t1, t2, y1
+ EXTR $1, t2, t3, y2
+ EXTR $1, t3, hlp0, y3
+ STy(y3out)
+
+ LDx(x1in)
+ LDy(s)
+ CALL p256MulInternal<>(SB)
+ STy(s)
+ p256MulBy2Inline
+ STx(tmp)
+
+ LDx(m)
+ CALL p256SqrInternal<>(SB)
+ LDx(tmp)
+ CALL p256SubInternal<>(SB)
+
+ STx(x3out)
+
+ LDy(s)
+ CALL p256SubInternal<>(SB)
+
+ LDy(m)
+ CALL p256MulInternal<>(SB)
+
+ LDx(y3out)
+ CALL p256SubInternal<>(SB)
+ STx(y3out)
+ RET
+/* ---------------------------------------*/
+#undef y2in
+#undef x3out
+#undef y3out
+#undef z3out
+#define y2in(off) (off + 32)(b_ptr)
+#define x3out(off) (off)(b_ptr)
+#define y3out(off) (off + 32)(b_ptr)
+#define z3out(off) (off + 64)(b_ptr)
+//func p256PointAddAsm(res, in1, in2 []uint64) int
+TEXT ·p256PointAddAsm(SB),0,$392-80
+ // See https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl
+ // Move input to stack in order to free registers
+ MOVD in1+24(FP), a_ptr
+ MOVD in2+48(FP), b_ptr
+
+ MOVD p256const0<>(SB), const0
+ MOVD p256const1<>(SB), const1
+
+ // Begin point add
+ LDx(z2in)
+ CALL p256SqrInternal<>(SB) // z2^2
+ STy(z2sqr)
+
+ CALL p256MulInternal<>(SB) // z2^3
+
+ LDx(y1in)
+ CALL p256MulInternal<>(SB) // s1 = z2ˆ3*y1
+ STy(s1)
+
+ LDx(z1in)
+ CALL p256SqrInternal<>(SB) // z1^2
+ STy(z1sqr)
+
+ CALL p256MulInternal<>(SB) // z1^3
+
+ LDx(y2in)
+ CALL p256MulInternal<>(SB) // s2 = z1ˆ3*y2
+
+ LDx(s1)
+ CALL p256SubInternal<>(SB) // r = s2 - s1
+ STx(r)
+
+ MOVD $1, t2
+ ORR x0, x1, t0 // Check if zero mod p256
+ ORR x2, x3, t1
+ ORR t1, t0, t0
+ CMP $0, t0
+ CSEL EQ, t2, ZR, hlp1
+
+ EOR $-1, x0, t0
+ EOR const0, x1, t1
+ EOR const1, x3, t3
+
+ ORR t0, t1, t0
+ ORR x2, t3, t1
+ ORR t1, t0, t0
+ CMP $0, t0
+ CSEL EQ, t2, hlp1, hlp1
+
+ LDx(z2sqr)
+ LDy(x1in)
+ CALL p256MulInternal<>(SB) // u1 = x1 * z2ˆ2
+ STy(u1)
+
+ LDx(z1sqr)
+ LDy(x2in)
+ CALL p256MulInternal<>(SB) // u2 = x2 * z1ˆ2
+ STy(u2)
+
+ LDx(u1)
+ CALL p256SubInternal<>(SB) // h = u2 - u1
+ STx(h)
+
+ MOVD $1, t2
+ ORR x0, x1, t0 // Check if zero mod p256
+ ORR x2, x3, t1
+ ORR t1, t0, t0
+ CMP $0, t0
+ CSEL EQ, t2, ZR, hlp0
+
+ EOR $-1, x0, t0
+ EOR const0, x1, t1
+ EOR const1, x3, t3
+
+ ORR t0, t1, t0
+ ORR x2, t3, t1
+ ORR t1, t0, t0
+ CMP $0, t0
+ CSEL EQ, t2, hlp0, hlp0
+
+ AND hlp0, hlp1, hlp1
+
+ LDx(r)
+ CALL p256SqrInternal<>(SB) // rsqr = rˆ2
+ STy(rsqr)
+
+ LDx(h)
+ CALL p256SqrInternal<>(SB) // hsqr = hˆ2
+ STy(hsqr)
+
+ LDx(h)
+ CALL p256MulInternal<>(SB) // hcub = hˆ3
+ STy(hcub)
+
+ LDx(s1)
+ CALL p256MulInternal<>(SB)
+ STy(s2)
+
+ LDx(z1in)
+ LDy(z2in)
+ CALL p256MulInternal<>(SB) // z1 * z2
+ LDx(h)
+ CALL p256MulInternal<>(SB) // z1 * z2 * h
+ MOVD res+0(FP), b_ptr
+ STy(z3out)
+
+ LDx(hsqr)
+ LDy(u1)
+ CALL p256MulInternal<>(SB) // hˆ2 * u1
+ STy(u2)
+
+ p256MulBy2Inline // u1 * hˆ2 * 2, inline
+ LDy(rsqr)
+ CALL p256SubInternal<>(SB) // rˆ2 - u1 * hˆ2 * 2
+
+ MOVD x0, y0
+ MOVD x1, y1
+ MOVD x2, y2
+ MOVD x3, y3
+ LDx(hcub)
+ CALL p256SubInternal<>(SB)
+ STx(x3out)
+
+ LDy(u2)
+ CALL p256SubInternal<>(SB)
+
+ LDy(r)
+ CALL p256MulInternal<>(SB)
+
+ LDx(s2)
+ CALL p256SubInternal<>(SB)
+ STx(y3out)
+
+ MOVD hlp1, R0
+ MOVD R0, ret+72(FP)
+
+ RET
diff --git a/src/crypto/elliptic/p256_asm_ppc64le.s b/src/crypto/elliptic/p256_asm_ppc64le.s
new file mode 100644
index 0000000..69e96e2
--- /dev/null
+++ b/src/crypto/elliptic/p256_asm_ppc64le.s
@@ -0,0 +1,2494 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// This is a port of the s390x asm implementation.
+// to ppc64le.
+
+// Some changes were needed due to differences in
+// the Go opcodes and/or available instructions
+// between s390x and ppc64le.
+
+// 1. There were operand order differences in the
+// VSUBUQM, VSUBCUQ, and VSEL instructions.
+
+// 2. ppc64 does not have a multiply high and low
+// like s390x, so those were implemented using
+// macros to compute the equivalent values.
+
+// 3. The LVX, STVX instructions on ppc64 require
+// 16 byte alignment of the data. To avoid that
+// requirement, data is loaded using LXVD2X and
+// STXVD2X with VPERM to reorder bytes correctly.
+
+// I have identified some areas where I believe
+// changes would be needed to make this work for big
+// endian; however additional changes beyond what I
+// have noted are most likely needed to make it work.
+// - The string used with VPERM to swap the byte order
+// for loads and stores.
+// - The EXTRACT_HI and EXTRACT_LO strings.
+// - The constants that are loaded from CPOOL.
+//
+
+// Permute string used by VPERM to reorder bytes
+// loaded or stored using LXVD2X or STXVD2X
+// on little endian.
+DATA byteswap<>+0(SB)/8, $0x08090a0b0c0d0e0f
+DATA byteswap<>+8(SB)/8, $0x0001020304050607
+
+// The following constants are defined in an order
+// that is correct for use with LXVD2X/STXVD2X
+// on little endian.
+DATA p256<>+0x00(SB)/8, $0xffffffff00000001 // P256
+DATA p256<>+0x08(SB)/8, $0x0000000000000000 // P256
+DATA p256<>+0x10(SB)/8, $0x00000000ffffffff // P256
+DATA p256<>+0x18(SB)/8, $0xffffffffffffffff // P256
+DATA p256<>+0x20(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0
+DATA p256<>+0x28(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0
+DATA p256<>+0x30(SB)/8, $0x0000000010111213 // SEL 0 d1 d0 0
+DATA p256<>+0x38(SB)/8, $0x1415161700000000 // SEL 0 d1 d0 0
+DATA p256<>+0x40(SB)/8, $0x18191a1b1c1d1e1f // SEL d1 d0 d1 d0
+DATA p256<>+0x48(SB)/8, $0x18191a1b1c1d1e1f // SEL d1 d0 d1 d0
+DATA p256mul<>+0x00(SB)/8, $0x00000000ffffffff // P256 original
+DATA p256mul<>+0x08(SB)/8, $0xffffffffffffffff // P256
+DATA p256mul<>+0x10(SB)/8, $0xffffffff00000001 // P256 original
+DATA p256mul<>+0x18(SB)/8, $0x0000000000000000 // P256
+DATA p256mul<>+0x20(SB)/8, $0x1c1d1e1f00000000 // SEL d0 0 0 d0
+DATA p256mul<>+0x28(SB)/8, $0x000000001c1d1e1f // SEL d0 0 0 d0
+DATA p256mul<>+0x30(SB)/8, $0x0001020304050607 // SEL d0 0 d1 d0
+DATA p256mul<>+0x38(SB)/8, $0x1c1d1e1f0c0d0e0f // SEL d0 0 d1 d0
+DATA p256mul<>+0x40(SB)/8, $0x040506071c1d1e1f // SEL 0 d1 d0 d1
+DATA p256mul<>+0x48(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL 0 d1 d0 d1
+DATA p256mul<>+0x50(SB)/8, $0x0405060704050607 // SEL 0 0 d1 d0
+DATA p256mul<>+0x58(SB)/8, $0x1c1d1e1f0c0d0e0f // SEL 0 0 d1 d0
+DATA p256mul<>+0x60(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0
+DATA p256mul<>+0x68(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0
+DATA p256mul<>+0x70(SB)/8, $0x141516170c0d0e0f // SEL 0 d1 d0 0
+DATA p256mul<>+0x78(SB)/8, $0x1c1d1e1f14151617 // SEL 0 d1 d0 0
+DATA p256mul<>+0x80(SB)/8, $0xffffffff00000000 // (1*2^256)%P256
+DATA p256mul<>+0x88(SB)/8, $0x0000000000000001 // (1*2^256)%P256
+DATA p256mul<>+0x90(SB)/8, $0x00000000fffffffe // (1*2^256)%P256
+DATA p256mul<>+0x98(SB)/8, $0xffffffffffffffff // (1*2^256)%P256
+
+// The following are used with VPERM to extract the high and low
+// values from the intermediate results of a vector multiply.
+// They are used in the VMULTxxx macros. These have been tested
+// only on little endian, I think they would have to be different
+// for big endian.
+DATA p256permhilo<>+0x00(SB)/8, $0x0405060714151617 // least significant
+DATA p256permhilo<>+0x08(SB)/8, $0x0c0d0e0f1c1d1e1f
+DATA p256permhilo<>+0x10(SB)/8, $0x0001020310111213 // most significant
+DATA p256permhilo<>+0x18(SB)/8, $0x08090a0b18191A1B
+
+// External declarations for constants
+GLOBL p256ord<>(SB), 8, $32
+GLOBL p256<>(SB), 8, $80
+GLOBL p256mul<>(SB), 8, $160
+GLOBL p256permhilo<>(SB), 8, $32
+GLOBL byteswap<>+0(SB), RODATA, $16
+
+// The following macros are used to implement the ppc64le
+// equivalent function from the corresponding s390x
+// instruction for vector multiply high, low, and add,
+// since there aren't exact equivalent instructions.
+// The corresponding s390x instructions appear in the
+// comments.
+// Implementation for big endian would have to be
+// investigated, I think it would be different.
+//
+// Vector multiply low word
+//
+// VMLF x0, x1, out_low
+#define VMULT_LOW(x1, x2, out_low) \
+ VMULUWM x1, x2, out_low
+
+//
+// Vector multiply high word
+//
+// VMLHF x0, x1, out_hi
+#define VMULT_HI(x1, x2, out_hi) \
+ VMULEUW x1, x2, TMP1; \
+ VMULOUW x1, x2, TMP2; \
+ VPERM TMP1, TMP2, EXTRACT_HI, out_hi
+
+//
+// Vector multiply word
+//
+// VMLF x0, x1, out_low
+// VMLHF x0, x1, out_hi
+#define VMULT(x1, x2, out_low, out_hi) \
+ VMULEUW x1, x2, TMP1; \
+ VMULOUW x1, x2, TMP2; \
+ VPERM TMP1, TMP2, EXTRACT_LO, out_low; \
+ VPERM TMP1, TMP2, EXTRACT_HI, out_hi
+
+//
+// Vector multiply add word
+//
+// VMALF x0, x1, y, out_low
+// VMALHF x0, x1, y, out_hi
+#define VMULT_ADD(x1, x2, y, out_low, out_hi) \
+ VSPLTISW $1, TMP1; \
+ VMULEUW y, TMP1, TMP2; \
+ VMULOUW y, TMP1, TMP1; \
+ VMULEUW x1, x2, out_low; \
+ VMULOUW x1, x2, out_hi; \
+ VADDUDM TMP1, out_hi, TMP1; \
+ VADDUDM TMP2, out_low, TMP2; \
+ VPERM TMP2, TMP1, EXTRACT_LO, out_low; \
+ VPERM TMP2, TMP1, EXTRACT_HI, out_hi
+
+//
+// Vector multiply add high word
+//
+// VMALF x0, x1, y, out_low
+// VMALHF x0, x1, y, out_hi
+#define VMULT_ADD_HI(x1, x2, y, out_low, out_hi) \
+ VSPLTISW $1, TMP1; \
+ VMULOUW y, TMP1, TMP2; \
+ VMULEUW y, TMP1, TMP1; \
+ VMULEUW x1, x2, out_hi; \
+ VMULOUW x1, x2, out_low; \
+ VADDUDM TMP1, out_hi, TMP1; \
+ VADDUDM TMP2, out_low, TMP2; \
+ VPERM TMP2, TMP1, EXTRACT_HI, out_hi
+
+//
+// Vector multiply add low word
+//
+// VMALF s0, x1, y, out_low
+#define VMULT_ADD_LOW(x1, x2, y, out_low) \
+ VMULUWM x1, x2, out_low; \
+ VADDUWM out_low, y, out_low
+
+#define res_ptr R3
+#define a_ptr R4
+
+#undef res_ptr
+#undef a_ptr
+
+// func p256NegCond(val *p256Point, cond int)
+#define P1ptr R3
+#define CPOOL R7
+
+#define Y1L V0
+#define Y1L_ VS32
+#define Y1H V1
+#define Y1H_ VS33
+#define T1L V2
+#define T1L_ VS34
+#define T1H V3
+#define T1H_ VS35
+
+#define SWAP V28
+#define SWAP_ VS60
+
+#define PL V30
+#define PL_ VS62
+#define PH V31
+#define PH_ VS63
+
+#define SEL1 V5
+#define SEL1_ VS37
+#define CAR1 V6
+//
+// iff cond == 1 val <- -val
+//
+TEXT ·p256NegCond(SB), NOSPLIT, $0-16
+ MOVD val+0(FP), P1ptr
+ MOVD $16, R16
+ MOVD $32, R17
+ MOVD $48, R18
+ MOVD $40, R19
+
+ MOVD cond+8(FP), R6
+ CMP $0, R6
+ BC 12, 2, LR // just return if cond == 0
+
+ MOVD $p256mul<>+0x00(SB), CPOOL
+
+ MOVD $byteswap<>+0x00(SB), R8
+ LXVD2X (R8)(R0), SWAP_
+
+ LXVD2X (P1ptr)(R17), Y1L_
+ LXVD2X (P1ptr)(R18), Y1H_
+
+ VPERM Y1H, Y1H, SWAP, Y1H
+ VPERM Y1L, Y1L, SWAP, Y1L
+
+ LXVD2X (CPOOL)(R0), PL_
+ LXVD2X (CPOOL)(R16), PH_
+
+ VSUBCUQ PL, Y1L, CAR1 // subtract part2 giving carry
+ VSUBUQM PL, Y1L, T1L // subtract part2 giving result
+ VSUBEUQM PH, Y1H, CAR1, T1H // subtract part1 using carry from part2
+
+ VPERM T1H, T1H, SWAP, T1H
+ VPERM T1L, T1L, SWAP, T1L
+
+ STXVD2X T1L_, (R17+P1ptr)
+ STXVD2X T1H_, (R18+P1ptr)
+ RET
+
+#undef P1ptr
+#undef CPOOL
+#undef Y1L
+#undef Y1L_
+#undef Y1H
+#undef Y1H_
+#undef T1L
+#undef T1L_
+#undef T1H
+#undef T1H_
+#undef PL
+#undef PL_
+#undef PH
+#undef PH_
+#undef SEL1
+#undef SEL1_
+#undef CAR1
+
+//
+// if cond == 0 res <-b else res <-a
+//
+// func p256MovCond(res, a, b *p256Point, cond int)
+#define P3ptr R3
+#define P1ptr R4
+#define P2ptr R5
+
+#define FROMptr R7
+#define X1L V0
+#define X1H V1
+#define Y1L V2
+#define Y1H V3
+#define Z1L V4
+#define Z1H V5
+#define X1L_ VS32
+#define X1H_ VS33
+#define Y1L_ VS34
+#define Y1H_ VS35
+#define Z1L_ VS36
+#define Z1H_ VS37
+
+// This function uses LXVD2X and STXVD2X to avoid the
+// data alignment requirement for LVX, STVX. Since
+// this code is just moving bytes and not doing arithmetic,
+// order of the bytes doesn't matter.
+//
+TEXT ·p256MovCond(SB), NOSPLIT, $0-32
+ MOVD res+0(FP), P3ptr
+ MOVD a+8(FP), P1ptr
+ MOVD b+16(FP), P2ptr
+ MOVD cond+24(FP), R6
+ MOVD $16, R16
+ MOVD $32, R17
+ MOVD $48, R18
+ MOVD $56, R21
+ MOVD $64, R19
+ MOVD $80, R20
+
+ // Check the condition
+ CMP $0, R6
+
+ // If 0, use b as the source
+ BEQ FROMB
+
+ // Not 0, use a as the source
+ MOVD P1ptr, FROMptr
+ BR LOADVALS
+
+FROMB:
+ MOVD P2ptr, FROMptr
+
+LOADVALS:
+ // Load from a or b depending on the setting
+ // of FROMptr
+ LXVW4X (FROMptr+R0), X1H_
+ LXVW4X (FROMptr+R16), X1L_
+ LXVW4X (FROMptr+R17), Y1H_
+ LXVW4X (FROMptr+R18), Y1L_
+ LXVW4X (FROMptr+R19), Z1H_
+ LXVW4X (FROMptr+R20), Z1L_
+
+ STXVW4X X1H_, (P3ptr+R0)
+ STXVW4X X1L_, (P3ptr+R16)
+ STXVW4X Y1H_, (P3ptr+R17)
+ STXVW4X Y1L_, (P3ptr+R18)
+ STXVW4X Z1H_, (P3ptr+R19)
+ STXVW4X Z1L_, (P3ptr+R20)
+
+ RET
+
+#undef P3ptr
+#undef P1ptr
+#undef P2ptr
+#undef FROMptr
+#undef X1L
+#undef X1H
+#undef Y1L
+#undef Y1H
+#undef Z1L
+#undef Z1H
+#undef X1L_
+#undef X1H_
+#undef Y1L_
+#undef Y1H_
+#undef Z1L_
+#undef Z1H_
+//
+// Select the point from the table for idx
+//
+// func p256Select(point *p256Point, table []p256Point, idx int)
+#define P3ptr R3
+#define P1ptr R4
+#define COUNT R5
+
+#define X1L V0
+#define X1H V1
+#define Y1L V2
+#define Y1H V3
+#define Z1L V4
+#define Z1H V5
+#define X1L_ VS32
+#define X1H_ VS33
+#define Y1L_ VS34
+#define Y1H_ VS35
+#define Z1L_ VS36
+#define Z1H_ VS37
+#define X2L V6
+#define X2H V7
+#define Y2L V8
+#define Y2H V9
+#define Z2L V10
+#define Z2H V11
+#define X2L_ VS38
+#define X2H_ VS39
+#define Y2L_ VS40
+#define Y2H_ VS41
+#define Z2L_ VS42
+#define Z2H_ VS43
+
+#define ONE V18
+#define IDX V19
+#define SEL1 V20
+#define SEL1_ VS52
+#define SEL2 V21
+//
+TEXT ·p256Select(SB), NOSPLIT, $0-40
+ MOVD point+0(FP), P3ptr
+ MOVD table+8(FP), P1ptr
+ MOVD $16, R16
+ MOVD $32, R17
+ MOVD $48, R18
+ MOVD $64, R19
+ MOVD $80, R20
+
+ LXVDSX (R1)(R19), SEL1_ // VLREPG idx+32(FP), SEL1
+ VSPLTB $7, SEL1, IDX // splat byte
+ VSPLTISB $1, ONE // VREPIB $1, ONE
+ VSPLTISB $1, SEL2 // VREPIB $1, SEL2
+ MOVD $17, COUNT
+ MOVD COUNT, CTR // set up ctr
+
+ VSPLTISB $0, X1H // VZERO X1H
+ VSPLTISB $0, X1L // VZERO X1L
+ VSPLTISB $0, Y1H // VZERO Y1H
+ VSPLTISB $0, Y1L // VZERO Y1L
+ VSPLTISB $0, Z1H // VZERO Z1H
+ VSPLTISB $0, Z1L // VZERO Z1L
+
+loop_select:
+
+ // LVXD2X is used here since data alignment doesn't
+ // matter.
+
+ LXVD2X (P1ptr+R0), X2H_
+ LXVD2X (P1ptr+R16), X2L_
+ LXVD2X (P1ptr+R17), Y2H_
+ LXVD2X (P1ptr+R18), Y2L_
+ LXVD2X (P1ptr+R19), Z2H_
+ LXVD2X (P1ptr+R20), Z2L_
+
+ VCMPEQUD SEL2, IDX, SEL1 // VCEQG SEL2, IDX, SEL1 OK
+
+ // This will result in SEL1 being all 0s or 1s, meaning
+ // the result is either X1L or X2L, no individual byte
+ // selection.
+
+ VSEL X1L, X2L, SEL1, X1L
+ VSEL X1H, X2H, SEL1, X1H
+ VSEL Y1L, Y2L, SEL1, Y1L
+ VSEL Y1H, Y2H, SEL1, Y1H
+ VSEL Z1L, Z2L, SEL1, Z1L
+ VSEL Z1H, Z2H, SEL1, Z1H
+
+ // Add 1 to all bytes in SEL2
+ VADDUBM SEL2, ONE, SEL2 // VAB SEL2, ONE, SEL2 OK
+ ADD $96, P1ptr
+ BC 16, 0, loop_select
+
+ // STXVD2X is used here so that alignment doesn't
+ // need to be verified. Since values were loaded
+ // using LXVD2X this is OK.
+ STXVD2X X1H_, (P3ptr+R0)
+ STXVD2X X1L_, (P3ptr+R16)
+ STXVD2X Y1H_, (P3ptr+R17)
+ STXVD2X Y1L_, (P3ptr+R18)
+ STXVD2X Z1H_, (P3ptr+R19)
+ STXVD2X Z1L_, (P3ptr+R20)
+ RET
+
+#undef P3ptr
+#undef P1ptr
+#undef COUNT
+#undef X1L
+#undef X1H
+#undef Y1L
+#undef Y1H
+#undef Z1L
+#undef Z1H
+#undef X2L
+#undef X2H
+#undef Y2L
+#undef Y2H
+#undef Z2L
+#undef Z2H
+#undef X2L_
+#undef X2H_
+#undef Y2L_
+#undef Y2H_
+#undef Z2L_
+#undef Z2H_
+#undef ONE
+#undef IDX
+#undef SEL1
+#undef SEL1_
+#undef SEL2
+
+// func p256SelectBase(point, table []uint64, idx int)
+#define P3ptr R3
+#define P1ptr R4
+#define COUNT R5
+
+#define X1L V0
+#define X1H V1
+#define Y1L V2
+#define Y1H V3
+#define Z1L V4
+#define Z1H V5
+#define X2L V6
+#define X2H V7
+#define Y2L V8
+#define Y2H V9
+#define Z2L V10
+#define Z2H V11
+#define X2L_ VS38
+#define X2H_ VS39
+#define Y2L_ VS40
+#define Y2H_ VS41
+#define Z2L_ VS42
+#define Z2H_ VS43
+
+#define ONE V18
+#define IDX V19
+#define SEL1 V20
+#define SEL1_ VS52
+#define SEL2 V21
+TEXT ·p256SelectBase(SB), NOSPLIT, $0-40
+ MOVD point+0(FP), P3ptr
+ MOVD table+8(FP), P1ptr
+ MOVD $16, R16
+ MOVD $32, R17
+ MOVD $48, R18
+ MOVD $64, R19
+ MOVD $80, R20
+ MOVD $56, R21
+
+ LXVDSX (R1)(R19), SEL1_
+ VSPLTB $7, SEL1, IDX // splat byte
+
+ VSPLTISB $1, ONE // Vector with byte 1s
+ VSPLTISB $1, SEL2 // Vector with byte 1s
+ MOVD $65, COUNT
+ MOVD COUNT, CTR // loop count
+
+ VSPLTISB $0, X1H // VZERO X1H
+ VSPLTISB $0, X1L // VZERO X1L
+ VSPLTISB $0, Y1H // VZERO Y1H
+ VSPLTISB $0, Y1L // VZERO Y1L
+ VSPLTISB $0, Z1H // VZERO Z1H
+ VSPLTISB $0, Z1L // VZERO Z1L
+
+loop_select:
+ LXVD2X (P1ptr+R0), X2H_
+ LXVD2X (P1ptr+R16), X2L_
+ LXVD2X (P1ptr+R17), Y2H_
+ LXVD2X (P1ptr+R18), Y2L_
+ LXVD2X (P1ptr+R19), Z2H_
+ LXVD2X (P1ptr+R20), Z2L_
+
+ VCMPEQUD SEL2, IDX, SEL1 // Compare against idx
+
+ VSEL X1L, X2L, SEL1, X1L // Select if idx matched
+ VSEL X1H, X2H, SEL1, X1H
+ VSEL Y1L, Y2L, SEL1, Y1L
+ VSEL Y1H, Y2H, SEL1, Y1H
+ VSEL Z1L, Z2L, SEL1, Z1L
+ VSEL Z1H, Z2H, SEL1, Z1H
+
+ VADDUBM SEL2, ONE, SEL2 // Increment SEL2 bytes by 1
+ ADD $96, P1ptr // Next chunk
+ BC 16, 0, loop_select
+
+ STXVD2X X1H_, (P3ptr+R0)
+ STXVD2X X1L_, (P3ptr+R16)
+ STXVD2X Y1H_, (P3ptr+R17)
+ STXVD2X Y1L_, (P3ptr+R18)
+ STXVD2X Z1H_, (P3ptr+R19)
+ STXVD2X Z1L_, (P3ptr+R20)
+ RET
+
+#undef P3ptr
+#undef P1ptr
+#undef COUNT
+#undef X1L
+#undef X1H
+#undef Y1L
+#undef Y1H
+#undef Z1L
+#undef Z1H
+#undef X2L
+#undef X2H
+#undef Y2L
+#undef Y2H
+#undef Z2L
+#undef Z2H
+#undef X1L_
+#undef X1H_
+#undef X2L_
+#undef X2H_
+#undef Y1L_
+#undef Y1H_
+#undef Y2L_
+#undef Y2H_
+#undef Z1L_
+#undef Z1H_
+#undef Z2L_
+#undef Z2H_
+#undef ONE
+#undef IDX
+#undef SEL1
+#undef SEL1_
+#undef SEL2
+#undef SWAP
+#undef SWAP_
+
+// ---------------------------------------
+// func p256FromMont(res, in []byte)
+#define res_ptr R3
+#define x_ptr R4
+#define CPOOL R7
+
+#define T0 V0
+#define T0_ VS32
+#define T1 V1
+#define T1_ VS33
+#define T2 V2
+#define TT0 V3
+#define TT1 V4
+#define TT0_ VS35
+#define TT1_ VS36
+
+#define ZER V6
+#define SEL1 V7
+#define SEL1_ VS39
+#define SEL2 V8
+#define SEL2_ VS40
+#define CAR1 V9
+#define CAR2 V10
+#define RED1 V11
+#define RED2 V12
+#define PL V13
+#define PL_ VS45
+#define PH V14
+#define PH_ VS46
+#define SWAP V28
+#define SWAP_ VS57
+
+TEXT ·p256FromMont(SB), NOSPLIT, $0-48
+ MOVD res+0(FP), res_ptr
+ MOVD in+24(FP), x_ptr
+
+ MOVD $16, R16
+ MOVD $32, R17
+ MOVD $48, R18
+ MOVD $64, R19
+ MOVD $p256<>+0x00(SB), CPOOL
+ MOVD $byteswap<>+0x00(SB), R15
+
+ VSPLTISB $0, T2 // VZERO T2
+ VSPLTISB $0, ZER // VZERO ZER
+
+ // Constants are defined so that the LXVD2X is correct
+ LXVD2X (CPOOL+R0), PH_
+ LXVD2X (CPOOL+R16), PL_
+
+ // VPERM byte selections
+ LXVD2X (CPOOL+R18), SEL2_
+ LXVD2X (CPOOL+R19), SEL1_
+
+ LXVD2X (R15)(R0), SWAP_
+
+ LXVD2X (R16)(x_ptr), T1_
+ LXVD2X (R0)(x_ptr), T0_
+
+ // Put in true little endian order
+ VPERM T0, T0, SWAP, T0
+ VPERM T1, T1, SWAP, T1
+
+ // First round
+ VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0
+ VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0
+ VSUBUQM RED2, RED1, RED2 // VSQ RED1, RED2, RED2 // Guaranteed not to underflow
+
+ VSLDOI $8, T1, T0, T0 // VSLDB $8, T1, T0, T0
+ VSLDOI $8, T2, T1, T1 // VSLDB $8, T2, T1, T1
+
+ VADDCUQ T0, RED1, CAR1 // VACCQ T0, RED1, CAR1
+ VADDUQM T0, RED1, T0 // VAQ T0, RED1, T0
+ VADDECUQ T1, RED2, CAR1, CAR2 // VACCCQ T1, RED2, CAR1, CAR2
+ VADDEUQM T1, RED2, CAR1, T1 // VACQ T1, RED2, CAR1, T1
+ VADDUQM T2, CAR2, T2 // VAQ T2, CAR2, T2
+
+ // Second round
+ VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0
+ VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0
+ VSUBUQM RED2, RED1, RED2 // VSQ RED1, RED2, RED2 // Guaranteed not to underflow
+
+ VSLDOI $8, T1, T0, T0 // VSLDB $8, T1, T0, T0
+ VSLDOI $8, T2, T1, T1 // VSLDB $8, T2, T1, T1
+
+ VADDCUQ T0, RED1, CAR1 // VACCQ T0, RED1, CAR1
+ VADDUQM T0, RED1, T0 // VAQ T0, RED1, T0
+ VADDECUQ T1, RED2, CAR1, CAR2 // VACCCQ T1, RED2, CAR1, CAR2
+ VADDEUQM T1, RED2, CAR1, T1 // VACQ T1, RED2, CAR1, T1
+ VADDUQM T2, CAR2, T2 // VAQ T2, CAR2, T2
+
+ // Third round
+ VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0
+ VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0
+ VSUBUQM RED2, RED1, RED2 // VSQ RED1, RED2, RED2 // Guaranteed not to underflow
+
+ VSLDOI $8, T1, T0, T0 // VSLDB $8, T1, T0, T0
+ VSLDOI $8, T2, T1, T1 // VSLDB $8, T2, T1, T1
+
+ VADDCUQ T0, RED1, CAR1 // VACCQ T0, RED1, CAR1
+ VADDUQM T0, RED1, T0 // VAQ T0, RED1, T0
+ VADDECUQ T1, RED2, CAR1, CAR2 // VACCCQ T1, RED2, CAR1, CAR2
+ VADDEUQM T1, RED2, CAR1, T1 // VACQ T1, RED2, CAR1, T1
+ VADDUQM T2, CAR2, T2 // VAQ T2, CAR2, T2
+
+ // Last round
+ VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0
+ VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0
+ VSUBUQM RED2, RED1, RED2 // VSQ RED1, RED2, RED2 // Guaranteed not to underflow
+
+ VSLDOI $8, T1, T0, T0 // VSLDB $8, T1, T0, T0
+ VSLDOI $8, T2, T1, T1 // VSLDB $8, T2, T1, T1
+
+ VADDCUQ T0, RED1, CAR1 // VACCQ T0, RED1, CAR1
+ VADDUQM T0, RED1, T0 // VAQ T0, RED1, T0
+ VADDECUQ T1, RED2, CAR1, CAR2 // VACCCQ T1, RED2, CAR1, CAR2
+ VADDEUQM T1, RED2, CAR1, T1 // VACQ T1, RED2, CAR1, T1
+ VADDUQM T2, CAR2, T2 // VAQ T2, CAR2, T2
+
+ // ---------------------------------------------------
+
+ VSUBCUQ T0, PL, CAR1 // VSCBIQ PL, T0, CAR1
+ VSUBUQM T0, PL, TT0 // VSQ PL, T0, TT0
+ VSUBECUQ T1, PH, CAR1, CAR2 // VSBCBIQ T1, PH, CAR1, CAR2
+ VSUBEUQM T1, PH, CAR1, TT1 // VSBIQ T1, PH, CAR1, TT1
+ VSUBEUQM T2, ZER, CAR2, T2 // VSBIQ T2, ZER, CAR2, T2
+
+ VSEL TT0, T0, T2, T0
+ VSEL TT1, T1, T2, T1
+
+ // Reorder the bytes so STXVD2X can be used.
+ // TT0, TT1 used for VPERM result in case
+ // the caller expects T0, T1 to be good.
+ VPERM T0, T0, SWAP, TT0
+ VPERM T1, T1, SWAP, TT1
+
+ STXVD2X TT0_, (R0)(res_ptr)
+ STXVD2X TT1_, (R16)(res_ptr)
+ RET
+
+#undef res_ptr
+#undef x_ptr
+#undef CPOOL
+#undef T0
+#undef T0_
+#undef T1
+#undef T1_
+#undef T2
+#undef TT0
+#undef TT1
+#undef ZER
+#undef SEL1
+#undef SEL1_
+#undef SEL2
+#undef SEL2_
+#undef CAR1
+#undef CAR2
+#undef RED1
+#undef RED2
+#undef PL
+#undef PL_
+#undef PH
+#undef PH_
+#undef SWAP
+#undef SWAP_
+
+// ---------------------------------------
+// p256MulInternal
+// V0-V3 V30,V31 - Not Modified
+// V4-V15 V27-V29 - Volatile
+
+#define CPOOL R7
+
+// Parameters
+#define X0 V0 // Not modified
+#define X1 V1 // Not modified
+#define Y0 V2 // Not modified
+#define Y1 V3 // Not modified
+#define T0 V4 // Result
+#define T1 V5 // Result
+#define P0 V30 // Not modified
+#define P1 V31 // Not modified
+
+// Temporaries: lots of reused vector regs
+#define YDIG V6 // Overloaded with CAR2
+#define ADD1H V7 // Overloaded with ADD3H
+#define ADD2H V8 // Overloaded with ADD4H
+#define ADD3 V9 // Overloaded with SEL2,SEL5
+#define ADD4 V10 // Overloaded with SEL3,SEL6
+#define RED1 V11 // Overloaded with CAR2
+#define RED2 V12
+#define RED3 V13 // Overloaded with SEL1
+#define T2 V14
+// Overloaded temporaries
+#define ADD1 V4 // Overloaded with T0
+#define ADD2 V5 // Overloaded with T1
+#define ADD3H V7 // Overloaded with ADD1H
+#define ADD4H V8 // Overloaded with ADD2H
+#define ZER V28 // Overloaded with TMP1
+#define CAR1 V6 // Overloaded with YDIG
+#define CAR2 V11 // Overloaded with RED1
+// Constant Selects
+#define SEL1 V13 // Overloaded with RED3
+#define SEL2 V9 // Overloaded with ADD3,SEL5
+#define SEL3 V10 // Overloaded with ADD4,SEL6
+#define SEL4 V6 // Overloaded with YDIG,CAR1
+#define SEL5 V9 // Overloaded with ADD3,SEL2
+#define SEL6 V10 // Overloaded with ADD4,SEL3
+#define SEL1_ VS45
+#define SEL2_ VS41
+#define SEL3_ VS42
+#define SEL4_ VS38
+#define SEL5_ VS41
+#define SEL6_ VS42
+
+// TMP1, TMP2, EXTRACT_LO, EXTRACT_HI used in
+// VMULT macros
+#define TMP1 V13 // Overloaded with RED3
+#define TMP2 V27
+#define EVENODD R5
+#define EXTRACT_LO V28
+#define EXTRACT_LO_ VS60
+#define EXTRACT_HI V29
+#define EXTRACT_HI_ VS61
+
+/* *
+ * To follow the flow of bits, for your own sanity a stiff drink, need you shall.
+ * Of a single round, a 'helpful' picture, here is. Meaning, column position has.
+ * With you, SIMD be...
+ *
+ * +--------+--------+
+ * +--------| RED2 | RED1 |
+ * | +--------+--------+
+ * | ---+--------+--------+
+ * | +---- T2| T1 | T0 |--+
+ * | | ---+--------+--------+ |
+ * | | |
+ * | | ======================= |
+ * | | |
+ * | | +--------+--------+<-+
+ * | +-------| ADD2 | ADD1 |--|-----+
+ * | | +--------+--------+ | |
+ * | | +--------+--------+<---+ |
+ * | | | ADD2H | ADD1H |--+ |
+ * | | +--------+--------+ | |
+ * | | +--------+--------+<-+ |
+ * | | | ADD4 | ADD3 |--|-+ |
+ * | | +--------+--------+ | | |
+ * | | +--------+--------+<---+ | |
+ * | | | ADD4H | ADD3H |------|-+ |(+vzero)
+ * | | +--------+--------+ | | V
+ * | | ------------------------ | | +--------+
+ * | | | | | RED3 | [d0 0 0 d0]
+ * | | | | +--------+
+ * | +---->+--------+--------+ | | |
+ * (T2[1w]||ADD2[4w]||ADD1[3w]) +--------| T1 | T0 | | | |
+ * | +--------+--------+ | | |
+ * +---->---+--------+--------+ | | |
+ * T2| T1 | T0 |----+ | |
+ * ---+--------+--------+ | | |
+ * ---+--------+--------+<---+ | |
+ * +--- T2| T1 | T0 |----------+
+ * | ---+--------+--------+ | |
+ * | +--------+--------+<-------------+
+ * | | RED2 | RED1 |-----+ | | [0 d1 d0 d1] [d0 0 d1 d0]
+ * | +--------+--------+ | | |
+ * | +--------+<----------------------+
+ * | | RED3 |--------------+ | [0 0 d1 d0]
+ * | +--------+ | |
+ * +--->+--------+--------+ | |
+ * | T1 | T0 |--------+
+ * +--------+--------+ | |
+ * --------------------------- | |
+ * | |
+ * +--------+--------+<----+ |
+ * | RED2 | RED1 | |
+ * +--------+--------+ |
+ * ---+--------+--------+<-------+
+ * T2| T1 | T0 | (H1P-H1P-H00RRAY!)
+ * ---+--------+--------+
+ *
+ * *Mi obra de arte de siglo XXI @vpaprots
+ *
+ *
+ * First group is special, doesn't get the two inputs:
+ * +--------+--------+<-+
+ * +-------| ADD2 | ADD1 |--|-----+
+ * | +--------+--------+ | |
+ * | +--------+--------+<---+ |
+ * | | ADD2H | ADD1H |--+ |
+ * | +--------+--------+ | |
+ * | +--------+--------+<-+ |
+ * | | ADD4 | ADD3 |--|-+ |
+ * | +--------+--------+ | | |
+ * | +--------+--------+<---+ | |
+ * | | ADD4H | ADD3H |------|-+ |(+vzero)
+ * | +--------+--------+ | | V
+ * | ------------------------ | | +--------+
+ * | | | | RED3 | [d0 0 0 d0]
+ * | | | +--------+
+ * +---->+--------+--------+ | | |
+ * (T2[1w]||ADD2[4w]||ADD1[3w]) | T1 | T0 |----+ | |
+ * +--------+--------+ | | |
+ * ---+--------+--------+<---+ | |
+ * +--- T2| T1 | T0 |----------+
+ * | ---+--------+--------+ | |
+ * | +--------+--------+<-------------+
+ * | | RED2 | RED1 |-----+ | | [0 d1 d0 d1] [d0 0 d1 d0]
+ * | +--------+--------+ | | |
+ * | +--------+<----------------------+
+ * | | RED3 |--------------+ | [0 0 d1 d0]
+ * | +--------+ | |
+ * +--->+--------+--------+ | |
+ * | T1 | T0 |--------+
+ * +--------+--------+ | |
+ * --------------------------- | |
+ * | |
+ * +--------+--------+<----+ |
+ * | RED2 | RED1 | |
+ * +--------+--------+ |
+ * ---+--------+--------+<-------+
+ * T2| T1 | T0 | (H1P-H1P-H00RRAY!)
+ * ---+--------+--------+
+ *
+ * Last 'group' needs to RED2||RED1 shifted less
+ */
+TEXT p256MulInternal<>(SB), NOSPLIT, $0-16
+ // CPOOL loaded from caller
+ MOVD $16, R16
+ MOVD $32, R17
+ MOVD $48, R18
+ MOVD $64, R19
+ MOVD $80, R20
+ MOVD $96, R21
+ MOVD $112, R22
+
+ MOVD $p256permhilo<>+0x00(SB), EVENODD
+
+ // These values are used by the VMULTxxx macros to
+ // extract the high and low portions of the intermediate
+ // result.
+ LXVD2X (R0)(EVENODD), EXTRACT_LO_
+ LXVD2X (R16)(EVENODD), EXTRACT_HI_
+
+ // ---------------------------------------------------
+
+ VSPLTW $3, Y0, YDIG // VREPF Y0 is input
+
+ // VMLHF X0, YDIG, ADD1H
+ // VMLHF X1, YDIG, ADD2H
+ // VMLF X0, YDIG, ADD1
+ // VMLF X1, YDIG, ADD2
+ //
+ VMULT(X0, YDIG, ADD1, ADD1H)
+ VMULT(X1, YDIG, ADD2, ADD2H)
+
+ VSPLTW $2, Y0, YDIG // VREPF
+
+ // VMALF X0, YDIG, ADD1H, ADD3
+ // VMALF X1, YDIG, ADD2H, ADD4
+ // VMALHF X0, YDIG, ADD1H, ADD3H // ADD1H Free
+ // VMALHF X1, YDIG, ADD2H, ADD4H // ADD2H Free
+ VMULT_ADD(X0, YDIG, ADD1H, ADD3, ADD3H)
+ VMULT_ADD(X1, YDIG, ADD2H, ADD4, ADD4H)
+
+ LXVD2X (R17)(CPOOL), SEL1_
+ VSPLTISB $0, ZER // VZERO ZER
+ VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0]
+
+ VSLDOI $12, ADD2, ADD1, T0 // ADD1 Free // VSLDB
+ VSLDOI $12, ZER, ADD2, T1 // ADD2 Free // VSLDB
+
+ VADDCUQ T0, ADD3, CAR1 // VACCQ
+ VADDUQM T0, ADD3, T0 // ADD3 Free // VAQ
+ VADDECUQ T1, ADD4, CAR1, T2 // VACCCQ
+ VADDEUQM T1, ADD4, CAR1, T1 // ADD4 Free // VACQ
+
+ LXVD2X (R18)(CPOOL), SEL2_
+ LXVD2X (R19)(CPOOL), SEL3_
+ LXVD2X (R20)(CPOOL), SEL4_
+ VPERM RED3, T0, SEL2, RED1 // [d0 0 d1 d0]
+ VPERM RED3, T0, SEL3, RED2 // [ 0 d1 d0 d1]
+ VPERM RED3, T0, SEL4, RED3 // [ 0 0 d1 d0]
+ VSUBUQM RED2, RED3, RED2 // Guaranteed not to underflow -->? // VSQ
+
+ VSLDOI $12, T1, T0, T0 // VSLDB
+ VSLDOI $12, T2, T1, T1 // VSLDB
+
+ VADDCUQ T0, ADD3H, CAR1 // VACCQ
+ VADDUQM T0, ADD3H, T0 // VAQ
+ VADDECUQ T1, ADD4H, CAR1, T2 // VACCCQ
+ VADDEUQM T1, ADD4H, CAR1, T1 // VACQ
+
+ // ---------------------------------------------------
+
+ VSPLTW $1, Y0, YDIG // VREPF
+ LXVD2X (R0)(EVENODD), EXTRACT_LO_
+ LXVD2X (R16)(EVENODD), EXTRACT_HI_
+
+ // VMALHF X0, YDIG, T0, ADD1H
+ // VMALHF X1, YDIG, T1, ADD2H
+ // VMALF X0, YDIG, T0, ADD1 // T0 Free->ADD1
+ // VMALF X1, YDIG, T1, ADD2 // T1 Free->ADD2
+ VMULT_ADD(X0, YDIG, T0, ADD1, ADD1H)
+ VMULT_ADD(X1, YDIG, T1, ADD2, ADD2H)
+
+ VSPLTW $0, Y0, YDIG // VREPF
+
+ // VMALF X0, YDIG, ADD1H, ADD3
+ // VMALF X1, YDIG, ADD2H, ADD4
+ // VMALHF X0, YDIG, ADD1H, ADD3H // ADD1H Free->ADD3H
+ // VMALHF X1, YDIG, ADD2H, ADD4H // ADD2H Free->ADD4H , YDIG Free->ZER
+ VMULT_ADD(X0, YDIG, ADD1H, ADD3, ADD3H)
+ VMULT_ADD(X1, YDIG, ADD2H, ADD4, ADD4H)
+
+ VSPLTISB $0, ZER // VZERO ZER
+ LXVD2X (R17)(CPOOL), SEL1_
+ VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0]
+
+ VSLDOI $12, ADD2, ADD1, T0 // ADD1 Free->T0 // VSLDB
+ VSLDOI $12, T2, ADD2, T1 // ADD2 Free->T1, T2 Free // VSLDB
+
+ VADDCUQ T0, RED1, CAR1 // VACCQ
+ VADDUQM T0, RED1, T0 // VAQ
+ VADDECUQ T1, RED2, CAR1, T2 // VACCCQ
+ VADDEUQM T1, RED2, CAR1, T1 // VACQ
+
+ VADDCUQ T0, ADD3, CAR1 // VACCQ
+ VADDUQM T0, ADD3, T0 // VAQ
+ VADDECUQ T1, ADD4, CAR1, CAR2 // VACCCQ
+ VADDEUQM T1, ADD4, CAR1, T1 // VACQ
+ VADDUQM T2, CAR2, T2 // VAQ
+
+ LXVD2X (R18)(CPOOL), SEL2_
+ LXVD2X (R19)(CPOOL), SEL3_
+ LXVD2X (R20)(CPOOL), SEL4_
+ VPERM RED3, T0, SEL2, RED1 // [d0 0 d1 d0]
+ VPERM RED3, T0, SEL3, RED2 // [ 0 d1 d0 d1]
+ VPERM RED3, T0, SEL4, RED3 // [ 0 0 d1 d0]
+ VSUBUQM RED2, RED3, RED2 // Guaranteed not to underflow // VSQ
+
+ VSLDOI $12, T1, T0, T0 // VSLDB
+ VSLDOI $12, T2, T1, T1 // VSLDB
+
+ VADDCUQ T0, ADD3H, CAR1 // VACCQ
+ VADDUQM T0, ADD3H, T0 // VAQ
+ VADDECUQ T1, ADD4H, CAR1, T2 // VACCCQ
+ VADDEUQM T1, ADD4H, CAR1, T1 // VACQ
+
+ // ---------------------------------------------------
+
+ VSPLTW $3, Y1, YDIG // VREPF
+ LXVD2X (R0)(EVENODD), EXTRACT_LO_
+ LXVD2X (R16)(EVENODD), EXTRACT_HI_
+
+ // VMALHF X0, YDIG, T0, ADD1H
+ // VMALHF X1, YDIG, T1, ADD2H
+ // VMALF X0, YDIG, T0, ADD1
+ // VMALF X1, YDIG, T1, ADD2
+ VMULT_ADD(X0, YDIG, T0, ADD1, ADD1H)
+ VMULT_ADD(X1, YDIG, T1, ADD2, ADD2H)
+
+ VSPLTW $2, Y1, YDIG // VREPF
+
+ // VMALF X0, YDIG, ADD1H, ADD3
+ // VMALF X1, YDIG, ADD2H, ADD4
+ // VMALHF X0, YDIG, ADD1H, ADD3H // ADD1H Free
+ // VMALHF X1, YDIG, ADD2H, ADD4H // ADD2H Free
+ VMULT_ADD(X0, YDIG, ADD1H, ADD3, ADD3H)
+ VMULT_ADD(X1, YDIG, ADD2H, ADD4, ADD4H)
+
+ LXVD2X (R17)(CPOOL), SEL1_
+ VSPLTISB $0, ZER // VZERO ZER
+ LXVD2X (R17)(CPOOL), SEL1_
+ VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0]
+
+ VSLDOI $12, ADD2, ADD1, T0 // ADD1 Free // VSLDB
+ VSLDOI $12, T2, ADD2, T1 // ADD2 Free // VSLDB
+
+ VADDCUQ T0, RED1, CAR1 // VACCQ
+ VADDUQM T0, RED1, T0 // VAQ
+ VADDECUQ T1, RED2, CAR1, T2 // VACCCQ
+ VADDEUQM T1, RED2, CAR1, T1 // VACQ
+
+ VADDCUQ T0, ADD3, CAR1 // VACCQ
+ VADDUQM T0, ADD3, T0 // VAQ
+ VADDECUQ T1, ADD4, CAR1, CAR2 // VACCCQ
+ VADDEUQM T1, ADD4, CAR1, T1 // VACQ
+ VADDUQM T2, CAR2, T2 // VAQ
+
+ LXVD2X (R18)(CPOOL), SEL2_
+ LXVD2X (R19)(CPOOL), SEL3_
+ LXVD2X (R20)(CPOOL), SEL4_
+ VPERM RED3, T0, SEL2, RED1 // [d0 0 d1 d0]
+ VPERM RED3, T0, SEL3, RED2 // [ 0 d1 d0 d1]
+ VPERM RED3, T0, SEL4, RED3 // [ 0 0 d1 d0]
+ VSUBUQM RED2, RED3, RED2 // Guaranteed not to underflow // VSQ
+
+ VSLDOI $12, T1, T0, T0 // VSLDB
+ VSLDOI $12, T2, T1, T1 // VSLDB
+
+ VADDCUQ T0, ADD3H, CAR1 // VACCQ
+ VADDUQM T0, ADD3H, T0 // VAQ
+ VADDECUQ T1, ADD4H, CAR1, T2 // VACCCQ
+ VADDEUQM T1, ADD4H, CAR1, T1 // VACQ
+
+ // ---------------------------------------------------
+
+ VSPLTW $1, Y1, YDIG // VREPF
+ LXVD2X (R0)(EVENODD), EXTRACT_LO_
+ LXVD2X (R16)(EVENODD), EXTRACT_HI_
+
+ // VMALHF X0, YDIG, T0, ADD1H
+ // VMALHF X1, YDIG, T1, ADD2H
+ // VMALF X0, YDIG, T0, ADD1
+ // VMALF X1, YDIG, T1, ADD2
+ VMULT_ADD(X0, YDIG, T0, ADD1, ADD1H)
+ VMULT_ADD(X1, YDIG, T1, ADD2, ADD2H)
+
+ VSPLTW $0, Y1, YDIG // VREPF
+
+ // VMALF X0, YDIG, ADD1H, ADD3
+ // VMALF X1, YDIG, ADD2H, ADD4
+ // VMALHF X0, YDIG, ADD1H, ADD3H
+ // VMALHF X1, YDIG, ADD2H, ADD4H
+ VMULT_ADD(X0, YDIG, ADD1H, ADD3, ADD3H)
+ VMULT_ADD(X1, YDIG, ADD2H, ADD4, ADD4H)
+
+ VSPLTISB $0, ZER // VZERO ZER
+ LXVD2X (R17)(CPOOL), SEL1_
+ VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0]
+
+ VSLDOI $12, ADD2, ADD1, T0 // VSLDB
+ VSLDOI $12, T2, ADD2, T1 // VSLDB
+
+ VADDCUQ T0, RED1, CAR1 // VACCQ
+ VADDUQM T0, RED1, T0 // VAQ
+ VADDECUQ T1, RED2, CAR1, T2 // VACCCQ
+ VADDEUQM T1, RED2, CAR1, T1 // VACQ
+
+ VADDCUQ T0, ADD3, CAR1 // VACCQ
+ VADDUQM T0, ADD3, T0 // VAQ
+ VADDECUQ T1, ADD4, CAR1, CAR2 // VACCCQ
+ VADDEUQM T1, ADD4, CAR1, T1 // VACQ
+ VADDUQM T2, CAR2, T2 // VAQ
+
+ LXVD2X (R21)(CPOOL), SEL5_
+ LXVD2X (R22)(CPOOL), SEL6_
+ VPERM T0, RED3, SEL5, RED2 // [d1 d0 d1 d0]
+ VPERM T0, RED3, SEL6, RED1 // [ 0 d1 d0 0]
+ VSUBUQM RED2, RED1, RED2 // Guaranteed not to underflow // VSQ
+
+ VSLDOI $12, T1, T0, T0 // VSLDB
+ VSLDOI $12, T2, T1, T1 // VSLDB
+
+ VADDCUQ T0, ADD3H, CAR1 // VACCQ
+ VADDUQM T0, ADD3H, T0 // VAQ
+ VADDECUQ T1, ADD4H, CAR1, T2 // VACCCQ
+ VADDEUQM T1, ADD4H, CAR1, T1 // VACQ
+
+ VADDCUQ T0, RED1, CAR1 // VACCQ
+ VADDUQM T0, RED1, T0 // VAQ
+ VADDECUQ T1, RED2, CAR1, CAR2 // VACCCQ
+ VADDEUQM T1, RED2, CAR1, T1 // VACQ
+ VADDUQM T2, CAR2, T2 // VAQ
+
+ // ---------------------------------------------------
+
+ VSPLTISB $0, RED3 // VZERO RED3
+ VSUBCUQ T0, P0, CAR1 // VSCBIQ
+ VSUBUQM T0, P0, ADD1H // VSQ
+ VSUBECUQ T1, P1, CAR1, CAR2 // VSBCBIQ
+ VSUBEUQM T1, P1, CAR1, ADD2H // VSBIQ
+ VSUBEUQM T2, RED3, CAR2, T2 // VSBIQ
+
+ // what output to use, ADD2H||ADD1H or T1||T0?
+ VSEL ADD1H, T0, T2, T0
+ VSEL ADD2H, T1, T2, T1
+ RET
+
+#undef CPOOL
+
+#undef X0
+#undef X1
+#undef Y0
+#undef Y1
+#undef T0
+#undef T1
+#undef P0
+#undef P1
+
+#undef SEL1
+#undef SEL2
+#undef SEL3
+#undef SEL4
+#undef SEL5
+#undef SEL6
+#undef SEL1_
+#undef SEL2_
+#undef SEL3_
+#undef SEL4_
+#undef SEL5_
+#undef SEL6_
+
+#undef YDIG
+#undef ADD1H
+#undef ADD2H
+#undef ADD3
+#undef ADD4
+#undef RED1
+#undef RED2
+#undef RED3
+#undef T2
+#undef ADD1
+#undef ADD2
+#undef ADD3H
+#undef ADD4H
+#undef ZER
+#undef CAR1
+#undef CAR2
+
+#undef TMP1
+#undef TMP2
+#undef EVENODD
+#undef EXTRACT_HI
+#undef EXTRACT_HI_
+#undef EXTRACT_LO
+#undef EXTRACT_LO_
+
+#define p256SubInternal(T1, T0, X1, X0, Y1, Y0) \
+ VSPLTISB $0, ZER \ // VZERO
+ VSUBCUQ X0, Y0, CAR1 \
+ VSUBUQM X0, Y0, T0 \
+ VSUBECUQ X1, Y1, CAR1, SEL1 \
+ VSUBEUQM X1, Y1, CAR1, T1 \
+ VSUBUQM ZER, SEL1, SEL1 \ // VSQ
+ \
+ VADDCUQ T0, PL, CAR1 \ // VACCQ
+ VADDUQM T0, PL, TT0 \ // VAQ
+ VADDEUQM T1, PH, CAR1, TT1 \ // VACQ
+ \
+ VSEL TT0, T0, SEL1, T0 \
+ VSEL TT1, T1, SEL1, T1 \
+
+#define p256AddInternal(T1, T0, X1, X0, Y1, Y0) \
+ VADDCUQ X0, Y0, CAR1 \
+ VADDUQM X0, Y0, T0 \
+ VADDECUQ X1, Y1, CAR1, T2 \ // VACCCQ
+ VADDEUQM X1, Y1, CAR1, T1 \
+ \
+ VSPLTISB $0, ZER \
+ VSUBCUQ T0, PL, CAR1 \ // VSCBIQ
+ VSUBUQM T0, PL, TT0 \
+ VSUBECUQ T1, PH, CAR1, CAR2 \ // VSBCBIQ
+ VSUBEUQM T1, PH, CAR1, TT1 \ // VSBIQ
+ VSUBEUQM T2, ZER, CAR2, SEL1 \
+ \
+ VSEL TT0, T0, SEL1, T0 \
+ VSEL TT1, T1, SEL1, T1
+
+#define p256HalfInternal(T1, T0, X1, X0) \
+ VSPLTISB $0, ZER \
+ VSUBEUQM ZER, ZER, X0, SEL1 \
+ \
+ VADDCUQ X0, PL, CAR1 \
+ VADDUQM X0, PL, T0 \
+ VADDECUQ X1, PH, CAR1, T2 \
+ VADDEUQM X1, PH, CAR1, T1 \
+ \
+ VSEL T0, X0, SEL1, T0 \
+ VSEL T1, X1, SEL1, T1 \
+ VSEL T2, ZER, SEL1, T2 \
+ \
+ VSLDOI $15, T2, ZER, TT1 \
+ VSLDOI $15, T1, ZER, TT0 \
+ VSPLTISB $1, SEL1 \
+ VSR T0, SEL1, T0 \ // VSRL
+ VSR T1, SEL1, T1 \
+ VSPLTISB $7, SEL1 \ // VREPIB
+ VSL TT0, SEL1, TT0 \
+ VSL TT1, SEL1, TT1 \
+ VOR T0, TT0, T0 \
+ VOR T1, TT1, T1
+
+// ---------------------------------------
+// func p256MulAsm(res, in1, in2 []byte)
+#define res_ptr R3
+#define x_ptr R4
+#define y_ptr R5
+#define CPOOL R7
+#define TEMP R8
+
+// Parameters
+#define X0 V0
+#define X1 V1
+#define Y0 V2
+#define Y1 V3
+#define T0 V4
+#define T1 V5
+#define X0_ VS32
+#define X1_ VS33
+#define Y0_ VS34
+#define Y1_ VS35
+#define T0_ VS36
+#define T1_ VS37
+#define SWAP V28
+#define SWAP_ VS60
+
+// Constants
+#define P0 V30
+#define P1 V31
+#define P0_ VS62
+#define P1_ VS63
+//
+// Montgomery multiplication modulo P256
+//
+TEXT ·p256MulAsm(SB), NOSPLIT, $0-72
+ MOVD res+0(FP), res_ptr
+ MOVD in1+24(FP), x_ptr
+ MOVD in2+48(FP), y_ptr
+ MOVD $16, R16
+ MOVD $32, R17
+
+ MOVD $p256mul<>+0x00(SB), CPOOL
+ MOVD $byteswap<>+0x00(SB), R8
+
+ LXVD2X (R8)(R0), SWAP_
+
+ LXVD2X (R0)(x_ptr), X0_
+ LXVD2X (R16)(x_ptr), X1_
+
+ VPERM X0, X0, SWAP, X0
+ VPERM X1, X1, SWAP, X1
+
+ LXVD2X (R0)(y_ptr), Y0_
+ LXVD2X (R16)(y_ptr), Y1_
+
+ VPERM Y0, Y0, SWAP, Y0
+ VPERM Y1, Y1, SWAP, Y1
+
+ LXVD2X (R16)(CPOOL), P1_
+ LXVD2X (R0)(CPOOL), P0_
+
+ CALL p256MulInternal<>(SB)
+
+ MOVD $p256mul<>+0x00(SB), CPOOL
+ MOVD $byteswap<>+0x00(SB), R8
+
+ LXVD2X (R8)(R0), SWAP_
+
+ VPERM T0, T0, SWAP, T0
+ VPERM T1, T1, SWAP, T1
+ STXVD2X T0_, (R0)(res_ptr)
+ STXVD2X T1_, (R16)(res_ptr)
+ RET
+
+#undef res_ptr
+#undef x_ptr
+#undef y_ptr
+#undef CPOOL
+
+#undef X0
+#undef X1
+#undef Y0
+#undef Y1
+#undef T0
+#undef T1
+#undef P0
+#undef P1
+#undef X0_
+#undef X1_
+#undef Y0_
+#undef Y1_
+#undef T0_
+#undef T1_
+#undef P0_
+#undef P1_
+
+// Point add with P2 being affine point
+// If sign == 1 -> P2 = -P2
+// If sel == 0 -> P3 = P1
+// if zero == 0 -> P3 = P2
+// p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)
+#define P3ptr R3
+#define P1ptr R4
+#define P2ptr R5
+#define CPOOL R7
+
+// Temporaries in REGs
+#define Y2L V15
+#define Y2H V16
+#define Y2L_ VS47
+#define Y2H_ VS48
+#define T1L V17
+#define T1H V18
+#define T2L V19
+#define T2H V20
+#define T3L V21
+#define T3H V22
+#define T4L V23
+#define T4H V24
+
+// Temps for Sub and Add
+#define TT0 V11
+#define TT1 V12
+#define T2 V13
+
+// p256MulAsm Parameters
+#define X0 V0
+#define X1 V1
+#define X0_ VS32
+#define X1_ VS33
+#define Y0 V2
+#define Y1 V3
+#define Y0_ VS34
+#define Y1_ VS35
+#define T0 V4
+#define T1 V5
+
+#define PL V30
+#define PH V31
+#define PL_ VS62
+#define PH_ VS63
+
+// Names for zero/sel selects
+#define X1L V0
+#define X1H V1
+#define X1L_ VS32
+#define X1H_ VS33
+#define Y1L V2 // p256MulAsmParmY
+#define Y1H V3 // p256MulAsmParmY
+#define Y1L_ VS34
+#define Y1H_ VS35
+#define Z1L V4
+#define Z1H V5
+#define Z1L_ VS36
+#define Z1H_ VS37
+#define X2L V0
+#define X2H V1
+#define X2L_ VS32
+#define X2H_ VS33
+#define Z2L V4
+#define Z2H V5
+#define Z2L_ VS36
+#define Z2H_ VS37
+#define X3L V17 // T1L
+#define X3H V18 // T1H
+#define Y3L V21 // T3L
+#define Y3H V22 // T3H
+#define Z3L V25
+#define Z3H V26
+#define X3L_ VS49
+#define X3H_ VS50
+#define Y3L_ VS53
+#define Y3H_ VS54
+#define Z3L_ VS57
+#define Z3H_ VS58
+
+#define ZER V6
+#define SEL1 V7
+#define SEL1_ VS39
+#define CAR1 V8
+#define CAR2 V9
+/* *
+ * Three operand formula:
+ * Source: 2004 Hankerson–Menezes–Vanstone, page 91.
+ * T1 = Z1²
+ * T2 = T1*Z1
+ * T1 = T1*X2
+ * T2 = T2*Y2
+ * T1 = T1-X1
+ * T2 = T2-Y1
+ * Z3 = Z1*T1
+ * T3 = T1²
+ * T4 = T3*T1
+ * T3 = T3*X1
+ * T1 = 2*T3
+ * X3 = T2²
+ * X3 = X3-T1
+ * X3 = X3-T4
+ * T3 = T3-X3
+ * T3 = T3*T2
+ * T4 = T4*Y1
+ * Y3 = T3-T4
+
+ * Three operand formulas, but with MulInternal X,Y used to store temps
+X=Z1; Y=Z1; MUL;T- // T1 = Z1² T1
+X=T ; Y- ; MUL;T2=T // T2 = T1*Z1 T1 T2
+X- ; Y=X2; MUL;T1=T // T1 = T1*X2 T1 T2
+X=T2; Y=Y2; MUL;T- // T2 = T2*Y2 T1 T2
+SUB(T2<T-Y1) // T2 = T2-Y1 T1 T2
+SUB(Y<T1-X1) // T1 = T1-X1 T1 T2
+X=Z1; Y- ; MUL;Z3:=T// Z3 = Z1*T1 T2
+X=Y; Y- ; MUL;X=T // T3 = T1*T1 T2
+X- ; Y- ; MUL;T4=T // T4 = T3*T1 T2 T4
+X- ; Y=X1; MUL;T3=T // T3 = T3*X1 T2 T3 T4
+ADD(T1<T+T) // T1 = T3+T3 T1 T2 T3 T4
+X=T2; Y=T2; MUL;T- // X3 = T2*T2 T1 T2 T3 T4
+SUB(T<T-T1) // X3 = X3-T1 T1 T2 T3 T4
+SUB(T<T-T4) X3:=T // X3 = X3-T4 T2 T3 T4
+SUB(X<T3-T) // T3 = T3-X3 T2 T3 T4
+X- ; Y- ; MUL;T3=T // T3 = T3*T2 T2 T3 T4
+X=T4; Y=Y1; MUL;T- // T4 = T4*Y1 T3 T4
+SUB(T<T3-T) Y3:=T // Y3 = T3-T4 T3 T4
+
+ */
+//
+// V27 is clobbered by p256MulInternal so must be
+// saved in a temp.
+//
+TEXT ·p256PointAddAffineAsm(SB), NOSPLIT, $16-48
+ MOVD res+0(FP), P3ptr
+ MOVD in1+8(FP), P1ptr
+ MOVD in2+16(FP), P2ptr
+
+ MOVD $p256mul<>+0x00(SB), CPOOL
+
+ MOVD $16, R16
+ MOVD $32, R17
+ MOVD $48, R18
+ MOVD $64, R19
+ MOVD $80, R20
+ MOVD $96, R21
+ MOVD $112, R22
+ MOVD $128, R23
+ MOVD $144, R24
+ MOVD $160, R25
+ MOVD $104, R26 // offset of sign+24(FP)
+
+ MOVD $byteswap<>+0+00(SB), R8
+ LXVD2X (R16)(CPOOL), PH_
+ LXVD2X (R0)(CPOOL), PL_
+
+ // if (sign == 1) {
+ // Y2 = fromBig(new(big.Int).Mod(new(big.Int).Sub(p256.P, new(big.Int).SetBytes(Y2)), p256.P)) // Y2 = P-Y2
+ // }
+
+ LXVD2X (R8)(R0), SWAP_
+ LXVD2X (R17)(P2ptr), Y2L_
+ LXVD2X (R18)(P2ptr), Y2H_
+ VPERM Y2H, Y2H, SWAP, Y2H
+ VPERM Y2L, Y2L, SWAP, Y2L
+
+ // Equivalent of VLREPG sign+24(FP), SEL1
+ LXVDSX (R1)(R26), SEL1_
+ VSPLTISB $0, ZER
+ VCMPEQUD SEL1, ZER, SEL1
+
+ VSUBCUQ PL, Y2L, CAR1
+ VSUBUQM PL, Y2L, T1L
+ VSUBEUQM PH, Y2H, CAR1, T1H
+
+ VSEL T1L, Y2L, SEL1, Y2L
+ VSEL T1H, Y2H, SEL1, Y2H
+
+/* *
+ * Three operand formula:
+ * Source: 2004 Hankerson–Menezes–Vanstone, page 91.
+ */
+ // X=Z1; Y=Z1; MUL; T- // T1 = Z1² T1
+ LXVD2X (R8)(R0), SWAP_
+ LXVD2X (R19)(P1ptr), X0_ // Z1H
+ LXVD2X (R20)(P1ptr), X1_ // Z1L
+ VPERM X0, X0, SWAP, X0
+ VPERM X1, X1, SWAP, X1
+ VOR X0, X0, Y0
+ VOR X1, X1, Y1
+ CALL p256MulInternal<>(SB)
+
+ // X=T ; Y- ; MUL; T2=T // T2 = T1*Z1 T1 T2
+ VOR T0, T0, X0
+ VOR T1, T1, X1
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, T2L
+ VOR T1, T1, T2H
+
+ // X- ; Y=X2; MUL; T1=T // T1 = T1*X2 T1 T2
+ MOVD in2+16(FP), P2ptr
+ LXVD2X (R8)(R0), SWAP_
+ LXVD2X (R0)(P2ptr), Y0_ // X2H
+ LXVD2X (R16)(P2ptr), Y1_ // X2L
+ VPERM Y0, Y0, SWAP, Y0
+ VPERM Y1, Y1, SWAP, Y1
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, T1L
+ VOR T1, T1, T1H
+
+ // X=T2; Y=Y2; MUL; T- // T2 = T2*Y2 T1 T2
+ VOR T2L, T2L, X0
+ VOR T2H, T2H, X1
+ VOR Y2L, Y2L, Y0
+ VOR Y2H, Y2H, Y1
+ CALL p256MulInternal<>(SB)
+
+ // SUB(T2<T-Y1) // T2 = T2-Y1 T1 T2
+ MOVD in1+8(FP), P1ptr
+ LXVD2X (R8)(R0), SWAP_
+ LXVD2X (R17)(P1ptr), Y1L_
+ LXVD2X (R18)(P1ptr), Y1H_
+ VPERM Y1H, Y1H, SWAP, Y1H
+ VPERM Y1L, Y1L, SWAP, Y1L
+ p256SubInternal(T2H,T2L,T1,T0,Y1H,Y1L)
+
+ // SUB(Y<T1-X1) // T1 = T1-X1 T1 T2
+ LXVD2X (R0)(P1ptr), X1L_
+ LXVD2X (R16)(P1ptr), X1H_
+ VPERM X1H, X1H, SWAP, X1H
+ VPERM X1L, X1L, SWAP, X1L
+ p256SubInternal(Y1,Y0,T1H,T1L,X1H,X1L)
+
+ // X=Z1; Y- ; MUL; Z3:=T// Z3 = Z1*T1 T2
+ LXVD2X (R19)(P1ptr), X0_ // Z1H
+ LXVD2X (R20)(P1ptr), X1_ // Z1L
+ VPERM X0, X0, SWAP, X0
+ VPERM X1, X1, SWAP, X1
+ CALL p256MulInternal<>(SB)
+
+ VOR T0, T0, Z3L
+ VOR T1, T1, Z3H
+
+ // X=Y; Y- ; MUL; X=T // T3 = T1*T1 T2
+ VOR Y0, Y0, X0
+ VOR Y1, Y1, X1
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, X0
+ VOR T1, T1, X1
+
+ // X- ; Y- ; MUL; T4=T // T4 = T3*T1 T2 T4
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, T4L
+ VOR T1, T1, T4H
+
+ // X- ; Y=X1; MUL; T3=T // T3 = T3*X1 T2 T3 T4
+ MOVD in1+8(FP), P1ptr
+ LXVD2X (R8)(R0), SWAP_
+ LXVD2X (R0)(P1ptr), Y0_ // X1H
+ LXVD2X (R16)(P1ptr), Y1_ // X1L
+ VPERM Y1, Y1, SWAP, Y1
+ VPERM Y0, Y0, SWAP, Y0
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, T3L
+ VOR T1, T1, T3H
+
+ // ADD(T1<T+T) // T1 = T3+T3 T1 T2 T3 T4
+ p256AddInternal(T1H,T1L, T1,T0,T1,T0)
+
+ // X=T2; Y=T2; MUL; T- // X3 = T2*T2 T1 T2 T3 T4
+ VOR T2L, T2L, X0
+ VOR T2H, T2H, X1
+ VOR T2L, T2L, Y0
+ VOR T2H, T2H, Y1
+ CALL p256MulInternal<>(SB)
+
+ // SUB(T<T-T1) // X3 = X3-T1 T1 T2 T3 T4 (T1 = X3)
+ p256SubInternal(T1,T0,T1,T0,T1H,T1L)
+
+ // SUB(T<T-T4) X3:=T // X3 = X3-T4 T2 T3 T4
+ p256SubInternal(T1,T0,T1,T0,T4H,T4L)
+ VOR T0, T0, X3L
+ VOR T1, T1, X3H
+
+ // SUB(X<T3-T) // T3 = T3-X3 T2 T3 T4
+ p256SubInternal(X1,X0,T3H,T3L,T1,T0)
+
+ // X- ; Y- ; MUL; T3=T // T3 = T3*T2 T2 T3 T4
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, T3L
+ VOR T1, T1, T3H
+
+ // X=T4; Y=Y1; MUL; T- // T4 = T4*Y1 T3 T4
+ VOR T4L, T4L, X0
+ VOR T4H, T4H, X1
+ MOVD in1+8(FP), P1ptr
+ LXVD2X (R8)(R0), SWAP_
+ LXVD2X (R17)(P1ptr), Y0_ // Y1H
+ LXVD2X (R18)(P1ptr), Y1_ // Y1L
+ VPERM Y0, Y0, SWAP, Y0
+ VPERM Y1, Y1, SWAP, Y1
+ CALL p256MulInternal<>(SB)
+
+ // SUB(T<T3-T) Y3:=T // Y3 = T3-T4 T3 T4 (T3 = Y3)
+ p256SubInternal(Y3H,Y3L,T3H,T3L,T1,T0)
+
+ // if (sel == 0) {
+ // copy(P3.x[:], X1)
+ // copy(P3.y[:], Y1)
+ // copy(P3.z[:], Z1)
+ // }
+
+ LXVD2X (R8)(R0), SWAP_
+ LXVD2X (R0)(P1ptr), X1L_
+ LXVD2X (R16)(P1ptr), X1H_
+ VPERM X1H, X1H, SWAP, X1H
+ VPERM X1L, X1L, SWAP, X1L
+
+ // Y1 already loaded, left over from addition
+ LXVD2X (R19)(P1ptr), Z1L_
+ LXVD2X (R20)(P1ptr), Z1H_
+ VPERM Z1H, Z1H, SWAP, Z1H
+ VPERM Z1L, Z1L, SWAP, Z1L
+
+ MOVD $112, R26 // Get offset to sel+32
+ LXVDSX (R1)(R26), SEL1_
+ VSPLTISB $0, ZER
+ VCMPEQUD SEL1, ZER, SEL1
+
+ VSEL X3L, X1L, SEL1, X3L
+ VSEL X3H, X1H, SEL1, X3H
+ VSEL Y3L, Y1L, SEL1, Y3L
+ VSEL Y3H, Y1H, SEL1, Y3H
+ VSEL Z3L, Z1L, SEL1, Z3L
+ VSEL Z3H, Z1H, SEL1, Z3H
+
+ // if (zero == 0) {
+ // copy(P3.x[:], X2)
+ // copy(P3.y[:], Y2)
+ // copy(P3.z[:], []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ // 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}) //(p256.z*2^256)%p
+ // }
+ MOVD in2+16(FP), P2ptr
+ LXVD2X (R0)(P2ptr), X2L_
+ LXVD2X (R16)(P2ptr), X2H_
+ VPERM X2H, X2H, SWAP, X2H
+ VPERM X2L, X2L, SWAP, X2L
+
+ // Y2 already loaded
+ LXVD2X (R23)(CPOOL), Z2L_
+ LXVD2X (R24)(CPOOL), Z2H_
+
+ MOVD $120, R26 // Get the value from zero+40(FP)
+ LXVDSX (R1)(R26), SEL1_
+ VSPLTISB $0, ZER
+ VCMPEQUD SEL1, ZER, SEL1
+
+ VSEL X3L, X2L, SEL1, X3L
+ VSEL X3H, X2H, SEL1, X3H
+ VSEL Y3L, Y2L, SEL1, Y3L
+ VSEL Y3H, Y2H, SEL1, Y3H
+ VSEL Z3L, Z2L, SEL1, Z3L
+ VSEL Z3H, Z2H, SEL1, Z3H
+
+ // Reorder the bytes so they can be stored using STXVD2X.
+ MOVD res+0(FP), P3ptr
+ VPERM X3H, X3H, SWAP, X3H
+ VPERM X3L, X3L, SWAP, X3L
+ VPERM Y3H, Y3H, SWAP, Y3H
+ VPERM Y3L, Y3L, SWAP, Y3L
+ VPERM Z3H, Z3H, SWAP, Z3H
+ VPERM Z3L, Z3L, SWAP, Z3L
+ STXVD2X X3L_, (R0)(P3ptr)
+ STXVD2X X3H_, (R16)(P3ptr)
+ STXVD2X Y3L_, (R17)(P3ptr)
+ STXVD2X Y3H_, (R18)(P3ptr)
+ STXVD2X Z3L_, (R19)(P3ptr)
+ STXVD2X Z3H_, (R20)(P3ptr)
+
+ RET
+
+#undef P3ptr
+#undef P1ptr
+#undef P2ptr
+#undef CPOOL
+#undef SWAP
+#undef SWAP_
+
+#undef Y2L
+#undef Y2H
+#undef Y2L_
+#undef Y2H_
+#undef T1L
+#undef T1H
+#undef T2L
+#undef T2H
+#undef T3L
+#undef T3H
+#undef T4L
+#undef T4H
+
+#undef TT0
+#undef TT1
+#undef TT0_
+#undef TT1_
+#undef T2
+
+#undef X0
+#undef X1
+#undef X0_
+#undef X1_
+#undef Y0
+#undef Y1
+#undef Y0_
+#undef Y1_
+#undef T0
+#undef T1
+
+#undef PL
+#undef PH
+#undef PL_
+#undef PH_
+
+#undef X1L
+#undef X1H
+#undef X1L_
+#undef X1H_
+#undef Y1L
+#undef Y1H
+#undef Y1L_
+#undef Y1H_
+#undef Z1L
+#undef Z1H
+#undef Z1L_
+#undef Z1H_
+#undef X2L
+#undef X2H
+#undef X2L_
+#undef X2H_
+#undef Z2L
+#undef Z2H
+#undef Z2L_
+#undef Z2H_
+#undef X3L
+#undef X3H
+#undef X3L_
+#undef X3H_
+#undef Y3L
+#undef Y3H
+#undef Y3L_
+#undef Y3H_
+#undef Z3L
+#undef Z3H
+#undef Z3L_
+#undef Z3H_
+
+#undef ZER
+#undef SEL1
+#undef SEL1_
+#undef CAR1
+#undef CAR2
+
+// p256PointDoubleAsm(P3, P1 *p256Point)
+// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-2007-bl
+// http://www.hyperelliptic.org/EFD/g1p/auto-shortw.html
+// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-projective-3.html
+#define P3ptr R3
+#define P1ptr R4
+#define CPOOL R7
+
+// Temporaries in REGs
+#define X3L V15
+#define X3H V16
+#define X3L_ VS47
+#define X3H_ VS48
+#define Y3L V17
+#define Y3H V18
+#define Y3L_ VS49
+#define Y3H_ VS50
+#define T1L V19
+#define T1H V20
+#define T2L V21
+#define T2H V22
+#define T3L V23
+#define T3H V24
+
+#define X1L V6
+#define X1H V7
+#define X1L_ VS38
+#define X1H_ VS39
+#define Y1L V8
+#define Y1H V9
+#define Y1L_ VS40
+#define Y1H_ VS41
+#define Z1L V10
+#define Z1H V11
+
+// Temps for Sub and Add
+#define TT0 V11
+#define TT1 V12
+#define TT0_ VS43
+#define TT1_ VS44
+#define T2 V13
+
+// p256MulAsm Parameters
+#define X0 V0
+#define X1 V1
+#define X0_ VS32
+#define X1_ VS33
+#define Y0 V2
+#define Y1 V3
+#define Y0_ VS34
+#define Y1_ VS35
+#define T0 V4
+#define T1 V5
+#define T0_ VS36
+#define T1_ VS37
+
+#define PL V30
+#define PH V31
+#define PL_ VS62
+#define PH_ VS63
+
+#define Z3L V23
+#define Z3H V24
+
+#define SWAP V25
+#define SWAP_ VS57
+#define ZER V26
+#define SEL1 V27
+#define CAR1 V28
+#define CAR2 V29
+/*
+ * http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2004-hmv
+ * Cost: 4M + 4S + 1*half + 5add + 2*2 + 1*3.
+ * Source: 2004 Hankerson–Menezes–Vanstone, page 91.
+ * A = 3(X₁-Z₁²)×(X₁+Z₁²)
+ * B = 2Y₁
+ * Z₃ = B×Z₁
+ * C = B²
+ * D = C×X₁
+ * X₃ = A²-2D
+ * Y₃ = (D-X₃)×A-C²/2
+ *
+ * Three-operand formula:
+ * T1 = Z1²
+ * T2 = X1-T1
+ * T1 = X1+T1
+ * T2 = T2*T1
+ * T2 = 3*T2
+ * Y3 = 2*Y1
+ * Z3 = Y3*Z1
+ * Y3 = Y3²
+ * T3 = Y3*X1
+ * Y3 = Y3²
+ * Y3 = half*Y3
+ * X3 = T2²
+ * T1 = 2*T3
+ * X3 = X3-T1
+ * T1 = T3-X3
+ * T1 = T1*T2
+ * Y3 = T1-Y3
+ */
+
+TEXT ·p256PointDoubleAsm(SB), NOSPLIT, $0-16
+ MOVD res+0(FP), P3ptr
+ MOVD in+8(FP), P1ptr
+
+ MOVD $p256mul<>+0x00(SB), CPOOL
+ MOVD $byteswap<>+0x00(SB), R15
+
+ MOVD $16, R16
+ MOVD $32, R17
+ MOVD $48, R18
+ MOVD $64, R19
+ MOVD $80, R20
+
+ LXVD2X (R16)(CPOOL), PH_
+ LXVD2X (R0)(CPOOL), PL_
+
+ LXVD2X (R15)(R0), SWAP_
+
+ // X=Z1; Y=Z1; MUL; T- // T1 = Z1²
+ LXVD2X (R19)(P1ptr), X0_ // Z1H
+ LXVD2X (R20)(P1ptr), X1_ // Z1L
+
+ VPERM X0, X0, SWAP, X0
+ VPERM X1, X1, SWAP, X1
+
+ VOR X0, X0, Y0
+ VOR X1, X1, Y1
+ CALL p256MulInternal<>(SB)
+
+ // SUB(X<X1-T) // T2 = X1-T1
+ LXVD2X (R0)(P1ptr), X1L_
+ LXVD2X (R16)(P1ptr), X1H_
+ VPERM X1L, X1L, SWAP, X1L
+ VPERM X1H, X1H, SWAP, X1H
+
+ p256SubInternal(X1,X0,X1H,X1L,T1,T0)
+
+ // ADD(Y<X1+T) // T1 = X1+T1
+ p256AddInternal(Y1,Y0,X1H,X1L,T1,T0)
+
+ // X- ; Y- ; MUL; T- // T2 = T2*T1
+ CALL p256MulInternal<>(SB)
+
+ // ADD(T2<T+T); ADD(T2<T2+T) // T2 = 3*T2
+ p256AddInternal(T2H,T2L,T1,T0,T1,T0)
+ p256AddInternal(T2H,T2L,T2H,T2L,T1,T0)
+
+ // ADD(X<Y1+Y1) // Y3 = 2*Y1
+ LXVD2X (R15)(R0), SWAP_
+ LXVD2X (R17)(P1ptr), Y1L_
+ LXVD2X (R18)(P1ptr), Y1H_
+ VPERM Y1L, Y1L, SWAP, Y1L
+ VPERM Y1H, Y1H, SWAP, Y1H
+
+ p256AddInternal(X1,X0,Y1H,Y1L,Y1H,Y1L)
+
+ // X- ; Y=Z1; MUL; Z3:=T // Z3 = Y3*Z1
+ LXVD2X (R15)(R0), SWAP_
+ LXVD2X (R19)(P1ptr), Y0_
+ LXVD2X (R20)(P1ptr), Y1_
+ VPERM Y0, Y0, SWAP, Y0
+ VPERM Y1, Y1, SWAP, Y1
+
+ CALL p256MulInternal<>(SB)
+
+ LXVD2X (R15)(R0), SWAP_
+
+ // Leave T0, T1 as is.
+ VPERM T0, T0, SWAP, TT0
+ VPERM T1, T1, SWAP, TT1
+ STXVD2X TT0_, (R19)(P3ptr)
+ STXVD2X TT1_, (R20)(P3ptr)
+
+ // X- ; Y=X ; MUL; T- // Y3 = Y3²
+ VOR X0, X0, Y0
+ VOR X1, X1, Y1
+ CALL p256MulInternal<>(SB)
+
+ // X=T ; Y=X1; MUL; T3=T // T3 = Y3*X1
+ VOR T0, T0, X0
+ VOR T1, T1, X1
+ LXVD2X (R15)(R0), SWAP_
+ LXVD2X (R0)(P1ptr), Y0_
+ LXVD2X (R16)(P1ptr), Y1_
+ VPERM Y0, Y0, SWAP, Y0
+ VPERM Y1, Y1, SWAP, Y1
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, T3L
+ VOR T1, T1, T3H
+
+ // X- ; Y=X ; MUL; T- // Y3 = Y3²
+ VOR X0, X0, Y0
+ VOR X1, X1, Y1
+ CALL p256MulInternal<>(SB)
+
+ // HAL(Y3<T) // Y3 = half*Y3
+ p256HalfInternal(Y3H,Y3L, T1,T0)
+
+ // X=T2; Y=T2; MUL; T- // X3 = T2²
+ VOR T2L, T2L, X0
+ VOR T2H, T2H, X1
+ VOR T2L, T2L, Y0
+ VOR T2H, T2H, Y1
+ CALL p256MulInternal<>(SB)
+
+ // ADD(T1<T3+T3) // T1 = 2*T3
+ p256AddInternal(T1H,T1L,T3H,T3L,T3H,T3L)
+
+ // SUB(X3<T-T1) X3:=X3 // X3 = X3-T1
+ p256SubInternal(X3H,X3L,T1,T0,T1H,T1L)
+
+ LXVD2X (R15)(R0), SWAP_
+ VPERM X3L, X3L, SWAP, TT0
+ VPERM X3H, X3H, SWAP, TT1
+ STXVD2X TT0_, (R0)(P3ptr)
+ STXVD2X TT1_, (R16)(P3ptr)
+
+ // SUB(X<T3-X3) // T1 = T3-X3
+ p256SubInternal(X1,X0,T3H,T3L,X3H,X3L)
+
+ // X- ; Y- ; MUL; T- // T1 = T1*T2
+ CALL p256MulInternal<>(SB)
+
+ // SUB(Y3<T-Y3) // Y3 = T1-Y3
+ p256SubInternal(Y3H,Y3L,T1,T0,Y3H,Y3L)
+
+ LXVD2X (R15)(R0), SWAP_
+ VPERM Y3L, Y3L, SWAP, Y3L
+ VPERM Y3H, Y3H, SWAP, Y3H
+ STXVD2X Y3L_, (R17)(P3ptr)
+ STXVD2X Y3H_, (R18)(P3ptr)
+ RET
+
+#undef P3ptr
+#undef P1ptr
+#undef CPOOL
+#undef X3L
+#undef X3H
+#undef X3L_
+#undef X3H_
+#undef Y3L
+#undef Y3H
+#undef Y3L_
+#undef Y3H_
+#undef T1L
+#undef T1H
+#undef T2L
+#undef T2H
+#undef T3L
+#undef T3H
+#undef X1L
+#undef X1H
+#undef X1L_
+#undef X1H_
+#undef Y1L
+#undef Y1H
+#undef Y1L_
+#undef Y1H_
+#undef Z1L
+#undef Z1H
+#undef TT0
+#undef TT1
+#undef TT0_
+#undef TT1_
+#undef T2
+#undef X0
+#undef X1
+#undef X0_
+#undef X1_
+#undef Y0
+#undef Y1
+#undef Y0_
+#undef Y1_
+#undef T0
+#undef T1
+#undef T0_
+#undef T1_
+#undef PL
+#undef PH
+#undef PL_
+#undef PH_
+#undef Z3L
+#undef Z3H
+#undef ZER
+#undef SEL1
+#undef CAR1
+#undef CAR2
+#undef SWAP
+#undef SWAP_
+
+// p256PointAddAsm(P3, P1, P2 *p256Point)
+#define P3ptr R3
+#define P1ptr R4
+#define P2ptr R5
+#define CPOOL R7
+#define TRUE R14
+#define RES1 R9
+#define RES2 R10
+
+// Temporaries in REGs
+#define T1L V16
+#define T1H V17
+#define T2L V18
+#define T2H V19
+#define U1L V20
+#define U1H V21
+#define S1L V22
+#define S1H V23
+#define HL V24
+#define HH V25
+#define RL V26
+#define RH V27
+#define RH_ VS59
+
+// Temps for Sub and Add
+#define ZER V6
+#define SEL1 V7
+#define CAR1 V8
+#define CAR2 V9
+#define TT0 V11
+#define TT0_ VS43
+#define TT1 V12
+#define TT1_ VS44
+#define T2 V13
+
+#define SWAP V28
+#define SWAP_ VS60
+
+// p256MulAsm Parameters
+#define X0 V0
+#define X1 V1
+#define X0_ VS32
+#define X1_ VS33
+#define Y0 V2
+#define Y1 V3
+#define Y0_ VS34
+#define Y1_ VS35
+#define T0 V4
+#define T1 V5
+#define T0_ VS36
+#define T1_ VS37
+
+#define PL V30
+#define PH V31
+#define PL_ VS62
+#define PH_ VS63
+/*
+ * https://choucroutage.com/Papers/SideChannelAttacks/ctrsa-2011-brown.pdf "Software Implementation of the NIST Elliptic Curves Over Prime Fields"
+ *
+ * A = X₁×Z₂²
+ * B = Y₁×Z₂³
+ * C = X₂×Z₁²-A
+ * D = Y₂×Z₁³-B
+ * X₃ = D² - 2A×C² - C³
+ * Y₃ = D×(A×C² - X₃) - B×C³
+ * Z₃ = Z₁×Z₂×C
+ *
+ * Three-operand formula (adopted): http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-1998-cmo-2
+ * Temp storage: T1,T2,U1,H,Z3=X3=Y3,S1,R
+ *
+ * T1 = Z1*Z1
+ * T2 = Z2*Z2
+ * U1 = X1*T2
+ * H = X2*T1
+ * H = H-U1
+ * Z3 = Z1*Z2
+ * Z3 = Z3*H << store-out Z3 result reg.. could override Z1, if slices have same backing array
+ *
+ * S1 = Z2*T2
+ * S1 = Y1*S1
+ * R = Z1*T1
+ * R = Y2*R
+ * R = R-S1
+ *
+ * T1 = H*H
+ * T2 = H*T1
+ * U1 = U1*T1
+ *
+ * X3 = R*R
+ * X3 = X3-T2
+ * T1 = 2*U1
+ * X3 = X3-T1 << store-out X3 result reg
+ *
+ * T2 = S1*T2
+ * Y3 = U1-X3
+ * Y3 = R*Y3
+ * Y3 = Y3-T2 << store-out Y3 result reg
+
+ // X=Z1; Y=Z1; MUL; T- // T1 = Z1*Z1
+ // X- ; Y=T ; MUL; R=T // R = Z1*T1
+ // X=X2; Y- ; MUL; H=T // H = X2*T1
+ // X=Z2; Y=Z2; MUL; T- // T2 = Z2*Z2
+ // X- ; Y=T ; MUL; S1=T // S1 = Z2*T2
+ // X=X1; Y- ; MUL; U1=T // U1 = X1*T2
+ // SUB(H<H-T) // H = H-U1
+ // X=Z1; Y=Z2; MUL; T- // Z3 = Z1*Z2
+ // X=T ; Y=H ; MUL; Z3:=T// Z3 = Z3*H << store-out Z3 result reg.. could override Z1, if slices have same backing array
+ // X=Y1; Y=S1; MUL; S1=T // S1 = Y1*S1
+ // X=Y2; Y=R ; MUL; T- // R = Y2*R
+ // SUB(R<T-S1) // R = R-S1
+ // X=H ; Y=H ; MUL; T- // T1 = H*H
+ // X- ; Y=T ; MUL; T2=T // T2 = H*T1
+ // X=U1; Y- ; MUL; U1=T // U1 = U1*T1
+ // X=R ; Y=R ; MUL; T- // X3 = R*R
+ // SUB(T<T-T2) // X3 = X3-T2
+ // ADD(X<U1+U1) // T1 = 2*U1
+ // SUB(T<T-X) X3:=T // X3 = X3-T1 << store-out X3 result reg
+ // SUB(Y<U1-T) // Y3 = U1-X3
+ // X=R ; Y- ; MUL; U1=T // Y3 = R*Y3
+ // X=S1; Y=T2; MUL; T- // T2 = S1*T2
+ // SUB(T<U1-T); Y3:=T // Y3 = Y3-T2 << store-out Y3 result reg
+ */
+TEXT ·p256PointAddAsm(SB), NOSPLIT, $16-32
+ MOVD res+0(FP), P3ptr
+ MOVD in1+8(FP), P1ptr
+ MOVD $p256mul<>+0x00(SB), CPOOL
+ MOVD $16, R16
+ MOVD $32, R17
+ MOVD $48, R18
+ MOVD $64, R19
+ MOVD $80, R20
+
+ MOVD $byteswap<>+0x00(SB), R8
+ LXVD2X (R16)(CPOOL), PH_
+ LXVD2X (R0)(CPOOL), PL_
+
+ // X=Z1; Y=Z1; MUL; T- // T1 = Z1*Z1
+ LXVD2X (R8)(R0), SWAP_
+ LXVD2X (R19)(P1ptr), X0_ // Z1L
+ LXVD2X (R20)(P1ptr), X1_ // Z1H
+ VPERM X0, X0, SWAP, X0
+ VPERM X1, X1, SWAP, X1
+ VOR X0, X0, Y0
+ VOR X1, X1, Y1
+ CALL p256MulInternal<>(SB)
+
+ // X- ; Y=T ; MUL; R=T // R = Z1*T1
+ VOR T0, T0, Y0
+ VOR T1, T1, Y1
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, RL // SAVE: RL
+ VOR T1, T1, RH // SAVE: RH
+
+ STXVD2X RH_, (R1)(R17) // V27 has to be saved
+
+ // X=X2; Y- ; MUL; H=T // H = X2*T1
+ MOVD in2+16(FP), P2ptr
+ LXVD2X (R8)(R0), SWAP_
+ LXVD2X (R0)(P2ptr), X0_ // X2L
+ LXVD2X (R16)(P2ptr), X1_ // X2H
+ VPERM X0, X0, SWAP, X0
+ VPERM X1, X1, SWAP, X1
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, HL // SAVE: HL
+ VOR T1, T1, HH // SAVE: HH
+
+ // X=Z2; Y=Z2; MUL; T- // T2 = Z2*Z2
+ MOVD in2+16(FP), P2ptr
+ LXVD2X (R8)(R0), SWAP_
+ LXVD2X (R19)(P2ptr), X0_ // Z2L
+ LXVD2X (R20)(P2ptr), X1_ // Z2H
+ VPERM X0, X0, SWAP, X0
+ VPERM X1, X1, SWAP, X1
+ VOR X0, X0, Y0
+ VOR X1, X1, Y1
+ CALL p256MulInternal<>(SB)
+
+ // X- ; Y=T ; MUL; S1=T // S1 = Z2*T2
+ VOR T0, T0, Y0
+ VOR T1, T1, Y1
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, S1L // SAVE: S1L
+ VOR T1, T1, S1H // SAVE: S1H
+
+ // X=X1; Y- ; MUL; U1=T // U1 = X1*T2
+ MOVD in1+8(FP), P1ptr
+ LXVD2X (R8)(R0), SWAP_
+ LXVD2X (R0)(P1ptr), X0_ // X1L
+ LXVD2X (R16)(P1ptr), X1_ // X1H
+ VPERM X0, X0, SWAP, X0
+ VPERM X1, X1, SWAP, X1
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, U1L // SAVE: U1L
+ VOR T1, T1, U1H // SAVE: U1H
+
+ // SUB(H<H-T) // H = H-U1
+ p256SubInternal(HH,HL,HH,HL,T1,T0)
+
+ // if H == 0 or H^P == 0 then ret=1 else ret=0
+ // clobbers T1H and T1L
+ MOVD $1, TRUE
+ VSPLTISB $0, ZER
+ VOR HL, HH, T1H
+ VCMPEQUDCC ZER, T1H, T1H
+
+ // 26 = CR6 NE
+ ISEL $26, R0, TRUE, RES1
+ VXOR HL, PL, T1L // SAVE: T1L
+ VXOR HH, PH, T1H // SAVE: T1H
+ VOR T1L, T1H, T1H
+ VCMPEQUDCC ZER, T1H, T1H
+
+ // 26 = CR6 NE
+ ISEL $26, R0, TRUE, RES2
+ OR RES2, RES1, RES1
+ MOVD RES1, ret+24(FP)
+
+ // X=Z1; Y=Z2; MUL; T- // Z3 = Z1*Z2
+ MOVD $byteswap<>+0x00(SB), R8
+ MOVD in1+8(FP), P1ptr
+ MOVD in2+16(FP), P2ptr
+ LXVD2X (R8)(R0), SWAP_
+ LXVD2X (R19)(P1ptr), X0_ // Z1L
+ LXVD2X (R20)(P1ptr), X1_ // Z1H
+ VPERM X0, X0, SWAP, X0
+ VPERM X1, X1, SWAP, X1
+ LXVD2X (R19)(P2ptr), Y0_ // Z2L
+ LXVD2X (R20)(P2ptr), Y1_ // Z2H
+ VPERM Y0, Y0, SWAP, Y0
+ VPERM Y1, Y1, SWAP, Y1
+ CALL p256MulInternal<>(SB)
+
+ // X=T ; Y=H ; MUL; Z3:=T// Z3 = Z3*H
+ VOR T0, T0, X0
+ VOR T1, T1, X1
+ VOR HL, HL, Y0
+ VOR HH, HH, Y1
+ CALL p256MulInternal<>(SB)
+ MOVD res+0(FP), P3ptr
+ LXVD2X (R8)(R0), SWAP_
+ VPERM T1, T1, SWAP, TT1
+ VPERM T0, T0, SWAP, TT0
+ STXVD2X TT0_, (R19)(P3ptr)
+ STXVD2X TT1_, (R20)(P3ptr)
+
+ // X=Y1; Y=S1; MUL; S1=T // S1 = Y1*S1
+ MOVD in1+8(FP), P1ptr
+ LXVD2X (R17)(P1ptr), X0_
+ LXVD2X (R18)(P1ptr), X1_
+ VPERM X0, X0, SWAP, X0
+ VPERM X1, X1, SWAP, X1
+ VOR S1L, S1L, Y0
+ VOR S1H, S1H, Y1
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, S1L
+ VOR T1, T1, S1H
+
+ // X=Y2; Y=R ; MUL; T- // R = Y2*R
+ MOVD in2+16(FP), P2ptr
+ LXVD2X (R8)(R0), SWAP_
+ LXVD2X (R17)(P2ptr), X0_
+ LXVD2X (R18)(P2ptr), X1_
+ VPERM X0, X0, SWAP, X0
+ VPERM X1, X1, SWAP, X1
+ VOR RL, RL, Y0
+
+ // VOR RH, RH, Y1 RH was saved above in D2X format
+ LXVD2X (R1)(R17), Y1_
+ CALL p256MulInternal<>(SB)
+
+ // SUB(R<T-S1) // R = T-S1
+ p256SubInternal(RH,RL,T1,T0,S1H,S1L)
+
+ STXVD2X RH_, (R1)(R17) // Save RH
+
+ // if R == 0 or R^P == 0 then ret=ret else ret=0
+ // clobbers T1H and T1L
+ // Redo this using ISEL??
+ MOVD $1, TRUE
+ VSPLTISB $0, ZER
+ VOR RL, RH, T1H
+ VCMPEQUDCC ZER, T1H, T1H
+
+ // 24 = CR6 NE
+ ISEL $26, R0, TRUE, RES1
+ VXOR RL, PL, T1L
+ VXOR RH, PH, T1H // SAVE: T1L
+ VOR T1L, T1H, T1H
+ VCMPEQUDCC ZER, T1H, T1H
+
+ // 26 = CR6 NE
+ ISEL $26, R0, TRUE, RES2
+ OR RES2, RES1, RES1
+ MOVD ret+24(FP), RES2
+ AND RES2, RES1, RES1
+ MOVD RES1, ret+24(FP)
+
+ // X=H ; Y=H ; MUL; T- // T1 = H*H
+ VOR HL, HL, X0
+ VOR HH, HH, X1
+ VOR HL, HL, Y0
+ VOR HH, HH, Y1
+ CALL p256MulInternal<>(SB)
+
+ // X- ; Y=T ; MUL; T2=T // T2 = H*T1
+ VOR T0, T0, Y0
+ VOR T1, T1, Y1
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, T2L
+ VOR T1, T1, T2H
+
+ // X=U1; Y- ; MUL; U1=T // U1 = U1*T1
+ VOR U1L, U1L, X0
+ VOR U1H, U1H, X1
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, U1L
+ VOR T1, T1, U1H
+
+ // X=R ; Y=R ; MUL; T- // X3 = R*R
+ VOR RL, RL, X0
+
+ // VOR RH, RH, X1
+ VOR RL, RL, Y0
+
+ // RH was saved above using STXVD2X
+ LXVD2X (R1)(R17), X1_
+ VOR X1, X1, Y1
+
+ // VOR RH, RH, Y1
+ CALL p256MulInternal<>(SB)
+
+ // SUB(T<T-T2) // X3 = X3-T2
+ p256SubInternal(T1,T0,T1,T0,T2H,T2L)
+
+ // ADD(X<U1+U1) // T1 = 2*U1
+ p256AddInternal(X1,X0,U1H,U1L,U1H,U1L)
+
+ // SUB(T<T-X) X3:=T // X3 = X3-T1 << store-out X3 result reg
+ p256SubInternal(T1,T0,T1,T0,X1,X0)
+ MOVD res+0(FP), P3ptr
+ LXVD2X (R8)(R0), SWAP_
+ VPERM T1, T1, SWAP, TT1
+ VPERM T0, T0, SWAP, TT0
+ STXVD2X TT0_, (R0)(P3ptr)
+ STXVD2X TT1_, (R16)(P3ptr)
+
+ // SUB(Y<U1-T) // Y3 = U1-X3
+ p256SubInternal(Y1,Y0,U1H,U1L,T1,T0)
+
+ // X=R ; Y- ; MUL; U1=T // Y3 = R*Y3
+ VOR RL, RL, X0
+
+ // VOR RH, RH, X1
+ LXVD2X (R1)(R17), X1_
+ CALL p256MulInternal<>(SB)
+ VOR T0, T0, U1L
+ VOR T1, T1, U1H
+
+ // X=S1; Y=T2; MUL; T- // T2 = S1*T2
+ VOR S1L, S1L, X0
+ VOR S1H, S1H, X1
+ VOR T2L, T2L, Y0
+ VOR T2H, T2H, Y1
+ CALL p256MulInternal<>(SB)
+
+ // SUB(T<U1-T); Y3:=T // Y3 = Y3-T2 << store-out Y3 result reg
+ p256SubInternal(T1,T0,U1H,U1L,T1,T0)
+ MOVD res+0(FP), P3ptr
+ LXVD2X (R8)(R0), SWAP_
+ VPERM T1, T1, SWAP, TT1
+ VPERM T0, T0, SWAP, TT0
+ STXVD2X TT0_, (R17)(P3ptr)
+ STXVD2X TT1_, (R18)(P3ptr)
+
+ RET
diff --git a/src/crypto/elliptic/p256_asm_s390x.s b/src/crypto/elliptic/p256_asm_s390x.s
new file mode 100644
index 0000000..cf37e20
--- /dev/null
+++ b/src/crypto/elliptic/p256_asm_s390x.s
@@ -0,0 +1,2714 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+#include "go_asm.h"
+
+
+DATA p256ordK0<>+0x00(SB)/4, $0xee00bc4f
+DATA p256ord<>+0x00(SB)/8, $0xffffffff00000000
+DATA p256ord<>+0x08(SB)/8, $0xffffffffffffffff
+DATA p256ord<>+0x10(SB)/8, $0xbce6faada7179e84
+DATA p256ord<>+0x18(SB)/8, $0xf3b9cac2fc632551
+DATA p256<>+0x00(SB)/8, $0xffffffff00000001 // P256
+DATA p256<>+0x08(SB)/8, $0x0000000000000000 // P256
+DATA p256<>+0x10(SB)/8, $0x00000000ffffffff // P256
+DATA p256<>+0x18(SB)/8, $0xffffffffffffffff // P256
+DATA p256<>+0x20(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0
+DATA p256<>+0x28(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0
+DATA p256<>+0x30(SB)/8, $0x0000000010111213 // SEL 0 d1 d0 0
+DATA p256<>+0x38(SB)/8, $0x1415161700000000 // SEL 0 d1 d0 0
+DATA p256<>+0x40(SB)/8, $0x18191a1b1c1d1e1f // SEL d1 d0 d1 d0
+DATA p256<>+0x48(SB)/8, $0x18191a1b1c1d1e1f // SEL d1 d0 d1 d0
+DATA p256mul<>+0x00(SB)/8, $0xffffffff00000001 // P256
+DATA p256mul<>+0x08(SB)/8, $0x0000000000000000 // P256
+DATA p256mul<>+0x10(SB)/8, $0x00000000ffffffff // P256
+DATA p256mul<>+0x18(SB)/8, $0xffffffffffffffff // P256
+DATA p256mul<>+0x20(SB)/8, $0x1c1d1e1f00000000 // SEL d0 0 0 d0
+DATA p256mul<>+0x28(SB)/8, $0x000000001c1d1e1f // SEL d0 0 0 d0
+DATA p256mul<>+0x30(SB)/8, $0x0001020304050607 // SEL d0 0 d1 d0
+DATA p256mul<>+0x38(SB)/8, $0x1c1d1e1f0c0d0e0f // SEL d0 0 d1 d0
+DATA p256mul<>+0x40(SB)/8, $0x040506071c1d1e1f // SEL 0 d1 d0 d1
+DATA p256mul<>+0x48(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL 0 d1 d0 d1
+DATA p256mul<>+0x50(SB)/8, $0x0405060704050607 // SEL 0 0 d1 d0
+DATA p256mul<>+0x58(SB)/8, $0x1c1d1e1f0c0d0e0f // SEL 0 0 d1 d0
+DATA p256mul<>+0x60(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0
+DATA p256mul<>+0x68(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0
+DATA p256mul<>+0x70(SB)/8, $0x141516170c0d0e0f // SEL 0 d1 d0 0
+DATA p256mul<>+0x78(SB)/8, $0x1c1d1e1f14151617 // SEL 0 d1 d0 0
+DATA p256mul<>+0x80(SB)/8, $0x00000000fffffffe // (1*2^256)%P256
+DATA p256mul<>+0x88(SB)/8, $0xffffffffffffffff // (1*2^256)%P256
+DATA p256mul<>+0x90(SB)/8, $0xffffffff00000000 // (1*2^256)%P256
+DATA p256mul<>+0x98(SB)/8, $0x0000000000000001 // (1*2^256)%P256
+GLOBL p256ordK0<>(SB), 8, $4
+GLOBL p256ord<>(SB), 8, $32
+GLOBL p256<>(SB), 8, $80
+GLOBL p256mul<>(SB), 8, $160
+
+DATA p256vmsl<>+0x0(SB)/8, $0x0012131415161718
+DATA p256vmsl<>+0x8(SB)/8, $0x00191a1b1c1d1e1f
+DATA p256vmsl<>+0x10(SB)/8, $0x0012131415161718
+DATA p256vmsl<>+0x18(SB)/8, $0x000b0c0d0e0f1011
+DATA p256vmsl<>+0x20(SB)/8, $0x00191a1b1c1d1e1f
+DATA p256vmsl<>+0x28(SB)/8, $0x0012131415161718
+DATA p256vmsl<>+0x30(SB)/8, $0x000b0c0d0e0f1011
+DATA p256vmsl<>+0x38(SB)/8, $0x0012131415161718
+DATA p256vmsl<>+0x40(SB)/8, $0x000405060708090a
+DATA p256vmsl<>+0x48(SB)/8, $0x000b0c0d0e0f1011
+DATA p256vmsl<>+0x50(SB)/8, $0x000b0c0d0e0f1011
+DATA p256vmsl<>+0x58(SB)/8, $0x000405060708090a
+DATA p256vmsl<>+0x60(SB)/8, $0x1010101000010203
+DATA p256vmsl<>+0x68(SB)/8, $0x100405060708090a
+DATA p256vmsl<>+0x70(SB)/8, $0x100405060708090a
+DATA p256vmsl<>+0x78(SB)/8, $0x1010101000010203
+GLOBL p256vmsl<>(SB), 8, $128
+
+// ---------------------------------------
+// iff cond == 1 val <- -val
+// func p256NegCond(val *p256Point, cond int)
+#define P1ptr R1
+#define CPOOL R4
+
+#define Y1L V0
+#define Y1H V1
+#define T1L V2
+#define T1H V3
+
+#define PL V30
+#define PH V31
+
+#define ZER V4
+#define SEL1 V5
+#define CAR1 V6
+TEXT ·p256NegCond(SB), NOSPLIT, $0
+ MOVD val+0(FP), P1ptr
+
+ MOVD $p256mul<>+0x00(SB), CPOOL
+ VL 16(CPOOL), PL
+ VL 0(CPOOL), PH
+
+ VL 32(P1ptr), Y1H
+ VL 48(P1ptr), Y1L
+
+ VLREPG cond+8(FP), SEL1
+ VZERO ZER
+ VCEQG SEL1, ZER, SEL1
+
+ VSCBIQ Y1L, PL, CAR1
+ VSQ Y1L, PL, T1L
+ VSBIQ PH, Y1H, CAR1, T1H
+
+ VSEL Y1L, T1L, SEL1, Y1L
+ VSEL Y1H, T1H, SEL1, Y1H
+
+ VST Y1H, 32(P1ptr)
+ VST Y1L, 48(P1ptr)
+ RET
+
+#undef P1ptr
+#undef CPOOL
+#undef Y1L
+#undef Y1H
+#undef T1L
+#undef T1H
+#undef PL
+#undef PH
+#undef ZER
+#undef SEL1
+#undef CAR1
+
+// ---------------------------------------
+// if cond == 0 res <- b; else res <- a
+// func p256MovCond(res, a, b *p256Point, cond int)
+#define P3ptr R1
+#define P1ptr R2
+#define P2ptr R3
+
+#define X1L V0
+#define X1H V1
+#define Y1L V2
+#define Y1H V3
+#define Z1L V4
+#define Z1H V5
+#define X2L V6
+#define X2H V7
+#define Y2L V8
+#define Y2H V9
+#define Z2L V10
+#define Z2H V11
+
+#define ZER V18
+#define SEL1 V19
+TEXT ·p256MovCond(SB), NOSPLIT, $0
+ MOVD res+0(FP), P3ptr
+ MOVD a+8(FP), P1ptr
+ MOVD b+16(FP), P2ptr
+ VLREPG cond+24(FP), SEL1
+ VZERO ZER
+ VCEQG SEL1, ZER, SEL1
+
+ VL 0(P1ptr), X1H
+ VL 16(P1ptr), X1L
+ VL 32(P1ptr), Y1H
+ VL 48(P1ptr), Y1L
+ VL 64(P1ptr), Z1H
+ VL 80(P1ptr), Z1L
+
+ VL 0(P2ptr), X2H
+ VL 16(P2ptr), X2L
+ VL 32(P2ptr), Y2H
+ VL 48(P2ptr), Y2L
+ VL 64(P2ptr), Z2H
+ VL 80(P2ptr), Z2L
+
+ VSEL X2L, X1L, SEL1, X1L
+ VSEL X2H, X1H, SEL1, X1H
+ VSEL Y2L, Y1L, SEL1, Y1L
+ VSEL Y2H, Y1H, SEL1, Y1H
+ VSEL Z2L, Z1L, SEL1, Z1L
+ VSEL Z2H, Z1H, SEL1, Z1H
+
+ VST X1H, 0(P3ptr)
+ VST X1L, 16(P3ptr)
+ VST Y1H, 32(P3ptr)
+ VST Y1L, 48(P3ptr)
+ VST Z1H, 64(P3ptr)
+ VST Z1L, 80(P3ptr)
+
+ RET
+
+#undef P3ptr
+#undef P1ptr
+#undef P2ptr
+#undef X1L
+#undef X1H
+#undef Y1L
+#undef Y1H
+#undef Z1L
+#undef Z1H
+#undef X2L
+#undef X2H
+#undef Y2L
+#undef Y2H
+#undef Z2L
+#undef Z2H
+#undef ZER
+#undef SEL1
+
+// ---------------------------------------
+// Constant time table access
+// Indexed from 1 to 15, with -1 offset
+// (index 0 is implicitly point at infinity)
+// func p256Select(point *p256Point, table []p256Point, idx int)
+#define P3ptr R1
+#define P1ptr R2
+#define COUNT R4
+
+#define X1L V0
+#define X1H V1
+#define Y1L V2
+#define Y1H V3
+#define Z1L V4
+#define Z1H V5
+#define X2L V6
+#define X2H V7
+#define Y2L V8
+#define Y2H V9
+#define Z2L V10
+#define Z2H V11
+
+#define ONE V18
+#define IDX V19
+#define SEL1 V20
+#define SEL2 V21
+TEXT ·p256Select(SB), NOSPLIT, $0
+ MOVD point+0(FP), P3ptr
+ MOVD table+8(FP), P1ptr
+ VLREPB idx+(32+7)(FP), IDX
+ VREPIB $1, ONE
+ VREPIB $1, SEL2
+ MOVD $1, COUNT
+
+ VZERO X1H
+ VZERO X1L
+ VZERO Y1H
+ VZERO Y1L
+ VZERO Z1H
+ VZERO Z1L
+
+loop_select:
+ VL 0(P1ptr), X2H
+ VL 16(P1ptr), X2L
+ VL 32(P1ptr), Y2H
+ VL 48(P1ptr), Y2L
+ VL 64(P1ptr), Z2H
+ VL 80(P1ptr), Z2L
+
+ VCEQG SEL2, IDX, SEL1
+
+ VSEL X2L, X1L, SEL1, X1L
+ VSEL X2H, X1H, SEL1, X1H
+ VSEL Y2L, Y1L, SEL1, Y1L
+ VSEL Y2H, Y1H, SEL1, Y1H
+ VSEL Z2L, Z1L, SEL1, Z1L
+ VSEL Z2H, Z1H, SEL1, Z1H
+
+ VAB SEL2, ONE, SEL2
+ ADDW $1, COUNT
+ ADD $96, P1ptr
+ CMPW COUNT, $17
+ BLT loop_select
+
+ VST X1H, 0(P3ptr)
+ VST X1L, 16(P3ptr)
+ VST Y1H, 32(P3ptr)
+ VST Y1L, 48(P3ptr)
+ VST Z1H, 64(P3ptr)
+ VST Z1L, 80(P3ptr)
+ RET
+
+#undef P3ptr
+#undef P1ptr
+#undef COUNT
+#undef X1L
+#undef X1H
+#undef Y1L
+#undef Y1H
+#undef Z1L
+#undef Z1H
+#undef X2L
+#undef X2H
+#undef Y2L
+#undef Y2H
+#undef Z2L
+#undef Z2H
+#undef ONE
+#undef IDX
+#undef SEL1
+#undef SEL2
+
+// ---------------------------------------
+// Constant time table access
+// Indexed from 1 to 15, with -1 offset
+// (index 0 is implicitly point at infinity)
+// func p256SelectBase(point *p256Point, table []p256Point, idx int)
+#define P3ptr R1
+#define P1ptr R2
+#define COUNT R4
+
+#define X1L V0
+#define X1H V1
+#define Y1L V2
+#define Y1H V3
+#define Z1L V4
+#define Z1H V5
+#define X2L V6
+#define X2H V7
+#define Y2L V8
+#define Y2H V9
+#define Z2L V10
+#define Z2H V11
+
+#define ONE V18
+#define IDX V19
+#define SEL1 V20
+#define SEL2 V21
+TEXT ·p256SelectBase(SB), NOSPLIT, $0
+ MOVD point+0(FP), P3ptr
+ MOVD table+8(FP), P1ptr
+ VLREPB idx+(32+7)(FP), IDX
+ VREPIB $1, ONE
+ VREPIB $1, SEL2
+ MOVD $1, COUNT
+
+ VZERO X1H
+ VZERO X1L
+ VZERO Y1H
+ VZERO Y1L
+ VZERO Z1H
+ VZERO Z1L
+
+loop_select:
+ VL 0(P1ptr), X2H
+ VL 16(P1ptr), X2L
+ VL 32(P1ptr), Y2H
+ VL 48(P1ptr), Y2L
+ VL 64(P1ptr), Z2H
+ VL 80(P1ptr), Z2L
+
+ VCEQG SEL2, IDX, SEL1
+
+ VSEL X2L, X1L, SEL1, X1L
+ VSEL X2H, X1H, SEL1, X1H
+ VSEL Y2L, Y1L, SEL1, Y1L
+ VSEL Y2H, Y1H, SEL1, Y1H
+ VSEL Z2L, Z1L, SEL1, Z1L
+ VSEL Z2H, Z1H, SEL1, Z1H
+
+ VAB SEL2, ONE, SEL2
+ ADDW $1, COUNT
+ ADD $96, P1ptr
+ CMPW COUNT, $65
+ BLT loop_select
+
+ VST X1H, 0(P3ptr)
+ VST X1L, 16(P3ptr)
+ VST Y1H, 32(P3ptr)
+ VST Y1L, 48(P3ptr)
+ VST Z1H, 64(P3ptr)
+ VST Z1L, 80(P3ptr)
+ RET
+
+#undef P3ptr
+#undef P1ptr
+#undef COUNT
+#undef X1L
+#undef X1H
+#undef Y1L
+#undef Y1H
+#undef Z1L
+#undef Z1H
+#undef X2L
+#undef X2H
+#undef Y2L
+#undef Y2H
+#undef Z2L
+#undef Z2H
+#undef ONE
+#undef IDX
+#undef SEL1
+#undef SEL2
+
+// ---------------------------------------
+// func p256FromMont(res, in []byte)
+#define res_ptr R1
+#define x_ptr R2
+#define CPOOL R4
+
+#define T0 V0
+#define T1 V1
+#define T2 V2
+#define TT0 V3
+#define TT1 V4
+
+#define ZER V6
+#define SEL1 V7
+#define SEL2 V8
+#define CAR1 V9
+#define CAR2 V10
+#define RED1 V11
+#define RED2 V12
+#define PL V13
+#define PH V14
+
+TEXT ·p256FromMont(SB), NOSPLIT, $0
+ MOVD res+0(FP), res_ptr
+ MOVD in+24(FP), x_ptr
+
+ VZERO T2
+ VZERO ZER
+ MOVD $p256<>+0x00(SB), CPOOL
+ VL 16(CPOOL), PL
+ VL 0(CPOOL), PH
+ VL 48(CPOOL), SEL2
+ VL 64(CPOOL), SEL1
+
+ VL (1*16)(x_ptr), T0
+ VL (0*16)(x_ptr), T1
+
+ // First round
+ VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0
+ VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0
+ VSQ RED1, RED2, RED2 // Guaranteed not to underflow
+
+ VSLDB $8, T1, T0, T0
+ VSLDB $8, T2, T1, T1
+
+ VACCQ T0, RED1, CAR1
+ VAQ T0, RED1, T0
+ VACCCQ T1, RED2, CAR1, CAR2
+ VACQ T1, RED2, CAR1, T1
+ VAQ T2, CAR2, T2
+
+ // Second round
+ VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0
+ VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0
+ VSQ RED1, RED2, RED2 // Guaranteed not to underflow
+
+ VSLDB $8, T1, T0, T0
+ VSLDB $8, T2, T1, T1
+
+ VACCQ T0, RED1, CAR1
+ VAQ T0, RED1, T0
+ VACCCQ T1, RED2, CAR1, CAR2
+ VACQ T1, RED2, CAR1, T1
+ VAQ T2, CAR2, T2
+
+ // Third round
+ VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0
+ VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0
+ VSQ RED1, RED2, RED2 // Guaranteed not to underflow
+
+ VSLDB $8, T1, T0, T0
+ VSLDB $8, T2, T1, T1
+
+ VACCQ T0, RED1, CAR1
+ VAQ T0, RED1, T0
+ VACCCQ T1, RED2, CAR1, CAR2
+ VACQ T1, RED2, CAR1, T1
+ VAQ T2, CAR2, T2
+
+ // Last round
+ VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0
+ VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0
+ VSQ RED1, RED2, RED2 // Guaranteed not to underflow
+
+ VSLDB $8, T1, T0, T0
+ VSLDB $8, T2, T1, T1
+
+ VACCQ T0, RED1, CAR1
+ VAQ T0, RED1, T0
+ VACCCQ T1, RED2, CAR1, CAR2
+ VACQ T1, RED2, CAR1, T1
+ VAQ T2, CAR2, T2
+
+ // ---------------------------------------------------
+
+ VSCBIQ PL, T0, CAR1
+ VSQ PL, T0, TT0
+ VSBCBIQ T1, PH, CAR1, CAR2
+ VSBIQ T1, PH, CAR1, TT1
+ VSBIQ T2, ZER, CAR2, T2
+
+ // what output to use, TT1||TT0 or T1||T0?
+ VSEL T0, TT0, T2, T0
+ VSEL T1, TT1, T2, T1
+
+ VST T0, (1*16)(res_ptr)
+ VST T1, (0*16)(res_ptr)
+ RET
+
+#undef res_ptr
+#undef x_ptr
+#undef CPOOL
+#undef T0
+#undef T1
+#undef T2
+#undef TT0
+#undef TT1
+#undef ZER
+#undef SEL1
+#undef SEL2
+#undef CAR1
+#undef CAR2
+#undef RED1
+#undef RED2
+#undef PL
+#undef PH
+
+// ---------------------------------------
+// func p256OrdMul(res, in1, in2 []byte)
+#define res_ptr R1
+#define x_ptr R2
+#define y_ptr R3
+#define X0 V0
+#define X1 V1
+#define Y0 V2
+#define Y1 V3
+#define M0 V4
+#define M1 V5
+#define T0 V6
+#define T1 V7
+#define T2 V8
+#define YDIG V9
+
+#define ADD1 V16
+#define ADD1H V17
+#define ADD2 V18
+#define ADD2H V19
+#define RED1 V20
+#define RED1H V21
+#define RED2 V22
+#define RED2H V23
+#define CAR1 V24
+#define CAR1M V25
+
+#define MK0 V30
+#define K0 V31
+TEXT ·p256OrdMul(SB), NOSPLIT, $0
+ MOVD res+0(FP), res_ptr
+ MOVD in1+24(FP), x_ptr
+ MOVD in2+48(FP), y_ptr
+
+ VZERO T2
+ MOVD $p256ordK0<>+0x00(SB), R4
+
+ // VLEF $3, 0(R4), K0
+ WORD $0xE7F40000
+ BYTE $0x38
+ BYTE $0x03
+ MOVD $p256ord<>+0x00(SB), R4
+ VL 16(R4), M0
+ VL 0(R4), M1
+
+ VL (1*16)(x_ptr), X0
+ VL (0*16)(x_ptr), X1
+ VL (1*16)(y_ptr), Y0
+ VL (0*16)(y_ptr), Y1
+
+ // ---------------------------------------------------------------------------/
+ VREPF $3, Y0, YDIG
+ VMLF X0, YDIG, ADD1
+ VMLF ADD1, K0, MK0
+ VREPF $3, MK0, MK0
+
+ VMLF X1, YDIG, ADD2
+ VMLHF X0, YDIG, ADD1H
+ VMLHF X1, YDIG, ADD2H
+
+ VMALF M0, MK0, ADD1, RED1
+ VMALHF M0, MK0, ADD1, RED1H
+ VMALF M1, MK0, ADD2, RED2
+ VMALHF M1, MK0, ADD2, RED2H
+
+ VSLDB $12, RED2, RED1, RED1
+ VSLDB $12, T2, RED2, RED2
+
+ VACCQ RED1, ADD1H, CAR1
+ VAQ RED1, ADD1H, T0
+ VACCQ RED1H, T0, CAR1M
+ VAQ RED1H, T0, T0
+
+ // << ready for next MK0
+
+ VACQ RED2, ADD2H, CAR1, T1
+ VACCCQ RED2, ADD2H, CAR1, CAR1
+ VACCCQ RED2H, T1, CAR1M, T2
+ VACQ RED2H, T1, CAR1M, T1
+ VAQ CAR1, T2, T2
+
+ // ---------------------------------------------------
+/* *
+ * ---+--------+--------+
+ * T2| T1 | T0 |
+ * ---+--------+--------+
+ * *(add)*
+ * +--------+--------+
+ * | X1 | X0 |
+ * +--------+--------+
+ * *(mul)*
+ * +--------+--------+
+ * | YDIG | YDIG |
+ * +--------+--------+
+ * *(add)*
+ * +--------+--------+
+ * | M1 | M0 |
+ * +--------+--------+
+ * *(mul)*
+ * +--------+--------+
+ * | MK0 | MK0 |
+ * +--------+--------+
+ *
+ * ---------------------
+ *
+ * +--------+--------+
+ * | ADD2 | ADD1 |
+ * +--------+--------+
+ * +--------+--------+
+ * | ADD2H | ADD1H |
+ * +--------+--------+
+ * +--------+--------+
+ * | RED2 | RED1 |
+ * +--------+--------+
+ * +--------+--------+
+ * | RED2H | RED1H |
+ * +--------+--------+
+ */
+ VREPF $2, Y0, YDIG
+ VMALF X0, YDIG, T0, ADD1
+ VMLF ADD1, K0, MK0
+ VREPF $3, MK0, MK0
+
+ VMALF X1, YDIG, T1, ADD2
+ VMALHF X0, YDIG, T0, ADD1H
+ VMALHF X1, YDIG, T1, ADD2H
+
+ VMALF M0, MK0, ADD1, RED1
+ VMALHF M0, MK0, ADD1, RED1H
+ VMALF M1, MK0, ADD2, RED2
+ VMALHF M1, MK0, ADD2, RED2H
+
+ VSLDB $12, RED2, RED1, RED1
+ VSLDB $12, T2, RED2, RED2
+
+ VACCQ RED1, ADD1H, CAR1
+ VAQ RED1, ADD1H, T0
+ VACCQ RED1H, T0, CAR1M
+ VAQ RED1H, T0, T0
+
+ // << ready for next MK0
+
+ VACQ RED2, ADD2H, CAR1, T1
+ VACCCQ RED2, ADD2H, CAR1, CAR1
+ VACCCQ RED2H, T1, CAR1M, T2
+ VACQ RED2H, T1, CAR1M, T1
+ VAQ CAR1, T2, T2
+
+ // ---------------------------------------------------
+ VREPF $1, Y0, YDIG
+ VMALF X0, YDIG, T0, ADD1
+ VMLF ADD1, K0, MK0
+ VREPF $3, MK0, MK0
+
+ VMALF X1, YDIG, T1, ADD2
+ VMALHF X0, YDIG, T0, ADD1H
+ VMALHF X1, YDIG, T1, ADD2H
+
+ VMALF M0, MK0, ADD1, RED1
+ VMALHF M0, MK0, ADD1, RED1H
+ VMALF M1, MK0, ADD2, RED2
+ VMALHF M1, MK0, ADD2, RED2H
+
+ VSLDB $12, RED2, RED1, RED1
+ VSLDB $12, T2, RED2, RED2
+
+ VACCQ RED1, ADD1H, CAR1
+ VAQ RED1, ADD1H, T0
+ VACCQ RED1H, T0, CAR1M
+ VAQ RED1H, T0, T0
+
+ // << ready for next MK0
+
+ VACQ RED2, ADD2H, CAR1, T1
+ VACCCQ RED2, ADD2H, CAR1, CAR1
+ VACCCQ RED2H, T1, CAR1M, T2
+ VACQ RED2H, T1, CAR1M, T1
+ VAQ CAR1, T2, T2
+
+ // ---------------------------------------------------
+ VREPF $0, Y0, YDIG
+ VMALF X0, YDIG, T0, ADD1
+ VMLF ADD1, K0, MK0
+ VREPF $3, MK0, MK0
+
+ VMALF X1, YDIG, T1, ADD2
+ VMALHF X0, YDIG, T0, ADD1H
+ VMALHF X1, YDIG, T1, ADD2H
+
+ VMALF M0, MK0, ADD1, RED1
+ VMALHF M0, MK0, ADD1, RED1H
+ VMALF M1, MK0, ADD2, RED2
+ VMALHF M1, MK0, ADD2, RED2H
+
+ VSLDB $12, RED2, RED1, RED1
+ VSLDB $12, T2, RED2, RED2
+
+ VACCQ RED1, ADD1H, CAR1
+ VAQ RED1, ADD1H, T0
+ VACCQ RED1H, T0, CAR1M
+ VAQ RED1H, T0, T0
+
+ // << ready for next MK0
+
+ VACQ RED2, ADD2H, CAR1, T1
+ VACCCQ RED2, ADD2H, CAR1, CAR1
+ VACCCQ RED2H, T1, CAR1M, T2
+ VACQ RED2H, T1, CAR1M, T1
+ VAQ CAR1, T2, T2
+
+ // ---------------------------------------------------
+ VREPF $3, Y1, YDIG
+ VMALF X0, YDIG, T0, ADD1
+ VMLF ADD1, K0, MK0
+ VREPF $3, MK0, MK0
+
+ VMALF X1, YDIG, T1, ADD2
+ VMALHF X0, YDIG, T0, ADD1H
+ VMALHF X1, YDIG, T1, ADD2H
+
+ VMALF M0, MK0, ADD1, RED1
+ VMALHF M0, MK0, ADD1, RED1H
+ VMALF M1, MK0, ADD2, RED2
+ VMALHF M1, MK0, ADD2, RED2H
+
+ VSLDB $12, RED2, RED1, RED1
+ VSLDB $12, T2, RED2, RED2
+
+ VACCQ RED1, ADD1H, CAR1
+ VAQ RED1, ADD1H, T0
+ VACCQ RED1H, T0, CAR1M
+ VAQ RED1H, T0, T0
+
+ // << ready for next MK0
+
+ VACQ RED2, ADD2H, CAR1, T1
+ VACCCQ RED2, ADD2H, CAR1, CAR1
+ VACCCQ RED2H, T1, CAR1M, T2
+ VACQ RED2H, T1, CAR1M, T1
+ VAQ CAR1, T2, T2
+
+ // ---------------------------------------------------
+ VREPF $2, Y1, YDIG
+ VMALF X0, YDIG, T0, ADD1
+ VMLF ADD1, K0, MK0
+ VREPF $3, MK0, MK0
+
+ VMALF X1, YDIG, T1, ADD2
+ VMALHF X0, YDIG, T0, ADD1H
+ VMALHF X1, YDIG, T1, ADD2H
+
+ VMALF M0, MK0, ADD1, RED1
+ VMALHF M0, MK0, ADD1, RED1H
+ VMALF M1, MK0, ADD2, RED2
+ VMALHF M1, MK0, ADD2, RED2H
+
+ VSLDB $12, RED2, RED1, RED1
+ VSLDB $12, T2, RED2, RED2
+
+ VACCQ RED1, ADD1H, CAR1
+ VAQ RED1, ADD1H, T0
+ VACCQ RED1H, T0, CAR1M
+ VAQ RED1H, T0, T0
+
+ // << ready for next MK0
+
+ VACQ RED2, ADD2H, CAR1, T1
+ VACCCQ RED2, ADD2H, CAR1, CAR1
+ VACCCQ RED2H, T1, CAR1M, T2
+ VACQ RED2H, T1, CAR1M, T1
+ VAQ CAR1, T2, T2
+
+ // ---------------------------------------------------
+ VREPF $1, Y1, YDIG
+ VMALF X0, YDIG, T0, ADD1
+ VMLF ADD1, K0, MK0
+ VREPF $3, MK0, MK0
+
+ VMALF X1, YDIG, T1, ADD2
+ VMALHF X0, YDIG, T0, ADD1H
+ VMALHF X1, YDIG, T1, ADD2H
+
+ VMALF M0, MK0, ADD1, RED1
+ VMALHF M0, MK0, ADD1, RED1H
+ VMALF M1, MK0, ADD2, RED2
+ VMALHF M1, MK0, ADD2, RED2H
+
+ VSLDB $12, RED2, RED1, RED1
+ VSLDB $12, T2, RED2, RED2
+
+ VACCQ RED1, ADD1H, CAR1
+ VAQ RED1, ADD1H, T0
+ VACCQ RED1H, T0, CAR1M
+ VAQ RED1H, T0, T0
+
+ // << ready for next MK0
+
+ VACQ RED2, ADD2H, CAR1, T1
+ VACCCQ RED2, ADD2H, CAR1, CAR1
+ VACCCQ RED2H, T1, CAR1M, T2
+ VACQ RED2H, T1, CAR1M, T1
+ VAQ CAR1, T2, T2
+
+ // ---------------------------------------------------
+ VREPF $0, Y1, YDIG
+ VMALF X0, YDIG, T0, ADD1
+ VMLF ADD1, K0, MK0
+ VREPF $3, MK0, MK0
+
+ VMALF X1, YDIG, T1, ADD2
+ VMALHF X0, YDIG, T0, ADD1H
+ VMALHF X1, YDIG, T1, ADD2H
+
+ VMALF M0, MK0, ADD1, RED1
+ VMALHF M0, MK0, ADD1, RED1H
+ VMALF M1, MK0, ADD2, RED2
+ VMALHF M1, MK0, ADD2, RED2H
+
+ VSLDB $12, RED2, RED1, RED1
+ VSLDB $12, T2, RED2, RED2
+
+ VACCQ RED1, ADD1H, CAR1
+ VAQ RED1, ADD1H, T0
+ VACCQ RED1H, T0, CAR1M
+ VAQ RED1H, T0, T0
+
+ // << ready for next MK0
+
+ VACQ RED2, ADD2H, CAR1, T1
+ VACCCQ RED2, ADD2H, CAR1, CAR1
+ VACCCQ RED2H, T1, CAR1M, T2
+ VACQ RED2H, T1, CAR1M, T1
+ VAQ CAR1, T2, T2
+
+ // ---------------------------------------------------
+
+ VZERO RED1
+ VSCBIQ M0, T0, CAR1
+ VSQ M0, T0, ADD1
+ VSBCBIQ T1, M1, CAR1, CAR1M
+ VSBIQ T1, M1, CAR1, ADD2
+ VSBIQ T2, RED1, CAR1M, T2
+
+ // what output to use, ADD2||ADD1 or T1||T0?
+ VSEL T0, ADD1, T2, T0
+ VSEL T1, ADD2, T2, T1
+
+ VST T0, (1*16)(res_ptr)
+ VST T1, (0*16)(res_ptr)
+ RET
+
+#undef res_ptr
+#undef x_ptr
+#undef y_ptr
+#undef X0
+#undef X1
+#undef Y0
+#undef Y1
+#undef M0
+#undef M1
+#undef T0
+#undef T1
+#undef T2
+#undef YDIG
+
+#undef ADD1
+#undef ADD1H
+#undef ADD2
+#undef ADD2H
+#undef RED1
+#undef RED1H
+#undef RED2
+#undef RED2H
+#undef CAR1
+#undef CAR1M
+
+#undef MK0
+#undef K0
+
+// ---------------------------------------
+// p256MulInternalVX
+// V0-V3,V30,V31 - Not Modified
+// V4-V15 - Volatile
+
+#define CPOOL R4
+
+// Parameters
+#define X0 V0 // Not modified
+#define X1 V1 // Not modified
+#define Y0 V2 // Not modified
+#define Y1 V3 // Not modified
+#define T0 V4
+#define T1 V5
+#define P0 V30 // Not modified
+#define P1 V31 // Not modified
+
+// Temporaries
+#define YDIG V6 // Overloaded with CAR2, ZER
+#define ADD1H V7 // Overloaded with ADD3H
+#define ADD2H V8 // Overloaded with ADD4H
+#define ADD3 V9 // Overloaded with SEL2,SEL5
+#define ADD4 V10 // Overloaded with SEL3,SEL6
+#define RED1 V11 // Overloaded with CAR2
+#define RED2 V12
+#define RED3 V13 // Overloaded with SEL1
+#define T2 V14
+// Overloaded temporaries
+#define ADD1 V4 // Overloaded with T0
+#define ADD2 V5 // Overloaded with T1
+#define ADD3H V7 // Overloaded with ADD1H
+#define ADD4H V8 // Overloaded with ADD2H
+#define ZER V6 // Overloaded with YDIG, CAR2
+#define CAR1 V6 // Overloaded with YDIG, ZER
+#define CAR2 V11 // Overloaded with RED1
+// Constant Selects
+#define SEL1 V13 // Overloaded with RED3
+#define SEL2 V9 // Overloaded with ADD3,SEL5
+#define SEL3 V10 // Overloaded with ADD4,SEL6
+#define SEL4 V6 // Overloaded with YDIG,CAR2,ZER
+#define SEL5 V9 // Overloaded with ADD3,SEL2
+#define SEL6 V10 // Overloaded with ADD4,SEL3
+
+/* *
+ * To follow the flow of bits, for your own sanity a stiff drink, need you shall.
+ * Of a single round, a 'helpful' picture, here is. Meaning, column position has.
+ * With you, SIMD be...
+ *
+ * +--------+--------+
+ * +--------| RED2 | RED1 |
+ * | +--------+--------+
+ * | ---+--------+--------+
+ * | +---- T2| T1 | T0 |--+
+ * | | ---+--------+--------+ |
+ * | | |
+ * | | ======================= |
+ * | | |
+ * | | +--------+--------+<-+
+ * | +-------| ADD2 | ADD1 |--|-----+
+ * | | +--------+--------+ | |
+ * | | +--------+--------+<---+ |
+ * | | | ADD2H | ADD1H |--+ |
+ * | | +--------+--------+ | |
+ * | | +--------+--------+<-+ |
+ * | | | ADD4 | ADD3 |--|-+ |
+ * | | +--------+--------+ | | |
+ * | | +--------+--------+<---+ | |
+ * | | | ADD4H | ADD3H |------|-+ |(+vzero)
+ * | | +--------+--------+ | | V
+ * | | ------------------------ | | +--------+
+ * | | | | | RED3 | [d0 0 0 d0]
+ * | | | | +--------+
+ * | +---->+--------+--------+ | | |
+ * (T2[1w]||ADD2[4w]||ADD1[3w]) +--------| T1 | T0 | | | |
+ * | +--------+--------+ | | |
+ * +---->---+--------+--------+ | | |
+ * T2| T1 | T0 |----+ | |
+ * ---+--------+--------+ | | |
+ * ---+--------+--------+<---+ | |
+ * +--- T2| T1 | T0 |----------+
+ * | ---+--------+--------+ | |
+ * | +--------+--------+<-------------+
+ * | | RED2 | RED1 |-----+ | | [0 d1 d0 d1] [d0 0 d1 d0]
+ * | +--------+--------+ | | |
+ * | +--------+<----------------------+
+ * | | RED3 |--------------+ | [0 0 d1 d0]
+ * | +--------+ | |
+ * +--->+--------+--------+ | |
+ * | T1 | T0 |--------+
+ * +--------+--------+ | |
+ * --------------------------- | |
+ * | |
+ * +--------+--------+<----+ |
+ * | RED2 | RED1 | |
+ * +--------+--------+ |
+ * ---+--------+--------+<-------+
+ * T2| T1 | T0 | (H1P-H1P-H00RRAY!)
+ * ---+--------+--------+
+ *
+ * *Mi obra de arte de siglo XXI @vpaprots
+ *
+ *
+ * First group is special, doesn't get the two inputs:
+ * +--------+--------+<-+
+ * +-------| ADD2 | ADD1 |--|-----+
+ * | +--------+--------+ | |
+ * | +--------+--------+<---+ |
+ * | | ADD2H | ADD1H |--+ |
+ * | +--------+--------+ | |
+ * | +--------+--------+<-+ |
+ * | | ADD4 | ADD3 |--|-+ |
+ * | +--------+--------+ | | |
+ * | +--------+--------+<---+ | |
+ * | | ADD4H | ADD3H |------|-+ |(+vzero)
+ * | +--------+--------+ | | V
+ * | ------------------------ | | +--------+
+ * | | | | RED3 | [d0 0 0 d0]
+ * | | | +--------+
+ * +---->+--------+--------+ | | |
+ * (T2[1w]||ADD2[4w]||ADD1[3w]) | T1 | T0 |----+ | |
+ * +--------+--------+ | | |
+ * ---+--------+--------+<---+ | |
+ * +--- T2| T1 | T0 |----------+
+ * | ---+--------+--------+ | |
+ * | +--------+--------+<-------------+
+ * | | RED2 | RED1 |-----+ | | [0 d1 d0 d1] [d0 0 d1 d0]
+ * | +--------+--------+ | | |
+ * | +--------+<----------------------+
+ * | | RED3 |--------------+ | [0 0 d1 d0]
+ * | +--------+ | |
+ * +--->+--------+--------+ | |
+ * | T1 | T0 |--------+
+ * +--------+--------+ | |
+ * --------------------------- | |
+ * | |
+ * +--------+--------+<----+ |
+ * | RED2 | RED1 | |
+ * +--------+--------+ |
+ * ---+--------+--------+<-------+
+ * T2| T1 | T0 | (H1P-H1P-H00RRAY!)
+ * ---+--------+--------+
+ *
+ * Last 'group' needs to RED2||RED1 shifted less
+ */
+TEXT ·p256MulInternalVX(SB), NOSPLIT, $0-0
+ VL 32(CPOOL), SEL1
+ VL 48(CPOOL), SEL2
+ VL 64(CPOOL), SEL3
+ VL 80(CPOOL), SEL4
+
+ // ---------------------------------------------------
+
+ VREPF $3, Y0, YDIG
+ VMLHF X0, YDIG, ADD1H
+ VMLHF X1, YDIG, ADD2H
+ VMLF X0, YDIG, ADD1
+ VMLF X1, YDIG, ADD2
+
+ VREPF $2, Y0, YDIG
+ VMALF X0, YDIG, ADD1H, ADD3
+ VMALF X1, YDIG, ADD2H, ADD4
+ VMALHF X0, YDIG, ADD1H, ADD3H // ADD1H Free
+ VMALHF X1, YDIG, ADD2H, ADD4H // ADD2H Free
+
+ VZERO ZER
+ VL 32(CPOOL), SEL1
+ VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0]
+
+ VSLDB $12, ADD2, ADD1, T0 // ADD1 Free
+ VSLDB $12, ZER, ADD2, T1 // ADD2 Free
+
+ VACCQ T0, ADD3, CAR1
+ VAQ T0, ADD3, T0 // ADD3 Free
+ VACCCQ T1, ADD4, CAR1, T2
+ VACQ T1, ADD4, CAR1, T1 // ADD4 Free
+
+ VL 48(CPOOL), SEL2
+ VL 64(CPOOL), SEL3
+ VL 80(CPOOL), SEL4
+ VPERM RED3, T0, SEL2, RED1 // [d0 0 d1 d0]
+ VPERM RED3, T0, SEL3, RED2 // [ 0 d1 d0 d1]
+ VPERM RED3, T0, SEL4, RED3 // [ 0 0 d1 d0]
+ VSQ RED3, RED2, RED2 // Guaranteed not to underflow
+
+ VSLDB $12, T1, T0, T0
+ VSLDB $12, T2, T1, T1
+
+ VACCQ T0, ADD3H, CAR1
+ VAQ T0, ADD3H, T0
+ VACCCQ T1, ADD4H, CAR1, T2
+ VACQ T1, ADD4H, CAR1, T1
+
+ // ---------------------------------------------------
+
+ VREPF $1, Y0, YDIG
+ VMALHF X0, YDIG, T0, ADD1H
+ VMALHF X1, YDIG, T1, ADD2H
+ VMALF X0, YDIG, T0, ADD1 // T0 Free->ADD1
+ VMALF X1, YDIG, T1, ADD2 // T1 Free->ADD2
+
+ VREPF $0, Y0, YDIG
+ VMALF X0, YDIG, ADD1H, ADD3
+ VMALF X1, YDIG, ADD2H, ADD4
+ VMALHF X0, YDIG, ADD1H, ADD3H // ADD1H Free->ADD3H
+ VMALHF X1, YDIG, ADD2H, ADD4H // ADD2H Free->ADD4H , YDIG Free->ZER
+
+ VZERO ZER
+ VL 32(CPOOL), SEL1
+ VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0]
+
+ VSLDB $12, ADD2, ADD1, T0 // ADD1 Free->T0
+ VSLDB $12, T2, ADD2, T1 // ADD2 Free->T1, T2 Free
+
+ VACCQ T0, RED1, CAR1
+ VAQ T0, RED1, T0
+ VACCCQ T1, RED2, CAR1, T2
+ VACQ T1, RED2, CAR1, T1
+
+ VACCQ T0, ADD3, CAR1
+ VAQ T0, ADD3, T0
+ VACCCQ T1, ADD4, CAR1, CAR2
+ VACQ T1, ADD4, CAR1, T1
+ VAQ T2, CAR2, T2
+
+ VL 48(CPOOL), SEL2
+ VL 64(CPOOL), SEL3
+ VL 80(CPOOL), SEL4
+ VPERM RED3, T0, SEL2, RED1 // [d0 0 d1 d0]
+ VPERM RED3, T0, SEL3, RED2 // [ 0 d1 d0 d1]
+ VPERM RED3, T0, SEL4, RED3 // [ 0 0 d1 d0]
+ VSQ RED3, RED2, RED2 // Guaranteed not to underflow
+
+ VSLDB $12, T1, T0, T0
+ VSLDB $12, T2, T1, T1
+
+ VACCQ T0, ADD3H, CAR1
+ VAQ T0, ADD3H, T0
+ VACCCQ T1, ADD4H, CAR1, T2
+ VACQ T1, ADD4H, CAR1, T1
+
+ // ---------------------------------------------------
+
+ VREPF $3, Y1, YDIG
+ VMALHF X0, YDIG, T0, ADD1H
+ VMALHF X1, YDIG, T1, ADD2H
+ VMALF X0, YDIG, T0, ADD1
+ VMALF X1, YDIG, T1, ADD2
+
+ VREPF $2, Y1, YDIG
+ VMALF X0, YDIG, ADD1H, ADD3
+ VMALF X1, YDIG, ADD2H, ADD4
+ VMALHF X0, YDIG, ADD1H, ADD3H // ADD1H Free
+ VMALHF X1, YDIG, ADD2H, ADD4H // ADD2H Free
+
+ VZERO ZER
+ VL 32(CPOOL), SEL1
+ VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0]
+
+ VSLDB $12, ADD2, ADD1, T0 // ADD1 Free
+ VSLDB $12, T2, ADD2, T1 // ADD2 Free
+
+ VACCQ T0, RED1, CAR1
+ VAQ T0, RED1, T0
+ VACCCQ T1, RED2, CAR1, T2
+ VACQ T1, RED2, CAR1, T1
+
+ VACCQ T0, ADD3, CAR1
+ VAQ T0, ADD3, T0
+ VACCCQ T1, ADD4, CAR1, CAR2
+ VACQ T1, ADD4, CAR1, T1
+ VAQ T2, CAR2, T2
+
+ VL 48(CPOOL), SEL2
+ VL 64(CPOOL), SEL3
+ VL 80(CPOOL), SEL4
+ VPERM RED3, T0, SEL2, RED1 // [d0 0 d1 d0]
+ VPERM RED3, T0, SEL3, RED2 // [ 0 d1 d0 d1]
+ VPERM RED3, T0, SEL4, RED3 // [ 0 0 d1 d0]
+ VSQ RED3, RED2, RED2 // Guaranteed not to underflow
+
+ VSLDB $12, T1, T0, T0
+ VSLDB $12, T2, T1, T1
+
+ VACCQ T0, ADD3H, CAR1
+ VAQ T0, ADD3H, T0
+ VACCCQ T1, ADD4H, CAR1, T2
+ VACQ T1, ADD4H, CAR1, T1
+
+ // ---------------------------------------------------
+
+ VREPF $1, Y1, YDIG
+ VMALHF X0, YDIG, T0, ADD1H
+ VMALHF X1, YDIG, T1, ADD2H
+ VMALF X0, YDIG, T0, ADD1
+ VMALF X1, YDIG, T1, ADD2
+
+ VREPF $0, Y1, YDIG
+ VMALF X0, YDIG, ADD1H, ADD3
+ VMALF X1, YDIG, ADD2H, ADD4
+ VMALHF X0, YDIG, ADD1H, ADD3H
+ VMALHF X1, YDIG, ADD2H, ADD4H
+
+ VZERO ZER
+ VL 32(CPOOL), SEL1
+ VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0]
+
+ VSLDB $12, ADD2, ADD1, T0
+ VSLDB $12, T2, ADD2, T1
+
+ VACCQ T0, RED1, CAR1
+ VAQ T0, RED1, T0
+ VACCCQ T1, RED2, CAR1, T2
+ VACQ T1, RED2, CAR1, T1
+
+ VACCQ T0, ADD3, CAR1
+ VAQ T0, ADD3, T0
+ VACCCQ T1, ADD4, CAR1, CAR2
+ VACQ T1, ADD4, CAR1, T1
+ VAQ T2, CAR2, T2
+
+ VL 96(CPOOL), SEL5
+ VL 112(CPOOL), SEL6
+ VPERM T0, RED3, SEL5, RED2 // [d1 d0 d1 d0]
+ VPERM T0, RED3, SEL6, RED1 // [ 0 d1 d0 0]
+ VSQ RED1, RED2, RED2 // Guaranteed not to underflow
+
+ VSLDB $12, T1, T0, T0
+ VSLDB $12, T2, T1, T1
+
+ VACCQ T0, ADD3H, CAR1
+ VAQ T0, ADD3H, T0
+ VACCCQ T1, ADD4H, CAR1, T2
+ VACQ T1, ADD4H, CAR1, T1
+
+ VACCQ T0, RED1, CAR1
+ VAQ T0, RED1, T0
+ VACCCQ T1, RED2, CAR1, CAR2
+ VACQ T1, RED2, CAR1, T1
+ VAQ T2, CAR2, T2
+
+ // ---------------------------------------------------
+
+ VZERO RED3
+ VSCBIQ P0, T0, CAR1
+ VSQ P0, T0, ADD1H
+ VSBCBIQ T1, P1, CAR1, CAR2
+ VSBIQ T1, P1, CAR1, ADD2H
+ VSBIQ T2, RED3, CAR2, T2
+
+ // what output to use, ADD2H||ADD1H or T1||T0?
+ VSEL T0, ADD1H, T2, T0
+ VSEL T1, ADD2H, T2, T1
+ RET
+
+#undef CPOOL
+
+#undef X0
+#undef X1
+#undef Y0
+#undef Y1
+#undef T0
+#undef T1
+#undef P0
+#undef P1
+
+#undef SEL1
+#undef SEL2
+#undef SEL3
+#undef SEL4
+#undef SEL5
+#undef SEL6
+
+#undef YDIG
+#undef ADD1H
+#undef ADD2H
+#undef ADD3
+#undef ADD4
+#undef RED1
+#undef RED2
+#undef RED3
+#undef T2
+#undef ADD1
+#undef ADD2
+#undef ADD3H
+#undef ADD4H
+#undef ZER
+#undef CAR1
+#undef CAR2
+
+// ---------------------------------------
+// p256MulInternalVMSL
+// V0-V3,V30,V31 - Not Modified
+// V4-V14 - Volatile
+
+#define CPOOL R4
+#define SCRATCH R9
+
+// Parameters
+#define X0 V0 // Not modified
+#define X1 V1 // Not modified
+#define Y0 V2 // Not modified
+#define Y1 V3 // Not modified
+#define T0 V4
+#define T1 V5
+#define T2 V6
+#define P0 V30 // Not modified
+#define P1 V31 // Not modified
+
+// input: d0
+// output: h0, h1
+// temp: TEMP, ZERO, BORROW
+#define OBSERVATION3(d0, h0, h1, TEMP, ZERO, BORROW) \
+ VZERO ZERO \
+ VSLDB $4, d0, ZERO, h0 \
+ VLR h0, BORROW \
+ VSLDB $12, ZERO, h0, TEMP \
+ VSQ TEMP, h0, h0 \
+ VSLDB $12, d0, BORROW, h1 \
+ VSLDB $8, ZERO, BORROW, TEMP \
+ VAQ TEMP, h0, h0 \
+
+#define OBSERVATION3A(d2, h0, h1, TEMP, ZERO) \
+ VZERO ZERO \
+ VSLDB $8, d2, ZERO, TEMP \
+ VSLDB $8, d2, TEMP, h0 \
+ VSLDB $12, ZERO, TEMP, h1 \
+ VSQ h1, h0, h0 \
+
+TEXT ·p256MulInternalVMSL(SB), NOFRAME|NOSPLIT, $0-0
+ VSTM V16, V19, (SCRATCH)
+
+ MOVD $p256vmsl<>+0x00(SB), CPOOL
+
+ // Divide input1 into 5 limbs
+ VGBM $0x007f, V14
+ VZERO V12
+ VSLDB $2, X1, X0, V13
+ VSLDB $2, Y1, Y0, V8
+ VSLDB $4, V12, X1, V11 // V11(X1): 4 bytes limb
+ VSLDB $4, V12, Y1, V6 // V6: 4 bytes limb
+
+ VN V14, X0, V5 // V5: first 7 bytes limb
+ VN V14, Y0, V10 // V10: first 7 bytes limb
+ VN V14, V13, V13 // v13: third 7 bytes limb
+ VN V14, V8, V8 // V8: third 7 bytes limb
+
+ VMSLG V10, V5, V12, V10 // v10: l10 x l5 (column 1)
+ VMSLG V8, V5, V12, V8 // v8: l8 x l5
+ VMSLG V6, V13, V12, V13 // v13: l6 x l3
+ VMSLG V6, V11, V12, V11 // v11: l6 x l1 (column 9)
+ VMSLG V6, V5, V12, V6 // v6: l6 x l5
+
+ MOVD $p256vmsl<>+0x00(SB), CPOOL
+ VGBM $0x7f7f, V14
+
+ VL 0(CPOOL), V4
+ VL 16(CPOOL), V7
+ VL 32(CPOOL), V9
+ VL 48(CPOOL), V5
+ VLM 64(CPOOL), V16, V19
+
+ VPERM V12, X0, V4, V4 // v4: limb4 | limb5
+ VPERM Y1, Y0, V7, V7
+ VPERM V12, Y0, V9, V9 // v9: limb10 | limb9
+ VPERM X1, X0, V5, V5
+ VPERM X1, X0, V16, V16
+ VPERM Y1, Y0, V17, V17
+ VPERM X1, V12, V18, V18 // v18: limb1 | limb2
+ VPERM Y1, V12, V19, V19 // v19: limb7 | limb6
+ VN V14, V7, V7 // v7: limb9 | limb8
+ VN V14, V5, V5 // v5: limb3 | limb4
+ VN V14, V16, V16 // v16: limb2 | limb3
+ VN V14, V17, V17 // v17: limb8 | limb7
+
+ VMSLG V9, V4, V12, V14 // v14: l10 x l4 + l9 x l5 (column 2)
+ VMSLG V9, V5, V8, V8 // v8: l10 x l9 + l3 x l4 + l8 x l5 (column 3)
+ VMSLG V9, V16, V12, V16 // v16: l10 x l9 + l2 x l3
+ VMSLG V9, V18, V12, V9 // v9: l10 x l1 + l9 x l2
+ VMSLG V7, V18, V12, V7 // v7: l9 x l1 + l8 x l2
+ VMSLG V17, V4, V16, V16 // v16: l8 x l4 + l7 x l5 + l10 x l9 + l2 x l3 (column 4)
+ VMSLG V17, V5, V9, V9 // v9: l10 x l1 + l9 x l2 + l8 x l3 + l7 x l4
+ VMSLG V17, V18, V12, V17 // v18: l8 x l1 + l7 x l2
+ VMSLG V19, V5, V7, V7 // v7: l9 x l1 + l8 x l2 + l7 x l3 + l6 x l4 (column 6)
+ VMSLG V19, V18, V12, V19 // v19: l7 x l1 + l6 x l2 (column 8)
+ VAQ V9, V6, V9 // v9: l10 x l1 + l9 x l2 + l8 x l3 + l7 x l4 + l6 x l5 (column 5)
+ VAQ V17, V13, V13 // v13: l8 x l1 + l7 x l2 + l6 x l3 (column 7)
+
+ VSLDB $9, V12, V10, V4
+ VSLDB $9, V12, V7, V5
+ VAQ V4, V14, V14
+ VAQ V5, V13, V13
+
+ VSLDB $9, V12, V14, V4
+ VSLDB $9, V12, V13, V5
+ VAQ V4, V8, V8
+ VAQ V5, V19, V19
+
+ VSLDB $9, V12, V8, V4
+ VSLDB $9, V12, V19, V5
+ VAQ V4, V16, V16
+ VAQ V5, V11, V11
+
+ VSLDB $9, V12, V16, V4
+ VAQ V4, V9, V17
+
+ VGBM $0x007f, V4
+ VGBM $0x00ff, V5
+
+ VN V10, V4, V10
+ VN V14, V4, V14
+ VN V8, V4, V8
+ VN V16, V4, V16
+ VN V17, V4, V9
+ VN V7, V4, V7
+ VN V13, V4, V13
+ VN V19, V4, V19
+ VN V11, V5, V11
+
+ VSLDB $7, V14, V14, V14
+ VSLDB $14, V8, V12, V4
+ VSLDB $14, V12, V8, V8
+ VSLDB $5, V16, V16, V16
+ VSLDB $12, V9, V12, V5
+
+ VO V14, V10, V10
+ VO V8, V16, V16
+ VO V4, V10, V10 // first rightmost 128bits of the multiplication result
+ VO V5, V16, V16 // second rightmost 128bits of the multiplication result
+
+ // adjust v7, v13, v19, v11
+ VSLDB $7, V13, V13, V13
+ VSLDB $14, V19, V12, V4
+ VSLDB $14, V12, V19, V19
+ VSLDB $5, V11, V12, V5
+ VO V13, V7, V7
+ VO V4, V7, V7
+ VO V19, V5, V11
+
+ VSLDB $9, V12, V17, V14
+ VSLDB $12, V12, V9, V9
+ VACCQ V7, V14, V13
+ VAQ V7, V14, V7
+ VAQ V11, V13, V11
+
+ // First reduction, 96 bits
+ VSLDB $4, V16, V10, T0
+ VSLDB $4, V12, V16, T1
+ VSLDB $3, V11, V7, V11 // fourth rightmost 128bits of the multiplication result
+ VSLDB $3, V7, V12, V7
+ OBSERVATION3(V10, V8, T2, V17, V18, V19)// results V8 | T2
+ VO V7, V9, V7 // third rightmost 128bits of the multiplication result
+ VACCQ T0, T2, V9
+ VAQ T0, T2, T2
+ VACQ T1, V8, V9, V8
+
+ // Second reduction 96 bits
+ VSLDB $4, V8, T2, T0
+ VSLDB $4, V12, V8, T1
+ OBSERVATION3(T2, V9, V8, V17, V18, V19)// results V9 | V8
+ VACCQ T0, V8, T2
+ VAQ T0, V8, V8
+ VACQ T1, V9, T2, V9
+
+ // Third reduction 64 bits
+ VSLDB $8, V9, V8, T0
+ VSLDB $8, V12, V9, T1
+ OBSERVATION3A(V8, V14, V13, V17, V18)// results V14 | V13
+ VACCQ T0, V13, V12
+ VAQ T0, V13, V13
+ VACQ T1, V14, V12, V14
+ VACCQ V13, V7, V12
+ VAQ V13, V7, T0
+ VACCCQ V14, V11, V12, T2
+ VACQ V14, V11, V12, T1 // results T2 | T1 | T0
+
+ // ---------------------------------------------------
+ MOVD $p256mul<>+0x00(SB), CPOOL
+
+ VZERO V12
+ VSCBIQ P0, T0, V8
+ VSQ P0, T0, V7
+ VSBCBIQ T1, P1, V8, V10
+ VSBIQ T1, P1, V8, V9
+ VSBIQ T2, V12, V10, T2
+
+ // what output to use, V9||V7 or T1||T0?
+ VSEL T0, V7, T2, T0
+ VSEL T1, V9, T2, T1
+
+ VLM (SCRATCH), V16, V19
+
+ RET
+
+// ---------------------------------------
+// p256SqrInternalVMSL
+// V0-V1,V30,V31 - Not Modified
+// V4-V14 - Volatile
+
+TEXT ·p256SqrInternalVMSL(SB), NOFRAME|NOSPLIT, $0-0
+ VSTM V16, V18, (SCRATCH)
+
+ MOVD $p256vmsl<>+0x00(SB), CPOOL
+ // Divide input into limbs
+ VGBM $0x007f, V14
+ VZERO V12
+ VSLDB $2, X1, X0, V13
+ VSLDB $4, V12, X1, V11 // V11(X1): 4 bytes limb
+
+ VN V14, X0, V10 // V10: first 7 bytes limb
+ VN V14, V13, V13 // v13: third 7 bytes limb
+
+ VMSLG V10, V10, V12, V10 // v10: l10 x l5 (column 1)
+ VMSLG V13, V13, V12, V13 // v13: l8 x l3
+ VMSLG V11, V11, V12, V11 // v11: l6 x l1 (column 9)
+
+ MOVD $p256vmsl<>+0x00(SB), CPOOL
+ VGBM $0x7f7f, V14
+
+ VL 0(CPOOL), V4
+ VL 16(CPOOL), V7
+ VL 32(CPOOL), V9
+ VL 48(CPOOL), V5
+ VLM 64(CPOOL), V16, V18
+ VL 112(CPOOL), V8
+
+ VPERM V12, X0, V4, V4 // v4: limb4 | limb5
+ VPERM X1, X0, V7, V7
+ VPERM V12, X0, V9, V9 // v9: limb10 | limb9
+ VPERM X1, X0, V5, V5
+ VPERM X1, X0, V16, V16
+ VPERM X1, X0, V17, V17
+ VPERM X1, V12, V18, V18 // v18: limb1 | limb2
+ VPERM X1, V12, V8, V8 // v8: limb7 | limb6
+ VN V14, V7, V7 // v7: limb9 | limb8
+ VN V14, V5, V5 // v5: limb3 | limb4
+ VN V14, V16, V16 // v16: limb2 | limb3
+ VN V14, V17, V17 // v17: limb8 | limb7
+
+ VMSLEOG V9, V18, V13, V6 // v6: l10 x l1 + l9 x l2 + l8 x l3 + l7 x l4 + l6 x l5 (column 5)
+ VMSLG V9, V4, V12, V14 // v14: l10 x l4 + l9 x l5 (column 2)
+ VMSLEOG V9, V16, V12, V16 // v16: l10 x l2 + l9 x l3 + l8 x l4 + l7 x l5 (column 4)
+ VMSLEOG V7, V18, V12, V7 // v7: l9 x l1 + l8 x l2 (column 6)
+ VMSLEG V17, V18, V12, V13 // v13: l8 x l1 + l7 x l2 + l6 x l3 (column 7)
+ VMSLG V8, V18, V12, V8 // v8: l7 x l1 + l6 x l2 (column 8)
+ VMSLEG V9, V5, V12, V18 // v18: l10 x l3 + l9 x l4 + l8 x l5 (column 3)
+
+ VSLDB $9, V12, V10, V4
+ VSLDB $9, V12, V7, V5
+ VAQ V4, V14, V14
+ VAQ V5, V13, V13
+
+ VSLDB $9, V12, V14, V4
+ VSLDB $9, V12, V13, V5
+ VAQ V4, V18, V18
+ VAQ V5, V8, V8
+
+ VSLDB $9, V12, V18, V4
+ VSLDB $9, V12, V8, V5
+ VAQ V4, V16, V16
+ VAQ V5, V11, V11
+
+ VSLDB $9, V12, V16, V4
+ VAQ V4, V6, V17
+
+ VGBM $0x007f, V4
+ VGBM $0x00ff, V5
+
+ VN V10, V4, V10
+ VN V14, V4, V14
+ VN V18, V4, V18
+ VN V16, V4, V16
+ VN V17, V4, V9
+ VN V7, V4, V7
+ VN V13, V4, V13
+ VN V8, V4, V8
+ VN V11, V5, V11
+
+ VSLDB $7, V14, V14, V14
+ VSLDB $14, V18, V12, V4
+ VSLDB $14, V12, V18, V18
+ VSLDB $5, V16, V16, V16
+ VSLDB $12, V9, V12, V5
+
+ VO V14, V10, V10
+ VO V18, V16, V16
+ VO V4, V10, V10 // first rightmost 128bits of the multiplication result
+ VO V5, V16, V16 // second rightmost 128bits of the multiplication result
+
+ // adjust v7, v13, v8, v11
+ VSLDB $7, V13, V13, V13
+ VSLDB $14, V8, V12, V4
+ VSLDB $14, V12, V8, V8
+ VSLDB $5, V11, V12, V5
+ VO V13, V7, V7
+ VO V4, V7, V7
+ VO V8, V5, V11
+
+ VSLDB $9, V12, V17, V14
+ VSLDB $12, V12, V9, V9
+ VACCQ V7, V14, V13
+ VAQ V7, V14, V7
+ VAQ V11, V13, V11
+
+ // First reduction, 96 bits
+ VSLDB $4, V16, V10, T0
+ VSLDB $4, V12, V16, T1
+ VSLDB $3, V11, V7, V11 // fourth rightmost 128bits of the multiplication result
+ VSLDB $3, V7, V12, V7
+ OBSERVATION3(V10, V8, T2, V16, V17, V18)// results V8 | T2
+ VO V7, V9, V7 // third rightmost 128bits of the multiplication result
+ VACCQ T0, T2, V9
+ VAQ T0, T2, T2
+ VACQ T1, V8, V9, V8
+
+ // Second reduction 96 bits
+ VSLDB $4, V8, T2, T0
+ VSLDB $4, V12, V8, T1
+ OBSERVATION3(T2, V9, V8, V16, V17, V18)// results V9 | V8
+ VACCQ T0, V8, T2
+ VAQ T0, V8, V8
+ VACQ T1, V9, T2, V9
+
+ // Third reduction 64 bits
+ VSLDB $8, V9, V8, T0
+ VSLDB $8, V12, V9, T1
+ OBSERVATION3A(V8, V14, V13, V17, V18)// results V14 | V13
+ VACCQ T0, V13, V12
+ VAQ T0, V13, V13
+ VACQ T1, V14, V12, V14
+ VACCQ V13, V7, V12
+ VAQ V13, V7, T0
+ VACCCQ V14, V11, V12, T2
+ VACQ V14, V11, V12, T1 // results T2 | T1 | T0
+
+ // ---------------------------------------------------
+ MOVD $p256mul<>+0x00(SB), CPOOL
+
+ VZERO V12
+ VSCBIQ P0, T0, V8
+ VSQ P0, T0, V7
+ VSBCBIQ T1, P1, V8, V10
+ VSBIQ T1, P1, V8, V9
+ VSBIQ T2, V12, V10, T2
+
+ // what output to use, V9||V7 or T1||T0?
+ VSEL T0, V7, T2, T0
+ VSEL T1, V9, T2, T1
+
+ VLM (SCRATCH), V16, V18
+ RET
+
+
+
+#undef CPOOL
+#undef SCRATCH
+#undef X0
+#undef X1
+#undef Y0
+#undef Y1
+#undef T0
+#undef T1
+#undef T2
+#undef P0
+#undef P1
+
+#define SCRATCH R9
+
+TEXT p256MulInternal<>(SB),NOSPLIT,$64-0
+ MOVD $scratch-64(SP), SCRATCH
+ MOVD ·p256MulInternalFacility+0x00(SB),R7
+ CALL (R7)
+ RET
+
+TEXT ·p256MulInternalTrampolineSetup(SB),NOSPLIT|NOFRAME, $0
+ MOVBZ internal∕cpu·S390X+const_offsetS390xHasVE1(SB), R0
+ MOVD $·p256MulInternalFacility+0x00(SB), R7
+ MOVD $·p256MulInternalVX(SB), R8
+ CMPBEQ R0, $0, novmsl // VE1 facility = 1, VMSL supported
+ MOVD $·p256MulInternalVMSL(SB), R8
+novmsl:
+ MOVD R8, 0(R7)
+ BR (R8)
+
+GLOBL ·p256MulInternalFacility+0x00(SB), NOPTR, $8
+DATA ·p256MulInternalFacility+0x00(SB)/8, $·p256MulInternalTrampolineSetup(SB)
+
+// Parameters
+#define X0 V0
+#define X1 V1
+#define Y0 V2
+#define Y1 V3
+
+TEXT ·p256SqrInternalVX(SB), NOFRAME|NOSPLIT, $0
+ VLR X0, Y0
+ VLR X1, Y1
+ BR ·p256MulInternalVX(SB)
+
+#undef X0
+#undef X1
+#undef Y0
+#undef Y1
+
+
+TEXT p256SqrInternal<>(SB),NOSPLIT,$48-0
+ MOVD $scratch-48(SP), SCRATCH
+ MOVD ·p256SqrInternalFacility+0x00(SB),R7
+ CALL (R7)
+ RET
+
+TEXT ·p256SqrInternalTrampolineSetup(SB),NOSPLIT|NOFRAME, $0
+ MOVBZ internal∕cpu·S390X+const_offsetS390xHasVE1(SB), R0
+ MOVD $·p256SqrInternalFacility+0x00(SB), R7
+ MOVD $·p256SqrInternalVX(SB), R8
+ CMPBEQ R0, $0, novmsl // VE1 facility = 1, VMSL supported
+ MOVD $·p256SqrInternalVMSL(SB), R8
+novmsl:
+ MOVD R8, 0(R7)
+ BR (R8)
+
+
+GLOBL ·p256SqrInternalFacility+0x00(SB), NOPTR, $8
+DATA ·p256SqrInternalFacility+0x00(SB)/8, $·p256SqrInternalTrampolineSetup(SB)
+
+#undef SCRATCH
+
+
+#define p256SubInternal(T1, T0, X1, X0, Y1, Y0) \
+ VZERO ZER \
+ VSCBIQ Y0, X0, CAR1 \
+ VSQ Y0, X0, T0 \
+ VSBCBIQ X1, Y1, CAR1, SEL1 \
+ VSBIQ X1, Y1, CAR1, T1 \
+ VSQ SEL1, ZER, SEL1 \
+ \
+ VACCQ T0, PL, CAR1 \
+ VAQ T0, PL, TT0 \
+ VACQ T1, PH, CAR1, TT1 \
+ \
+ VSEL T0, TT0, SEL1, T0 \
+ VSEL T1, TT1, SEL1, T1 \
+
+#define p256AddInternal(T1, T0, X1, X0, Y1, Y0) \
+ VACCQ X0, Y0, CAR1 \
+ VAQ X0, Y0, T0 \
+ VACCCQ X1, Y1, CAR1, T2 \
+ VACQ X1, Y1, CAR1, T1 \
+ \
+ VZERO ZER \
+ VSCBIQ PL, T0, CAR1 \
+ VSQ PL, T0, TT0 \
+ VSBCBIQ T1, PH, CAR1, CAR2 \
+ VSBIQ T1, PH, CAR1, TT1 \
+ VSBIQ T2, ZER, CAR2, SEL1 \
+ \
+ VSEL T0, TT0, SEL1, T0 \
+ VSEL T1, TT1, SEL1, T1
+
+#define p256HalfInternal(T1, T0, X1, X0) \
+ VZERO ZER \
+ VSBIQ ZER, ZER, X0, SEL1 \
+ \
+ VACCQ X0, PL, CAR1 \
+ VAQ X0, PL, T0 \
+ VACCCQ X1, PH, CAR1, T2 \
+ VACQ X1, PH, CAR1, T1 \
+ \
+ VSEL X0, T0, SEL1, T0 \
+ VSEL X1, T1, SEL1, T1 \
+ VSEL ZER, T2, SEL1, T2 \
+ \
+ VSLDB $15, T2, ZER, TT1 \
+ VSLDB $15, T1, ZER, TT0 \
+ VREPIB $1, SEL1 \
+ VSRL SEL1, T0, T0 \
+ VSRL SEL1, T1, T1 \
+ VREPIB $7, SEL1 \
+ VSL SEL1, TT0, TT0 \
+ VSL SEL1, TT1, TT1 \
+ VO T0, TT0, T0 \
+ VO T1, TT1, T1
+
+// ---------------------------------------
+// func p256MulAsm(res, in1, in2 []byte)
+#define res_ptr R1
+#define x_ptr R2
+#define y_ptr R3
+#define CPOOL R4
+
+// Parameters
+#define X0 V0
+#define X1 V1
+#define Y0 V2
+#define Y1 V3
+#define T0 V4
+#define T1 V5
+
+// Constants
+#define P0 V30
+#define P1 V31
+TEXT ·p256MulAsm(SB), NOSPLIT, $0
+ MOVD res+0(FP), res_ptr
+ MOVD in1+24(FP), x_ptr
+ MOVD in2+48(FP), y_ptr
+
+ VL (1*16)(x_ptr), X0
+ VL (0*16)(x_ptr), X1
+ VL (1*16)(y_ptr), Y0
+ VL (0*16)(y_ptr), Y1
+
+ MOVD $p256mul<>+0x00(SB), CPOOL
+ VL 16(CPOOL), P0
+ VL 0(CPOOL), P1
+
+ CALL p256MulInternal<>(SB)
+
+ VST T0, (1*16)(res_ptr)
+ VST T1, (0*16)(res_ptr)
+ RET
+
+#undef res_ptr
+#undef x_ptr
+#undef y_ptr
+#undef CPOOL
+
+#undef X0
+#undef X1
+#undef Y0
+#undef Y1
+#undef T0
+#undef T1
+#undef P0
+#undef P1
+
+// ---------------------------------------
+// func p256SqrAsm(res, in1 []byte)
+#define res_ptr R1
+#define x_ptr R2
+#define y_ptr R3
+#define CPOOL R4
+
+// Parameters
+#define X0 V0
+#define X1 V1
+#define T0 V4
+#define T1 V5
+
+// Constants
+#define P0 V30
+#define P1 V31
+TEXT ·p256SqrAsm(SB), NOSPLIT, $0
+ MOVD res+0(FP), res_ptr
+ MOVD in1+24(FP), x_ptr
+
+ VL (1*16)(x_ptr), X0
+ VL (0*16)(x_ptr), X1
+
+ MOVD $p256mul<>+0x00(SB), CPOOL
+ VL 16(CPOOL), P0
+ VL 0(CPOOL), P1
+
+ CALL p256SqrInternal<>(SB)
+
+ VST T0, (1*16)(res_ptr)
+ VST T1, (0*16)(res_ptr)
+ RET
+
+#undef res_ptr
+#undef x_ptr
+#undef y_ptr
+#undef CPOOL
+
+#undef X0
+#undef X1
+#undef T0
+#undef T1
+#undef P0
+#undef P1
+
+
+// Point add with P2 being affine point
+// If sign == 1 -> P2 = -P2
+// If sel == 0 -> P3 = P1
+// if zero == 0 -> P3 = P2
+// p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)
+#define P3ptr R1
+#define P1ptr R2
+#define P2ptr R3
+#define CPOOL R4
+
+// Temporaries in REGs
+#define Y2L V15
+#define Y2H V16
+#define T1L V17
+#define T1H V18
+#define T2L V19
+#define T2H V20
+#define T3L V21
+#define T3H V22
+#define T4L V23
+#define T4H V24
+
+// Temps for Sub and Add
+#define TT0 V11
+#define TT1 V12
+#define T2 V13
+
+// p256MulAsm Parameters
+#define X0 V0
+#define X1 V1
+#define Y0 V2
+#define Y1 V3
+#define T0 V4
+#define T1 V5
+
+#define PL V30
+#define PH V31
+
+// Names for zero/sel selects
+#define X1L V0
+#define X1H V1
+#define Y1L V2 // p256MulAsmParmY
+#define Y1H V3 // p256MulAsmParmY
+#define Z1L V4
+#define Z1H V5
+#define X2L V0
+#define X2H V1
+#define Z2L V4
+#define Z2H V5
+#define X3L V17 // T1L
+#define X3H V18 // T1H
+#define Y3L V21 // T3L
+#define Y3H V22 // T3H
+#define Z3L V28
+#define Z3H V29
+
+#define ZER V6
+#define SEL1 V7
+#define CAR1 V8
+#define CAR2 V9
+/* *
+ * Three operand formula:
+ * Source: 2004 Hankerson–Menezes–Vanstone, page 91.
+ * T1 = Z1²
+ * T2 = T1*Z1
+ * T1 = T1*X2
+ * T2 = T2*Y2
+ * T1 = T1-X1
+ * T2 = T2-Y1
+ * Z3 = Z1*T1
+ * T3 = T1²
+ * T4 = T3*T1
+ * T3 = T3*X1
+ * T1 = 2*T3
+ * X3 = T2²
+ * X3 = X3-T1
+ * X3 = X3-T4
+ * T3 = T3-X3
+ * T3 = T3*T2
+ * T4 = T4*Y1
+ * Y3 = T3-T4
+
+ * Three operand formulas, but with MulInternal X,Y used to store temps
+X=Z1; Y=Z1; MUL;T- // T1 = Z1² T1
+X=T ; Y- ; MUL;T2=T // T2 = T1*Z1 T1 T2
+X- ; Y=X2; MUL;T1=T // T1 = T1*X2 T1 T2
+X=T2; Y=Y2; MUL;T- // T2 = T2*Y2 T1 T2
+SUB(T2<T-Y1) // T2 = T2-Y1 T1 T2
+SUB(Y<T1-X1) // T1 = T1-X1 T1 T2
+X=Z1; Y- ; MUL;Z3:=T// Z3 = Z1*T1 T2
+X=Y; Y- ; MUL;X=T // T3 = T1*T1 T2
+X- ; Y- ; MUL;T4=T // T4 = T3*T1 T2 T4
+X- ; Y=X1; MUL;T3=T // T3 = T3*X1 T2 T3 T4
+ADD(T1<T+T) // T1 = T3+T3 T1 T2 T3 T4
+X=T2; Y=T2; MUL;T- // X3 = T2*T2 T1 T2 T3 T4
+SUB(T<T-T1) // X3 = X3-T1 T1 T2 T3 T4
+SUB(T<T-T4) X3:=T // X3 = X3-T4 T2 T3 T4
+SUB(X<T3-T) // T3 = T3-X3 T2 T3 T4
+X- ; Y- ; MUL;T3=T // T3 = T3*T2 T2 T3 T4
+X=T4; Y=Y1; MUL;T- // T4 = T4*Y1 T3 T4
+SUB(T<T3-T) Y3:=T // Y3 = T3-T4 T3 T4
+
+ */
+TEXT ·p256PointAddAffineAsm(SB), NOSPLIT, $0
+ MOVD P3+0(FP), P3ptr
+ MOVD P1+8(FP), P1ptr
+ MOVD P2+16(FP), P2ptr
+
+ MOVD $p256mul<>+0x00(SB), CPOOL
+ VL 16(CPOOL), PL
+ VL 0(CPOOL), PH
+
+ // if (sign == 1) {
+ // Y2 = fromBig(new(big.Int).Mod(new(big.Int).Sub(p256.P, new(big.Int).SetBytes(Y2)), p256.P)) // Y2 = P-Y2
+ // }
+
+ VL 32(P2ptr), Y2H
+ VL 48(P2ptr), Y2L
+
+ VLREPG sign+24(FP), SEL1
+ VZERO ZER
+ VCEQG SEL1, ZER, SEL1
+
+ VSCBIQ Y2L, PL, CAR1
+ VSQ Y2L, PL, T1L
+ VSBIQ PH, Y2H, CAR1, T1H
+
+ VSEL Y2L, T1L, SEL1, Y2L
+ VSEL Y2H, T1H, SEL1, Y2H
+
+/* *
+ * Three operand formula:
+ * Source: 2004 Hankerson–Menezes–Vanstone, page 91.
+ */
+ // X=Z1; Y=Z1; MUL; T- // T1 = Z1² T1
+ VL 64(P1ptr), X1 // Z1H
+ VL 80(P1ptr), X0 // Z1L
+ VLR X0, Y0
+ VLR X1, Y1
+ CALL p256SqrInternal<>(SB)
+
+ // X=T ; Y- ; MUL; T2=T // T2 = T1*Z1 T1 T2
+ VLR T0, X0
+ VLR T1, X1
+ CALL p256MulInternal<>(SB)
+ VLR T0, T2L
+ VLR T1, T2H
+
+ // X- ; Y=X2; MUL; T1=T // T1 = T1*X2 T1 T2
+ VL 0(P2ptr), Y1 // X2H
+ VL 16(P2ptr), Y0 // X2L
+ CALL p256MulInternal<>(SB)
+ VLR T0, T1L
+ VLR T1, T1H
+
+ // X=T2; Y=Y2; MUL; T- // T2 = T2*Y2 T1 T2
+ VLR T2L, X0
+ VLR T2H, X1
+ VLR Y2L, Y0
+ VLR Y2H, Y1
+ CALL p256MulInternal<>(SB)
+
+ // SUB(T2<T-Y1) // T2 = T2-Y1 T1 T2
+ VL 32(P1ptr), Y1H
+ VL 48(P1ptr), Y1L
+ p256SubInternal(T2H,T2L,T1,T0,Y1H,Y1L)
+
+ // SUB(Y<T1-X1) // T1 = T1-X1 T1 T2
+ VL 0(P1ptr), X1H
+ VL 16(P1ptr), X1L
+ p256SubInternal(Y1,Y0,T1H,T1L,X1H,X1L)
+
+ // X=Z1; Y- ; MUL; Z3:=T// Z3 = Z1*T1 T2
+ VL 64(P1ptr), X1 // Z1H
+ VL 80(P1ptr), X0 // Z1L
+ CALL p256MulInternal<>(SB)
+
+ // VST T1, 64(P3ptr)
+ // VST T0, 80(P3ptr)
+ VLR T0, Z3L
+ VLR T1, Z3H
+
+ // X=Y; Y- ; MUL; X=T // T3 = T1*T1 T2
+ VLR Y0, X0
+ VLR Y1, X1
+ CALL p256SqrInternal<>(SB)
+ VLR T0, X0
+ VLR T1, X1
+
+ // X- ; Y- ; MUL; T4=T // T4 = T3*T1 T2 T4
+ CALL p256MulInternal<>(SB)
+ VLR T0, T4L
+ VLR T1, T4H
+
+ // X- ; Y=X1; MUL; T3=T // T3 = T3*X1 T2 T3 T4
+ VL 0(P1ptr), Y1 // X1H
+ VL 16(P1ptr), Y0 // X1L
+ CALL p256MulInternal<>(SB)
+ VLR T0, T3L
+ VLR T1, T3H
+
+ // ADD(T1<T+T) // T1 = T3+T3 T1 T2 T3 T4
+ p256AddInternal(T1H,T1L, T1,T0,T1,T0)
+
+ // X=T2; Y=T2; MUL; T- // X3 = T2*T2 T1 T2 T3 T4
+ VLR T2L, X0
+ VLR T2H, X1
+ VLR T2L, Y0
+ VLR T2H, Y1
+ CALL p256SqrInternal<>(SB)
+
+ // SUB(T<T-T1) // X3 = X3-T1 T1 T2 T3 T4 (T1 = X3)
+ p256SubInternal(T1,T0,T1,T0,T1H,T1L)
+
+ // SUB(T<T-T4) X3:=T // X3 = X3-T4 T2 T3 T4
+ p256SubInternal(T1,T0,T1,T0,T4H,T4L)
+ VLR T0, X3L
+ VLR T1, X3H
+
+ // SUB(X<T3-T) // T3 = T3-X3 T2 T3 T4
+ p256SubInternal(X1,X0,T3H,T3L,T1,T0)
+
+ // X- ; Y- ; MUL; T3=T // T3 = T3*T2 T2 T3 T4
+ CALL p256MulInternal<>(SB)
+ VLR T0, T3L
+ VLR T1, T3H
+
+ // X=T4; Y=Y1; MUL; T- // T4 = T4*Y1 T3 T4
+ VLR T4L, X0
+ VLR T4H, X1
+ VL 32(P1ptr), Y1 // Y1H
+ VL 48(P1ptr), Y0 // Y1L
+ CALL p256MulInternal<>(SB)
+
+ // SUB(T<T3-T) Y3:=T // Y3 = T3-T4 T3 T4 (T3 = Y3)
+ p256SubInternal(Y3H,Y3L,T3H,T3L,T1,T0)
+
+ // if (sel == 0) {
+ // copy(P3.x[:], X1)
+ // copy(P3.y[:], Y1)
+ // copy(P3.z[:], Z1)
+ // }
+
+ VL 0(P1ptr), X1H
+ VL 16(P1ptr), X1L
+
+ // Y1 already loaded, left over from addition
+ VL 64(P1ptr), Z1H
+ VL 80(P1ptr), Z1L
+
+ VLREPG sel+32(FP), SEL1
+ VZERO ZER
+ VCEQG SEL1, ZER, SEL1
+
+ VSEL X1L, X3L, SEL1, X3L
+ VSEL X1H, X3H, SEL1, X3H
+ VSEL Y1L, Y3L, SEL1, Y3L
+ VSEL Y1H, Y3H, SEL1, Y3H
+ VSEL Z1L, Z3L, SEL1, Z3L
+ VSEL Z1H, Z3H, SEL1, Z3H
+
+ // if (zero == 0) {
+ // copy(P3.x[:], X2)
+ // copy(P3.y[:], Y2)
+ // copy(P3.z[:], []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ // 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}) //(p256.z*2^256)%p
+ // }
+ VL 0(P2ptr), X2H
+ VL 16(P2ptr), X2L
+
+ // Y2 already loaded
+ VL 128(CPOOL), Z2H
+ VL 144(CPOOL), Z2L
+
+ VLREPG zero+40(FP), SEL1
+ VZERO ZER
+ VCEQG SEL1, ZER, SEL1
+
+ VSEL X2L, X3L, SEL1, X3L
+ VSEL X2H, X3H, SEL1, X3H
+ VSEL Y2L, Y3L, SEL1, Y3L
+ VSEL Y2H, Y3H, SEL1, Y3H
+ VSEL Z2L, Z3L, SEL1, Z3L
+ VSEL Z2H, Z3H, SEL1, Z3H
+
+ // All done, store out the result!!!
+ VST X3H, 0(P3ptr)
+ VST X3L, 16(P3ptr)
+ VST Y3H, 32(P3ptr)
+ VST Y3L, 48(P3ptr)
+ VST Z3H, 64(P3ptr)
+ VST Z3L, 80(P3ptr)
+
+ RET
+
+#undef P3ptr
+#undef P1ptr
+#undef P2ptr
+#undef CPOOL
+
+#undef Y2L
+#undef Y2H
+#undef T1L
+#undef T1H
+#undef T2L
+#undef T2H
+#undef T3L
+#undef T3H
+#undef T4L
+#undef T4H
+
+#undef TT0
+#undef TT1
+#undef T2
+
+#undef X0
+#undef X1
+#undef Y0
+#undef Y1
+#undef T0
+#undef T1
+
+#undef PL
+#undef PH
+
+#undef X1L
+#undef X1H
+#undef Y1L
+#undef Y1H
+#undef Z1L
+#undef Z1H
+#undef X2L
+#undef X2H
+#undef Z2L
+#undef Z2H
+#undef X3L
+#undef X3H
+#undef Y3L
+#undef Y3H
+#undef Z3L
+#undef Z3H
+
+#undef ZER
+#undef SEL1
+#undef CAR1
+#undef CAR2
+
+// p256PointDoubleAsm(P3, P1 *p256Point)
+// https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-2007-bl
+// https://www.hyperelliptic.org/EFD/g1p/auto-shortw.html
+// https://www.hyperelliptic.org/EFD/g1p/auto-shortw-projective-3.html
+#define P3ptr R1
+#define P1ptr R2
+#define CPOOL R4
+
+// Temporaries in REGs
+#define X3L V15
+#define X3H V16
+#define Y3L V17
+#define Y3H V18
+#define T1L V19
+#define T1H V20
+#define T2L V21
+#define T2H V22
+#define T3L V23
+#define T3H V24
+
+#define X1L V6
+#define X1H V7
+#define Y1L V8
+#define Y1H V9
+#define Z1L V10
+#define Z1H V11
+
+// Temps for Sub and Add
+#define TT0 V11
+#define TT1 V12
+#define T2 V13
+
+// p256MulAsm Parameters
+#define X0 V0
+#define X1 V1
+#define Y0 V2
+#define Y1 V3
+#define T0 V4
+#define T1 V5
+
+#define PL V30
+#define PH V31
+
+#define Z3L V23
+#define Z3H V24
+
+#define ZER V26
+#define SEL1 V27
+#define CAR1 V28
+#define CAR2 V29
+/*
+ * https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2004-hmv
+ * Cost: 4M + 4S + 1*half + 5add + 2*2 + 1*3.
+ * Source: 2004 Hankerson–Menezes–Vanstone, page 91.
+ * A = 3(X₁-Z₁²)×(X₁+Z₁²)
+ * B = 2Y₁
+ * Z₃ = B×Z₁
+ * C = B²
+ * D = C×X₁
+ * X₃ = A²-2D
+ * Y₃ = (D-X₃)×A-C²/2
+ *
+ * Three-operand formula:
+ * T1 = Z1²
+ * T2 = X1-T1
+ * T1 = X1+T1
+ * T2 = T2*T1
+ * T2 = 3*T2
+ * Y3 = 2*Y1
+ * Z3 = Y3*Z1
+ * Y3 = Y3²
+ * T3 = Y3*X1
+ * Y3 = Y3²
+ * Y3 = half*Y3
+ * X3 = T2²
+ * T1 = 2*T3
+ * X3 = X3-T1
+ * T1 = T3-X3
+ * T1 = T1*T2
+ * Y3 = T1-Y3
+ */
+
+TEXT ·p256PointDoubleAsm(SB), NOSPLIT, $0
+ MOVD P3+0(FP), P3ptr
+ MOVD P1+8(FP), P1ptr
+
+ MOVD $p256mul<>+0x00(SB), CPOOL
+ VL 16(CPOOL), PL
+ VL 0(CPOOL), PH
+
+ // X=Z1; Y=Z1; MUL; T- // T1 = Z1²
+ VL 64(P1ptr), X1 // Z1H
+ VL 80(P1ptr), X0 // Z1L
+ VLR X0, Y0
+ VLR X1, Y1
+ CALL p256SqrInternal<>(SB)
+
+ // SUB(X<X1-T) // T2 = X1-T1
+ VL 0(P1ptr), X1H
+ VL 16(P1ptr), X1L
+ p256SubInternal(X1,X0,X1H,X1L,T1,T0)
+
+ // ADD(Y<X1+T) // T1 = X1+T1
+ p256AddInternal(Y1,Y0,X1H,X1L,T1,T0)
+
+ // X- ; Y- ; MUL; T- // T2 = T2*T1
+ CALL p256MulInternal<>(SB)
+
+ // ADD(T2<T+T); ADD(T2<T2+T) // T2 = 3*T2
+ p256AddInternal(T2H,T2L,T1,T0,T1,T0)
+ p256AddInternal(T2H,T2L,T2H,T2L,T1,T0)
+
+ // ADD(X<Y1+Y1) // Y3 = 2*Y1
+ VL 32(P1ptr), Y1H
+ VL 48(P1ptr), Y1L
+ p256AddInternal(X1,X0,Y1H,Y1L,Y1H,Y1L)
+
+ // X- ; Y=Z1; MUL; Z3:=T // Z3 = Y3*Z1
+ VL 64(P1ptr), Y1 // Z1H
+ VL 80(P1ptr), Y0 // Z1L
+ CALL p256MulInternal<>(SB)
+ VST T1, 64(P3ptr)
+ VST T0, 80(P3ptr)
+
+ // X- ; Y=X ; MUL; T- // Y3 = Y3²
+ VLR X0, Y0
+ VLR X1, Y1
+ CALL p256SqrInternal<>(SB)
+
+ // X=T ; Y=X1; MUL; T3=T // T3 = Y3*X1
+ VLR T0, X0
+ VLR T1, X1
+ VL 0(P1ptr), Y1
+ VL 16(P1ptr), Y0
+ CALL p256MulInternal<>(SB)
+ VLR T0, T3L
+ VLR T1, T3H
+
+ // X- ; Y=X ; MUL; T- // Y3 = Y3²
+ VLR X0, Y0
+ VLR X1, Y1
+ CALL p256SqrInternal<>(SB)
+
+ // HAL(Y3<T) // Y3 = half*Y3
+ p256HalfInternal(Y3H,Y3L, T1,T0)
+
+ // X=T2; Y=T2; MUL; T- // X3 = T2²
+ VLR T2L, X0
+ VLR T2H, X1
+ VLR T2L, Y0
+ VLR T2H, Y1
+ CALL p256SqrInternal<>(SB)
+
+ // ADD(T1<T3+T3) // T1 = 2*T3
+ p256AddInternal(T1H,T1L,T3H,T3L,T3H,T3L)
+
+ // SUB(X3<T-T1) X3:=X3 // X3 = X3-T1
+ p256SubInternal(X3H,X3L,T1,T0,T1H,T1L)
+ VST X3H, 0(P3ptr)
+ VST X3L, 16(P3ptr)
+
+ // SUB(X<T3-X3) // T1 = T3-X3
+ p256SubInternal(X1,X0,T3H,T3L,X3H,X3L)
+
+ // X- ; Y- ; MUL; T- // T1 = T1*T2
+ CALL p256MulInternal<>(SB)
+
+ // SUB(Y3<T-Y3) // Y3 = T1-Y3
+ p256SubInternal(Y3H,Y3L,T1,T0,Y3H,Y3L)
+
+ VST Y3H, 32(P3ptr)
+ VST Y3L, 48(P3ptr)
+ RET
+
+#undef P3ptr
+#undef P1ptr
+#undef CPOOL
+#undef X3L
+#undef X3H
+#undef Y3L
+#undef Y3H
+#undef T1L
+#undef T1H
+#undef T2L
+#undef T2H
+#undef T3L
+#undef T3H
+#undef X1L
+#undef X1H
+#undef Y1L
+#undef Y1H
+#undef Z1L
+#undef Z1H
+#undef TT0
+#undef TT1
+#undef T2
+#undef X0
+#undef X1
+#undef Y0
+#undef Y1
+#undef T0
+#undef T1
+#undef PL
+#undef PH
+#undef Z3L
+#undef Z3H
+#undef ZER
+#undef SEL1
+#undef CAR1
+#undef CAR2
+
+// p256PointAddAsm(P3, P1, P2 *p256Point)
+#define P3ptr R1
+#define P1ptr R2
+#define P2ptr R3
+#define CPOOL R4
+#define ISZERO R5
+#define TRUE R6
+
+// Temporaries in REGs
+#define T1L V16
+#define T1H V17
+#define T2L V18
+#define T2H V19
+#define U1L V20
+#define U1H V21
+#define S1L V22
+#define S1H V23
+#define HL V24
+#define HH V25
+#define RL V26
+#define RH V27
+
+// Temps for Sub and Add
+#define ZER V6
+#define SEL1 V7
+#define CAR1 V8
+#define CAR2 V9
+#define TT0 V11
+#define TT1 V12
+#define T2 V13
+
+// p256MulAsm Parameters
+#define X0 V0
+#define X1 V1
+#define Y0 V2
+#define Y1 V3
+#define T0 V4
+#define T1 V5
+
+#define PL V30
+#define PH V31
+/*
+ * https://delta.cs.cinvestav.mx/~francisco/arith/julio.pdf "Software Implementation of the NIST Elliptic Curves Over Prime Fields"
+ *
+ * A = X₁×Z₂²
+ * B = Y₁×Z₂³
+ * C = X₂×Z₁²-A
+ * D = Y₂×Z₁³-B
+ * X₃ = D² - 2A×C² - C³
+ * Y₃ = D×(A×C² - X₃) - B×C³
+ * Z₃ = Z₁×Z₂×C
+ *
+ * Three-operand formula (adopted): https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-1998-cmo-2
+ * Temp storage: T1,T2,U1,H,Z3=X3=Y3,S1,R
+ *
+ * T1 = Z1*Z1
+ * T2 = Z2*Z2
+ * U1 = X1*T2
+ * H = X2*T1
+ * H = H-U1
+ * Z3 = Z1*Z2
+ * Z3 = Z3*H << store-out Z3 result reg.. could override Z1, if slices have same backing array
+ *
+ * S1 = Z2*T2
+ * S1 = Y1*S1
+ * R = Z1*T1
+ * R = Y2*R
+ * R = R-S1
+ *
+ * T1 = H*H
+ * T2 = H*T1
+ * U1 = U1*T1
+ *
+ * X3 = R*R
+ * X3 = X3-T2
+ * T1 = 2*U1
+ * X3 = X3-T1 << store-out X3 result reg
+ *
+ * T2 = S1*T2
+ * Y3 = U1-X3
+ * Y3 = R*Y3
+ * Y3 = Y3-T2 << store-out Y3 result reg
+
+ // X=Z1; Y=Z1; MUL; T- // T1 = Z1*Z1
+ // X- ; Y=T ; MUL; R=T // R = Z1*T1
+ // X=X2; Y- ; MUL; H=T // H = X2*T1
+ // X=Z2; Y=Z2; MUL; T- // T2 = Z2*Z2
+ // X- ; Y=T ; MUL; S1=T // S1 = Z2*T2
+ // X=X1; Y- ; MUL; U1=T // U1 = X1*T2
+ // SUB(H<H-T) // H = H-U1
+ // X=Z1; Y=Z2; MUL; T- // Z3 = Z1*Z2
+ // X=T ; Y=H ; MUL; Z3:=T// Z3 = Z3*H << store-out Z3 result reg.. could override Z1, if slices have same backing array
+ // X=Y1; Y=S1; MUL; S1=T // S1 = Y1*S1
+ // X=Y2; Y=R ; MUL; T- // R = Y2*R
+ // SUB(R<T-S1) // R = R-S1
+ // X=H ; Y=H ; MUL; T- // T1 = H*H
+ // X- ; Y=T ; MUL; T2=T // T2 = H*T1
+ // X=U1; Y- ; MUL; U1=T // U1 = U1*T1
+ // X=R ; Y=R ; MUL; T- // X3 = R*R
+ // SUB(T<T-T2) // X3 = X3-T2
+ // ADD(X<U1+U1) // T1 = 2*U1
+ // SUB(T<T-X) X3:=T // X3 = X3-T1 << store-out X3 result reg
+ // SUB(Y<U1-T) // Y3 = U1-X3
+ // X=R ; Y- ; MUL; U1=T // Y3 = R*Y3
+ // X=S1; Y=T2; MUL; T- // T2 = S1*T2
+ // SUB(T<U1-T); Y3:=T // Y3 = Y3-T2 << store-out Y3 result reg
+ */
+TEXT ·p256PointAddAsm(SB), NOSPLIT, $0
+ MOVD P3+0(FP), P3ptr
+ MOVD P1+8(FP), P1ptr
+ MOVD P2+16(FP), P2ptr
+
+ MOVD $p256mul<>+0x00(SB), CPOOL
+ VL 16(CPOOL), PL
+ VL 0(CPOOL), PH
+
+ // X=Z1; Y=Z1; MUL; T- // T1 = Z1*Z1
+ VL 64(P1ptr), X1 // Z1H
+ VL 80(P1ptr), X0 // Z1L
+ VLR X0, Y0
+ VLR X1, Y1
+ CALL p256SqrInternal<>(SB)
+
+ // X- ; Y=T ; MUL; R=T // R = Z1*T1
+ VLR T0, Y0
+ VLR T1, Y1
+ CALL p256MulInternal<>(SB)
+ VLR T0, RL
+ VLR T1, RH
+
+ // X=X2; Y- ; MUL; H=T // H = X2*T1
+ VL 0(P2ptr), X1 // X2H
+ VL 16(P2ptr), X0 // X2L
+ CALL p256MulInternal<>(SB)
+ VLR T0, HL
+ VLR T1, HH
+
+ // X=Z2; Y=Z2; MUL; T- // T2 = Z2*Z2
+ VL 64(P2ptr), X1 // Z2H
+ VL 80(P2ptr), X0 // Z2L
+ VLR X0, Y0
+ VLR X1, Y1
+ CALL p256SqrInternal<>(SB)
+
+ // X- ; Y=T ; MUL; S1=T // S1 = Z2*T2
+ VLR T0, Y0
+ VLR T1, Y1
+ CALL p256MulInternal<>(SB)
+ VLR T0, S1L
+ VLR T1, S1H
+
+ // X=X1; Y- ; MUL; U1=T // U1 = X1*T2
+ VL 0(P1ptr), X1 // X1H
+ VL 16(P1ptr), X0 // X1L
+ CALL p256MulInternal<>(SB)
+ VLR T0, U1L
+ VLR T1, U1H
+
+ // SUB(H<H-T) // H = H-U1
+ p256SubInternal(HH,HL,HH,HL,T1,T0)
+
+ // if H == 0 or H^P == 0 then ret=1 else ret=0
+ // clobbers T1H and T1L
+ MOVD $0, ISZERO
+ MOVD $1, TRUE
+ VZERO ZER
+ VO HL, HH, T1H
+ VCEQGS ZER, T1H, T1H
+ MOVDEQ TRUE, ISZERO
+ VX HL, PL, T1L
+ VX HH, PH, T1H
+ VO T1L, T1H, T1H
+ VCEQGS ZER, T1H, T1H
+ MOVDEQ TRUE, ISZERO
+ MOVD ISZERO, ret+24(FP)
+
+ // X=Z1; Y=Z2; MUL; T- // Z3 = Z1*Z2
+ VL 64(P1ptr), X1 // Z1H
+ VL 80(P1ptr), X0 // Z1L
+ VL 64(P2ptr), Y1 // Z2H
+ VL 80(P2ptr), Y0 // Z2L
+ CALL p256MulInternal<>(SB)
+
+ // X=T ; Y=H ; MUL; Z3:=T// Z3 = Z3*H
+ VLR T0, X0
+ VLR T1, X1
+ VLR HL, Y0
+ VLR HH, Y1
+ CALL p256MulInternal<>(SB)
+ VST T1, 64(P3ptr)
+ VST T0, 80(P3ptr)
+
+ // X=Y1; Y=S1; MUL; S1=T // S1 = Y1*S1
+ VL 32(P1ptr), X1
+ VL 48(P1ptr), X0
+ VLR S1L, Y0
+ VLR S1H, Y1
+ CALL p256MulInternal<>(SB)
+ VLR T0, S1L
+ VLR T1, S1H
+
+ // X=Y2; Y=R ; MUL; T- // R = Y2*R
+ VL 32(P2ptr), X1
+ VL 48(P2ptr), X0
+ VLR RL, Y0
+ VLR RH, Y1
+ CALL p256MulInternal<>(SB)
+
+ // SUB(R<T-S1) // R = T-S1
+ p256SubInternal(RH,RL,T1,T0,S1H,S1L)
+
+ // if R == 0 or R^P == 0 then ret=ret else ret=0
+ // clobbers T1H and T1L
+ MOVD $0, ISZERO
+ MOVD $1, TRUE
+ VZERO ZER
+ VO RL, RH, T1H
+ VCEQGS ZER, T1H, T1H
+ MOVDEQ TRUE, ISZERO
+ VX RL, PL, T1L
+ VX RH, PH, T1H
+ VO T1L, T1H, T1H
+ VCEQGS ZER, T1H, T1H
+ MOVDEQ TRUE, ISZERO
+ AND ret+24(FP), ISZERO
+ MOVD ISZERO, ret+24(FP)
+
+ // X=H ; Y=H ; MUL; T- // T1 = H*H
+ VLR HL, X0
+ VLR HH, X1
+ VLR HL, Y0
+ VLR HH, Y1
+ CALL p256SqrInternal<>(SB)
+
+ // X- ; Y=T ; MUL; T2=T // T2 = H*T1
+ VLR T0, Y0
+ VLR T1, Y1
+ CALL p256MulInternal<>(SB)
+ VLR T0, T2L
+ VLR T1, T2H
+
+ // X=U1; Y- ; MUL; U1=T // U1 = U1*T1
+ VLR U1L, X0
+ VLR U1H, X1
+ CALL p256MulInternal<>(SB)
+ VLR T0, U1L
+ VLR T1, U1H
+
+ // X=R ; Y=R ; MUL; T- // X3 = R*R
+ VLR RL, X0
+ VLR RH, X1
+ VLR RL, Y0
+ VLR RH, Y1
+ CALL p256SqrInternal<>(SB)
+
+ // SUB(T<T-T2) // X3 = X3-T2
+ p256SubInternal(T1,T0,T1,T0,T2H,T2L)
+
+ // ADD(X<U1+U1) // T1 = 2*U1
+ p256AddInternal(X1,X0,U1H,U1L,U1H,U1L)
+
+ // SUB(T<T-X) X3:=T // X3 = X3-T1 << store-out X3 result reg
+ p256SubInternal(T1,T0,T1,T0,X1,X0)
+ VST T1, 0(P3ptr)
+ VST T0, 16(P3ptr)
+
+ // SUB(Y<U1-T) // Y3 = U1-X3
+ p256SubInternal(Y1,Y0,U1H,U1L,T1,T0)
+
+ // X=R ; Y- ; MUL; U1=T // Y3 = R*Y3
+ VLR RL, X0
+ VLR RH, X1
+ CALL p256MulInternal<>(SB)
+ VLR T0, U1L
+ VLR T1, U1H
+
+ // X=S1; Y=T2; MUL; T- // T2 = S1*T2
+ VLR S1L, X0
+ VLR S1H, X1
+ VLR T2L, Y0
+ VLR T2H, Y1
+ CALL p256MulInternal<>(SB)
+
+ // SUB(T<U1-T); Y3:=T // Y3 = Y3-T2 << store-out Y3 result reg
+ p256SubInternal(T1,T0,U1H,U1L,T1,T0)
+ VST T1, 32(P3ptr)
+ VST T0, 48(P3ptr)
+
+ RET
diff --git a/src/crypto/elliptic/p256_asm_table.go b/src/crypto/elliptic/p256_asm_table.go
new file mode 100644
index 0000000..16a4b4f
--- /dev/null
+++ b/src/crypto/elliptic/p256_asm_table.go
@@ -0,0 +1,1473 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 || arm64
+// +build amd64 arm64
+
+package elliptic
+
+var p256Precomputed = &[43][32 * 8]uint64{
+ {
+ 8784043285714375740, 8483257759279461889, 8789745728267363600, 1770019616739251654, 15992936863339206154, 10037038012062884956, 15197544864945402661, 9615747158586711429,
+ 9583737883674400333, 12279877754802111101, 8296198976379850969, 17778859909846088251, 3401986641240187301, 1525831644595056632, 1849003687033449918, 8702493044913179195,
+ 18423170064697770279, 12693387071620743675, 7398701556189346968, 2779682216903406718, 12703629940499916779, 6358598532389273114, 8683512038509439374, 15415938252666293255,
+ 8408419572923862476, 5066733120953500019, 926242532005776114, 6301489109130024811, 3285079390283344806, 1685054835664548935, 7740622190510199342, 9561507292862134371,
+ 13698695174800826869, 10442832251048252285, 10672604962207744524, 14485711676978308040, 16947216143812808464, 8342189264337602603, 3837253281927274344, 8331789856935110934,
+ 4627808394696681034, 6174000022702321214, 15351247319787348909, 1371147458593240691, 10651965436787680331, 2998319090323362997, 17592419471314886417, 11874181791118522207,
+ 524165018444839759, 3157588572894920951, 17599692088379947784, 1421537803477597699, 2902517390503550285, 7440776657136679901, 17263207614729765269, 16928425260420958311,
+ 2878166099891431311, 5056053391262430293, 10345032411278802027, 13214556496570163981, 17698482058276194679, 2441850938900527637, 1314061001345252336, 6263402014353842038,
+ 8487436533858443496, 12386798851261442113, 3224748875345095424, 16166568617729909099, 2213369110503306004, 6246347469485852131, 3129440554298978074, 605269941184323483,
+ 3177531230451277512, 11022989490494865721, 8321856985295555401, 14727273563873821327, 876865438755954294, 14139765236890058248, 6880705719513638354, 8678887646434118325,
+ 16896703203004244996, 11377226897030111200, 2302364246994590389, 4499255394192625779, 1906858144627445384, 2670515414718439880, 868537809054295101, 7535366755622172814,
+ 339769604981749608, 12384581172556225075, 2596838235904096350, 5684069910326796630, 913125548148611907, 1661497269948077623, 2892028918424825190, 9220412792897768138,
+ 14754959387565938441, 1023838193204581133, 13599978343236540433, 8323909593307920217, 3852032956982813055, 7526785533690696419, 8993798556223495105, 18140648187477079959,
+ 11692087196810962506, 1328079167955601379, 1664008958165329504, 18063501818261063470, 2861243404839114859, 13702578580056324034, 16781565866279299035, 1524194541633674171,
+ 8267721299596412251, 273633183929630283, 17164190306640434032, 16332882679719778825, 4663567915067622493, 15521151801790569253, 7273215397645141911, 2324445691280731636,
+ 9262509587234749312, 6377906816499461739, 15415592259881792841, 6113140067088447267, 17505803833889144731, 3922849788840338823, 6180172597177093790, 7272741317181956640,
+ 10883262735810583709, 17399134577381480315, 12750035195491397119, 13953097270363313998, 2485196748577282439, 393430759246706702, 3408702096065201083, 16993508724217005008,
+ 6863012803048745338, 5403358912802668606, 12029756355518540153, 7447472859343023066, 7395681837001497117, 5416243410959797052, 2749045697176435642, 7813683613416962381,
+ 18350218749545913064, 7509253999860752750, 13295789896366834092, 18326374111226872828, 3258048562491844971, 6342797029666289605, 9612474478300039361, 17807494771134060923,
+ 9986610307523639857, 6419466267862117340, 324428332031706042, 16344106992423140174, 9434790811884847197, 11733262517030341550, 10475871084940451430, 6836751077061091536,
+ 6209985515928007176, 8422208926220809341, 7866188125456775984, 17071696982741312252, 17482363193291892153, 1949968206030126177, 8823961512355572132, 13873010936602132387,
+ 1136614876084383862, 14528605954969250458, 7641208364604111421, 18286716654189762724, 10711679476846124804, 7209187038097309348, 11378652158954714549, 8736575389915992180,
+ 17651988839102959720, 4019365809979531118, 15024676458991206505, 12622721674297982786, 243375780477240745, 16572583194372185930, 5189991834753707889, 9498206494038386287,
+ 7870185149723322393, 7858065912377481781, 7050328012468471455, 17977001023138181397, 11912539230960339911, 1290924823368819542, 8532615064753763585, 6439520053123840906,
+ 11863354073576605599, 8052916203396229040, 16399182063316628399, 10837704607074036270, 5444229620349428264, 10866168931038804304, 1298875461760659950, 8904898233335519592,
+ 7053953350607273992, 16629865408105641272, 1929728580699289339, 11046110151029488400, 9569890690274721816, 6501378768832742664, 1512621765961532661, 17084264658609527495,
+ 300662505135603380, 15234062183651308307, 5319536121171934679, 11496182741145732970, 10520610739189276272, 4310163027157056344, 3285324287508696251, 3261468597637865009,
+ 5427469457023618335, 8296698312021944466, 10882999905197052907, 16825284839961451415, 2106903142647479713, 12008841566652934760, 5361688498494289694, 12228623597691228792,
+ 6799789383171367218, 17419525584795621504, 10685476378747845090, 16314000862057872712, 11014509959317588121, 10396318154903722147, 9986284919279346079, 4005974039875805723,
+ 12980531573372269379, 11790791377897350561, 14264262096731994829, 14999393269362129980, 15405842559289002684, 13147503876582120266, 15449556979080143937, 5047767509783027570,
+ 16354606995222167433, 15941889353688553446, 10301254747221876749, 10704428461746587572, 15702361407127630489, 3283718562982354984, 18081088275079496700, 17549094608770385733,
+ 15620168851912893400, 13762314693487747007, 13577625382054330775, 4116976667540664996, 712200332640207605, 9098568002202933512, 8905476951567380032, 7075673514150314660,
+ },
+ {
+ 15811459837282651008, 4038049993244767161, 9054218236388056577, 10807376403937048775, 11113869110590438959, 10336442539492421506, 9228056645601479672, 2983388754829658480,
+ 7189376137127712298, 9147574701720272724, 3983921661449444789, 12479790317447783959, 13221020976679959405, 13001868979553711359, 17918176319017234744, 18030393246529651080,
+ 13903368497828271814, 8050546449078305498, 6932373216915935575, 16010012759059004304, 15451708272653450375, 211711129247219149, 10435723642185827585, 17790507800712341232,
+ 5218985848865375996, 5835476376320976237, 9096499658845542933, 8952489298840360492, 16605895184424459659, 505731104315931813, 17784248872812610986, 4151555707609374291,
+ 2650786429127545939, 15186825834659456704, 6983758297535957561, 1268007525252235269, 11433489869282443481, 12352824209154665189, 14555720778982121964, 3578659908492218328,
+ 4038120750249579469, 8304001815287976437, 251700810512667658, 17170873737273596918, 12809642145843734737, 5765073376623244019, 7923401246629348462, 5401634486288115286,
+ 12830329023522940651, 1302336131164509751, 7011936976469244509, 8446680306499168806, 1386043952588989461, 9057741950701319559, 6719607413978812889, 17371065020414280890,
+ 4963953120977916566, 6585250545846301109, 4490972754776862709, 10674499592300479120, 17494988318349821345, 1814042777264686189, 8938395423478059768, 9062966010283426056,
+ 4683683785459765575, 8782133909400988812, 7201726172249963453, 16855412996123591560, 12016536526053703526, 6004686128951599125, 4323130180079993082, 7578596978943754924,
+ 13272384473390546409, 15052793429857194942, 3597944793474963009, 5659515835454510338, 18013102402609680655, 15096936721974473248, 9096406212198787674, 948405832772983882,
+ 7533214627823316056, 8009521942734148665, 15370181949218639901, 17395801240657257383, 13217990616373936619, 16323510521919428293, 10737557422966235859, 11113837024463125316,
+ 3879469818714011415, 11493820457829716643, 10091807027724827301, 5032134849078736052, 18015749204009276046, 9667758283246223357, 4629693773460610802, 3807196436109718946,
+ 3214144211600931504, 9454007581819416365, 15275469051811991033, 18196204074031746058, 14853793468372596948, 7707818604949749658, 3266782539036976530, 12810995382868348472,
+ 17461133192060813135, 16870745208936545685, 5903437034120721004, 9683544852742219696, 7903581348090948266, 13461951083275541722, 2454715926022970060, 11781268272016425522,
+ 3026443193966185195, 8027125782547146632, 17998666917527163386, 17214321347454260227, 10230244243647560419, 8728893474426972780, 16205038544687771303, 13771356055080873468,
+ 5724217930909760221, 15420221591912350415, 9910465013739250802, 12511683773790779491, 2841532862707547306, 16614426471009646589, 407574220261191228, 17422057170120224031,
+ 17151505459068312604, 2041124098660595410, 3221606782120899146, 1382742171848704850, 17222534050501711510, 3195635935311449393, 17295393886051199723, 6296241746423654238,
+ 16510537668852450348, 15650518534057329304, 475704671950317376, 782416820485820540, 10970369699990900408, 16601298147057372792, 1599283701215392453, 8464446359559369186,
+ 602848212987973622, 9275953159572178443, 15133444738457385545, 7264639569553811785, 14831517002293569763, 4277078717758211028, 12880437544322007232, 16339691585101354857,
+ 3725101809714764432, 2995111022132376824, 9338883729618021504, 8824923738291347476, 3745497072215538317, 3576446898425965872, 2350077748663612773, 15793255155885543355,
+ 8020669822852194733, 5865488997462325879, 13508617967776537887, 12934930135071898613, 11708185641249600577, 14265801329217514207, 10451671488560352734, 13251349632343744236,
+ 10155392082337432799, 16725751190167983672, 17791070776350679280, 6653785531179322348, 10087876256887835780, 8422365820961469204, 13614411778251983480, 11687935452237305960,
+ 6094298050768263419, 13949448434836272414, 14194554169090099223, 2625524360834828424, 2823844915913169919, 14077211807091283985, 15592587505996634401, 6562949920171518305,
+ 12904339001067417585, 1972265247859388742, 8908590936681392405, 10320719400504224158, 8298368064038407143, 2105745729886511647, 16509751074757847095, 15156289751671616565,
+ 13922601558154971179, 10541544592627074316, 9996345160543671471, 2956094548427827774, 3384122842786043431, 7003441038438960180, 11945730502649377204, 14797331669618433927,
+ 533287349958250211, 13929395807878461602, 10036169742264959424, 18402398814900480008, 8793706913568807269, 4438954128288889267, 13996827050589647592, 6637236830429213437,
+ 4308464751705576538, 12193581114161194913, 7322672375339219540, 329737085274484939, 15312435499764491079, 1530264129038944128, 1545025965426980152, 14559433923601957161,
+ 885617946245226760, 17871561572095909264, 16413308527330544768, 2938750343525834336, 6332736376778563648, 16772687696658236231, 9760470695949399178, 17420556823805232882,
+ 15040671962589188567, 5841393164221253498, 8923415548773111750, 17449640982747951473, 541686588805328549, 9579487672246868564, 12793813917570150236, 5744918442591934533,
+ 12406806484060734474, 4655292039647906135, 5745141229449178147, 12923621608907306732, 1329768221522217568, 16293616079741201891, 16309603918079351385, 8282635243638078478,
+ 10842935462043546322, 9397166878596340773, 5807145729199489911, 17083242467628861919, 11473470924584853882, 15220945465200609206, 10470712526179658906, 8129601297261864071,
+ 4407657041250297528, 13832960959347315977, 15056546959967740939, 4350443362134191429, 8013419913016572733, 8888684099801298477, 15401479253620745715, 18279947201669400847,
+ },
+ {
+ 7454214833719424418, 2128900810399984335, 8254953962723841408, 5341529923812819418, 11486532916552139238, 4528331821405917357, 5048485034448492541, 4189495710753944825,
+ 9223539587116638677, 879142711399260546, 2878829560233905572, 13081522393987952773, 3067261963348061547, 16008150780594711643, 5466468631453287640, 16659147898971349582,
+ 8047766502132608624, 8715815570878148809, 9093649079884580138, 3437267783531715968, 17562225708466062266, 3512667182749067601, 9223059995161026858, 1366690634827047376,
+ 17929132376737482163, 5565926714491294456, 5252303555134107501, 8169975563698132305, 1829326230943509100, 13780918661853274468, 17736665596871232627, 4246797342334307384,
+ 5113556452592134569, 7304867793218750141, 6016631926489320290, 16471860203547627727, 3444471410714850041, 16571738096215232488, 2003912224952639024, 4203884000388032492,
+ 16858425223672505353, 3192567884201479579, 1688923188642613263, 4159042130811153987, 14900413503869744297, 7521485042788722327, 14038093805562042641, 14713051866364662650,
+ 3738774192924465076, 14037618828827556145, 12882915396530927286, 10882862194263941815, 13115222579444494605, 18244214260845794346, 17217867059738274194, 2458870331386500577,
+ 8768788172344064509, 2761897497254272960, 1400167175024837359, 2591319596670212133, 1499652044223484974, 6820326453941021707, 9701009499879906773, 6897435972338565418,
+ 8314355711464256163, 8435944020779763783, 1814803522962187872, 1875253356755380905, 8214588085484192137, 18062045700553127821, 6649947905482043494, 3119726140944397531,
+ 11881571666857493523, 6300786026612414187, 4928573492087752294, 18343550313462089203, 6770642120472453960, 11296815027803718740, 17312916793783562794, 13028515123652649961,
+ 583508939637547757, 3195115082527873085, 8497784022800549923, 15135719419829981016, 1180291993378114150, 5447281494912347899, 4710517655176242215, 606503081693490000,
+ 17732293765863716052, 853971165248416821, 2022886284923413326, 7522564752651732633, 1417954193520965954, 5046659528429172066, 11426939225844711305, 17687206690616539318,
+ 6518552374736169438, 7940416740434530770, 15228615103587329007, 10662504584317997513, 18370800756791469891, 5564085264882124489, 51043856061444814, 11996026931884728651,
+ 7009195269496826002, 18330778751427846129, 7903886241001925539, 8198930484643879628, 5453546027419466587, 10378692433982210944, 2242412603379120569, 14762161396393277464,
+ 6146718865488327808, 8559779132928137805, 14001994895150170346, 9915701789711858841, 17119408219089522083, 4345363585985782988, 7559492126634131602, 17074565022279033371,
+ 1081257522368061155, 16144982029632101790, 2280210790959566458, 5000326950136078873, 10441086845859171982, 5717080014740953823, 16816253740467637151, 5185838557034755093,
+ 5835900675852976641, 14850646160621635240, 7784412869482958370, 2550282385659033114, 5102457400165153071, 16480738116286616710, 10726825595752219296, 3297564208881065856,
+ 1801069609044825811, 12940364451588241698, 11725236374887225564, 4731241880784054295, 8874695411069669852, 10719836334830130110, 12983549127094653526, 3498884940869443564,
+ 5044718241380586525, 9152928988998314956, 13274353997717834972, 5731942246653336838, 9453579625340936273, 6013700107129234092, 8535405089994224035, 12024982282827680252,
+ 8296339816955798913, 17352704937535250450, 11768204246645005024, 263391795317009517, 6622964609352808474, 14563414288208376628, 17887389278697258458, 1552357122641071119,
+ 13193447428787140357, 6701071088186218635, 17090465712567125397, 13844922841372559745, 4526664599449630663, 11321311100342816810, 2490532976442445147, 12506579957986846905,
+ 15880983134955361922, 12396434033732989105, 16113831675045409693, 2288598422068078002, 2992425583703267972, 5056435264545116829, 12961420259857080621, 3600697676136115398,
+ 12817090821406743193, 18109694174173693756, 17050997692981853333, 15796663317536421728, 15462568940214327094, 15151767385082666402, 17711108405932879054, 4030072071760424157,
+ 15725012875727498089, 7622633481063722008, 2241559275264867670, 4018985714167034871, 6507661741395512701, 13435865773135624096, 9564553262368888543, 2283482276790194513,
+ 2968718708369505078, 5001470555537559034, 985353756026354700, 3145435598301843880, 16410370922030326027, 13981261173663429189, 12565590835494971812, 222252377400865123,
+ 3096505383227168071, 9129898186148780825, 13266305466206099845, 16135309871416543685, 17993464782237414425, 8124413037611692960, 8497190057115012501, 16568064870411415344,
+ 7703394153241465824, 2740794419858297964, 4228931976096177316, 11207299659959318490, 3319492324515199082, 15611937984154016609, 3431896575019701337, 10686234123352138088,
+ 9979039962499030832, 11759414020433216431, 15164411293105859178, 2834464820255498467, 10716371968480406389, 2722055037405581557, 9240657586762413776, 267811388229104916,
+ 8385388272348876655, 16035999427286017211, 11255427708348651444, 18402964994101916340, 17964277099260928049, 16377129731672778885, 12034488763192562427, 15854195461740399790,
+ 14134352329607480415, 2406779189916280288, 1148342927890148438, 15859735140823564382, 12055096645840568482, 16044304241162505738, 7466809979198503617, 4519925955244504432,
+ 576546655711744206, 7729660311499709518, 16864252562359023573, 10537888397995064025, 11828992107618549830, 875843272290362538, 5328675038336932433, 3162100370899364658,
+ 3897734650128449985, 3892478283047854402, 153850176068596076, 4448963435449755938, 9911011406822436713, 6379744721342779297, 360324666931028426, 8002086405015565067,
+ },
+ {
+ 18068291112584812890, 10288580446655316879, 16866690514199445805, 15458664010538199871, 17175925202246173890, 12208778610361755998, 927739404697616036, 10330183209424493139,
+ 17095781462759061506, 13984950766342782369, 7038608479806172868, 7073471193601880894, 9212126909381119712, 15865851343492198107, 3874039137971369196, 15366505242302572319,
+ 3235058267792568155, 8030551941244615429, 12851363585510555514, 9786973022560471711, 4901414898060251968, 13240438628487941170, 4707852389766057435, 6503805666511566831,
+ 12081034000518110752, 2773740877021737013, 6429475399936543899, 6804412599792308979, 16082860502313571182, 10013838454082912732, 7176778953927883654, 8065751639278576331,
+ 2577328127707636877, 17299987330349518428, 15104751346159263209, 15422444943335124822, 7934601464470854115, 2803659700354182364, 186161376356596699, 10049927908892587253,
+ 14226302346438044875, 11259466681405724608, 3945860952925292329, 3287876564263331089, 9461394118536996000, 12404803552046013762, 16569223065554335449, 15881973563254196893,
+ 6665552777203645232, 5302850683863152336, 7778922028047389493, 14212904863357452615, 15966527672828304861, 3802704091320278344, 10517530714917238300, 3742971938824724276,
+ 17741525051518363144, 1374267330336191942, 4677891169679898540, 9639823611685431337, 5482431056894095950, 6629146695374718376, 3884574854221819712, 13266790928096813258,
+ 16028684877412556061, 13405436055762225966, 12268280467958284214, 17956874998292211440, 8610745336676661311, 2945210828327041653, 7889492068658434371, 8032335517518163423,
+ 17096678157067198548, 11751872841896846778, 14951591239724148110, 14946797103632354342, 15001812923533485850, 1354666869219009884, 15934246013528228877, 17674533676709494379,
+ 5120889077991995845, 1349360562851394781, 7965752316528770369, 16276575673105864366, 16613052983770261287, 11454051240568463780, 16146905806612132291, 15806571503343623359,
+ 17505459029376928465, 14087658194607915364, 12240552390931686885, 1809009366037859941, 9468395484396723291, 9446052656478984520, 3720796795453397330, 18035355127900871187,
+ 10937782812576841853, 2203021845094443505, 9622750764486021477, 5518948515641487288, 8021555402800950130, 8884074473434139094, 3925095588129480413, 6199533424090268508,
+ 3824657730049729815, 4646911366274335188, 5283751816651713473, 16396970607630079440, 9102362834410101425, 3641728106404110497, 5923630184251416230, 13179374259571264822,
+ 656781601862662976, 13284523818709348938, 14255949685810234815, 17662122984572331094, 14005272187244763785, 2769973460677437621, 9745740172208222044, 4399769039990314590,
+ 18221138344920380175, 16761112742545021198, 14421639991115588619, 9419167563943683547, 6237248361283446238, 889754338720059891, 2289880386545166424, 4280148734121099084,
+ 8456687949810928719, 6224977199672540221, 9826855484829367341, 10345583292656523462, 715642447678891171, 10791247680314230673, 4390153264475025171, 6710036254773432312,
+ 11434352627810350015, 2720291857616706675, 15035363195148805272, 2358906741960696657, 9439888337759970778, 9654563570832962181, 6013570511081660799, 12742404245236356978,
+ 16538557679183724426, 6137267799072203525, 13879005145532324670, 14569289845201353615, 13784903266526414562, 1051135786146476033, 9343275973150690259, 18338317230916899584,
+ 5259165122317589354, 6306061166879114159, 2622163270351284906, 15212813592118086979, 11497359168652342849, 7085380391293263482, 11799059272489792659, 4912160901181298022,
+ 10956239956289671191, 12649697329483184719, 17174497942073298839, 16454040570484021598, 1964314354536023134, 15533681501854792750, 11384243972598433754, 14387731385073008825,
+ 1627256545355657442, 9010013556811110994, 14616120598404631231, 8391972858400276155, 244817907132802237, 1484397967210729699, 2663560284299448875, 4820353472962713985,
+ 6567121006858416952, 17119959301239619281, 3965303761563489817, 10211688306206007722, 724618943950031080, 16749575186941231354, 9181397347361086360, 470616631211702666,
+ 10512473611770099290, 11735037909076331019, 2922606398008539984, 2599799834378751770, 10226702495047936424, 6109026252412854338, 15572556822827169237, 11993701786788749663,
+ 12951512375864063487, 9270845327881823041, 13198171264747338719, 9240466821667660686, 14918848310897324032, 10111918274408213257, 9869937817792431868, 11316967879062014936,
+ 5691364106926740802, 2782311978438620206, 13067832493996441150, 12682941211395745221, 10515041347274706648, 9910726429809603026, 10540072260993990483, 15078586909891471233,
+ 10892036011159420575, 9833543566604313963, 15176473361412960115, 14504845496732895720, 15845034940445278100, 5364672156984348678, 308167090528488415, 12390239901799634313,
+ 5106897453764491321, 10540479232768400094, 3938246597140945859, 13156157265138481529, 4932443191001849637, 11374218582626530502, 2907557293167002437, 6807344711257391317,
+ 5413712686694380670, 13147753106081951274, 2960292187746417883, 13977097845105551070, 17642067093469099040, 7699556298723812469, 11597002672863564976, 12392265705126521509,
+ 4607947077536257852, 17763906282816835996, 11288463953418287893, 18299911648497821660, 3194698850612319839, 13371221457376435780, 10047718724723936081, 4488868185988157408,
+ 13742985513958274859, 14201726926009534105, 2183997556320958457, 2564950343266657518, 8846625471673389624, 10147673280504740463, 186524308060896613, 16409126738088317791,
+ 3993197157612782947, 14810882170056329119, 3476738916713041107, 1512933700383846542, 17820889751141311776, 9132720567660500233, 17598924894608972696, 9704537908665702455,
+ },
+ {
+ 15695265447782578013, 15506558718447241194, 12341861439298989232, 18400958156300100845, 13704811286967591150, 5802260047239067846, 3266013253411255445, 9892957742319181954,
+ 6053458788987550519, 3243549259991905443, 17990025476026266205, 3918149716414968130, 562225565996136148, 12175287017902713154, 16317970834262634990, 5431198806775607012,
+ 15123186170178367264, 3054913246068400920, 8130663466272395280, 2852812622149318730, 6747630551931917110, 18302049774259156971, 3663865061939346221, 12340091610704678811,
+ 17453312277789785069, 1148609799196055657, 5931758962926317381, 5297597381576544508, 9136842287749269744, 3007653173299649609, 12364677607049679091, 3656932227466449489,
+ 17732801126130995365, 16093023291975796560, 17028512234142609413, 6293028118993952199, 4019599708379609715, 3557632583507755601, 16737140015288442165, 7041678499288317736,
+ 5187842645752673199, 14914432544387315508, 4944360914161982641, 4177329533692612281, 18423887817846407126, 11935898134550505219, 9191754399457615042, 962223885189306707,
+ 5467152559317256227, 13407023128339380091, 656806207084558176, 10823228592509573890, 282802378108427873, 13003319652664166176, 11501612671901244594, 8595770155487539951,
+ 1805255869285898834, 11676967700876044626, 3486630203654875772, 8366336693945605431, 17874084888474647879, 8803709150655606891, 680993178667234696, 1603785777136148315,
+ 3637712241808318043, 1673622542455477465, 14468899013751477571, 6607241816357171779, 2332200248940843815, 14077155355848738174, 17298155509321457563, 13624623885136011130,
+ 12550073596584561722, 13898852145577133829, 31360866049197404, 12629522544897401454, 4563446134761510332, 7272813487383198495, 16767195814334178362, 13600498026689482359,
+ 5170414136067325373, 10875290278133558518, 7331268231382198947, 4082234313337835514, 11725295114019619895, 6488017511603508543, 11399466652931338200, 2898113599278666684,
+ 7553418756944942759, 2324485560009697358, 16004009784734101843, 8345371056076551939, 7377284629098981027, 11049718262343754276, 8021605820736410979, 8175349759714152494,
+ 805625092558946929, 11996029844853502332, 1725029464967101849, 8189609560660709539, 10396089572662865065, 1756007811373736605, 7645529329516777410, 2608132938055260111,
+ 17014595621244604813, 1574499958190108567, 8684546755761322906, 9997538531915911005, 5652402150243592363, 6315865356369667235, 8687115108443972047, 12369176255146729475,
+ 5117004045853299762, 2238407612548002914, 12392667457861900490, 10614312910576266709, 6446577503758597410, 3767883614425995197, 17954966454260719539, 5233322901463099,
+ 9194523390605857831, 2335161171394626054, 12290490336543843159, 12464172604156286188, 15842196578033029974, 9081285134201585379, 6123406461662038242, 5504260508195082396,
+ 5345718195770885739, 10397124463462860788, 13439280718993427040, 15556816356929214653, 17286785976489019906, 13891416634083443852, 16346644312573564447, 917629419222512011,
+ 7892626957952656865, 11229871638807701267, 5249460237065799805, 9529842808650392581, 11803106868201977918, 4600784567377577444, 7178870186583927975, 13829108773971861655,
+ 1289703927368007546, 16175369390684210381, 2195306032582966573, 11284704298115036296, 4385727852306374408, 6915138269030006111, 13440775052829788601, 7742359656861485339,
+ 1418252737937945274, 10664598666472920539, 5146625453081549320, 7716408631061669307, 15481216546649435253, 9087869057854823101, 8438781502742166926, 16066665509570633845,
+ 13091912700111633391, 15443255616678375884, 11338466119311799740, 9641919445273031117, 12359044485240987114, 7665168466660756789, 13845183791692700257, 10700410793152785934,
+ 8528578803314347534, 18126789418189181716, 6819229450418907940, 10542951903574211724, 10335954505229790439, 773396809431100386, 763679854573261611, 7782804492252573047,
+ 14102889564247144327, 15479515297699126743, 6400257822506917934, 12400211936189844586, 8190162441518233318, 16489757979266243333, 11074854544980782334, 5650848136444824725,
+ 49299995316533601, 1658812626798186796, 14321322055030354, 1062750633396640947, 11458188738562323084, 10319462683954522885, 15398455357851813321, 9473384090828584828,
+ 17723857482947114516, 17563952877363564604, 3160294749187111891, 15485369139883303866, 7487527166185553604, 9661144290316674030, 16387431126208996705, 7499766964252214921,
+ 18331281993562501191, 16034836629143884193, 11149238385567891503, 1413880911197321245, 513884082842080653, 462183409527465946, 404977499728878854, 1459875951423441439,
+ 17249202826373649180, 11151860661862313351, 13606113651347187963, 13653298062695357606, 12346841931589648448, 10659825520992339734, 17857177794617595021, 12846976930859175746,
+ 7421386827645898229, 4567141110780940726, 9022716764262477732, 7287282267953917487, 12950322172330872807, 2675202024861592519, 18339036521791481203, 7003909904344391740,
+ 13539440097944399044, 7108271818419613260, 13229113021954558930, 14920952138366964142, 11082363944167072746, 16156446445495173622, 15515349862819133984, 12359827931233605745,
+ 17013104138257181849, 6982996767213561282, 16259302607976830987, 1579942890487532721, 8635464047344487404, 10435012080572068738, 2976844365866972753, 9471858478345924179,
+ 2347775782107812070, 311367032040459687, 10852128873404113998, 1338612296059509077, 1599132038614432432, 11533828746166635650, 14264087792956755961, 13004615331499537807,
+ 9599159247384506427, 13011559959971256245, 6331802547989957841, 2642320882309956905, 9140255367820630480, 6883541786494880896, 9560822438260750771, 9273862677291701694,
+ },
+ {
+ 6185067974942684922, 6452325785873503452, 15247257292915812252, 3689324664200339196, 3264129576054154137, 14977721630214750038, 10720913377937894256, 15914487080763816378,
+ 5498621600689515064, 11498885804037952971, 12697073118321478539, 13999008478737234463, 4324885099420360642, 14021182496192299466, 10763088992583373843, 12898585157666243299,
+ 11622402383521841588, 18271726976204770291, 4805501248621435402, 13483122453682180759, 9288143526654965614, 4688427612851085031, 3280806057528512375, 2249349779106893174,
+ 2317249352525369754, 15030797481081533168, 12237590576308284360, 15660076992611440873, 11341015819700735887, 8280257602754365719, 9212501234696283343, 9673375202141178099,
+ 1576552382781604867, 14644873134348781157, 9860879950368557094, 12941927431704225849, 11959666739052502184, 16171152369226594315, 18364294470656574060, 15172859394935215584,
+ 7521577570681641021, 410124023045579310, 13806534589183327176, 2731801491329207875, 9959087943014269549, 16205635445474657221, 12069145915768935630, 14906463509471682144,
+ 13803661539609326912, 3233094883493439444, 13409742688319755923, 4298169788674760921, 12847636671560067434, 6702365884489797170, 5650404576647289813, 7380374625644226212,
+ 9762534010701623728, 15349113802811029024, 8305297169667044246, 18013018210154476198, 18414050009163335761, 2190126859433916996, 5444481274452218807, 13415988647365667724,
+ 12538763466834894269, 1385338113050483955, 17489001033590767373, 12734464941837886525, 5762578105750980174, 1466528690684075875, 4231060025049573530, 11218444849081704286,
+ 13520439252664960951, 3770430920651384098, 18274623217805904547, 11545267616883260257, 4084783424859113779, 13863968977657663867, 10237835480648574698, 7395611844854084274,
+ 12891185742908997266, 2889148203703402614, 663579373261619221, 7616112456816883701, 9002890885722034437, 10753730807791931375, 3553894671829778441, 4368240432504457389,
+ 11641532224340210206, 71261001011952801, 2321745415903209397, 6595984444855093239, 10341299657770272718, 11529878689049370930, 14381510695605158203, 6869272773579747365,
+ 18407769999912340511, 13801873415230385184, 8080153703167941484, 14143463895182697871, 5248006623352376562, 9271982788122934275, 11225161179021692620, 5076519449784098178,
+ 2388868518767189674, 381125256886459269, 11316805024982236485, 11671355336111917711, 3564177948445768908, 5245093046097324328, 16144139891410041039, 1201010948447708436,
+ 16591032462856670656, 533850935988668408, 2263232738784171066, 5572124915367282948, 12583782854491678471, 15015562207264407653, 1167486851113549964, 2987176661273439670,
+ 1073778278665127828, 301289653767944933, 11482324780950168317, 10626716202328004967, 7258024014655363102, 12597648090458567878, 2617914106162603499, 12324652644266476584,
+ 3803745429517922765, 1699256829990081513, 2179986933466617785, 4330084096014112280, 10403896221616386733, 11682369673591069651, 3879392495596713886, 17249085256521020624,
+ 13938687445393525296, 13822790166673492712, 13216892716897321740, 2165870280409061482, 9147830602451471293, 16339144108911449834, 17837507766320968023, 15340710005345158245,
+ 4485838034764604283, 8426970493386126530, 7709201418726935942, 13468796449943783583, 16441091014185628843, 2374269830239737237, 13423059086148485716, 17132218391020520330,
+ 13957732574177025801, 1100344278733481546, 12156946932617203965, 2007756425747006287, 16454508610548016441, 18058668363064769203, 15619001054652658323, 1201476243910931863,
+ 1292412381963708933, 10908631677673353277, 15138649576688192753, 10741579086621436813, 16534963566169304684, 10228646729525192055, 4256608246548223655, 16176814322718522963,
+ 3012125536603284881, 5466178753014623970, 16598422155702380175, 9902071731736699653, 13861556771123036741, 9534711573278556274, 9805898455743897538, 14683694395093520838,
+ 9125214754744730198, 143353181770418225, 8976042597473555858, 13405481215779362056, 2863125481657826330, 12333764090388731260, 15250359513836914002, 5086706595778559056,
+ 16341334571424533804, 4657494880554316004, 18092783076286521475, 19832895869027143, 8667365497283429486, 11501953273367138162, 5832487631687776985, 664754160781757174,
+ 13604774408440609080, 12660956565879820101, 4535452877362385626, 8063311919592303330, 4591511734775484245, 5164119991271568303, 5282209033000495913, 15181017218549722334,
+ 5070228782679699429, 277881850788922387, 17424210585510719837, 13586569401109299583, 16661809787463692974, 11543434536078774133, 2062889565117549061, 14261113517104795690,
+ 17683343658262519228, 13727986689138309987, 9352982220536939533, 7578301410191413286, 7970586266869249410, 14357824150096386968, 4381700479561442130, 13217807763329274501,
+ 13524719886605363170, 11553593166914861649, 12034020205580665813, 985175984881624722, 11040631342644137311, 12107496606245418129, 7328748654066365904, 14741972847454326011,
+ 9790251609380109406, 2449588380962781369, 4694893010228180943, 5172145489692928848, 5411277023198532852, 12752098113265127406, 4559500999103209672, 14921033543082064351,
+ 2248082728820243864, 8720404853152831367, 12431505213690884811, 14883031570121462817, 2278273022936639560, 14988529405400451352, 16732922988142595529, 1071232319627396164,
+ 8793441611217807824, 13198147137434605751, 8167936709747999601, 7685928882032231744, 8779182451658438210, 8059106577297479923, 18394590909567718507, 15508315617300242773,
+ 12828567388121500364, 2264959796189127526, 6222205589523394088, 8827682270015065013, 2619170858745203321, 14835030117099398518, 5960028394532808010, 2233397097850889639,
+ },
+ {
+ 16319476863352440541, 10283930715856640343, 17675054544532098447, 11261132162985242084, 13712716897981761400, 2681907143459288706, 6930256922080133347, 1445069157579547822,
+ 16928574868467385886, 166417019993787654, 5882811520342817815, 14106304179344008065, 3747123724781081800, 197109533566874475, 14303280595714789450, 15457633026018307066,
+ 10773597511592584859, 12552868588431074640, 13500771767160426835, 8002499270056378440, 13792839099998553174, 12949371255843262119, 1713974340992291550, 16150173130483658061,
+ 14745984256428057001, 9333707338036985191, 15365925315303462, 11789129028059619744, 7873100217437235208, 5289763977161829145, 17731215200358323788, 8876377479309635703,
+ 265950821974454804, 5047467530470542278, 17523044368516619801, 10054436503372765176, 10321185867287373431, 1212061937729015591, 15311258419138633926, 11236518538207084768,
+ 10904693956407098222, 9013418755007070130, 15510005599846320670, 995378633446250700, 12234095025518917836, 6689106237771514188, 2809193993744369126, 548124799387888260,
+ 267350765778774337, 2853877351938464055, 3677119082593358544, 7685335121077514739, 6491980094762370981, 1384870316654548002, 10568872345228996498, 241629538659623878,
+ 5839400175629113441, 5238299193248381245, 16787876416022887315, 6051613843621920008, 9219569873668974552, 5916753090530931032, 13390630214643423749, 3265335490455191268,
+ 1507475744849985303, 2597075068965622770, 14968669113917266926, 597606442423659891, 44293923912414886, 3832651144752540816, 17438860065613195810, 782112340603343331,
+ 9050896199196733903, 6427608033444590004, 13787696679536621857, 9682087046920409188, 4519093754155995429, 13564098392055667371, 10512507082236058799, 5289934424008191746,
+ 3477191607403300916, 18283244229745029067, 8462159792484018099, 3056576498976014760, 7259283167233712785, 12530251965039903998, 10232104933720625111, 14190745998092156987,
+ 15759362035410895911, 16075598437961371531, 4651513528876405575, 7694151626503869614, 14468862724637972284, 1838601521755586245, 4168957446435305706, 8694905613027663664,
+ 14825552838983151434, 8639609968952840931, 15547914584352392016, 2313507499500708287, 6902543375698033684, 9987468886016348918, 9068175779860656258, 6899641689193116297,
+ 7449110402827616954, 5689206471789540768, 12722069021950813669, 16017131401246233663, 2240977975275954837, 4794705713606614946, 5734937900461809607, 118285984764445639,
+ 13917685647531721740, 2432096911145792817, 17733637484624159521, 3848601825403209903, 8192433211051054683, 17388066421914150767, 14336306308847565282, 7501625553608785022,
+ 9844707592410952215, 8167497709587672711, 18115463401502884199, 8471563375691441535, 17525405040226359165, 2460181803833371342, 3375780580907171205, 3024376384135966552,
+ 15446204978718816914, 10131769697256428720, 10937168660849553757, 12174257400309926241, 4774490771219074715, 13764876998736771340, 14299051476546774038, 11409551629093945893,
+ 5471366060803425265, 5927845172343647238, 13321967429805195545, 1981489848211999240, 3657997933879283852, 1341488083977381854, 11001000434984172675, 12666440712128450303,
+ 2127920575549495889, 8861781778185702064, 10501256952986753134, 8967441017586595593, 12224364761159418844, 11856305587178745520, 8176487286367052930, 4988165299911239066,
+ 12279712962388998309, 7552494563001503282, 7388800608466198128, 15704167671257743517, 17781278773093632973, 6688613358271814698, 12261263098366698895, 2585757657252460656,
+ 17064077609211627595, 14437064866605257652, 14386408901468621215, 14935685266532365200, 16350265985491822570, 5195856143474093260, 4623415440658404410, 2936315929072503688,
+ 18399447684263070219, 11294713679768342649, 1600242588657342501, 2841752419388937826, 15111313732750279467, 8179471361906256467, 10611923788752807866, 10823997080784081293,
+ 15607034385960553579, 6934034624930683639, 13318618594774430459, 7552865293216027192, 13692640408995992185, 11143313220936824014, 16721999332340829387, 6993089466645938014,
+ 1749509641415861857, 8597876492914031095, 17942224190138398306, 1760222146515426983, 1030117883582412628, 6655464189981098460, 14222900139576809037, 13041171493967200019,
+ 12415333505074758067, 16988669193263876033, 10482573641416551070, 368049292090348794, 16109255925523895167, 4949934055388640387, 12038479532506656790, 10829864449693781036,
+ 8355401100656399994, 16963696462538033761, 10131652431024537917, 4166156454906166144, 12888240412190236596, 8396312458335953166, 445297814986468978, 10512295830234020744,
+ 14405236772348157690, 6950094157338825845, 13379457125020057613, 11396720859761560260, 14063367153572826959, 4476298515903789197, 12708437873915498860, 8163633089770520638,
+ 6432708226771955694, 7524978005081948959, 6442046223716866789, 4082077252385606982, 13626979819841658005, 4264573835620574342, 14788317690584235128, 4710414518172449402,
+ 9592766374657389840, 3206081438027351157, 16013702689312533501, 15788528722601567812, 18314198451066123850, 13236688692074994659, 3229858297074852748, 2968037605940641303,
+ 148470760067500612, 10319937899190593880, 7185467485673911733, 13041030391924988650, 17119842693824847662, 10989053172849710331, 16048294708726813099, 6612710923681979611,
+ 17408606709137505005, 4263600430695220256, 8941719936291239640, 6115030817534063286, 838024703679813150, 9772071103323550178, 14394690179107421621, 8621372196164819917,
+ 18138724984190926809, 4154767406604950048, 5209478936276366634, 15469512148904762872, 3613429290324777670, 12631812917445459751, 10280179062385049041, 19656860716268566,
+ },
+ {
+ 12030193727051241373, 13981191132445467472, 18205169646206905593, 12162181194046583886, 10929925499200074058, 15291772758590352491, 8452344520852919320, 8199821440451878776,
+ 9876482463501409337, 17331045017548147052, 17542726936580732548, 8299195336372136083, 12891767721269002608, 14062358772374813347, 4165127680073507756, 16350672737379495431,
+ 7595271959296927018, 15286771623821390629, 980612661413312852, 5738158184597378995, 6972117550390447038, 6246497134979772260, 2935831443692029059, 12837371529160496660,
+ 10228366644420455452, 12084265406352277347, 10265869676493821892, 3560143954870272139, 14368775225608592689, 13173522067191372913, 3592475039513249949, 11243554718624931135,
+ 15959892157814064696, 1882528744932347842, 3354709518288681174, 14147909176429982637, 12969888425264095514, 12509610747258602648, 10437226619684632037, 7605683691113295811,
+ 5697384515599291245, 14896081221061315030, 4329502046122948975, 16301954510440819492, 5191621061031169335, 12944691824816066027, 8080452646904372453, 793075614214301908,
+ 2662373477940179766, 8950125653339535995, 15717799081080898304, 7088593450282143511, 11723351531494404904, 38673945663198753, 5289769381065195334, 1742280067441486108,
+ 6543255501112359319, 6063422531520498671, 17228726254778244409, 7570754775709645369, 18222396492501523182, 14903937974816691813, 1216646642383641441, 7602781158453005929,
+ 3007861146695764376, 8267227185027440857, 3177625538211272080, 3144345182124760000, 9215753228267875296, 1342253891473772992, 8185239004636232247, 14951188427081280984,
+ 10569785547248986188, 3109435167722572449, 13661123218513357442, 4646373703143839998, 9712828426076586682, 11389514152975496268, 9796826623781200498, 234067627740956181,
+ 2188380882990542468, 9914251329861595119, 13616219875099353594, 16135919125023820218, 10732193797566088293, 7172996278550642736, 8531638916719074975, 14749121691457005349,
+ 14568782576170814716, 17582848432122737506, 1079387064102681806, 17278567874821675408, 3094302191119669899, 3730448048895347901, 3508894590334961422, 14091660942919228106,
+ 8414395549498864629, 370884714015114625, 9859130251236103342, 17120282094084970181, 15213286581037020029, 2737545283554254673, 12422648607044275574, 10601123825482591606,
+ 7332173975514924017, 6689389428877729175, 15162654177270425320, 17333672210015736765, 10717615330136876266, 15245080807947386155, 9830331968044142933, 16156512094344185621,
+ 9151817936103586393, 3495709317444898385, 10881665308947458223, 7087755261386867019, 18372828768905212167, 12873774245490293443, 12750361906770763255, 17688389897350887716,
+ 13329033749048978810, 4765695903433858377, 4452680451529163940, 5638854187414651620, 7897503022043776206, 17187635538005569019, 17766753195497526986, 13201529234556380285,
+ 3638940913491383692, 16533217388164981609, 4668956005988168087, 14053696531704823982, 12029006663244142834, 12043309283447175601, 7840964186868745782, 5691369519440953069,
+ 8500871974974230561, 6371833132372813539, 4002756625312698938, 7281624589760047276, 8813214993155334737, 7738842112797657395, 12616064385239668311, 11875709199633531753,
+ 13080663957403320059, 7305658810888170771, 12002632153917673214, 14713338572031413494, 11629178662016777533, 11669230410217642326, 6563025945717173284, 15889630204666511658,
+ 8934119620399766972, 4226481091885914732, 16000709195004445297, 14687868180928846933, 16269913392757896907, 18294005838441325342, 17625669613551023126, 10864307042201644469,
+ 1539179629859629979, 3357767843775461146, 2101360073251832751, 17234081431682678130, 10847284171622250162, 2314478515498414576, 17530153382031867644, 16210890762511619844,
+ 16932765725476234252, 17974885539458965712, 40892050255466590, 13934027942808790590, 6621543269907021363, 11796852321134174354, 16673775773384613137, 3370296079966463186,
+ 16962514259432373133, 4639769064050336100, 16965148588326727680, 2922022742538957808, 16752379982369550296, 12896050752492664079, 2708685629936199207, 17894058559833424187,
+ 14722530431277419104, 12333728409662476827, 4162946257347548625, 7052045338975163666, 16435875989376103923, 527141841571514699, 8480360258792897962, 4322786951393806781,
+ 17366230017999184054, 3121469063237497918, 16903979375392804254, 8204447732627807538, 10469007462907301028, 1938243801077680483, 10156223751968769092, 14290141776757271674,
+ 2576841693080637380, 17952976066680364360, 4927209893946473523, 12406446420948698970, 18104668447274679200, 11321546081935789882, 11763004760954725540, 7488613143173641394,
+ 7566233859107262691, 14065899052643539237, 17156645465457086529, 11936850757897686493, 17529993825395532749, 17145245039398670236, 17597009579379584432, 3472628483897995694,
+ 14380803034817149259, 11042134597430846555, 9992762805140200547, 9895143575678511242, 3228768306004664915, 16153114680219541243, 12036181641761325912, 491369524302296324,
+ 6378343037075212083, 4634622317247307316, 3415247351446435530, 5939760313513758783, 9540802244218497398, 2134082884335789009, 15623633842188443713, 6824600547587227473,
+ 2021901489047050575, 4365578779519871721, 8603845379677060773, 8929020214929242215, 13010370953605249273, 8071113541575997356, 10681914788594101574, 13369522272664029829,
+ 4241327321249984516, 12810477007632864750, 16880581634097860698, 3630333431846970294, 3439918565119627078, 6613942714960548236, 8647431674406387604, 13352599355545805355,
+ 12320933424684960076, 8906954611339502145, 16931708732547376267, 6446897311644431583, 13749563256713530203, 11522599770043814384, 14455556215959342791, 9022955938835504923,
+ },
+ {
+ 14734199666349771791, 4159865781008764547, 4413963548436270853, 4489431649937855880, 2824856157476907519, 5111945927271461159, 9825486821405155220, 7657332565522279292,
+ 8798422177670065832, 1250388939918922012, 5145324733132743026, 3716299737175574168, 14387202892738514375, 10255681359895407489, 8563822744122397080, 18430417713920116013,
+ 7857399215288147901, 2593284350589299949, 10086687882410147351, 14407837835956327624, 4469878646287387405, 6845712208092605310, 3019047164798827279, 2760591274816994522,
+ 10930605176025081830, 5850835312463207607, 9603163404664606781, 15623771803942139684, 4096748595672894276, 13060617166821837740, 11222939712741281346, 18445178894593253710,
+ 7976335583419810431, 16568954524837045521, 5581116346602990350, 18369129318179031997, 1930478946186473631, 14255806344323769850, 5302512325798050097, 10008570767203424472,
+ 929118050943456183, 445163760697913504, 7208246135487165437, 13565354217377841149, 3835063466770003968, 8214155842021576753, 2078717035803259674, 12502657820500987763,
+ 17741004372977419914, 16320851348541958575, 11853008901690968430, 9954777341381304447, 14274892887030297417, 10721339848307167540, 8731332510371325649, 13676383433520290197,
+ 6251805914672095769, 1063060143452094078, 16647397170554915088, 15454955736319919321, 11509235104607349098, 8441647766544589978, 956121464218389431, 10752793912033728149,
+ 14879603883666664508, 11158422912437249402, 8667988090392051507, 12702057193823135575, 13234479167199208223, 13752955092077643608, 1117550756888145931, 1184967682872965330,
+ 7004802666964716824, 395433501626287206, 16674151908279399004, 2279551165611331131, 18032555740803644790, 3573953642384243989, 9892480504006025100, 2397759375236735831,
+ 15032723931727013905, 4176673226777913017, 7270920367565154969, 8867675920919464620, 8174119413251699506, 15030539680120439131, 12325981936900309518, 2932283844656871802,
+ 3227890727136855788, 10772277744280338071, 10065219931092397730, 17233706446636791341, 1372997838851103811, 14430098741859995846, 3290287064497023581, 13054133915521430696,
+ 3404625123065318735, 11884832674715736469, 16513753127836546671, 2355202492840722899, 2182039060332057784, 17942518439766613327, 16731345075928062112, 5501869092504492134,
+ 15704211025163561149, 14596679078416770266, 10262177146233936833, 12443005669252298316, 1541294166390346144, 2952804996209068760, 15358481016273272880, 1321729586231860999,
+ 6862865331233391228, 15106279167311639122, 3283008666555627541, 13405610967904725343, 9845239597399801983, 10764942720093673542, 6506992344666983370, 2483131164245259278,
+ 9508384588323002196, 4786531415304518048, 2690306308552249535, 12208815515689370850, 5145548243685871201, 1536915039528215765, 709289946127949333, 7152734178531880864,
+ 1555309551759198315, 13156609441579560529, 3264147002083956017, 13629162110016732793, 10295903344220946201, 13835148808782379287, 15646840234722388037, 5850505683234532491,
+ 13348528763144385986, 8174978337870000956, 261860806345683810, 4561309263594732686, 973813267913726588, 5812176512716041479, 14361848397565071235, 9009461390894364897,
+ 4179825720483458009, 3146850391122074949, 16612045757277946305, 5253713786583408340, 5426943015912173764, 17275032103121420023, 8467860410750483621, 18150546576917688519,
+ 12762630566060098764, 9762645120146998728, 3635705124656800121, 3295681695622364165, 1764654124266477879, 16610065552429288987, 12182970194869512424, 1144643512390613083,
+ 10286763540130161498, 3400614528650953395, 16801086752981339314, 5746334081386875705, 4489937467701177898, 9249554406360134002, 8258038249111147700, 1922443676251769427,
+ 5981893063992017591, 11780456280008557800, 6865399120670384939, 6192294124015342683, 12388377965276278882, 754882660516751393, 13476094069874573591, 1000347292494612314,
+ 12753947115797128456, 1864834899756446490, 12873792436322150068, 15420857687427067065, 6214698061215951246, 11174167031334850459, 16378107708672199269, 13073812080094584194,
+ 3531275996684891316, 11764951512033875164, 12239314899306115751, 12701224435273087197, 152051042384837402, 47099568832954431, 766394328496626388, 666084094458729910,
+ 145223485883456390, 2445564470200462527, 6482144922631612251, 2316071592874761856, 16548688346252111305, 342430477320199814, 17819268336019413198, 9112976726726029531,
+ 8272548370610798443, 13910434751574602849, 4252192981011400377, 9590519351704710983, 4172183866405471444, 5567646223402949380, 6059725713353637560, 7169982539312028092,
+ 8422701298619809502, 5749403137850675219, 16696307763582818287, 7683947097864552851, 8685161999810728314, 15931494353777534155, 21629333141204286, 14374464903502564030,
+ 2354260355218724624, 2822702976206523812, 15817484232297668127, 8389777239766813455, 7962301702220496539, 8489354871723216960, 6637394823950686331, 5853452396705295777,
+ 5949957329602421631, 18059540496274524007, 301304345046470155, 15775279105880040289, 15979581939040500554, 7142046514802237176, 9965619645220287167, 14369754349691428035,
+ 8777708254369249245, 17882304730732025188, 3127583543205765491, 17274466425160820249, 12870181470212293464, 5881181695848312122, 15338161852429951632, 7245877162563127935,
+ 2634466441339054819, 13057158409330115380, 7410310056430957860, 16362149248469660371, 1435827663585128427, 8005375008235415486, 10865470829924734253, 3177600727860863968,
+ 9627905753364576437, 12776456422166281355, 2851899644189834214, 16998174196920379994, 9001602524188761177, 8960294999163663712, 6580949973363012183, 7568306074815116797,
+ },
+ {
+ 13345381996769253014, 6278048669175242345, 15990349685835870151, 16358261962292186792, 8505261070637963021, 9396725065098564446, 15331953351049782155, 16194796339208321205,
+ 7076257175157660597, 13221730879315432464, 4499548735964119085, 7559253754663705944, 10196808438730350168, 2391718471837563931, 914409564589897387, 6649744842278078706,
+ 5502356275170104522, 12750322860129485966, 9581277511584938955, 3872282844937972630, 13668854758739140874, 15020438121087800965, 4581631138384430657, 13296613956093963588,
+ 14380343880230315251, 6834909096379668102, 15975839936772635678, 7738230945027714388, 7971648870467745046, 8320128435052726628, 10820026056424631461, 7058118462476293754,
+ 7264036154081699231, 1304060579978677195, 18166928140052040308, 12593450560996175288, 16027666629366472835, 3677101504316839585, 1477956412235870359, 11566116495906585190,
+ 5990066097615398579, 2092999434824845091, 774626818169813708, 12815491981614523669, 6993666706115136402, 17707947501239782961, 4907798919001453899, 8644498548438963907,
+ 10893744790193607860, 5425031394266373727, 7829758685118664025, 11448863214220512751, 12538690882574195219, 5021176303769859871, 5820297470465439264, 5119144562842414323,
+ 11165438517321289842, 8661037418831619311, 4401480924846865713, 9355650368902126256, 14352422544185763829, 12268031594529744663, 17607497268577344783, 12698954426769962214,
+ 5383780168621648368, 3613813760948797485, 5757643543708300439, 11434310585555684688, 2345981080034027974, 17817292757133028083, 18327982597027759229, 3812144950722314021,
+ 8792386854586041066, 18039588893139878329, 5501406599000558473, 17117814302557684702, 7341401626090786647, 13233670006052290433, 2192568639640447778, 8946253989965365943,
+ 16557600339807647183, 3024845786116641335, 415867109882960964, 5024769423994345740, 6707880221250244075, 1889203596997453159, 2063004021323938442, 15149146696320496954,
+ 5091534986326281709, 8005812102849517353, 17366496686564185717, 3658878980438307796, 8455810244524637026, 7649393961871195909, 8629386422022530934, 507133797409068858,
+ 8757568869745664865, 18237318628906764926, 3340648953867756474, 561136277959994100, 7271097445297070958, 2011054670254573698, 5452809131990689118, 13469888024447314143,
+ 609640447772904263, 18299734743899574512, 9082773418653747144, 17312950936823165983, 8565594704932296493, 366163155689429929, 12596423288683198842, 6787120926470876217,
+ 18404298100226778075, 8425329164435445437, 7431919288426595458, 10680902470574406179, 7944931189168061880, 9822761000408762356, 794258562727155839, 14627123549563592012,
+ 10098955144981013765, 4095621933458866623, 313357785962326778, 14880249056209398407, 10605627454111004262, 7841561005378789988, 5015399193497762029, 5818383539901203899,
+ 486788682374900660, 17535646661984222270, 10786441143551223569, 9744277172896797168, 3600223448942144865, 8847805713765285452, 15255336563751266306, 14792236445425212896,
+ 1569380763811592357, 16850350231819425451, 14974316825768144065, 2974893813338477794, 16726324408135381195, 15000665607479486464, 886632233152598796, 3783793991897659851,
+ 17267037371866619240, 16498595171305365582, 6453906744948258368, 17267405673163971478, 1676214913570622682, 5051819157163581842, 4315279707026326708, 15525840981760267164,
+ 829322392873083372, 3459913121476606612, 4547714508342797986, 5801286056105229312, 11591136160675314269, 17965845487226778560, 7022309609363885527, 5990586084398664157,
+ 17415594387520595717, 1453256163777911406, 8242859112004661961, 17561205531134219898, 4881570838375824387, 8392772608437744175, 8797957464506597080, 3142154188155936579,
+ 6248409301753564092, 912147011448492506, 10413108318416902948, 3349445377400918439, 15887909530347096066, 5138859116738406557, 10990929831134698266, 5911273060564729281,
+ 5197581879950405305, 3001975514736631403, 1919678782193534342, 16811100109848336275, 13086769249238533805, 4351245282425706500, 18281601243717625371, 16799537683941038764,
+ 6288277991521986035, 12231480617236286340, 8633990423689180869, 13535190275290488690, 13419223868164389899, 17622732596713666209, 10071779220640400024, 9451660464882303005,
+ 14029940454474238624, 4957972603307996503, 6311878080193524221, 7485203809274562760, 14450074580092843779, 16845073066403496624, 15870387052570816676, 15700883809468229388,
+ 6623653867803819118, 1037643671564549885, 1236506073247459642, 2997396263933967582, 6182371056378366305, 9773995232878719062, 3356495419149379609, 10710833980055523526,
+ 9965766500412107699, 16895652751136174589, 7310937934566947318, 17649841370865305295, 5052693369483848536, 8093060855248113928, 15777336335257114359, 4642979535698186085,
+ 11761084765182152401, 9226969173242076459, 5445719591917489914, 9371133174853825566, 18381867235964583187, 17262888224001222346, 1934253649802391267, 3574888052111089863,
+ 7862401495691359736, 674970329153306547, 14447324101549742216, 15408908436196428001, 5339840774639473099, 15542473999923245378, 17353380999887736706, 17787369898686553525,
+ 2301964326759931592, 17775852221342579469, 345763078096690431, 5079606662887837532, 14863722102334084051, 8905941255852264870, 3806200576416892831, 6668327519801544161,
+ 3672519063263749482, 11898207454593385701, 2364050348970898536, 3761861796987022528, 8159458273489411574, 3231262679605967088, 17179214124164291006, 8770284079122589350,
+ 15270079546607712667, 8471883658965202468, 10333206241569264742, 7414741159725097798, 7709713706668701850, 4461310100162632915, 10099194304024685274, 4760203118683828795,
+ },
+ {
+ 7237168574323016859, 17908810107504968137, 16225708127838178121, 11127095986977103648, 9346291308317410251, 7381283458277047010, 15168602815371846943, 9193719743691734294,
+ 9607254959726458411, 16837156817186931862, 16178143308706847393, 10970721795101931539, 10840376278605273092, 8298765077094727469, 15520543037910966843, 11484620381947762562,
+ 17395070963094117711, 7608132718922490881, 6531178971071758016, 1302814667007331361, 4002629363862769670, 14649492495298089937, 6868174210142585697, 15456170652716048683,
+ 3688308493559609536, 5123828159813683938, 6245436817220183971, 13062234263274605325, 1313395302496832996, 4961840797708687855, 4990192311341859196, 5283131006216350959,
+ 16952735659396950704, 1249482796824289329, 17859504101404809981, 13260403650830168625, 1583343855370236084, 11389120333403185826, 13676599385745755008, 5066667130137739346,
+ 4799264864906918301, 18347145714884655971, 17034125441088681346, 11828378178487279470, 1759038296522081901, 9582407833575609381, 4195817009050503354, 12618256395967210055,
+ 6486755100213398364, 5270923715575268322, 9268255346880651103, 3147724493117082267, 16710318779269985633, 1240462642985509644, 2400811796254219369, 13670743068809614358,
+ 11437779990773246594, 13609113239383378152, 31268927496646090, 11618772705165035208, 3078024460365036167, 12138983306908962712, 4779157919015181380, 11901698850260672002,
+ 14642787507945832432, 16258081639927585935, 2560321462542164242, 16286882196975458174, 4328159372518768969, 14313553865397841147, 13769372656320240568, 8950675126435331579,
+ 1309709344837422378, 18403288028608342631, 17102350491414677993, 1133523394849437315, 12526496222782003755, 5447309058259018864, 17311120581405946631, 3760058661125144368,
+ 17891474172606596407, 320094264574086461, 93927075222964142, 14355817838636249140, 1777882427773785068, 8237021421587560510, 11520559469641852991, 10567910693910023157,
+ 9164300764173563614, 12322279811717869943, 2768902015676238485, 7761799112151133995, 5740162933856370894, 8043946608495226840, 8923704438836381869, 15189322655373943849,
+ 12046275966378825708, 5909386969214935556, 1201990704907881028, 12128549009950617306, 10773478878317625320, 4554854625564648958, 14029736945871573056, 1353247251064221090,
+ 11909123603272307101, 5517949035179831783, 3295386484950432959, 5605137795524476293, 10718654547602473221, 4984952409581272095, 883267724006996723, 17328431094026829348,
+ 13671210467347025770, 9980617617519781731, 8339868735163579231, 2150724314757661915, 8571153021065445899, 9146903697769397191, 9122246102814640373, 7744779513085605450,
+ 5733697799130763963, 962857090393715865, 10538932452511329419, 3413281437172590207, 6635428480179771287, 4404718679994229138, 329074170931436370, 17746708206910902139,
+ 465623292609655544, 12740391094094156750, 15600221933467635716, 10712500242632508096, 4760789577759901270, 408323003115848014, 464706673403009712, 11801112998381624421,
+ 5545681129476190458, 8725689673989867696, 2143812364199804063, 12133065347666807143, 14072918710585286097, 9027361966934517640, 12608281386782336413, 2053972384317260152,
+ 11862793362110657447, 2615766363986534825, 5065170482898519596, 2878903691735623036, 5204682974850023849, 3382494556217615091, 15421437169807703253, 14936317525837021790,
+ 12363228588136583724, 15035239525394825264, 16312817933687103988, 5294641777817034798, 1197907056067071148, 13084871057903003628, 5240492218216802394, 13798739245031559476,
+ 16167519903361647970, 13106036157861122437, 2112866661358918425, 941609107359251234, 7240286582201258992, 17783018133711208779, 10433192833270769862, 4303514224506582113,
+ 1549168540255262874, 7958641388293572779, 17867057222043259063, 1752211085999288773, 11828273580240483781, 11712913016012991853, 2373360154803731890, 6779610934665321824,
+ 6633997178003959408, 145323909693817907, 2198452664824754574, 2920221908918689777, 11047294234593305837, 16840329825128198329, 5479733463453508239, 14054694221361300990,
+ 7908792292118068471, 2703859967221946473, 6268006869726038724, 9406994672213611166, 12424098071734234719, 11900750387777467409, 10724482893673149484, 3191937460539390989,
+ 7635348735946750413, 15019610708452651937, 107671898858364780, 16808612119435524257, 6120109999123152232, 2251481693156523545, 10792076820325151003, 12924551065805570215,
+ 11225852067204798645, 7665324911993544151, 10402215331683389905, 2856131542917118235, 14152013119539822346, 12731746180172488022, 16402457225039188820, 11129045951416314666,
+ 13009066459618795885, 9636541685854928780, 4783713672495759319, 13224924194053497016, 8044695420996588716, 4054951559323242873, 8718047667110955699, 13479622910466265834,
+ 4333570709684826136, 3188483692974505359, 17976757824852085147, 12585927226835238762, 16665346689006249776, 14033419552450595393, 12216049696243171604, 447536030797427204,
+ 14100792221456700414, 11757964686555543320, 3971513799111193530, 224065891283865758, 1282520657830780512, 9120233956471705454, 13711191981326748211, 3725543277190035504,
+ 12094220323799678341, 16876894723130932646, 14806566188177921577, 2283878370700345373, 12254260799985999571, 12764268124075139223, 2425316412794046071, 2088679341162220742,
+ 172600248248909168, 4338060849169481673, 12046085844143054569, 16220699277291497444, 10358794035289405365, 6739081270984238196, 16079387007561085178, 13471688054064884318,
+ 179238781994802586, 9017860544212172426, 16179187983079961859, 1380105261896053495, 6187003332254463653, 9896593306091975718, 17181949235068293496, 6820143159816507763,
+ },
+ {
+ 11156900805411912784, 1737275748212365168, 16993486692205800040, 12538938066475152524, 13167476167764338790, 17373319366642541595, 6054831428930905282, 7118452124011953000,
+ 743288751022377974, 9226164090128370715, 17669221687181604232, 1117842260351396541, 17697784451833850722, 14382209306891991103, 16951372733687268016, 188390802712613066,
+ 13268209255833199938, 16633146482265079265, 6044233937395578074, 1733872861371247406, 14239198956565602758, 4036306057099495106, 1778912298053970082, 6174082695927204648,
+ 6390361653475754940, 1093882175267357438, 3784079074949811167, 17343872969822169769, 3359715652121662979, 16808529787251986583, 5169730697018536870, 52766949850965558,
+ 1403521870654810721, 11668153767441183906, 12890534559297323775, 13255750643204945388, 8838011266491753399, 8488733891344199335, 7468199299966346977, 10645543722243906719,
+ 17019046551094178629, 3055711824867115856, 10841870683594132444, 382072777978364200, 725450988418371589, 12501774248954630111, 10130164067258601487, 7823254091037946722,
+ 1866199290196467180, 554488389245696218, 15654194385091454776, 6131785419830586829, 17856850737824439617, 17602611196814472081, 18244498160243832872, 5819736122106091373,
+ 13147212495082910072, 5628914243538540648, 439989514095330941, 7620472329041817834, 4272251824771633288, 16564977601507013119, 5441117136715882317, 5622571730905974389,
+ 8706277077547369587, 4848592786700984858, 11350018329458740004, 143688324288835298, 16536982722430328270, 6522962371020150117, 9511378862712142567, 18221873727623821967,
+ 3985496688414455558, 11118602672015528143, 5383196298446424580, 6116994806363767576, 4051800833472889243, 12236802048340231756, 2619572929682123355, 6008593592518397571,
+ 18159379849046931840, 11283007389905902377, 7465803490016928282, 7730454089227756796, 3569936088077890000, 2804205711716327520, 14107136042131448408, 12050401455009236937,
+ 4207421531032215145, 7924589143024019458, 5496622772676523695, 7061817399179278712, 7371689806847905735, 8282663810521090335, 11422487361383982601, 6397294642072086677,
+ 15924576759579234055, 9923512276109397968, 6142441356128988365, 11189756906743802872, 17790684027455851021, 14143953977451478354, 4002828116425422775, 7521923183240026608,
+ 8349314793194266723, 8850246337652136896, 16635419760398709839, 12466509089914036008, 2686508850003621675, 15822032015692543407, 2211302095278204426, 12550915236378020964,
+ 3844358527917324404, 15045102416646491178, 558560187822263769, 119801275176287297, 15593003307431898979, 12817356186703401527, 531419248069516921, 7957931139148417935,
+ 960381383357168877, 16774709963697143275, 17896181076874992240, 1150415168189819777, 5498481130530161530, 6895900060791069290, 2079514944189747822, 3428689507902936354,
+ 8525743843643076774, 6014251588524085384, 16423924202355808346, 2605496565158241796, 16676209604549923678, 4870342652354143869, 8337072895802905281, 2478197702680510974,
+ 17076629945963768258, 7647523525256473652, 3999707005939039144, 9578933944626939348, 10469359763046002307, 17515033080264359276, 14272203721184987484, 9979666177276073145,
+ 3896303254672567310, 17167309259158926479, 13231865857998180315, 2260217573257466310, 15465674863730076330, 4460025914284098438, 718707264700157542, 7215235109325196153,
+ 12056920178557394302, 5131253251712581899, 16251877995882633963, 10883064494054484051, 9473274815269607947, 8874308005470196739, 4412709653190846630, 16469366225886156991,
+ 13309932065155297376, 11504232979548718747, 14366229841750030472, 12797870458094162590, 12894694064456675527, 16977997035748966590, 15080606548583635864, 9292471759789693596,
+ 1226107676031264801, 2683510924831720521, 4344623677417870872, 6581244316153602113, 12516966236091151915, 6758725397469564403, 3190083750600883646, 13018714111448482165,
+ 877612159223846777, 1550479938877814973, 11850498481046386377, 18144061425551419288, 6811504328538189985, 16351446194603329949, 14495016749549056145, 15394017693349630053,
+ 3547198835521135552, 9940690805769919242, 15170549880149605690, 10701681269101530298, 5914809102954412895, 15790219349994701461, 13664049285300230863, 5735249081148743651,
+ 12395092509541605535, 2320592554345067500, 14213227630916688382, 12847883717490005600, 10192971941898238362, 18392117086623245662, 14369915478097042954, 9636190568462370402,
+ 13308009076055104849, 9679299541833059768, 1512845037409468381, 18211703514703978816, 8337549295001075127, 15882458005321062689, 1212364284844818969, 13856780176955551126,
+ 7602636613718406141, 2678224565451628558, 6976331354727208830, 7414684104929828967, 13536889690553324858, 13086024708711030952, 3748455179863046304, 1853373561223967487,
+ 14753645881295168970, 9410794713982813264, 13485977523338061503, 14267793994973767571, 15813187206361118954, 16449603969001878152, 17705410933007144731, 10960511750491378082,
+ 42380674434071200, 10068881872338405083, 14327149378182272744, 18264050153181932597, 8600790433943161385, 2952021171158508784, 13461070885471956301, 9633226987835715039,
+ 10360728550079953555, 8471248683929284652, 6673003382103023756, 8972723925695500528, 5941683993959582947, 13500073059240140624, 17856131583218769324, 16799784995825923732,
+ 7292102941225494742, 4807816512907139693, 11271815982004383122, 1065441269547643077, 5279915279332961503, 12100441009657772185, 10448964764090871372, 2944169874578456259,
+ 4669722990542640772, 10017368027073540045, 13392819319228640800, 1070815632453829996, 7274130058405640448, 4460835865761538026, 15283885107956150892, 5815884817303156137,
+ },
+ {
+ 987127652983079722, 6872208513691823695, 10110578719299008714, 16457381243184327116, 10779072738879347999, 5965282398006505989, 18251577267494363268, 9321323379108563422,
+ 6119049170407437179, 12938407708038792521, 10105420405825346383, 3416306788177016826, 330648587825212481, 125568991290568141, 13857127346570918649, 4376080986981308805,
+ 5930434126597036589, 11240994623469578584, 839346533006694594, 10015087329724081709, 6285097815938761787, 17244606485954860090, 11523249727882012981, 7178451948850151803,
+ 16733615335426327004, 14833564736134873977, 15736159435317472941, 17560193966711884894, 4670172300306758374, 9477713670983496835, 14243955354032737980, 6459864769649694970,
+ 16053517698989562256, 2069464650596796670, 18305730134729303993, 1090863611964008044, 10595928817546105363, 11743183506441120484, 8584767248013363683, 9187452830514379903,
+ 6938579711827578880, 16268282168771714072, 4567473305742437221, 754548046014059669, 7773636946295303572, 8132380485366854656, 5054542590195827722, 6074347500754248128,
+ 3955857908381474930, 11852678900999621852, 13783017203403417097, 13616745777931267488, 13189924588414196918, 14519927295809952876, 9334100965942760501, 15172548734242885286,
+ 4869852132020793901, 14921212823114724347, 14342559113353033349, 13336827701782657309, 11168346789878684452, 9790480725902426052, 9171116079841529936, 9268902731763220711,
+ 4417372365016540659, 17280138652899703220, 6650972319709075180, 11252221422799491438, 6825784270383052414, 18308562203755695191, 4237445205082731447, 8485872406483449772,
+ 11718726714045105993, 2624508790343569765, 5794111482300008213, 12819216567095091385, 285329842793152354, 17257707863943097374, 16912487373614458842, 9501952033607497057,
+ 13074643344738491288, 8505193022029238118, 9932652098552590323, 267408114280737600, 14802861246220332716, 17506028737327736269, 15929083591675282371, 10937680132245212187,
+ 16688733766460131050, 12777635326299184071, 7769405825716531061, 12435979606514844725, 2767987141420672349, 6723940414217341603, 16114182069580717377, 4541113259200434040,
+ 8506371389960537559, 18031198550689409305, 11510251852995503442, 1923472385053944821, 6189904387119195613, 345664413118550356, 6439003191582953600, 18344385760549575361,
+ 5320548958745018096, 8341561189829530680, 3327828110998403156, 17981346269177513223, 14254804273025688074, 11111147827704077234, 9093333670517437191, 7272552282364063165,
+ 7587268797387514088, 4402481408866840263, 11260815389095334885, 14391444701764948710, 4224880318604689771, 2472148458660279030, 961218007313548745, 13826356220879831691,
+ 4477325369345373420, 299141931133875776, 6787652129916359301, 2498016036564260464, 13342319943892943548, 11124287719737492205, 2456333599771808442, 5084719026388755304,
+ 11208298545009109072, 9361672558653085726, 11597473974277116433, 9123409098329262113, 2315750487079392743, 14788891796020803799, 16477373474431052867, 6394267218565388055,
+ 15099803091969065643, 7815556206466442264, 10304177785190927723, 6493522556494567776, 14386173575053274652, 4399768637448990408, 2996083572119906777, 9762267038999270167,
+ 204205141115459509, 15145142808374731170, 1678672167166598389, 12032976094920574395, 7472791648310888425, 3687068899306742850, 2661021646792225638, 13705804163602287184,
+ 9320611303424530507, 7295004960403065170, 15749376924513664173, 11543669941217924178, 3710258597217352196, 11371260298937340150, 3902566672028686120, 17983841473999282668,
+ 11814188798674654097, 13017429106091059634, 11079902218909650978, 9064898043690914606, 12023954026861289442, 3592879256613061321, 9997625973577562433, 16073808232708438680,
+ 15365030910001289574, 8789654976305207659, 3460532362538090307, 12992239135084766157, 11449242863169591803, 5418502644558230048, 7981056000757137731, 13021725301900406539,
+ 7782773734814780443, 9390379677704432300, 7130223982341575476, 6496906770393189740, 5095392170846288421, 11058865093605824851, 11021902438459302094, 6088242418938968652,
+ 5966014717657529537, 5136588784988696205, 6360472729290909457, 4447169061266082500, 12506934948427595318, 16433395045878253504, 5116318868244944213, 6640832565780621049,
+ 4132920736998081103, 11827975663170283136, 379030309731223785, 3149051804072184938, 12035317616979166548, 9642536337674500310, 15531497803043399747, 14399049832547796531,
+ 5493097462539196248, 1629980144571354138, 13641173578972112594, 11096917099468924214, 10934169048860132281, 11269991112518485434, 11206050457519414207, 7939067479736521906,
+ 3806755078949823895, 15988975666336042818, 8246267395645554105, 9703363749164024623, 7795606269507901456, 451102251659512683, 5865831186929065346, 81064915983990550,
+ 5810137226573964106, 8424253088401255706, 4543883508937971494, 7386148745257823512, 1348150366217548727, 2103092839740150931, 8353010273955146981, 16831854467863536537,
+ 5095778754322493440, 16577416628492146129, 12288076721910512064, 10563487622054214075, 14620553808255752273, 8159958565993701874, 6712100880095874267, 13203797993773281759,
+ 9640010815039549413, 17460773950401928956, 18229977265837445092, 9252634318278265623, 16798979334068219458, 11731856767337519419, 1551975378671663669, 14320102155496153821,
+ 10528988339881191052, 3808685834232068820, 1064376960475785008, 6466670187430364599, 16860839938395945578, 8876458447995837267, 7897541112736965607, 10283235741250763328,
+ 12246932755942887931, 10842213418139067847, 1827392608284960926, 6961756951208563550, 14626966634382828071, 12653808277900379805, 3359368990539585596, 1190255644328095727,
+ },
+ {
+ 10836955050615624183, 6228108794259714192, 3769609579318360527, 1481673673659326756, 301329772189110029, 3574710782304999603, 3037244905687722509, 14442941761397229485,
+ 7739110373965523311, 11019153332996240080, 7346010155518751797, 14853737521111213548, 2542883237207113209, 11015136828834421504, 1998601666328316353, 1540417751495800469,
+ 599034134015360884, 11263317530774995092, 6667501365379084069, 9844276013103312691, 7566306886180438990, 13702768345615509363, 12525031963170717968, 8463786212988210464,
+ 16430555432520738762, 11225321696360643709, 11777973654353154098, 13133115227828884393, 17678738157022850345, 1586660156856830642, 840647763693221003, 8645742354464326466,
+ 8067725618510810414, 6229041032854057836, 8025364837156161898, 1290443646630661359, 10991323655842003728, 9346782840170054789, 8939059450584871902, 4772347025802041241,
+ 8187379062828795656, 14744366698824202673, 17014663804743197103, 7704635785366672415, 17162645929628468457, 12942442527592695216, 13291710923558633295, 5800071247768119960,
+ 3552067877931717181, 590108493628737243, 497390871097272557, 12785795808441396281, 5561451613221796230, 14263823913216836845, 10481729699001112354, 7818094022046194698,
+ 3598736754916501501, 4827022662021197854, 8203180748141933720, 10283251600632599041, 16186680658340956808, 15183393398087319142, 8714402370450665212, 13217902016653171217,
+ 13766344083396522552, 3843491774288631026, 1174164282483810421, 6398024882371290634, 5459580537585379131, 5871096222069618377, 9611875286525007701, 1940221124144536452,
+ 11598781954647517543, 15695098933552070478, 14976427118847622464, 7147358677856872160, 17135524627354123951, 8338407198954935663, 18099467407613897492, 9025371359994313919,
+ 11451018285787631498, 11710749546511767589, 12969419493657624536, 2338911230088800542, 17492614411321910750, 15274296072504110502, 1258801445108685875, 15178229640559232741,
+ 11743016372336018414, 6125536020759642301, 16563280170798047733, 14529052521060111789, 15253502329174644388, 1556763737841306861, 3041940064290508531, 18020881359462953336,
+ 3035547704692483898, 462291414338050452, 4209186193642705826, 929930084709639338, 5246348806539841852, 16407585218848843770, 6076996892610687611, 16547395936238439809,
+ 7781914900266335582, 17967080065814694611, 14677062197599091614, 6667353926398988267, 5658765839564756939, 15807817354312473921, 7476371642206655552, 1194561222317134506,
+ 10645607622061780068, 11583886495890779582, 6879987349345299140, 5119348262235800038, 6546217876212430196, 5534845503308945424, 2003680599532879233, 2824480091030475274,
+ 2297585344913389180, 13965959247788377604, 11341378863608387800, 9890953382689640379, 10443523266835246852, 9631856696811288063, 6944503116553031744, 10030824122780966395,
+ 2699238910420203023, 13176131497943324759, 2961070701879546988, 1511208990650977146, 13153451530298458944, 254163587562261430, 6668642257003911712, 9040693998431169077,
+ 5596789796292416136, 10505785820531892681, 3803057245175025164, 13601446877323203658, 7939638174866660891, 14141848241206986005, 13782885912650943563, 6287188921995938212,
+ 12999700407775352603, 2664314004284450753, 7085329252118415126, 17472483363754271126, 15550759073378819069, 7572177503239633527, 15705617684783232308, 8586972133880335199,
+ 6636244234467769744, 18284390012667441869, 722929822219329257, 3274166388651987221, 13177791837760949480, 2568122970128111127, 3477167651270374316, 11371867850093288299,
+ 4065583279544717744, 15575999213135020408, 1319417044854511604, 14713082471342741360, 9399053450949627084, 8900637792268080986, 13511414538589410393, 601556817111475086,
+ 9243142430859561073, 14638657859832212223, 15324499241122936340, 14477761753578941934, 14699832840191454785, 5114873517253108658, 7427057898915535537, 12536800031182849473,
+ 11775949216482616439, 9736654184925478086, 2941126957776086651, 11331303510371804917, 3535865528819665754, 2488682969529955908, 922100556888217739, 17904958255578472855,
+ 12876460969864886954, 4692925517843901006, 17390372170975588763, 14692578774429957295, 1759864963511373267, 12016553435172103063, 13015881292003590351, 16187569590073958370,
+ 15154092100094191059, 8103665941237580228, 3265383210728928809, 8341136973870067643, 17281635732314163959, 16433737305161442677, 9130496888060417111, 7070932927684908859,
+ 2129695226164849659, 15163429615374108928, 2225893409909150746, 7870945454511315288, 11525457118388411301, 10519485231888976311, 17601979933176863246, 5257431126828903605,
+ 8898009465920717074, 9418463821940622934, 3166596280515383312, 14529914354665032916, 17743784337460844568, 15964972678726654742, 17116339264937332927, 17098426671353029801,
+ 5868176722765418764, 8429364877332796959, 7785756492654137899, 14975349488807886278, 3272010964035218775, 8946396491617515063, 3774503492701710541, 2113917870331282160,
+ 8185651664029868180, 7343199417670698200, 16586532029502206227, 1531724981369431299, 5596507566645246686, 1033673521199123313, 6075312368002387319, 6988496924054716781,
+ 4604669688530289732, 1867259042549585024, 16180237500272383882, 3296394050993129267, 4326520583229998770, 6557569103657179065, 4126356691591689646, 8674328613473447843,
+ 18057076842177382833, 13020419432300857029, 13646402493577420410, 16962268104464192344, 5358581430340252761, 16443104373870300795, 10099216829282619724, 14890543634568831960,
+ 13276631120161396750, 16968574704982977753, 2376454925072971568, 9477134218094425043, 13784622991820528988, 2074505569780291922, 14294803056569335889, 8759684036994631222,
+ },
+ {
+ 16549155412115374305, 75977962737081600, 8870825488941031605, 7403557657089993256, 5825394234709429518, 11978746413791918409, 1876293130790608838, 9416631623519692610,
+ 12346821858361333862, 6352722970730511156, 9920942104315084988, 10492373916708072181, 11163323626947992791, 17035282992056675772, 835685192553490421, 12792459312570503983,
+ 10447922451077327640, 1515090998837129975, 13498807582085718212, 1212623301098019779, 11573518650162536086, 4823461685803772444, 7289704908733783271, 13167881392748327839,
+ 16959954449957105311, 11051594981465026138, 16062472412226411858, 1976004115345602080, 3630495248560019222, 12698191555196404138, 10107923614229901155, 1597725222698864118,
+ 3416019089997140849, 3011417601086561495, 6275242351124667286, 14197557183081060761, 5633711018871057458, 9723496411345030748, 13910057756335530832, 7285051335492395815,
+ 16774605620279328247, 10223834398868957807, 15781733027568799739, 2607095622729296315, 16499590296954181006, 7576646116121533069, 16638313875339315529, 14296194207652471696,
+ 12874411821611501068, 1998417862388106640, 13882642435422318946, 5791129521978463837, 7512934139125398199, 6966283273373526248, 5475728301311080129, 13542211842758381589,
+ 7798993150530415596, 3288770257171840452, 13373286069558329631, 9784279046496345014, 15725032660238528813, 16038931923358478835, 6921788930154218900, 6093507035435214187,
+ 12194839527972802101, 8376199250979271449, 9055995864116790217, 8125030124567662718, 1070812422878398727, 3950906148308170707, 10121516919638719072, 813471384549762068,
+ 7610605259962047827, 14617655900985224117, 16089669078196617440, 10594009761533811214, 3425971360520359121, 1245021768718221594, 16690372892586114152, 9933179766195044175,
+ 15142732720676276463, 17435583143726694663, 6030257647680338377, 243683009289621849, 3064238900429394982, 7655019927497549159, 4484400084400458429, 11650860345018651068,
+ 16203650576183697977, 17614744533772112337, 1119292890050870301, 4574885263986729787, 8108207498737924504, 9708434439316056191, 10676823369888368482, 1078311554675991883,
+ 6876202174636220158, 18057502510666060150, 7821730895845187783, 8882321799602877790, 16220624224336230256, 15562800387021280583, 3043825293109091520, 697130162105054983,
+ 5394549491776704087, 14795348078061113966, 14206080764200897220, 4623073019242398493, 5850602047109930316, 13783531993760979209, 14764346975698048641, 8951680172224205890,
+ 7302725075484262769, 12572584665857403749, 6499691362933905099, 14476353114356185371, 7381716148644532401, 1573798302334388880, 8128390307766282880, 14547454176248589604,
+ 12109367644744157376, 18037756988516427980, 814572839386760074, 913616818805172815, 12974490864633240943, 5228203788953366069, 11458421156905635350, 14534725545904442520,
+ 7717994192616957513, 12204841747568567286, 3911093871228418198, 11236736110419819217, 3283907124458179474, 11742518530965881218, 15033771683298452989, 5201173013910789542,
+ 13950307561490203211, 2109530041534182225, 16966608361725322273, 17050013487213560977, 6582524050666171954, 8348662591649610062, 18257068567217206885, 12084557407109908164,
+ 18134052713971906645, 13522834551049471624, 11796035797088472898, 4408384297249203940, 13184035659663315829, 10318594606381671372, 43784547091343099, 5864759723778872652,
+ 273751009786971880, 17433500308695913052, 14095169299451888252, 9384697581863180516, 3670310635026975567, 7512955195757960238, 1061441390592395566, 8687976820410979119,
+ 5847434943730572861, 6139588162124989313, 16640425042872222986, 2868461478718351559, 4582082172412426839, 15999623457772192080, 3911979819012561187, 2157661418664981538,
+ 1679437503202036379, 3357369754420384657, 14227038694942012893, 10849243658636382261, 14698996273665673323, 15473144682787301630, 12886363027445513063, 4218522843946321369,
+ 4305571301049736163, 7569433164869749160, 9286896219015408706, 3061813620853218759, 13836461093317197944, 12893856993043908140, 17368798334120212403, 3507360881370918016,
+ 17887381848254057882, 5080557806347717197, 10977337278063191668, 13725230338861893934, 12618602415751234016, 7913767807930932665, 7119689779124353718, 14253582440914432409,
+ 16180079775228731323, 7952167928403988663, 17529082815577219791, 14568667835887326401, 80331144542697157, 16190739578357987369, 8688615216852085496, 16668129019590270615,
+ 7430615013050690552, 16490141830322172211, 9793941851666815428, 12678643730925993658, 248986629347794843, 14250412400606998841, 7671639316274744213, 198890803142183275,
+ 14099347485199617889, 10490379547566026579, 2979540189595374423, 1947868558918413664, 14807112327541983728, 2275659819146309375, 14464189275380358394, 11448352451702208893,
+ 6508543885912264241, 11768683250765503158, 10952591955476802447, 13816638967602836229, 15422608700909569544, 2270147616630940233, 11179267547770249728, 10448962907285827304,
+ 16734703714440778657, 5221611440144584595, 14517195248502569256, 17256223341936104100, 13260411672715241049, 15406026631864099983, 13737383554048659263, 15455560302156741675,
+ 1751310744149771156, 13066379976997527874, 3353407191478085130, 1488844206746128853, 5630044290904406795, 14205180196532348798, 10021323252656730909, 4177092513333669169,
+ 16120743770319899284, 12307096057555293266, 15459231763664176764, 889710173957193674, 4419259815443372538, 17256024223837463177, 6105822529166486288, 14459703836399905847,
+ 13918155529832261002, 12953608416498491778, 4285102780598248527, 15853935066983430239, 2610351403194505517, 14905969693015035277, 12043976761492019990, 3883507340167876094,
+ },
+ {
+ 10994470105201067642, 16033352651464686189, 15774427704233149170, 4596106024708523955, 8854068685612048031, 13529126595187822090, 11509928293490788309, 11620850709434334690,
+ 2068337952936280662, 6856444059219131741, 17213926210819196459, 513413453513211652, 5520465604253824995, 12970886588014924605, 12296275668437451137, 16808287251356954962,
+ 3413637388791433603, 16937929882486384885, 11215805046681294327, 11559398419268924015, 11697258172757450737, 6137795229387768085, 3190984692021653574, 15347236891154706739,
+ 17955247196574331290, 285928599571507431, 11902987704297070981, 2841167725097799100, 9281167410796848363, 16444340476026142274, 3423202423051178709, 9101021305726472246,
+ 7129792976024548817, 11116052691850916690, 13765136177550464799, 15815419857669199536, 3490265904262366633, 3075105190264147815, 13090337405102529529, 3772589223509323910,
+ 1865569627339845981, 14405510401062639678, 16548248891153314376, 18134729633170718083, 7342289204282809135, 13678804825671892797, 11309594694670555920, 6482941613725556691,
+ 15861081392207208208, 8373665453512079014, 1489270058265748447, 3594001972161344915, 1115942979837741628, 11080512787291988167, 8129036993725083225, 3358115086982889477,
+ 8869273893561412239, 10349812081309865197, 8913824872487209090, 3148808685596180274, 530908416884501807, 10284103744075805979, 13606889938905754722, 2418062398586758364,
+ 12413212040240282991, 7478703750955930147, 1623142090654002287, 3800351594986125089, 483782039790230646, 7823394007049468275, 7643246613581822171, 15001087565682987468,
+ 4027740097060141621, 11299467443664320327, 11151439614540606708, 866223440636939151, 3440384894029167420, 3873155505711856537, 5954029035616033594, 15381861683078360474,
+ 536906646315848696, 4762701494256722953, 3244970900590282998, 13809219136128336072, 6259009038800226590, 15448401588227595780, 5228638796256712654, 52817644221507266,
+ 2298603653884322501, 601221199968240092, 1285112712738261659, 13882709760355308533, 13563254433762438170, 13447718334658435053, 13493687958886238103, 11032778298151679283,
+ 17130548285854289846, 13389656739579236890, 11825243553071488462, 3295650559395515332, 5681593832990094932, 1986112566735995333, 6900923912677810734, 12885634718618182763,
+ 5991348986251844787, 10652959958005297329, 4727138097613835700, 11665606181484659859, 17818486388489237050, 17381457056943650121, 11483713831771420728, 14346336385265122549,
+ 6806904066777263417, 13464056321884738018, 6982639430167685380, 7669355554060154277, 5859444273628884163, 13073052226541367148, 9082949662405155834, 12390918296713433257,
+ 8626760324117733843, 16403951745962159364, 5837394731212322914, 6847167719031297053, 2888142337636247281, 3409818743436360750, 1677113113408664401, 1222157560539217476,
+ 2099068961815044779, 12562181036171628906, 16101890713848274666, 6017764396716978009, 13064414845499489050, 8419967306079989759, 18212218073753486044, 1008986649080828268,
+ 14883940931343463293, 12319376897638542821, 12367185247284838739, 5861252096374912155, 10018798299303262817, 6422772579819453330, 2070123940915480886, 12141302052520620302,
+ 7347211048650261526, 4324486111460763452, 1238262417801832007, 7620393213816116173, 13629115925647807355, 17975185632155863015, 6751253893269214002, 3530207280414557051,
+ 2294668553308381923, 17165287249067859297, 9897724011823072967, 17409851874231284314, 13888575318617508639, 12092766090379658603, 8412268183255186961, 8046572777745318483,
+ 4493498909328627892, 14652253245762410953, 5345262207345951514, 3970988687199074488, 2208768899312608620, 1510839623981662369, 642580537527653605, 14940785106713466054,
+ 12480576687390709541, 11249304546887636239, 6075047530003410694, 6121669232248820705, 10309256543424460776, 101796817907499847, 2110451750984093184, 10778335615512351243,
+ 2885395260584376454, 612553588199732392, 17978615338185945515, 10414937982597482385, 6078565000469652304, 11381998957121196965, 1536418226678270090, 15114446343888525079,
+ 13178439025428734594, 16388038968589028071, 16165750006742628176, 17903639397633243161, 9560319088110737392, 7888974492854359310, 16116323630343158496, 10177829649622711030,
+ 12866004572891901059, 10190228418101288877, 4717913135317392922, 13460871068228811018, 4756401774767581561, 7711771325649205249, 13695132008344052711, 13361851345229950778,
+ 938191787235564236, 14328703178349042628, 10217257468952800498, 10415924210674864240, 7171739159943819214, 6502538915112206091, 3300616451202954689, 9576887970605726107,
+ 17259872866446195810, 3694244894150718459, 3214322135693408389, 15571503654130046642, 2332159362399220208, 14087696453363417437, 14318693886833071484, 6131092646618026480,
+ 2378363646789710539, 494954399989334038, 6915496914984330558, 757197055392642378, 9097171378505823650, 12104288338329241488, 1905673690814481722, 13011078677095066032,
+ 8123217305309052526, 2527408826476194625, 10072980201182378784, 15489255866670747761, 12978131276673936002, 8312787524887306087, 13561211528259491689, 6311660759462081662,
+ 16247831307849916845, 8654394682758927217, 1180998443932568506, 3847327552938166714, 17658215545137701161, 11975413369861829206, 10679985684889599173, 14928266362876119222,
+ 7605162785889302718, 5718899840342120733, 6057249397801266401, 9486440247354542717, 10618370743126190918, 16153494357430969850, 16785070420314699787, 11277000340020413902,
+ 12712274459771886808, 15748114764509451764, 1994630377849062253, 564390855806287868, 135493276425567450, 4305883520257541088, 4842008929542073044, 7035106970720249617,
+ },
+ {
+ 5757832545805155636, 17609080533053427627, 13855662030602089892, 9622498683452213297, 12281586442064581946, 610529441714948581, 14740357477019436626, 16983190111725558863,
+ 8470173855499872166, 15915210069852758660, 12368087246231545517, 3867174158581293223, 9082358201271846042, 16663463402757176964, 13740908859955866819, 7748969412274962298,
+ 13375424826931892789, 8508645806391261552, 18359272328758476903, 16433208398005282080, 13071423254987294580, 7999383023651164492, 14514701556577587750, 4848295889377673720,
+ 1020519568493754982, 10467781020270669620, 17607771250627508499, 6620300538436640218, 3858527175523168333, 13798129597334232823, 12495322651937391008, 3926713306087152023,
+ 5116038770086590715, 9943789276328255827, 13715439407309914230, 4887040860130243503, 18288477612078228131, 7640050573721350378, 14537362862524718973, 1658575506924200900,
+ 18400486168653423966, 18026904540891591040, 2037921821724043966, 18010956304511658647, 3751340308708032971, 16275459031633957091, 17177866831427164840, 1635431811339422016,
+ 3897330996014812355, 14347736534650088376, 16862404119462964612, 12216005411754788835, 11561909516001453735, 6261639657930189859, 9101978764314050064, 12280333151331067982,
+ 910276569977357717, 2872810615152156462, 1942106118995935911, 8734004121981822024, 13862118436432879698, 1912639548162720505, 13060137938875924732, 10642216911059510120,
+ 5132232031613377471, 3053931945527415993, 2695169719602504430, 11855046366025891084, 16462491222215366712, 16664416596202206835, 3077124688577987843, 7109495326459366311,
+ 3858762384693274897, 7520194928987242278, 4496744297661574884, 16159343214247089321, 6441167128044578145, 3604951495694157848, 3072075325567814428, 1964798869371174506,
+ 13689578474000942304, 8998724267038772595, 3350174507425430811, 15015413657749135511, 4848907595494161497, 7935644131326131098, 385849363817229511, 10087353746314516691,
+ 6067717601858189468, 10438404859927821308, 9991403870358837471, 17728358586134467771, 13476949968946830198, 7311235997042381570, 4429835371823736972, 17463216455983091974,
+ 3446799583712077997, 6763499387263152328, 9231082163128267527, 7733182914440272884, 17225092062157610409, 8163842076250628108, 14230171836718858439, 8195740716303697476,
+ 57759226194841354, 14962480748390190557, 2047254208877381876, 2830450574091058648, 14905486147404440754, 15792397930637469157, 16383409053580142957, 8686506861607761226,
+ 11120178572139812895, 11534611317577597022, 10437856602660752754, 13925966850017814278, 16776862023867799947, 17362554889424346260, 3408825299914455645, 2606150045870067742,
+ 1119490156854665483, 16772836556735128526, 8607715508817325509, 9931017263385440911, 8799481055779587022, 11740562709785257943, 1956849872282864970, 595288629437737005,
+ 2281206023740505682, 9926719762453358166, 4061056930877891291, 15576657447058286418, 2614863633636977404, 2206334532799385250, 3474210873232711865, 8477734925846980028,
+ 3340305349427486363, 5188523629566097463, 6160790887356413747, 9863988255192204305, 14066826366866912046, 11932779567129411871, 12863116515340321220, 15222969978942513045,
+ 12758642615291604404, 3209096496454847183, 11182564090025111575, 1931382908005123647, 15151007707308352245, 5954868822601995640, 15742433078711209582, 3347972315985628205,
+ 16490337555999239547, 8990885971520264447, 10360258506616249745, 6630482203535351500, 16999397485942558889, 6788531264412870316, 7229988042452941955, 9856917486277085502,
+ 10734804718395003162, 14385908274213668346, 6022876926209575383, 371182857232839872, 7412466195447558077, 7961511694540082231, 8030758761844457080, 8046696009323575767,
+ 6399666206029163609, 17663811097734928631, 11470778017516778751, 17702646621253134508, 14876192013866833082, 12935280981291312476, 1467399042175927183, 8848908916563618579,
+ 14359906986235888744, 4587839368531771442, 10497105860390751463, 14244616431904836676, 14906421794748569105, 7955760173743833219, 17152550002849021490, 138081467951201370,
+ 2349932829525749283, 14749355610935637731, 14751838563606808702, 18058355029195133983, 8461199524627795693, 4219910612621001507, 12776964042319070094, 15905069451612157329,
+ 1355498131832429384, 15975585519420207244, 15757549940022310945, 11357394570768845273, 8197697208897202922, 1901621009956576265, 4290101364365692172, 11947148614241269523,
+ 5840762872471677309, 7514977007446174348, 1527060545570586419, 16609099121469101859, 9320342325870097548, 13965331598495025936, 9683433385601741715, 8845290689278472178,
+ 14119395813630029665, 8634138112335652518, 4658233985292819043, 9622846971166516113, 996353694479629398, 7404001202059682426, 18145845493517798921, 6449664057746980028,
+ 16521775738375440178, 14142357232938693849, 10990147202113110230, 9959066954415780230, 13034621383041722634, 5025797339352139456, 10322111864643366459, 2614961497496501105,
+ 12557910439954509999, 12230084394391362713, 8427020226737465235, 12518021396913122088, 7134271418642259845, 3905501389650646356, 15707807001177039585, 2909191175572717037,
+ 769225015796913609, 8753059944613840368, 1611869096690868003, 11449828005116201197, 11517191665323762104, 2697717476879599241, 11188965056705006770, 6249359019890699508,
+ 15594613737645580981, 3037156291483237329, 5604071908229825169, 1604475019228431078, 7766918222574600204, 9339455406279970318, 16266564745812517195, 4799424886612563329,
+ 11601570184515356415, 11247678357869162156, 8415095559389686315, 9625816053586466047, 11807876143163335372, 15690209134241397733, 8684637224468015605, 15974968144193406020,
+ },
+ {
+ 11275984449259340674, 14922638629881602958, 17186426309348732477, 2094500549052461127, 1245672419927595636, 7529921141299797945, 3181089597449057707, 7749529014968942143,
+ 12842581296013847382, 7845624487558348664, 8747439372999364825, 849305823897878133, 2217292264321907974, 8173534404179332054, 3588643190593022889, 12047811416592660080,
+ 6959527445237219799, 17170778245769606301, 807762173031474321, 9151445233487017286, 15103411489939446333, 7976669669591851659, 14824323986579638641, 1535817133122800221,
+ 2365455792963674834, 2249234228455309648, 5237136544555745182, 8814246419234451463, 13017813702607013347, 2241193992640629733, 17982226236989862510, 9800528153239873634,
+ 14106286229675768856, 13210380707737857536, 8049798856134302097, 9734560649539497957, 1623205786926824588, 14287958304696241706, 1800854015662052826, 1212925137572581937,
+ 3026947385483276729, 13169758424754896088, 6834174387382969128, 7206544572805916922, 6934064062491731757, 10620406409375153308, 5194048681860016706, 1469688754160716493,
+ 12993558037376222988, 4669474265806332952, 4832324179281001408, 4413085485243863075, 5405837906169635761, 10091776239975644239, 5290719555562061144, 4213669820858776415,
+ 17271890190678399664, 284117971978206043, 14999643475704678808, 6264699496666119083, 8859934981351804429, 8300880062060532031, 3667839168820458685, 17133003164910522197,
+ 2997449698566957466, 7884833253198684528, 12498695470674374226, 3115564300965505022, 13376506794876055601, 17895331353362057251, 16990581103090492269, 4674862843410794096,
+ 5006430925084708381, 16053115139414023422, 7049813739763202517, 6676629839394309180, 2410666182773729148, 8945309185393628712, 17674821778351929167, 2683163960153153804,
+ 14505459891908974624, 1849503921080779239, 13990490450160111440, 5181610498614712941, 8653117877167770474, 8814499350344722617, 5343755460620184433, 243401361005198353,
+ 17791633957402883820, 5938849497629314459, 8356801465373786047, 3163889445599730677, 9723709910898285412, 2841967670768995554, 10896757438566636700, 7434996874241771203,
+ 7372440365321345784, 2862880403174214663, 8420116322934238724, 2818552325655576635, 111342786192022359, 74705672003306783, 6591511741514218962, 13124391319573921957,
+ 9931074113040228488, 15362193958291437280, 6613002366394890474, 5258448208225327677, 5126245224005997831, 8755434042915330962, 4405830479557957055, 13343591337768921462,
+ 9242810916685205316, 2838356272544913798, 10099476622515682546, 15551100165305852662, 737838832976059770, 11188026425234857295, 10166119013055947704, 8628233674684143419,
+ 14086272070125896854, 12009239107306499026, 6592816798466831203, 11371087141421707743, 16277593907837607793, 16449564916599232060, 17471042072158213549, 11953382410698485455,
+ 823047236880870350, 7830286704456953572, 6464058012664717582, 15521380082330968365, 10182202815901986450, 18143527955004661429, 3207725716803325445, 3193887560605396493,
+ 15569872930692175382, 5346306686087630019, 6632365629111827330, 15865883909120927399, 7200040798106687333, 5723169387900917854, 15027071603483375844, 11018037359110138454,
+ 741009114382240162, 13879961246490269823, 4790141947869766711, 4479737908710468090, 4979904969974036721, 634413885118778288, 101727670723732801, 3160663692354720401,
+ 14613304240735929191, 7658770973133328064, 3028731168325633068, 1486737152001467114, 16715835021406106944, 15214200823642287507, 18441898204110447697, 5669963154424393607,
+ 4313074171267006471, 16217697149498819074, 2027993870023154350, 18018610229517524263, 117205923522590215, 1784380715025232199, 307956084870608716, 4140912860495129528,
+ 3938313265660748839, 13018885466029447230, 12461354215832959401, 14495299413345669177, 16234805331742830462, 1578184208291171741, 17168468892394781152, 12575003872292801090,
+ 10424668870965277693, 8952352824982702795, 18017430252932614600, 14101949308713937413, 16342352613992153502, 6862842966642668923, 7238655696295295374, 11216037980129705353,
+ 8381651800836461905, 275661552864412688, 5548629401959193001, 7174991431295718071, 7441251258233899268, 14052832476485122412, 301250917807487282, 17797202428295526090,
+ 5173263645506172639, 7621852612520351243, 4670271322283433581, 8663786927624336026, 14433502423455668413, 6926265712671351075, 9776903322968672826, 542207238644018218,
+ 9588056942118198784, 14024040464098859998, 17712446698584877455, 17945455593410056285, 18104078100936060946, 9565731894604896153, 13213845893767817606, 16043575675737450101,
+ 3436179376183792489, 8124476612946977721, 2169106928994773725, 8589833084708985039, 15137137534993478994, 12789920909097626916, 6767184505234754407, 8369097443031908183,
+ 16448089571044203401, 1396460397790470601, 9796571033221459545, 8162511506983092708, 13893406727199803081, 16188892369453405032, 16023105002157964343, 7061289566170423383,
+ 10448117565485426152, 8145533940921335838, 1579413779055052019, 14761309857236492983, 9815666466018583851, 15131106965446764630, 16986514923298225288, 3751380281663155649,
+ 2224143503200162255, 9410177163066991107, 7157489123570995878, 13420515130397093319, 2675717750075873421, 6702938999067232257, 394050047437199204, 11922372120263518221,
+ 9930837889018815244, 11397955329795684905, 6931891256005909739, 516538601500853702, 923444815286512108, 16665363217941733257, 1377231042965759902, 8439923996651310778,
+ 8743817608749172620, 7804846024944300667, 507107926259802795, 5258061329638283805, 15071070675288472598, 1848743106176521296, 16295570828486620412, 7409462376590083349,
+ },
+ {
+ 10223936123327786454, 2321887803300626936, 13607747528117965743, 10456421630388400123, 3266519591187534953, 1053691207967834494, 7151443045753788549, 18124688636664828399,
+ 6946488664257064464, 9775328903143754150, 58700362449753229, 7901817271072273406, 5074738678159003249, 18397239731906005785, 15911102965988125888, 9277654580204145348,
+ 1874700306906525099, 7337351570334852732, 7954195906114335343, 17230226427261850070, 5181329498860909261, 107167242193237170, 4364856176182710332, 14611171841443342914,
+ 10990327814895720476, 16588412771027628390, 2850177278998422188, 11569813740328484871, 3922148781422941186, 3581659216490446242, 3646872304052351376, 10887870617200051198,
+ 8175121141368087217, 15079393830876652065, 16766987879267543828, 10582796406546815217, 16832256009371376870, 10042650470778064892, 17867163453935683780, 7123418345572641116,
+ 1782651072016504775, 11033882508408412333, 11105300284232190625, 3276372405089292929, 6399472468041061588, 15319690780388315076, 4928530863063375966, 11596222198340277418,
+ 5452424185619067445, 12050433097586021281, 15018766900844679217, 273234254926088220, 15426383635547379531, 5213762666043737626, 2829185842069118470, 3230799283770935127,
+ 280030342772428106, 17354882606189694746, 17455445295460080284, 18076709382926018941, 9895982528034548913, 13681172423928175330, 1656433131309057012, 11935757350437721114,
+ 14437472586336929440, 17387041914242263390, 14861577074379746687, 5146556788377906297, 13036814755814090195, 15108836859245573802, 1667563994642521213, 14722845469958372258,
+ 7781061995240980266, 2066078418484154391, 4089376589847114892, 2434843609506038666, 18376509832460312721, 7187709121160627756, 2455436656049651823, 14419116837518875372,
+ 1270185079976403265, 9214436628411717184, 15450450827683432913, 2880014804806096082, 15448799931519826757, 10080240282127928735, 10673974088219619287, 12998944469117087518,
+ 18028691745165748687, 8931519595829275195, 18379893031942930865, 12120980098075498008, 9596371470855895261, 4133427878071274570, 13159312399982591699, 1639865074052258019,
+ 1661696187746428348, 2656198437014978712, 13769477291975851694, 12512848714492869444, 5980926616932344693, 15821983893144953005, 5816015630031200526, 15887565799959254415,
+ 16463929919291729278, 14920112063826664828, 10056471508435050704, 10696267144984026763, 12049530292963090991, 11926086636123189182, 9464890378472966538, 9719194510330239601,
+ 18110249193861022779, 5612784685592189906, 11360454032996393150, 6795401682138043705, 1961398978248902107, 8999847457729547905, 8941741619641401180, 6086938661427918241,
+ 10153847769616409178, 16591913677701651064, 11369151065084080884, 9493662847124813178, 14299126668179023903, 2451525064998641745, 16055696673728703463, 7600736037142850105,
+ 3331094657189179723, 8579232659787725167, 11007434086335446933, 7488529659250917009, 17217317841029490639, 7202184784979805220, 10955913831459448069, 9000508720500651545,
+ 12966352809478736412, 9643078159289928329, 1577471789984999485, 3065864601417367655, 13683627106599504109, 12782093364215596525, 12883456287502344466, 384144415368165580,
+ 2010495664660209855, 15520744321638216967, 5385447087428594566, 12897496516555487055, 7343279016502952747, 11323820480524965410, 3414484402519450292, 2497721392671865580,
+ 16065061175628739123, 13981641072236579791, 6942197664821630380, 1667302033233547783, 11254994815452647404, 5265777633437443003, 1242523090981771320, 11333562630675377559,
+ 13967113064180443992, 10679621597408504672, 16223869620849920309, 8813286726524283182, 2000697484240530777, 5417120625530117302, 12109041278382745291, 13257693171778388584,
+ 11737174095432286870, 13459661128718998448, 4655386724404378836, 3119616752887818891, 13324285182058113375, 1078137740985006431, 1610820303976884986, 10433300853468291587,
+ 13552049979590080822, 4379357932546858489, 7594644709024650108, 7268811721525297059, 6728671136192921869, 8183021491392757516, 6569378188404015319, 6481294988812800823,
+ 6071501855022377789, 3805172128285889052, 6802689634429908933, 11713310861536728752, 18291420299754708339, 7582277238191809023, 6183429490206797252, 10452668103640934631,
+ 16083971675285095468, 2965035896159234764, 9379493912871845724, 13109394112987303523, 231416620515766991, 630445729708227114, 10015412267172773636, 18104521419340860951,
+ 13169770634232644390, 12462964563528956096, 2689434882719314440, 5936289129999384022, 17773436345927091892, 2631892834965929605, 8167719122803587721, 2458223071457452931,
+ 10744932251818515465, 13367480890778627284, 15909410072235544918, 9252559547875570290, 16355895225285344183, 5115003811789024567, 7369371640924666845, 4586532908333332092,
+ 15999516281080615520, 10972478396135936147, 9002100275598932738, 9043399249857967875, 4975188939801806064, 14240375126510021504, 4497218436093285729, 899858725260616391,
+ 4788026362409273347, 8940774760122210910, 66616358927988247, 13054723206705508289, 6256911511259454580, 589353691478007084, 17831732841616807303, 10847300659030294141,
+ 3016120727980669767, 340560476433708073, 10516500208515875331, 14933811541787518485, 9941997565508314935, 9752941942651093949, 1928238223054143159, 16147782066840428568,
+ 8953947187833770878, 6627103431982157573, 1279114735725952920, 5378128106441279737, 15447712974588571571, 10552843735673449739, 14956126644414569830, 1994513099978508850,
+ 3339871688614476269, 2937156932445032796, 2071344655111136329, 509603454456841298, 10929109734329673813, 4531629375826317764, 1802431569220697099, 7971315018970882891,
+ },
+ {
+ 4307546666223047416, 13914498777427309061, 6812156124789414732, 1680174913762530491, 6756325174405757110, 2535144759174364990, 11263393915003278429, 14767595249676286403,
+ 6981289090623415465, 8930069945493884157, 8223216504978132408, 796477795300774057, 3120155459209743472, 14825603392456550470, 3711879425019804830, 11041054473536223927,
+ 14135544350792474553, 6171419888118563156, 12495035851387242130, 2843310747449412555, 5668766087526340552, 10117858698838396246, 5181027414039980750, 13247490124270426,
+ 10847249691051757241, 13332329214721571156, 12457628412284474075, 17044657510482764070, 5218221397478351943, 6922630493693541156, 6207927102810766143, 5644846665334649748,
+ 3169200533952511826, 13123046990387029463, 10303462306703730135, 3315008874027436174, 12427067008037332041, 13190195608614554888, 11930408619792340463, 12881268563913740396,
+ 6700154625941523096, 796110091507466427, 5388163944686450548, 3113104467109529928, 12930558649635882086, 7536049463980825715, 11851825635055645132, 2654888532632283214,
+ 17530163750522835336, 9722612350417778827, 13244522480616169491, 13740964907388846002, 793235549667461791, 1998252773625399730, 9697911491069876538, 3365596945186794622,
+ 6506371904134856034, 9541373683805144018, 3149037850125132851, 7111810601994573897, 16732445693192247884, 4994069681898274622, 7223719243730104348, 5649174084086150712,
+ 4640203560336739679, 9900565149463137003, 1093715346388527458, 17169722430093991108, 14911871124664548213, 8091096017016862097, 2911569128494944136, 10192934055727569503,
+ 13251895658744991883, 18083873142202267221, 8241707078409323349, 5240943892375002838, 7329419846032881810, 17784005510850539551, 14034072243705748841, 10459194866755836024,
+ 16396304393792022741, 510358226971662693, 2002127643935690257, 14681425695950046262, 481902649451190280, 1717807040845239061, 3358903321435525144, 12361252150277163759,
+ 16562962997045986629, 1977585352264948582, 15588655103333244279, 253344970359156714, 12939363813291366775, 12548224526376199974, 14450582788883748364, 5869541533939881460,
+ 15346378007864735078, 9904337403373774864, 17933855382993937953, 9391248304932723816, 2250334275383247202, 5978804056136420670, 1528238852592364099, 6491566547950141733,
+ 8439860020828526478, 8674532462170555517, 13889716766953024111, 1166696901054897271, 3599113965286446905, 3276801695553601157, 6631284861504552600, 757022358659145685,
+ 1117291222568805017, 7148845117809308441, 4044258438241699262, 16593433123496068718, 13992202310165134218, 2998204381658279116, 11137382784909083266, 11972581616612094469,
+ 13970586842647892965, 3680317572634289692, 3416494470839852757, 8341497967750607131, 5522860734580749818, 6847292760525096386, 9317394708872196559, 8522252330730808934,
+ 7951036477225560362, 3718789111033607646, 11667291286820266731, 6927579620295878614, 6796746225018555970, 2070061401518338268, 2452626412419851405, 6130352643925327785,
+ 128317967134069640, 18241952544586374251, 10689311587315723515, 16537482248073764435, 15914436258832414511, 15518344078557945167, 15883120902045970449, 16251109112716994957,
+ 11957637677355945602, 8965882791601648704, 9390723380463051523, 15025038333096274711, 14983844148312808973, 14488574793452236487, 15795526425513195718, 11651978879566144360,
+ 6838341670149165895, 12326189699069923388, 9258908232339348843, 17297038041466207989, 5242182985453021593, 9648294165629935142, 9715128265517308395, 2662662536493743894,
+ 11289660454922592938, 470300170724369955, 4571871646010841181, 8724229358245178735, 10173959190953118810, 3611286966550223594, 10810806064515103403, 13571898391208639897,
+ 6782514579206442308, 893150666351200058, 2099535232722538223, 300696716419619813, 4778737709473194628, 16786610674618434170, 1751624209281870237, 1677055321500839173,
+ 8950755642341912951, 1974922263791844623, 5333377426024439800, 13909803085973342843, 7639604505882407937, 16195074617613125552, 8954461446680048430, 6823359994465290754,
+ 6790869670806738567, 4499989093713728199, 17496748050437205626, 7194829324969293638, 18432532588985091708, 12041300075343806904, 7788543067151959941, 13703334460383638224,
+ 15088053026772564466, 18172984562199594382, 3119317310593433742, 13835498717175822656, 10015747176747842576, 11008571041665092033, 9377877781626879674, 10465443099481589185,
+ 8933481722931715231, 10811948483433850165, 16314247813274714890, 503147444786527544, 2533443680646505069, 7617720894972668760, 12477410920355308950, 422423516920457876,
+ 1501843276089632258, 18180777013307233967, 2535426997299341256, 5318958493284100856, 16758265544916918856, 8118598628492446123, 2511013049335776232, 11181447982017705826,
+ 5996381448168274884, 8108757135223610234, 8067369725584584297, 7184925262427750366, 8199341143188441284, 13031376443169557016, 16036606349050125404, 11958720833222295864,
+ 15336831835795822898, 6806921200736832713, 5959923980569572002, 16835674225684329982, 9985579070716031367, 6517360043741716503, 16609518082836868591, 15878184747124924573,
+ 4786224758360153905, 10283234398763800487, 13695837902615591472, 7260571034197533442, 9225062953237806971, 5969706575516748564, 6537573986766117532, 15754484267739133126,
+ 5004791824134844360, 4971286291135250424, 1811371968353107289, 15772458387007519716, 17381493577383518445, 14326903440006940586, 14237922492359728234, 6234116464684325431,
+ 9293668142017263584, 17134656749295164870, 5879480323258506465, 4788695233340705697, 17809277710857366810, 15157733645094688870, 14143869691462709080, 3162286338158090229,
+ },
+ {
+ 4471613312912718060, 13601785177495103328, 8083849806888905297, 9053545412554844461, 1876605639999952155, 6124537911475118673, 16877891074567085688, 16541720065079132344,
+ 18138305348849897995, 11498878557125400853, 14125346336359365201, 11991638807788804369, 13935355764550912568, 16209147067155729660, 11086190285777019292, 16727288863943596410,
+ 8873152377611229668, 6260821826956744083, 11489383533808918262, 6585112195310961861, 7365821041919648819, 1936165695028211587, 16164747634991415711, 17957320776652490392,
+ 13065030057172289846, 7278765836407486850, 383827150208081081, 7832055893191054601, 5249323286889945617, 1631661912887095912, 4431088469837785451, 2866263172993336891,
+ 6140213328062333551, 16704025329777067037, 5839832043209342792, 9196354634862869885, 2735762290983671190, 5323501742548818139, 3199127466610501482, 6719610946615763192,
+ 4032564124345275293, 240208626666701335, 10264845378648897042, 15608289126091352415, 17866864241918154829, 6207477318890336372, 491708931325462670, 7002400618323099260,
+ 11507747783030541234, 13683043390668171754, 9924966309328785763, 5549516466627385909, 3724001483041449858, 2408778220985427590, 10510707601023430105, 12862520912519186496,
+ 5012372868660694231, 12033652755433827667, 5960732801180177919, 6089487525155575261, 18400192045204062277, 12754175136438960528, 6170965736016609163, 10141374482047067946,
+ 11781193922347227447, 16937876011211881637, 2982972111293427964, 1728244726890919596, 3327997232304084768, 4754833121145407334, 5871027825507908591, 15772493911709624273,
+ 5862944033912881884, 11132730978666088350, 12167180155548291888, 16449838286695349787, 14399676610730016345, 17071083123672650337, 13727728787831175379, 7507765123422748291,
+ 4883057314704644529, 6606742732149784343, 16070555141087563722, 8408294891669305261, 4298316003054547788, 1573237855902568238, 5357176885445018289, 5855732582172939202,
+ 11721126538980357625, 7216004194033429913, 12765889884935948315, 6003399149613653734, 4571046290325663920, 15023470461830485075, 11463513436288928362, 16135217522406374542,
+ 14300855104129153291, 11491527690078786162, 5674183453812271017, 5044682546652747711, 9153978680942338831, 400454762074409409, 12351470849648616331, 10559229320903248832,
+ 11839166519633311143, 16489041153719856002, 4820713204385149226, 13768001962143023630, 13107713406478328396, 15471245828032402460, 6733652296473831544, 7665657127090105428,
+ 6114372925011600760, 13061069979571997963, 12156991017966837826, 12403009591138383583, 5761958709898299258, 1907771304051897263, 3007439379929412593, 18410854883912163962,
+ 14408045416865494124, 2442199381796557002, 10475326303775862953, 13637445125954159957, 3778313603630758744, 9247460369874272772, 17572237597883145087, 11338350096360776602,
+ 16486104352829691584, 11856560607954368882, 7574789609701925021, 12240083183242127118, 9872957337376856275, 11508929285199799229, 12084460294902152451, 5537978390111837838,
+ 17987471707896681874, 13038244305326284081, 6130570860032709721, 13697749322780057427, 15250226813583295859, 11214437899623632196, 11227424166765094355, 16073863048822462466,
+ 12399110995748365941, 8259021120978314016, 13249466772279340115, 11201772702582865107, 6680428295197296613, 14825065014187212998, 11638499193934854444, 13944483483619391536,
+ 12792956896156056673, 17521293339475028823, 5738513258925006248, 7070317930523546296, 11175557000762820214, 15356683721815441509, 3038847573301969409, 3712495274059632476,
+ 9703339902904468931, 6903532388259237787, 6598718994389707957, 3524767540333384210, 17230409287786477986, 398508689507908728, 12956323124513250470, 3050411724956364887,
+ 9936181795041159997, 16502578966693139346, 12331385558134766329, 16368402633574222539, 17549179704234998175, 16732353056786161257, 7018642409136068141, 13907639522324102590,
+ 3202436334590923936, 10521323531132496064, 17087322194145514933, 2557934785422493682, 1054672647095378624, 13197226692868722325, 1089249862965154915, 6479925079582112772,
+ 3880905381896902231, 16702686708922296685, 9815623322445801304, 3275100382267252573, 6882533095211022198, 11000715667767797665, 17615808190378608947, 7418883369171949489,
+ 15109802007060416273, 6172716471862133951, 10675428021956965140, 5719399515177959211, 13414568332401128475, 15437234656470291881, 15174397947571718085, 15093134682110367825,
+ 12767354263003560893, 9423701226679222965, 15980976879563466685, 16370172902785037004, 8843543841582241065, 16817827501960591898, 1524531339915785994, 11624580276885598157,
+ 17147068462804570503, 413038056092618151, 5178874804057490245, 6267545065511948977, 17573054445714024932, 7120476833739422223, 5707115123830922617, 10971108972695387841,
+ 12546478152711337402, 3213958910433616233, 1066524299473173792, 6276669942370566610, 14816818963020520773, 7372806616674468946, 12220278123497641030, 4953625488417522128,
+ 9443607421568920607, 2583030461169574647, 9900905053299561307, 15892418329702922907, 1184158607153919924, 17842123060489837980, 2681791639846598155, 14394984710894548047,
+ 17363678829105605076, 3907856148144141388, 7535915338133892356, 4354053943074081189, 9644954727645503986, 6816908226661921388, 15867806610099035654, 13874346146744695721,
+ 2654416582410062512, 9232324386948815478, 13864516581612906345, 17132638924861000235, 16875312639179645137, 11536057026563830119, 1090137303802556590, 15667188983774010898,
+ 4336697789732072031, 16054848662858822197, 17934449813415035557, 14518380622432694666, 14100582434152190048, 6262866632984449325, 18069160441815400774, 9798342103009260692,
+ },
+ {
+ 687104890149081183, 1938816743979859301, 11342071981175913904, 4270322127259097184, 4964776270155503160, 17849333582861534767, 15814479527332819692, 8083943565773123408,
+ 3986890035154628803, 15316660330065019741, 11560940574529263780, 12576851520091035878, 10883944418224886092, 9908329441525486205, 11238684990962507809, 12900626758810151662,
+ 4077800412543228484, 7173833470188867493, 4604439809988262876, 8090706047559948234, 6117166546062912279, 15748537214854486946, 16719594636199001564, 563850925691991999,
+ 7109145612337154341, 10502845970727626855, 6527587047878193251, 7049690136888681718, 16750703006415156202, 2559311261501721445, 1222802194566919230, 18019462938525560766,
+ 322982839910423931, 2590270731903609206, 14012397059804449007, 4254569016772908483, 8508420263334539908, 9175848619353159735, 9777676085150569067, 51565612432741329,
+ 10233009747926770582, 4858065366607044068, 17837971428933010861, 10016544790726166742, 15260359301638740999, 5101999662344938158, 8809855615759524306, 15369468763003019252,
+ 4549989329927399868, 4866130696079291940, 2784467031729859937, 12617898982021887868, 2721301974598150044, 1014232218549205942, 5446756626431953863, 7315134179045561903,
+ 2592184418896595488, 1723751460114310463, 11198512454461769811, 6203707848404803677, 2601731234271436477, 18186700975610533226, 4757569424831899615, 13359873383138312209,
+ 7339520839328992093, 3349084091338116256, 16532710170417471614, 4425958052714174548, 16833748535746803845, 12665172019028452441, 11274971974705134171, 6504206221872290467,
+ 17921256304121537300, 16573867650874642543, 18439217559900658467, 2917797231317530012, 2415475159165597656, 2027542924210178369, 13542535558170959524, 9428931592662127870,
+ 17821420552135083528, 332847653840014838, 11832506312426064436, 8314663852639826188, 5744960486691350299, 8069724388577179929, 7949740770152910898, 7431049632429535950,
+ 13904640252203718708, 10747313134117021792, 14667070697514384141, 2600977347774743546, 15498986940124021079, 14740645313274577011, 17403811028932224808, 8060102076475954685,
+ 3517334016249952489, 10826317205305313746, 9664766330892822745, 4145357045596983962, 123566849370572650, 6452080295201963409, 15558941970553263818, 1191650177320171029,
+ 13303794914626036653, 12135860919529132656, 3620217653879289461, 5443663950311568399, 9020744500355505374, 11953955628401776228, 2101814526784564259, 18218598416910848525,
+ 306945432061418036, 3150552227215797902, 12928374347433953916, 13299101042862573370, 13702883318367407795, 4879985767563062391, 17591048490944890593, 5088664549487213106,
+ 12990649636107730851, 13002224935892458030, 4380799638612122791, 7935443787987894924, 10438069978940053126, 302419699431148366, 551508078777729872, 366155636877501719,
+ 1657628676376133571, 11398060401420699070, 15376723048512638551, 9875623386041366116, 9978954704196888739, 13768323245406799962, 3568095027938899541, 11120740210192996096,
+ 15561317952331530901, 10479759083279078649, 16536004935523752634, 2928518661531554130, 736056663469575928, 6131440330717529370, 11275149343392851570, 9601030074480228347,
+ 2397968519718544666, 7793046606567840651, 8971792946996316712, 16581860514259510747, 9067804351225143661, 12322425857061932421, 10680196669251962457, 7226521395530060461,
+ 14734735764746786753, 1378251478826083755, 13509550209468602124, 14458158239096821346, 5681313138148713219, 9054039627425609375, 4235536482830620712, 1502753755874778370,
+ 10053374816829223121, 5956370573575798562, 6842782625116195714, 6646143553705917790, 10487617951080656190, 3930886288654953669, 13141858447548326172, 6966039733711006320,
+ 3565931266726110263, 10142260659118036168, 18315406451571961983, 16857333990691080595, 6889519058362658033, 11812045051361479600, 12503117299646153207, 7223531498406875484,
+ 15243128568488310595, 13798032762330794140, 4456440673105474920, 17896260893739107970, 5519537045179649847, 11908888770632658821, 5444504459295913107, 5588728973482930155,
+ 8914021876030553161, 9071747156187788379, 14974736924255494885, 17070198435868341999, 3643991391224035758, 260637732328275649, 17609644917816078225, 2302996326941944665,
+ 1293835161492069734, 5795029109259991590, 6978986253519330084, 10360755458408067486, 9467742127861782141, 9197981630801498774, 6297600026339958279, 1375635441408481019,
+ 16875086338187280639, 18188722331528885957, 2047282716697647836, 17808297209051779725, 4756860180475663362, 16103863616340614471, 4416311024167936572, 6340715229216403079,
+ 14261984008515151673, 8778722476581699445, 1624620854396291363, 8490568310899028272, 6147263592092458803, 3717414274397202281, 3449154725620836174, 13526832985450537957,
+ 5888744936142942064, 17277140802638358763, 10290028650289820825, 1940657436863195045, 10796401239257150661, 7788083050548605852, 13474393003015663489, 8961936186303685357,
+ 81364364291228199, 11636155122637141142, 927710893705810893, 1403208391464987879, 4332234662194589819, 7913319387647351094, 2589089673930123808, 5387282875974124558,
+ 10611669207938469024, 4491327257859270050, 14129391883837344111, 13924810848449766461, 8792187899775727862, 15048142623570073200, 3933861866144269656, 1162055633060412126,
+ 6883724061714130987, 10416279294253729103, 12247033224345499651, 10291073256885105234, 4480789666471481188, 6022169887991948086, 3778647272347441051, 9527380466585130391,
+ 16407225233842256953, 13423683834633513951, 17454464203550700633, 6033758555290610449, 5063673994339442907, 12250392784929419939, 5954485814444410119, 10099095370725442279,
+ },
+ {
+ 7669790431656261769, 2776517632462593286, 11694025667665633506, 9533794089008895277, 2895611631120558023, 11551410655448788956, 10026541900772270925, 6243136875017000843,
+ 9434484992529186384, 16435748047660609525, 16190660694150989986, 7083965852434071085, 4216306437741910756, 2698742177539497614, 5199793642504009017, 17298424769819019160,
+ 4041169394784902550, 5699517344009382913, 5306272267462320559, 15846674482556330025, 2606351264228884283, 4162585980422107281, 3715151019132039005, 6607223447043258693,
+ 8168579295855409503, 16727569921530031841, 6182114460261837773, 8940603165471931374, 6081572078077526926, 5890840443923124563, 11215305828294759727, 2875117534361804712,
+ 9045974983664041829, 905036705033699463, 6962033946652121779, 3027264198782618401, 13786415307358010100, 3643342525745067473, 13641958783381681886, 15675065537779359584,
+ 7507377696839752642, 1061259379811757443, 10276590160392917813, 6889137095037822679, 16373913505782725550, 12287733095803524526, 5695917172259210496, 6360958736925918808,
+ 7459854586357006968, 702429387211615855, 8231296036461604410, 628323703857882004, 1059802628602873385, 12517208225099517507, 2368172548334856593, 4792344750094708709,
+ 18352334786472886037, 7096021570596476676, 17045407505128867173, 2467670847537319400, 2225663888244226889, 1876713214939672742, 5329943993157142620, 12168650975912182188,
+ 6639850268372381409, 2284514769224558945, 15390110317831975716, 13933785559694229008, 12787641603764341997, 793886210532292741, 3222136169196008839, 11104892506868626444,
+ 12660547967989039787, 2109392063597767300, 9889743271997754037, 11803327596624324036, 6940409327849314286, 1466399127363116843, 2572333022681966275, 4216097356402537802,
+ 16858757460581672909, 5838407119753001493, 4453405435911954669, 2828665451507432751, 13657966632733241760, 6875986784381874300, 2390233934255482553, 17386653779125555159,
+ 2976756344404126744, 17032556609402836559, 16348907464011574182, 2196781021202618892, 8270822867494340805, 4738372358350764889, 6088256422707334932, 17121369334507507505,
+ 2081729301541563384, 5577186154173396778, 1865152567701500436, 1422284589642218069, 2489023909725140903, 276494395149046869, 17420927169263487236, 8292343688801608074,
+ 7819174654675104600, 12778055482430336726, 14615848543490171611, 17498415175263742825, 4785899184222234034, 5227136636239697699, 1570704808469050246, 2858953380860123257,
+ 4323577007857413306, 10524228743339400397, 5418808897687027557, 5939367271366264075, 3569359126353670574, 12961495213964770607, 8906990808988099905, 261084295374207271,
+ 13937553904380032591, 9165760362893872288, 14159606902675650669, 7794101517494576908, 11994596892880464164, 16211278212275417034, 1568324133241165712, 6579463633356173526,
+ 3784436423124943604, 14504130598322564217, 13343318845201913758, 2617267381187565844, 10063945174501910869, 8584806992850354016, 17494557973113338328, 16987450461117846543,
+ 3191556545349441897, 4780077336463115599, 102121356289129288, 18353108394754479957, 10099934050411322327, 10323433488737585897, 1480832277301232858, 8304664509429164326,
+ 16494820005379860880, 1719281141269927630, 2983529566257876158, 15928721259545722550, 3714203000118655889, 15922105900709017310, 17662263539397115758, 1577461381995722723,
+ 3426958921435818822, 395886748655323570, 280632942609884625, 4361473570148050146, 6600885194277340683, 16668323952296831943, 10057749804882861513, 5701592287343868424,
+ 6951585494335471689, 15680855848517030170, 9776921814538436791, 17640230976984644473, 5777838393869379647, 2176689364164694052, 10884627395386092313, 17656310368333301757,
+ 215205608065278414, 15361057989137229644, 6756215047166430628, 12215084297877052825, 7185815532109454500, 3394755837885079236, 1778980761557523977, 1861969072415300655,
+ 1535235777601157298, 8875662482214652105, 15404846185148709157, 6538418983879634448, 8341745508628160868, 7582603669233291629, 13003716464169800826, 13309194956036155778,
+ 4791947847339351337, 15204996809755844932, 11672964318683763318, 13067259871945801803, 2139419248113049376, 18195694858089271989, 7208559250123396675, 4671149208567820584,
+ 7276586425415793787, 14549539461274532692, 7381249250982432115, 3215735370879771405, 9197833835152018601, 17236122023193655371, 5029275100388702506, 10829687080866991082,
+ 15533362570521057331, 6942829144635570952, 9572990445468151820, 6294042535380467396, 9052929167426522790, 18293430694900525497, 9076155092724374912, 6410242671697688127,
+ 16930844972313552415, 9009509357611549679, 10722442939705348974, 4686888495512108749, 6326319203058274828, 2590592779822711917, 12876493112068819735, 12372064322375408146,
+ 11938822647496400962, 15333527667419199710, 2385985954862343680, 14776415112244140169, 12817753724474105460, 5129368568396757194, 607898027127847045, 3917240453172744497,
+ 47211231428738650, 9333275559955619158, 7534434473427098199, 8557232404317963052, 11597011861056629671, 15926835677212702441, 17943276685960195536, 2513989430679125485,
+ 2903417389682946131, 15905578940823611094, 4910425097773753648, 10817415382143821768, 16356392378927186116, 1174713154793914656, 9176413825070952738, 17779105524945038711,
+ 18302140065591767046, 6334261306636966429, 4203057092695998640, 2740754645452686340, 10020911575632378009, 5906713384548665207, 13902791548718280892, 2505591245136304093,
+ 369660229032730089, 11851915833529805698, 13485269529443969890, 1270120511649175022, 9128783725626301238, 6173048611666804214, 14151404531169195672, 18241008099921643437,
+ },
+ {
+ 16226789363155700046, 13734705589524056856, 9629995513492843231, 2636144377026081617, 1233405076007260313, 2119672919365858964, 4932766292290315579, 4185117177904214366,
+ 16996170392430604056, 2476294162951949891, 6677439492116109133, 1649550562276235959, 7307278264575968221, 17128676264244010843, 9768062299436691373, 3825325732056225433,
+ 5861627228365499618, 5011684805137210454, 1579621475012951778, 11535371650014354624, 12847902922833318241, 4997014846790385136, 9875988184618650628, 13094082946822736823,
+ 17224650011872599657, 11430188086182976152, 12892063618843720029, 5912993124689510433, 7069617917494051681, 1298012103142698333, 7943359430865057535, 9329776843721719539,
+ 9091412025380293893, 8622355764644651307, 8718353412518918238, 12338548766610785709, 5241721208413391572, 6315749100915178625, 17919741546105181107, 13677847475856526539,
+ 17388254638056933809, 5422523708003180288, 16851423629233109043, 5504298365310420423, 15145549220326540137, 5184279399795985462, 2201159255780420815, 12506121988996036804,
+ 4460258780042860751, 1816027820333214126, 7778555198679811350, 8155451943744426012, 18392325810806023014, 3955295139981696248, 17141295608995204052, 14206938573418983337,
+ 9464085603669036454, 7848014338005901108, 2368914226155404065, 11578416111848517850, 6323647866244499941, 14399589996703947996, 2824522520842155768, 391310842153371124,
+ 17489550078249576530, 13418700716683405568, 8265132837103985034, 8386855948619700037, 4642739669076635265, 1941217860224507563, 7402267177254210690, 9863923110146355168,
+ 6176679707488254741, 4307819088229734202, 7082712522654871200, 14603822721598353371, 4115674343542702663, 4132348830250348245, 5992042367418590636, 15937819366199058986,
+ 11008669567537833607, 14841150752801539889, 5253470025479549456, 5665837575328478629, 4601810548582757669, 6173003636576141170, 6899193615399367692, 968231402917393055,
+ 9948650801233133770, 5836115460383443197, 11404170696974449207, 4791020823967734476, 423024247809958592, 11251668387880260801, 5562546584223492723, 578572156618131794,
+ 13456676488167240303, 17121749146829956377, 17370555398500311209, 5041791432274350681, 4009412063225388868, 2407629981571159579, 13052555280728877038, 11483117063845312302,
+ 13421089948800544371, 5001604265596626568, 15312985179876036482, 16032805766802337150, 12508684642650694186, 10422897636519090240, 3970071218688010847, 17419166239676042564,
+ 400453640980208069, 15220637399286523210, 17321057148174216619, 14860227640908306879, 8047994708253288483, 10174040071518845057, 3280567772472213687, 5651529697807929627,
+ 13988854693674964723, 10817154032069031941, 3171651998645961547, 5518712225783687705, 14706905091132091970, 13960758460934966876, 12141035142079693145, 3149939820435101269,
+ 17047631647660916802, 11327141967045825261, 6773994317182166280, 5064726840218426586, 11921340002806613391, 9067034595359929709, 16499225867422892436, 9333282251383805811,
+ 16851791401266483516, 3629537449474963381, 9616439067014312078, 17691280261538133313, 13915291913519800282, 4229243823856128517, 5998682815524220834, 18293451622818510596,
+ 13740317056016164038, 15410121987858832401, 12080671504470100936, 11635316325777860129, 2584464163154495637, 9849046399519426256, 7991103215010751716, 2109987257001640948,
+ 2505301881938098126, 9962323478923652140, 9312199523950444342, 12957822502673689705, 8060810607849163143, 737408553843844193, 8143568733429881786, 18003168270780769476,
+ 18361695650419575705, 15964191128341330249, 14834821085064012697, 12512940671502931238, 11592194993753547188, 14358438239394059598, 2876909840522954216, 13154864917311376528,
+ 1919485747906370662, 5250174270363591951, 6270889482486194520, 726576673961648732, 5067680336545953003, 3152041783219682891, 15674051618916413369, 12438966377512424334,
+ 7755405178378312585, 2001220826274540277, 13276328134279740680, 5464764824709243977, 5374818714652367118, 7988129049563289676, 13996688354532099099, 14091540329663446637,
+ 4460927228453411137, 16058422703785787527, 8016770980039986075, 11450941100964860520, 2742597710491507274, 16940752978671801273, 5897441526901650840, 5926672707007682446,
+ 11513132526446047688, 2371334524276946445, 2461552185903242316, 2195417192784351355, 8821293936856062699, 17392142102281287994, 4066797133115509650, 10704373174879556404,
+ 15778536817306082589, 5755579035239870289, 2105368957367433938, 16043807797277162048, 14990960263520315004, 15618512283659189876, 1020703273079622517, 9735594589053383212,
+ 11529096471626785524, 13217557548494128192, 6973227637542883184, 17901194406865837244, 2414269359614891938, 9165268075797348071, 1013041849562373861, 17525420894350269712,
+ 5772778048593061508, 15424756817708323928, 11842041259277183807, 17669952685051160523, 5405007811515938363, 4415223809135251038, 2223192286456742546, 15742105795795272774,
+ 2844957165101842208, 5188198001787563362, 17071130520794983821, 6691797472842348899, 4294126956896299534, 5788827614077663481, 16267876196920424765, 10795461545625002360,
+ 4405006158507706423, 8343317561528812223, 7851955346465616798, 14568481075913348758, 4391527082463073614, 13997921112025001396, 15259963045788570485, 6787776208020191083,
+ 4002627348223514978, 16231148554725644698, 9578235186329928114, 14741034641204805809, 14651073646349829984, 1797163177099497214, 982043220105120238, 9027434646723092211,
+ 11809342655313500273, 16822704101813284237, 13097372075606737923, 5463281428453250615, 11967866991673525562, 6828790575157689223, 12568831962097805646, 8385980093725157349,
+ },
+ {
+ 9246769514475702626, 5788284997389507365, 15857897362850327403, 13864676801095094218, 12881526722328523267, 2620841421323221200, 12599981775316397124, 12820989846844560040,
+ 13066648645100247757, 16727492115601921665, 15749471761440498106, 937594351871983779, 8381111811209734349, 10884357918725639691, 781591570669492079, 12778743765298428266,
+ 1797915488494232282, 17274756142259274463, 14207994319179101777, 9405903752538915237, 9001743317941152797, 15048752608061590843, 4925745663463425863, 17143694017177138485,
+ 11613437975225156304, 8619196433402562266, 11907033287998837424, 5056904365610561965, 7637552956459333558, 2827449950719061527, 9998507085256853501, 9238562885525900325,
+ 13246436769495027717, 5445015753017977819, 16266739009580878036, 15566606095998238977, 12196078605380432771, 4854324313875295137, 10974170498288217052, 6425550765546527417,
+ 14574824159194628650, 16992289415800830701, 3499062703303067574, 11220872042664151226, 18135860701984580530, 13630928571014923388, 2913126838766947469, 15869180955632349704,
+ 10621010745328025245, 14637275395423748577, 12928463178963493980, 3270672471462681194, 11765440832075775157, 8138439106739837848, 10004644076263261924, 12582897670868871780,
+ 9605391611675301152, 14014632424042523224, 4638465078692570733, 3013469722048440965, 10972600060210514686, 4497572559400894366, 2652629676209366276, 15827846806499715082,
+ 216774559418075105, 7056732230404238374, 4342481467357759950, 16143194646968227790, 12304857685680249595, 5021239809847286634, 745754913624924064, 1603290801266214539,
+ 2784418922013631805, 3554650219010546629, 8896448905401216908, 13445015286698819482, 6508982623352460996, 3322529327934132311, 18417788670080975365, 17554108945303789353,
+ 17620122226715347133, 9738429733440215024, 15577771434516492888, 12422193389576942718, 18383756008252443605, 12736926759685644351, 16981429110294392086, 11999528928951986433,
+ 14489177617081844909, 15995916840320959685, 6361381313838994395, 11146176143708648759, 17190512001934479207, 8406914945663974955, 18073221191428103088, 14075266763636071788,
+ 11493260321718935244, 7134447477334726345, 10489281872281557152, 6145540581503915475, 4002857892747271740, 5167943945955725680, 2892864850826359384, 16887114279977647596,
+ 16493058314197913501, 14642843949567816202, 8421201635021034279, 3645913604138483317, 6127272877310948153, 3660286308390076870, 14343325047340052700, 5355450922054278073,
+ 8426383571761073988, 17627146217941553397, 7396223609641674418, 2402241526082789613, 4067059813672963823, 10306840429023537942, 18342506396531908477, 7249869764848707701,
+ 17055119974261943875, 6266720893662254540, 13102139953381148874, 5249314164321788611, 7157525732140787612, 15381277459195914226, 17265025258266710353, 17573500432599548120,
+ 11703325491493519624, 6756922948215553472, 10849241105791329449, 4442124988071574400, 13193138084274485850, 11408181130174894537, 16602978650637017500, 4282478513876287613,
+ 15849352004835856215, 7022332658299393045, 2753527422472509447, 2508636546397160815, 18156018186852519542, 13445036260276495365, 6279372959444826452, 4487789154840943220,
+ 6912674041294744283, 18177825269025993506, 2129493844567951509, 6833453501368177831, 1878514166200306112, 12873954541639117785, 7984716702294892278, 1590286870058148425,
+ 13310953099825480439, 942949035351116482, 7073926138905956784, 7700580694019269362, 3135382872993338421, 14860196563555722941, 10662854588851556233, 5736680857908686906,
+ 3285730769783013255, 5648284582450022694, 16712547815126338303, 16508335566005209527, 12272617736405465053, 6690178916455692547, 3481505327914964440, 9148348679521508925,
+ 5445751308732564169, 15671537507319596886, 17721221564106627243, 379590599139121791, 11923849517192812255, 11794879354344258105, 4802202540817083733, 9502952511912834734,
+ 10387170863530151921, 5325952605083195756, 9255511257002170523, 413412942592261158, 2693041233704004652, 14135620291598275670, 11595337134421837671, 1965085859594934829,
+ 10567751951694288416, 10466024270444644770, 17138727632883910786, 12652567654757797118, 13864111438265559205, 2552332838857882141, 3739570066357761968, 1439381288436968019,
+ 14926469836092482784, 1974273958550553776, 10770915481273902280, 4033929759715842024, 15461883773070197251, 5631323369668498153, 13319268013678629907, 16483196656661792008,
+ 5825598074470439590, 15514383036808278765, 836600073173284669, 17006029483641514808, 2253041172677940136, 10109282146738215012, 2050330973627959134, 13783991480192211446,
+ 7361422649689322835, 16012782899552425604, 10324124765039044621, 12319908523309013390, 17065808121376215422, 14844145564675640794, 2754581529304803758, 1149534055082731343,
+ 5371192675994106473, 15892724499541682278, 3862864290653799361, 15759456296270265029, 12018492462347307225, 11503979417352537931, 1894695486127847219, 15184833349947874986,
+ 14711143160318482298, 15515618319260581947, 10015362293284733112, 1169298538214120845, 14481541537099551545, 10304547737174682912, 9351439436434829996, 15807708290724844107,
+ 16946643895530263911, 6110142153011856496, 6857516759047242219, 2026267735072107620, 10338469980804248611, 8224240981205716101, 12739986350836865860, 6828408496869642385,
+ 18199296108774357398, 10154672256889745380, 1190269705881674139, 2317654455984851716, 6948704988682993246, 7864433505284017226, 12371653445644439372, 16432795124971446509,
+ 940679263188661008, 14653357165718140350, 15439920797347843673, 9415587255510557494, 18115037054677722678, 8165849408798239027, 17369387099667572407, 17619066457126678815,
+ },
+ {
+ 11047138821675365077, 8918121441180891972, 26692501222219426, 15248413052700323978, 7305399157133896461, 7065644238942265367, 3949166834387057064, 14178248206708015607,
+ 69860179712677448, 15464996655458230480, 15095351218136595554, 3792171085643513009, 3187131848664503636, 12713332789590787064, 6278731049699775392, 1597379696359884724,
+ 3952205381894147962, 11371446109407143937, 7231851263356642380, 11612512368538303138, 5227214975837085115, 10737565048439707166, 6333629542995218640, 12386725921557807638,
+ 3631886121917081375, 14286411187847401959, 1546298555066841633, 4790996540037176549, 9541492152219486842, 12304868498960755609, 11426775785781090329, 10793898029125361155,
+ 16970032026045908366, 4739834790205788500, 11604777595337607879, 7222013010088766249, 5472916641327283117, 4172262758310864960, 5724466588558015820, 1116300519629843659,
+ 15795014995613238211, 14290583239597353888, 2490895887909073495, 11130992987297321872, 17554999787604367088, 3420129662095546057, 11472786435923282727, 16869713476687852227,
+ 15035061229930585935, 453017670704522761, 13269498238801130975, 9381945743239523058, 2560842189862328725, 6461599464735341542, 12067824007347016987, 254830492191366350,
+ 10932352970046201499, 5875690226460998551, 3699681560657289718, 8665743594825198441, 10232089728487302472, 15664572298733091969, 7267222702489958195, 10552093353886118696,
+ 15492342672675114391, 415437554733257587, 12890781990283079094, 13204935186176658494, 16394061615709463286, 5115734945762570776, 10421418042968619739, 5523853293632283162,
+ 11813086056645720947, 8830149648607040713, 12579056778157665738, 8379445845509747387, 11968812374998926933, 7579079245215672446, 18392522237039663466, 14209436550919930092,
+ 10567400467353853420, 12039111362360014956, 9752698954159464580, 974455144078370581, 16749750902684349652, 7431880327875076520, 16572811640260132367, 197121614392940749,
+ 4008236965647744142, 8547116629188387980, 13874587567755370880, 12696453513706541959, 10752386832421030566, 365775083075323708, 14295272684753058141, 11024897725724291722,
+ 9389823976782902582, 12987864137510618346, 14351996708263571593, 9397276818616280108, 8943695845699072910, 16761881794918812428, 14395546039428720745, 9608138420173912436,
+ 1876375509629583057, 3290820252719995988, 488255726920613097, 12524692306565281959, 1588481057230489293, 2737168321842385117, 3787136521155591587, 2297952115270241626,
+ 14002797112021507101, 15941913959379016886, 13373428680779564378, 905956693320605119, 14911807685762669378, 13046351903511292109, 5010776430112230246, 13569296078340893444,
+ 2399232949134869057, 16408469118941953277, 17229778225562184411, 15990256768949242309, 10694906341686584836, 2816242351659628719, 10794163145486835660, 15601904974333104002,
+ 11025280186920123511, 2638544612389113264, 14138782464677956897, 8182901095979048555, 11304014986311338849, 5418181013691186545, 4702887342992202530, 10901371551431733592,
+ 7470028071605003558, 15552389775050890646, 8285175978626575302, 11755261053723122966, 10951094485216236078, 5507384267235142469, 13239757172982680465, 16153920989213321821,
+ 18058993449364903238, 13898361855323379776, 4830208064835445821, 2108287244297002782, 403367259355957679, 5208097191763320114, 5105753392567780820, 6874403921011777407,
+ 1158521281204350113, 6232098057848694897, 12517518152351190341, 17852831094628421272, 11653774695260269881, 4420518556213490728, 13835371272800239099, 12119809446886544909,
+ 13202632776652821767, 12444449950521044637, 11128589812117041029, 13640351754206364654, 12210309609173025143, 2998033188714623978, 2790558847086549679, 1412882974937756344,
+ 8872627396983903111, 13010748252899511409, 9503554222158928531, 17266043773425260257, 14299273669130129568, 8206060595465248374, 2391269797717981426, 11490348680137544215,
+ 5625434013536513462, 11006046905736534335, 12974260386406529669, 9092861657954444193, 9064784322822166467, 388172762639483312, 7975459140038679785, 16519143691087724914,
+ 5143546864201185527, 6400878864024957342, 15513803558106064076, 6712861984922181478, 15623960351786428150, 3094277713920675576, 6184647748693682810, 7857605928718754519,
+ 17306606255667524897, 5112741148301782470, 12807238528895999857, 15137864759480529337, 14604908573844046691, 17945466774813936069, 8840495514948749443, 11197321446299638637,
+ 17220811598578674405, 4438480777123093236, 3563461541688367236, 3111403934731489046, 16319666149508644048, 11775149819643332554, 6467838040542164752, 1567553021477840718,
+ 5384669083154912160, 17619132083131362037, 8938889855576647487, 9107335704092137106, 10077424566800668390, 15550874273916351174, 15171215175982142697, 15277375103802490853,
+ 2219152199405421709, 5367881810186787185, 9606063680551349934, 18233690822105957780, 9941441403610160291, 14996958158691873775, 4936282940888570414, 16608409936766385729,
+ 3344228690528365773, 14741103791830625697, 12105806333035047780, 4814603778329751458, 15692542571489717296, 4074467482166609769, 11583878004635312071, 15057041878895842153,
+ 12046059187840675577, 1106267261822046486, 4800396230944794513, 6983782857932290361, 12630891077786998995, 18331269049237387531, 8843236455524152484, 8233277245902259517,
+ 4072376012468301087, 15956856707868364136, 6651399164294705117, 9169559249495366525, 8774931051408681195, 7328702764302633441, 3821510551720221090, 13559321821383641691,
+ 13270099407878039596, 11569032926557804346, 7759575992876193458, 8239088820951102451, 9633607727685959064, 9775909232917811341, 12297921731498614906, 14756100221590046549,
+ },
+ {
+ 5228416958727036186, 12534300056259911378, 6859045937063682340, 16561718753727911412, 9427589074776024847, 4167904055656501509, 10156691045253563236, 17557096561606923049,
+ 12847261474293104380, 15935635664155479706, 11956861064550631146, 11743590506948225647, 15973092866215748716, 8269726904881958353, 15639962392523441528, 15171417818360069012,
+ 2605212343977737441, 18393471024186189813, 2302707671753008158, 8606549841034095192, 3842822953634987820, 3094721493917442423, 6408313759447502937, 13486364200254727287,
+ 2191808101092881088, 128992526124656216, 738676021426139131, 10157323147642681558, 11221959943853120586, 18255489816550713347, 10885231659068427649, 12104397395119665023,
+ 7707807226411417919, 16609863548699265350, 17639371636697428128, 8755472387723764172, 164779854477783265, 9714199241756765614, 3491355372893948450, 17683742455036967163,
+ 13595758632462338296, 14515163666150177917, 6720823780841221770, 15071435664343609336, 9016075014492765983, 16881277609836316886, 6969993988115170067, 15419704786434737070,
+ 14933348768149415725, 8499210770493553168, 6778840616148281272, 13282837866452012488, 12007326272174861053, 11172739862019218274, 15202495365302649711, 8797477675597086120,
+ 17862558746132168231, 4941846130090682869, 17131557654945008226, 5312800819142473968, 5818269467205924209, 13458582047859447022, 2683428091382447153, 12956887954464730664,
+ 11820752998821943867, 5623379642132478491, 11666807493120740820, 7241997274572162616, 17165010508995490690, 1769225877906480182, 3814296306467163522, 1913823003062817434,
+ 11936813336110488997, 3878433838529606580, 6540053284493149566, 10610279324743393563, 14079852809920102066, 9176732841330794388, 14287311909822853963, 7146204303626670196,
+ 1222343790928490335, 2199405396044383670, 14080919887213592148, 7341303626667347624, 11784881532696657518, 17307742911086150556, 6036132721599132043, 12167106497065306941,
+ 13817073203406999359, 7220729284169210756, 14908407498603601482, 13536224989620701632, 1615171540711527931, 2063856048260214664, 10622581435417474559, 16378505765730768032,
+ 6855676470217754770, 9517149712286624325, 11080380031068971680, 667425509348698033, 6136243307299269825, 5326577850303193160, 16120190278345757447, 1982981965726975383,
+ 15399454106099176868, 3988744407816581672, 3596277710300384050, 15129113714633923498, 1554582462382333698, 3164553872715651710, 12729140363982748426, 16366728035709811784,
+ 11647936211409428873, 14704462653811533865, 10005129575282387925, 1526194796943187368, 10906326807488904308, 18256878690674435807, 13093574545395154003, 12352810226542367406,
+ 15290817014272444879, 8012864091692295774, 7591940515496590516, 9299619125326026848, 7297256232167521068, 17861204372399797627, 3100022958796565106, 15313770879200204613,
+ 10190931844127971311, 4449277895844944513, 18060903840299422620, 5786504870079014862, 13877878835839147302, 17536010013969819055, 12266576574650233794, 3344384108564085445,
+ 10406231172694378191, 12785551100938284935, 12210880209800471137, 16615829503150405150, 11718424737227538631, 2245674303359461903, 9283401225508960391, 7699506693893764319,
+ 2278745583218618906, 10638058115573097116, 5685140621618740987, 7758115175926730915, 9043786871908073988, 11442680114439652875, 15805407285446479015, 17615595308217319903,
+ 17127838629068819444, 14692086977823146619, 2491742894643214379, 3238012827598604955, 4066194902402629617, 3613111175737094005, 3296022969960608152, 4860674720288543209,
+ 3753308721541654311, 13160824061458819791, 11298768140351857216, 14042645015841972579, 10518697187157549363, 9641851844202952198, 9254393371864075771, 11429987449073904258,
+ 2566815986599406395, 7824829323439900657, 14465315986665451243, 2957504741164666523, 2280672126115151165, 14999624386740200468, 6090063205272921919, 3478652904863279960,
+ 18131467168461028363, 16471039226156513736, 14133684792391617163, 7137732710914086892, 17299189471163637576, 17270681183882183871, 9046519354210103502, 16091721986926209161,
+ 6572875037492082093, 18318192911446580882, 18107990043035854610, 13573958681720538720, 2821064027974708708, 13000417359062035623, 18286430707379660800, 13070676610179458046,
+ 8521670721910745896, 7440450230382417926, 13794050882540606099, 3657777690979179248, 15466400576048586018, 7802926536345945651, 11932698778931413693, 4291944108972289013,
+ 16729016738422946915, 4082443597391481873, 2238453914102498318, 1806710580451019071, 18305396377745213463, 2262209146853870557, 17240603302299977012, 1540474707615687112,
+ 16013022710115947086, 8748688582020024424, 10921431579160628425, 2228053349060750948, 7024441100951863599, 15816203310189470753, 9078141084316010447, 9841344410499582933,
+ 13377311905703150010, 6352753896568161822, 11086576920416079356, 7629848369702522073, 5472036379742160857, 6535781794177956975, 7671272114437903042, 5056286897725243410,
+ 14579350500102276494, 5882738271691348624, 7188200631561491668, 3605286077046581049, 8957701412704914688, 10347270272487533128, 5995825272041784807, 18321189862831103872,
+ 2376178340116743056, 2263593380938083722, 1990228394694589285, 13477447633165073333, 9023956677421314778, 7820910817739139038, 5288765314200405868, 13070699273589738968,
+ 6765952613794178887, 347733275422001747, 7640026333627494061, 12925707101320796635, 5670472934143686904, 15041817307271371058, 18176041895529109656, 6838891343875575060,
+ 9902363248877274636, 15776195244792726061, 1935342462634574355, 16572507349421626602, 7503944294807242065, 7454248748618678609, 2837392985175901475, 16494567631149775512,
+ },
+ {
+ 12299242278498309265, 117303082144788574, 18382936515565701803, 7941141688858846015, 14923823358299367958, 15681089039073030752, 16878721085421212785, 3933697483394042011,
+ 2878536922988449879, 8819365314325664735, 2696760638950720974, 11357179421800829605, 14730809809256560583, 16277258726425730599, 13189689275745970592, 13769784951430309444,
+ 10292669945637125918, 13595563061674492799, 18301330068714272653, 16981073598172019270, 4022713583718555211, 847970754090163894, 12867993617169467387, 10021141621864633865,
+ 15254713655124736464, 6283719686609422435, 5417864180357938798, 14010638809605797246, 10873690600705871523, 15014239467130590294, 15765460219545280655, 6048061577845125750,
+ 13465802402879986040, 12777495579796377113, 11251142258511736751, 8312965145580158560, 223338793373356823, 813747086573652373, 534790745256052224, 3658554932030960904,
+ 223170064814687698, 12938716002978051117, 3848816111228150666, 17463649319617382466, 12886402189488467296, 9605942357133455486, 12272689261734151433, 9916808458308982839,
+ 2879267873712146919, 1770061136847768757, 3497337219301780129, 17065005588401306579, 7675751531915094916, 4337540836976665798, 12505868586426061855, 16929961945120117126,
+ 12012729503226546642, 4811951115754240681, 2652621264600062520, 1108958391967017223, 2508980100759035797, 9023233544151778447, 11183557587641545146, 10726727280840759519,
+ 7189877122763725514, 14656082750550993666, 13508306914749420120, 4564443784575285318, 15450566991657647015, 14019683331552104148, 7751003035652012225, 960642801291306739,
+ 4621870789074053275, 2699296606556701282, 17510056683561737074, 14554735156054608866, 9556940770704438829, 10801168482998872433, 2280142648958056556, 4982572404720159279,
+ 14621729774738200671, 16850208728798178903, 2650832773786164628, 8325244333065591089, 17692120343646193143, 2242845836784335580, 9386711085286369304, 517979059098779699,
+ 396296174223428093, 16763788748988621834, 10606803701784640909, 2399987183687936561, 3641562114746393614, 8536687357022564742, 14532545286961725758, 14658331097531946901,
+ 13237433907493396869, 4990918623816750268, 17673032790739307275, 15907935024302716062, 8613470231259845273, 9346326052910147032, 17747177708802561938, 4490130127498754807,
+ 13867405496809412712, 7002613759387491643, 13033935552806967378, 67437088702121131, 1528641041740753990, 7001198041125832592, 6102137699349551632, 11527483925092226802,
+ 5535652777947676324, 1216916140919282417, 343617165825197489, 10806931059627863289, 3212291315152120591, 3351900116486367965, 8595828725191802110, 15621077401736893558,
+ 2386026063595321071, 9144724461940169335, 13121187420621518554, 17351696676224980491, 143495028693925520, 4590642171152119027, 8655406817079059692, 10005507077955932112,
+ 6982427644490593208, 5211219672384700779, 14348546115682624722, 5876199850746516331, 18396840586595761484, 1465795856184232635, 9374516147960853174, 11803025633575966436,
+ 16374709377982692229, 2545295070586009694, 17366393861359867323, 18067336234259281315, 9181658491221440805, 16188192004088261717, 13669686513572099673, 2044092029751770484,
+ 7308293641399229883, 9813539775451467496, 4837144575100457648, 14073521030008335794, 12371724228966966456, 9185074727660313894, 7387427551441329556, 1866580457929037678,
+ 2047964090310465728, 14133690277069748515, 8287749071957055844, 16821270580974431218, 10324684883587585565, 9652498187344172497, 2461802306013063247, 15048850312040680643,
+ 9659170831875656345, 4251587871822599596, 12391279033133774782, 7346556824831912968, 2133114137534468833, 8694105814921565301, 1000087733076615381, 3478818915231998954,
+ 15832320183998834886, 898251865651859288, 16874504161383651033, 18425166917936355349, 13387271833996394553, 808054572728974308, 2281828182196149100, 14620221246129351423,
+ 2755242181069093228, 16155597320469161129, 17375128328249992589, 13376690667170223367, 12825454052191060221, 13625129606409283552, 5627148592782580041, 3092789333954796605,
+ 105705035058376953, 7541598459498134635, 6627685529861836787, 5724525771392188178, 13865897118300842617, 627652561624109853, 167084371112939132, 14192831153344015422,
+ 15618434900514973465, 14267977447705810206, 8823759714826430108, 7918740515193061263, 1664424041430494212, 9076733744947861202, 12010737794701759719, 13920688631446302868,
+ 14823160144680924028, 15690572793903331709, 1101762466949115887, 19462496059060771, 6099422315003219574, 4300077459158116870, 1282001407902323746, 5792772971400374848,
+ 11488321122368726901, 14632273370344181937, 12984850848826263218, 1857014998357880446, 13897135914532207935, 15925630222053677390, 11992050787349110965, 10072349923584966035,
+ 12837696619944805389, 13470316523008526719, 3074966674353053986, 2630814767089382743, 13353767941686870870, 11405513520090968568, 3071969598389884693, 8212421686942839224,
+ 7866179127856055582, 11874624506147226333, 5793384221044709375, 4411967221310302622, 3004913667339358722, 12230878672003882045, 7395980261120250376, 5188846147165419817,
+ 16160221386626609076, 2876994290210606255, 6065036384672004173, 14696333348949573335, 2667009584785381424, 12100192661844794919, 12401945669155059163, 12847772853650923808,
+ 2286437117905535902, 3130250324287833414, 3662253455746154641, 13229041365675585947, 3663813013822489770, 3093300797589723687, 11507645566517710232, 3715061633580408995,
+ 3464453985305889088, 7486900500389940957, 8944303108517222303, 18319933556403836918, 6782476025105836770, 12822840946923377510, 2799592944286097404, 8737598030273298051,
+ },
+ {
+ 2754563581284692865, 11257360722362829387, 8254261291934606879, 10023569920325096229, 7980898855627864603, 17162063449612285703, 9275759455936104363, 15936804738099638644,
+ 303387668469489035, 7978391052264888020, 15413722057350324494, 508608987485166058, 5585753287556892522, 1344134516582665443, 15144279405777509214, 17014353449841567149,
+ 12518577022215294774, 11347384788122950558, 17122718824958020364, 5431993665805279177, 1445831254446028321, 9845654210244803480, 18315778529692612284, 930563922336183289,
+ 4894710987714215219, 15704993603878996107, 1606204036324478223, 14308635149530198932, 5026736228773269251, 2911689442372084137, 9004077539360196849, 7770049130277452000,
+ 2745120518194234905, 17255944807561408883, 7371907591838942770, 11781525288383871227, 7814952754785494862, 15022715213812536212, 4112388658963418656, 13703771908397991335,
+ 17455440140812341569, 16738670164475421762, 13259904130186215994, 2168106064304872507, 1969289843772256992, 9025317999701057831, 5835661391798891929, 3826587698101558069,
+ 9149648374608464235, 7248346993716635643, 17283919168525322365, 10107681064815728795, 12176813938029507719, 15110337574289033068, 2436453685316043712, 5876967059744625564,
+ 6197919059881189313, 3300993078638782623, 9357667752372201437, 17688129755135009447, 5477090746558113696, 12602024521188880300, 15889961935507293355, 7135039746373941272,
+ 12561063426893244305, 3079971284510744316, 15695857190375815873, 16730673956207425338, 12065477821845465116, 13846158368121919228, 2126156187526559500, 3005167441915916768,
+ 3858987859895327040, 17043903959888117395, 5861237635520080595, 3292646198413575902, 14286644557048422360, 14346409530388980974, 12583555046601155161, 4665981927428063991,
+ 7657686120974201842, 2388299767028319466, 17934808017791217639, 16708640707026372313, 14122341266134976486, 9547124998857374491, 11194069918436025923, 14657538980930727877,
+ 18115789712912297420, 6543740714573413570, 1955886376702349613, 14736853369719086334, 3914330395250768396, 7607848156418885173, 18220633528980056514, 11930952850158858930,
+ 17010681753474701341, 6961088634819162945, 18395881317121515295, 11611411242007267179, 7996717433149311639, 2687736005475404867, 15340214362731923149, 7116441414289074727,
+ 14302198697220662403, 11915041511943922518, 12363437164440173650, 4047410026894059083, 11439743031696133447, 8776097395071907030, 6851061783357383945, 16484733576581941012,
+ 2861660066704264713, 16453202148771053225, 10039035813504063627, 6275446616881871868, 7150609984920482782, 11100941755544354363, 6219760433202580646, 2875377482232912143,
+ 16477698187919702699, 2757160833848759781, 5976267476122243443, 7375951277779829055, 6525111513935493528, 9969722922172879681, 12582700753056539952, 3861958159147836631,
+ 4207324524057007052, 13478421263392765917, 13318275642734768006, 8504333045658265535, 3960720256367501697, 1601661680373853468, 7704205542213854353, 1552780884856863914,
+ 11788483659211580527, 8739274750400900816, 17562450707643849072, 2113004873311762141, 8542099133996522813, 5273956141012313478, 15661081976505113338, 2290009483289921559,
+ 7730337285529332076, 910421998453976119, 1242962796308775978, 15931313183507922513, 296811920445331257, 5821874870466240960, 3843405649348506130, 9007082448812181532,
+ 7200777231000974420, 15816416455380831370, 12932038056359512011, 16422801186386616731, 489979647738684819, 8269568123908387407, 11617965045895762979, 10069799398667455631,
+ 8245819835927284113, 5237630684564909794, 16572245571184457585, 13396211619915630497, 9177789877937729597, 7598297211302983411, 3133511073496333246, 16553020037702090576,
+ 13441991323065749602, 15441570013111011986, 291172418860690999, 765089869758770890, 15943751395897588683, 1887511823199710563, 15452188663399676440, 15028027691170546376,
+ 11009923302693800320, 5465227554242327455, 6511134004216481185, 2187999546010819375, 8387421071023131689, 174295266381833460, 6010935117399493668, 3340537243300033200,
+ 270646782953286316, 6635640130829923909, 12717598479778596267, 3198506535831522029, 2605714695474737827, 14548588937126505914, 10448279824323025574, 16008560590494788290,
+ 11460241048816176102, 6557818897531989327, 6948658872009630691, 12506263743501199667, 1813031030961999507, 18320107449706503757, 16783076453582799039, 10113937529079967843,
+ 3096618551325580575, 16427990761832670936, 11792441105446619028, 682188988823121368, 15606025696371121549, 15395362449248397023, 10158149307437310549, 4442375503593769727,
+ 5486499432056098253, 3811951715772590831, 3856863699598204250, 559724736042732093, 13251752681229681959, 3173330346541355882, 3597530263899943500, 11567248716558420280,
+ 12822024566847931229, 14487146762942136806, 1361688279016339671, 2776885758026394514, 13616222293131162973, 4629388276054227930, 8304132204812769256, 15188275451670510949,
+ 3676452641243730693, 10839008248003066479, 7676518604603740317, 18384275325050378660, 7827069556104560937, 14335741841451672783, 1145975213382205474, 13691620069378415495,
+ 2513451633312793643, 17331457978938147928, 11588204658848186728, 8824497684232939293, 5956154520546906549, 14434958079241686709, 11798870987920360447, 5875117843056165993,
+ 11313908033360767461, 14678575871510948417, 8479250768537158849, 13854183783158898926, 9413525381201650053, 10077438283140087199, 11413783222317729281, 4418417663988371635,
+ 6001837395299394988, 7545846298204709215, 6662102781970969966, 12696054550672669874, 7436995610194789060, 9042605089262227561, 11621508125069628605, 7289479057360149973,
+ },
+ {
+ 15555649500689005370, 7857014165815014600, 4664069234996337119, 8058230521837307501, 18090421626009183450, 9278179697462444898, 17138879353097908770, 18285223269807746171,
+ 12741225973381753432, 5461579923265799106, 4348015554411389960, 14970536173310927872, 11568371199139699619, 11748662751675667230, 15495664823061891700, 670495577617908409,
+ 9865772269601493999, 7325690007062740244, 18437964667185475354, 9546360111100619473, 6266116854923135821, 295905071295541234, 16037468634731642095, 16602609756229843150,
+ 13266892287530449189, 2232961926857263527, 5934654989155432442, 5415975493246016158, 17117784386882463158, 3875398271691758986, 15755743581177209843, 11380746535012415659,
+ 2047267291755864302, 5637045342080349843, 15779859211873634959, 2415175423642772217, 8664113127182901077, 3032959071705433205, 4250761979529656864, 8729259992491925346,
+ 16559648310139278090, 4796731467175572814, 9267555165184254828, 1400425639375650333, 5199035239208036114, 7994153995816640929, 15677746297407711038, 16338895562326287438,
+ 1740700879164022194, 8442598604870387302, 14969965175291161450, 3086786406508018865, 12097766404705693713, 3199218739761578935, 1744290654371906763, 3672816528005113908,
+ 10162459499000477259, 7534168794800651527, 6320985751131807192, 9263840233901040421, 12178625283764017250, 13143797113231152006, 13089053551699608219, 13480464010111258056,
+ 6572275813002629593, 15328678880838895837, 4281489161088939969, 17996597009627631572, 10405019085582914256, 4773602940383417103, 1455403777441353089, 14766164051850429812,
+ 3886207141264898075, 4215113531127552656, 14283840761174092490, 11161507022796293343, 11235510295695006534, 2260082644337974884, 3781681413610409735, 5577127823856153222,
+ 15609337727165147542, 5725010295752332028, 10091444858515990726, 16162855978360002823, 2478705273652882251, 11394669968372227148, 512641267828429208, 581410050368207889,
+ 11426681347395118622, 17135867298562042628, 10870734720453621112, 13803364322975322217, 9740527283062276630, 13765238480893165263, 4792349632705804116, 18359029140065847048,
+ 15215482935039834459, 3553551735757600990, 2471323651039083728, 11355669733892999598, 15830640053604899661, 1625382463038435804, 11969247519895019474, 14372668755409420943,
+ 11113773299077372683, 16618589175113863833, 12580653821785550146, 8264195478884998326, 13569468260761711850, 16539804400793525042, 14464540796871846551, 7710175783216723569,
+ 17565888320840294282, 12018783137972195797, 12159604578082686310, 3210316551614996638, 7700243787046971168, 8307491254792612954, 18022719331150542511, 15890872243361196004,
+ 1420251412241033168, 6449255753238336368, 7468624722117319576, 5716072259994197219, 8165015263320602150, 4757529151848960259, 2040364517493350115, 11069642333484542749,
+ 6861520668253090597, 14998859189233167760, 4851545815068976365, 594940747993801912, 8473464707442482665, 2316305389548210767, 3995416902578245152, 15770229481630179441,
+ 7316766449463697051, 1410976281384361962, 2878678490331118371, 17638505404208445694, 1130528985192221086, 7544558337870473835, 12071909234624857373, 436426448500115731,
+ 17436263074416557505, 4339627812592344328, 1076653379358004720, 4996950998108333398, 8528954985898069924, 8259369611572362613, 7869239207308554342, 3592003501289265534,
+ 9434687670958330812, 4808705744496679962, 13910334628239183613, 2628594509406050235, 10055911465166862298, 8069256921143792190, 11886968715847498191, 6351594160398684406,
+ 9865961381837093259, 9590584894285774485, 10674927452501376239, 13861388033657872936, 808108663597281834, 16581187679405825998, 490312642991375167, 11352102610468872412,
+ 12888468148530619065, 542846015600866106, 2086677842713697080, 2758534484095407635, 15767123419143795297, 7077908316515201321, 10297306681753168866, 4641656730233236179,
+ 104445550853777927, 3829969899624802332, 4713997522991885393, 677661257782477991, 12383280183673921457, 4373178143111769979, 6375181113677777933, 18335077047046332699,
+ 5472832021993175857, 6120966193470238758, 13156950075464061259, 14833372202848630444, 5760885723102571027, 18022572190906170343, 9870918995920897221, 9546055667466056471,
+ 9516299398994303577, 11139435630263588724, 7364473923787049608, 15378275072059759415, 7378721410182635028, 2461490895435109307, 2579531356642691202, 11061693189962996686,
+ 17380308085259417126, 12584808641030185141, 15485460243194852460, 16172215807782473427, 690761937231979275, 17570908286361380250, 3347059970688467328, 10404397759606846636,
+ 1302709369035452282, 7149637102192386111, 11659118804203241786, 13909758997057192683, 4361167997834561257, 2300569874961985919, 1609719848856066643, 9866395233709297503,
+ 17124591880405051324, 3379747334483646957, 2084957978131092947, 2413497199486596627, 2867893699156486423, 1357836849049583382, 13607307985344221035, 18039194570940763745,
+ 831259058391834318, 12782538711785697535, 12640197944141330321, 11004378265133697847, 13622812183191119169, 5878590957416415562, 4810392157669422629, 11128662835376944411,
+ 2248775486830407244, 1550340755758127506, 8399436463450785695, 11063895076440303538, 2168511054719571959, 12238498894267955838, 10824266376477587807, 1073243669335517712,
+ 13568560319245254019, 17615770913976878569, 13214159705850440913, 7542574047209989510, 7392331519546111377, 2685908847786414772, 14835825677444128105, 11347648758435372538,
+ 13230815692170010235, 10662664037814374845, 10120384214921531334, 8892863196093018212, 4443096401408136417, 1896901619181196514, 739574867683694091, 11233952771600927581,
+ },
+ {
+ 10708602749843635238, 8046488591192123621, 11391158257016334553, 2512908912806916201, 14760224624269838038, 2631910198880826809, 2387247811669872033, 2048193899742148806,
+ 17392983665205025720, 12603331233691760840, 3752711326309791976, 8281544857330094164, 17004651346561270106, 10399322540256775923, 9721299450284863176, 3725182263994944884,
+ 13643198694788276032, 8127425924819487897, 14165945065582850538, 16520977003164405917, 1941948502806705647, 12390949208067741787, 17112117376594486408, 10232906951259490042,
+ 74190110112564319, 11227229657066745164, 10151080599595091944, 16591051376074591375, 8256332495682655293, 7127985739945537626, 5708687346183367016, 18310921046455023281,
+ 10040565835468209502, 3986586816478505371, 7957179672896506016, 13347112914739722590, 14099734293320932556, 13921950344400895005, 17478414833301838037, 961445463330402797,
+ 10553899634420153414, 4215979268474142438, 3643081140297634139, 11620906704125380460, 17733628678553828301, 137473878710304143, 6211837577294956591, 13926110616399428016,
+ 15203257341351417031, 11597829209697937782, 17317571979811895272, 15929491640570317745, 5731227916276913121, 17093153916149088230, 3908757069480556294, 18211818355208735794,
+ 6887629766078662088, 15330755394432598442, 14144608177273202986, 13258654996547018856, 8975611270007504091, 16869834856279102361, 12707970875852047823, 8520055680055874514,
+ 1643407997152867899, 5400650551136488814, 14663391437159245064, 4743209938058826866, 1728998201888511561, 11238331920657195520, 11617984741897247805, 13858029141809651878,
+ 14003691155137683825, 14641672309228817657, 4031783205073356111, 7414359968614421153, 8166814052414482491, 5209260157266028169, 17555696996149558694, 9780166780476833956,
+ 16029688335186691281, 4244772808651443261, 6450459413414527821, 695875191412722741, 11104054226249884864, 146461617619843327, 17480214580411209436, 11285418463967817315,
+ 4922928863857449168, 7226901725606965824, 7568276305032368752, 6253457805758596797, 14345040877576203078, 1718759302498746946, 3544444746772280646, 17880302727399449566,
+ 10693864548794383214, 736148233485985101, 5241869061152863453, 7166954323782542739, 16861482585847848251, 11483636939866570861, 5950529069765100144, 700887039653157350,
+ 3906969739692830848, 5546431121705298614, 10491799900914853406, 5871175286230239238, 2851574662174517162, 5511397532379144624, 17409985391224790540, 319012646822749823,
+ 15269778527761027251, 16471737837294177924, 8830398856035267114, 8206807757132984992, 1421214226977370752, 14233751335943482739, 9668238787333455452, 8700964319357541954,
+ 17561670470565298976, 9524122142014309347, 12619871254022879995, 4098610542031538614, 8338691110372464881, 4378598564044777665, 16435680634249352743, 12067828180476756532,
+ 17102475410743425567, 16244272017711449252, 17598420420289880464, 17858587557620312875, 7275673248698358659, 13952444068457447282, 14004356855576602555, 15776401868125266614,
+ 9848210305813205280, 13777844361911806961, 874096498600831552, 6286216140019341540, 1229781121487503670, 1221366673976886811, 4534525466071627057, 12966848050780717673,
+ 14890015174575641658, 7082696407720868596, 14546372439063914434, 18306301153433516531, 14360952638570231274, 5387283652784509262, 7246876761413034655, 4917620254108676720,
+ 16572428185201836512, 15535161956998457706, 3118508753345802652, 1036719175817464914, 9929541174185476740, 17658764424885337186, 3038747611544544975, 9299826573437432057,
+ 4068144934809412921, 10422353259330469599, 10820596685747357573, 18174251490011716747, 14838047097904304200, 6494279871501263445, 10908514990777375627, 2395220126873880030,
+ 8963350163641627638, 7331858013030906816, 15047142507680204097, 6113080618647404016, 7520779807516814683, 10255234845683676901, 12010823325949652354, 4005195924244758895,
+ 14455661618497189876, 980858168361168621, 5057114595071838088, 9447128440518577562, 17115933446433165191, 2063106073840638313, 5944852338857186064, 14601740796836275216,
+ 12750698494453238590, 822220438401342753, 12735466470750534773, 15168985403101776640, 17837672477921333079, 5413174218886573483, 8340507718774735027, 3796235681866322764,
+ 3366536195244539993, 13901131415620358879, 9146606194544531067, 7568835872371146677, 7101767951356426039, 1958938171972212156, 3871412404872561111, 6437054924013233533,
+ 12849284342518738377, 6061751293342567452, 15168528907847693260, 2071752334274992986, 338461453751559028, 17270401493472349305, 14152214298057413781, 14074152302209367061,
+ 3814267663972523036, 16047424572869508620, 328411057776532579, 2321721854548154476, 14950895796162035951, 2381867458906113095, 3121132178592915484, 11019696935452271225,
+ 17929201703034060485, 15478512769360046037, 11652099424221007544, 8436785170867890275, 16340404489137765730, 13705192502407972039, 9464924753010269658, 17372624212933856135,
+ 11237648987477338397, 850109691127167972, 6566350160877169296, 8028073340740368222, 4445151747345891893, 7753144690511205475, 7551118086710031441, 16785474804106433438,
+ 3919851124685189663, 16028223238454025712, 9008783533564802058, 15647028843636411071, 2029953422298604024, 361326896057539567, 7653333452646403127, 9211397293968688243,
+ 1817762546790624760, 15025271080704836746, 5761603182050280401, 10355268146728370750, 12900583599325924868, 1488036927944469046, 3405173852255315327, 5600132277974719103,
+ 12574920924242276218, 18146097977073890534, 18281543194137545725, 4432773597060264867, 797962164984225968, 13432184750641235201, 12394976215803938078, 3054137600383896411,
+ },
+ {
+ 18270562314314829224, 17574550722906577917, 15522987576586641271, 4200899284963891561, 15719788437321868967, 18390235318178988753, 17371818886753265385, 4583380351670429228,
+ 10102001197913431240, 6211688443233768420, 1986665099237206297, 9248907879269017005, 3081890249882755206, 7828527646248386813, 12676034092230326487, 14655968027461080131,
+ 11464797300361906821, 1444449385286554508, 12433005157477847389, 964895564896662942, 8457290958728547067, 7044433897330872257, 12292673928275053405, 13679355097865192207,
+ 5841260956815214452, 9461234662498869948, 4761949640006529525, 18229823346336752572, 16055957459773168413, 4272222475092786627, 18249888117586812770, 4531982814021269346,
+ 6528332072572425694, 15678657170585498065, 14462305190193144746, 10159286562616704237, 8919172138248087686, 13247073064443362933, 3056992278576665488, 8216417459286092432,
+ 15095644792163717355, 9931529712396800935, 8608932369590222253, 17019087735655287783, 17120516184457264144, 16412592485673181957, 11221555966650765276, 6815162077372858604,
+ 2239262553722461896, 16518231021003046485, 12875479040073845800, 5243141733462514317, 12216971859495819530, 8917663622006068692, 2606734295032613674, 11237688605489138628,
+ 7270238601276019194, 7318279377964882079, 5226788506940171017, 16838449902078450273, 3375184937745100689, 16320655850583231520, 2173229791272659780, 9816381971766204380,
+ 11061652864259684005, 3625039563689831268, 4435107405432574197, 17570841169180914890, 13981724030179216670, 4992487505540472327, 12893808204753555435, 12899171569920731024,
+ 12002788024483301555, 629439982201171214, 13749214763651595011, 7164215996082938159, 63895081825959000, 7006379700570403491, 11929054672290963822, 14290981753366049931,
+ 13554823979828564217, 15627748053816803157, 12924111102804720301, 3165135223777020148, 16345805559363705538, 17656153856991335888, 5211305829410935273, 17846236012427517420,
+ 17113448198982970082, 8879656883528014268, 15215762680430094206, 15232212751792759071, 13699137317730183483, 11815206710130297569, 12298409691055054593, 2192455093917691024,
+ 6973243752017383665, 15334919534387330350, 12203459507012560294, 1880762698376948842, 13008085926447766632, 11695961991328184563, 1311309249708428072, 11981691262998313807,
+ 9209757463013140118, 6782250604095077195, 7896297860376631775, 11500117438976638113, 9099678680126826621, 11908588510950302487, 16863479726536938776, 6368086884375751995,
+ 6220904427249143299, 4501361418756965744, 13907877383736656540, 4499586229279778873, 7233050316216252450, 2775027076708404645, 5669561106187108564, 13951461080947060780,
+ 9742819322069712752, 8779224519997661749, 2591205872359340025, 9144644967919881415, 10495832103710459914, 4557264342687723360, 935865282128150338, 7120324256648044882,
+ 10780007491891304021, 14382731247791652187, 3296810302419712004, 12567831906993651128, 13933396378746555124, 9754754796778422405, 2905077741505886450, 1522919492210831578,
+ 7282056954663740402, 4915867300034445627, 234865561845141598, 8874426225092097899, 196269165389812019, 8951904020835594063, 6908903359164319029, 14062039535595890356,
+ 17922518107542393156, 17388335427727081004, 9077910363057082196, 15157438705071651749, 5615196861086779767, 17404239261681991687, 9772161835068093639, 8380484042808490346,
+ 4683169301229531061, 14940390399739549639, 13128742567354398401, 15222231511210090535, 7352694898928671332, 1221555526903686446, 8697118143614336514, 14515400523185205597,
+ 1119145169329993209, 3453487026721044641, 13723232966162256192, 15282444658208236515, 17806849942057939434, 158876409507122147, 11954364486113732203, 17566268785944006600,
+ 1443798830844295330, 5744920153630226551, 15739845606128975055, 10458513157123726949, 14711103578835123416, 13886988522344422595, 4454455988362290346, 3507977514728491242,
+ 15328194945269186082, 16398971046683442289, 16293780909421539295, 14488637138080135219, 1108438550767062231, 16291467684905586857, 11891067917592351245, 10459212739896579624,
+ 10239864229747754958, 12477284975467600510, 14054830413782031834, 17752968481181892755, 16082713391521411762, 17191329716867535304, 11245514405315337909, 15159733745232554536,
+ 4102022129275956496, 9361580273813141387, 13824110615577377618, 17115977884930866990, 15058705994968389537, 8039281695081359531, 7179945727911625126, 17628825480910826356,
+ 2477336341361304271, 18030759410783456479, 1164757311756325043, 14195508307731677040, 9794350654156795137, 1135232100348022155, 16693007636113380448, 17770308235375345497,
+ 7064407865619227176, 13284912648471743270, 9099541805618279168, 7383718461788433067, 3311987567268438113, 10943963018398042859, 8038765968977300792, 11039205462473399718,
+ 15444441705629456398, 12030687282161152512, 16500102437404896225, 11467410246278043769, 14031300093087427598, 14458597378298191386, 9434445586505481169, 18221119130471329822,
+ 2269899977501467759, 9674818903084386596, 11781166634280447465, 11752600152694102168, 14011747753002945247, 1084585411243442928, 15614695372212325691, 17196679090968022414,
+ 5062812993852092949, 13193601915773858975, 12910718608861476282, 447390754333146274, 18393090736120037653, 13231946117349235609, 13365030203093499221, 9724404925244523782,
+ 18064151108813428175, 3928735035534454387, 186686833744587171, 18406101378699384605, 16477405641027895528, 697258480673566897, 8958117283476321666, 6970167016227017231,
+ 12694666558858009121, 10992313774737315187, 6046508760845509477, 16818819584649823054, 10464090160691032219, 1221502600750419478, 8658907655588546849, 16110628890984509877,
+ },
+ {
+ 6266830413981331818, 10890859480409122410, 738708598357264321, 9241173142142102299, 9152169484987657321, 12122430118903337055, 14270062735458703743, 13877635605034675649,
+ 4572419849863661964, 16330741525817240677, 5456771007607263420, 8798510164381810403, 16547183593064549865, 11355135595370359779, 17581910791451710925, 11684604415749005488,
+ 2766130068233451740, 15909689412657372214, 3272828023364980896, 12907534005596438003, 7065629942275705493, 10609982651084321649, 17330343421617897811, 1746246157222856918,
+ 13168953859078137190, 9072500190247791267, 11090199374278127099, 3503847851309060744, 14395788825673846582, 15073993563711333791, 12879874391706003251, 15417028700542288171,
+ 2686132933630431595, 334448852204016400, 5636563341880055087, 3897420469445246068, 10569177829735084588, 8244586887721364305, 17973493209847502315, 18083956436923498083,
+ 15514509130323617046, 9833490355170899102, 12028021566105512521, 6349458533674010018, 13470024118569549588, 7867188201997717819, 6110792121234766402, 9917858515766574695,
+ 8273062939911446888, 1936638151089648185, 44602505720759914, 7839132833110815763, 13045756168264521992, 5375972549477973022, 16964151563535745964, 2871770426902555432,
+ 14164643693563803212, 6386904235704679249, 931435629437735510, 4496152555914735639, 820099532545743339, 13419988718062417698, 8948965620484833159, 7238410427920554862,
+ 4387941928212462673, 8023190031094282215, 12322462122895470016, 17311901330042910195, 16445894183838823370, 690424095855074674, 12820742226551488738, 227935274674223825,
+ 17437729542205914525, 8439533239207225162, 2862942422640746698, 7340866642549630610, 6770451500989398335, 17262212260581413381, 2156505157624055602, 12195119268846383898,
+ 13685370053507732180, 8304030489296890211, 7042719123662732989, 12115154796394040286, 10496877583013829980, 8909625177349683785, 5268451666475070682, 8910488385056259560,
+ 8874918984373509240, 6652428549855789281, 14866714872885601270, 12093494664302554643, 10149999253136441007, 1611873554177023520, 10271627298853910516, 15171882630355491729,
+ 17085353800722834618, 3271271583189511718, 9338466116389304614, 4685545953421459012, 5059702655119414343, 201476627889215522, 6388612869673101372, 8357217932305360896,
+ 1836621694151238424, 11438765101326043062, 5537182303007083642, 2657011195641217678, 15370176245454161308, 1478361893117141052, 8729368992402422055, 4807782542738120272,
+ 5084555197036037383, 500829604818691390, 8128181767026348964, 17895639808733462562, 4138748420878083709, 267892430674902987, 8954358455330696651, 1946224964877667668,
+ 9440495138081785154, 17328162383695942099, 1870751290505230781, 17987066187778049275, 9952223038481207705, 10126409487298126876, 1181369784146909444, 8158422380540037915,
+ 13005094963168549814, 7254364757817833162, 3708127625424981603, 13214729399033107803, 14306349749849146051, 6983270854272831328, 14340019638575491596, 704963281855585401,
+ 14561080905697443279, 7640393429462623622, 15685656608213407004, 6317405842945074950, 4586197269496115618, 4352991463721324184, 18421865732314202847, 13679296167045126011,
+ 5024751682397066348, 16464187416447039066, 17561916915150311869, 1553872229528720506, 3971081625989469183, 16363381852589102144, 270446294712200212, 4668332646727035585,
+ 4623922898967168175, 14927682190770218553, 18082407698612059245, 8156136453512346914, 17256761355350416250, 16185397881787027880, 15277309552707416345, 5991448876439153037,
+ 1334994484802978518, 6876293985013000163, 4439518198423905531, 18287119869714109760, 10942138263490204506, 13981840082343935999, 186527448590996253, 11759869552919496944,
+ 12652601656062843187, 3947181446273381189, 6235196886760030384, 10531746495897900463, 13249616872107073217, 7937663398525899211, 8701780810784627798, 5901496026068341468,
+ 1059285256835989485, 18284969105568513488, 8222225088165109003, 1178299600673068913, 16068875143685216551, 17849596766342630525, 1648308197824141718, 6344465813776674352,
+ 13850247359965870279, 17391106451201459411, 9882112511974412840, 9027485913083134379, 2654737707750698953, 2829872619210764000, 10765067664204983675, 6008367494734883394,
+ 3596754664049660092, 4049565277418079196, 14294713401434678004, 682517403208603499, 13234861305501540306, 2878761138137340816, 7563571058666806838, 10752984583868770507,
+ 16399575624290910947, 4698193514990054887, 16222170007882213172, 6300376482027477465, 11587700570984519002, 4408509191665524069, 8909599698892352923, 15110169407475815879,
+ 13814725956725513903, 11675366986709837701, 8008867936756292473, 9627238521208818281, 500674066787171963, 7307892989752288627, 16701476208533897826, 13745712031279374379,
+ 14988991093658383268, 6586109441484875615, 1456715231981080716, 10587865765103526958, 7439504550868615754, 430398141621608230, 17710495017725824018, 1267747579289674850,
+ 7939033276966801601, 2952159422351169438, 10439776924885737877, 13630489377585028212, 1779349587540144503, 17209739673050012100, 4174716919431642272, 4986592555205663040,
+ 7110204405213656469, 5225260473696365908, 13544564684169074596, 10807318789744351361, 16301252682064971136, 5046748520130689465, 7049174921499294642, 433785658536711265,
+ 14622798677938552282, 10254953149954221954, 2609148400899865119, 8197897294400569748, 11784700166516996756, 18107483989994737564, 5332550435198791193, 13389061302706010047,
+ 10892870009313407934, 5709686117526627488, 16343828906887149047, 16063790174205876315, 9076070192754075075, 4639156357669822592, 4904064385440242264, 5942264027390072320,
+ },
+ {
+ 17271984572727555145, 2097715321690717305, 5328035183708920909, 13206781766168781008, 5915693606819932446, 3870849056297265622, 18070792155150730518, 7786839743829304936,
+ 7939886940063870939, 3597929036186322512, 12732772396498766041, 13300425267730247515, 2345112231224079293, 12327614629296314761, 17106102937162407683, 1702694317559034053,
+ 8821632341545057929, 16708916351043551467, 9171560460858577216, 5711973041149612575, 15808302665077457285, 8412173943543358851, 12602476370021215716, 4778674887660148537,
+ 2707032230021934480, 14943945588296447659, 13333962501998857701, 10603744732015256400, 3386903941135274495, 8507451908469049716, 9231176235773710950, 995464562185179329,
+ 1251633503842168929, 8646405730060859011, 7525758827556699239, 12781503299321323139, 3762059070509735358, 12904588764985510882, 518456268741331092, 2752400776174997580,
+ 2965193395248314453, 17005383961540689211, 2510904953800463288, 14582093408745785683, 14930278391247460535, 15319775861672772086, 7915822626743762260, 10481055458185028303,
+ 750419144665626202, 4009275022270511551, 4810013488865977907, 13090697535304919393, 17408171061427940566, 13469810562978287715, 3952372983235223818, 12596642250700034956,
+ 1497127512412966214, 11292269970549257426, 13920714718237672232, 1363993524078475237, 3687544717271789391, 14097532154250414203, 8367320351336033564, 11089672460564007700,
+ 3927746316404254574, 9973240942982210953, 14122301504960485144, 13249357693517290445, 4259646940372475995, 13156585130751616020, 424701295260668232, 13006066175253059189,
+ 10308406270137410351, 18444509448482991837, 12509509048086126555, 8343613342319774737, 7902938427788958534, 9924925722974851885, 3762983399809982874, 3397026083592781307,
+ 2116063253535766674, 12792173758770945620, 11357854367169339661, 4502375174987631263, 11374536277081101446, 718846097450449491, 6555869089578359747, 8571702699293548804,
+ 16993953276024768829, 12025590727803527383, 2672535181364381995, 2183111709986676538, 14822521615475063339, 10618745739454592027, 9957515072444869664, 4797521918890127374,
+ 4582720743315312164, 18280419827142252016, 13380629421749133990, 16786942173448683921, 5929601960379909803, 6715621236552343797, 11192523517069751956, 228125323996215945,
+ 11861176681909235996, 830114321507110720, 13132205092670772295, 15318287322892364044, 9027539547229346224, 17629383881225964174, 17167841317952050345, 2169650970520181403,
+ 11366374573920831312, 81590945660392134, 3339836125719372590, 6176339191258224503, 13498384602634172923, 17380518646558253069, 6505222573402454298, 3404196177841425416,
+ 18097777493026370528, 398882723487597643, 10462164796529820357, 7704526739683689547, 12438059168025171393, 8163001730991802064, 3319372757738711275, 11518015261707325261,
+ 11235814273447729629, 17827792960634604769, 10951458440780869321, 13957342734404907932, 7344502969085321880, 15052563297577817908, 9276707332138855229, 16077312904856138176,
+ 10945312442787809330, 8353703025227023259, 2523779015225839136, 5997809042199398051, 5595238423081509301, 1901941038793828090, 983256298544496123, 929860142706986099,
+ 1859865867925261002, 7171610585558961405, 1708769301690637951, 1669524125723022855, 1126574755172968332, 2284167219201068318, 14925928881489847288, 4349400324295146347,
+ 7809927122217534998, 6003738786706135597, 16541272992243715653, 11352602417510156921, 6070703545889058045, 3742828100079104877, 9416263310824977145, 1406892456840131842,
+ 15907861713154660116, 1789008941779432664, 1086909323248385272, 5337878342158043460, 16405416607888113885, 8262005873603178297, 12775015438669465500, 18394022127507526579,
+ 9832954710628449545, 14612341389215489179, 8860090710725054989, 501948203387806263, 9969784653624607958, 8524029089859194791, 18028069685553315869, 7863046312587606086,
+ 940114737935470576, 15661699274328042324, 2496303902262203344, 17900779334646110602, 10928989106385540880, 11160326060698033088, 6591628982832414200, 12860480197090465356,
+ 4005088727670892586, 10215872293486093012, 7634075494800881158, 7707144163086802537, 7085702388211248163, 12168208652772836726, 16347414703478252736, 9386623607672009940,
+ 10720163442173876242, 185901255673735804, 189524655270248212, 17937598483325200462, 9672767027914841820, 10911520298114954717, 17238190140596785520, 12302850034563751398,
+ 1819872496749425461, 5856173707582412777, 8519855762092470428, 16315722646681962500, 11554620938156340375, 11822224388003836169, 8221825485736638318, 8413608007099273503,
+ 16265754830488408124, 13716061973016396515, 6050948709307262401, 7654402024031530787, 149712175032162058, 8319305312043679388, 6115793437696694706, 9669523344165801010,
+ 16530167766100505402, 14672843572926881623, 10752276706209340624, 6888427606311779724, 9060900993058482991, 10439396339124215720, 17640982730425880863, 12916411341252677116,
+ 13182242421100666863, 5267519287044706312, 1780278156705475292, 12839545421803851420, 9724873776343820617, 11742602593528879231, 7598735248774560112, 4512374343657017031,
+ 15080601286098047668, 6570749562833033851, 17494147260115946401, 4878559727303047850, 2259644365338981526, 4232993783633913284, 3035566741498855042, 4306414296080197928,
+ 3878740698199360708, 1631626027682263021, 10479957065093850273, 1678395573475248815, 9964111965340163993, 8992208313996775841, 8147773940555378986, 6205594793300838098,
+ 11776245660806714247, 18129100488268011560, 15389929343761297429, 14544292652959164154, 18063319363114104433, 13433144948667100183, 462314076175867487, 1862686058702915115,
+ },
+ {
+ 17596113920958619489, 11920824479658301378, 3786905428099710821, 11112764289322906111, 8817260450068629272, 6518847791514184438, 14593687325057339783, 18018581321332807475,
+ 2056254186648603335, 12143016575009965669, 3290271983033386616, 4338404082685850854, 12660444972475266123, 6345409453044955404, 13278869140778657354, 10063005167581762786,
+ 5303950317664091150, 17409642188139937758, 12135894348302287058, 16444741243419918751, 13935885817311324323, 8496908197501732703, 12163317723288843510, 16891353436915314685,
+ 4168420842704770545, 338440394515115377, 2102734009665481092, 9772725684473416465, 1247616337419130000, 4222072446972129729, 4836898447189504007, 9154346000276664999,
+ 14649401370738987318, 10044081678892811795, 1306517740393103656, 16586067283450489622, 8211832777731897494, 12849393859171286949, 17327526552807783018, 14171704619529498238,
+ 12849053390245557890, 12693731077061619694, 5891211584023409476, 1008960642219209210, 15593393872187214387, 7381248773473326001, 16559656086795428559, 2106025108820858934,
+ 4347667984894353308, 2129883711661222690, 1076488804494872856, 15059968377755299195, 12380937310378496242, 16425480883270944108, 4769192332535557551, 10388305045430499744,
+ 8553482596123775980, 12627020622203381515, 10774788686070344669, 6271789114731522097, 6270586844349642345, 5584690155569770537, 9478526526319537174, 3405597782572827563,
+ 10255408065071335198, 2384996481315588922, 10679447117297819075, 3734157461055440416, 1635042286365643780, 11321809677772938996, 2373960964139986040, 4869129159740273704,
+ 16681657814739017038, 2045405432090866328, 9473434018957974472, 5549808619751464269, 3650191693776946828, 15805634549736708741, 16764187297356548667, 6849534466635874363,
+ 15295973549776084894, 8150873746601283651, 8017271980921908229, 4899654767305073834, 15257501950484641691, 17365331272978440066, 14582514748324426172, 14727798284858071666,
+ 3180828552573045472, 17698570278585440462, 8986722854533057232, 4518665117523514004, 1718699978312382231, 16973074530347269964, 5684175730451960274, 17228649521631238473,
+ 2897180423141326703, 13691273076814343144, 2967066815351306232, 1032540647518716635, 16365716239989474776, 9503666134295232767, 4599017511227366879, 14475021161770955579,
+ 4512626226592149712, 13575110216387069176, 13488662643497927183, 9673147324484250604, 143936220816719815, 2307325398241477161, 468315224038772027, 17743582630124600591,
+ 1476494936296433813, 3408612621327299295, 11632756669813257564, 14296719897220703576, 2241234207239321749, 12237974399755684219, 16253635131573079566, 11892871179872526079,
+ 8492025373682814159, 9341562763134230960, 10104358220305514878, 17545404790574939459, 15737064134098795026, 3880597485411313924, 388663918823974597, 15032915458906101183,
+ 965170717686924337, 17890763357610871180, 17588999527212997589, 13975763809893342952, 5109669798963414493, 7891916061794044612, 4789971769354067287, 13049661741280223048,
+ 13047125712251887103, 6463859133854403980, 12163714608926575519, 1018022589444930561, 1736262463418943433, 15665908585957261569, 8814123147308650486, 4798595000205497714,
+ 7506024921976923590, 5624217973379693717, 1727881279508834884, 6843292805721229768, 14721392495779901462, 15739256886195814859, 4955780690636679890, 7941019297099640360,
+ 1679075860853192334, 11512607379850785303, 8606478479964129549, 11355506676698290554, 8638706560448426692, 11314948425220125007, 192055228531777598, 1407009579456262795,
+ 13286048494307428802, 9003344338654830342, 14153483128501604993, 18443414499226619410, 1307798988667786279, 4422280407538765508, 1295189585361403916, 13026087054487841568,
+ 2784400402546744378, 15890015749743051151, 2721137876244951114, 5096506081391833837, 7797931926335359630, 11689452583898581132, 14104784079353186010, 3188967683935096888,
+ 11330231400343362000, 12540174755074499088, 10360616188128925049, 3458938550662172118, 9983547038346979353, 14986318734459319551, 6345635939240236679, 6320686366534231970,
+ 14746231842102190633, 3622710091833101572, 12626649920759262265, 7005023704205322451, 8825692535315999015, 4515463777365688013, 9604411949030284505, 440641657645532650,
+ 15551627195111259367, 17849357920181562395, 12870839822102241674, 13056924324197787476, 15109676201265834798, 12974964680406025093, 12841612415554457877, 2986633967268013902,
+ 16172780290314506283, 6024364507532794579, 11981052290745212925, 14235100278052078327, 12912505872017936022, 4228821405313065812, 2097279962856941481, 5542690235350104224,
+ 6815123236701278368, 6160442936745831892, 8260371745376005388, 715614132817606668, 7741929556783214361, 6266561134278568728, 13996537792746468394, 13116379830815883328,
+ 2212637711439965528, 14217650183061928416, 540869510933311267, 6195450915592159668, 14904187357299823445, 5215400274033728644, 6907285779833076435, 15804222685871187391,
+ 11124404141552478131, 15318338152189072517, 4067893425297764249, 1697503410612817642, 219476584157335935, 13106826093753036522, 13049745208482785760, 11832231199490391669,
+ 10493328169052594634, 10627081887813813659, 11388663495964140754, 1823051796271862577, 12476400584366298768, 13709148086980681224, 4023418280354022922, 7805776130903851852,
+ 11989040498741265687, 10985482710426001838, 598034896692782488, 12230542700168560619, 1496676485546951232, 6833695919100964828, 527254545105454187, 6276811762216173921,
+ 9452334891928331309, 15207477416486363718, 8081596400773698461, 7528187414539292243, 4773782664413845594, 9435822414884353799, 2722451711480791323, 13642248100035410269,
+ },
+ {
+ 16570512961992288509, 15784840936011612960, 17797638261217375128, 10589124475671334830, 16848440972121709899, 697679021479737566, 1456738239410270672, 7214966028857473701,
+ 11434157439953722433, 9090860151661100670, 8407869262026581159, 17882547383849407986, 5277257988019769029, 11463853580825342183, 13607489835500075467, 16878800997498950751,
+ 16059297404709367406, 8426877510924536482, 15269952608796785555, 6256768900860981984, 13142655793714702073, 14566340391957173918, 17147065283704191109, 879106195264399,
+ 1169887957113173798, 4936710663791789838, 8029349457377894665, 3317691225106441967, 3622769674498678594, 1954144360900150482, 1432045977341997899, 934737636789214593,
+ 3519438486034584001, 16935008858078961013, 11197550931955434437, 14100007087636813411, 11096181469792598416, 1355325916242161182, 3003143504356161310, 5855778258533244174,
+ 3469569649879539425, 2734409579230557820, 11243101718460963222, 14449395552625174865, 2832531264658408884, 12736053595733579053, 8514265014627300817, 4646556172848350801,
+ 1035318057221356651, 16671259128991069733, 933190532640544326, 10073090919720429370, 16298191828003546946, 89465711115503243, 1726024123088964037, 6293688775358665990,
+ 9646181593082190523, 16068017411165381498, 13319927590045936565, 6113055520205251774, 5576373032696512038, 13880344501714804196, 12022206929479211261, 7387979357081975315,
+ 14353322331225872624, 14137906560696321406, 9597841809127146330, 14479164716385450316, 18390885446667453601, 13789601367024187697, 10599569472420906075, 2183893278236533867,
+ 17984045804937170919, 3008713646989855019, 11485907798025297413, 14380740919052976941, 3447658178143557676, 9095585806057103587, 698302327819557146, 7922185712178340950,
+ 12592522203651044465, 1491459000495558958, 16988036704095888812, 4620491143241098446, 3378886739158384486, 4601357523445825298, 7994566595487207192, 4507705687201434902,
+ 13553890993911526641, 1237937561304731611, 11053651340854998814, 8737874920379363783, 15414508226001886918, 9311523317130687807, 12080742822716708234, 4715194040021784977,
+ 11413808093931937231, 8917864246709968658, 13554307490245863526, 2827250138133094370, 7637229863299622584, 6947884496333162143, 4815638417787805223, 10291803792760846879,
+ 12634048591085853546, 14321728045924893987, 12328747445286182403, 436433860116637979, 5910966881246245784, 11478152401127002379, 15956760875311888867, 9580341487614973173,
+ 5405380974793160831, 17240616607222875763, 18407061562244315632, 1777973921117407170, 6264824459040652928, 18267228171835175724, 1278124974392461846, 16099414790431742315,
+ 17182132836049601530, 1486820395924546410, 14089047182364760490, 5375431386740917568, 2352352269256972682, 6220398075470064021, 11444733678216077372, 8173002234516562536,
+ 2862732059898640615, 16485439906762588046, 8431038226899400253, 8931540904642954995, 792388953489761723, 9296018986570869365, 406447163304028453, 13270392174836298982,
+ 7772216409762031099, 334915652033124323, 9705063888720244418, 9829080877134570370, 13800256788887079067, 6121678265481939341, 11747955988727867922, 15561904257519022219,
+ 11919247213871390231, 15058232650937578711, 16509435050946309716, 4585646383560577121, 7109079914182671169, 11070600764446172401, 13212464399509938674, 18057628120939602025,
+ 7896339057146013415, 17613745684396593735, 4238258305077796259, 15438639564771394288, 1482402451684594916, 3104492350488980260, 5195487229736262641, 1361150797580203482,
+ 16473144777827648385, 1226441665397311636, 11587851779002173636, 5723433682141598491, 3818559725806959172, 9177247615754603193, 9531025756553277601, 1716800965379459594,
+ 10276432666086031385, 14627452075834666747, 18000674623336885291, 15328386422432799237, 18139556051577750412, 2213209867616444127, 9597214736611625280, 2458176131336755868,
+ 17117806110792423322, 15887422186644000204, 18208397717053970445, 7361598408210891485, 13818554723430207838, 12816010025806087198, 17883064936092688211, 1716893191215908273,
+ 15949855327936545010, 15892613064802301412, 7776232754624386979, 2193423430757249310, 10751855334513902678, 13772090376350944100, 5644047706587449960, 370992037484721524,
+ 10797607675141630710, 16625594903133516890, 4419277245854893028, 2258389314023537326, 16758443963306613299, 15289730903996513589, 17124263661185245991, 15862017582286958991,
+ 12272869926038995590, 4569089761627301260, 6495211060884501138, 11124576322292919277, 18314798224925096163, 13232331582712038213, 4402693571738173484, 9384301049965683996,
+ 9611026793910805538, 11152002731283498747, 12528844591327054806, 1485961507414531975, 12820893906402770618, 16248407567081091370, 15794078730898691457, 18231302945982292403,
+ 16329953561108738383, 14553061366216182492, 5112198484879130320, 1893048255589705004, 11469678860036344015, 8992526488267920066, 18208245559304821518, 8980963391041920447,
+ 17694621707784163170, 16317870945177246022, 9173265111786762052, 761657177655159269, 4876597178189658673, 8185879737091523833, 7520213092991049248, 6827963030155074898,
+ 4104149897263226969, 1008830393844825523, 1692548451180187046, 10984924503299463085, 3526468436911084942, 18012043951628458646, 15181223987778535698, 18016160796635284569,
+ 1524865955503524054, 7479709667052337482, 1064851658558537015, 14404181438374010311, 192685278892740648, 38546070651855709, 17006752915941498851, 7120034574635007914,
+ 5783711159088718530, 3503029393593618754, 10736192136662142256, 15443859608128978668, 271805305647187927, 8244170003029416464, 17017071503038388446, 2536084345379259313,
+ },
+ {
+ 11535283160083833975, 15102917952496589829, 12909095696891909745, 4611119212602854744, 4195756524232932570, 3754603965149529000, 11539276962358607816, 4401424391553230481,
+ 6842993463058627120, 2708745114617181146, 3832382691810282630, 10333834458057320812, 15028197197004077388, 7681293343255119354, 16227537691626587485, 6703363869832495607,
+ 17567108285065737529, 8693245930330183070, 4025672038042952121, 3491067119247522844, 2100799837993193702, 9579542950705774054, 5091525285265581997, 1806757015709820088,
+ 7757354840118131559, 16227348978414023262, 5800861674805516343, 16486235803248700961, 15646700163023677309, 504258765210816549, 13724147899924699697, 13588899538317277096,
+ 15895596198730294923, 12513494396374459899, 13552575153453342654, 11312160635199213507, 12446618129881485796, 11286186116805515445, 1584191291800429567, 10188741008566636830,
+ 10157626021970320469, 6580688164585715207, 17158416694471177787, 2873484006255074946, 14852216457444863041, 7463041347566136496, 2190030775722958628, 1116223035803602728,
+ 8598875756176012541, 2146658618778432566, 17575736667490551813, 3602654209723135985, 7223347129734689099, 7256921024940026570, 12249118963297692963, 18168532365040819099,
+ 7585174044375463988, 2491923406404093282, 9355733262044036112, 11484079611349577325, 13145030009254943291, 1234932884591406048, 479845670619308618, 87735010308690865,
+ 17307641427543377277, 5862898330718319545, 485176909118483750, 9845223919324484076, 4293549583740204024, 12765124043303931186, 17870894781132657382, 6508172378961793849,
+ 14234156373312856400, 16210939788814051708, 6478880037923038392, 10435512103812909561, 18142844321802359753, 3243996803126554757, 395603619159523019, 10213589535656704607,
+ 13256075137060741204, 991285459654295161, 7857174835799942102, 16201105040554625727, 18398612085349994216, 4512860776717176840, 3521452275695011919, 10622137733045360311,
+ 14993897771652736376, 3320388672992783302, 18421556740970114213, 7438967127794745135, 15048397725789819354, 12721899159423777001, 1135125102123667189, 6637485660805148630,
+ 205606553319362431, 15484041062449893984, 12896720585684759529, 16675624014916513672, 7579112430564108390, 4602460356900251082, 2677787525524280838, 16723483619407414479,
+ 17981939084373319995, 3989926231875116241, 15342509934701840045, 12999252495311233519, 15389919024942778009, 6136335679373143315, 4117381415379544275, 13342256463740646775,
+ 7622769999043907199, 15227992053145045634, 2719412067263856950, 17811161335980566241, 15036003689029442301, 4919601685231030712, 6756893600905532338, 18029567067112075531,
+ 15804957839814547154, 7924193945337173678, 1080401129055171214, 10816444561878528576, 13171610473229361771, 8099842733764406841, 9284741070210451667, 9245195215372025289,
+ 7352706423771400643, 11698139248518535945, 12836997796963939813, 5747451345763534394, 7572033904536738295, 17752330422795919668, 16404760947165813434, 13477251724246705199,
+ 13700661979310397040, 11172816633936827025, 7087232997755437759, 601317374152115944, 16028751590837491967, 2749566595186093629, 4445248645671399873, 14714182958477634997,
+ 9630153311383545953, 6436885924177487891, 2150319761793765423, 3799049242167098053, 13902640603080497957, 13121685068446422901, 10099985117244683905, 10692630753099650856,
+ 15680946025356791729, 10935239212262708062, 15255596602338430716, 15502989798538458360, 906862139952472300, 16128362872146694687, 9038121313842477036, 18304772087255114926,
+ 16961332967315316395, 2872796507989583018, 18145413156795763341, 6377517922859730669, 3543621377523624979, 10701607653242580646, 17747200374013876449, 15987594633077120628,
+ 3487728992359467218, 11271201736509774853, 5968332690756574092, 3519182420015237550, 17144214368301394415, 1805926423542721773, 13696269518209159134, 942126126861202483,
+ 16211326119697754410, 9043490763746629445, 13119640043033532883, 9720862297941999125, 12662669665140843415, 2188499520755532048, 2671631563874698813, 621164898587943254,
+ 4804461736506692241, 10698651491947798952, 3524135800884470917, 11451653751986650957, 10424912129082359813, 6912080226482239953, 10357808719970986526, 11757540241185558439,
+ 1841157911016607198, 17522398974331009019, 17392843956059453556, 10510758656389762948, 7666344868273530034, 10739409534839380580, 18010518965344703872, 14478457810678500914,
+ 1617286442811984718, 4008909641875477702, 9328654790542709745, 12015676532404599710, 1763241944305573436, 9348325841675336084, 4072126498552363080, 17245688939115723821,
+ 17277555838655940279, 16604852268187595492, 11674181070138727191, 9847423407697994070, 13562477602392929772, 8181792789545038731, 6999326623175714057, 1227784795779857129,
+ 14923095901189736698, 90760593075377861, 11732533481504731510, 6000651333412910877, 7847865090059184838, 16859539404919901595, 2709105957383645337, 10263820965340474390,
+ 3161036642361826348, 5744405044938832483, 8939741099437920817, 9766097724147073614, 12117283480449144394, 14632688836103128438, 9491651214417380913, 13676064073123352224,
+ 4673738700738845848, 2758750364605631228, 16596306154038523740, 4124917668203310922, 2170509910405778957, 4178077301639210933, 5109055439238993109, 9627760120514410208,
+ 7259236533647228872, 9608619771663043870, 4148523959415435806, 8831501534166306825, 7919036824652396994, 13625969543100731176, 2546426055029800579, 14852443398434563210,
+ 7185947197544988797, 13306809398790059447, 11440959253278285287, 15084384140482244570, 5059824327869316332, 7585354587200615362, 6801884469610571467, 6427162399794357267,
+ },
+ {
+ 1453348772281274490, 13028088570940491471, 16100623112750251840, 5210244400259879874, 9443916155735416174, 16869520919318450506, 7479400208826706574, 14374970913352602458,
+ 10149597276931117465, 5778043705364022356, 13888443605241209218, 6889225091714429810, 16209363638493043975, 8516350006446457646, 9214830076046105138, 8894115046323798175,
+ 13026208197163596446, 1935567165776196957, 17296090833026336866, 8461284827078097590, 14545382879887213077, 3302103559307506085, 16026840048628844803, 4599071908983273604,
+ 16375505785562641226, 4903074653735993511, 10355787845763031761, 3188849033041803587, 3145147452182864049, 13878747873108378466, 2798650520315922121, 2325962694441538199,
+ 3587824003578414733, 14942115314012282784, 5312737955169054323, 12478174640832111702, 7866537714619856949, 1076672758151595875, 12163513815501875048, 8343003998956997868,
+ 1392855790923554429, 928317148609633070, 18064469091536763974, 13446092366984636072, 1975222540660444795, 15443703204026028192, 4016450768356916736, 16656367700821047808,
+ 11505659892372808762, 10322665279031813234, 16948868760451861492, 13706671779256832515, 1968671033447691229, 2446599110590827903, 13721045634744350023, 11053432744256776013,
+ 11530583608023224967, 15979155166527001287, 592931868138800645, 5361781029549479856, 11246418752830406096, 11882251984248471436, 12026389461944629423, 12289077922798223820,
+ 7685868633977346753, 12528120620943035447, 18394735788313062922, 8523101916703102094, 14261596878655792929, 6364753027040158382, 6433482955677830548, 11538026548937002170,
+ 16221339092185577802, 2285289075757313223, 1151823739515800834, 1010537560193450047, 10651328720714490261, 426599792903611671, 7181136719074122463, 15350479907287263858,
+ 7559201271967630136, 3561344874280275357, 16379496429080959327, 9662226613694502966, 10995850494788192620, 14875349277152338391, 730989400822814422, 14352408679782573731,
+ 1328339467113622233, 13374824154992864223, 12057132015324007824, 8507647484414695512, 9286016370798146986, 9083798709648036612, 4312959727577073260, 3732271624824420608,
+ 13007262390446754395, 5631627784562092966, 2644488352265945851, 15340614364643014664, 6692152810984634488, 3903120388658969320, 2513009101887784188, 11999681442010831364,
+ 8301343575202991577, 7108394080005777057, 3477086484634015274, 8772644795325835021, 14747670046873500269, 11068536909552811695, 12349879357573378133, 8898863927320049679,
+ 5231654281125841638, 5181061730027800171, 10657176101406080919, 2548241936122768256, 17140253194139059526, 17961047717804458883, 12407260567549658707, 1464243167234637194,
+ 14129872794455315922, 16354810591314135702, 3177417010197743958, 16494351130927698692, 16557765056328447818, 3139394196704738651, 16891813998271303650, 7374470997276173905,
+ 18244279644122204437, 10097660526910786400, 9270181538098732492, 362243376823712218, 12369863141977485111, 2550957813014523686, 7768963959853738595, 470330801228165919,
+ 14632133560934390222, 13844843545192224059, 12232948338847787315, 18223343336729121487, 5219716370019749202, 2729037467284418866, 4546118556621109595, 12191675540034649671,
+ 15868991745937371466, 12686820201922367283, 14799186526268126508, 1241626948050017911, 7080072468753824165, 10793537086099229143, 7855065677501047369, 9747396305344084667,
+ 1547814412223404746, 14194709348405912167, 15771028929623393801, 10945048172868684389, 11234483878611528405, 9542605377580701681, 4555391547268120796, 14916872217451960022,
+ 398873176015355134, 17609838001051405497, 3752252377312468061, 10441993652683449973, 5218696730429184844, 6514361533063264268, 16024112124035725701, 13453525984055386124,
+ 3452753233449684767, 8937677805622461008, 15346651759545553711, 17055791435062851045, 7025089132368408239, 7345356702153522862, 17760529714295220737, 10772604293313163303,
+ 9940223451501501487, 13671810088603813912, 1432075826781639138, 8145669182511487125, 852664959675237493, 11640273224943853609, 16220508798123490173, 12787926729706993802,
+ 15490131882364039637, 5140304620645573547, 9403655757498826823, 9824850954989176893, 6366139388903157264, 1981838834216345294, 14659870286877979556, 12091578124492612524,
+ 9672198938588933438, 1848525214318852627, 6500329435429402369, 8547555101599185897, 4282139019453982058, 13391782461875853462, 956960429753408730, 1865393350316759140,
+ 9155847368519787501, 17397005067762861574, 9943535970799289809, 16365250691105460494, 13595148040475375224, 8300668237654861875, 7149654787413440141, 9226755501324961219,
+ 9359301870380673800, 9344500253098672841, 282669288080771734, 1996492472861301505, 4777949892950526502, 1374151996531969836, 1529729047243522545, 3371309923165192567,
+ 6832171775190531128, 7903951882621813138, 13717573220828409267, 15400575122727850054, 6101307565293576434, 17226772767105983314, 3485283905347036760, 18439321272895583847,
+ 5095761409418784463, 15484673138064200869, 7743962634971666154, 9119597914470879266, 17978461537972396088, 11490423388734028778, 6537551181013090408, 11706370534703673937,
+ 6635008658379780815, 11428304571924124070, 392068559161657302, 16314310970878215293, 13769492864572584674, 16880907685897496637, 2877983831559338706, 10391660973242662710,
+ 16861266984653213659, 1968749328591656132, 13301499995824894911, 14523175695360471336, 15130688872832023531, 1794208537894703529, 10525607764904270704, 14288604199111912452,
+ 6289130721434350290, 16129337673129720846, 6648351866142453445, 7705983837166554199, 8120136556582544814, 7924417299222764443, 7821995671300500892, 17697662726179084739,
+ },
+ {
+ 14086745261650819748, 17953107869776717888, 1777725846494438428, 2765314406831814212, 2603997194371852991, 2223517025105045134, 15579901335531749634, 6571733615332171148,
+ 5478465698516556930, 287443454357750768, 12201204663663129285, 782849770204658255, 11270045035009204251, 18152683972363144337, 10463300037926642739, 14363641589598863851,
+ 3790592075869665284, 5120320091554329756, 15361581282394028709, 14360464660911164915, 15249656958991931940, 2563751872827049233, 13745792006936646733, 14572943674933168173,
+ 3167679014977537749, 3417445004776702290, 1709246158507866266, 5861367897285174360, 8437429877561342262, 17231607446243478748, 16047596399306766948, 17656438585829685897,
+ 12977336347094692192, 13723871955002954132, 17399919575273038847, 16163973860244779675, 14997921584659740746, 6543247361992425067, 4675926398794487109, 17580512382853194054,
+ 10778828023580771765, 9812611440536367405, 14168149065325522570, 6692229110872358750, 11010962724400581057, 13055203605065073902, 11396463140731730116, 16969789651736772232,
+ 3285258013801327520, 14114268241282882888, 5367399533198476395, 17277410244918727028, 2751895479568588421, 751309667192066167, 13038472116718581917, 16013586354334596135,
+ 12364168614988940500, 13194367252463423118, 12223406606527741280, 5014139294736037086, 11244699482831816093, 5336498572491451990, 14268404582568856142, 14502318820969599316,
+ 13295673091232554814, 313981772853781347, 11829785736449464030, 13341198421333360997, 16787450350663111566, 6525407229623764159, 2012189087039636323, 2110887590404698758,
+ 16984895766311549717, 1057465499230843916, 13565444352859773343, 1019268943988786852, 2190009140530404855, 10045598732295352970, 4789803496780829836, 10700158388954091321,
+ 14840255112988420500, 10066628133368048408, 6527354835873855384, 16106607076894419125, 3989024604987083046, 10713316697020150694, 16811389107269552264, 11294596059797019619,
+ 622403023838932676, 11423409007234191765, 17086830255702454786, 1213655124481948199, 3976593910085142945, 6315875935390432543, 17947502153968859628, 18031540937011531543,
+ 342675887578148289, 18366454547938469828, 3853530442390999217, 7499455395579634652, 768597405171336502, 1636271570683567151, 16786205337983572309, 13056241332863199715,
+ 15991153618414855143, 803325462494472241, 5709347424437559249, 10138502962472290727, 4675124205115228106, 9599016746550345197, 2769443894695609085, 12311020551315051087,
+ 11470964304318784059, 17313372553735202469, 10686997186364536871, 9228453594798060121, 57933324335302024, 8931389936300346353, 17823596478938529898, 1126996688113949835,
+ 10457697409306668434, 7830477435299250296, 7772379952622394912, 6109610138112185532, 4382803614711332911, 286231563340803504, 14408860285425706036, 9445290615796219321,
+ 17536503470234411585, 10755841521760916925, 15905055604395954066, 10842889504280336728, 10780689817551616331, 12490302644719656914, 14399349211478895553, 6458867531104408547,
+ 18263696007238300109, 14344529985676987842, 12351733186419515951, 14648750128188787544, 3817061805140294292, 13460439592176294955, 2651602395003150964, 17613970291099197676,
+ 1107162420032075035, 14270134754120385897, 4090584027509443635, 16066949131129832709, 10961641187652236678, 5480069976143996625, 8549185711954755645, 3125346629140152790,
+ 18337941236561689695, 2899707239319273137, 10519944305927777790, 6913088845356962577, 13796383125935684169, 2756403850320695444, 10018647980137529204, 15185795467001072323,
+ 678732289042224133, 4682655677430115166, 4806112367076139288, 7101473987708012826, 9907594813802528406, 5572757906434657546, 17819688309930892783, 8734990255058630746,
+ 4831187103439262344, 8323117365987697459, 10690753367486223100, 8744079000061869982, 12737295198832254982, 8667406752954112119, 7942221205780634447, 12612313636795726189,
+ 16544756691464009983, 3413196415113664237, 18442955254149959801, 10193439941306925215, 15178607176290875326, 10962146035584651400, 11757614026658646304, 16241528460556234026,
+ 9593194029396792148, 6376573619820601945, 14694667798662125868, 473733746941891591, 662607401339043361, 12105576520816361855, 16629740468317868384, 12416206001206121129,
+ 6655696780366945398, 12313066500177107654, 15308290012322636383, 2159880960357061128, 12374858353753840133, 11331956329091160239, 11794840094276473821, 14170553661155653327,
+ 3336445037682266403, 3746885541049520962, 12688476435907418892, 7399089272376853858, 4625608774712059624, 1535040330210000507, 6878676093546636363, 14533846142831376645,
+ 6250279356724130049, 3901672557903829499, 4884397030724371499, 17385695093365573379, 1240675257703364805, 9703149291204612490, 10295741452056015824, 7249917314998921510,
+ 3336800790505577062, 319907795530586958, 15852045830964927276, 5538471989976998401, 16558766045222360422, 3135574045680866961, 7590873965560972285, 17191236425969884848,
+ 542161999237385999, 11197034618748267807, 9595673564043235773, 16351524685431643264, 17127456926935032059, 15448416181819692483, 2035065584045934249, 10976830957530152324,
+ 3230012351101746458, 1662948417350421518, 16062645290454019168, 6021730506322388306, 9903225699788133023, 8549278993611431648, 16267339446315716903, 9759051179942016620,
+ 14281696762614162345, 14970002486407845832, 14940104843292093805, 7021313216295048550, 18131586729996841512, 13957742754718859034, 14322894803959804982, 12861731196696133597,
+ 8838210194055479741, 16190279688839291456, 16520121358582757556, 16768959728701970634, 17279896910204983538, 3582816187015369256, 372137484481226748, 12972230207718993241,
+ },
+ {
+ 11480031272463364121, 2525856087688686375, 5875255653057452580, 5606696614467440464, 12094715534218447962, 6173479337351577758, 18244015404482482803, 13199349349511499695,
+ 6097225965006328457, 2630336000831717722, 7049638698786526134, 4203465038263485233, 4677974565964393904, 16044036678123636261, 7266049959100300634, 11339674070148504929,
+ 1979992348876614621, 7494083465935166973, 16035723662722831744, 6820849908885211777, 16740735239901023053, 2017530162004533409, 3087262288875984227, 9072904925376793991,
+ 1830819081327272165, 11951292080860646547, 18002500803418629938, 13317986695217403840, 5005574200470766330, 10138179785738652504, 1610518863105278224, 13309443265578956954,
+ 8383241685301818332, 6629521234420338478, 8997316634951392497, 9976633096395028594, 5171570735720385772, 8129153749198218776, 10596302915145665067, 3726383961188926740,
+ 14494271933981109919, 9056473564311980628, 10557807473422306929, 1549747977888697589, 4534184223615087698, 12977118933167042558, 11930457930026683843, 8346041661496792064,
+ 7756665146227128459, 15140824521273386708, 693427108108819556, 14819189462601222526, 4507393341357475468, 8710020646873218244, 4535796033535410044, 11884504632761707946,
+ 14192194428653714479, 11578618567931657764, 6032668189933768881, 15981535562386193924, 8925792218631314923, 9408714146004183681, 8859017894477140398, 1675606340537058506,
+ 13447287279335601133, 7792625320709870803, 5347956615552953057, 335662974452886013, 892825083599137885, 1017154417323307505, 11923254597349925339, 13818364932306372202,
+ 15945578153358602744, 6304522416189552358, 2545503746231186920, 14563857419242252397, 9711931929315762229, 7949505077082864287, 8780199792032437102, 10109661610848827442,
+ 1628958128919487289, 9818701341087940523, 16480240737841185501, 12633108361335093100, 7982147509828265471, 17959437456356197928, 14370416793872407673, 3577532534573396628,
+ 3265119697568866078, 13753322853248061974, 15435163376825672274, 13646180516491714938, 15620545622955090250, 18241486585498394565, 459566729126467878, 9728217493753855944,
+ 3187267754295885454, 7928049460349917700, 737484733736740002, 16459695106656291329, 13000947522838718027, 11933371771659873912, 4720706464557996274, 7997020276574823784,
+ 12655797542121172150, 16574741514838026101, 13532097913031715162, 13578832971779600350, 18171552525916232035, 1516234640889581592, 13625693062693291889, 3869929504544890755,
+ 5134589228918179333, 5186584327613952853, 4114726195152833642, 578924708883784650, 9128473117392713984, 17390547319520768186, 15077700332150016764, 8707446714183162237,
+ 9883406233903270985, 17184231210468300584, 5498903678702785900, 6903833261382863389, 11489138095102981636, 9021901931786177471, 3227433089132150317, 16639455550797627042,
+ 2873811051246262545, 12759149028054319339, 17920917684786397179, 2576455203420292139, 3181713670456310447, 1109589489775958300, 8920119044838278090, 16056330702164227666,
+ 14277712977137303378, 3896132440952049994, 13252040000848248790, 7844779060499908154, 8651912177362107995, 5527615699397299219, 17688189207278018869, 14560189235973121390,
+ 10861951673021959044, 16820848382034483641, 6761027571816469843, 3555385088072119757, 9363319138244920099, 14172742116400795253, 9535965929584057049, 15628517769546199992,
+ 839045372102318861, 2032615141683118500, 1921816835840256924, 504869150737570005, 15932184001422473505, 7849475855313305697, 1189994394722618598, 9874798350548229684,
+ 11973862230325381876, 7670835358390713178, 2113918814334481709, 2289514490388170400, 4570586695077036349, 9449360682811675838, 16506880875578470116, 13723444740920888736,
+ 14210428465641691143, 15330786835024186142, 14173742373807858721, 15041216914246450688, 6027140311333696225, 17929849776014561356, 3955935517651718925, 12080548003455651805,
+ 1974142217308765297, 2347591555456293560, 9029256828390326937, 15561672734848339972, 4719329765359230610, 1924866103348851009, 4950258359754995181, 3245952633715281425,
+ 4636201671610663345, 16662311204732135584, 12398897461787695675, 1355753199357412657, 14695655628527768564, 1692838776852698754, 7346906782826540912, 14869028424564984700,
+ 7242751356102425321, 7770223857340282376, 14976092066160539655, 389604327668352376, 3472336298605227213, 11456496863560248128, 14597769228654187922, 17838672631901123288,
+ 10997764239928568735, 16648935573721932916, 13735964592293716412, 1364388817619955667, 2511134998006929958, 811167458534120344, 9371523327214092569, 9359553283016173792,
+ 15311627918891973770, 17170652974122329165, 12620036374776436604, 17439165699070911984, 18124482726761033947, 18390083820175794165, 5492125340791896125, 16506113603670143446,
+ 14240080119980315297, 4751983902385717684, 7941412103662252178, 4796803816695225123, 9327986654018000413, 15111345676387005726, 3582558555283734415, 11913156735178831102,
+ 2000999613038609412, 1070613390669459171, 595743672188217500, 9169968884291187666, 17960105939867090032, 13927107353795575459, 17872593051171658925, 11087013461789649235,
+ 2280207936513065312, 11704149285124951522, 2138093032627822381, 6816805886026674440, 12141709925796155041, 12164596600353477920, 5711177504477877148, 17211856235678509153,
+ 6061025098309340877, 4060747063380827182, 15186975689243681841, 18380882388949786706, 927262408801716090, 7447789770541113239, 14074172896745727729, 15372475239996145400,
+ 12915868875134296756, 3071143582305760275, 10848717856747030010, 10973183887638314967, 5983269487036236823, 9885184706933333413, 14509493669845681951, 9669608065801379977,
+ },
+ {
+ 1190044221726873109, 732025666411072563, 7781096808432277694, 13079879637655900365, 9048680577986693793, 12528346194104542124, 10575369576322629835, 14068913038053465964,
+ 3297775431071502652, 7947266993949467566, 6161673381820779563, 10604626734847205996, 17669018066064502925, 2386071939136502354, 1076023928806956187, 16075459682742916440,
+ 12387028018538599950, 5791681577936683396, 7536922194386052138, 15377770247350254336, 6564316170002440575, 10629371267149545956, 16125760269901494382, 16051398044897962024,
+ 6237354803374899565, 1782400080026955610, 16115027077529639342, 17907259814836219524, 13179234383592426160, 7302978467158243198, 13607720993463749225, 6693531817169120736,
+ 9659467198848065929, 13765104107473534739, 820057324758632016, 16604163594878267405, 15321524555426147294, 14524167419333362957, 157291234996957911, 3689488130722730671,
+ 13896531645245568605, 13598412639648980920, 3875086557856143664, 9242105512956135595, 17410265178284161091, 14904754836127450463, 2354327747158525902, 3858493849578874654,
+ 17288438225198818690, 15275190140700748774, 14532856272428522409, 2192294551011758591, 4275966798014984695, 13252812474545742114, 8604705074440591847, 504054699541924162,
+ 3623027910997185067, 15957466839356832933, 3504133700071704955, 4135728439139397021, 1160600455691338871, 4431928072859292239, 1542956305318065546, 14109181184369694855,
+ 9135041378474229537, 15626265487594878380, 2645257479784082878, 8105679948445763432, 8579715781576202730, 13437359533149316300, 4344754720520585005, 13326372852934103673,
+ 11341622086474305983, 17104958945811183278, 8154944722722765678, 6696975609844549775, 10895966360082878017, 2277686037575837054, 12236842881540344526, 2086470279064067791,
+ 2942668817124399651, 11439025398519255779, 17477264434671471242, 9606507094132585461, 1771475584689465869, 10791261379080046759, 12901998265727968789, 16423426725595035468,
+ 555419805011031621, 13715683285513290290, 15526654591995532751, 7023761423093873354, 15093366433178982304, 8625749648874372840, 6412297471587129206, 12161114204724955762,
+ 9362623422820895267, 6491356159483508885, 8543224082493101588, 17290929683278258587, 17348088672546490894, 3323350644823701753, 398525379151271104, 17794417664139368677,
+ 5880914635028486784, 2898124605786107494, 7953506026198179840, 2189477357380899574, 11207122442953694293, 9288539232660208671, 13527697625010979453, 448462453367651162,
+ 11262205719433170585, 7539606872920196479, 492385789142086954, 16036594120332851267, 8084547797138041354, 10063336189771527714, 2484395715469739911, 4879298610504146231,
+ 194633990068625151, 1028128699653523895, 7436102537486268753, 1855520454959348468, 5098690958966904466, 3342676718901330604, 6942212660412942406, 16510188838209674083,
+ 8847926418142642416, 13342814936333556870, 7082971165114489981, 9307541560233171550, 3013250253632606412, 289291445038948658, 2146586262066552853, 17331519863134074479,
+ 13747350589043267381, 6342477793304440894, 10087817302474912962, 18076409030930122816, 240893790090129194, 16364025156922886538, 13518167316464590136, 8034023137744309866,
+ 14013962864860194917, 5900268096329749317, 578665371390927342, 1407477069341362927, 15142181569643383319, 5217173950329651840, 7999234477517614232, 14722759409199120103,
+ 857809728583972424, 12785911478120621636, 11351522624307635957, 6288237507210579634, 15030282678317951238, 5521516375915920991, 1243458350248005764, 3567973108073272461,
+ 2834660673354035229, 10728451420162645120, 13396052377701115268, 5730788601228223475, 1616452754395208882, 18236419625072278846, 1179103743668306981, 9179882267624091153,
+ 14087439523312437665, 7814060325531094731, 16777382811045908592, 16916500419494378884, 16312259370378381991, 15848836870251345077, 16352246673954213424, 5228629139249919486,
+ 16889782864065172884, 4486233301301317202, 12424854171669138635, 12490430652425390015, 17335301435940798099, 9685877269515541824, 8728054940414220990, 12781228698496150916,
+ 15949574575791239459, 10394047428873141717, 15915386168603191086, 11400139144838475744, 17198578050070482368, 10505386377385262682, 11269920026804647377, 6796664536515856824,
+ 3285292208372623455, 3132524240856145976, 9712372580979649457, 10523781046601368064, 4797168047433466290, 11748687702182632788, 4177442610271848350, 8549047039673741703,
+ 6897143822689085584, 12727238507037725105, 12692443686524496083, 1187606781546544862, 11016254758711515442, 15622643445771261724, 16669571628491989170, 7099030384210775042,
+ 3541342827520921014, 12596176563952906070, 5209816272233406534, 17158219989390045881, 9096207539532607151, 12291463442559161708, 5188945441827912424, 12721126884401346746,
+ 8137171114112655022, 6980210601071561830, 10156197453074241467, 4481350962546197264, 1902625885079226055, 18310954340184922140, 10624840172920793227, 121304340390125189,
+ 5261036238619130144, 1418912875050883205, 4741122621962383610, 17995804738511375239, 15493968366750472633, 8417505178158780382, 14685111207483504618, 9729483826733360559,
+ 13434828863770210135, 2940287993177646896, 18241509923298296610, 7613631401252601295, 16815829390381900683, 8566309323681786413, 11094402941539790077, 8442864581771174862,
+ 17650059733379916650, 4260524072883785227, 8351659371377110706, 710338300032970720, 5512358883882849029, 15533759462039713914, 14352344066268876816, 16708588250954382269,
+ 7963995066277321795, 14986230345162551366, 10616366460602472778, 16186943329215681149, 9522855644164647723, 9858336437877021178, 16344942402271072884, 18027353494090590022,
+ },
+ {
+ 13175608910844763921, 11168387468381338510, 5555873864224342205, 17780559602571640345, 1582669533588472814, 8145133762789622706, 1625372887980692455, 4576809104068554285,
+ 2900978533555060931, 1749946807863203576, 11705734511931072459, 4446980691693419424, 14437524160594907241, 12795912566640000773, 10400036254697939154, 8356113088668925361,
+ 8900726473285671992, 890209742292473781, 16059468388719373167, 16577940938542233221, 17381766864962227733, 231915274132518769, 16477817024012878700, 15646021286310268545,
+ 17511864284462479051, 4391315454785363822, 16633389246499252566, 4290325975494843430, 15912670649662225191, 14172733821603328367, 12709802627494541709, 9648238681838878635,
+ 4196883387800999444, 11261069840079702254, 7135968036494594517, 14537849011631244070, 1930221875891546479, 3029378442249928630, 6219774718328347890, 5922417028077454069,
+ 12470404961188516783, 10420863874252801995, 1369334550763670666, 4781925915058005676, 13086538845357988031, 11031029468188842766, 15252022305121561855, 10497770692868889659,
+ 8808726441417268278, 8201526072266158452, 1136378216494101481, 15014143365043073153, 9752793532008788265, 18423606089906807282, 12708377698545978413, 13697405528655748691,
+ 12840624483048662115, 13614226123606379619, 15087571995173849344, 1395488309690794621, 7173958427217959878, 1198561064176820332, 16821886260165681626, 10019479169661217935,
+ 8089335244564816024, 4652396396372834438, 8473523796721514770, 4493469844513366216, 5448577418901909194, 13487502520341193870, 17978888920023381084, 13125222810420128122,
+ 11369140159803973135, 8391820975236191093, 4865145203995472561, 14899728882581231165, 11169032718524058736, 14532440521453562763, 7464998467530340358, 14981246381757614771,
+ 1242992129726403300, 2014928170416925311, 6341058783517690890, 11690291498253019630, 15951586247597589423, 14033993236702971559, 16963725291567059464, 15075124616754319226,
+ 9901328250515532535, 12796305315632203942, 10819069447660403847, 12542238242508661240, 17077777737462854238, 4364294351375777648, 8335244153165894685, 8863541604979427311,
+ 12648774127830238318, 5576880401343711606, 11786867566889606858, 7421038047404952574, 1037620897405863409, 16680872357246325645, 10443290127564886223, 9718182896013830581,
+ 15199007294286606824, 10225517367964809030, 12827778649823141649, 16046039393887815140, 284741135347255597, 4015036609703147909, 3902771329647468314, 5128707935422024078,
+ 16953148044626345363, 5101833106541998042, 7655361325113312133, 15948906743291877728, 11619404274756349415, 7521254459857915559, 14978040810149334918, 18392994517091959591,
+ 11029520456752683158, 2297291237551781454, 7934683676555855479, 1563954733146546128, 2252805128528536255, 10523375695822939112, 4598652721256818062, 17493379105069761435,
+ 1717382065475436420, 12444276194160330384, 3605273785657402256, 17503422605669121015, 12066457002913396306, 18109827202111882684, 11555238022219952400, 1993655497207863497,
+ 14852223665677610379, 4743851404709775958, 8525514757892437524, 10402777181498591725, 17096034999928466604, 4289543339828736175, 4306145672261807290, 15366124999227707930,
+ 9159996652502914864, 10301349423202296392, 13530090903323000632, 4082840779249202050, 8824726775176314063, 16033767772359951785, 9181204573557660412, 7582747879080399183,
+ 2747551692916263132, 3151123035150683281, 13938314333309683791, 11970864255483579560, 7894236725861110998, 10515613623232567716, 10445557830757066839, 11155097727742050955,
+ 511773909731247800, 2463012923954369656, 15859141716591041471, 4940781664379556792, 12435624343534748284, 10278084527750129328, 13178135781125832492, 5930328346313533490,
+ 9122100956633732655, 4007876149211873275, 8339438668849688737, 7791075492023424820, 1771434463088769532, 17441254147665904787, 9470298561063283873, 16552350513326886351,
+ 2842235074681099572, 6045955093828210410, 12534027406207583719, 9023829328537353058, 1661393615224515538, 3265308219651631507, 14825441620546235307, 6190486519065164472,
+ 9569762631377921701, 10341967309273270300, 4697570564829842013, 16684762125903667303, 5281929071085172097, 13027961546412671127, 18398764602992176314, 3872796039504642974,
+ 3625046733403850478, 18072436321985678348, 12926724831909767455, 17260889391923475120, 2205546675974800411, 4823605748727161314, 11756378320036258430, 12725625915588142865,
+ 3620769638269480452, 531680461898170, 10213227252513668871, 15446816035938502695, 15189868868814686765, 8689079054027125562, 10767300823767661010, 12185259819280811201,
+ 2954211398425891878, 887045055614004603, 17024961552904496644, 11183183814128062689, 2275500228939836642, 8454621353771976628, 13706050871317440521, 13152782683288296426,
+ 5035609627965208105, 6338722048255995169, 11594969687968475335, 5639340633771742838, 6928934109570365590, 11589609913085167707, 18353534295019770247, 6879412911232288750,
+ 13223374340976842384, 5803903047180226505, 10382392856292138950, 6807712794865416975, 7273350714698980002, 16071615116869102753, 11130104866208195846, 17099222713467268806,
+ 4288395624114587260, 10186593737807119648, 5970871638851383523, 17274628153010076591, 11083355019391773968, 15571651381840416650, 7269503864920933911, 34314525388851272,
+ 13518907013113787435, 13799188114963266222, 10949327997049617616, 12851874981220611149, 14245323937486085594, 18324803746062213911, 13224741762315864830, 15910543010675195930,
+ 10042008593756684008, 6713471518385846030, 10961845838926538977, 855334359173719954, 7598642325962297361, 18006511265377032901, 6844179774752820363, 248341098560552116,
+ },
+ {
+ 9259451175701532574, 15597985540941612547, 4541553034987877619, 8927802479718285609, 14672230714928802576, 14284617950605306700, 13339970812390782046, 787078889494395850,
+ 18165439414433063919, 14693282888419586094, 14992616289789993230, 13232208150801553912, 16850347485914337693, 223364064845089224, 7302899015436300563, 4391216311540396088,
+ 12558388403188680537, 10687978569513354810, 9373570856564649772, 4079327722512691465, 5372437374337150740, 3297421622932790851, 9326853593823854687, 8305371884938201251,
+ 13121472331750731303, 323745485980480220, 15110308702528119977, 4696302365805118372, 9859094500834571602, 5315656270700787939, 4187063204400651801, 1695695129564818327,
+ 12817586844019849804, 15425488216915741929, 15293562913126043054, 11770898970765978555, 6806103820345670787, 17789094149138630459, 3807781571713999192, 5823254011289457198,
+ 10112524230916015604, 13571143908656270599, 6031089010487808103, 5774558171390644848, 1808959243244841573, 14071312530426692155, 9241399242704553402, 9836150050655369231,
+ 14083007972403535491, 16009534514129505275, 12481249112231139039, 10309390659262581143, 5782517315325366794, 16717384553294759530, 4578392002424884134, 239005024949492645,
+ 1836857790164349346, 11263074778654917115, 4786985732596838094, 10525445084936097674, 1484357665966453236, 3110871407036679558, 1668700683093601768, 2736269131339849752,
+ 9249362594190920246, 8088172462140746331, 4080107866285682736, 16205599538438551353, 18312943145606312277, 9100632964181080418, 3969991115337154345, 5340098959750368315,
+ 12122448407048035645, 2891250339380982359, 12794212052932523400, 7166035112141500392, 15777441312724841162, 4552528682229856831, 2210391301762911476, 16410254680957321386,
+ 6910044784455706429, 11951245774670922855, 207795885948948810, 3884359690300962405, 4578772752480772344, 2948797025787714138, 16774444535658202808, 1388183483009000463,
+ 16561979129019905420, 1881862411760441386, 15441950514711322010, 12933020223458438856, 5570341451312029630, 18172638959728259059, 9074261777549091198, 6171200053222754816,
+ 10126435102176037237, 1545112930055760251, 11563139991870561593, 10766754172364938834, 5116781688621052838, 14471715119500781375, 4724882615229687192, 10031687030722046325,
+ 15016829467434780812, 6777508689306592732, 3811682306063049290, 11475361222871876387, 6080948654298953274, 11612742274762615033, 5268311600240325047, 9147351973710519861,
+ 12130203770930472759, 3880998309361490881, 8807462332494735511, 9291388074337055133, 18165312304248982432, 12733144434841535979, 18090195697688236271, 5825342884125782172,
+ 13034385644450400616, 16498936039876211302, 13019014556329727588, 17911110193068084415, 2034190024798401134, 1526884555531436223, 8108814142164064795, 4075908755148182733,
+ 6404489603451030675, 16601749455344583377, 12907740667428956072, 8446383351289364752, 698035388979141580, 3593914367189873571, 6744392605914028800, 18168078953147292456,
+ 14484703498492722699, 6030798911934842161, 15931002029256202288, 13491601992324491240, 8585568434702372483, 9113555433818303018, 6375455514164501736, 1476639189214882947,
+ 5209683814158663472, 16895526973128357178, 10457488910391635027, 17530003042232440701, 4030542539553991812, 9445656287872984963, 4212489101708176016, 837886727556721620,
+ 2983790309644332360, 11276772732994425347, 1310621139116895737, 3131558545024164284, 3529698629777893989, 13271773334458143370, 8171825923423406479, 305718858162497191,
+ 12347543988562572522, 4315642616923374215, 17586477762860427847, 1037334149869395772, 18311071546005132724, 1489505092175764848, 8926409949339059078, 10260307609945070716,
+ 1253128470325829265, 917180526219408600, 10471425074479693158, 12658678264459355332, 8502334428811982364, 3789675096428271759, 4531393894096863308, 12176495757901795513,
+ 15081766794595471636, 3944913291385975654, 14507832069904091484, 3624606168811123851, 11476968507677098110, 1056328247986567681, 15246608002200052960, 6566558379177875405,
+ 652315099041528631, 4287214618428297946, 11730560225433863449, 7935915607867430686, 15979159466809939008, 9209839549459543141, 9501045239358734673, 15490773728258884277,
+ 14281001444503113096, 9253803478538227873, 3842669056581745692, 18399990656305827164, 11841387504710944221, 16052361836589007383, 831115622826254867, 8219898649601530197,
+ 2339128600785774471, 1426373723429876004, 3299581455481810683, 16966544600853047157, 8419320331267538738, 8376963013864643226, 1996128805959565963, 2376766970819731241,
+ 10963611841695254911, 2914831079289063424, 15884256460390936367, 8724520571578171796, 329402220949313078, 8905943304651682982, 1280899166177109409, 6980213851983339340,
+ 5524321102331478758, 2149164875175284006, 10897639484514200325, 14605181792035620644, 316829907946955098, 13776780818927038502, 18167912441745301047, 16845612593885106452,
+ 15817940025107036888, 14405005201957074368, 11647077937228007305, 13795438792777593442, 9813339145840642083, 13196196205982926907, 5981408724994946624, 12303812818084690125,
+ 11821409230485627325, 1934768678832528678, 15798397330147151183, 2089418311766868031, 16206123311846842647, 10276976147555604745, 14858750786699873336, 2500482310681473276,
+ 9502861285700748804, 12163827978530279544, 9383068907817479934, 5717811606328725851, 12416198015020836733, 15689586164720677894, 14337921697724349521, 9392403465642971279,
+ 6696074876763293236, 10302972695440286968, 16647054763285662245, 7841147641486000427, 10612636854719975349, 3592516733641215266, 14172459823963607855, 1290213042793222575,
+ },
+}
diff --git a/src/crypto/elliptic/p256_asm_table_test.go b/src/crypto/elliptic/p256_asm_table_test.go
new file mode 100644
index 0000000..6b64f26
--- /dev/null
+++ b/src/crypto/elliptic/p256_asm_table_test.go
@@ -0,0 +1,59 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 || arm64
+// +build amd64 arm64
+
+package elliptic
+
+import (
+ "reflect"
+ "testing"
+)
+
+func TestP256PrecomputedTable(t *testing.T) {
+
+ basePoint := []uint64{
+ 0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
+ 0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
+ 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
+ }
+ t1 := make([]uint64, 12)
+ t2 := make([]uint64, 12)
+ copy(t2, basePoint)
+
+ zInv := make([]uint64, 4)
+ zInvSq := make([]uint64, 4)
+ for j := 0; j < 32; j++ {
+ copy(t1, t2)
+ for i := 0; i < 43; i++ {
+ // The window size is 6 so we need to double 6 times.
+ if i != 0 {
+ for k := 0; k < 6; k++ {
+ p256PointDoubleAsm(t1, t1)
+ }
+ }
+ // Convert the point to affine form. (Its values are
+ // still in Montgomery form however.)
+ p256Inverse(zInv, t1[8:12])
+ p256Sqr(zInvSq, zInv, 1)
+ p256Mul(zInv, zInv, zInvSq)
+
+ p256Mul(t1[:4], t1[:4], zInvSq)
+ p256Mul(t1[4:8], t1[4:8], zInv)
+
+ copy(t1[8:12], basePoint[8:12])
+
+ if got, want := p256Precomputed[i][j*8:(j*8)+8], t1[:8]; !reflect.DeepEqual(got, want) {
+ t.Fatalf("Unexpected table entry at [%d][%d:%d]: got %v, want %v", i, j*8, (j*8)+8, got, want)
+ }
+ }
+ if j == 0 {
+ p256PointDoubleAsm(t2, basePoint)
+ } else {
+ p256PointAddAsm(t2, t2, basePoint)
+ }
+ }
+
+}
diff --git a/src/crypto/elliptic/p256_generic.go b/src/crypto/elliptic/p256_generic.go
new file mode 100644
index 0000000..25762a8
--- /dev/null
+++ b/src/crypto/elliptic/p256_generic.go
@@ -0,0 +1,15 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !amd64 && !s390x && !arm64 && !ppc64le
+// +build !amd64,!s390x,!arm64,!ppc64le
+
+package elliptic
+
+var p256 p256Curve
+
+func initP256Arch() {
+ // Use pure Go implementation.
+ p256 = p256Curve{p256Params}
+}
diff --git a/src/crypto/elliptic/p256_ppc64le.go b/src/crypto/elliptic/p256_ppc64le.go
new file mode 100644
index 0000000..40d9ed9
--- /dev/null
+++ b/src/crypto/elliptic/p256_ppc64le.go
@@ -0,0 +1,522 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64le
+// +build ppc64le
+
+package elliptic
+
+import (
+ "crypto/subtle"
+ "encoding/binary"
+ "math/big"
+)
+
+// This was ported from the s390x implementation for ppc64le.
+// Some hints are included here for changes that should be
+// in the big endian ppc64 implementation, however more
+// investigation and testing is needed for the ppc64 big
+// endian version to work.
+type p256CurveFast struct {
+ *CurveParams
+}
+
+type p256Point struct {
+ x [32]byte
+ y [32]byte
+ z [32]byte
+}
+
+var (
+ p256 Curve
+ p256PreFast *[37][64]p256Point
+)
+
+func initP256Arch() {
+ p256 = p256CurveFast{p256Params}
+ initTable()
+ return
+}
+
+func (curve p256CurveFast) Params() *CurveParams {
+ return curve.CurveParams
+}
+
+// Functions implemented in p256_asm_ppc64le.s
+// Montgomery multiplication modulo P256
+//
+//go:noescape
+func p256MulAsm(res, in1, in2 []byte)
+
+// Montgomery square modulo P256
+//
+func p256Sqr(res, in []byte) {
+ p256MulAsm(res, in, in)
+}
+
+// Montgomery multiplication by 1
+//
+//go:noescape
+func p256FromMont(res, in []byte)
+
+// iff cond == 1 val <- -val
+//
+//go:noescape
+func p256NegCond(val *p256Point, cond int)
+
+// if cond == 0 res <- b; else res <- a
+//
+//go:noescape
+func p256MovCond(res, a, b *p256Point, cond int)
+
+// Constant time table access
+//
+//go:noescape
+func p256Select(point *p256Point, table []p256Point, idx int)
+
+//
+//go:noescape
+func p256SelectBase(point *p256Point, table []p256Point, idx int)
+
+// Point add with P2 being affine point
+// If sign == 1 -> P2 = -P2
+// If sel == 0 -> P3 = P1
+// if zero == 0 -> P3 = P2
+//
+//go:noescape
+func p256PointAddAffineAsm(res, in1, in2 *p256Point, sign, sel, zero int)
+
+// Point add
+//
+//go:noescape
+func p256PointAddAsm(res, in1, in2 *p256Point) int
+
+//
+//go:noescape
+func p256PointDoubleAsm(res, in *p256Point)
+
+// The result should be a slice in LE order, but the slice
+// from big.Bytes is in BE order.
+// TODO: For big endian implementation, do not reverse bytes.
+//
+func fromBig(big *big.Int) []byte {
+ // This could be done a lot more efficiently...
+ res := big.Bytes()
+ t := make([]byte, 32)
+ if len(res) < 32 {
+ copy(t[32-len(res):], res)
+ } else if len(res) == 32 {
+ copy(t, res)
+ } else {
+ copy(t, res[len(res)-32:])
+ }
+ p256ReverseBytes(t, t)
+ return t
+}
+
+// p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
+// is equal or greater than the order of the group, it's reduced modulo that order.
+func p256GetMultiplier(in []byte) []byte {
+ n := new(big.Int).SetBytes(in)
+
+ if n.Cmp(p256Params.N) >= 0 {
+ n.Mod(n, p256Params.N)
+ }
+ return fromBig(n)
+}
+
+// p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
+// underlying field of the curve. (See initP256 for the value.) Thus rr here is
+// R×R mod p. See comment in Inverse about how this is used.
+// TODO: For big endian implementation, the bytes in these slices should be in reverse order,
+// as found in the s390x implementation.
+var rr = []byte{0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0, 0xff, 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00}
+
+// (This is one, in the Montgomery domain.)
+var one = []byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}
+
+func maybeReduceModP(in *big.Int) *big.Int {
+ if in.Cmp(p256Params.P) < 0 {
+ return in
+ }
+ return new(big.Int).Mod(in, p256Params.P)
+}
+
+// p256ReverseBytes copies the first 32 bytes from in to res in reverse order.
+func p256ReverseBytes(res, in []byte) {
+ // remove bounds check
+ in = in[:32]
+ res = res[:32]
+
+ // Load in reverse order
+ a := binary.BigEndian.Uint64(in[0:])
+ b := binary.BigEndian.Uint64(in[8:])
+ c := binary.BigEndian.Uint64(in[16:])
+ d := binary.BigEndian.Uint64(in[24:])
+
+ // Store in normal order
+ binary.LittleEndian.PutUint64(res[0:], d)
+ binary.LittleEndian.PutUint64(res[8:], c)
+ binary.LittleEndian.PutUint64(res[16:], b)
+ binary.LittleEndian.PutUint64(res[24:], a)
+}
+
+func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
+ var r1, r2 p256Point
+
+ scalarReduced := p256GetMultiplier(baseScalar)
+ r1IsInfinity := scalarIsZero(scalarReduced)
+ r1.p256BaseMult(scalarReduced)
+
+ copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
+ copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
+ copy(r2.z[:], one)
+ p256MulAsm(r2.x[:], r2.x[:], rr[:])
+ p256MulAsm(r2.y[:], r2.y[:], rr[:])
+
+ scalarReduced = p256GetMultiplier(scalar)
+ r2IsInfinity := scalarIsZero(scalarReduced)
+ r2.p256ScalarMult(scalarReduced)
+
+ var sum, double p256Point
+ pointsEqual := p256PointAddAsm(&sum, &r1, &r2)
+ p256PointDoubleAsm(&double, &r1)
+ p256MovCond(&sum, &double, &sum, pointsEqual)
+ p256MovCond(&sum, &r1, &sum, r2IsInfinity)
+ p256MovCond(&sum, &r2, &sum, r1IsInfinity)
+ return sum.p256PointToAffine()
+}
+
+func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
+ var r p256Point
+ reducedScalar := p256GetMultiplier(scalar)
+ r.p256BaseMult(reducedScalar)
+ return r.p256PointToAffine()
+}
+
+func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
+ scalarReduced := p256GetMultiplier(scalar)
+ var r p256Point
+ copy(r.x[:], fromBig(maybeReduceModP(bigX)))
+ copy(r.y[:], fromBig(maybeReduceModP(bigY)))
+ copy(r.z[:], one)
+ p256MulAsm(r.x[:], r.x[:], rr[:])
+ p256MulAsm(r.y[:], r.y[:], rr[:])
+ r.p256ScalarMult(scalarReduced)
+ return r.p256PointToAffine()
+}
+
+func scalarIsZero(scalar []byte) int {
+ // If any byte is not zero, return 0.
+ // Check for -0.... since that appears to compare to 0.
+ b := byte(0)
+ for _, s := range scalar {
+ b |= s
+ }
+ return subtle.ConstantTimeByteEq(b, 0)
+}
+
+func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
+ zInv := make([]byte, 32)
+ zInvSq := make([]byte, 32)
+
+ p256Inverse(zInv, p.z[:])
+ p256Sqr(zInvSq, zInv)
+ p256MulAsm(zInv, zInv, zInvSq)
+
+ p256MulAsm(zInvSq, p.x[:], zInvSq)
+ p256MulAsm(zInv, p.y[:], zInv)
+
+ p256FromMont(zInvSq, zInvSq)
+ p256FromMont(zInv, zInv)
+
+ // SetBytes expects a slice in big endian order,
+ // since ppc64le is little endian, reverse the bytes.
+ // TODO: For big endian, bytes don't need to be reversed.
+ p256ReverseBytes(zInvSq, zInvSq)
+ p256ReverseBytes(zInv, zInv)
+ rx := new(big.Int).SetBytes(zInvSq)
+ ry := new(big.Int).SetBytes(zInv)
+ return rx, ry
+}
+
+// p256Inverse sets out to in^-1 mod p.
+func p256Inverse(out, in []byte) {
+ var stack [6 * 32]byte
+ p2 := stack[32*0 : 32*0+32]
+ p4 := stack[32*1 : 32*1+32]
+ p8 := stack[32*2 : 32*2+32]
+ p16 := stack[32*3 : 32*3+32]
+ p32 := stack[32*4 : 32*4+32]
+
+ p256Sqr(out, in)
+ p256MulAsm(p2, out, in) // 3*p
+
+ p256Sqr(out, p2)
+ p256Sqr(out, out)
+ p256MulAsm(p4, out, p2) // f*p
+
+ p256Sqr(out, p4)
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256MulAsm(p8, out, p4) // ff*p
+
+ p256Sqr(out, p8)
+
+ for i := 0; i < 7; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(p16, out, p8) // ffff*p
+
+ p256Sqr(out, p16)
+ for i := 0; i < 15; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(p32, out, p16) // ffffffff*p
+
+ p256Sqr(out, p32)
+
+ for i := 0; i < 31; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(out, out, in)
+
+ for i := 0; i < 32*4; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(out, out, p32)
+
+ for i := 0; i < 32; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(out, out, p32)
+
+ for i := 0; i < 16; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(out, out, p16)
+
+ for i := 0; i < 8; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(out, out, p8)
+
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256MulAsm(out, out, p4)
+
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256MulAsm(out, out, p2)
+
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256MulAsm(out, out, in)
+}
+
+func boothW5(in uint) (int, int) {
+ var s uint = ^((in >> 5) - 1)
+ var d uint = (1 << 6) - in - 1
+ d = (d & s) | (in & (^s))
+ d = (d >> 1) + (d & 1)
+ return int(d), int(s & 1)
+}
+
+func boothW6(in uint) (int, int) {
+ var s uint = ^((in >> 6) - 1)
+ var d uint = (1 << 7) - in - 1
+ d = (d & s) | (in & (^s))
+ d = (d >> 1) + (d & 1)
+ return int(d), int(s & 1)
+}
+
+func boothW7(in uint) (int, int) {
+ var s uint = ^((in >> 7) - 1)
+ var d uint = (1 << 8) - in - 1
+ d = (d & s) | (in & (^s))
+ d = (d >> 1) + (d & 1)
+ return int(d), int(s & 1)
+}
+
+func initTable() {
+
+ p256PreFast = new([37][64]p256Point)
+
+ // TODO: For big endian, these slices should be in reverse byte order,
+ // as found in the s390x implementation.
+ basePoint := p256Point{
+ x: [32]byte{0x3c, 0x14, 0xa9, 0x18, 0xd4, 0x30, 0xe7, 0x79, 0x01, 0xb6, 0xed, 0x5f, 0xfc, 0x95, 0xba, 0x75,
+ 0x10, 0x25, 0x62, 0x77, 0x2b, 0x73, 0xfb, 0x79, 0xc6, 0x55, 0x37, 0xa5, 0x76, 0x5f, 0x90, 0x18}, //(p256.x*2^256)%p
+ y: [32]byte{0x0a, 0x56, 0x95, 0xce, 0x57, 0x53, 0xf2, 0xdd, 0x5c, 0xe4, 0x19, 0xba, 0xe4, 0xb8, 0x4a, 0x8b,
+ 0x25, 0xf3, 0x21, 0xdd, 0x88, 0x86, 0xe8, 0xd2, 0x85, 0x5d, 0x88, 0x25, 0x18, 0xff, 0x71, 0x85}, //(p256.y*2^256)%p
+ z: [32]byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, //(p256.z*2^256)%p
+
+ }
+
+ t1 := new(p256Point)
+ t2 := new(p256Point)
+ *t2 = basePoint
+
+ zInv := make([]byte, 32)
+ zInvSq := make([]byte, 32)
+ for j := 0; j < 64; j++ {
+ *t1 = *t2
+ for i := 0; i < 37; i++ {
+ // The window size is 7 so we need to double 7 times.
+ if i != 0 {
+ for k := 0; k < 7; k++ {
+ p256PointDoubleAsm(t1, t1)
+ }
+ }
+ // Convert the point to affine form. (Its values are
+ // still in Montgomery form however.)
+ p256Inverse(zInv, t1.z[:])
+ p256Sqr(zInvSq, zInv)
+ p256MulAsm(zInv, zInv, zInvSq)
+
+ p256MulAsm(t1.x[:], t1.x[:], zInvSq)
+ p256MulAsm(t1.y[:], t1.y[:], zInv)
+
+ copy(t1.z[:], basePoint.z[:])
+ // Update the table entry
+ copy(p256PreFast[i][j].x[:], t1.x[:])
+ copy(p256PreFast[i][j].y[:], t1.y[:])
+ }
+ if j == 0 {
+ p256PointDoubleAsm(t2, &basePoint)
+ } else {
+ p256PointAddAsm(t2, t2, &basePoint)
+ }
+ }
+}
+
+func (p *p256Point) p256BaseMult(scalar []byte) {
+ // TODO: For big endian, the index should be 31 not 0.
+ wvalue := (uint(scalar[0]) << 1) & 0xff
+ sel, sign := boothW7(uint(wvalue))
+ p256SelectBase(p, p256PreFast[0][:], sel)
+ p256NegCond(p, sign)
+
+ copy(p.z[:], one[:])
+ var t0 p256Point
+
+ copy(t0.z[:], one[:])
+
+ index := uint(6)
+ zero := sel
+ for i := 1; i < 37; i++ {
+ // TODO: For big endian, use the same index values as found
+ // in the s390x implementation.
+ if index < 247 {
+ wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0xff
+ } else {
+ wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0xff
+ }
+ index += 7
+ sel, sign = boothW7(uint(wvalue))
+ p256SelectBase(&t0, p256PreFast[i][:], sel)
+ p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
+ zero |= sel
+ }
+}
+
+func (p *p256Point) p256ScalarMult(scalar []byte) {
+ // precomp is a table of precomputed points that stores powers of p
+ // from p^1 to p^16.
+ var precomp [16]p256Point
+ var t0, t1, t2, t3 p256Point
+
+ *&precomp[0] = *p
+ p256PointDoubleAsm(&t0, p)
+ p256PointDoubleAsm(&t1, &t0)
+ p256PointDoubleAsm(&t2, &t1)
+ p256PointDoubleAsm(&t3, &t2)
+ *&precomp[1] = t0
+ *&precomp[3] = t1
+ *&precomp[7] = t2
+ *&precomp[15] = t3
+
+ p256PointAddAsm(&t0, &t0, p)
+ p256PointAddAsm(&t1, &t1, p)
+ p256PointAddAsm(&t2, &t2, p)
+
+ *&precomp[2] = t0
+ *&precomp[4] = t1
+ *&precomp[8] = t2
+
+ p256PointDoubleAsm(&t0, &t0)
+ p256PointDoubleAsm(&t1, &t1)
+ *&precomp[5] = t0
+ *&precomp[9] = t1
+
+ p256PointAddAsm(&t2, &t0, p)
+ p256PointAddAsm(&t1, &t1, p)
+ *&precomp[6] = t2
+ *&precomp[10] = t1
+
+ p256PointDoubleAsm(&t0, &t0)
+ p256PointDoubleAsm(&t2, &t2)
+ *&precomp[11] = t0
+ *&precomp[13] = t2
+
+ p256PointAddAsm(&t0, &t0, p)
+ p256PointAddAsm(&t2, &t2, p)
+ *&precomp[12] = t0
+ *&precomp[14] = t2
+
+ // Start scanning the window from top bit
+ index := uint(254)
+ var sel, sign int
+
+ // TODO: For big endian, use index found in s390x implementation.
+ wvalue := (uint(scalar[index/8]) >> (index % 8)) & 0x3f
+ sel, _ = boothW5(uint(wvalue))
+ p256Select(p, precomp[:], sel)
+ zero := sel
+
+ for index > 4 {
+ index -= 5
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+
+ // TODO: For big endian, use index values as found in s390x implementation.
+ if index < 247 {
+ wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0x3f
+ } else {
+ wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0x3f
+ }
+
+ sel, sign = boothW5(uint(wvalue))
+
+ p256Select(&t0, precomp[:], sel)
+ p256NegCond(&t0, sign)
+ p256PointAddAsm(&t1, p, &t0)
+ p256MovCond(&t1, &t1, p, sel)
+ p256MovCond(p, &t1, &t0, zero)
+ zero |= sel
+ }
+
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+
+ // TODO: Use index for big endian as found in s390x implementation.
+ wvalue = (uint(scalar[0]) << 1) & 0x3f
+ sel, sign = boothW5(uint(wvalue))
+
+ p256Select(&t0, precomp[:], sel)
+ p256NegCond(&t0, sign)
+ p256PointAddAsm(&t1, p, &t0)
+ p256MovCond(&t1, &t1, p, sel)
+ p256MovCond(p, &t1, &t0, zero)
+}
diff --git a/src/crypto/elliptic/p256_s390x.go b/src/crypto/elliptic/p256_s390x.go
new file mode 100644
index 0000000..91e613b
--- /dev/null
+++ b/src/crypto/elliptic/p256_s390x.go
@@ -0,0 +1,577 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build s390x
+// +build s390x
+
+package elliptic
+
+import (
+ "crypto/subtle"
+ "internal/cpu"
+ "math/big"
+ "unsafe"
+)
+
+const (
+ offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX)
+ offsetS390xHasVE1 = unsafe.Offsetof(cpu.S390X.HasVXE)
+)
+
+type p256CurveFast struct {
+ *CurveParams
+}
+
+type p256Point struct {
+ x [32]byte
+ y [32]byte
+ z [32]byte
+}
+
+var (
+ p256 Curve
+ p256PreFast *[37][64]p256Point
+)
+
+//go:noescape
+func p256MulInternalTrampolineSetup()
+
+//go:noescape
+func p256SqrInternalTrampolineSetup()
+
+//go:noescape
+func p256MulInternalVX()
+
+//go:noescape
+func p256MulInternalVMSL()
+
+//go:noescape
+func p256SqrInternalVX()
+
+//go:noescape
+func p256SqrInternalVMSL()
+
+func initP256Arch() {
+ if cpu.S390X.HasVX {
+ p256 = p256CurveFast{p256Params}
+ initTable()
+ return
+ }
+
+ // No vector support, use pure Go implementation.
+ p256 = p256Curve{p256Params}
+ return
+}
+
+func (curve p256CurveFast) Params() *CurveParams {
+ return curve.CurveParams
+}
+
+// Functions implemented in p256_asm_s390x.s
+// Montgomery multiplication modulo P256
+//
+//go:noescape
+func p256SqrAsm(res, in1 []byte)
+
+//go:noescape
+func p256MulAsm(res, in1, in2 []byte)
+
+// Montgomery square modulo P256
+func p256Sqr(res, in []byte) {
+ p256SqrAsm(res, in)
+}
+
+// Montgomery multiplication by 1
+//
+//go:noescape
+func p256FromMont(res, in []byte)
+
+// iff cond == 1 val <- -val
+//
+//go:noescape
+func p256NegCond(val *p256Point, cond int)
+
+// if cond == 0 res <- b; else res <- a
+//
+//go:noescape
+func p256MovCond(res, a, b *p256Point, cond int)
+
+// Constant time table access
+//
+//go:noescape
+func p256Select(point *p256Point, table []p256Point, idx int)
+
+//go:noescape
+func p256SelectBase(point *p256Point, table []p256Point, idx int)
+
+// Montgomery multiplication modulo Ord(G)
+//
+//go:noescape
+func p256OrdMul(res, in1, in2 []byte)
+
+// Montgomery square modulo Ord(G), repeated n times
+func p256OrdSqr(res, in []byte, n int) {
+ copy(res, in)
+ for i := 0; i < n; i += 1 {
+ p256OrdMul(res, res, res)
+ }
+}
+
+// Point add with P2 being affine point
+// If sign == 1 -> P2 = -P2
+// If sel == 0 -> P3 = P1
+// if zero == 0 -> P3 = P2
+//
+//go:noescape
+func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)
+
+// Point add
+//
+//go:noescape
+func p256PointAddAsm(P3, P1, P2 *p256Point) int
+
+//go:noescape
+func p256PointDoubleAsm(P3, P1 *p256Point)
+
+func (curve p256CurveFast) Inverse(k *big.Int) *big.Int {
+ if k.Cmp(p256Params.N) >= 0 {
+ // This should never happen.
+ reducedK := new(big.Int).Mod(k, p256Params.N)
+ k = reducedK
+ }
+
+ // table will store precomputed powers of x. The 32 bytes at index
+ // i store x^(i+1).
+ var table [15][32]byte
+
+ x := fromBig(k)
+ // This code operates in the Montgomery domain where R = 2^256 mod n
+ // and n is the order of the scalar field. (See initP256 for the
+ // value.) Elements in the Montgomery domain take the form a×R and
+ // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
+ // is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
+ // i.e. converts x into the Montgomery domain. Stored in BigEndian form
+ RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59,
+ 0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2}
+
+ p256OrdMul(table[0][:], x, RR)
+
+ // Prepare the table, no need in constant time access, because the
+ // power is not a secret. (Entry 0 is never used.)
+ for i := 2; i < 16; i += 2 {
+ p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1)
+ p256OrdMul(table[i][:], table[i-1][:], table[0][:])
+ }
+
+ copy(x, table[14][:]) // f
+
+ p256OrdSqr(x[0:32], x[0:32], 4)
+ p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff
+ t := make([]byte, 32)
+ copy(t, x)
+
+ p256OrdSqr(x, x, 8)
+ p256OrdMul(x, x, t) // ffff
+ copy(t, x)
+
+ p256OrdSqr(x, x, 16)
+ p256OrdMul(x, x, t) // ffffffff
+ copy(t, x)
+
+ p256OrdSqr(x, x, 64) // ffffffff0000000000000000
+ p256OrdMul(x, x, t) // ffffffff00000000ffffffff
+ p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
+ p256OrdMul(x, x, t) // ffffffff00000000ffffffffffffffff
+
+ // Remaining 32 windows
+ expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4,
+ 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
+ for i := 0; i < 32; i++ {
+ p256OrdSqr(x, x, 4)
+ p256OrdMul(x, x, table[expLo[i]-1][:])
+ }
+
+ // Multiplying by one in the Montgomery domain converts a Montgomery
+ // value out of the domain.
+ one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
+ p256OrdMul(x, x, one)
+
+ return new(big.Int).SetBytes(x)
+}
+
+// fromBig converts a *big.Int into a format used by this code.
+func fromBig(big *big.Int) []byte {
+ // This could be done a lot more efficiently...
+ res := big.Bytes()
+ if 32 == len(res) {
+ return res
+ }
+ t := make([]byte, 32)
+ offset := 32 - len(res)
+ for i := len(res) - 1; i >= 0; i-- {
+ t[i+offset] = res[i]
+ }
+ return t
+}
+
+// p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
+// is equal or greater than the order of the group, it's reduced modulo that order.
+func p256GetMultiplier(in []byte) []byte {
+ n := new(big.Int).SetBytes(in)
+
+ if n.Cmp(p256Params.N) >= 0 {
+ n.Mod(n, p256Params.N)
+ }
+ return fromBig(n)
+}
+
+// p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
+// underlying field of the curve. (See initP256 for the value.) Thus rr here is
+// R×R mod p. See comment in Inverse about how this is used.
+var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
+ 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}
+
+// (This is one, in the Montgomery domain.)
+var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
+
+func maybeReduceModP(in *big.Int) *big.Int {
+ if in.Cmp(p256Params.P) < 0 {
+ return in
+ }
+ return new(big.Int).Mod(in, p256Params.P)
+}
+
+func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
+ var r1, r2 p256Point
+ scalarReduced := p256GetMultiplier(baseScalar)
+ r1IsInfinity := scalarIsZero(scalarReduced)
+ r1.p256BaseMult(scalarReduced)
+
+ copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
+ copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
+ copy(r2.z[:], one)
+ p256MulAsm(r2.x[:], r2.x[:], rr[:])
+ p256MulAsm(r2.y[:], r2.y[:], rr[:])
+
+ scalarReduced = p256GetMultiplier(scalar)
+ r2IsInfinity := scalarIsZero(scalarReduced)
+ r2.p256ScalarMult(p256GetMultiplier(scalar))
+
+ var sum, double p256Point
+ pointsEqual := p256PointAddAsm(&sum, &r1, &r2)
+ p256PointDoubleAsm(&double, &r1)
+ p256MovCond(&sum, &double, &sum, pointsEqual)
+ p256MovCond(&sum, &r1, &sum, r2IsInfinity)
+ p256MovCond(&sum, &r2, &sum, r1IsInfinity)
+ return sum.p256PointToAffine()
+}
+
+func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
+ var r p256Point
+ r.p256BaseMult(p256GetMultiplier(scalar))
+ return r.p256PointToAffine()
+}
+
+func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
+ var r p256Point
+ copy(r.x[:], fromBig(maybeReduceModP(bigX)))
+ copy(r.y[:], fromBig(maybeReduceModP(bigY)))
+ copy(r.z[:], one)
+ p256MulAsm(r.x[:], r.x[:], rr[:])
+ p256MulAsm(r.y[:], r.y[:], rr[:])
+ r.p256ScalarMult(p256GetMultiplier(scalar))
+ return r.p256PointToAffine()
+}
+
+// scalarIsZero returns 1 if scalar represents the zero value, and zero
+// otherwise.
+func scalarIsZero(scalar []byte) int {
+ b := byte(0)
+ for _, s := range scalar {
+ b |= s
+ }
+ return subtle.ConstantTimeByteEq(b, 0)
+}
+
+func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
+ zInv := make([]byte, 32)
+ zInvSq := make([]byte, 32)
+
+ p256Inverse(zInv, p.z[:])
+ p256Sqr(zInvSq, zInv)
+ p256MulAsm(zInv, zInv, zInvSq)
+
+ p256MulAsm(zInvSq, p.x[:], zInvSq)
+ p256MulAsm(zInv, p.y[:], zInv)
+
+ p256FromMont(zInvSq, zInvSq)
+ p256FromMont(zInv, zInv)
+
+ return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv)
+}
+
+// p256Inverse sets out to in^-1 mod p.
+func p256Inverse(out, in []byte) {
+ var stack [6 * 32]byte
+ p2 := stack[32*0 : 32*0+32]
+ p4 := stack[32*1 : 32*1+32]
+ p8 := stack[32*2 : 32*2+32]
+ p16 := stack[32*3 : 32*3+32]
+ p32 := stack[32*4 : 32*4+32]
+
+ p256Sqr(out, in)
+ p256MulAsm(p2, out, in) // 3*p
+
+ p256Sqr(out, p2)
+ p256Sqr(out, out)
+ p256MulAsm(p4, out, p2) // f*p
+
+ p256Sqr(out, p4)
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256MulAsm(p8, out, p4) // ff*p
+
+ p256Sqr(out, p8)
+
+ for i := 0; i < 7; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(p16, out, p8) // ffff*p
+
+ p256Sqr(out, p16)
+ for i := 0; i < 15; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(p32, out, p16) // ffffffff*p
+
+ p256Sqr(out, p32)
+
+ for i := 0; i < 31; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(out, out, in)
+
+ for i := 0; i < 32*4; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(out, out, p32)
+
+ for i := 0; i < 32; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(out, out, p32)
+
+ for i := 0; i < 16; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(out, out, p16)
+
+ for i := 0; i < 8; i++ {
+ p256Sqr(out, out)
+ }
+ p256MulAsm(out, out, p8)
+
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256MulAsm(out, out, p4)
+
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256MulAsm(out, out, p2)
+
+ p256Sqr(out, out)
+ p256Sqr(out, out)
+ p256MulAsm(out, out, in)
+}
+
+func boothW5(in uint) (int, int) {
+ var s uint = ^((in >> 5) - 1)
+ var d uint = (1 << 6) - in - 1
+ d = (d & s) | (in & (^s))
+ d = (d >> 1) + (d & 1)
+ return int(d), int(s & 1)
+}
+
+func boothW7(in uint) (int, int) {
+ var s uint = ^((in >> 7) - 1)
+ var d uint = (1 << 8) - in - 1
+ d = (d & s) | (in & (^s))
+ d = (d >> 1) + (d & 1)
+ return int(d), int(s & 1)
+}
+
+func initTable() {
+ p256PreFast = new([37][64]p256Point) //z coordinate not used
+ basePoint := p256Point{
+ x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10,
+ 0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p
+ y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25,
+ 0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p
+ z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p
+ }
+
+ t1 := new(p256Point)
+ t2 := new(p256Point)
+ *t2 = basePoint
+
+ zInv := make([]byte, 32)
+ zInvSq := make([]byte, 32)
+ for j := 0; j < 64; j++ {
+ *t1 = *t2
+ for i := 0; i < 37; i++ {
+ // The window size is 7 so we need to double 7 times.
+ if i != 0 {
+ for k := 0; k < 7; k++ {
+ p256PointDoubleAsm(t1, t1)
+ }
+ }
+ // Convert the point to affine form. (Its values are
+ // still in Montgomery form however.)
+ p256Inverse(zInv, t1.z[:])
+ p256Sqr(zInvSq, zInv)
+ p256MulAsm(zInv, zInv, zInvSq)
+
+ p256MulAsm(t1.x[:], t1.x[:], zInvSq)
+ p256MulAsm(t1.y[:], t1.y[:], zInv)
+
+ copy(t1.z[:], basePoint.z[:])
+ // Update the table entry
+ copy(p256PreFast[i][j].x[:], t1.x[:])
+ copy(p256PreFast[i][j].y[:], t1.y[:])
+ }
+ if j == 0 {
+ p256PointDoubleAsm(t2, &basePoint)
+ } else {
+ p256PointAddAsm(t2, t2, &basePoint)
+ }
+ }
+}
+
+func (p *p256Point) p256BaseMult(scalar []byte) {
+ wvalue := (uint(scalar[31]) << 1) & 0xff
+ sel, sign := boothW7(uint(wvalue))
+ p256SelectBase(p, p256PreFast[0][:], sel)
+ p256NegCond(p, sign)
+
+ copy(p.z[:], one[:])
+ var t0 p256Point
+
+ copy(t0.z[:], one[:])
+
+ index := uint(6)
+ zero := sel
+
+ for i := 1; i < 37; i++ {
+ if index < 247 {
+ wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff
+ } else {
+ wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff
+ }
+ index += 7
+ sel, sign = boothW7(uint(wvalue))
+ p256SelectBase(&t0, p256PreFast[i][:], sel)
+ p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
+ zero |= sel
+ }
+}
+
+func (p *p256Point) p256ScalarMult(scalar []byte) {
+ // precomp is a table of precomputed points that stores powers of p
+ // from p^1 to p^16.
+ var precomp [16]p256Point
+ var t0, t1, t2, t3 p256Point
+
+ // Prepare the table
+ *&precomp[0] = *p
+
+ p256PointDoubleAsm(&t0, p)
+ p256PointDoubleAsm(&t1, &t0)
+ p256PointDoubleAsm(&t2, &t1)
+ p256PointDoubleAsm(&t3, &t2)
+ *&precomp[1] = t0 // 2
+ *&precomp[3] = t1 // 4
+ *&precomp[7] = t2 // 8
+ *&precomp[15] = t3 // 16
+
+ p256PointAddAsm(&t0, &t0, p)
+ p256PointAddAsm(&t1, &t1, p)
+ p256PointAddAsm(&t2, &t2, p)
+ *&precomp[2] = t0 // 3
+ *&precomp[4] = t1 // 5
+ *&precomp[8] = t2 // 9
+
+ p256PointDoubleAsm(&t0, &t0)
+ p256PointDoubleAsm(&t1, &t1)
+ *&precomp[5] = t0 // 6
+ *&precomp[9] = t1 // 10
+
+ p256PointAddAsm(&t2, &t0, p)
+ p256PointAddAsm(&t1, &t1, p)
+ *&precomp[6] = t2 // 7
+ *&precomp[10] = t1 // 11
+
+ p256PointDoubleAsm(&t0, &t0)
+ p256PointDoubleAsm(&t2, &t2)
+ *&precomp[11] = t0 // 12
+ *&precomp[13] = t2 // 14
+
+ p256PointAddAsm(&t0, &t0, p)
+ p256PointAddAsm(&t2, &t2, p)
+ *&precomp[12] = t0 // 13
+ *&precomp[14] = t2 // 15
+
+ // Start scanning the window from top bit
+ index := uint(254)
+ var sel, sign int
+
+ wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
+ sel, _ = boothW5(uint(wvalue))
+ p256Select(p, precomp[:], sel)
+ zero := sel
+
+ for index > 4 {
+ index -= 5
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+
+ if index < 247 {
+ wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f
+ } else {
+ wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
+ }
+
+ sel, sign = boothW5(uint(wvalue))
+
+ p256Select(&t0, precomp[:], sel)
+ p256NegCond(&t0, sign)
+ p256PointAddAsm(&t1, p, &t0)
+ p256MovCond(&t1, &t1, p, sel)
+ p256MovCond(p, &t1, &t0, zero)
+ zero |= sel
+ }
+
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+ p256PointDoubleAsm(p, p)
+
+ wvalue = (uint(scalar[31]) << 1) & 0x3f
+ sel, sign = boothW5(uint(wvalue))
+
+ p256Select(&t0, precomp[:], sel)
+ p256NegCond(&t0, sign)
+ p256PointAddAsm(&t1, p, &t0)
+ p256MovCond(&t1, &t1, p, sel)
+ p256MovCond(p, &t1, &t0, zero)
+}
diff --git a/src/crypto/elliptic/p256_test.go b/src/crypto/elliptic/p256_test.go
new file mode 100644
index 0000000..694186d
--- /dev/null
+++ b/src/crypto/elliptic/p256_test.go
@@ -0,0 +1,169 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package elliptic
+
+import (
+ "math/big"
+ "testing"
+)
+
+type scalarMultTest struct {
+ k string
+ xIn, yIn string
+ xOut, yOut string
+}
+
+var p256MultTests = []scalarMultTest{
+ {
+ "2a265f8bcbdcaf94d58519141e578124cb40d64a501fba9c11847b28965bc737",
+ "023819813ac969847059028ea88a1f30dfbcde03fc791d3a252c6b41211882ea",
+ "f93e4ae433cc12cf2a43fc0ef26400c0e125508224cdb649380f25479148a4ad",
+ "4d4de80f1534850d261075997e3049321a0864082d24a917863366c0724f5ae3",
+ "a22d2b7f7818a3563e0f7a76c9bf0921ac55e06e2e4d11795b233824b1db8cc0",
+ },
+ {
+ "313f72ff9fe811bf573176231b286a3bdb6f1b14e05c40146590727a71c3bccd",
+ "cc11887b2d66cbae8f4d306627192522932146b42f01d3c6f92bd5c8ba739b06",
+ "a2f08a029cd06b46183085bae9248b0ed15b70280c7ef13a457f5af382426031",
+ "831c3f6b5f762d2f461901577af41354ac5f228c2591f84f8a6e51e2e3f17991",
+ "93f90934cd0ef2c698cc471c60a93524e87ab31ca2412252337f364513e43684",
+ },
+}
+
+func TestP256BaseMult(t *testing.T) {
+ p256 := P256()
+ p256Generic := p256.Params()
+
+ scalars := make([]*big.Int, 0, len(p224BaseMultTests)+1)
+ for _, e := range p224BaseMultTests {
+ k, _ := new(big.Int).SetString(e.k, 10)
+ scalars = append(scalars, k)
+ }
+ k := new(big.Int).SetInt64(1)
+ k.Lsh(k, 500)
+ scalars = append(scalars, k)
+
+ for i, k := range scalars {
+ x, y := p256.ScalarBaseMult(k.Bytes())
+ x2, y2 := p256Generic.ScalarBaseMult(k.Bytes())
+ if x.Cmp(x2) != 0 || y.Cmp(y2) != 0 {
+ t.Errorf("#%d: got (%x, %x), want (%x, %x)", i, x, y, x2, y2)
+ }
+
+ if testing.Short() && i > 5 {
+ break
+ }
+ }
+}
+
+func TestP256Mult(t *testing.T) {
+ p256 := P256()
+ p256Generic := p256.Params()
+
+ for i, e := range p224BaseMultTests {
+ x, _ := new(big.Int).SetString(e.x, 16)
+ y, _ := new(big.Int).SetString(e.y, 16)
+ k, _ := new(big.Int).SetString(e.k, 10)
+
+ xx, yy := p256.ScalarMult(x, y, k.Bytes())
+ xx2, yy2 := p256Generic.ScalarMult(x, y, k.Bytes())
+ if xx.Cmp(xx2) != 0 || yy.Cmp(yy2) != 0 {
+ t.Errorf("#%d: got (%x, %x), want (%x, %x)", i, xx, yy, xx2, yy2)
+ }
+ if testing.Short() && i > 5 {
+ break
+ }
+ }
+
+ for i, e := range p256MultTests {
+ x, _ := new(big.Int).SetString(e.xIn, 16)
+ y, _ := new(big.Int).SetString(e.yIn, 16)
+ k, _ := new(big.Int).SetString(e.k, 16)
+ expectedX, _ := new(big.Int).SetString(e.xOut, 16)
+ expectedY, _ := new(big.Int).SetString(e.yOut, 16)
+
+ xx, yy := p256.ScalarMult(x, y, k.Bytes())
+ if xx.Cmp(expectedX) != 0 || yy.Cmp(expectedY) != 0 {
+ t.Errorf("#%d: got (%x, %x), want (%x, %x)", i, xx, yy, expectedX, expectedY)
+ }
+ }
+}
+
+type synthCombinedMult struct {
+ Curve
+}
+
+func (s synthCombinedMult) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
+ x1, y1 := s.ScalarBaseMult(baseScalar)
+ x2, y2 := s.ScalarMult(bigX, bigY, scalar)
+ return s.Add(x1, y1, x2, y2)
+}
+
+func TestP256CombinedMult(t *testing.T) {
+ type combinedMult interface {
+ Curve
+ CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int)
+ }
+
+ p256, ok := P256().(combinedMult)
+ if !ok {
+ p256 = &synthCombinedMult{P256()}
+ }
+
+ gx := p256.Params().Gx
+ gy := p256.Params().Gy
+
+ zero := make([]byte, 32)
+ one := make([]byte, 32)
+ one[31] = 1
+ two := make([]byte, 32)
+ two[31] = 2
+
+ // 0×G + 0×G = ∞
+ x, y := p256.CombinedMult(gx, gy, zero, zero)
+ if x.Sign() != 0 || y.Sign() != 0 {
+ t.Errorf("0×G + 0×G = (%d, %d), should be ∞", x, y)
+ }
+
+ // 1×G + 0×G = G
+ x, y = p256.CombinedMult(gx, gy, one, zero)
+ if x.Cmp(gx) != 0 || y.Cmp(gy) != 0 {
+ t.Errorf("1×G + 0×G = (%d, %d), should be (%d, %d)", x, y, gx, gy)
+ }
+
+ // 0×G + 1×G = G
+ x, y = p256.CombinedMult(gx, gy, zero, one)
+ if x.Cmp(gx) != 0 || y.Cmp(gy) != 0 {
+ t.Errorf("0×G + 1×G = (%d, %d), should be (%d, %d)", x, y, gx, gy)
+ }
+
+ // 1×G + 1×G = 2×G
+ x, y = p256.CombinedMult(gx, gy, one, one)
+ ggx, ggy := p256.ScalarBaseMult(two)
+ if x.Cmp(ggx) != 0 || y.Cmp(ggy) != 0 {
+ t.Errorf("1×G + 1×G = (%d, %d), should be (%d, %d)", x, y, ggx, ggy)
+ }
+
+ minusOne := new(big.Int).Sub(p256.Params().N, big.NewInt(1))
+ // 1×G + (-1)×G = ∞
+ x, y = p256.CombinedMult(gx, gy, one, minusOne.Bytes())
+ if x.Sign() != 0 || y.Sign() != 0 {
+ t.Errorf("1×G + (-1)×G = (%d, %d), should be ∞", x, y)
+ }
+}
+
+func TestIssue52075(t *testing.T) {
+ Gx, Gy := P256().Params().Gx, P256().Params().Gy
+ scalar := make([]byte, 33)
+ scalar[32] = 1
+ x, y := P256().ScalarBaseMult(scalar)
+ if x.Cmp(Gx) != 0 || y.Cmp(Gy) != 0 {
+ t.Errorf("unexpected output (%v,%v)", x, y)
+ }
+ x, y = P256().ScalarMult(Gx, Gy, scalar)
+ if x.Cmp(Gx) != 0 || y.Cmp(Gy) != 0 {
+ t.Errorf("unexpected output (%v,%v)", x, y)
+ }
+}
diff --git a/src/crypto/elliptic/p521.go b/src/crypto/elliptic/p521.go
new file mode 100644
index 0000000..587991e
--- /dev/null
+++ b/src/crypto/elliptic/p521.go
@@ -0,0 +1,264 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package elliptic
+
+import (
+ "crypto/elliptic/internal/fiat"
+ "math/big"
+)
+
+type p521Curve struct {
+ *CurveParams
+}
+
+var p521 p521Curve
+var p521Params *CurveParams
+
+func initP521() {
+ // See FIPS 186-3, section D.2.5
+ p521.CurveParams = &CurveParams{Name: "P-521"}
+ p521.P, _ = new(big.Int).SetString("6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151", 10)
+ p521.N, _ = new(big.Int).SetString("6864797660130609714981900799081393217269435300143305409394463459185543183397655394245057746333217197532963996371363321113864768612440380340372808892707005449", 10)
+ p521.B, _ = new(big.Int).SetString("051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00", 16)
+ p521.Gx, _ = new(big.Int).SetString("c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66", 16)
+ p521.Gy, _ = new(big.Int).SetString("11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650", 16)
+ p521.BitSize = 521
+}
+
+func (curve p521Curve) Params() *CurveParams {
+ return curve.CurveParams
+}
+
+func (curve p521Curve) IsOnCurve(x, y *big.Int) bool {
+ if x.Sign() < 0 || x.Cmp(curve.P) >= 0 ||
+ y.Sign() < 0 || y.Cmp(curve.P) >= 0 {
+ return false
+ }
+
+ x1 := bigIntToFiatP521(x)
+ y1 := bigIntToFiatP521(y)
+ b := bigIntToFiatP521(curve.B) // TODO: precompute this value.
+
+ // x³ - 3x + b.
+ x3 := new(fiat.P521Element).Square(x1)
+ x3.Mul(x3, x1)
+
+ threeX := new(fiat.P521Element).Add(x1, x1)
+ threeX.Add(threeX, x1)
+
+ x3.Sub(x3, threeX)
+ x3.Add(x3, b)
+
+ // y² = x³ - 3x + b
+ y2 := new(fiat.P521Element).Square(y1)
+
+ return x3.Equal(y2) == 1
+}
+
+type p521Point struct {
+ x, y, z *fiat.P521Element
+}
+
+func fiatP521ToBigInt(x *fiat.P521Element) *big.Int {
+ xBytes := x.Bytes()
+ for i := range xBytes[:len(xBytes)/2] {
+ xBytes[i], xBytes[len(xBytes)-i-1] = xBytes[len(xBytes)-i-1], xBytes[i]
+ }
+ return new(big.Int).SetBytes(xBytes)
+}
+
+// affineFromJacobian brings a point in Jacobian coordinates back to affine
+// coordinates, with (0, 0) representing infinity by convention. It also goes
+// back to big.Int values to match the exposed API.
+func (curve p521Curve) affineFromJacobian(p *p521Point) (x, y *big.Int) {
+ if p.z.IsZero() == 1 {
+ return new(big.Int), new(big.Int)
+ }
+
+ zinv := new(fiat.P521Element).Invert(p.z)
+ zinvsq := new(fiat.P521Element).Mul(zinv, zinv)
+
+ xx := new(fiat.P521Element).Mul(p.x, zinvsq)
+ zinvsq.Mul(zinvsq, zinv)
+ yy := new(fiat.P521Element).Mul(p.y, zinvsq)
+
+ return fiatP521ToBigInt(xx), fiatP521ToBigInt(yy)
+}
+
+func bigIntToFiatP521(x *big.Int) *fiat.P521Element {
+ xBytes := new(big.Int).Mod(x, p521.P).FillBytes(make([]byte, 66))
+ for i := range xBytes[:len(xBytes)/2] {
+ xBytes[i], xBytes[len(xBytes)-i-1] = xBytes[len(xBytes)-i-1], xBytes[i]
+ }
+ x1, err := new(fiat.P521Element).SetBytes(xBytes)
+ if err != nil {
+ // The input is reduced modulo P and encoded in a fixed size bytes
+ // slice, this should be impossible.
+ panic("internal error: bigIntToFiatP521")
+ }
+ return x1
+}
+
+// jacobianFromAffine converts (x, y) affine coordinates into (x, y, z) Jacobian
+// coordinates. It also converts from big.Int to fiat, which is necessarily a
+// messy and variable-time operation, which we can't avoid due to the exposed API.
+func (curve p521Curve) jacobianFromAffine(x, y *big.Int) *p521Point {
+ // (0, 0) is by convention the point at infinity, which can't be represented
+ // in affine coordinates, but is (0, 0, 0) in Jacobian.
+ if x.Sign() == 0 && y.Sign() == 0 {
+ return &p521Point{
+ x: new(fiat.P521Element),
+ y: new(fiat.P521Element),
+ z: new(fiat.P521Element),
+ }
+ }
+ return &p521Point{
+ x: bigIntToFiatP521(x),
+ y: bigIntToFiatP521(y),
+ z: new(fiat.P521Element).One(),
+ }
+}
+
+func (curve p521Curve) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) {
+ p1 := curve.jacobianFromAffine(x1, y1)
+ p2 := curve.jacobianFromAffine(x2, y2)
+ return curve.affineFromJacobian(p1.addJacobian(p1, p2))
+}
+
+// addJacobian sets q = p1 + p2, and returns q. The points may overlap.
+func (q *p521Point) addJacobian(p1, p2 *p521Point) *p521Point {
+ // https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl
+ z1IsZero := p1.z.IsZero()
+ z2IsZero := p2.z.IsZero()
+
+ z1z1 := new(fiat.P521Element).Square(p1.z)
+ z2z2 := new(fiat.P521Element).Square(p2.z)
+
+ u1 := new(fiat.P521Element).Mul(p1.x, z2z2)
+ u2 := new(fiat.P521Element).Mul(p2.x, z1z1)
+ h := new(fiat.P521Element).Sub(u2, u1)
+ xEqual := h.IsZero() == 1
+ i := new(fiat.P521Element).Add(h, h)
+ i.Square(i)
+ j := new(fiat.P521Element).Mul(h, i)
+
+ s1 := new(fiat.P521Element).Mul(p1.y, p2.z)
+ s1.Mul(s1, z2z2)
+ s2 := new(fiat.P521Element).Mul(p2.y, p1.z)
+ s2.Mul(s2, z1z1)
+ r := new(fiat.P521Element).Sub(s2, s1)
+ yEqual := r.IsZero() == 1
+ if xEqual && yEqual && z1IsZero == 0 && z2IsZero == 0 {
+ return q.doubleJacobian(p1)
+ }
+ r.Add(r, r)
+ v := new(fiat.P521Element).Mul(u1, i)
+
+ x := new(fiat.P521Element).Set(r)
+ x.Square(x)
+ x.Sub(x, j)
+ x.Sub(x, v)
+ x.Sub(x, v)
+
+ y := new(fiat.P521Element).Set(r)
+ v.Sub(v, x)
+ y.Mul(y, v)
+ s1.Mul(s1, j)
+ s1.Add(s1, s1)
+ y.Sub(y, s1)
+
+ z := new(fiat.P521Element).Add(p1.z, p2.z)
+ z.Square(z)
+ z.Sub(z, z1z1)
+ z.Sub(z, z2z2)
+ z.Mul(z, h)
+
+ x.Select(p2.x, x, z1IsZero)
+ x.Select(p1.x, x, z2IsZero)
+ y.Select(p2.y, y, z1IsZero)
+ y.Select(p1.y, y, z2IsZero)
+ z.Select(p2.z, z, z1IsZero)
+ z.Select(p1.z, z, z2IsZero)
+
+ q.x.Set(x)
+ q.y.Set(y)
+ q.z.Set(z)
+ return q
+}
+
+func (curve p521Curve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
+ p := curve.jacobianFromAffine(x1, y1)
+ return curve.affineFromJacobian(p.doubleJacobian(p))
+}
+
+// doubleJacobian sets q = p + p, and returns q. The points may overlap.
+func (q *p521Point) doubleJacobian(p *p521Point) *p521Point {
+ // https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
+ delta := new(fiat.P521Element).Square(p.z)
+ gamma := new(fiat.P521Element).Square(p.y)
+ alpha := new(fiat.P521Element).Sub(p.x, delta)
+ alpha2 := new(fiat.P521Element).Add(p.x, delta)
+ alpha.Mul(alpha, alpha2)
+ alpha2.Set(alpha)
+ alpha.Add(alpha, alpha)
+ alpha.Add(alpha, alpha2)
+
+ beta := alpha2.Mul(p.x, gamma)
+
+ q.x.Square(alpha)
+ beta8 := new(fiat.P521Element).Add(beta, beta)
+ beta8.Add(beta8, beta8)
+ beta8.Add(beta8, beta8)
+ q.x.Sub(q.x, beta8)
+
+ q.z.Add(p.y, p.z)
+ q.z.Square(q.z)
+ q.z.Sub(q.z, gamma)
+ q.z.Sub(q.z, delta)
+
+ beta.Add(beta, beta)
+ beta.Add(beta, beta)
+ beta.Sub(beta, q.x)
+ q.y.Mul(alpha, beta)
+
+ gamma.Square(gamma)
+ gamma.Add(gamma, gamma)
+ gamma.Add(gamma, gamma)
+ gamma.Add(gamma, gamma)
+
+ q.y.Sub(q.y, gamma)
+
+ return q
+}
+
+func (curve p521Curve) ScalarMult(Bx, By *big.Int, scalar []byte) (*big.Int, *big.Int) {
+ B := curve.jacobianFromAffine(Bx, By)
+ p, t := &p521Point{
+ x: new(fiat.P521Element),
+ y: new(fiat.P521Element),
+ z: new(fiat.P521Element),
+ }, &p521Point{
+ x: new(fiat.P521Element),
+ y: new(fiat.P521Element),
+ z: new(fiat.P521Element),
+ }
+
+ for _, byte := range scalar {
+ for bitNum := 0; bitNum < 8; bitNum++ {
+ p.doubleJacobian(p)
+ bit := (byte >> (7 - bitNum)) & 1
+ t.addJacobian(p, B)
+ p.x.Select(t.x, p.x, int(bit))
+ p.y.Select(t.y, p.y, int(bit))
+ p.z.Select(t.z, p.z, int(bit))
+ }
+ }
+
+ return curve.affineFromJacobian(p)
+}
+
+func (curve p521Curve) ScalarBaseMult(k []byte) (*big.Int, *big.Int) {
+ return curve.ScalarMult(curve.Gx, curve.Gy, k)
+}