diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 13:15:26 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 13:15:26 +0000 |
commit | 82539ad8d59729fb45b0bb0edda8f2bddb719eb1 (patch) | |
tree | 58f0b58e6f44f0e04d4a6373132cf426fa835fa7 /src/crypto/elliptic | |
parent | Initial commit. (diff) | |
download | golang-1.17-upstream.tar.xz golang-1.17-upstream.zip |
Adding upstream version 1.17.13.upstream/1.17.13upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/crypto/elliptic')
23 files changed, 18324 insertions, 0 deletions
diff --git a/src/crypto/elliptic/elliptic.go b/src/crypto/elliptic/elliptic.go new file mode 100644 index 0000000..b84339e --- /dev/null +++ b/src/crypto/elliptic/elliptic.go @@ -0,0 +1,491 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package elliptic implements several standard elliptic curves over prime +// fields. +package elliptic + +// This package operates, internally, on Jacobian coordinates. For a given +// (x, y) position on the curve, the Jacobian coordinates are (x1, y1, z1) +// where x = x1/z1² and y = y1/z1³. The greatest speedups come when the whole +// calculation can be performed within the transform (as in ScalarMult and +// ScalarBaseMult). But even for Add and Double, it's faster to apply and +// reverse the transform than to operate in affine coordinates. + +import ( + "io" + "math/big" + "sync" +) + +// A Curve represents a short-form Weierstrass curve with a=-3. +// +// Note that the point at infinity (0, 0) is not considered on the curve, and +// although it can be returned by Add, Double, ScalarMult, or ScalarBaseMult, it +// can't be marshaled or unmarshaled, and IsOnCurve will return false for it. +type Curve interface { + // Params returns the parameters for the curve. + Params() *CurveParams + // IsOnCurve reports whether the given (x,y) lies on the curve. + IsOnCurve(x, y *big.Int) bool + // Add returns the sum of (x1,y1) and (x2,y2) + Add(x1, y1, x2, y2 *big.Int) (x, y *big.Int) + // Double returns 2*(x,y) + Double(x1, y1 *big.Int) (x, y *big.Int) + // ScalarMult returns k*(Bx,By) where k is a number in big-endian form. + ScalarMult(x1, y1 *big.Int, k []byte) (x, y *big.Int) + // ScalarBaseMult returns k*G, where G is the base point of the group + // and k is an integer in big-endian form. + ScalarBaseMult(k []byte) (x, y *big.Int) +} + +func matchesSpecificCurve(params *CurveParams, available ...Curve) (Curve, bool) { + for _, c := range available { + if params == c.Params() { + return c, true + } + } + return nil, false +} + +// CurveParams contains the parameters of an elliptic curve and also provides +// a generic, non-constant time implementation of Curve. +type CurveParams struct { + P *big.Int // the order of the underlying field + N *big.Int // the order of the base point + B *big.Int // the constant of the curve equation + Gx, Gy *big.Int // (x,y) of the base point + BitSize int // the size of the underlying field + Name string // the canonical name of the curve +} + +func (curve *CurveParams) Params() *CurveParams { + return curve +} + +// polynomial returns x³ - 3x + b. +func (curve *CurveParams) polynomial(x *big.Int) *big.Int { + x3 := new(big.Int).Mul(x, x) + x3.Mul(x3, x) + + threeX := new(big.Int).Lsh(x, 1) + threeX.Add(threeX, x) + + x3.Sub(x3, threeX) + x3.Add(x3, curve.B) + x3.Mod(x3, curve.P) + + return x3 +} + +func (curve *CurveParams) IsOnCurve(x, y *big.Int) bool { + // If there is a dedicated constant-time implementation for this curve operation, + // use that instead of the generic one. + if specific, ok := matchesSpecificCurve(curve, p224, p521); ok { + return specific.IsOnCurve(x, y) + } + + if x.Sign() < 0 || x.Cmp(curve.P) >= 0 || + y.Sign() < 0 || y.Cmp(curve.P) >= 0 { + return false + } + + // y² = x³ - 3x + b + y2 := new(big.Int).Mul(y, y) + y2.Mod(y2, curve.P) + + return curve.polynomial(x).Cmp(y2) == 0 +} + +// zForAffine returns a Jacobian Z value for the affine point (x, y). If x and +// y are zero, it assumes that they represent the point at infinity because (0, +// 0) is not on the any of the curves handled here. +func zForAffine(x, y *big.Int) *big.Int { + z := new(big.Int) + if x.Sign() != 0 || y.Sign() != 0 { + z.SetInt64(1) + } + return z +} + +// affineFromJacobian reverses the Jacobian transform. See the comment at the +// top of the file. If the point is ∞ it returns 0, 0. +func (curve *CurveParams) affineFromJacobian(x, y, z *big.Int) (xOut, yOut *big.Int) { + if z.Sign() == 0 { + return new(big.Int), new(big.Int) + } + + zinv := new(big.Int).ModInverse(z, curve.P) + zinvsq := new(big.Int).Mul(zinv, zinv) + + xOut = new(big.Int).Mul(x, zinvsq) + xOut.Mod(xOut, curve.P) + zinvsq.Mul(zinvsq, zinv) + yOut = new(big.Int).Mul(y, zinvsq) + yOut.Mod(yOut, curve.P) + return +} + +func (curve *CurveParams) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) { + // If there is a dedicated constant-time implementation for this curve operation, + // use that instead of the generic one. + if specific, ok := matchesSpecificCurve(curve, p224, p521); ok { + return specific.Add(x1, y1, x2, y2) + } + + z1 := zForAffine(x1, y1) + z2 := zForAffine(x2, y2) + return curve.affineFromJacobian(curve.addJacobian(x1, y1, z1, x2, y2, z2)) +} + +// addJacobian takes two points in Jacobian coordinates, (x1, y1, z1) and +// (x2, y2, z2) and returns their sum, also in Jacobian form. +func (curve *CurveParams) addJacobian(x1, y1, z1, x2, y2, z2 *big.Int) (*big.Int, *big.Int, *big.Int) { + // See https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl + x3, y3, z3 := new(big.Int), new(big.Int), new(big.Int) + if z1.Sign() == 0 { + x3.Set(x2) + y3.Set(y2) + z3.Set(z2) + return x3, y3, z3 + } + if z2.Sign() == 0 { + x3.Set(x1) + y3.Set(y1) + z3.Set(z1) + return x3, y3, z3 + } + + z1z1 := new(big.Int).Mul(z1, z1) + z1z1.Mod(z1z1, curve.P) + z2z2 := new(big.Int).Mul(z2, z2) + z2z2.Mod(z2z2, curve.P) + + u1 := new(big.Int).Mul(x1, z2z2) + u1.Mod(u1, curve.P) + u2 := new(big.Int).Mul(x2, z1z1) + u2.Mod(u2, curve.P) + h := new(big.Int).Sub(u2, u1) + xEqual := h.Sign() == 0 + if h.Sign() == -1 { + h.Add(h, curve.P) + } + i := new(big.Int).Lsh(h, 1) + i.Mul(i, i) + j := new(big.Int).Mul(h, i) + + s1 := new(big.Int).Mul(y1, z2) + s1.Mul(s1, z2z2) + s1.Mod(s1, curve.P) + s2 := new(big.Int).Mul(y2, z1) + s2.Mul(s2, z1z1) + s2.Mod(s2, curve.P) + r := new(big.Int).Sub(s2, s1) + if r.Sign() == -1 { + r.Add(r, curve.P) + } + yEqual := r.Sign() == 0 + if xEqual && yEqual { + return curve.doubleJacobian(x1, y1, z1) + } + r.Lsh(r, 1) + v := new(big.Int).Mul(u1, i) + + x3.Set(r) + x3.Mul(x3, x3) + x3.Sub(x3, j) + x3.Sub(x3, v) + x3.Sub(x3, v) + x3.Mod(x3, curve.P) + + y3.Set(r) + v.Sub(v, x3) + y3.Mul(y3, v) + s1.Mul(s1, j) + s1.Lsh(s1, 1) + y3.Sub(y3, s1) + y3.Mod(y3, curve.P) + + z3.Add(z1, z2) + z3.Mul(z3, z3) + z3.Sub(z3, z1z1) + z3.Sub(z3, z2z2) + z3.Mul(z3, h) + z3.Mod(z3, curve.P) + + return x3, y3, z3 +} + +func (curve *CurveParams) Double(x1, y1 *big.Int) (*big.Int, *big.Int) { + // If there is a dedicated constant-time implementation for this curve operation, + // use that instead of the generic one. + if specific, ok := matchesSpecificCurve(curve, p224, p521); ok { + return specific.Double(x1, y1) + } + + z1 := zForAffine(x1, y1) + return curve.affineFromJacobian(curve.doubleJacobian(x1, y1, z1)) +} + +// doubleJacobian takes a point in Jacobian coordinates, (x, y, z), and +// returns its double, also in Jacobian form. +func (curve *CurveParams) doubleJacobian(x, y, z *big.Int) (*big.Int, *big.Int, *big.Int) { + // See https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b + delta := new(big.Int).Mul(z, z) + delta.Mod(delta, curve.P) + gamma := new(big.Int).Mul(y, y) + gamma.Mod(gamma, curve.P) + alpha := new(big.Int).Sub(x, delta) + if alpha.Sign() == -1 { + alpha.Add(alpha, curve.P) + } + alpha2 := new(big.Int).Add(x, delta) + alpha.Mul(alpha, alpha2) + alpha2.Set(alpha) + alpha.Lsh(alpha, 1) + alpha.Add(alpha, alpha2) + + beta := alpha2.Mul(x, gamma) + + x3 := new(big.Int).Mul(alpha, alpha) + beta8 := new(big.Int).Lsh(beta, 3) + beta8.Mod(beta8, curve.P) + x3.Sub(x3, beta8) + if x3.Sign() == -1 { + x3.Add(x3, curve.P) + } + x3.Mod(x3, curve.P) + + z3 := new(big.Int).Add(y, z) + z3.Mul(z3, z3) + z3.Sub(z3, gamma) + if z3.Sign() == -1 { + z3.Add(z3, curve.P) + } + z3.Sub(z3, delta) + if z3.Sign() == -1 { + z3.Add(z3, curve.P) + } + z3.Mod(z3, curve.P) + + beta.Lsh(beta, 2) + beta.Sub(beta, x3) + if beta.Sign() == -1 { + beta.Add(beta, curve.P) + } + y3 := alpha.Mul(alpha, beta) + + gamma.Mul(gamma, gamma) + gamma.Lsh(gamma, 3) + gamma.Mod(gamma, curve.P) + + y3.Sub(y3, gamma) + if y3.Sign() == -1 { + y3.Add(y3, curve.P) + } + y3.Mod(y3, curve.P) + + return x3, y3, z3 +} + +func (curve *CurveParams) ScalarMult(Bx, By *big.Int, k []byte) (*big.Int, *big.Int) { + // If there is a dedicated constant-time implementation for this curve operation, + // use that instead of the generic one. + if specific, ok := matchesSpecificCurve(curve, p224, p256, p521); ok { + return specific.ScalarMult(Bx, By, k) + } + + Bz := new(big.Int).SetInt64(1) + x, y, z := new(big.Int), new(big.Int), new(big.Int) + + for _, byte := range k { + for bitNum := 0; bitNum < 8; bitNum++ { + x, y, z = curve.doubleJacobian(x, y, z) + if byte&0x80 == 0x80 { + x, y, z = curve.addJacobian(Bx, By, Bz, x, y, z) + } + byte <<= 1 + } + } + + return curve.affineFromJacobian(x, y, z) +} + +func (curve *CurveParams) ScalarBaseMult(k []byte) (*big.Int, *big.Int) { + // If there is a dedicated constant-time implementation for this curve operation, + // use that instead of the generic one. + if specific, ok := matchesSpecificCurve(curve, p224, p256, p521); ok { + return specific.ScalarBaseMult(k) + } + + return curve.ScalarMult(curve.Gx, curve.Gy, k) +} + +var mask = []byte{0xff, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f} + +// GenerateKey returns a public/private key pair. The private key is +// generated using the given reader, which must return random data. +func GenerateKey(curve Curve, rand io.Reader) (priv []byte, x, y *big.Int, err error) { + N := curve.Params().N + bitSize := N.BitLen() + byteLen := (bitSize + 7) / 8 + priv = make([]byte, byteLen) + + for x == nil { + _, err = io.ReadFull(rand, priv) + if err != nil { + return + } + // We have to mask off any excess bits in the case that the size of the + // underlying field is not a whole number of bytes. + priv[0] &= mask[bitSize%8] + // This is because, in tests, rand will return all zeros and we don't + // want to get the point at infinity and loop forever. + priv[1] ^= 0x42 + + // If the scalar is out of range, sample another random number. + if new(big.Int).SetBytes(priv).Cmp(N) >= 0 { + continue + } + + x, y = curve.ScalarBaseMult(priv) + } + return +} + +// Marshal converts a point on the curve into the uncompressed form specified in +// section 4.3.6 of ANSI X9.62. +func Marshal(curve Curve, x, y *big.Int) []byte { + byteLen := (curve.Params().BitSize + 7) / 8 + + ret := make([]byte, 1+2*byteLen) + ret[0] = 4 // uncompressed point + + x.FillBytes(ret[1 : 1+byteLen]) + y.FillBytes(ret[1+byteLen : 1+2*byteLen]) + + return ret +} + +// MarshalCompressed converts a point on the curve into the compressed form +// specified in section 4.3.6 of ANSI X9.62. +func MarshalCompressed(curve Curve, x, y *big.Int) []byte { + byteLen := (curve.Params().BitSize + 7) / 8 + compressed := make([]byte, 1+byteLen) + compressed[0] = byte(y.Bit(0)) | 2 + x.FillBytes(compressed[1:]) + return compressed +} + +// Unmarshal converts a point, serialized by Marshal, into an x, y pair. +// It is an error if the point is not in uncompressed form or is not on the curve. +// On error, x = nil. +func Unmarshal(curve Curve, data []byte) (x, y *big.Int) { + byteLen := (curve.Params().BitSize + 7) / 8 + if len(data) != 1+2*byteLen { + return nil, nil + } + if data[0] != 4 { // uncompressed form + return nil, nil + } + p := curve.Params().P + x = new(big.Int).SetBytes(data[1 : 1+byteLen]) + y = new(big.Int).SetBytes(data[1+byteLen:]) + if x.Cmp(p) >= 0 || y.Cmp(p) >= 0 { + return nil, nil + } + if !curve.IsOnCurve(x, y) { + return nil, nil + } + return +} + +// UnmarshalCompressed converts a point, serialized by MarshalCompressed, into an x, y pair. +// It is an error if the point is not in compressed form or is not on the curve. +// On error, x = nil. +func UnmarshalCompressed(curve Curve, data []byte) (x, y *big.Int) { + byteLen := (curve.Params().BitSize + 7) / 8 + if len(data) != 1+byteLen { + return nil, nil + } + if data[0] != 2 && data[0] != 3 { // compressed form + return nil, nil + } + p := curve.Params().P + x = new(big.Int).SetBytes(data[1:]) + if x.Cmp(p) >= 0 { + return nil, nil + } + // y² = x³ - 3x + b + y = curve.Params().polynomial(x) + y = y.ModSqrt(y, p) + if y == nil { + return nil, nil + } + if byte(y.Bit(0)) != data[0]&1 { + y.Neg(y).Mod(y, p) + } + if !curve.IsOnCurve(x, y) { + return nil, nil + } + return +} + +var initonce sync.Once +var p384 *CurveParams + +func initAll() { + initP224() + initP256() + initP384() + initP521() +} + +func initP384() { + // See FIPS 186-3, section D.2.4 + p384 = &CurveParams{Name: "P-384"} + p384.P, _ = new(big.Int).SetString("39402006196394479212279040100143613805079739270465446667948293404245721771496870329047266088258938001861606973112319", 10) + p384.N, _ = new(big.Int).SetString("39402006196394479212279040100143613805079739270465446667946905279627659399113263569398956308152294913554433653942643", 10) + p384.B, _ = new(big.Int).SetString("b3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef", 16) + p384.Gx, _ = new(big.Int).SetString("aa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7", 16) + p384.Gy, _ = new(big.Int).SetString("3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f", 16) + p384.BitSize = 384 +} + +// P256 returns a Curve which implements NIST P-256 (FIPS 186-3, section D.2.3), +// also known as secp256r1 or prime256v1. The CurveParams.Name of this Curve is +// "P-256". +// +// Multiple invocations of this function will return the same value, so it can +// be used for equality checks and switch statements. +// +// ScalarMult and ScalarBaseMult are implemented using constant-time algorithms. +func P256() Curve { + initonce.Do(initAll) + return p256 +} + +// P384 returns a Curve which implements NIST P-384 (FIPS 186-3, section D.2.4), +// also known as secp384r1. The CurveParams.Name of this Curve is "P-384". +// +// Multiple invocations of this function will return the same value, so it can +// be used for equality checks and switch statements. +// +// The cryptographic operations do not use constant-time algorithms. +func P384() Curve { + initonce.Do(initAll) + return p384 +} + +// P521 returns a Curve which implements NIST P-521 (FIPS 186-3, section D.2.5), +// also known as secp521r1. The CurveParams.Name of this Curve is "P-521". +// +// Multiple invocations of this function will return the same value, so it can +// be used for equality checks and switch statements. +// +// The cryptographic operations are implemented using constant-time algorithms. +func P521() Curve { + initonce.Do(initAll) + return p521 +} diff --git a/src/crypto/elliptic/elliptic_test.go b/src/crypto/elliptic/elliptic_test.go new file mode 100644 index 0000000..3fe53c5 --- /dev/null +++ b/src/crypto/elliptic/elliptic_test.go @@ -0,0 +1,331 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package elliptic + +import ( + "bytes" + "crypto/rand" + "encoding/hex" + "math/big" + "testing" +) + +// genericParamsForCurve returns the dereferenced CurveParams for +// the specified curve. This is used to avoid the logic for +// upgrading a curve to it's specific implementation, forcing +// usage of the generic implementation. This is only relevant +// for the P224, P256, and P521 curves. +func genericParamsForCurve(c Curve) *CurveParams { + d := *(c.Params()) + return &d +} + +func testAllCurves(t *testing.T, f func(*testing.T, Curve)) { + tests := []struct { + name string + curve Curve + }{ + {"P256", P256()}, + {"P256/Params", genericParamsForCurve(P256())}, + {"P224", P224()}, + {"P224/Params", genericParamsForCurve(P224())}, + {"P384", P384()}, + {"P384/Params", genericParamsForCurve(P384())}, + {"P521", P521()}, + {"P521/Params", genericParamsForCurve(P521())}, + } + if testing.Short() { + tests = tests[:1] + } + for _, test := range tests { + curve := test.curve + t.Run(test.name, func(t *testing.T) { + t.Parallel() + f(t, curve) + }) + } +} + +func TestOnCurve(t *testing.T) { + testAllCurves(t, func(t *testing.T, curve Curve) { + if !curve.IsOnCurve(curve.Params().Gx, curve.Params().Gy) { + t.Error("basepoint is not on the curve") + } + }) +} + +func TestOffCurve(t *testing.T) { + testAllCurves(t, func(t *testing.T, curve Curve) { + x, y := new(big.Int).SetInt64(1), new(big.Int).SetInt64(1) + if curve.IsOnCurve(x, y) { + t.Errorf("point off curve is claimed to be on the curve") + } + b := Marshal(curve, x, y) + x1, y1 := Unmarshal(curve, b) + if x1 != nil || y1 != nil { + t.Errorf("unmarshaling a point not on the curve succeeded") + } + }) +} + +func TestInfinity(t *testing.T) { + testAllCurves(t, testInfinity) +} + +func testInfinity(t *testing.T, curve Curve) { + _, x, y, _ := GenerateKey(curve, rand.Reader) + x, y = curve.ScalarMult(x, y, curve.Params().N.Bytes()) + if x.Sign() != 0 || y.Sign() != 0 { + t.Errorf("x^q != ∞") + } + + x, y = curve.ScalarBaseMult([]byte{0}) + if x.Sign() != 0 || y.Sign() != 0 { + t.Errorf("b^0 != ∞") + x.SetInt64(0) + y.SetInt64(0) + } + + x2, y2 := curve.Double(x, y) + if x2.Sign() != 0 || y2.Sign() != 0 { + t.Errorf("2∞ != ∞") + } + + baseX := curve.Params().Gx + baseY := curve.Params().Gy + + x3, y3 := curve.Add(baseX, baseY, x, y) + if x3.Cmp(baseX) != 0 || y3.Cmp(baseY) != 0 { + t.Errorf("x+∞ != x") + } + + x4, y4 := curve.Add(x, y, baseX, baseY) + if x4.Cmp(baseX) != 0 || y4.Cmp(baseY) != 0 { + t.Errorf("∞+x != x") + } + + if curve.IsOnCurve(x, y) { + t.Errorf("IsOnCurve(∞) == true") + } +} + +func TestMarshal(t *testing.T) { + testAllCurves(t, func(t *testing.T, curve Curve) { + _, x, y, err := GenerateKey(curve, rand.Reader) + if err != nil { + t.Fatal(err) + } + serialized := Marshal(curve, x, y) + xx, yy := Unmarshal(curve, serialized) + if xx == nil { + t.Fatal("failed to unmarshal") + } + if xx.Cmp(x) != 0 || yy.Cmp(y) != 0 { + t.Fatal("unmarshal returned different values") + } + }) +} + +func TestUnmarshalToLargeCoordinates(t *testing.T) { + // See https://golang.org/issues/20482. + testAllCurves(t, testUnmarshalToLargeCoordinates) +} + +func testUnmarshalToLargeCoordinates(t *testing.T, curve Curve) { + p := curve.Params().P + byteLen := (p.BitLen() + 7) / 8 + + // Set x to be greater than curve's parameter P – specifically, to P+5. + // Set y to mod_sqrt(x^3 - 3x + B)) so that (x mod P = 5 , y) is on the + // curve. + x := new(big.Int).Add(p, big.NewInt(5)) + y := curve.Params().polynomial(x) + y.ModSqrt(y, p) + + invalid := make([]byte, byteLen*2+1) + invalid[0] = 4 // uncompressed encoding + x.FillBytes(invalid[1 : 1+byteLen]) + y.FillBytes(invalid[1+byteLen:]) + + if X, Y := Unmarshal(curve, invalid); X != nil || Y != nil { + t.Errorf("Unmarshal accepts invalid X coordinate") + } + + if curve == p256 { + // This is a point on the curve with a small y value, small enough that + // we can add p and still be within 32 bytes. + x, _ = new(big.Int).SetString("31931927535157963707678568152204072984517581467226068221761862915403492091210", 10) + y, _ = new(big.Int).SetString("5208467867388784005506817585327037698770365050895731383201516607147", 10) + y.Add(y, p) + + if p.Cmp(y) > 0 || y.BitLen() != 256 { + t.Fatal("y not within expected range") + } + + // marshal + x.FillBytes(invalid[1 : 1+byteLen]) + y.FillBytes(invalid[1+byteLen:]) + + if X, Y := Unmarshal(curve, invalid); X != nil || Y != nil { + t.Errorf("Unmarshal accepts invalid Y coordinate") + } + } +} + +// TestInvalidCoordinates tests big.Int values that are not valid field elements +// (negative or bigger than P). They are expected to return false from +// IsOnCurve, all other behavior is undefined. +func TestInvalidCoordinates(t *testing.T) { + testAllCurves(t, testInvalidCoordinates) +} + +func testInvalidCoordinates(t *testing.T, curve Curve) { + checkIsOnCurveFalse := func(name string, x, y *big.Int) { + if curve.IsOnCurve(x, y) { + t.Errorf("IsOnCurve(%s) unexpectedly returned true", name) + } + } + + p := curve.Params().P + _, x, y, _ := GenerateKey(curve, rand.Reader) + xx, yy := new(big.Int), new(big.Int) + + // Check if the sign is getting dropped. + xx.Neg(x) + checkIsOnCurveFalse("-x, y", xx, y) + yy.Neg(y) + checkIsOnCurveFalse("x, -y", x, yy) + + // Check if negative values are reduced modulo P. + xx.Sub(x, p) + checkIsOnCurveFalse("x-P, y", xx, y) + yy.Sub(y, p) + checkIsOnCurveFalse("x, y-P", x, yy) + + // Check if positive values are reduced modulo P. + xx.Add(x, p) + checkIsOnCurveFalse("x+P, y", xx, y) + yy.Add(y, p) + checkIsOnCurveFalse("x, y+P", x, yy) + + // Check if the overflow is dropped. + xx.Add(x, new(big.Int).Lsh(big.NewInt(1), 535)) + checkIsOnCurveFalse("x+2⁵³⁵, y", xx, y) + yy.Add(y, new(big.Int).Lsh(big.NewInt(1), 535)) + checkIsOnCurveFalse("x, y+2⁵³⁵", x, yy) + + // Check if P is treated like zero (if possible). + // y^2 = x^3 - 3x + B + // y = mod_sqrt(x^3 - 3x + B) + // y = mod_sqrt(B) if x = 0 + // If there is no modsqrt, there is no point with x = 0, can't test x = P. + if yy := new(big.Int).ModSqrt(curve.Params().B, p); yy != nil { + if !curve.IsOnCurve(big.NewInt(0), yy) { + t.Fatal("(0, mod_sqrt(B)) is not on the curve?") + } + checkIsOnCurveFalse("P, y", p, yy) + } +} + +func TestMarshalCompressed(t *testing.T) { + t.Run("P-256/03", func(t *testing.T) { + data, _ := hex.DecodeString("031e3987d9f9ea9d7dd7155a56a86b2009e1e0ab332f962d10d8beb6406ab1ad79") + x, _ := new(big.Int).SetString("13671033352574878777044637384712060483119675368076128232297328793087057702265", 10) + y, _ := new(big.Int).SetString("66200849279091436748794323380043701364391950689352563629885086590854940586447", 10) + testMarshalCompressed(t, P256(), x, y, data) + }) + t.Run("P-256/02", func(t *testing.T) { + data, _ := hex.DecodeString("021e3987d9f9ea9d7dd7155a56a86b2009e1e0ab332f962d10d8beb6406ab1ad79") + x, _ := new(big.Int).SetString("13671033352574878777044637384712060483119675368076128232297328793087057702265", 10) + y, _ := new(big.Int).SetString("49591239931264812013903123569363872165694192725937750565648544718012157267504", 10) + testMarshalCompressed(t, P256(), x, y, data) + }) + + t.Run("Invalid", func(t *testing.T) { + data, _ := hex.DecodeString("02fd4bf61763b46581fd9174d623516cf3c81edd40e29ffa2777fb6cb0ae3ce535") + X, Y := UnmarshalCompressed(P256(), data) + if X != nil || Y != nil { + t.Error("expected an error for invalid encoding") + } + }) + + if testing.Short() { + t.Skip("skipping other curves on short test") + } + + testAllCurves(t, func(t *testing.T, curve Curve) { + _, x, y, err := GenerateKey(curve, rand.Reader) + if err != nil { + t.Fatal(err) + } + testMarshalCompressed(t, curve, x, y, nil) + }) + +} + +func testMarshalCompressed(t *testing.T, curve Curve, x, y *big.Int, want []byte) { + if !curve.IsOnCurve(x, y) { + t.Fatal("invalid test point") + } + got := MarshalCompressed(curve, x, y) + if want != nil && !bytes.Equal(got, want) { + t.Errorf("got unexpected MarshalCompressed result: got %x, want %x", got, want) + } + + X, Y := UnmarshalCompressed(curve, got) + if X == nil || Y == nil { + t.Fatalf("UnmarshalCompressed failed unexpectedly") + } + + if !curve.IsOnCurve(X, Y) { + t.Error("UnmarshalCompressed returned a point not on the curve") + } + if X.Cmp(x) != 0 || Y.Cmp(y) != 0 { + t.Errorf("point did not round-trip correctly: got (%v, %v), want (%v, %v)", X, Y, x, y) + } +} + +func benchmarkAllCurves(t *testing.B, f func(*testing.B, Curve)) { + tests := []struct { + name string + curve Curve + }{ + {"P256", P256()}, + {"P224", P224()}, + {"P384", P384()}, + {"P521", P521()}, + } + for _, test := range tests { + curve := test.curve + t.Run(test.name, func(t *testing.B) { + f(t, curve) + }) + } +} + +func BenchmarkScalarBaseMult(b *testing.B) { + benchmarkAllCurves(b, func(b *testing.B, curve Curve) { + priv, _, _, _ := GenerateKey(curve, rand.Reader) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + x, _ := curve.ScalarBaseMult(priv) + // Prevent the compiler from optimizing out the operation. + priv[0] ^= byte(x.Bits()[0]) + } + }) +} + +func BenchmarkScalarMult(b *testing.B) { + benchmarkAllCurves(b, func(b *testing.B, curve Curve) { + _, x, y, _ := GenerateKey(curve, rand.Reader) + priv, _, _, _ := GenerateKey(curve, rand.Reader) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + x, y = curve.ScalarMult(x, y, priv) + } + }) +} diff --git a/src/crypto/elliptic/fuzz_test.go b/src/crypto/elliptic/fuzz_test.go new file mode 100644 index 0000000..8ff3bf3 --- /dev/null +++ b/src/crypto/elliptic/fuzz_test.go @@ -0,0 +1,54 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build amd64 || arm64 || ppc64le +// +build amd64 arm64 ppc64le + +package elliptic + +import ( + "crypto/rand" + "testing" + "time" +) + +func TestFuzz(t *testing.T) { + p256 := P256() + p256Generic := p256.Params() + + var scalar1 [32]byte + var scalar2 [32]byte + var timeout *time.Timer + + if testing.Short() { + timeout = time.NewTimer(10 * time.Millisecond) + } else { + timeout = time.NewTimer(2 * time.Second) + } + + for { + select { + case <-timeout.C: + return + default: + } + + rand.Read(scalar1[:]) + rand.Read(scalar2[:]) + + x, y := p256.ScalarBaseMult(scalar1[:]) + x2, y2 := p256Generic.ScalarBaseMult(scalar1[:]) + + xx, yy := p256.ScalarMult(x, y, scalar2[:]) + xx2, yy2 := p256Generic.ScalarMult(x2, y2, scalar2[:]) + + if x.Cmp(x2) != 0 || y.Cmp(y2) != 0 { + t.Fatalf("ScalarBaseMult does not match reference result with scalar: %x, please report this error to security@golang.org", scalar1) + } + + if xx.Cmp(xx2) != 0 || yy.Cmp(yy2) != 0 { + t.Fatalf("ScalarMult does not match reference result with scalars: %x and %x, please report this error to security@golang.org", scalar1, scalar2) + } + } +} diff --git a/src/crypto/elliptic/internal/fiat/Dockerfile b/src/crypto/elliptic/internal/fiat/Dockerfile new file mode 100644 index 0000000..7b5ece0 --- /dev/null +++ b/src/crypto/elliptic/internal/fiat/Dockerfile @@ -0,0 +1,12 @@ +# Copyright 2021 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +FROM coqorg/coq:8.13.2 + +RUN git clone https://github.com/mit-plv/fiat-crypto +RUN cd fiat-crypto && git checkout c076f3550bea2bb7f4cb5766a32594b9e67694f2 +RUN cd fiat-crypto && git submodule update --init --recursive +RUN cd fiat-crypto && eval $(opam env) && make -j4 standalone-ocaml SKIP_BEDROCK2=1 + +ENTRYPOINT ["fiat-crypto/src/ExtractionOCaml/unsaturated_solinas"] diff --git a/src/crypto/elliptic/internal/fiat/README b/src/crypto/elliptic/internal/fiat/README new file mode 100644 index 0000000..171f57a --- /dev/null +++ b/src/crypto/elliptic/internal/fiat/README @@ -0,0 +1,39 @@ +The code in this package was autogenerated by the fiat-crypto project +at commit c076f3550 from a formally verified model. + + docker build -t fiat-crypto:c076f3550 . + docker run fiat-crypto:c076f3550 --lang Go --no-wide-int --cmovznz-by-mul \ + --internal-static --public-function-case camelCase --public-type-case camelCase \ + --private-function-case camelCase --private-type-case camelCase \ + --no-prefix-fiat --package-name fiat --doc-text-before-function-name '' \ + --doc-prepend-header 'Code generated by Fiat Cryptography. DO NOT EDIT.' \ + --doc-newline-before-package-declaration p521 64 9 '2^521 - 1' \ + carry_mul carry_square carry add sub to_bytes from_bytes selectznz \ + > p521_fiat64.go + +It comes under the following license. + + Copyright (c) 2015-2020 The fiat-crypto Authors. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design, + Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The authors are listed at + + https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS diff --git a/src/crypto/elliptic/internal/fiat/p521.go b/src/crypto/elliptic/internal/fiat/p521.go new file mode 100644 index 0000000..dc67732 --- /dev/null +++ b/src/crypto/elliptic/internal/fiat/p521.go @@ -0,0 +1,197 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package fiat implements prime order fields using formally verified algorithms +// from the Fiat Cryptography project. +package fiat + +import ( + "crypto/subtle" + "errors" +) + +// P521Element is an integer modulo 2^521 - 1. +// +// The zero value is a valid zero element. +type P521Element struct { + // This element has the following bounds, which are tighter than + // the output bounds of some operations. Those operations must be + // followed by a carry. + // + // [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], + // [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], + // [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000] + x [9]uint64 +} + +// One sets e = 1, and returns e. +func (e *P521Element) One() *P521Element { + *e = P521Element{} + e.x[0] = 1 + return e +} + +// Equal returns 1 if e == t, and zero otherwise. +func (e *P521Element) Equal(t *P521Element) int { + eBytes := e.Bytes() + tBytes := t.Bytes() + return subtle.ConstantTimeCompare(eBytes, tBytes) +} + +var p521ZeroEncoding = new(P521Element).Bytes() + +// IsZero returns 1 if e == 0, and zero otherwise. +func (e *P521Element) IsZero() int { + eBytes := e.Bytes() + return subtle.ConstantTimeCompare(eBytes, p521ZeroEncoding) +} + +// Set sets e = t, and returns e. +func (e *P521Element) Set(t *P521Element) *P521Element { + e.x = t.x + return e +} + +// Bytes returns the 66-byte little-endian encoding of e. +func (e *P521Element) Bytes() []byte { + // This function must be inlined to move the allocation to the parent and + // save it from escaping to the heap. + var out [66]byte + p521ToBytes(&out, &e.x) + return out[:] +} + +// SetBytes sets e = v, where v is a little-endian 66-byte encoding, and returns +// e. If v is not 66 bytes or it encodes a value higher than 2^521 - 1, SetBytes +// returns nil and an error, and e is unchanged. +func (e *P521Element) SetBytes(v []byte) (*P521Element, error) { + if len(v) != 66 || v[65] > 1 { + return nil, errors.New("invalid P-521 field encoding") + } + var in [66]byte + copy(in[:], v) + p521FromBytes(&e.x, &in) + return e, nil +} + +// Add sets e = t1 + t2, and returns e. +func (e *P521Element) Add(t1, t2 *P521Element) *P521Element { + p521Add(&e.x, &t1.x, &t2.x) + p521Carry(&e.x, &e.x) + return e +} + +// Sub sets e = t1 - t2, and returns e. +func (e *P521Element) Sub(t1, t2 *P521Element) *P521Element { + p521Sub(&e.x, &t1.x, &t2.x) + p521Carry(&e.x, &e.x) + return e +} + +// Mul sets e = t1 * t2, and returns e. +func (e *P521Element) Mul(t1, t2 *P521Element) *P521Element { + p521CarryMul(&e.x, &t1.x, &t2.x) + return e +} + +// Square sets e = t * t, and returns e. +func (e *P521Element) Square(t *P521Element) *P521Element { + p521CarrySquare(&e.x, &t.x) + return e +} + +// Select sets e to a if cond == 1, and to b if cond == 0. +func (v *P521Element) Select(a, b *P521Element, cond int) *P521Element { + p521Selectznz(&v.x, p521Uint1(cond), &b.x, &a.x) + return v +} + +// Invert sets e = 1/t, and returns e. +// +// If t == 0, Invert returns e = 0. +func (e *P521Element) Invert(t *P521Element) *P521Element { + // Inversion is implemented as exponentiation with exponent p − 2. + // The sequence of multiplications and squarings was generated with + // github.com/mmcloughlin/addchain v0.2.0. + + var t1, t2 = new(P521Element), new(P521Element) + + // _10 = 2 * 1 + t1.Square(t) + + // _11 = 1 + _10 + t1.Mul(t, t1) + + // _1100 = _11 << 2 + t2.Square(t1) + t2.Square(t2) + + // _1111 = _11 + _1100 + t1.Mul(t1, t2) + + // _11110000 = _1111 << 4 + t2.Square(t1) + for i := 0; i < 3; i++ { + t2.Square(t2) + } + + // _11111111 = _1111 + _11110000 + t1.Mul(t1, t2) + + // x16 = _11111111<<8 + _11111111 + t2.Square(t1) + for i := 0; i < 7; i++ { + t2.Square(t2) + } + t1.Mul(t1, t2) + + // x32 = x16<<16 + x16 + t2.Square(t1) + for i := 0; i < 15; i++ { + t2.Square(t2) + } + t1.Mul(t1, t2) + + // x64 = x32<<32 + x32 + t2.Square(t1) + for i := 0; i < 31; i++ { + t2.Square(t2) + } + t1.Mul(t1, t2) + + // x65 = 2*x64 + 1 + t2.Square(t1) + t2.Mul(t2, t) + + // x129 = x65<<64 + x64 + for i := 0; i < 64; i++ { + t2.Square(t2) + } + t1.Mul(t1, t2) + + // x130 = 2*x129 + 1 + t2.Square(t1) + t2.Mul(t2, t) + + // x259 = x130<<129 + x129 + for i := 0; i < 129; i++ { + t2.Square(t2) + } + t1.Mul(t1, t2) + + // x260 = 2*x259 + 1 + t2.Square(t1) + t2.Mul(t2, t) + + // x519 = x260<<259 + x259 + for i := 0; i < 259; i++ { + t2.Square(t2) + } + t1.Mul(t1, t2) + + // return x519<<2 + 1 + t1.Square(t1) + t1.Square(t1) + return e.Mul(t1, t) +} diff --git a/src/crypto/elliptic/internal/fiat/p521_fiat64.go b/src/crypto/elliptic/internal/fiat/p521_fiat64.go new file mode 100644 index 0000000..f86283b --- /dev/null +++ b/src/crypto/elliptic/internal/fiat/p521_fiat64.go @@ -0,0 +1,1856 @@ +// Code generated by Fiat Cryptography. DO NOT EDIT. +// +// Autogenerated: 'fiat-crypto/src/ExtractionOCaml/unsaturated_solinas' --lang Go --no-wide-int --cmovznz-by-mul --internal-static --public-function-case camelCase --public-type-case camelCase --private-function-case camelCase --private-type-case camelCase --no-prefix-fiat --package-name fiat --doc-text-before-function-name '' --doc-prepend-header 'Code generated by Fiat Cryptography. DO NOT EDIT.' --doc-newline-before-package-declaration p521 64 9 '2^521 - 1' carry_mul carry_square carry add sub to_bytes from_bytes selectznz +// +// curve description: p521 +// +// machine_wordsize = 64 (from "64") +// +// requested operations: carry_mul, carry_square, carry, add, sub, to_bytes, from_bytes, selectznz +// +// n = 9 (from "9") +// +// s-c = 2^521 - [(1, 1)] (from "2^521 - 1") +// +// tight_bounds_multiplier = 1 (from "") +// +// +// +// Computed values: +// +// carry_chain = [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1] +// +// eval z = z[0] + (z[1] << 58) + (z[2] << 116) + (z[3] << 174) + (z[4] << 232) + (z[5] << 0x122) + (z[6] << 0x15c) + (z[7] << 0x196) + (z[8] << 0x1d0) +// +// bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) + (z[48] << 0x180) + (z[49] << 0x188) + (z[50] << 0x190) + (z[51] << 0x198) + (z[52] << 0x1a0) + (z[53] << 0x1a8) + (z[54] << 0x1b0) + (z[55] << 0x1b8) + (z[56] << 0x1c0) + (z[57] << 0x1c8) + (z[58] << 0x1d0) + (z[59] << 0x1d8) + (z[60] << 0x1e0) + (z[61] << 0x1e8) + (z[62] << 0x1f0) + (z[63] << 0x1f8) + (z[64] << 2^9) + (z[65] << 0x208) +// +// balance = [0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x3fffffffffffffe] + +package fiat + +import "math/bits" + +type p521Uint1 uint8 +type p521Int1 int8 + +// p521AddcarryxU64 is a thin wrapper around bits.Add64 that uses p521Uint1 rather than uint64 +func p521AddcarryxU64(x uint64, y uint64, carry p521Uint1) (uint64, p521Uint1) { + sum, carryOut := bits.Add64(x, y, uint64(carry)) + return sum, p521Uint1(carryOut) +} + +// p521SubborrowxU64 is a thin wrapper around bits.Sub64 that uses p521Uint1 rather than uint64 +func p521SubborrowxU64(x uint64, y uint64, carry p521Uint1) (uint64, p521Uint1) { + sum, carryOut := bits.Sub64(x, y, uint64(carry)) + return sum, p521Uint1(carryOut) +} + +// p521AddcarryxU58 is an addition with carry. +// +// Postconditions: +// out1 = (arg1 + arg2 + arg3) mod 2^58 +// out2 = ⌊(arg1 + arg2 + arg3) / 2^58⌋ +// +// Input Bounds: +// arg1: [0x0 ~> 0x1] +// arg2: [0x0 ~> 0x3ffffffffffffff] +// arg3: [0x0 ~> 0x3ffffffffffffff] +// Output Bounds: +// out1: [0x0 ~> 0x3ffffffffffffff] +// out2: [0x0 ~> 0x1] +func p521AddcarryxU58(out1 *uint64, out2 *p521Uint1, arg1 p521Uint1, arg2 uint64, arg3 uint64) { + x1 := ((uint64(arg1) + arg2) + arg3) + x2 := (x1 & 0x3ffffffffffffff) + x3 := p521Uint1((x1 >> 58)) + *out1 = x2 + *out2 = x3 +} + +// p521SubborrowxU58 is a subtraction with borrow. +// +// Postconditions: +// out1 = (-arg1 + arg2 + -arg3) mod 2^58 +// out2 = -⌊(-arg1 + arg2 + -arg3) / 2^58⌋ +// +// Input Bounds: +// arg1: [0x0 ~> 0x1] +// arg2: [0x0 ~> 0x3ffffffffffffff] +// arg3: [0x0 ~> 0x3ffffffffffffff] +// Output Bounds: +// out1: [0x0 ~> 0x3ffffffffffffff] +// out2: [0x0 ~> 0x1] +func p521SubborrowxU58(out1 *uint64, out2 *p521Uint1, arg1 p521Uint1, arg2 uint64, arg3 uint64) { + x1 := ((int64(arg2) - int64(arg1)) - int64(arg3)) + x2 := p521Int1((x1 >> 58)) + x3 := (uint64(x1) & 0x3ffffffffffffff) + *out1 = x3 + *out2 = (0x0 - p521Uint1(x2)) +} + +// p521AddcarryxU57 is an addition with carry. +// +// Postconditions: +// out1 = (arg1 + arg2 + arg3) mod 2^57 +// out2 = ⌊(arg1 + arg2 + arg3) / 2^57⌋ +// +// Input Bounds: +// arg1: [0x0 ~> 0x1] +// arg2: [0x0 ~> 0x1ffffffffffffff] +// arg3: [0x0 ~> 0x1ffffffffffffff] +// Output Bounds: +// out1: [0x0 ~> 0x1ffffffffffffff] +// out2: [0x0 ~> 0x1] +func p521AddcarryxU57(out1 *uint64, out2 *p521Uint1, arg1 p521Uint1, arg2 uint64, arg3 uint64) { + x1 := ((uint64(arg1) + arg2) + arg3) + x2 := (x1 & 0x1ffffffffffffff) + x3 := p521Uint1((x1 >> 57)) + *out1 = x2 + *out2 = x3 +} + +// p521SubborrowxU57 is a subtraction with borrow. +// +// Postconditions: +// out1 = (-arg1 + arg2 + -arg3) mod 2^57 +// out2 = -⌊(-arg1 + arg2 + -arg3) / 2^57⌋ +// +// Input Bounds: +// arg1: [0x0 ~> 0x1] +// arg2: [0x0 ~> 0x1ffffffffffffff] +// arg3: [0x0 ~> 0x1ffffffffffffff] +// Output Bounds: +// out1: [0x0 ~> 0x1ffffffffffffff] +// out2: [0x0 ~> 0x1] +func p521SubborrowxU57(out1 *uint64, out2 *p521Uint1, arg1 p521Uint1, arg2 uint64, arg3 uint64) { + x1 := ((int64(arg2) - int64(arg1)) - int64(arg3)) + x2 := p521Int1((x1 >> 57)) + x3 := (uint64(x1) & 0x1ffffffffffffff) + *out1 = x3 + *out2 = (0x0 - p521Uint1(x2)) +} + +// p521CmovznzU64 is a single-word conditional move. +// +// Postconditions: +// out1 = (if arg1 = 0 then arg2 else arg3) +// +// Input Bounds: +// arg1: [0x0 ~> 0x1] +// arg2: [0x0 ~> 0xffffffffffffffff] +// arg3: [0x0 ~> 0xffffffffffffffff] +// Output Bounds: +// out1: [0x0 ~> 0xffffffffffffffff] +func p521CmovznzU64(out1 *uint64, arg1 p521Uint1, arg2 uint64, arg3 uint64) { + x1 := (uint64(arg1) * 0xffffffffffffffff) + x2 := ((x1 & arg3) | ((^x1) & arg2)) + *out1 = x2 +} + +// p521CarryMul multiplies two field elements and reduces the result. +// +// Postconditions: +// eval out1 mod m = (eval arg1 * eval arg2) mod m +// +// Input Bounds: +// arg1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] +// arg2: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] +// Output Bounds: +// out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] +func p521CarryMul(out1 *[9]uint64, arg1 *[9]uint64, arg2 *[9]uint64) { + var x1 uint64 + var x2 uint64 + x2, x1 = bits.Mul64(arg1[8], (arg2[8] * 0x2)) + var x3 uint64 + var x4 uint64 + x4, x3 = bits.Mul64(arg1[8], (arg2[7] * 0x2)) + var x5 uint64 + var x6 uint64 + x6, x5 = bits.Mul64(arg1[8], (arg2[6] * 0x2)) + var x7 uint64 + var x8 uint64 + x8, x7 = bits.Mul64(arg1[8], (arg2[5] * 0x2)) + var x9 uint64 + var x10 uint64 + x10, x9 = bits.Mul64(arg1[8], (arg2[4] * 0x2)) + var x11 uint64 + var x12 uint64 + x12, x11 = bits.Mul64(arg1[8], (arg2[3] * 0x2)) + var x13 uint64 + var x14 uint64 + x14, x13 = bits.Mul64(arg1[8], (arg2[2] * 0x2)) + var x15 uint64 + var x16 uint64 + x16, x15 = bits.Mul64(arg1[8], (arg2[1] * 0x2)) + var x17 uint64 + var x18 uint64 + x18, x17 = bits.Mul64(arg1[7], (arg2[8] * 0x2)) + var x19 uint64 + var x20 uint64 + x20, x19 = bits.Mul64(arg1[7], (arg2[7] * 0x2)) + var x21 uint64 + var x22 uint64 + x22, x21 = bits.Mul64(arg1[7], (arg2[6] * 0x2)) + var x23 uint64 + var x24 uint64 + x24, x23 = bits.Mul64(arg1[7], (arg2[5] * 0x2)) + var x25 uint64 + var x26 uint64 + x26, x25 = bits.Mul64(arg1[7], (arg2[4] * 0x2)) + var x27 uint64 + var x28 uint64 + x28, x27 = bits.Mul64(arg1[7], (arg2[3] * 0x2)) + var x29 uint64 + var x30 uint64 + x30, x29 = bits.Mul64(arg1[7], (arg2[2] * 0x2)) + var x31 uint64 + var x32 uint64 + x32, x31 = bits.Mul64(arg1[6], (arg2[8] * 0x2)) + var x33 uint64 + var x34 uint64 + x34, x33 = bits.Mul64(arg1[6], (arg2[7] * 0x2)) + var x35 uint64 + var x36 uint64 + x36, x35 = bits.Mul64(arg1[6], (arg2[6] * 0x2)) + var x37 uint64 + var x38 uint64 + x38, x37 = bits.Mul64(arg1[6], (arg2[5] * 0x2)) + var x39 uint64 + var x40 uint64 + x40, x39 = bits.Mul64(arg1[6], (arg2[4] * 0x2)) + var x41 uint64 + var x42 uint64 + x42, x41 = bits.Mul64(arg1[6], (arg2[3] * 0x2)) + var x43 uint64 + var x44 uint64 + x44, x43 = bits.Mul64(arg1[5], (arg2[8] * 0x2)) + var x45 uint64 + var x46 uint64 + x46, x45 = bits.Mul64(arg1[5], (arg2[7] * 0x2)) + var x47 uint64 + var x48 uint64 + x48, x47 = bits.Mul64(arg1[5], (arg2[6] * 0x2)) + var x49 uint64 + var x50 uint64 + x50, x49 = bits.Mul64(arg1[5], (arg2[5] * 0x2)) + var x51 uint64 + var x52 uint64 + x52, x51 = bits.Mul64(arg1[5], (arg2[4] * 0x2)) + var x53 uint64 + var x54 uint64 + x54, x53 = bits.Mul64(arg1[4], (arg2[8] * 0x2)) + var x55 uint64 + var x56 uint64 + x56, x55 = bits.Mul64(arg1[4], (arg2[7] * 0x2)) + var x57 uint64 + var x58 uint64 + x58, x57 = bits.Mul64(arg1[4], (arg2[6] * 0x2)) + var x59 uint64 + var x60 uint64 + x60, x59 = bits.Mul64(arg1[4], (arg2[5] * 0x2)) + var x61 uint64 + var x62 uint64 + x62, x61 = bits.Mul64(arg1[3], (arg2[8] * 0x2)) + var x63 uint64 + var x64 uint64 + x64, x63 = bits.Mul64(arg1[3], (arg2[7] * 0x2)) + var x65 uint64 + var x66 uint64 + x66, x65 = bits.Mul64(arg1[3], (arg2[6] * 0x2)) + var x67 uint64 + var x68 uint64 + x68, x67 = bits.Mul64(arg1[2], (arg2[8] * 0x2)) + var x69 uint64 + var x70 uint64 + x70, x69 = bits.Mul64(arg1[2], (arg2[7] * 0x2)) + var x71 uint64 + var x72 uint64 + x72, x71 = bits.Mul64(arg1[1], (arg2[8] * 0x2)) + var x73 uint64 + var x74 uint64 + x74, x73 = bits.Mul64(arg1[8], arg2[0]) + var x75 uint64 + var x76 uint64 + x76, x75 = bits.Mul64(arg1[7], arg2[1]) + var x77 uint64 + var x78 uint64 + x78, x77 = bits.Mul64(arg1[7], arg2[0]) + var x79 uint64 + var x80 uint64 + x80, x79 = bits.Mul64(arg1[6], arg2[2]) + var x81 uint64 + var x82 uint64 + x82, x81 = bits.Mul64(arg1[6], arg2[1]) + var x83 uint64 + var x84 uint64 + x84, x83 = bits.Mul64(arg1[6], arg2[0]) + var x85 uint64 + var x86 uint64 + x86, x85 = bits.Mul64(arg1[5], arg2[3]) + var x87 uint64 + var x88 uint64 + x88, x87 = bits.Mul64(arg1[5], arg2[2]) + var x89 uint64 + var x90 uint64 + x90, x89 = bits.Mul64(arg1[5], arg2[1]) + var x91 uint64 + var x92 uint64 + x92, x91 = bits.Mul64(arg1[5], arg2[0]) + var x93 uint64 + var x94 uint64 + x94, x93 = bits.Mul64(arg1[4], arg2[4]) + var x95 uint64 + var x96 uint64 + x96, x95 = bits.Mul64(arg1[4], arg2[3]) + var x97 uint64 + var x98 uint64 + x98, x97 = bits.Mul64(arg1[4], arg2[2]) + var x99 uint64 + var x100 uint64 + x100, x99 = bits.Mul64(arg1[4], arg2[1]) + var x101 uint64 + var x102 uint64 + x102, x101 = bits.Mul64(arg1[4], arg2[0]) + var x103 uint64 + var x104 uint64 + x104, x103 = bits.Mul64(arg1[3], arg2[5]) + var x105 uint64 + var x106 uint64 + x106, x105 = bits.Mul64(arg1[3], arg2[4]) + var x107 uint64 + var x108 uint64 + x108, x107 = bits.Mul64(arg1[3], arg2[3]) + var x109 uint64 + var x110 uint64 + x110, x109 = bits.Mul64(arg1[3], arg2[2]) + var x111 uint64 + var x112 uint64 + x112, x111 = bits.Mul64(arg1[3], arg2[1]) + var x113 uint64 + var x114 uint64 + x114, x113 = bits.Mul64(arg1[3], arg2[0]) + var x115 uint64 + var x116 uint64 + x116, x115 = bits.Mul64(arg1[2], arg2[6]) + var x117 uint64 + var x118 uint64 + x118, x117 = bits.Mul64(arg1[2], arg2[5]) + var x119 uint64 + var x120 uint64 + x120, x119 = bits.Mul64(arg1[2], arg2[4]) + var x121 uint64 + var x122 uint64 + x122, x121 = bits.Mul64(arg1[2], arg2[3]) + var x123 uint64 + var x124 uint64 + x124, x123 = bits.Mul64(arg1[2], arg2[2]) + var x125 uint64 + var x126 uint64 + x126, x125 = bits.Mul64(arg1[2], arg2[1]) + var x127 uint64 + var x128 uint64 + x128, x127 = bits.Mul64(arg1[2], arg2[0]) + var x129 uint64 + var x130 uint64 + x130, x129 = bits.Mul64(arg1[1], arg2[7]) + var x131 uint64 + var x132 uint64 + x132, x131 = bits.Mul64(arg1[1], arg2[6]) + var x133 uint64 + var x134 uint64 + x134, x133 = bits.Mul64(arg1[1], arg2[5]) + var x135 uint64 + var x136 uint64 + x136, x135 = bits.Mul64(arg1[1], arg2[4]) + var x137 uint64 + var x138 uint64 + x138, x137 = bits.Mul64(arg1[1], arg2[3]) + var x139 uint64 + var x140 uint64 + x140, x139 = bits.Mul64(arg1[1], arg2[2]) + var x141 uint64 + var x142 uint64 + x142, x141 = bits.Mul64(arg1[1], arg2[1]) + var x143 uint64 + var x144 uint64 + x144, x143 = bits.Mul64(arg1[1], arg2[0]) + var x145 uint64 + var x146 uint64 + x146, x145 = bits.Mul64(arg1[0], arg2[8]) + var x147 uint64 + var x148 uint64 + x148, x147 = bits.Mul64(arg1[0], arg2[7]) + var x149 uint64 + var x150 uint64 + x150, x149 = bits.Mul64(arg1[0], arg2[6]) + var x151 uint64 + var x152 uint64 + x152, x151 = bits.Mul64(arg1[0], arg2[5]) + var x153 uint64 + var x154 uint64 + x154, x153 = bits.Mul64(arg1[0], arg2[4]) + var x155 uint64 + var x156 uint64 + x156, x155 = bits.Mul64(arg1[0], arg2[3]) + var x157 uint64 + var x158 uint64 + x158, x157 = bits.Mul64(arg1[0], arg2[2]) + var x159 uint64 + var x160 uint64 + x160, x159 = bits.Mul64(arg1[0], arg2[1]) + var x161 uint64 + var x162 uint64 + x162, x161 = bits.Mul64(arg1[0], arg2[0]) + var x163 uint64 + var x164 p521Uint1 + x163, x164 = p521AddcarryxU64(x29, x15, 0x0) + var x165 uint64 + x165, _ = p521AddcarryxU64(x30, x16, x164) + var x167 uint64 + var x168 p521Uint1 + x167, x168 = p521AddcarryxU64(x41, x163, 0x0) + var x169 uint64 + x169, _ = p521AddcarryxU64(x42, x165, x168) + var x171 uint64 + var x172 p521Uint1 + x171, x172 = p521AddcarryxU64(x51, x167, 0x0) + var x173 uint64 + x173, _ = p521AddcarryxU64(x52, x169, x172) + var x175 uint64 + var x176 p521Uint1 + x175, x176 = p521AddcarryxU64(x59, x171, 0x0) + var x177 uint64 + x177, _ = p521AddcarryxU64(x60, x173, x176) + var x179 uint64 + var x180 p521Uint1 + x179, x180 = p521AddcarryxU64(x65, x175, 0x0) + var x181 uint64 + x181, _ = p521AddcarryxU64(x66, x177, x180) + var x183 uint64 + var x184 p521Uint1 + x183, x184 = p521AddcarryxU64(x69, x179, 0x0) + var x185 uint64 + x185, _ = p521AddcarryxU64(x70, x181, x184) + var x187 uint64 + var x188 p521Uint1 + x187, x188 = p521AddcarryxU64(x71, x183, 0x0) + var x189 uint64 + x189, _ = p521AddcarryxU64(x72, x185, x188) + var x191 uint64 + var x192 p521Uint1 + x191, x192 = p521AddcarryxU64(x161, x187, 0x0) + var x193 uint64 + x193, _ = p521AddcarryxU64(x162, x189, x192) + x195 := ((x191 >> 58) | ((x193 << 6) & 0xffffffffffffffff)) + x196 := (x193 >> 58) + x197 := (x191 & 0x3ffffffffffffff) + var x198 uint64 + var x199 p521Uint1 + x198, x199 = p521AddcarryxU64(x75, x73, 0x0) + var x200 uint64 + x200, _ = p521AddcarryxU64(x76, x74, x199) + var x202 uint64 + var x203 p521Uint1 + x202, x203 = p521AddcarryxU64(x79, x198, 0x0) + var x204 uint64 + x204, _ = p521AddcarryxU64(x80, x200, x203) + var x206 uint64 + var x207 p521Uint1 + x206, x207 = p521AddcarryxU64(x85, x202, 0x0) + var x208 uint64 + x208, _ = p521AddcarryxU64(x86, x204, x207) + var x210 uint64 + var x211 p521Uint1 + x210, x211 = p521AddcarryxU64(x93, x206, 0x0) + var x212 uint64 + x212, _ = p521AddcarryxU64(x94, x208, x211) + var x214 uint64 + var x215 p521Uint1 + x214, x215 = p521AddcarryxU64(x103, x210, 0x0) + var x216 uint64 + x216, _ = p521AddcarryxU64(x104, x212, x215) + var x218 uint64 + var x219 p521Uint1 + x218, x219 = p521AddcarryxU64(x115, x214, 0x0) + var x220 uint64 + x220, _ = p521AddcarryxU64(x116, x216, x219) + var x222 uint64 + var x223 p521Uint1 + x222, x223 = p521AddcarryxU64(x129, x218, 0x0) + var x224 uint64 + x224, _ = p521AddcarryxU64(x130, x220, x223) + var x226 uint64 + var x227 p521Uint1 + x226, x227 = p521AddcarryxU64(x145, x222, 0x0) + var x228 uint64 + x228, _ = p521AddcarryxU64(x146, x224, x227) + var x230 uint64 + var x231 p521Uint1 + x230, x231 = p521AddcarryxU64(x77, x1, 0x0) + var x232 uint64 + x232, _ = p521AddcarryxU64(x78, x2, x231) + var x234 uint64 + var x235 p521Uint1 + x234, x235 = p521AddcarryxU64(x81, x230, 0x0) + var x236 uint64 + x236, _ = p521AddcarryxU64(x82, x232, x235) + var x238 uint64 + var x239 p521Uint1 + x238, x239 = p521AddcarryxU64(x87, x234, 0x0) + var x240 uint64 + x240, _ = p521AddcarryxU64(x88, x236, x239) + var x242 uint64 + var x243 p521Uint1 + x242, x243 = p521AddcarryxU64(x95, x238, 0x0) + var x244 uint64 + x244, _ = p521AddcarryxU64(x96, x240, x243) + var x246 uint64 + var x247 p521Uint1 + x246, x247 = p521AddcarryxU64(x105, x242, 0x0) + var x248 uint64 + x248, _ = p521AddcarryxU64(x106, x244, x247) + var x250 uint64 + var x251 p521Uint1 + x250, x251 = p521AddcarryxU64(x117, x246, 0x0) + var x252 uint64 + x252, _ = p521AddcarryxU64(x118, x248, x251) + var x254 uint64 + var x255 p521Uint1 + x254, x255 = p521AddcarryxU64(x131, x250, 0x0) + var x256 uint64 + x256, _ = p521AddcarryxU64(x132, x252, x255) + var x258 uint64 + var x259 p521Uint1 + x258, x259 = p521AddcarryxU64(x147, x254, 0x0) + var x260 uint64 + x260, _ = p521AddcarryxU64(x148, x256, x259) + var x262 uint64 + var x263 p521Uint1 + x262, x263 = p521AddcarryxU64(x17, x3, 0x0) + var x264 uint64 + x264, _ = p521AddcarryxU64(x18, x4, x263) + var x266 uint64 + var x267 p521Uint1 + x266, x267 = p521AddcarryxU64(x83, x262, 0x0) + var x268 uint64 + x268, _ = p521AddcarryxU64(x84, x264, x267) + var x270 uint64 + var x271 p521Uint1 + x270, x271 = p521AddcarryxU64(x89, x266, 0x0) + var x272 uint64 + x272, _ = p521AddcarryxU64(x90, x268, x271) + var x274 uint64 + var x275 p521Uint1 + x274, x275 = p521AddcarryxU64(x97, x270, 0x0) + var x276 uint64 + x276, _ = p521AddcarryxU64(x98, x272, x275) + var x278 uint64 + var x279 p521Uint1 + x278, x279 = p521AddcarryxU64(x107, x274, 0x0) + var x280 uint64 + x280, _ = p521AddcarryxU64(x108, x276, x279) + var x282 uint64 + var x283 p521Uint1 + x282, x283 = p521AddcarryxU64(x119, x278, 0x0) + var x284 uint64 + x284, _ = p521AddcarryxU64(x120, x280, x283) + var x286 uint64 + var x287 p521Uint1 + x286, x287 = p521AddcarryxU64(x133, x282, 0x0) + var x288 uint64 + x288, _ = p521AddcarryxU64(x134, x284, x287) + var x290 uint64 + var x291 p521Uint1 + x290, x291 = p521AddcarryxU64(x149, x286, 0x0) + var x292 uint64 + x292, _ = p521AddcarryxU64(x150, x288, x291) + var x294 uint64 + var x295 p521Uint1 + x294, x295 = p521AddcarryxU64(x19, x5, 0x0) + var x296 uint64 + x296, _ = p521AddcarryxU64(x20, x6, x295) + var x298 uint64 + var x299 p521Uint1 + x298, x299 = p521AddcarryxU64(x31, x294, 0x0) + var x300 uint64 + x300, _ = p521AddcarryxU64(x32, x296, x299) + var x302 uint64 + var x303 p521Uint1 + x302, x303 = p521AddcarryxU64(x91, x298, 0x0) + var x304 uint64 + x304, _ = p521AddcarryxU64(x92, x300, x303) + var x306 uint64 + var x307 p521Uint1 + x306, x307 = p521AddcarryxU64(x99, x302, 0x0) + var x308 uint64 + x308, _ = p521AddcarryxU64(x100, x304, x307) + var x310 uint64 + var x311 p521Uint1 + x310, x311 = p521AddcarryxU64(x109, x306, 0x0) + var x312 uint64 + x312, _ = p521AddcarryxU64(x110, x308, x311) + var x314 uint64 + var x315 p521Uint1 + x314, x315 = p521AddcarryxU64(x121, x310, 0x0) + var x316 uint64 + x316, _ = p521AddcarryxU64(x122, x312, x315) + var x318 uint64 + var x319 p521Uint1 + x318, x319 = p521AddcarryxU64(x135, x314, 0x0) + var x320 uint64 + x320, _ = p521AddcarryxU64(x136, x316, x319) + var x322 uint64 + var x323 p521Uint1 + x322, x323 = p521AddcarryxU64(x151, x318, 0x0) + var x324 uint64 + x324, _ = p521AddcarryxU64(x152, x320, x323) + var x326 uint64 + var x327 p521Uint1 + x326, x327 = p521AddcarryxU64(x21, x7, 0x0) + var x328 uint64 + x328, _ = p521AddcarryxU64(x22, x8, x327) + var x330 uint64 + var x331 p521Uint1 + x330, x331 = p521AddcarryxU64(x33, x326, 0x0) + var x332 uint64 + x332, _ = p521AddcarryxU64(x34, x328, x331) + var x334 uint64 + var x335 p521Uint1 + x334, x335 = p521AddcarryxU64(x43, x330, 0x0) + var x336 uint64 + x336, _ = p521AddcarryxU64(x44, x332, x335) + var x338 uint64 + var x339 p521Uint1 + x338, x339 = p521AddcarryxU64(x101, x334, 0x0) + var x340 uint64 + x340, _ = p521AddcarryxU64(x102, x336, x339) + var x342 uint64 + var x343 p521Uint1 + x342, x343 = p521AddcarryxU64(x111, x338, 0x0) + var x344 uint64 + x344, _ = p521AddcarryxU64(x112, x340, x343) + var x346 uint64 + var x347 p521Uint1 + x346, x347 = p521AddcarryxU64(x123, x342, 0x0) + var x348 uint64 + x348, _ = p521AddcarryxU64(x124, x344, x347) + var x350 uint64 + var x351 p521Uint1 + x350, x351 = p521AddcarryxU64(x137, x346, 0x0) + var x352 uint64 + x352, _ = p521AddcarryxU64(x138, x348, x351) + var x354 uint64 + var x355 p521Uint1 + x354, x355 = p521AddcarryxU64(x153, x350, 0x0) + var x356 uint64 + x356, _ = p521AddcarryxU64(x154, x352, x355) + var x358 uint64 + var x359 p521Uint1 + x358, x359 = p521AddcarryxU64(x23, x9, 0x0) + var x360 uint64 + x360, _ = p521AddcarryxU64(x24, x10, x359) + var x362 uint64 + var x363 p521Uint1 + x362, x363 = p521AddcarryxU64(x35, x358, 0x0) + var x364 uint64 + x364, _ = p521AddcarryxU64(x36, x360, x363) + var x366 uint64 + var x367 p521Uint1 + x366, x367 = p521AddcarryxU64(x45, x362, 0x0) + var x368 uint64 + x368, _ = p521AddcarryxU64(x46, x364, x367) + var x370 uint64 + var x371 p521Uint1 + x370, x371 = p521AddcarryxU64(x53, x366, 0x0) + var x372 uint64 + x372, _ = p521AddcarryxU64(x54, x368, x371) + var x374 uint64 + var x375 p521Uint1 + x374, x375 = p521AddcarryxU64(x113, x370, 0x0) + var x376 uint64 + x376, _ = p521AddcarryxU64(x114, x372, x375) + var x378 uint64 + var x379 p521Uint1 + x378, x379 = p521AddcarryxU64(x125, x374, 0x0) + var x380 uint64 + x380, _ = p521AddcarryxU64(x126, x376, x379) + var x382 uint64 + var x383 p521Uint1 + x382, x383 = p521AddcarryxU64(x139, x378, 0x0) + var x384 uint64 + x384, _ = p521AddcarryxU64(x140, x380, x383) + var x386 uint64 + var x387 p521Uint1 + x386, x387 = p521AddcarryxU64(x155, x382, 0x0) + var x388 uint64 + x388, _ = p521AddcarryxU64(x156, x384, x387) + var x390 uint64 + var x391 p521Uint1 + x390, x391 = p521AddcarryxU64(x25, x11, 0x0) + var x392 uint64 + x392, _ = p521AddcarryxU64(x26, x12, x391) + var x394 uint64 + var x395 p521Uint1 + x394, x395 = p521AddcarryxU64(x37, x390, 0x0) + var x396 uint64 + x396, _ = p521AddcarryxU64(x38, x392, x395) + var x398 uint64 + var x399 p521Uint1 + x398, x399 = p521AddcarryxU64(x47, x394, 0x0) + var x400 uint64 + x400, _ = p521AddcarryxU64(x48, x396, x399) + var x402 uint64 + var x403 p521Uint1 + x402, x403 = p521AddcarryxU64(x55, x398, 0x0) + var x404 uint64 + x404, _ = p521AddcarryxU64(x56, x400, x403) + var x406 uint64 + var x407 p521Uint1 + x406, x407 = p521AddcarryxU64(x61, x402, 0x0) + var x408 uint64 + x408, _ = p521AddcarryxU64(x62, x404, x407) + var x410 uint64 + var x411 p521Uint1 + x410, x411 = p521AddcarryxU64(x127, x406, 0x0) + var x412 uint64 + x412, _ = p521AddcarryxU64(x128, x408, x411) + var x414 uint64 + var x415 p521Uint1 + x414, x415 = p521AddcarryxU64(x141, x410, 0x0) + var x416 uint64 + x416, _ = p521AddcarryxU64(x142, x412, x415) + var x418 uint64 + var x419 p521Uint1 + x418, x419 = p521AddcarryxU64(x157, x414, 0x0) + var x420 uint64 + x420, _ = p521AddcarryxU64(x158, x416, x419) + var x422 uint64 + var x423 p521Uint1 + x422, x423 = p521AddcarryxU64(x27, x13, 0x0) + var x424 uint64 + x424, _ = p521AddcarryxU64(x28, x14, x423) + var x426 uint64 + var x427 p521Uint1 + x426, x427 = p521AddcarryxU64(x39, x422, 0x0) + var x428 uint64 + x428, _ = p521AddcarryxU64(x40, x424, x427) + var x430 uint64 + var x431 p521Uint1 + x430, x431 = p521AddcarryxU64(x49, x426, 0x0) + var x432 uint64 + x432, _ = p521AddcarryxU64(x50, x428, x431) + var x434 uint64 + var x435 p521Uint1 + x434, x435 = p521AddcarryxU64(x57, x430, 0x0) + var x436 uint64 + x436, _ = p521AddcarryxU64(x58, x432, x435) + var x438 uint64 + var x439 p521Uint1 + x438, x439 = p521AddcarryxU64(x63, x434, 0x0) + var x440 uint64 + x440, _ = p521AddcarryxU64(x64, x436, x439) + var x442 uint64 + var x443 p521Uint1 + x442, x443 = p521AddcarryxU64(x67, x438, 0x0) + var x444 uint64 + x444, _ = p521AddcarryxU64(x68, x440, x443) + var x446 uint64 + var x447 p521Uint1 + x446, x447 = p521AddcarryxU64(x143, x442, 0x0) + var x448 uint64 + x448, _ = p521AddcarryxU64(x144, x444, x447) + var x450 uint64 + var x451 p521Uint1 + x450, x451 = p521AddcarryxU64(x159, x446, 0x0) + var x452 uint64 + x452, _ = p521AddcarryxU64(x160, x448, x451) + var x454 uint64 + var x455 p521Uint1 + x454, x455 = p521AddcarryxU64(x195, x450, 0x0) + var x456 uint64 + x456, _ = p521AddcarryxU64(x196, x452, x455) + x458 := ((x454 >> 58) | ((x456 << 6) & 0xffffffffffffffff)) + x459 := (x456 >> 58) + x460 := (x454 & 0x3ffffffffffffff) + var x461 uint64 + var x462 p521Uint1 + x461, x462 = p521AddcarryxU64(x458, x418, 0x0) + var x463 uint64 + x463, _ = p521AddcarryxU64(x459, x420, x462) + x465 := ((x461 >> 58) | ((x463 << 6) & 0xffffffffffffffff)) + x466 := (x463 >> 58) + x467 := (x461 & 0x3ffffffffffffff) + var x468 uint64 + var x469 p521Uint1 + x468, x469 = p521AddcarryxU64(x465, x386, 0x0) + var x470 uint64 + x470, _ = p521AddcarryxU64(x466, x388, x469) + x472 := ((x468 >> 58) | ((x470 << 6) & 0xffffffffffffffff)) + x473 := (x470 >> 58) + x474 := (x468 & 0x3ffffffffffffff) + var x475 uint64 + var x476 p521Uint1 + x475, x476 = p521AddcarryxU64(x472, x354, 0x0) + var x477 uint64 + x477, _ = p521AddcarryxU64(x473, x356, x476) + x479 := ((x475 >> 58) | ((x477 << 6) & 0xffffffffffffffff)) + x480 := (x477 >> 58) + x481 := (x475 & 0x3ffffffffffffff) + var x482 uint64 + var x483 p521Uint1 + x482, x483 = p521AddcarryxU64(x479, x322, 0x0) + var x484 uint64 + x484, _ = p521AddcarryxU64(x480, x324, x483) + x486 := ((x482 >> 58) | ((x484 << 6) & 0xffffffffffffffff)) + x487 := (x484 >> 58) + x488 := (x482 & 0x3ffffffffffffff) + var x489 uint64 + var x490 p521Uint1 + x489, x490 = p521AddcarryxU64(x486, x290, 0x0) + var x491 uint64 + x491, _ = p521AddcarryxU64(x487, x292, x490) + x493 := ((x489 >> 58) | ((x491 << 6) & 0xffffffffffffffff)) + x494 := (x491 >> 58) + x495 := (x489 & 0x3ffffffffffffff) + var x496 uint64 + var x497 p521Uint1 + x496, x497 = p521AddcarryxU64(x493, x258, 0x0) + var x498 uint64 + x498, _ = p521AddcarryxU64(x494, x260, x497) + x500 := ((x496 >> 58) | ((x498 << 6) & 0xffffffffffffffff)) + x501 := (x498 >> 58) + x502 := (x496 & 0x3ffffffffffffff) + var x503 uint64 + var x504 p521Uint1 + x503, x504 = p521AddcarryxU64(x500, x226, 0x0) + var x505 uint64 + x505, _ = p521AddcarryxU64(x501, x228, x504) + x507 := ((x503 >> 57) | ((x505 << 7) & 0xffffffffffffffff)) + x508 := (x505 >> 57) + x509 := (x503 & 0x1ffffffffffffff) + var x510 uint64 + var x511 p521Uint1 + x510, x511 = p521AddcarryxU64(x197, x507, 0x0) + x512 := (uint64(x511) + x508) + x513 := ((x510 >> 58) | ((x512 << 6) & 0xffffffffffffffff)) + x514 := (x510 & 0x3ffffffffffffff) + x515 := (x513 + x460) + x516 := p521Uint1((x515 >> 58)) + x517 := (x515 & 0x3ffffffffffffff) + x518 := (uint64(x516) + x467) + out1[0] = x514 + out1[1] = x517 + out1[2] = x518 + out1[3] = x474 + out1[4] = x481 + out1[5] = x488 + out1[6] = x495 + out1[7] = x502 + out1[8] = x509 +} + +// p521CarrySquare squares a field element and reduces the result. +// +// Postconditions: +// eval out1 mod m = (eval arg1 * eval arg1) mod m +// +// Input Bounds: +// arg1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] +// Output Bounds: +// out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] +func p521CarrySquare(out1 *[9]uint64, arg1 *[9]uint64) { + x1 := arg1[8] + x2 := (x1 * 0x2) + x3 := (arg1[8] * 0x2) + x4 := arg1[7] + x5 := (x4 * 0x2) + x6 := (arg1[7] * 0x2) + x7 := arg1[6] + x8 := (x7 * 0x2) + x9 := (arg1[6] * 0x2) + x10 := arg1[5] + x11 := (x10 * 0x2) + x12 := (arg1[5] * 0x2) + x13 := (arg1[4] * 0x2) + x14 := (arg1[3] * 0x2) + x15 := (arg1[2] * 0x2) + x16 := (arg1[1] * 0x2) + var x17 uint64 + var x18 uint64 + x18, x17 = bits.Mul64(arg1[8], (x1 * 0x2)) + var x19 uint64 + var x20 uint64 + x20, x19 = bits.Mul64(arg1[7], (x2 * 0x2)) + var x21 uint64 + var x22 uint64 + x22, x21 = bits.Mul64(arg1[7], (x4 * 0x2)) + var x23 uint64 + var x24 uint64 + x24, x23 = bits.Mul64(arg1[6], (x2 * 0x2)) + var x25 uint64 + var x26 uint64 + x26, x25 = bits.Mul64(arg1[6], (x5 * 0x2)) + var x27 uint64 + var x28 uint64 + x28, x27 = bits.Mul64(arg1[6], (x7 * 0x2)) + var x29 uint64 + var x30 uint64 + x30, x29 = bits.Mul64(arg1[5], (x2 * 0x2)) + var x31 uint64 + var x32 uint64 + x32, x31 = bits.Mul64(arg1[5], (x5 * 0x2)) + var x33 uint64 + var x34 uint64 + x34, x33 = bits.Mul64(arg1[5], (x8 * 0x2)) + var x35 uint64 + var x36 uint64 + x36, x35 = bits.Mul64(arg1[5], (x10 * 0x2)) + var x37 uint64 + var x38 uint64 + x38, x37 = bits.Mul64(arg1[4], (x2 * 0x2)) + var x39 uint64 + var x40 uint64 + x40, x39 = bits.Mul64(arg1[4], (x5 * 0x2)) + var x41 uint64 + var x42 uint64 + x42, x41 = bits.Mul64(arg1[4], (x8 * 0x2)) + var x43 uint64 + var x44 uint64 + x44, x43 = bits.Mul64(arg1[4], (x11 * 0x2)) + var x45 uint64 + var x46 uint64 + x46, x45 = bits.Mul64(arg1[4], arg1[4]) + var x47 uint64 + var x48 uint64 + x48, x47 = bits.Mul64(arg1[3], (x2 * 0x2)) + var x49 uint64 + var x50 uint64 + x50, x49 = bits.Mul64(arg1[3], (x5 * 0x2)) + var x51 uint64 + var x52 uint64 + x52, x51 = bits.Mul64(arg1[3], (x8 * 0x2)) + var x53 uint64 + var x54 uint64 + x54, x53 = bits.Mul64(arg1[3], x12) + var x55 uint64 + var x56 uint64 + x56, x55 = bits.Mul64(arg1[3], x13) + var x57 uint64 + var x58 uint64 + x58, x57 = bits.Mul64(arg1[3], arg1[3]) + var x59 uint64 + var x60 uint64 + x60, x59 = bits.Mul64(arg1[2], (x2 * 0x2)) + var x61 uint64 + var x62 uint64 + x62, x61 = bits.Mul64(arg1[2], (x5 * 0x2)) + var x63 uint64 + var x64 uint64 + x64, x63 = bits.Mul64(arg1[2], x9) + var x65 uint64 + var x66 uint64 + x66, x65 = bits.Mul64(arg1[2], x12) + var x67 uint64 + var x68 uint64 + x68, x67 = bits.Mul64(arg1[2], x13) + var x69 uint64 + var x70 uint64 + x70, x69 = bits.Mul64(arg1[2], x14) + var x71 uint64 + var x72 uint64 + x72, x71 = bits.Mul64(arg1[2], arg1[2]) + var x73 uint64 + var x74 uint64 + x74, x73 = bits.Mul64(arg1[1], (x2 * 0x2)) + var x75 uint64 + var x76 uint64 + x76, x75 = bits.Mul64(arg1[1], x6) + var x77 uint64 + var x78 uint64 + x78, x77 = bits.Mul64(arg1[1], x9) + var x79 uint64 + var x80 uint64 + x80, x79 = bits.Mul64(arg1[1], x12) + var x81 uint64 + var x82 uint64 + x82, x81 = bits.Mul64(arg1[1], x13) + var x83 uint64 + var x84 uint64 + x84, x83 = bits.Mul64(arg1[1], x14) + var x85 uint64 + var x86 uint64 + x86, x85 = bits.Mul64(arg1[1], x15) + var x87 uint64 + var x88 uint64 + x88, x87 = bits.Mul64(arg1[1], arg1[1]) + var x89 uint64 + var x90 uint64 + x90, x89 = bits.Mul64(arg1[0], x3) + var x91 uint64 + var x92 uint64 + x92, x91 = bits.Mul64(arg1[0], x6) + var x93 uint64 + var x94 uint64 + x94, x93 = bits.Mul64(arg1[0], x9) + var x95 uint64 + var x96 uint64 + x96, x95 = bits.Mul64(arg1[0], x12) + var x97 uint64 + var x98 uint64 + x98, x97 = bits.Mul64(arg1[0], x13) + var x99 uint64 + var x100 uint64 + x100, x99 = bits.Mul64(arg1[0], x14) + var x101 uint64 + var x102 uint64 + x102, x101 = bits.Mul64(arg1[0], x15) + var x103 uint64 + var x104 uint64 + x104, x103 = bits.Mul64(arg1[0], x16) + var x105 uint64 + var x106 uint64 + x106, x105 = bits.Mul64(arg1[0], arg1[0]) + var x107 uint64 + var x108 p521Uint1 + x107, x108 = p521AddcarryxU64(x51, x43, 0x0) + var x109 uint64 + x109, _ = p521AddcarryxU64(x52, x44, x108) + var x111 uint64 + var x112 p521Uint1 + x111, x112 = p521AddcarryxU64(x61, x107, 0x0) + var x113 uint64 + x113, _ = p521AddcarryxU64(x62, x109, x112) + var x115 uint64 + var x116 p521Uint1 + x115, x116 = p521AddcarryxU64(x73, x111, 0x0) + var x117 uint64 + x117, _ = p521AddcarryxU64(x74, x113, x116) + var x119 uint64 + var x120 p521Uint1 + x119, x120 = p521AddcarryxU64(x105, x115, 0x0) + var x121 uint64 + x121, _ = p521AddcarryxU64(x106, x117, x120) + x123 := ((x119 >> 58) | ((x121 << 6) & 0xffffffffffffffff)) + x124 := (x121 >> 58) + x125 := (x119 & 0x3ffffffffffffff) + var x126 uint64 + var x127 p521Uint1 + x126, x127 = p521AddcarryxU64(x53, x45, 0x0) + var x128 uint64 + x128, _ = p521AddcarryxU64(x54, x46, x127) + var x130 uint64 + var x131 p521Uint1 + x130, x131 = p521AddcarryxU64(x63, x126, 0x0) + var x132 uint64 + x132, _ = p521AddcarryxU64(x64, x128, x131) + var x134 uint64 + var x135 p521Uint1 + x134, x135 = p521AddcarryxU64(x75, x130, 0x0) + var x136 uint64 + x136, _ = p521AddcarryxU64(x76, x132, x135) + var x138 uint64 + var x139 p521Uint1 + x138, x139 = p521AddcarryxU64(x89, x134, 0x0) + var x140 uint64 + x140, _ = p521AddcarryxU64(x90, x136, x139) + var x142 uint64 + var x143 p521Uint1 + x142, x143 = p521AddcarryxU64(x55, x17, 0x0) + var x144 uint64 + x144, _ = p521AddcarryxU64(x56, x18, x143) + var x146 uint64 + var x147 p521Uint1 + x146, x147 = p521AddcarryxU64(x65, x142, 0x0) + var x148 uint64 + x148, _ = p521AddcarryxU64(x66, x144, x147) + var x150 uint64 + var x151 p521Uint1 + x150, x151 = p521AddcarryxU64(x77, x146, 0x0) + var x152 uint64 + x152, _ = p521AddcarryxU64(x78, x148, x151) + var x154 uint64 + var x155 p521Uint1 + x154, x155 = p521AddcarryxU64(x91, x150, 0x0) + var x156 uint64 + x156, _ = p521AddcarryxU64(x92, x152, x155) + var x158 uint64 + var x159 p521Uint1 + x158, x159 = p521AddcarryxU64(x57, x19, 0x0) + var x160 uint64 + x160, _ = p521AddcarryxU64(x58, x20, x159) + var x162 uint64 + var x163 p521Uint1 + x162, x163 = p521AddcarryxU64(x67, x158, 0x0) + var x164 uint64 + x164, _ = p521AddcarryxU64(x68, x160, x163) + var x166 uint64 + var x167 p521Uint1 + x166, x167 = p521AddcarryxU64(x79, x162, 0x0) + var x168 uint64 + x168, _ = p521AddcarryxU64(x80, x164, x167) + var x170 uint64 + var x171 p521Uint1 + x170, x171 = p521AddcarryxU64(x93, x166, 0x0) + var x172 uint64 + x172, _ = p521AddcarryxU64(x94, x168, x171) + var x174 uint64 + var x175 p521Uint1 + x174, x175 = p521AddcarryxU64(x23, x21, 0x0) + var x176 uint64 + x176, _ = p521AddcarryxU64(x24, x22, x175) + var x178 uint64 + var x179 p521Uint1 + x178, x179 = p521AddcarryxU64(x69, x174, 0x0) + var x180 uint64 + x180, _ = p521AddcarryxU64(x70, x176, x179) + var x182 uint64 + var x183 p521Uint1 + x182, x183 = p521AddcarryxU64(x81, x178, 0x0) + var x184 uint64 + x184, _ = p521AddcarryxU64(x82, x180, x183) + var x186 uint64 + var x187 p521Uint1 + x186, x187 = p521AddcarryxU64(x95, x182, 0x0) + var x188 uint64 + x188, _ = p521AddcarryxU64(x96, x184, x187) + var x190 uint64 + var x191 p521Uint1 + x190, x191 = p521AddcarryxU64(x29, x25, 0x0) + var x192 uint64 + x192, _ = p521AddcarryxU64(x30, x26, x191) + var x194 uint64 + var x195 p521Uint1 + x194, x195 = p521AddcarryxU64(x71, x190, 0x0) + var x196 uint64 + x196, _ = p521AddcarryxU64(x72, x192, x195) + var x198 uint64 + var x199 p521Uint1 + x198, x199 = p521AddcarryxU64(x83, x194, 0x0) + var x200 uint64 + x200, _ = p521AddcarryxU64(x84, x196, x199) + var x202 uint64 + var x203 p521Uint1 + x202, x203 = p521AddcarryxU64(x97, x198, 0x0) + var x204 uint64 + x204, _ = p521AddcarryxU64(x98, x200, x203) + var x206 uint64 + var x207 p521Uint1 + x206, x207 = p521AddcarryxU64(x31, x27, 0x0) + var x208 uint64 + x208, _ = p521AddcarryxU64(x32, x28, x207) + var x210 uint64 + var x211 p521Uint1 + x210, x211 = p521AddcarryxU64(x37, x206, 0x0) + var x212 uint64 + x212, _ = p521AddcarryxU64(x38, x208, x211) + var x214 uint64 + var x215 p521Uint1 + x214, x215 = p521AddcarryxU64(x85, x210, 0x0) + var x216 uint64 + x216, _ = p521AddcarryxU64(x86, x212, x215) + var x218 uint64 + var x219 p521Uint1 + x218, x219 = p521AddcarryxU64(x99, x214, 0x0) + var x220 uint64 + x220, _ = p521AddcarryxU64(x100, x216, x219) + var x222 uint64 + var x223 p521Uint1 + x222, x223 = p521AddcarryxU64(x39, x33, 0x0) + var x224 uint64 + x224, _ = p521AddcarryxU64(x40, x34, x223) + var x226 uint64 + var x227 p521Uint1 + x226, x227 = p521AddcarryxU64(x47, x222, 0x0) + var x228 uint64 + x228, _ = p521AddcarryxU64(x48, x224, x227) + var x230 uint64 + var x231 p521Uint1 + x230, x231 = p521AddcarryxU64(x87, x226, 0x0) + var x232 uint64 + x232, _ = p521AddcarryxU64(x88, x228, x231) + var x234 uint64 + var x235 p521Uint1 + x234, x235 = p521AddcarryxU64(x101, x230, 0x0) + var x236 uint64 + x236, _ = p521AddcarryxU64(x102, x232, x235) + var x238 uint64 + var x239 p521Uint1 + x238, x239 = p521AddcarryxU64(x41, x35, 0x0) + var x240 uint64 + x240, _ = p521AddcarryxU64(x42, x36, x239) + var x242 uint64 + var x243 p521Uint1 + x242, x243 = p521AddcarryxU64(x49, x238, 0x0) + var x244 uint64 + x244, _ = p521AddcarryxU64(x50, x240, x243) + var x246 uint64 + var x247 p521Uint1 + x246, x247 = p521AddcarryxU64(x59, x242, 0x0) + var x248 uint64 + x248, _ = p521AddcarryxU64(x60, x244, x247) + var x250 uint64 + var x251 p521Uint1 + x250, x251 = p521AddcarryxU64(x103, x246, 0x0) + var x252 uint64 + x252, _ = p521AddcarryxU64(x104, x248, x251) + var x254 uint64 + var x255 p521Uint1 + x254, x255 = p521AddcarryxU64(x123, x250, 0x0) + var x256 uint64 + x256, _ = p521AddcarryxU64(x124, x252, x255) + x258 := ((x254 >> 58) | ((x256 << 6) & 0xffffffffffffffff)) + x259 := (x256 >> 58) + x260 := (x254 & 0x3ffffffffffffff) + var x261 uint64 + var x262 p521Uint1 + x261, x262 = p521AddcarryxU64(x258, x234, 0x0) + var x263 uint64 + x263, _ = p521AddcarryxU64(x259, x236, x262) + x265 := ((x261 >> 58) | ((x263 << 6) & 0xffffffffffffffff)) + x266 := (x263 >> 58) + x267 := (x261 & 0x3ffffffffffffff) + var x268 uint64 + var x269 p521Uint1 + x268, x269 = p521AddcarryxU64(x265, x218, 0x0) + var x270 uint64 + x270, _ = p521AddcarryxU64(x266, x220, x269) + x272 := ((x268 >> 58) | ((x270 << 6) & 0xffffffffffffffff)) + x273 := (x270 >> 58) + x274 := (x268 & 0x3ffffffffffffff) + var x275 uint64 + var x276 p521Uint1 + x275, x276 = p521AddcarryxU64(x272, x202, 0x0) + var x277 uint64 + x277, _ = p521AddcarryxU64(x273, x204, x276) + x279 := ((x275 >> 58) | ((x277 << 6) & 0xffffffffffffffff)) + x280 := (x277 >> 58) + x281 := (x275 & 0x3ffffffffffffff) + var x282 uint64 + var x283 p521Uint1 + x282, x283 = p521AddcarryxU64(x279, x186, 0x0) + var x284 uint64 + x284, _ = p521AddcarryxU64(x280, x188, x283) + x286 := ((x282 >> 58) | ((x284 << 6) & 0xffffffffffffffff)) + x287 := (x284 >> 58) + x288 := (x282 & 0x3ffffffffffffff) + var x289 uint64 + var x290 p521Uint1 + x289, x290 = p521AddcarryxU64(x286, x170, 0x0) + var x291 uint64 + x291, _ = p521AddcarryxU64(x287, x172, x290) + x293 := ((x289 >> 58) | ((x291 << 6) & 0xffffffffffffffff)) + x294 := (x291 >> 58) + x295 := (x289 & 0x3ffffffffffffff) + var x296 uint64 + var x297 p521Uint1 + x296, x297 = p521AddcarryxU64(x293, x154, 0x0) + var x298 uint64 + x298, _ = p521AddcarryxU64(x294, x156, x297) + x300 := ((x296 >> 58) | ((x298 << 6) & 0xffffffffffffffff)) + x301 := (x298 >> 58) + x302 := (x296 & 0x3ffffffffffffff) + var x303 uint64 + var x304 p521Uint1 + x303, x304 = p521AddcarryxU64(x300, x138, 0x0) + var x305 uint64 + x305, _ = p521AddcarryxU64(x301, x140, x304) + x307 := ((x303 >> 57) | ((x305 << 7) & 0xffffffffffffffff)) + x308 := (x305 >> 57) + x309 := (x303 & 0x1ffffffffffffff) + var x310 uint64 + var x311 p521Uint1 + x310, x311 = p521AddcarryxU64(x125, x307, 0x0) + x312 := (uint64(x311) + x308) + x313 := ((x310 >> 58) | ((x312 << 6) & 0xffffffffffffffff)) + x314 := (x310 & 0x3ffffffffffffff) + x315 := (x313 + x260) + x316 := p521Uint1((x315 >> 58)) + x317 := (x315 & 0x3ffffffffffffff) + x318 := (uint64(x316) + x267) + out1[0] = x314 + out1[1] = x317 + out1[2] = x318 + out1[3] = x274 + out1[4] = x281 + out1[5] = x288 + out1[6] = x295 + out1[7] = x302 + out1[8] = x309 +} + +// p521Carry reduces a field element. +// +// Postconditions: +// eval out1 mod m = eval arg1 mod m +// +// Input Bounds: +// arg1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] +// Output Bounds: +// out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] +func p521Carry(out1 *[9]uint64, arg1 *[9]uint64) { + x1 := arg1[0] + x2 := ((x1 >> 58) + arg1[1]) + x3 := ((x2 >> 58) + arg1[2]) + x4 := ((x3 >> 58) + arg1[3]) + x5 := ((x4 >> 58) + arg1[4]) + x6 := ((x5 >> 58) + arg1[5]) + x7 := ((x6 >> 58) + arg1[6]) + x8 := ((x7 >> 58) + arg1[7]) + x9 := ((x8 >> 58) + arg1[8]) + x10 := ((x1 & 0x3ffffffffffffff) + (x9 >> 57)) + x11 := (uint64(p521Uint1((x10 >> 58))) + (x2 & 0x3ffffffffffffff)) + x12 := (x10 & 0x3ffffffffffffff) + x13 := (x11 & 0x3ffffffffffffff) + x14 := (uint64(p521Uint1((x11 >> 58))) + (x3 & 0x3ffffffffffffff)) + x15 := (x4 & 0x3ffffffffffffff) + x16 := (x5 & 0x3ffffffffffffff) + x17 := (x6 & 0x3ffffffffffffff) + x18 := (x7 & 0x3ffffffffffffff) + x19 := (x8 & 0x3ffffffffffffff) + x20 := (x9 & 0x1ffffffffffffff) + out1[0] = x12 + out1[1] = x13 + out1[2] = x14 + out1[3] = x15 + out1[4] = x16 + out1[5] = x17 + out1[6] = x18 + out1[7] = x19 + out1[8] = x20 +} + +// p521Add adds two field elements. +// +// Postconditions: +// eval out1 mod m = (eval arg1 + eval arg2) mod m +// +// Input Bounds: +// arg1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] +// arg2: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] +// Output Bounds: +// out1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] +func p521Add(out1 *[9]uint64, arg1 *[9]uint64, arg2 *[9]uint64) { + x1 := (arg1[0] + arg2[0]) + x2 := (arg1[1] + arg2[1]) + x3 := (arg1[2] + arg2[2]) + x4 := (arg1[3] + arg2[3]) + x5 := (arg1[4] + arg2[4]) + x6 := (arg1[5] + arg2[5]) + x7 := (arg1[6] + arg2[6]) + x8 := (arg1[7] + arg2[7]) + x9 := (arg1[8] + arg2[8]) + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 + out1[4] = x5 + out1[5] = x6 + out1[6] = x7 + out1[7] = x8 + out1[8] = x9 +} + +// p521Sub subtracts two field elements. +// +// Postconditions: +// eval out1 mod m = (eval arg1 - eval arg2) mod m +// +// Input Bounds: +// arg1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] +// arg2: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] +// Output Bounds: +// out1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] +func p521Sub(out1 *[9]uint64, arg1 *[9]uint64, arg2 *[9]uint64) { + x1 := ((0x7fffffffffffffe + arg1[0]) - arg2[0]) + x2 := ((0x7fffffffffffffe + arg1[1]) - arg2[1]) + x3 := ((0x7fffffffffffffe + arg1[2]) - arg2[2]) + x4 := ((0x7fffffffffffffe + arg1[3]) - arg2[3]) + x5 := ((0x7fffffffffffffe + arg1[4]) - arg2[4]) + x6 := ((0x7fffffffffffffe + arg1[5]) - arg2[5]) + x7 := ((0x7fffffffffffffe + arg1[6]) - arg2[6]) + x8 := ((0x7fffffffffffffe + arg1[7]) - arg2[7]) + x9 := ((0x3fffffffffffffe + arg1[8]) - arg2[8]) + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 + out1[4] = x5 + out1[5] = x6 + out1[6] = x7 + out1[7] = x8 + out1[8] = x9 +} + +// p521ToBytes serializes a field element to bytes in little-endian order. +// +// Postconditions: +// out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..65] +// +// Input Bounds: +// arg1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] +// Output Bounds: +// out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]] +func p521ToBytes(out1 *[66]uint8, arg1 *[9]uint64) { + var x1 uint64 + var x2 p521Uint1 + p521SubborrowxU58(&x1, &x2, 0x0, arg1[0], 0x3ffffffffffffff) + var x3 uint64 + var x4 p521Uint1 + p521SubborrowxU58(&x3, &x4, x2, arg1[1], 0x3ffffffffffffff) + var x5 uint64 + var x6 p521Uint1 + p521SubborrowxU58(&x5, &x6, x4, arg1[2], 0x3ffffffffffffff) + var x7 uint64 + var x8 p521Uint1 + p521SubborrowxU58(&x7, &x8, x6, arg1[3], 0x3ffffffffffffff) + var x9 uint64 + var x10 p521Uint1 + p521SubborrowxU58(&x9, &x10, x8, arg1[4], 0x3ffffffffffffff) + var x11 uint64 + var x12 p521Uint1 + p521SubborrowxU58(&x11, &x12, x10, arg1[5], 0x3ffffffffffffff) + var x13 uint64 + var x14 p521Uint1 + p521SubborrowxU58(&x13, &x14, x12, arg1[6], 0x3ffffffffffffff) + var x15 uint64 + var x16 p521Uint1 + p521SubborrowxU58(&x15, &x16, x14, arg1[7], 0x3ffffffffffffff) + var x17 uint64 + var x18 p521Uint1 + p521SubborrowxU57(&x17, &x18, x16, arg1[8], 0x1ffffffffffffff) + var x19 uint64 + p521CmovznzU64(&x19, x18, uint64(0x0), 0xffffffffffffffff) + var x20 uint64 + var x21 p521Uint1 + p521AddcarryxU58(&x20, &x21, 0x0, x1, (x19 & 0x3ffffffffffffff)) + var x22 uint64 + var x23 p521Uint1 + p521AddcarryxU58(&x22, &x23, x21, x3, (x19 & 0x3ffffffffffffff)) + var x24 uint64 + var x25 p521Uint1 + p521AddcarryxU58(&x24, &x25, x23, x5, (x19 & 0x3ffffffffffffff)) + var x26 uint64 + var x27 p521Uint1 + p521AddcarryxU58(&x26, &x27, x25, x7, (x19 & 0x3ffffffffffffff)) + var x28 uint64 + var x29 p521Uint1 + p521AddcarryxU58(&x28, &x29, x27, x9, (x19 & 0x3ffffffffffffff)) + var x30 uint64 + var x31 p521Uint1 + p521AddcarryxU58(&x30, &x31, x29, x11, (x19 & 0x3ffffffffffffff)) + var x32 uint64 + var x33 p521Uint1 + p521AddcarryxU58(&x32, &x33, x31, x13, (x19 & 0x3ffffffffffffff)) + var x34 uint64 + var x35 p521Uint1 + p521AddcarryxU58(&x34, &x35, x33, x15, (x19 & 0x3ffffffffffffff)) + var x36 uint64 + var x37 p521Uint1 + p521AddcarryxU57(&x36, &x37, x35, x17, (x19 & 0x1ffffffffffffff)) + x38 := (x34 << 6) + x39 := (x32 << 4) + x40 := (x30 << 2) + x41 := (x26 << 6) + x42 := (x24 << 4) + x43 := (x22 << 2) + x44 := (uint8(x20) & 0xff) + x45 := (x20 >> 8) + x46 := (uint8(x45) & 0xff) + x47 := (x45 >> 8) + x48 := (uint8(x47) & 0xff) + x49 := (x47 >> 8) + x50 := (uint8(x49) & 0xff) + x51 := (x49 >> 8) + x52 := (uint8(x51) & 0xff) + x53 := (x51 >> 8) + x54 := (uint8(x53) & 0xff) + x55 := (x53 >> 8) + x56 := (uint8(x55) & 0xff) + x57 := uint8((x55 >> 8)) + x58 := (x43 + uint64(x57)) + x59 := (uint8(x58) & 0xff) + x60 := (x58 >> 8) + x61 := (uint8(x60) & 0xff) + x62 := (x60 >> 8) + x63 := (uint8(x62) & 0xff) + x64 := (x62 >> 8) + x65 := (uint8(x64) & 0xff) + x66 := (x64 >> 8) + x67 := (uint8(x66) & 0xff) + x68 := (x66 >> 8) + x69 := (uint8(x68) & 0xff) + x70 := (x68 >> 8) + x71 := (uint8(x70) & 0xff) + x72 := uint8((x70 >> 8)) + x73 := (x42 + uint64(x72)) + x74 := (uint8(x73) & 0xff) + x75 := (x73 >> 8) + x76 := (uint8(x75) & 0xff) + x77 := (x75 >> 8) + x78 := (uint8(x77) & 0xff) + x79 := (x77 >> 8) + x80 := (uint8(x79) & 0xff) + x81 := (x79 >> 8) + x82 := (uint8(x81) & 0xff) + x83 := (x81 >> 8) + x84 := (uint8(x83) & 0xff) + x85 := (x83 >> 8) + x86 := (uint8(x85) & 0xff) + x87 := uint8((x85 >> 8)) + x88 := (x41 + uint64(x87)) + x89 := (uint8(x88) & 0xff) + x90 := (x88 >> 8) + x91 := (uint8(x90) & 0xff) + x92 := (x90 >> 8) + x93 := (uint8(x92) & 0xff) + x94 := (x92 >> 8) + x95 := (uint8(x94) & 0xff) + x96 := (x94 >> 8) + x97 := (uint8(x96) & 0xff) + x98 := (x96 >> 8) + x99 := (uint8(x98) & 0xff) + x100 := (x98 >> 8) + x101 := (uint8(x100) & 0xff) + x102 := uint8((x100 >> 8)) + x103 := (uint8(x28) & 0xff) + x104 := (x28 >> 8) + x105 := (uint8(x104) & 0xff) + x106 := (x104 >> 8) + x107 := (uint8(x106) & 0xff) + x108 := (x106 >> 8) + x109 := (uint8(x108) & 0xff) + x110 := (x108 >> 8) + x111 := (uint8(x110) & 0xff) + x112 := (x110 >> 8) + x113 := (uint8(x112) & 0xff) + x114 := (x112 >> 8) + x115 := (uint8(x114) & 0xff) + x116 := uint8((x114 >> 8)) + x117 := (x40 + uint64(x116)) + x118 := (uint8(x117) & 0xff) + x119 := (x117 >> 8) + x120 := (uint8(x119) & 0xff) + x121 := (x119 >> 8) + x122 := (uint8(x121) & 0xff) + x123 := (x121 >> 8) + x124 := (uint8(x123) & 0xff) + x125 := (x123 >> 8) + x126 := (uint8(x125) & 0xff) + x127 := (x125 >> 8) + x128 := (uint8(x127) & 0xff) + x129 := (x127 >> 8) + x130 := (uint8(x129) & 0xff) + x131 := uint8((x129 >> 8)) + x132 := (x39 + uint64(x131)) + x133 := (uint8(x132) & 0xff) + x134 := (x132 >> 8) + x135 := (uint8(x134) & 0xff) + x136 := (x134 >> 8) + x137 := (uint8(x136) & 0xff) + x138 := (x136 >> 8) + x139 := (uint8(x138) & 0xff) + x140 := (x138 >> 8) + x141 := (uint8(x140) & 0xff) + x142 := (x140 >> 8) + x143 := (uint8(x142) & 0xff) + x144 := (x142 >> 8) + x145 := (uint8(x144) & 0xff) + x146 := uint8((x144 >> 8)) + x147 := (x38 + uint64(x146)) + x148 := (uint8(x147) & 0xff) + x149 := (x147 >> 8) + x150 := (uint8(x149) & 0xff) + x151 := (x149 >> 8) + x152 := (uint8(x151) & 0xff) + x153 := (x151 >> 8) + x154 := (uint8(x153) & 0xff) + x155 := (x153 >> 8) + x156 := (uint8(x155) & 0xff) + x157 := (x155 >> 8) + x158 := (uint8(x157) & 0xff) + x159 := (x157 >> 8) + x160 := (uint8(x159) & 0xff) + x161 := uint8((x159 >> 8)) + x162 := (uint8(x36) & 0xff) + x163 := (x36 >> 8) + x164 := (uint8(x163) & 0xff) + x165 := (x163 >> 8) + x166 := (uint8(x165) & 0xff) + x167 := (x165 >> 8) + x168 := (uint8(x167) & 0xff) + x169 := (x167 >> 8) + x170 := (uint8(x169) & 0xff) + x171 := (x169 >> 8) + x172 := (uint8(x171) & 0xff) + x173 := (x171 >> 8) + x174 := (uint8(x173) & 0xff) + x175 := p521Uint1((x173 >> 8)) + out1[0] = x44 + out1[1] = x46 + out1[2] = x48 + out1[3] = x50 + out1[4] = x52 + out1[5] = x54 + out1[6] = x56 + out1[7] = x59 + out1[8] = x61 + out1[9] = x63 + out1[10] = x65 + out1[11] = x67 + out1[12] = x69 + out1[13] = x71 + out1[14] = x74 + out1[15] = x76 + out1[16] = x78 + out1[17] = x80 + out1[18] = x82 + out1[19] = x84 + out1[20] = x86 + out1[21] = x89 + out1[22] = x91 + out1[23] = x93 + out1[24] = x95 + out1[25] = x97 + out1[26] = x99 + out1[27] = x101 + out1[28] = x102 + out1[29] = x103 + out1[30] = x105 + out1[31] = x107 + out1[32] = x109 + out1[33] = x111 + out1[34] = x113 + out1[35] = x115 + out1[36] = x118 + out1[37] = x120 + out1[38] = x122 + out1[39] = x124 + out1[40] = x126 + out1[41] = x128 + out1[42] = x130 + out1[43] = x133 + out1[44] = x135 + out1[45] = x137 + out1[46] = x139 + out1[47] = x141 + out1[48] = x143 + out1[49] = x145 + out1[50] = x148 + out1[51] = x150 + out1[52] = x152 + out1[53] = x154 + out1[54] = x156 + out1[55] = x158 + out1[56] = x160 + out1[57] = x161 + out1[58] = x162 + out1[59] = x164 + out1[60] = x166 + out1[61] = x168 + out1[62] = x170 + out1[63] = x172 + out1[64] = x174 + out1[65] = uint8(x175) +} + +// p521FromBytes deserializes a field element from bytes in little-endian order. +// +// Postconditions: +// eval out1 mod m = bytes_eval arg1 mod m +// +// Input Bounds: +// arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]] +// Output Bounds: +// out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] +func p521FromBytes(out1 *[9]uint64, arg1 *[66]uint8) { + x1 := (uint64(p521Uint1(arg1[65])) << 56) + x2 := (uint64(arg1[64]) << 48) + x3 := (uint64(arg1[63]) << 40) + x4 := (uint64(arg1[62]) << 32) + x5 := (uint64(arg1[61]) << 24) + x6 := (uint64(arg1[60]) << 16) + x7 := (uint64(arg1[59]) << 8) + x8 := arg1[58] + x9 := (uint64(arg1[57]) << 50) + x10 := (uint64(arg1[56]) << 42) + x11 := (uint64(arg1[55]) << 34) + x12 := (uint64(arg1[54]) << 26) + x13 := (uint64(arg1[53]) << 18) + x14 := (uint64(arg1[52]) << 10) + x15 := (uint64(arg1[51]) << 2) + x16 := (uint64(arg1[50]) << 52) + x17 := (uint64(arg1[49]) << 44) + x18 := (uint64(arg1[48]) << 36) + x19 := (uint64(arg1[47]) << 28) + x20 := (uint64(arg1[46]) << 20) + x21 := (uint64(arg1[45]) << 12) + x22 := (uint64(arg1[44]) << 4) + x23 := (uint64(arg1[43]) << 54) + x24 := (uint64(arg1[42]) << 46) + x25 := (uint64(arg1[41]) << 38) + x26 := (uint64(arg1[40]) << 30) + x27 := (uint64(arg1[39]) << 22) + x28 := (uint64(arg1[38]) << 14) + x29 := (uint64(arg1[37]) << 6) + x30 := (uint64(arg1[36]) << 56) + x31 := (uint64(arg1[35]) << 48) + x32 := (uint64(arg1[34]) << 40) + x33 := (uint64(arg1[33]) << 32) + x34 := (uint64(arg1[32]) << 24) + x35 := (uint64(arg1[31]) << 16) + x36 := (uint64(arg1[30]) << 8) + x37 := arg1[29] + x38 := (uint64(arg1[28]) << 50) + x39 := (uint64(arg1[27]) << 42) + x40 := (uint64(arg1[26]) << 34) + x41 := (uint64(arg1[25]) << 26) + x42 := (uint64(arg1[24]) << 18) + x43 := (uint64(arg1[23]) << 10) + x44 := (uint64(arg1[22]) << 2) + x45 := (uint64(arg1[21]) << 52) + x46 := (uint64(arg1[20]) << 44) + x47 := (uint64(arg1[19]) << 36) + x48 := (uint64(arg1[18]) << 28) + x49 := (uint64(arg1[17]) << 20) + x50 := (uint64(arg1[16]) << 12) + x51 := (uint64(arg1[15]) << 4) + x52 := (uint64(arg1[14]) << 54) + x53 := (uint64(arg1[13]) << 46) + x54 := (uint64(arg1[12]) << 38) + x55 := (uint64(arg1[11]) << 30) + x56 := (uint64(arg1[10]) << 22) + x57 := (uint64(arg1[9]) << 14) + x58 := (uint64(arg1[8]) << 6) + x59 := (uint64(arg1[7]) << 56) + x60 := (uint64(arg1[6]) << 48) + x61 := (uint64(arg1[5]) << 40) + x62 := (uint64(arg1[4]) << 32) + x63 := (uint64(arg1[3]) << 24) + x64 := (uint64(arg1[2]) << 16) + x65 := (uint64(arg1[1]) << 8) + x66 := arg1[0] + x67 := (x65 + uint64(x66)) + x68 := (x64 + x67) + x69 := (x63 + x68) + x70 := (x62 + x69) + x71 := (x61 + x70) + x72 := (x60 + x71) + x73 := (x59 + x72) + x74 := (x73 & 0x3ffffffffffffff) + x75 := uint8((x73 >> 58)) + x76 := (x58 + uint64(x75)) + x77 := (x57 + x76) + x78 := (x56 + x77) + x79 := (x55 + x78) + x80 := (x54 + x79) + x81 := (x53 + x80) + x82 := (x52 + x81) + x83 := (x82 & 0x3ffffffffffffff) + x84 := uint8((x82 >> 58)) + x85 := (x51 + uint64(x84)) + x86 := (x50 + x85) + x87 := (x49 + x86) + x88 := (x48 + x87) + x89 := (x47 + x88) + x90 := (x46 + x89) + x91 := (x45 + x90) + x92 := (x91 & 0x3ffffffffffffff) + x93 := uint8((x91 >> 58)) + x94 := (x44 + uint64(x93)) + x95 := (x43 + x94) + x96 := (x42 + x95) + x97 := (x41 + x96) + x98 := (x40 + x97) + x99 := (x39 + x98) + x100 := (x38 + x99) + x101 := (x36 + uint64(x37)) + x102 := (x35 + x101) + x103 := (x34 + x102) + x104 := (x33 + x103) + x105 := (x32 + x104) + x106 := (x31 + x105) + x107 := (x30 + x106) + x108 := (x107 & 0x3ffffffffffffff) + x109 := uint8((x107 >> 58)) + x110 := (x29 + uint64(x109)) + x111 := (x28 + x110) + x112 := (x27 + x111) + x113 := (x26 + x112) + x114 := (x25 + x113) + x115 := (x24 + x114) + x116 := (x23 + x115) + x117 := (x116 & 0x3ffffffffffffff) + x118 := uint8((x116 >> 58)) + x119 := (x22 + uint64(x118)) + x120 := (x21 + x119) + x121 := (x20 + x120) + x122 := (x19 + x121) + x123 := (x18 + x122) + x124 := (x17 + x123) + x125 := (x16 + x124) + x126 := (x125 & 0x3ffffffffffffff) + x127 := uint8((x125 >> 58)) + x128 := (x15 + uint64(x127)) + x129 := (x14 + x128) + x130 := (x13 + x129) + x131 := (x12 + x130) + x132 := (x11 + x131) + x133 := (x10 + x132) + x134 := (x9 + x133) + x135 := (x7 + uint64(x8)) + x136 := (x6 + x135) + x137 := (x5 + x136) + x138 := (x4 + x137) + x139 := (x3 + x138) + x140 := (x2 + x139) + x141 := (x1 + x140) + out1[0] = x74 + out1[1] = x83 + out1[2] = x92 + out1[3] = x100 + out1[4] = x108 + out1[5] = x117 + out1[6] = x126 + out1[7] = x134 + out1[8] = x141 +} + +// p521Selectznz is a multi-limb conditional select. +// +// Postconditions: +// eval out1 = (if arg1 = 0 then eval arg2 else eval arg3) +// +// Input Bounds: +// arg1: [0x0 ~> 0x1] +// arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] +// arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] +// Output Bounds: +// out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] +func p521Selectznz(out1 *[9]uint64, arg1 p521Uint1, arg2 *[9]uint64, arg3 *[9]uint64) { + var x1 uint64 + p521CmovznzU64(&x1, arg1, arg2[0], arg3[0]) + var x2 uint64 + p521CmovznzU64(&x2, arg1, arg2[1], arg3[1]) + var x3 uint64 + p521CmovznzU64(&x3, arg1, arg2[2], arg3[2]) + var x4 uint64 + p521CmovznzU64(&x4, arg1, arg2[3], arg3[3]) + var x5 uint64 + p521CmovznzU64(&x5, arg1, arg2[4], arg3[4]) + var x6 uint64 + p521CmovznzU64(&x6, arg1, arg2[5], arg3[5]) + var x7 uint64 + p521CmovznzU64(&x7, arg1, arg2[6], arg3[6]) + var x8 uint64 + p521CmovznzU64(&x8, arg1, arg2[7], arg3[7]) + var x9 uint64 + p521CmovznzU64(&x9, arg1, arg2[8], arg3[8]) + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 + out1[4] = x5 + out1[5] = x6 + out1[6] = x7 + out1[7] = x8 + out1[8] = x9 +} diff --git a/src/crypto/elliptic/internal/fiat/p521_test.go b/src/crypto/elliptic/internal/fiat/p521_test.go new file mode 100644 index 0000000..661bde3 --- /dev/null +++ b/src/crypto/elliptic/internal/fiat/p521_test.go @@ -0,0 +1,37 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fiat_test + +import ( + "crypto/elliptic/internal/fiat" + "crypto/rand" + "testing" +) + +func p521Random(t *testing.T) *fiat.P521Element { + buf := make([]byte, 66) + if _, err := rand.Read(buf); err != nil { + t.Fatal(err) + } + buf[65] &= 1 + e, err := new(fiat.P521Element).SetBytes(buf) + if err != nil { + t.Fatal(err) + } + return e +} + +func TestP521Invert(t *testing.T) { + a := p521Random(t) + inv := new(fiat.P521Element).Invert(a) + one := new(fiat.P521Element).Mul(a, inv) + if new(fiat.P521Element).One().Equal(one) != 1 { + t.Errorf("a * 1/a != 1; got %x for %x", one.Bytes(), a.Bytes()) + } + inv.Invert(new(fiat.P521Element)) + if new(fiat.P521Element).Equal(inv) != 1 { + t.Errorf("1/0 != 0; got %x", inv.Bytes()) + } +} diff --git a/src/crypto/elliptic/p224.go b/src/crypto/elliptic/p224.go new file mode 100644 index 0000000..ff5c834 --- /dev/null +++ b/src/crypto/elliptic/p224.go @@ -0,0 +1,783 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package elliptic + +// This is a constant-time, 32-bit implementation of P224. See FIPS 186-3, +// section D.2.2. +// +// See https://www.imperialviolet.org/2010/12/04/ecc.html ([1]) for background. + +import ( + "math/big" +) + +var p224 p224Curve + +type p224Curve struct { + *CurveParams + gx, gy, b p224FieldElement +} + +func initP224() { + // See FIPS 186-3, section D.2.2 + p224.CurveParams = &CurveParams{Name: "P-224"} + p224.P, _ = new(big.Int).SetString("26959946667150639794667015087019630673557916260026308143510066298881", 10) + p224.N, _ = new(big.Int).SetString("26959946667150639794667015087019625940457807714424391721682722368061", 10) + p224.B, _ = new(big.Int).SetString("b4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4", 16) + p224.Gx, _ = new(big.Int).SetString("b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", 16) + p224.Gy, _ = new(big.Int).SetString("bd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34", 16) + p224.BitSize = 224 + + p224FromBig(&p224.gx, p224.Gx) + p224FromBig(&p224.gy, p224.Gy) + p224FromBig(&p224.b, p224.B) +} + +// P224 returns a Curve which implements P-224 (see FIPS 186-3, section D.2.2). +// +// The cryptographic operations are implemented using constant-time algorithms. +func P224() Curve { + initonce.Do(initAll) + return p224 +} + +func (curve p224Curve) Params() *CurveParams { + return curve.CurveParams +} + +func (curve p224Curve) IsOnCurve(bigX, bigY *big.Int) bool { + if bigX.Sign() < 0 || bigX.Cmp(curve.P) >= 0 || + bigY.Sign() < 0 || bigY.Cmp(curve.P) >= 0 { + return false + } + + var x, y p224FieldElement + p224FromBig(&x, bigX) + p224FromBig(&y, bigY) + + // y² = x³ - 3x + b + var tmp p224LargeFieldElement + var x3 p224FieldElement + p224Square(&x3, &x, &tmp) + p224Mul(&x3, &x3, &x, &tmp) + + for i := 0; i < 8; i++ { + x[i] *= 3 + } + p224Sub(&x3, &x3, &x) + p224Reduce(&x3) + p224Add(&x3, &x3, &curve.b) + p224Contract(&x3, &x3) + + p224Square(&y, &y, &tmp) + p224Contract(&y, &y) + + for i := 0; i < 8; i++ { + if y[i] != x3[i] { + return false + } + } + return true +} + +func (p224Curve) Add(bigX1, bigY1, bigX2, bigY2 *big.Int) (x, y *big.Int) { + var x1, y1, z1, x2, y2, z2, x3, y3, z3 p224FieldElement + + p224FromBig(&x1, bigX1) + p224FromBig(&y1, bigY1) + if bigX1.Sign() != 0 || bigY1.Sign() != 0 { + z1[0] = 1 + } + p224FromBig(&x2, bigX2) + p224FromBig(&y2, bigY2) + if bigX2.Sign() != 0 || bigY2.Sign() != 0 { + z2[0] = 1 + } + + p224AddJacobian(&x3, &y3, &z3, &x1, &y1, &z1, &x2, &y2, &z2) + return p224ToAffine(&x3, &y3, &z3) +} + +func (p224Curve) Double(bigX1, bigY1 *big.Int) (x, y *big.Int) { + var x1, y1, z1, x2, y2, z2 p224FieldElement + + p224FromBig(&x1, bigX1) + p224FromBig(&y1, bigY1) + z1[0] = 1 + + p224DoubleJacobian(&x2, &y2, &z2, &x1, &y1, &z1) + return p224ToAffine(&x2, &y2, &z2) +} + +func (p224Curve) ScalarMult(bigX1, bigY1 *big.Int, scalar []byte) (x, y *big.Int) { + var x1, y1, z1, x2, y2, z2 p224FieldElement + + p224FromBig(&x1, bigX1) + p224FromBig(&y1, bigY1) + z1[0] = 1 + + p224ScalarMult(&x2, &y2, &z2, &x1, &y1, &z1, scalar) + return p224ToAffine(&x2, &y2, &z2) +} + +func (curve p224Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { + var z1, x2, y2, z2 p224FieldElement + + z1[0] = 1 + p224ScalarMult(&x2, &y2, &z2, &curve.gx, &curve.gy, &z1, scalar) + return p224ToAffine(&x2, &y2, &z2) +} + +// Field element functions. +// +// The field that we're dealing with is ℤ/pℤ where p = 2**224 - 2**96 + 1. +// +// Field elements are represented by a FieldElement, which is a typedef to an +// array of 8 uint32's. The value of a FieldElement, a, is: +// a[0] + 2**28·a[1] + 2**56·a[1] + ... + 2**196·a[7] +// +// Using 28-bit limbs means that there's only 4 bits of headroom, which is less +// than we would really like. But it has the useful feature that we hit 2**224 +// exactly, making the reflections during a reduce much nicer. +type p224FieldElement [8]uint32 + +// p224P is the order of the field, represented as a p224FieldElement. +var p224P = [8]uint32{1, 0, 0, 0xffff000, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff} + +// p224IsZero returns 1 if a == 0 mod p and 0 otherwise. +// +// a[i] < 2**29 +func p224IsZero(a *p224FieldElement) uint32 { + // Since a p224FieldElement contains 224 bits there are two possible + // representations of 0: 0 and p. + var minimal p224FieldElement + p224Contract(&minimal, a) + + var isZero, isP uint32 + for i, v := range minimal { + isZero |= v + isP |= v - p224P[i] + } + + // If either isZero or isP is 0, then we should return 1. + isZero |= isZero >> 16 + isZero |= isZero >> 8 + isZero |= isZero >> 4 + isZero |= isZero >> 2 + isZero |= isZero >> 1 + + isP |= isP >> 16 + isP |= isP >> 8 + isP |= isP >> 4 + isP |= isP >> 2 + isP |= isP >> 1 + + // For isZero and isP, the LSB is 0 iff all the bits are zero. + result := isZero & isP + result = (^result) & 1 + + return result +} + +// p224Add computes *out = a+b +// +// a[i] + b[i] < 2**32 +func p224Add(out, a, b *p224FieldElement) { + for i := 0; i < 8; i++ { + out[i] = a[i] + b[i] + } +} + +const two31p3 = 1<<31 + 1<<3 +const two31m3 = 1<<31 - 1<<3 +const two31m15m3 = 1<<31 - 1<<15 - 1<<3 + +// p224ZeroModP31 is 0 mod p where bit 31 is set in all limbs so that we can +// subtract smaller amounts without underflow. See the section "Subtraction" in +// [1] for reasoning. +var p224ZeroModP31 = []uint32{two31p3, two31m3, two31m3, two31m15m3, two31m3, two31m3, two31m3, two31m3} + +// p224Sub computes *out = a-b +// +// a[i], b[i] < 2**30 +// out[i] < 2**32 +func p224Sub(out, a, b *p224FieldElement) { + for i := 0; i < 8; i++ { + out[i] = a[i] + p224ZeroModP31[i] - b[i] + } +} + +// LargeFieldElement also represents an element of the field. The limbs are +// still spaced 28-bits apart and in little-endian order. So the limbs are at +// 0, 28, 56, ..., 392 bits, each 64-bits wide. +type p224LargeFieldElement [15]uint64 + +const two63p35 = 1<<63 + 1<<35 +const two63m35 = 1<<63 - 1<<35 +const two63m35m19 = 1<<63 - 1<<35 - 1<<19 + +// p224ZeroModP63 is 0 mod p where bit 63 is set in all limbs. See the section +// "Subtraction" in [1] for why. +var p224ZeroModP63 = [8]uint64{two63p35, two63m35, two63m35, two63m35, two63m35m19, two63m35, two63m35, two63m35} + +const bottom12Bits = 0xfff +const bottom28Bits = 0xfffffff + +// p224Mul computes *out = a*b +// +// a[i] < 2**29, b[i] < 2**30 (or vice versa) +// out[i] < 2**29 +func p224Mul(out, a, b *p224FieldElement, tmp *p224LargeFieldElement) { + for i := 0; i < 15; i++ { + tmp[i] = 0 + } + + for i := 0; i < 8; i++ { + for j := 0; j < 8; j++ { + tmp[i+j] += uint64(a[i]) * uint64(b[j]) + } + } + + p224ReduceLarge(out, tmp) +} + +// Square computes *out = a*a +// +// a[i] < 2**29 +// out[i] < 2**29 +func p224Square(out, a *p224FieldElement, tmp *p224LargeFieldElement) { + for i := 0; i < 15; i++ { + tmp[i] = 0 + } + + for i := 0; i < 8; i++ { + for j := 0; j <= i; j++ { + r := uint64(a[i]) * uint64(a[j]) + if i == j { + tmp[i+j] += r + } else { + tmp[i+j] += r << 1 + } + } + } + + p224ReduceLarge(out, tmp) +} + +// ReduceLarge converts a p224LargeFieldElement to a p224FieldElement. +// +// in[i] < 2**62 +func p224ReduceLarge(out *p224FieldElement, in *p224LargeFieldElement) { + for i := 0; i < 8; i++ { + in[i] += p224ZeroModP63[i] + } + + // Eliminate the coefficients at 2**224 and greater. + for i := 14; i >= 8; i-- { + in[i-8] -= in[i] + in[i-5] += (in[i] & 0xffff) << 12 + in[i-4] += in[i] >> 16 + } + in[8] = 0 + // in[0..8] < 2**64 + + // As the values become small enough, we start to store them in |out| + // and use 32-bit operations. + for i := 1; i < 8; i++ { + in[i+1] += in[i] >> 28 + out[i] = uint32(in[i] & bottom28Bits) + } + in[0] -= in[8] + out[3] += uint32(in[8]&0xffff) << 12 + out[4] += uint32(in[8] >> 16) + // in[0] < 2**64 + // out[3] < 2**29 + // out[4] < 2**29 + // out[1,2,5..7] < 2**28 + + out[0] = uint32(in[0] & bottom28Bits) + out[1] += uint32((in[0] >> 28) & bottom28Bits) + out[2] += uint32(in[0] >> 56) + // out[0] < 2**28 + // out[1..4] < 2**29 + // out[5..7] < 2**28 +} + +// Reduce reduces the coefficients of a to smaller bounds. +// +// On entry: a[i] < 2**31 + 2**30 +// On exit: a[i] < 2**29 +func p224Reduce(a *p224FieldElement) { + for i := 0; i < 7; i++ { + a[i+1] += a[i] >> 28 + a[i] &= bottom28Bits + } + top := a[7] >> 28 + a[7] &= bottom28Bits + + // top < 2**4 + mask := top + mask |= mask >> 2 + mask |= mask >> 1 + mask <<= 31 + mask = uint32(int32(mask) >> 31) + // Mask is all ones if top != 0, all zero otherwise + + a[0] -= top + a[3] += top << 12 + + // We may have just made a[0] negative but, if we did, then we must + // have added something to a[3], this it's > 2**12. Therefore we can + // carry down to a[0]. + a[3] -= 1 & mask + a[2] += mask & (1<<28 - 1) + a[1] += mask & (1<<28 - 1) + a[0] += mask & (1 << 28) +} + +// p224Invert calculates *out = in**-1 by computing in**(2**224 - 2**96 - 1), +// i.e. Fermat's little theorem. +func p224Invert(out, in *p224FieldElement) { + var f1, f2, f3, f4 p224FieldElement + var c p224LargeFieldElement + + p224Square(&f1, in, &c) // 2 + p224Mul(&f1, &f1, in, &c) // 2**2 - 1 + p224Square(&f1, &f1, &c) // 2**3 - 2 + p224Mul(&f1, &f1, in, &c) // 2**3 - 1 + p224Square(&f2, &f1, &c) // 2**4 - 2 + p224Square(&f2, &f2, &c) // 2**5 - 4 + p224Square(&f2, &f2, &c) // 2**6 - 8 + p224Mul(&f1, &f1, &f2, &c) // 2**6 - 1 + p224Square(&f2, &f1, &c) // 2**7 - 2 + for i := 0; i < 5; i++ { // 2**12 - 2**6 + p224Square(&f2, &f2, &c) + } + p224Mul(&f2, &f2, &f1, &c) // 2**12 - 1 + p224Square(&f3, &f2, &c) // 2**13 - 2 + for i := 0; i < 11; i++ { // 2**24 - 2**12 + p224Square(&f3, &f3, &c) + } + p224Mul(&f2, &f3, &f2, &c) // 2**24 - 1 + p224Square(&f3, &f2, &c) // 2**25 - 2 + for i := 0; i < 23; i++ { // 2**48 - 2**24 + p224Square(&f3, &f3, &c) + } + p224Mul(&f3, &f3, &f2, &c) // 2**48 - 1 + p224Square(&f4, &f3, &c) // 2**49 - 2 + for i := 0; i < 47; i++ { // 2**96 - 2**48 + p224Square(&f4, &f4, &c) + } + p224Mul(&f3, &f3, &f4, &c) // 2**96 - 1 + p224Square(&f4, &f3, &c) // 2**97 - 2 + for i := 0; i < 23; i++ { // 2**120 - 2**24 + p224Square(&f4, &f4, &c) + } + p224Mul(&f2, &f4, &f2, &c) // 2**120 - 1 + for i := 0; i < 6; i++ { // 2**126 - 2**6 + p224Square(&f2, &f2, &c) + } + p224Mul(&f1, &f1, &f2, &c) // 2**126 - 1 + p224Square(&f1, &f1, &c) // 2**127 - 2 + p224Mul(&f1, &f1, in, &c) // 2**127 - 1 + for i := 0; i < 97; i++ { // 2**224 - 2**97 + p224Square(&f1, &f1, &c) + } + p224Mul(out, &f1, &f3, &c) // 2**224 - 2**96 - 1 +} + +// p224Contract converts a FieldElement to its unique, minimal form. +// +// On entry, in[i] < 2**29 +// On exit, out[i] < 2**28 and out < p +func p224Contract(out, in *p224FieldElement) { + copy(out[:], in[:]) + + // First, carry the bits above 28 to the higher limb. + for i := 0; i < 7; i++ { + out[i+1] += out[i] >> 28 + out[i] &= bottom28Bits + } + top := out[7] >> 28 + out[7] &= bottom28Bits + + // Use the reduction identity to carry the overflow. + // + // a + top * 2²²⁴ = a + top * 2⁹⁶ - top + out[0] -= top + out[3] += top << 12 + + // We may just have made out[0] negative. So we carry down. If we made + // out[0] negative then we know that out[3] is sufficiently positive + // because we just added to it. + for i := 0; i < 3; i++ { + mask := uint32(int32(out[i]) >> 31) + out[i] += (1 << 28) & mask + out[i+1] -= 1 & mask + } + + // We might have pushed out[3] over 2**28 so we perform another, partial, + // carry chain. + for i := 3; i < 7; i++ { + out[i+1] += out[i] >> 28 + out[i] &= bottom28Bits + } + top = out[7] >> 28 + out[7] &= bottom28Bits + + // Eliminate top while maintaining the same value mod p. + out[0] -= top + out[3] += top << 12 + + // There are two cases to consider for out[3]: + // 1) The first time that we eliminated top, we didn't push out[3] over + // 2**28. In this case, the partial carry chain didn't change any values + // and top is now zero. + // 2) We did push out[3] over 2**28 the first time that we eliminated top. + // The first value of top was in [0..2], therefore, after overflowing + // and being reduced by the second carry chain, out[3] <= 2<<12 - 1. + // In both cases, out[3] cannot have overflowed when we eliminated top for + // the second time. + + // Again, we may just have made out[0] negative, so do the same carry down. + // As before, if we made out[0] negative then we know that out[3] is + // sufficiently positive. + for i := 0; i < 3; i++ { + mask := uint32(int32(out[i]) >> 31) + out[i] += (1 << 28) & mask + out[i+1] -= 1 & mask + } + + // Now we see if the value is >= p and, if so, subtract p. + + // First we build a mask from the top four limbs, which must all be + // equal to bottom28Bits if the whole value is >= p. If top4AllOnes + // ends up with any zero bits in the bottom 28 bits, then this wasn't + // true. + top4AllOnes := uint32(0xffffffff) + for i := 4; i < 8; i++ { + top4AllOnes &= out[i] + } + top4AllOnes |= 0xf0000000 + // Now we replicate any zero bits to all the bits in top4AllOnes. + top4AllOnes &= top4AllOnes >> 16 + top4AllOnes &= top4AllOnes >> 8 + top4AllOnes &= top4AllOnes >> 4 + top4AllOnes &= top4AllOnes >> 2 + top4AllOnes &= top4AllOnes >> 1 + top4AllOnes = uint32(int32(top4AllOnes<<31) >> 31) + + // Now we test whether the bottom three limbs are non-zero. + bottom3NonZero := out[0] | out[1] | out[2] + bottom3NonZero |= bottom3NonZero >> 16 + bottom3NonZero |= bottom3NonZero >> 8 + bottom3NonZero |= bottom3NonZero >> 4 + bottom3NonZero |= bottom3NonZero >> 2 + bottom3NonZero |= bottom3NonZero >> 1 + bottom3NonZero = uint32(int32(bottom3NonZero<<31) >> 31) + + // Assuming top4AllOnes != 0, everything depends on the value of out[3]. + // If it's > 0xffff000 then the whole value is > p + // If it's = 0xffff000 and bottom3NonZero != 0, then the whole value is >= p + // If it's < 0xffff000, then the whole value is < p + n := 0xffff000 - out[3] + out3Equal := n + out3Equal |= out3Equal >> 16 + out3Equal |= out3Equal >> 8 + out3Equal |= out3Equal >> 4 + out3Equal |= out3Equal >> 2 + out3Equal |= out3Equal >> 1 + out3Equal = ^uint32(int32(out3Equal<<31) >> 31) + + // If out[3] > 0xffff000 then n's MSB will be one. + out3GT := uint32(int32(n) >> 31) + + mask := top4AllOnes & ((out3Equal & bottom3NonZero) | out3GT) + out[0] -= 1 & mask + out[3] -= 0xffff000 & mask + out[4] -= 0xfffffff & mask + out[5] -= 0xfffffff & mask + out[6] -= 0xfffffff & mask + out[7] -= 0xfffffff & mask + + // Do one final carry down, in case we made out[0] negative. One of + // out[0..3] needs to be positive and able to absorb the -1 or the value + // would have been < p, and the subtraction wouldn't have happened. + for i := 0; i < 3; i++ { + mask := uint32(int32(out[i]) >> 31) + out[i] += (1 << 28) & mask + out[i+1] -= 1 & mask + } +} + +// Group element functions. +// +// These functions deal with group elements. The group is an elliptic curve +// group with a = -3 defined in FIPS 186-3, section D.2.2. + +// p224AddJacobian computes *out = a+b where a != b. +func p224AddJacobian(x3, y3, z3, x1, y1, z1, x2, y2, z2 *p224FieldElement) { + // See https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-p224Add-2007-bl + var z1z1, z2z2, u1, u2, s1, s2, h, i, j, r, v p224FieldElement + var c p224LargeFieldElement + + z1IsZero := p224IsZero(z1) + z2IsZero := p224IsZero(z2) + + // Z1Z1 = Z1² + p224Square(&z1z1, z1, &c) + // Z2Z2 = Z2² + p224Square(&z2z2, z2, &c) + // U1 = X1*Z2Z2 + p224Mul(&u1, x1, &z2z2, &c) + // U2 = X2*Z1Z1 + p224Mul(&u2, x2, &z1z1, &c) + // S1 = Y1*Z2*Z2Z2 + p224Mul(&s1, z2, &z2z2, &c) + p224Mul(&s1, y1, &s1, &c) + // S2 = Y2*Z1*Z1Z1 + p224Mul(&s2, z1, &z1z1, &c) + p224Mul(&s2, y2, &s2, &c) + // H = U2-U1 + p224Sub(&h, &u2, &u1) + p224Reduce(&h) + xEqual := p224IsZero(&h) + // I = (2*H)² + for j := 0; j < 8; j++ { + i[j] = h[j] << 1 + } + p224Reduce(&i) + p224Square(&i, &i, &c) + // J = H*I + p224Mul(&j, &h, &i, &c) + // r = 2*(S2-S1) + p224Sub(&r, &s2, &s1) + p224Reduce(&r) + yEqual := p224IsZero(&r) + if xEqual == 1 && yEqual == 1 && z1IsZero == 0 && z2IsZero == 0 { + p224DoubleJacobian(x3, y3, z3, x1, y1, z1) + return + } + for i := 0; i < 8; i++ { + r[i] <<= 1 + } + p224Reduce(&r) + // V = U1*I + p224Mul(&v, &u1, &i, &c) + // Z3 = ((Z1+Z2)²-Z1Z1-Z2Z2)*H + p224Add(&z1z1, &z1z1, &z2z2) + p224Add(&z2z2, z1, z2) + p224Reduce(&z2z2) + p224Square(&z2z2, &z2z2, &c) + p224Sub(z3, &z2z2, &z1z1) + p224Reduce(z3) + p224Mul(z3, z3, &h, &c) + // X3 = r²-J-2*V + for i := 0; i < 8; i++ { + z1z1[i] = v[i] << 1 + } + p224Add(&z1z1, &j, &z1z1) + p224Reduce(&z1z1) + p224Square(x3, &r, &c) + p224Sub(x3, x3, &z1z1) + p224Reduce(x3) + // Y3 = r*(V-X3)-2*S1*J + for i := 0; i < 8; i++ { + s1[i] <<= 1 + } + p224Mul(&s1, &s1, &j, &c) + p224Sub(&z1z1, &v, x3) + p224Reduce(&z1z1) + p224Mul(&z1z1, &z1z1, &r, &c) + p224Sub(y3, &z1z1, &s1) + p224Reduce(y3) + + p224CopyConditional(x3, x2, z1IsZero) + p224CopyConditional(x3, x1, z2IsZero) + p224CopyConditional(y3, y2, z1IsZero) + p224CopyConditional(y3, y1, z2IsZero) + p224CopyConditional(z3, z2, z1IsZero) + p224CopyConditional(z3, z1, z2IsZero) +} + +// p224DoubleJacobian computes *out = a+a. +func p224DoubleJacobian(x3, y3, z3, x1, y1, z1 *p224FieldElement) { + var delta, gamma, beta, alpha, t p224FieldElement + var c p224LargeFieldElement + + p224Square(&delta, z1, &c) + p224Square(&gamma, y1, &c) + p224Mul(&beta, x1, &gamma, &c) + + // alpha = 3*(X1-delta)*(X1+delta) + p224Add(&t, x1, &delta) + for i := 0; i < 8; i++ { + t[i] += t[i] << 1 + } + p224Reduce(&t) + p224Sub(&alpha, x1, &delta) + p224Reduce(&alpha) + p224Mul(&alpha, &alpha, &t, &c) + + // Z3 = (Y1+Z1)²-gamma-delta + p224Add(z3, y1, z1) + p224Reduce(z3) + p224Square(z3, z3, &c) + p224Sub(z3, z3, &gamma) + p224Reduce(z3) + p224Sub(z3, z3, &delta) + p224Reduce(z3) + + // X3 = alpha²-8*beta + for i := 0; i < 8; i++ { + delta[i] = beta[i] << 3 + } + p224Reduce(&delta) + p224Square(x3, &alpha, &c) + p224Sub(x3, x3, &delta) + p224Reduce(x3) + + // Y3 = alpha*(4*beta-X3)-8*gamma² + for i := 0; i < 8; i++ { + beta[i] <<= 2 + } + p224Sub(&beta, &beta, x3) + p224Reduce(&beta) + p224Square(&gamma, &gamma, &c) + for i := 0; i < 8; i++ { + gamma[i] <<= 3 + } + p224Reduce(&gamma) + p224Mul(y3, &alpha, &beta, &c) + p224Sub(y3, y3, &gamma) + p224Reduce(y3) +} + +// p224CopyConditional sets *out = *in iff the least-significant-bit of control +// is true, and it runs in constant time. +func p224CopyConditional(out, in *p224FieldElement, control uint32) { + control <<= 31 + control = uint32(int32(control) >> 31) + + for i := 0; i < 8; i++ { + out[i] ^= (out[i] ^ in[i]) & control + } +} + +func p224ScalarMult(outX, outY, outZ, inX, inY, inZ *p224FieldElement, scalar []byte) { + var xx, yy, zz p224FieldElement + for i := 0; i < 8; i++ { + outX[i] = 0 + outY[i] = 0 + outZ[i] = 0 + } + + for _, byte := range scalar { + for bitNum := uint(0); bitNum < 8; bitNum++ { + p224DoubleJacobian(outX, outY, outZ, outX, outY, outZ) + bit := uint32((byte >> (7 - bitNum)) & 1) + p224AddJacobian(&xx, &yy, &zz, inX, inY, inZ, outX, outY, outZ) + p224CopyConditional(outX, &xx, bit) + p224CopyConditional(outY, &yy, bit) + p224CopyConditional(outZ, &zz, bit) + } + } +} + +// p224ToAffine converts from Jacobian to affine form. +func p224ToAffine(x, y, z *p224FieldElement) (*big.Int, *big.Int) { + var zinv, zinvsq, outx, outy p224FieldElement + var tmp p224LargeFieldElement + + if isPointAtInfinity := p224IsZero(z); isPointAtInfinity == 1 { + return new(big.Int), new(big.Int) + } + + p224Invert(&zinv, z) + p224Square(&zinvsq, &zinv, &tmp) + p224Mul(x, x, &zinvsq, &tmp) + p224Mul(&zinvsq, &zinvsq, &zinv, &tmp) + p224Mul(y, y, &zinvsq, &tmp) + + p224Contract(&outx, x) + p224Contract(&outy, y) + return p224ToBig(&outx), p224ToBig(&outy) +} + +// get28BitsFromEnd returns the least-significant 28 bits from buf>>shift, +// where buf is interpreted as a big-endian number. +func get28BitsFromEnd(buf []byte, shift uint) (uint32, []byte) { + var ret uint32 + + for i := uint(0); i < 4; i++ { + var b byte + if l := len(buf); l > 0 { + b = buf[l-1] + // We don't remove the byte if we're about to return and we're not + // reading all of it. + if i != 3 || shift == 4 { + buf = buf[:l-1] + } + } + ret |= uint32(b) << (8 * i) >> shift + } + ret &= bottom28Bits + return ret, buf +} + +// p224FromBig sets *out = *in. +func p224FromBig(out *p224FieldElement, in *big.Int) { + bytes := in.Bytes() + out[0], bytes = get28BitsFromEnd(bytes, 0) + out[1], bytes = get28BitsFromEnd(bytes, 4) + out[2], bytes = get28BitsFromEnd(bytes, 0) + out[3], bytes = get28BitsFromEnd(bytes, 4) + out[4], bytes = get28BitsFromEnd(bytes, 0) + out[5], bytes = get28BitsFromEnd(bytes, 4) + out[6], bytes = get28BitsFromEnd(bytes, 0) + out[7], bytes = get28BitsFromEnd(bytes, 4) +} + +// p224ToBig returns in as a big.Int. +func p224ToBig(in *p224FieldElement) *big.Int { + var buf [28]byte + buf[27] = byte(in[0]) + buf[26] = byte(in[0] >> 8) + buf[25] = byte(in[0] >> 16) + buf[24] = byte(((in[0] >> 24) & 0x0f) | (in[1]<<4)&0xf0) + + buf[23] = byte(in[1] >> 4) + buf[22] = byte(in[1] >> 12) + buf[21] = byte(in[1] >> 20) + + buf[20] = byte(in[2]) + buf[19] = byte(in[2] >> 8) + buf[18] = byte(in[2] >> 16) + buf[17] = byte(((in[2] >> 24) & 0x0f) | (in[3]<<4)&0xf0) + + buf[16] = byte(in[3] >> 4) + buf[15] = byte(in[3] >> 12) + buf[14] = byte(in[3] >> 20) + + buf[13] = byte(in[4]) + buf[12] = byte(in[4] >> 8) + buf[11] = byte(in[4] >> 16) + buf[10] = byte(((in[4] >> 24) & 0x0f) | (in[5]<<4)&0xf0) + + buf[9] = byte(in[5] >> 4) + buf[8] = byte(in[5] >> 12) + buf[7] = byte(in[5] >> 20) + + buf[6] = byte(in[6]) + buf[5] = byte(in[6] >> 8) + buf[4] = byte(in[6] >> 16) + buf[3] = byte(((in[6] >> 24) & 0x0f) | (in[7]<<4)&0xf0) + + buf[2] = byte(in[7] >> 4) + buf[1] = byte(in[7] >> 12) + buf[0] = byte(in[7] >> 20) + + return new(big.Int).SetBytes(buf[:]) +} diff --git a/src/crypto/elliptic/p224_test.go b/src/crypto/elliptic/p224_test.go new file mode 100644 index 0000000..b213b27 --- /dev/null +++ b/src/crypto/elliptic/p224_test.go @@ -0,0 +1,629 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package elliptic + +import ( + "encoding/hex" + "fmt" + "math/big" + "math/bits" + "math/rand" + "reflect" + "testing" + "testing/quick" +) + +var toFromBigTests = []string{ + "0", + "1", + "23", + "b70e0cb46bb4bf7f321390b94a03c1d356c01122343280d6105c1d21", + "706a46d476dcb76798e6046d89474788d164c18032d268fd10704fa6", +} + +func p224AlternativeToBig(in *p224FieldElement) *big.Int { + ret := new(big.Int) + tmp := new(big.Int) + + for i := len(in) - 1; i >= 0; i-- { + ret.Lsh(ret, 28) + tmp.SetInt64(int64(in[i])) + ret.Add(ret, tmp) + } + ret.Mod(ret, P224().Params().P) + return ret +} + +func TestP224ToFromBig(t *testing.T) { + for i, test := range toFromBigTests { + n, _ := new(big.Int).SetString(test, 16) + var x p224FieldElement + p224FromBig(&x, n) + m := p224ToBig(&x) + if n.Cmp(m) != 0 { + t.Errorf("#%d: %x != %x", i, n, m) + } + q := p224AlternativeToBig(&x) + if n.Cmp(q) != 0 { + t.Errorf("#%d: %x != %x (alternative)", i, n, q) + } + } +} + +// quickCheckConfig32 will make each quickcheck test run (32 * -quickchecks) +// times. The default value of -quickchecks is 100. +var quickCheckConfig32 = &quick.Config{MaxCountScale: 32} + +// weirdLimbs can be combined to generate a range of edge-case field elements. +var weirdLimbs = [...]uint32{ + 0, 1, (1 << 29) - 1, + (1 << 12), (1 << 12) - 1, + (1 << 28), (1 << 28) - 1, +} + +func generateLimb(rand *rand.Rand) uint32 { + const bottom29Bits = 0x1fffffff + n := rand.Intn(len(weirdLimbs) + 3) + switch n { + case len(weirdLimbs): + // Random value. + return uint32(rand.Int31n(1 << 29)) + case len(weirdLimbs) + 1: + // Sum of two values. + k := generateLimb(rand) + generateLimb(rand) + return k & bottom29Bits + case len(weirdLimbs) + 2: + // Difference of two values. + k := generateLimb(rand) - generateLimb(rand) + return k & bottom29Bits + default: + return weirdLimbs[n] + } +} + +func (p224FieldElement) Generate(rand *rand.Rand, size int) reflect.Value { + return reflect.ValueOf(p224FieldElement{ + generateLimb(rand), + generateLimb(rand), + generateLimb(rand), + generateLimb(rand), + generateLimb(rand), + generateLimb(rand), + generateLimb(rand), + generateLimb(rand), + }) +} + +func isInBounds(x *p224FieldElement) bool { + return bits.Len32(x[0]) <= 29 && + bits.Len32(x[1]) <= 29 && + bits.Len32(x[2]) <= 29 && + bits.Len32(x[3]) <= 29 && + bits.Len32(x[4]) <= 29 && + bits.Len32(x[5]) <= 29 && + bits.Len32(x[6]) <= 29 && + bits.Len32(x[7]) <= 29 +} + +func TestP224Mul(t *testing.T) { + mulMatchesBigInt := func(a, b, out p224FieldElement) bool { + var tmp p224LargeFieldElement + p224Mul(&out, &a, &b, &tmp) + + exp := new(big.Int).Mul(p224AlternativeToBig(&a), p224AlternativeToBig(&b)) + exp.Mod(exp, P224().Params().P) + got := p224AlternativeToBig(&out) + if exp.Cmp(got) != 0 || !isInBounds(&out) { + t.Logf("a = %x", a) + t.Logf("b = %x", b) + t.Logf("p224Mul(a, b) = %x = %v", out, got) + t.Logf("a * b = %v", exp) + return false + } + + return true + } + + a := p224FieldElement{0xfffffff, 0xfffffff, 0xf00ffff, 0x20f, 0x0, 0x0, 0x0, 0x0} + b := p224FieldElement{1, 0, 0, 0, 0, 0, 0, 0} + if !mulMatchesBigInt(a, b, p224FieldElement{}) { + t.Fail() + } + + if err := quick.Check(mulMatchesBigInt, quickCheckConfig32); err != nil { + t.Error(err) + } +} + +func TestP224Square(t *testing.T) { + squareMatchesBigInt := func(a, out p224FieldElement) bool { + var tmp p224LargeFieldElement + p224Square(&out, &a, &tmp) + + exp := p224AlternativeToBig(&a) + exp.Mul(exp, exp) + exp.Mod(exp, P224().Params().P) + got := p224AlternativeToBig(&out) + if exp.Cmp(got) != 0 || !isInBounds(&out) { + t.Logf("a = %x", a) + t.Logf("p224Square(a, b) = %x = %v", out, got) + t.Logf("a * a = %v", exp) + return false + } + + return true + } + + if err := quick.Check(squareMatchesBigInt, quickCheckConfig32); err != nil { + t.Error(err) + } +} + +func TestP224Add(t *testing.T) { + addMatchesBigInt := func(a, b, out p224FieldElement) bool { + p224Add(&out, &a, &b) + + exp := new(big.Int).Add(p224AlternativeToBig(&a), p224AlternativeToBig(&b)) + exp.Mod(exp, P224().Params().P) + got := p224AlternativeToBig(&out) + if exp.Cmp(got) != 0 { + t.Logf("a = %x", a) + t.Logf("b = %x", b) + t.Logf("p224Add(a, b) = %x = %v", out, got) + t.Logf("a + b = %v", exp) + return false + } + + return true + } + + if err := quick.Check(addMatchesBigInt, quickCheckConfig32); err != nil { + t.Error(err) + } +} + +func TestP224Reduce(t *testing.T) { + reduceMatchesBigInt := func(a p224FieldElement) bool { + out := a + // TODO: generate higher values for functions like p224Reduce that are + // expected to work with higher input bounds. + p224Reduce(&out) + + exp := p224AlternativeToBig(&a) + got := p224AlternativeToBig(&out) + if exp.Cmp(got) != 0 || !isInBounds(&out) { + t.Logf("a = %x = %v", a, exp) + t.Logf("p224Reduce(a) = %x = %v", out, got) + return false + } + + return true + } + + if err := quick.Check(reduceMatchesBigInt, quickCheckConfig32); err != nil { + t.Error(err) + } +} + +func TestP224Contract(t *testing.T) { + contractMatchesBigInt := func(a, out p224FieldElement) bool { + p224Contract(&out, &a) + + exp := p224AlternativeToBig(&a) + got := p224AlternativeToBig(&out) + if exp.Cmp(got) != 0 { + t.Logf("a = %x = %v", a, exp) + t.Logf("p224Contract(a) = %x = %v", out, got) + return false + } + + // Check that out < P. + for i := range p224P { + k := 8 - i - 1 + if out[k] > p224P[k] { + t.Logf("p224Contract(a) = %x", out) + return false + } + if out[k] < p224P[k] { + return true + } + } + t.Logf("p224Contract(a) = %x", out) + return false + } + + if !contractMatchesBigInt(p224P, p224FieldElement{}) { + t.Error("p224Contract(p) is broken") + } + pMinus1 := p224FieldElement{0, 0, 0, 0xffff000, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff} + if !contractMatchesBigInt(pMinus1, p224FieldElement{}) { + t.Error("p224Contract(p - 1) is broken") + } + // Check that we can handle input above p, but lowest limb zero. + a := p224FieldElement{0, 1, 0, 0xffff000, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff} + if !contractMatchesBigInt(a, p224FieldElement{}) { + t.Error("p224Contract(p + 2²⁸) is broken") + } + // Check that we can handle input above p, but lowest three limbs zero. + b := p224FieldElement{0, 0, 0, 0xffff001, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff} + if !contractMatchesBigInt(b, p224FieldElement{}) { + t.Error("p224Contract(p + 2⁸⁴) is broken") + } + + if err := quick.Check(contractMatchesBigInt, quickCheckConfig32); err != nil { + t.Error(err) + } +} + +func TestP224IsZero(t *testing.T) { + if got := p224IsZero(&p224FieldElement{}); got != 1 { + t.Errorf("p224IsZero(0) = %d, expected 1", got) + } + if got := p224IsZero((*p224FieldElement)(&p224P)); got != 1 { + t.Errorf("p224IsZero(p) = %d, expected 1", got) + } + if got := p224IsZero(&p224FieldElement{1}); got != 0 { + t.Errorf("p224IsZero(1) = %d, expected 0", got) + } + + isZeroMatchesBigInt := func(a p224FieldElement) bool { + isZero := p224IsZero(&a) + + big := p224AlternativeToBig(&a) + if big.Sign() == 0 && isZero != 1 { + return false + } + if big.Sign() != 0 && isZero != 0 { + return false + } + return true + } + + if err := quick.Check(isZeroMatchesBigInt, quickCheckConfig32); err != nil { + t.Error(err) + } +} + +func TestP224Invert(t *testing.T) { + var out p224FieldElement + + p224Invert(&out, &p224FieldElement{}) + if got := p224IsZero(&out); got != 1 { + t.Errorf("p224Invert(0) = %x, expected 0", out) + } + + p224Invert(&out, (*p224FieldElement)(&p224P)) + if got := p224IsZero(&out); got != 1 { + t.Errorf("p224Invert(p) = %x, expected 0", out) + } + + p224Invert(&out, &p224FieldElement{1}) + p224Contract(&out, &out) + if out != (p224FieldElement{1}) { + t.Errorf("p224Invert(1) = %x, expected 1", out) + } + + var tmp p224LargeFieldElement + a := p224FieldElement{1, 2, 3, 4, 5, 6, 7, 8} + p224Invert(&out, &a) + p224Mul(&out, &out, &a, &tmp) + p224Contract(&out, &out) + if out != (p224FieldElement{1}) { + t.Errorf("p224Invert(a) * a = %x, expected 1", out) + } +} + +type baseMultTest struct { + k string + x, y string +} + +var p224BaseMultTests = []baseMultTest{ + { + "1", + "b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", + "bd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34", + }, + { + "2", + "706a46dc76dcb76798e60e6d89474788d16dc18032d268fd1a704fa6", + "1c2b76a7bc25e7702a704fa986892849fca629487acf3709d2e4e8bb", + }, + { + "3", + "df1b1d66a551d0d31eff822558b9d2cc75c2180279fe0d08fd896d04", + "a3f7f03cadd0be444c0aa56830130ddf77d317344e1af3591981a925", + }, + { + "4", + "ae99feebb5d26945b54892092a8aee02912930fa41cd114e40447301", + "482580a0ec5bc47e88bc8c378632cd196cb3fa058a7114eb03054c9", + }, + { + "5", + "31c49ae75bce7807cdff22055d94ee9021fedbb5ab51c57526f011aa", + "27e8bff1745635ec5ba0c9f1c2ede15414c6507d29ffe37e790a079b", + }, + { + "6", + "1f2483f82572251fca975fea40db821df8ad82a3c002ee6c57112408", + "89faf0ccb750d99b553c574fad7ecfb0438586eb3952af5b4b153c7e", + }, + { + "7", + "db2f6be630e246a5cf7d99b85194b123d487e2d466b94b24a03c3e28", + "f3a30085497f2f611ee2517b163ef8c53b715d18bb4e4808d02b963", + }, + { + "8", + "858e6f9cc6c12c31f5df124aa77767b05c8bc021bd683d2b55571550", + "46dcd3ea5c43898c5c5fc4fdac7db39c2f02ebee4e3541d1e78047a", + }, + { + "9", + "2fdcccfee720a77ef6cb3bfbb447f9383117e3daa4a07e36ed15f78d", + "371732e4f41bf4f7883035e6a79fcedc0e196eb07b48171697517463", + }, + { + "10", + "aea9e17a306517eb89152aa7096d2c381ec813c51aa880e7bee2c0fd", + "39bb30eab337e0a521b6cba1abe4b2b3a3e524c14a3fe3eb116b655f", + }, + { + "11", + "ef53b6294aca431f0f3c22dc82eb9050324f1d88d377e716448e507c", + "20b510004092e96636cfb7e32efded8265c266dfb754fa6d6491a6da", + }, + { + "12", + "6e31ee1dc137f81b056752e4deab1443a481033e9b4c93a3044f4f7a", + "207dddf0385bfdeab6e9acda8da06b3bbef224a93ab1e9e036109d13", + }, + { + "13", + "34e8e17a430e43289793c383fac9774247b40e9ebd3366981fcfaeca", + "252819f71c7fb7fbcb159be337d37d3336d7feb963724fdfb0ecb767", + }, + { + "14", + "a53640c83dc208603ded83e4ecf758f24c357d7cf48088b2ce01e9fa", + "d5814cd724199c4a5b974a43685fbf5b8bac69459c9469bc8f23ccaf", + }, + { + "15", + "baa4d8635511a7d288aebeedd12ce529ff102c91f97f867e21916bf9", + "979a5f4759f80f4fb4ec2e34f5566d595680a11735e7b61046127989", + }, + { + "16", + "b6ec4fe1777382404ef679997ba8d1cc5cd8e85349259f590c4c66d", + "3399d464345906b11b00e363ef429221f2ec720d2f665d7dead5b482", + }, + { + "17", + "b8357c3a6ceef288310e17b8bfeff9200846ca8c1942497c484403bc", + "ff149efa6606a6bd20ef7d1b06bd92f6904639dce5174db6cc554a26", + }, + { + "18", + "c9ff61b040874c0568479216824a15eab1a838a797d189746226e4cc", + "ea98d60e5ffc9b8fcf999fab1df7e7ef7084f20ddb61bb045a6ce002", + }, + { + "19", + "a1e81c04f30ce201c7c9ace785ed44cc33b455a022f2acdbc6cae83c", + "dcf1f6c3db09c70acc25391d492fe25b4a180babd6cea356c04719cd", + }, + { + "20", + "fcc7f2b45df1cd5a3c0c0731ca47a8af75cfb0347e8354eefe782455", + "d5d7110274cba7cdee90e1a8b0d394c376a5573db6be0bf2747f530", + }, + { + "112233445566778899", + "61f077c6f62ed802dad7c2f38f5c67f2cc453601e61bd076bb46179e", + "2272f9e9f5933e70388ee652513443b5e289dd135dcc0d0299b225e4", + }, + { + "112233445566778899112233445566778899", + "29895f0af496bfc62b6ef8d8a65c88c613949b03668aab4f0429e35", + "3ea6e53f9a841f2019ec24bde1a75677aa9b5902e61081c01064de93", + }, + { + "6950511619965839450988900688150712778015737983940691968051900319680", + "ab689930bcae4a4aa5f5cb085e823e8ae30fd365eb1da4aba9cf0379", + "3345a121bbd233548af0d210654eb40bab788a03666419be6fbd34e7", + }, + { + "13479972933410060327035789020509431695094902435494295338570602119423", + "bdb6a8817c1f89da1c2f3dd8e97feb4494f2ed302a4ce2bc7f5f4025", + "4c7020d57c00411889462d77a5438bb4e97d177700bf7243a07f1680", + }, + { + "13479971751745682581351455311314208093898607229429740618390390702079", + "d58b61aa41c32dd5eba462647dba75c5d67c83606c0af2bd928446a9", + "d24ba6a837be0460dd107ae77725696d211446c5609b4595976b16bd", + }, + { + "13479972931865328106486971546324465392952975980343228160962702868479", + "dc9fa77978a005510980e929a1485f63716df695d7a0c18bb518df03", + "ede2b016f2ddffc2a8c015b134928275ce09e5661b7ab14ce0d1d403", + }, + { + "11795773708834916026404142434151065506931607341523388140225443265536", + "499d8b2829cfb879c901f7d85d357045edab55028824d0f05ba279ba", + "bf929537b06e4015919639d94f57838fa33fc3d952598dcdbb44d638", + }, + { + "784254593043826236572847595991346435467177662189391577090", + "8246c999137186632c5f9eddf3b1b0e1764c5e8bd0e0d8a554b9cb77", + "e80ed8660bc1cb17ac7d845be40a7a022d3306f116ae9f81fea65947", + }, + { + "13479767645505654746623887797783387853576174193480695826442858012671", + "6670c20afcceaea672c97f75e2e9dd5c8460e54bb38538ebb4bd30eb", + "f280d8008d07a4caf54271f993527d46ff3ff46fd1190a3f1faa4f74", + }, + { + "205688069665150753842126177372015544874550518966168735589597183", + "eca934247425cfd949b795cb5ce1eff401550386e28d1a4c5a8eb", + "d4c01040dba19628931bc8855370317c722cbd9ca6156985f1c2e9ce", + }, + { + "13479966930919337728895168462090683249159702977113823384618282123295", + "ef353bf5c73cd551b96d596fbc9a67f16d61dd9fe56af19de1fba9cd", + "21771b9cdce3e8430c09b3838be70b48c21e15bc09ee1f2d7945b91f", + }, + { + "50210731791415612487756441341851895584393717453129007497216", + "4036052a3091eb481046ad3289c95d3ac905ca0023de2c03ecd451cf", + "d768165a38a2b96f812586a9d59d4136035d9c853a5bf2e1c86a4993", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368041", + "fcc7f2b45df1cd5a3c0c0731ca47a8af75cfb0347e8354eefe782455", + "f2a28eefd8b345832116f1e574f2c6b2c895aa8c24941f40d8b80ad1", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368042", + "a1e81c04f30ce201c7c9ace785ed44cc33b455a022f2acdbc6cae83c", + "230e093c24f638f533dac6e2b6d01da3b5e7f45429315ca93fb8e634", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368043", + "c9ff61b040874c0568479216824a15eab1a838a797d189746226e4cc", + "156729f1a003647030666054e208180f8f7b0df2249e44fba5931fff", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368044", + "b8357c3a6ceef288310e17b8bfeff9200846ca8c1942497c484403bc", + "eb610599f95942df1082e4f9426d086fb9c6231ae8b24933aab5db", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368045", + "b6ec4fe1777382404ef679997ba8d1cc5cd8e85349259f590c4c66d", + "cc662b9bcba6f94ee4ff1c9c10bd6ddd0d138df2d099a282152a4b7f", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368046", + "baa4d8635511a7d288aebeedd12ce529ff102c91f97f867e21916bf9", + "6865a0b8a607f0b04b13d1cb0aa992a5a97f5ee8ca1849efb9ed8678", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368047", + "a53640c83dc208603ded83e4ecf758f24c357d7cf48088b2ce01e9fa", + "2a7eb328dbe663b5a468b5bc97a040a3745396ba636b964370dc3352", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368048", + "34e8e17a430e43289793c383fac9774247b40e9ebd3366981fcfaeca", + "dad7e608e380480434ea641cc82c82cbc92801469c8db0204f13489a", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368049", + "6e31ee1dc137f81b056752e4deab1443a481033e9b4c93a3044f4f7a", + "df82220fc7a4021549165325725f94c3410ddb56c54e161fc9ef62ee", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368050", + "ef53b6294aca431f0f3c22dc82eb9050324f1d88d377e716448e507c", + "df4aefffbf6d1699c930481cd102127c9a3d992048ab05929b6e5927", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368051", + "aea9e17a306517eb89152aa7096d2c381ec813c51aa880e7bee2c0fd", + "c644cf154cc81f5ade49345e541b4d4b5c1adb3eb5c01c14ee949aa2", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368052", + "2fdcccfee720a77ef6cb3bfbb447f9383117e3daa4a07e36ed15f78d", + "c8e8cd1b0be40b0877cfca1958603122f1e6914f84b7e8e968ae8b9e", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368053", + "858e6f9cc6c12c31f5df124aa77767b05c8bc021bd683d2b55571550", + "fb9232c15a3bc7673a3a03b0253824c53d0fd1411b1cabe2e187fb87", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368054", + "db2f6be630e246a5cf7d99b85194b123d487e2d466b94b24a03c3e28", + "f0c5cff7ab680d09ee11dae84e9c1072ac48ea2e744b1b7f72fd469e", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368055", + "1f2483f82572251fca975fea40db821df8ad82a3c002ee6c57112408", + "76050f3348af2664aac3a8b05281304ebc7a7914c6ad50a4b4eac383", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368056", + "31c49ae75bce7807cdff22055d94ee9021fedbb5ab51c57526f011aa", + "d817400e8ba9ca13a45f360e3d121eaaeb39af82d6001c8186f5f866", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368057", + "ae99feebb5d26945b54892092a8aee02912930fa41cd114e40447301", + "fb7da7f5f13a43b81774373c879cd32d6934c05fa758eeb14fcfab38", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368058", + "df1b1d66a551d0d31eff822558b9d2cc75c2180279fe0d08fd896d04", + "5c080fc3522f41bbb3f55a97cfecf21f882ce8cbb1e50ca6e67e56dc", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368059", + "706a46dc76dcb76798e60e6d89474788d16dc18032d268fd1a704fa6", + "e3d4895843da188fd58fb0567976d7b50359d6b78530c8f62d1b1746", + }, + { + "26959946667150639794667015087019625940457807714424391721682722368060", + "b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", + "42c89c774a08dc04b3dd201932bc8a5ea5f8b89bbb2a7e667aff81cd", + }, +} + +func TestP224BaseMult(t *testing.T) { + p224 := P224() + for i, e := range p224BaseMultTests { + k, ok := new(big.Int).SetString(e.k, 10) + if !ok { + t.Errorf("%d: bad value for k: %s", i, e.k) + } + x, y := p224.ScalarBaseMult(k.Bytes()) + if fmt.Sprintf("%x", x) != e.x || fmt.Sprintf("%x", y) != e.y { + t.Errorf("%d: bad output for k=%s: got (%x, %x), want (%s, %s)", i, e.k, x, y, e.x, e.y) + } + if testing.Short() && i > 5 { + break + } + } +} + +func TestP224GenericBaseMult(t *testing.T) { + // We use the P224 CurveParams directly in order to test the generic implementation. + p224 := P224().Params() + for i, e := range p224BaseMultTests { + k, ok := new(big.Int).SetString(e.k, 10) + if !ok { + t.Errorf("%d: bad value for k: %s", i, e.k) + } + x, y := p224.ScalarBaseMult(k.Bytes()) + if fmt.Sprintf("%x", x) != e.x || fmt.Sprintf("%x", y) != e.y { + t.Errorf("%d: bad output for k=%s: got (%x, %x), want (%s, %s)", i, e.k, x, y, e.x, e.y) + } + if testing.Short() && i > 5 { + break + } + } +} + +func TestP224Overflow(t *testing.T) { + // This tests for a specific bug in the P224 implementation. + p224 := P224() + pointData, _ := hex.DecodeString("049B535B45FB0A2072398A6831834624C7E32CCFD5A4B933BCEAF77F1DD945E08BBE5178F5EDF5E733388F196D2A631D2E075BB16CBFEEA15B") + x, y := Unmarshal(p224, pointData) + if !p224.IsOnCurve(x, y) { + t.Error("P224 failed to validate a correct point") + } +} diff --git a/src/crypto/elliptic/p256.go b/src/crypto/elliptic/p256.go new file mode 100644 index 0000000..da52837 --- /dev/null +++ b/src/crypto/elliptic/p256.go @@ -0,0 +1,1193 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !amd64 && !arm64 +// +build !amd64,!arm64 + +package elliptic + +// This file contains a constant-time, 32-bit implementation of P256. + +import ( + "math/big" +) + +type p256Curve struct { + *CurveParams +} + +var ( + p256Params *CurveParams + + // RInverse contains 1/R mod p - the inverse of the Montgomery constant + // (2**257). + p256RInverse *big.Int +) + +func initP256() { + // See FIPS 186-3, section D.2.3 + p256Params = &CurveParams{Name: "P-256"} + p256Params.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10) + p256Params.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10) + p256Params.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16) + p256Params.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16) + p256Params.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16) + p256Params.BitSize = 256 + + p256RInverse, _ = new(big.Int).SetString("7fffffff00000001fffffffe8000000100000000ffffffff0000000180000000", 16) + + // Arch-specific initialization, i.e. let a platform dynamically pick a P256 implementation + initP256Arch() +} + +func (curve p256Curve) Params() *CurveParams { + return curve.CurveParams +} + +// p256GetScalar endian-swaps the big-endian scalar value from in and writes it +// to out. If the scalar is equal or greater than the order of the group, it's +// reduced modulo that order. +func p256GetScalar(out *[32]byte, in []byte) { + n := new(big.Int).SetBytes(in) + var scalarBytes []byte + + if n.Cmp(p256Params.N) >= 0 || len(in) > len(out) { + n.Mod(n, p256Params.N) + scalarBytes = n.Bytes() + } else { + scalarBytes = in + } + + for i, v := range scalarBytes { + out[len(scalarBytes)-(1+i)] = v + } +} + +func (p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { + var scalarReversed [32]byte + p256GetScalar(&scalarReversed, scalar) + + var x1, y1, z1 [p256Limbs]uint32 + p256ScalarBaseMult(&x1, &y1, &z1, &scalarReversed) + return p256ToAffine(&x1, &y1, &z1) +} + +func (p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { + var scalarReversed [32]byte + p256GetScalar(&scalarReversed, scalar) + + var px, py, x1, y1, z1 [p256Limbs]uint32 + p256FromBig(&px, bigX) + p256FromBig(&py, bigY) + p256ScalarMult(&x1, &y1, &z1, &px, &py, &scalarReversed) + return p256ToAffine(&x1, &y1, &z1) +} + +// Field elements are represented as nine, unsigned 32-bit words. +// +// The value of a field element is: +// x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228) +// +// That is, each limb is alternately 29 or 28-bits wide in little-endian +// order. +// +// This means that a field element hits 2**257, rather than 2**256 as we would +// like. A 28, 29, ... pattern would cause us to hit 2**256, but that causes +// problems when multiplying as terms end up one bit short of a limb which +// would require much bit-shifting to correct. +// +// Finally, the values stored in a field element are in Montgomery form. So the +// value |y| is stored as (y*R) mod p, where p is the P-256 prime and R is +// 2**257. + +const ( + p256Limbs = 9 + bottom29Bits = 0x1fffffff +) + +var ( + // p256One is the number 1 as a field element. + p256One = [p256Limbs]uint32{2, 0, 0, 0xffff800, 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff, 0} + p256Zero = [p256Limbs]uint32{0, 0, 0, 0, 0, 0, 0, 0, 0} + // p256P is the prime modulus as a field element. + p256P = [p256Limbs]uint32{0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff, 0, 0, 0x200000, 0xf000000, 0xfffffff} + // p2562P is the twice prime modulus as a field element. + p2562P = [p256Limbs]uint32{0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff, 0, 0, 0x400000, 0xe000000, 0x1fffffff} +) + +// p256Precomputed contains precomputed values to aid the calculation of scalar +// multiples of the base point, G. It's actually two, equal length, tables +// concatenated. +// +// The first table contains (x,y) field element pairs for 16 multiples of the +// base point, G. +// +// Index | Index (binary) | Value +// 0 | 0000 | 0G (all zeros, omitted) +// 1 | 0001 | G +// 2 | 0010 | 2**64G +// 3 | 0011 | 2**64G + G +// 4 | 0100 | 2**128G +// 5 | 0101 | 2**128G + G +// 6 | 0110 | 2**128G + 2**64G +// 7 | 0111 | 2**128G + 2**64G + G +// 8 | 1000 | 2**192G +// 9 | 1001 | 2**192G + G +// 10 | 1010 | 2**192G + 2**64G +// 11 | 1011 | 2**192G + 2**64G + G +// 12 | 1100 | 2**192G + 2**128G +// 13 | 1101 | 2**192G + 2**128G + G +// 14 | 1110 | 2**192G + 2**128G + 2**64G +// 15 | 1111 | 2**192G + 2**128G + 2**64G + G +// +// The second table follows the same style, but the terms are 2**32G, +// 2**96G, 2**160G, 2**224G. +// +// This is ~2KB of data. +var p256Precomputed = [p256Limbs * 2 * 15 * 2]uint32{ + 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7edc, 0xd4a6eab, 0x3120bee, + 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba21, 0x14b10bb, 0xae3fe3, + 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe49073, 0x3fa36cc, 0x5ebcd2c, + 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea12446, 0xe1ade1e, 0xec91f22, + 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109, 0xa267a00, 0xb57c050, + 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0x7d6dee7, 0x2976e4b, + 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a5a9, 0x843a649, 0xc3ab0fa, + 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11, 0x58c43df, 0xf423fc2, + 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db40f, 0x83e277d, 0xb0dd609, + 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5, 0xe10c9e, 0x33ab581, + 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f, 0x48764cd, 0x76dbcca, + 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b20, 0x4ba3173, 0xc168c33, + 0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0, 0x65dd7ff, 0x3a1e4f6, + 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f077, 0xa6add89, 0x4894acd, + 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0, + 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c, 0xda0cf5b, 0x812e881, + 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51, 0xc22be3e, 0xe35e65a, + 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9, 0x1c5a839, 0x47a1e26, + 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c502, 0x2f32042, 0xa17769b, + 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a02, 0x3fc93, 0x5620023, + 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c, 0x407f75c, 0xbaab133, + 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea7, 0x3293ac0, 0xcdc98aa, + 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29, + 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72, 0x73e1c35, 0xee70fbc, + 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85, 0x27de188, 0x66f70b8, + 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae914, 0x2f3ec51, 0x3826b59, + 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x823d9d2, 0x8213f39, + 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4a, 0xf5ddc3d, 0x3786689, + 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a729, 0x4be3499, 0x52b23aa, + 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb048035, 0xe31de66, 0xc6ecaa3, + 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a7529, 0xcb7beb1, 0xb2a78a1, + 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff658, 0xe3d6511, 0xc7d76f, + 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72, + 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d32411, 0xb04a838, 0xd760d2d, + 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11e, 0x20bca9a, 0x66f496b, + 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a, + 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56ff, 0x65ef930, 0x21dc4a, + 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15f, 0x624e62e, 0xa90ae2f, + 0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522b, 0xdc78583, 0x40eeabb, + 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef34, 0xae2a960, 0x91b8bdc, + 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0x2413c8e, 0x5425bf9, + 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633, 0x7c91952, 0xd806dce, + 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef73, 0x8956f34, 0xe4b5cf2, + 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7, 0x627b614, 0x7371cca, + 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc9, 0x9c19bf2, 0x5882229, + 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b3, 0xe85ff25, 0x408ef57, + 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113, 0xa4a1769, 0x11fbc6c, + 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b7, 0x4acbad9, 0x5efc5fa, + 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc, 0x7bf0fa9, 0x957651, + 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec, + 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c12d, 0xf20bd46, 0x1951fa7, + 0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74, 0x99bb618, 0x2db944c, + 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e74779, 0x576138, 0x9587927, + 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782d, 0xfc72e0b, 0x701b298, + 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5d8, 0xf858d3a, 0x942eea8, + 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a1, 0x8395659, 0x52ed4e2, + 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c0, 0x6bdf55a, 0x4e4457d, + 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747b, 0x878558d, 0x7d29aa4, + 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d7, 0xa5bef68, 0xb7b30d8, + 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f51951, 0x9d0c177, 0x1c49a78, +} + +// Field element operations: + +// nonZeroToAllOnes returns: +// 0xffffffff for 0 < x <= 2**31 +// 0 for x == 0 or x > 2**31. +func nonZeroToAllOnes(x uint32) uint32 { + return ((x - 1) >> 31) - 1 +} + +// p256ReduceCarry adds a multiple of p in order to cancel |carry|, +// which is a term at 2**257. +// +// On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28. +// On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29. +func p256ReduceCarry(inout *[p256Limbs]uint32, carry uint32) { + carry_mask := nonZeroToAllOnes(carry) + + inout[0] += carry << 1 + inout[3] += 0x10000000 & carry_mask + // carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the + // previous line therefore this doesn't underflow. + inout[3] -= carry << 11 + inout[4] += (0x20000000 - 1) & carry_mask + inout[5] += (0x10000000 - 1) & carry_mask + inout[6] += (0x20000000 - 1) & carry_mask + inout[6] -= carry << 22 + // This may underflow if carry is non-zero but, if so, we'll fix it in the + // next line. + inout[7] -= 1 & carry_mask + inout[7] += carry << 25 +} + +// p256Sum sets out = in+in2. +// +// On entry, in[i]+in2[i] must not overflow a 32-bit word. +// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29 +func p256Sum(out, in, in2 *[p256Limbs]uint32) { + carry := uint32(0) + for i := 0; ; i++ { + out[i] = in[i] + in2[i] + out[i] += carry + carry = out[i] >> 29 + out[i] &= bottom29Bits + + i++ + if i == p256Limbs { + break + } + + out[i] = in[i] + in2[i] + out[i] += carry + carry = out[i] >> 28 + out[i] &= bottom28Bits + } + + p256ReduceCarry(out, carry) +} + +const ( + two30m2 = 1<<30 - 1<<2 + two30p13m2 = 1<<30 + 1<<13 - 1<<2 + two31m2 = 1<<31 - 1<<2 + two31p24m2 = 1<<31 + 1<<24 - 1<<2 + two30m27m2 = 1<<30 - 1<<27 - 1<<2 +) + +// p256Zero31 is 0 mod p. +var p256Zero31 = [p256Limbs]uint32{two31m3, two30m2, two31m2, two30p13m2, two31m2, two30m2, two31p24m2, two30m27m2, two31m2} + +// p256Diff sets out = in-in2. +// +// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and +// in2[0,2,...] < 2**30, in2[1,3,...] < 2**29. +// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. +func p256Diff(out, in, in2 *[p256Limbs]uint32) { + var carry uint32 + + for i := 0; ; i++ { + out[i] = in[i] - in2[i] + out[i] += p256Zero31[i] + out[i] += carry + carry = out[i] >> 29 + out[i] &= bottom29Bits + + i++ + if i == p256Limbs { + break + } + + out[i] = in[i] - in2[i] + out[i] += p256Zero31[i] + out[i] += carry + carry = out[i] >> 28 + out[i] &= bottom28Bits + } + + p256ReduceCarry(out, carry) +} + +// p256ReduceDegree sets out = tmp/R mod p where tmp contains 64-bit words with +// the same 29,28,... bit positions as a field element. +// +// The values in field elements are in Montgomery form: x*R mod p where R = +// 2**257. Since we just multiplied two Montgomery values together, the result +// is x*y*R*R mod p. We wish to divide by R in order for the result also to be +// in Montgomery form. +// +// On entry: tmp[i] < 2**64 +// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29 +func p256ReduceDegree(out *[p256Limbs]uint32, tmp [17]uint64) { + // The following table may be helpful when reading this code: + // + // Limb number: 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10... + // Width (bits): 29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29 + // Start bit: 0 | 29| 57| 86|114|143|171|200|228|257|285 + // (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285 + var tmp2 [18]uint32 + var carry, x, xMask uint32 + + // tmp contains 64-bit words with the same 29,28,29-bit positions as a + // field element. So the top of an element of tmp might overlap with + // another element two positions down. The following loop eliminates + // this overlap. + tmp2[0] = uint32(tmp[0]) & bottom29Bits + + tmp2[1] = uint32(tmp[0]) >> 29 + tmp2[1] |= (uint32(tmp[0]>>32) << 3) & bottom28Bits + tmp2[1] += uint32(tmp[1]) & bottom28Bits + carry = tmp2[1] >> 28 + tmp2[1] &= bottom28Bits + + for i := 2; i < 17; i++ { + tmp2[i] = (uint32(tmp[i-2] >> 32)) >> 25 + tmp2[i] += (uint32(tmp[i-1])) >> 28 + tmp2[i] += (uint32(tmp[i-1]>>32) << 4) & bottom29Bits + tmp2[i] += uint32(tmp[i]) & bottom29Bits + tmp2[i] += carry + carry = tmp2[i] >> 29 + tmp2[i] &= bottom29Bits + + i++ + if i == 17 { + break + } + tmp2[i] = uint32(tmp[i-2]>>32) >> 25 + tmp2[i] += uint32(tmp[i-1]) >> 29 + tmp2[i] += ((uint32(tmp[i-1] >> 32)) << 3) & bottom28Bits + tmp2[i] += uint32(tmp[i]) & bottom28Bits + tmp2[i] += carry + carry = tmp2[i] >> 28 + tmp2[i] &= bottom28Bits + } + + tmp2[17] = uint32(tmp[15]>>32) >> 25 + tmp2[17] += uint32(tmp[16]) >> 29 + tmp2[17] += uint32(tmp[16]>>32) << 3 + tmp2[17] += carry + + // Montgomery elimination of terms: + // + // Since R is 2**257, we can divide by R with a bitwise shift if we can + // ensure that the right-most 257 bits are all zero. We can make that true + // by adding multiplies of p without affecting the value. + // + // So we eliminate limbs from right to left. Since the bottom 29 bits of p + // are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0. + // We can do that for 8 further limbs and then right shift to eliminate the + // extra factor of R. + for i := 0; ; i += 2 { + tmp2[i+1] += tmp2[i] >> 29 + x = tmp2[i] & bottom29Bits + xMask = nonZeroToAllOnes(x) + tmp2[i] = 0 + + // The bounds calculations for this loop are tricky. Each iteration of + // the loop eliminates two words by adding values to words to their + // right. + // + // The following table contains the amounts added to each word (as an + // offset from the value of i at the top of the loop). The amounts are + // accounted for from the first and second half of the loop separately + // and are written as, for example, 28 to mean a value <2**28. + // + // Word: 3 4 5 6 7 8 9 10 + // Added in top half: 28 11 29 21 29 28 + // 28 29 + // 29 + // Added in bottom half: 29 10 28 21 28 28 + // 29 + // + // The value that is currently offset 7 will be offset 5 for the next + // iteration and then offset 3 for the iteration after that. Therefore + // the total value added will be the values added at 7, 5 and 3. + // + // The following table accumulates these values. The sums at the bottom + // are written as, for example, 29+28, to mean a value < 2**29+2**28. + // + // Word: 3 4 5 6 7 8 9 10 11 12 13 + // 28 11 10 29 21 29 28 28 28 28 28 + // 29 28 11 28 29 28 29 28 29 28 + // 29 28 21 21 29 21 29 21 + // 10 29 28 21 28 21 28 + // 28 29 28 29 28 29 28 + // 11 10 29 10 29 10 + // 29 28 11 28 11 + // 29 29 + // -------------------------------------------- + // 30+ 31+ 30+ 31+ 30+ + // 28+ 29+ 28+ 29+ 21+ + // 21+ 28+ 21+ 28+ 10 + // 10 21+ 10 21+ + // 11 11 + // + // So the greatest amount is added to tmp2[10] and tmp2[12]. If + // tmp2[10/12] has an initial value of <2**29, then the maximum value + // will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32, + // as required. + tmp2[i+3] += (x << 10) & bottom28Bits + tmp2[i+4] += (x >> 18) + + tmp2[i+6] += (x << 21) & bottom29Bits + tmp2[i+7] += x >> 8 + + // At position 200, which is the starting bit position for word 7, we + // have a factor of 0xf000000 = 2**28 - 2**24. + tmp2[i+7] += 0x10000000 & xMask + tmp2[i+8] += (x - 1) & xMask + tmp2[i+7] -= (x << 24) & bottom28Bits + tmp2[i+8] -= x >> 4 + + tmp2[i+8] += 0x20000000 & xMask + tmp2[i+8] -= x + tmp2[i+8] += (x << 28) & bottom29Bits + tmp2[i+9] += ((x >> 1) - 1) & xMask + + if i+1 == p256Limbs { + break + } + tmp2[i+2] += tmp2[i+1] >> 28 + x = tmp2[i+1] & bottom28Bits + xMask = nonZeroToAllOnes(x) + tmp2[i+1] = 0 + + tmp2[i+4] += (x << 11) & bottom29Bits + tmp2[i+5] += (x >> 18) + + tmp2[i+7] += (x << 21) & bottom28Bits + tmp2[i+8] += x >> 7 + + // At position 199, which is the starting bit of the 8th word when + // dealing with a context starting on an odd word, we have a factor of + // 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th + // word from i+1 is i+8. + tmp2[i+8] += 0x20000000 & xMask + tmp2[i+9] += (x - 1) & xMask + tmp2[i+8] -= (x << 25) & bottom29Bits + tmp2[i+9] -= x >> 4 + + tmp2[i+9] += 0x10000000 & xMask + tmp2[i+9] -= x + tmp2[i+10] += (x - 1) & xMask + } + + // We merge the right shift with a carry chain. The words above 2**257 have + // widths of 28,29,... which we need to correct when copying them down. + carry = 0 + for i := 0; i < 8; i++ { + // The maximum value of tmp2[i + 9] occurs on the first iteration and + // is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is + // therefore safe. + out[i] = tmp2[i+9] + out[i] += carry + out[i] += (tmp2[i+10] << 28) & bottom29Bits + carry = out[i] >> 29 + out[i] &= bottom29Bits + + i++ + out[i] = tmp2[i+9] >> 1 + out[i] += carry + carry = out[i] >> 28 + out[i] &= bottom28Bits + } + + out[8] = tmp2[17] + out[8] += carry + carry = out[8] >> 29 + out[8] &= bottom29Bits + + p256ReduceCarry(out, carry) +} + +// p256Square sets out=in*in. +// +// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29. +// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. +func p256Square(out, in *[p256Limbs]uint32) { + var tmp [17]uint64 + + tmp[0] = uint64(in[0]) * uint64(in[0]) + tmp[1] = uint64(in[0]) * (uint64(in[1]) << 1) + tmp[2] = uint64(in[0])*(uint64(in[2])<<1) + + uint64(in[1])*(uint64(in[1])<<1) + tmp[3] = uint64(in[0])*(uint64(in[3])<<1) + + uint64(in[1])*(uint64(in[2])<<1) + tmp[4] = uint64(in[0])*(uint64(in[4])<<1) + + uint64(in[1])*(uint64(in[3])<<2) + + uint64(in[2])*uint64(in[2]) + tmp[5] = uint64(in[0])*(uint64(in[5])<<1) + + uint64(in[1])*(uint64(in[4])<<1) + + uint64(in[2])*(uint64(in[3])<<1) + tmp[6] = uint64(in[0])*(uint64(in[6])<<1) + + uint64(in[1])*(uint64(in[5])<<2) + + uint64(in[2])*(uint64(in[4])<<1) + + uint64(in[3])*(uint64(in[3])<<1) + tmp[7] = uint64(in[0])*(uint64(in[7])<<1) + + uint64(in[1])*(uint64(in[6])<<1) + + uint64(in[2])*(uint64(in[5])<<1) + + uint64(in[3])*(uint64(in[4])<<1) + // tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60, + // which is < 2**64 as required. + tmp[8] = uint64(in[0])*(uint64(in[8])<<1) + + uint64(in[1])*(uint64(in[7])<<2) + + uint64(in[2])*(uint64(in[6])<<1) + + uint64(in[3])*(uint64(in[5])<<2) + + uint64(in[4])*uint64(in[4]) + tmp[9] = uint64(in[1])*(uint64(in[8])<<1) + + uint64(in[2])*(uint64(in[7])<<1) + + uint64(in[3])*(uint64(in[6])<<1) + + uint64(in[4])*(uint64(in[5])<<1) + tmp[10] = uint64(in[2])*(uint64(in[8])<<1) + + uint64(in[3])*(uint64(in[7])<<2) + + uint64(in[4])*(uint64(in[6])<<1) + + uint64(in[5])*(uint64(in[5])<<1) + tmp[11] = uint64(in[3])*(uint64(in[8])<<1) + + uint64(in[4])*(uint64(in[7])<<1) + + uint64(in[5])*(uint64(in[6])<<1) + tmp[12] = uint64(in[4])*(uint64(in[8])<<1) + + uint64(in[5])*(uint64(in[7])<<2) + + uint64(in[6])*uint64(in[6]) + tmp[13] = uint64(in[5])*(uint64(in[8])<<1) + + uint64(in[6])*(uint64(in[7])<<1) + tmp[14] = uint64(in[6])*(uint64(in[8])<<1) + + uint64(in[7])*(uint64(in[7])<<1) + tmp[15] = uint64(in[7]) * (uint64(in[8]) << 1) + tmp[16] = uint64(in[8]) * uint64(in[8]) + + p256ReduceDegree(out, tmp) +} + +// p256Mul sets out=in*in2. +// +// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and +// in2[0,2,...] < 2**30, in2[1,3,...] < 2**29. +// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. +func p256Mul(out, in, in2 *[p256Limbs]uint32) { + var tmp [17]uint64 + + tmp[0] = uint64(in[0]) * uint64(in2[0]) + tmp[1] = uint64(in[0])*(uint64(in2[1])<<0) + + uint64(in[1])*(uint64(in2[0])<<0) + tmp[2] = uint64(in[0])*(uint64(in2[2])<<0) + + uint64(in[1])*(uint64(in2[1])<<1) + + uint64(in[2])*(uint64(in2[0])<<0) + tmp[3] = uint64(in[0])*(uint64(in2[3])<<0) + + uint64(in[1])*(uint64(in2[2])<<0) + + uint64(in[2])*(uint64(in2[1])<<0) + + uint64(in[3])*(uint64(in2[0])<<0) + tmp[4] = uint64(in[0])*(uint64(in2[4])<<0) + + uint64(in[1])*(uint64(in2[3])<<1) + + uint64(in[2])*(uint64(in2[2])<<0) + + uint64(in[3])*(uint64(in2[1])<<1) + + uint64(in[4])*(uint64(in2[0])<<0) + tmp[5] = uint64(in[0])*(uint64(in2[5])<<0) + + uint64(in[1])*(uint64(in2[4])<<0) + + uint64(in[2])*(uint64(in2[3])<<0) + + uint64(in[3])*(uint64(in2[2])<<0) + + uint64(in[4])*(uint64(in2[1])<<0) + + uint64(in[5])*(uint64(in2[0])<<0) + tmp[6] = uint64(in[0])*(uint64(in2[6])<<0) + + uint64(in[1])*(uint64(in2[5])<<1) + + uint64(in[2])*(uint64(in2[4])<<0) + + uint64(in[3])*(uint64(in2[3])<<1) + + uint64(in[4])*(uint64(in2[2])<<0) + + uint64(in[5])*(uint64(in2[1])<<1) + + uint64(in[6])*(uint64(in2[0])<<0) + tmp[7] = uint64(in[0])*(uint64(in2[7])<<0) + + uint64(in[1])*(uint64(in2[6])<<0) + + uint64(in[2])*(uint64(in2[5])<<0) + + uint64(in[3])*(uint64(in2[4])<<0) + + uint64(in[4])*(uint64(in2[3])<<0) + + uint64(in[5])*(uint64(in2[2])<<0) + + uint64(in[6])*(uint64(in2[1])<<0) + + uint64(in[7])*(uint64(in2[0])<<0) + // tmp[8] has the greatest value but doesn't overflow. See logic in + // p256Square. + tmp[8] = uint64(in[0])*(uint64(in2[8])<<0) + + uint64(in[1])*(uint64(in2[7])<<1) + + uint64(in[2])*(uint64(in2[6])<<0) + + uint64(in[3])*(uint64(in2[5])<<1) + + uint64(in[4])*(uint64(in2[4])<<0) + + uint64(in[5])*(uint64(in2[3])<<1) + + uint64(in[6])*(uint64(in2[2])<<0) + + uint64(in[7])*(uint64(in2[1])<<1) + + uint64(in[8])*(uint64(in2[0])<<0) + tmp[9] = uint64(in[1])*(uint64(in2[8])<<0) + + uint64(in[2])*(uint64(in2[7])<<0) + + uint64(in[3])*(uint64(in2[6])<<0) + + uint64(in[4])*(uint64(in2[5])<<0) + + uint64(in[5])*(uint64(in2[4])<<0) + + uint64(in[6])*(uint64(in2[3])<<0) + + uint64(in[7])*(uint64(in2[2])<<0) + + uint64(in[8])*(uint64(in2[1])<<0) + tmp[10] = uint64(in[2])*(uint64(in2[8])<<0) + + uint64(in[3])*(uint64(in2[7])<<1) + + uint64(in[4])*(uint64(in2[6])<<0) + + uint64(in[5])*(uint64(in2[5])<<1) + + uint64(in[6])*(uint64(in2[4])<<0) + + uint64(in[7])*(uint64(in2[3])<<1) + + uint64(in[8])*(uint64(in2[2])<<0) + tmp[11] = uint64(in[3])*(uint64(in2[8])<<0) + + uint64(in[4])*(uint64(in2[7])<<0) + + uint64(in[5])*(uint64(in2[6])<<0) + + uint64(in[6])*(uint64(in2[5])<<0) + + uint64(in[7])*(uint64(in2[4])<<0) + + uint64(in[8])*(uint64(in2[3])<<0) + tmp[12] = uint64(in[4])*(uint64(in2[8])<<0) + + uint64(in[5])*(uint64(in2[7])<<1) + + uint64(in[6])*(uint64(in2[6])<<0) + + uint64(in[7])*(uint64(in2[5])<<1) + + uint64(in[8])*(uint64(in2[4])<<0) + tmp[13] = uint64(in[5])*(uint64(in2[8])<<0) + + uint64(in[6])*(uint64(in2[7])<<0) + + uint64(in[7])*(uint64(in2[6])<<0) + + uint64(in[8])*(uint64(in2[5])<<0) + tmp[14] = uint64(in[6])*(uint64(in2[8])<<0) + + uint64(in[7])*(uint64(in2[7])<<1) + + uint64(in[8])*(uint64(in2[6])<<0) + tmp[15] = uint64(in[7])*(uint64(in2[8])<<0) + + uint64(in[8])*(uint64(in2[7])<<0) + tmp[16] = uint64(in[8]) * (uint64(in2[8]) << 0) + + p256ReduceDegree(out, tmp) +} + +func p256Assign(out, in *[p256Limbs]uint32) { + *out = *in +} + +// p256Invert calculates |out| = |in|^{-1} +// +// Based on Fermat's Little Theorem: +// a^p = a (mod p) +// a^{p-1} = 1 (mod p) +// a^{p-2} = a^{-1} (mod p) +func p256Invert(out, in *[p256Limbs]uint32) { + var ftmp, ftmp2 [p256Limbs]uint32 + + // each e_I will hold |in|^{2^I - 1} + var e2, e4, e8, e16, e32, e64 [p256Limbs]uint32 + + p256Square(&ftmp, in) // 2^1 + p256Mul(&ftmp, in, &ftmp) // 2^2 - 2^0 + p256Assign(&e2, &ftmp) + p256Square(&ftmp, &ftmp) // 2^3 - 2^1 + p256Square(&ftmp, &ftmp) // 2^4 - 2^2 + p256Mul(&ftmp, &ftmp, &e2) // 2^4 - 2^0 + p256Assign(&e4, &ftmp) + p256Square(&ftmp, &ftmp) // 2^5 - 2^1 + p256Square(&ftmp, &ftmp) // 2^6 - 2^2 + p256Square(&ftmp, &ftmp) // 2^7 - 2^3 + p256Square(&ftmp, &ftmp) // 2^8 - 2^4 + p256Mul(&ftmp, &ftmp, &e4) // 2^8 - 2^0 + p256Assign(&e8, &ftmp) + for i := 0; i < 8; i++ { + p256Square(&ftmp, &ftmp) + } // 2^16 - 2^8 + p256Mul(&ftmp, &ftmp, &e8) // 2^16 - 2^0 + p256Assign(&e16, &ftmp) + for i := 0; i < 16; i++ { + p256Square(&ftmp, &ftmp) + } // 2^32 - 2^16 + p256Mul(&ftmp, &ftmp, &e16) // 2^32 - 2^0 + p256Assign(&e32, &ftmp) + for i := 0; i < 32; i++ { + p256Square(&ftmp, &ftmp) + } // 2^64 - 2^32 + p256Assign(&e64, &ftmp) + p256Mul(&ftmp, &ftmp, in) // 2^64 - 2^32 + 2^0 + for i := 0; i < 192; i++ { + p256Square(&ftmp, &ftmp) + } // 2^256 - 2^224 + 2^192 + + p256Mul(&ftmp2, &e64, &e32) // 2^64 - 2^0 + for i := 0; i < 16; i++ { + p256Square(&ftmp2, &ftmp2) + } // 2^80 - 2^16 + p256Mul(&ftmp2, &ftmp2, &e16) // 2^80 - 2^0 + for i := 0; i < 8; i++ { + p256Square(&ftmp2, &ftmp2) + } // 2^88 - 2^8 + p256Mul(&ftmp2, &ftmp2, &e8) // 2^88 - 2^0 + for i := 0; i < 4; i++ { + p256Square(&ftmp2, &ftmp2) + } // 2^92 - 2^4 + p256Mul(&ftmp2, &ftmp2, &e4) // 2^92 - 2^0 + p256Square(&ftmp2, &ftmp2) // 2^93 - 2^1 + p256Square(&ftmp2, &ftmp2) // 2^94 - 2^2 + p256Mul(&ftmp2, &ftmp2, &e2) // 2^94 - 2^0 + p256Square(&ftmp2, &ftmp2) // 2^95 - 2^1 + p256Square(&ftmp2, &ftmp2) // 2^96 - 2^2 + p256Mul(&ftmp2, &ftmp2, in) // 2^96 - 3 + + p256Mul(out, &ftmp2, &ftmp) // 2^256 - 2^224 + 2^192 + 2^96 - 3 +} + +// p256Scalar3 sets out=3*out. +// +// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29. +// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. +func p256Scalar3(out *[p256Limbs]uint32) { + var carry uint32 + + for i := 0; ; i++ { + out[i] *= 3 + out[i] += carry + carry = out[i] >> 29 + out[i] &= bottom29Bits + + i++ + if i == p256Limbs { + break + } + + out[i] *= 3 + out[i] += carry + carry = out[i] >> 28 + out[i] &= bottom28Bits + } + + p256ReduceCarry(out, carry) +} + +// p256Scalar4 sets out=4*out. +// +// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29. +// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. +func p256Scalar4(out *[p256Limbs]uint32) { + var carry, nextCarry uint32 + + for i := 0; ; i++ { + nextCarry = out[i] >> 27 + out[i] <<= 2 + out[i] &= bottom29Bits + out[i] += carry + carry = nextCarry + (out[i] >> 29) + out[i] &= bottom29Bits + + i++ + if i == p256Limbs { + break + } + nextCarry = out[i] >> 26 + out[i] <<= 2 + out[i] &= bottom28Bits + out[i] += carry + carry = nextCarry + (out[i] >> 28) + out[i] &= bottom28Bits + } + + p256ReduceCarry(out, carry) +} + +// p256Scalar8 sets out=8*out. +// +// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29. +// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. +func p256Scalar8(out *[p256Limbs]uint32) { + var carry, nextCarry uint32 + + for i := 0; ; i++ { + nextCarry = out[i] >> 26 + out[i] <<= 3 + out[i] &= bottom29Bits + out[i] += carry + carry = nextCarry + (out[i] >> 29) + out[i] &= bottom29Bits + + i++ + if i == p256Limbs { + break + } + nextCarry = out[i] >> 25 + out[i] <<= 3 + out[i] &= bottom28Bits + out[i] += carry + carry = nextCarry + (out[i] >> 28) + out[i] &= bottom28Bits + } + + p256ReduceCarry(out, carry) +} + +// Group operations: +// +// Elements of the elliptic curve group are represented in Jacobian +// coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in +// Jacobian form. + +// p256PointDouble sets {xOut,yOut,zOut} = 2*{x,y,z}. +// +// See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l +func p256PointDouble(xOut, yOut, zOut, x, y, z *[p256Limbs]uint32) { + var delta, gamma, alpha, beta, tmp, tmp2 [p256Limbs]uint32 + + p256Square(&delta, z) + p256Square(&gamma, y) + p256Mul(&beta, x, &gamma) + + p256Sum(&tmp, x, &delta) + p256Diff(&tmp2, x, &delta) + p256Mul(&alpha, &tmp, &tmp2) + p256Scalar3(&alpha) + + p256Sum(&tmp, y, z) + p256Square(&tmp, &tmp) + p256Diff(&tmp, &tmp, &gamma) + p256Diff(zOut, &tmp, &delta) + + p256Scalar4(&beta) + p256Square(xOut, &alpha) + p256Diff(xOut, xOut, &beta) + p256Diff(xOut, xOut, &beta) + + p256Diff(&tmp, &beta, xOut) + p256Mul(&tmp, &alpha, &tmp) + p256Square(&tmp2, &gamma) + p256Scalar8(&tmp2) + p256Diff(yOut, &tmp, &tmp2) +} + +// p256PointAddMixed sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,1}. +// (i.e. the second point is affine.) +// +// See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl +// +// Note that this function does not handle P+P, infinity+P nor P+infinity +// correctly. +func p256PointAddMixed(xOut, yOut, zOut, x1, y1, z1, x2, y2 *[p256Limbs]uint32) { + var z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp [p256Limbs]uint32 + + p256Square(&z1z1, z1) + p256Sum(&tmp, z1, z1) + + p256Mul(&u2, x2, &z1z1) + p256Mul(&z1z1z1, z1, &z1z1) + p256Mul(&s2, y2, &z1z1z1) + p256Diff(&h, &u2, x1) + p256Sum(&i, &h, &h) + p256Square(&i, &i) + p256Mul(&j, &h, &i) + p256Diff(&r, &s2, y1) + p256Sum(&r, &r, &r) + p256Mul(&v, x1, &i) + + p256Mul(zOut, &tmp, &h) + p256Square(&rr, &r) + p256Diff(xOut, &rr, &j) + p256Diff(xOut, xOut, &v) + p256Diff(xOut, xOut, &v) + + p256Diff(&tmp, &v, xOut) + p256Mul(yOut, &tmp, &r) + p256Mul(&tmp, y1, &j) + p256Diff(yOut, yOut, &tmp) + p256Diff(yOut, yOut, &tmp) +} + +// p256PointAdd sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,z2}. +// +// See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl +// +// Note that this function does not handle P+P, infinity+P nor P+infinity +// correctly. +func p256PointAdd(xOut, yOut, zOut, x1, y1, z1, x2, y2, z2 *[p256Limbs]uint32) { + var z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp [p256Limbs]uint32 + + p256Square(&z1z1, z1) + p256Square(&z2z2, z2) + p256Mul(&u1, x1, &z2z2) + + p256Sum(&tmp, z1, z2) + p256Square(&tmp, &tmp) + p256Diff(&tmp, &tmp, &z1z1) + p256Diff(&tmp, &tmp, &z2z2) + + p256Mul(&z2z2z2, z2, &z2z2) + p256Mul(&s1, y1, &z2z2z2) + + p256Mul(&u2, x2, &z1z1) + p256Mul(&z1z1z1, z1, &z1z1) + p256Mul(&s2, y2, &z1z1z1) + p256Diff(&h, &u2, &u1) + p256Sum(&i, &h, &h) + p256Square(&i, &i) + p256Mul(&j, &h, &i) + p256Diff(&r, &s2, &s1) + p256Sum(&r, &r, &r) + p256Mul(&v, &u1, &i) + + p256Mul(zOut, &tmp, &h) + p256Square(&rr, &r) + p256Diff(xOut, &rr, &j) + p256Diff(xOut, xOut, &v) + p256Diff(xOut, xOut, &v) + + p256Diff(&tmp, &v, xOut) + p256Mul(yOut, &tmp, &r) + p256Mul(&tmp, &s1, &j) + p256Diff(yOut, yOut, &tmp) + p256Diff(yOut, yOut, &tmp) +} + +// p256CopyConditional sets out=in if mask = 0xffffffff in constant time. +// +// On entry: mask is either 0 or 0xffffffff. +func p256CopyConditional(out, in *[p256Limbs]uint32, mask uint32) { + for i := 0; i < p256Limbs; i++ { + tmp := mask & (in[i] ^ out[i]) + out[i] ^= tmp + } +} + +// p256SelectAffinePoint sets {out_x,out_y} to the index'th entry of table. +// On entry: index < 16, table[0] must be zero. +func p256SelectAffinePoint(xOut, yOut *[p256Limbs]uint32, table []uint32, index uint32) { + for i := range xOut { + xOut[i] = 0 + } + for i := range yOut { + yOut[i] = 0 + } + + for i := uint32(1); i < 16; i++ { + mask := i ^ index + mask |= mask >> 2 + mask |= mask >> 1 + mask &= 1 + mask-- + for j := range xOut { + xOut[j] |= table[0] & mask + table = table[1:] + } + for j := range yOut { + yOut[j] |= table[0] & mask + table = table[1:] + } + } +} + +// p256SelectJacobianPoint sets {out_x,out_y,out_z} to the index'th entry of +// table. +// On entry: index < 16, table[0] must be zero. +func p256SelectJacobianPoint(xOut, yOut, zOut *[p256Limbs]uint32, table *[16][3][p256Limbs]uint32, index uint32) { + for i := range xOut { + xOut[i] = 0 + } + for i := range yOut { + yOut[i] = 0 + } + for i := range zOut { + zOut[i] = 0 + } + + // The implicit value at index 0 is all zero. We don't need to perform that + // iteration of the loop because we already set out_* to zero. + for i := uint32(1); i < 16; i++ { + mask := i ^ index + mask |= mask >> 2 + mask |= mask >> 1 + mask &= 1 + mask-- + for j := range xOut { + xOut[j] |= table[i][0][j] & mask + } + for j := range yOut { + yOut[j] |= table[i][1][j] & mask + } + for j := range zOut { + zOut[j] |= table[i][2][j] & mask + } + } +} + +// p256GetBit returns the bit'th bit of scalar. +func p256GetBit(scalar *[32]uint8, bit uint) uint32 { + return uint32(((scalar[bit>>3]) >> (bit & 7)) & 1) +} + +// p256ScalarBaseMult sets {xOut,yOut,zOut} = scalar*G where scalar is a +// little-endian number. Note that the value of scalar must be less than the +// order of the group. +func p256ScalarBaseMult(xOut, yOut, zOut *[p256Limbs]uint32, scalar *[32]uint8) { + nIsInfinityMask := ^uint32(0) + var pIsNoninfiniteMask, mask, tableOffset uint32 + var px, py, tx, ty, tz [p256Limbs]uint32 + + for i := range xOut { + xOut[i] = 0 + } + for i := range yOut { + yOut[i] = 0 + } + for i := range zOut { + zOut[i] = 0 + } + + // The loop adds bits at positions 0, 64, 128 and 192, followed by + // positions 32,96,160 and 224 and does this 32 times. + for i := uint(0); i < 32; i++ { + if i != 0 { + p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut) + } + tableOffset = 0 + for j := uint(0); j <= 32; j += 32 { + bit0 := p256GetBit(scalar, 31-i+j) + bit1 := p256GetBit(scalar, 95-i+j) + bit2 := p256GetBit(scalar, 159-i+j) + bit3 := p256GetBit(scalar, 223-i+j) + index := bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3) + + p256SelectAffinePoint(&px, &py, p256Precomputed[tableOffset:], index) + tableOffset += 30 * p256Limbs + + // Since scalar is less than the order of the group, we know that + // {xOut,yOut,zOut} != {px,py,1}, unless both are zero, which we handle + // below. + p256PointAddMixed(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py) + // The result of pointAddMixed is incorrect if {xOut,yOut,zOut} is zero + // (a.k.a. the point at infinity). We handle that situation by + // copying the point from the table. + p256CopyConditional(xOut, &px, nIsInfinityMask) + p256CopyConditional(yOut, &py, nIsInfinityMask) + p256CopyConditional(zOut, &p256One, nIsInfinityMask) + + // Equally, the result is also wrong if the point from the table is + // zero, which happens when the index is zero. We handle that by + // only copying from {tx,ty,tz} to {xOut,yOut,zOut} if index != 0. + pIsNoninfiniteMask = nonZeroToAllOnes(index) + mask = pIsNoninfiniteMask & ^nIsInfinityMask + p256CopyConditional(xOut, &tx, mask) + p256CopyConditional(yOut, &ty, mask) + p256CopyConditional(zOut, &tz, mask) + // If p was not zero, then n is now non-zero. + nIsInfinityMask &^= pIsNoninfiniteMask + } + } +} + +// p256PointToAffine converts a Jacobian point to an affine point. If the input +// is the point at infinity then it returns (0, 0) in constant time. +func p256PointToAffine(xOut, yOut, x, y, z *[p256Limbs]uint32) { + var zInv, zInvSq [p256Limbs]uint32 + + p256Invert(&zInv, z) + p256Square(&zInvSq, &zInv) + p256Mul(xOut, x, &zInvSq) + p256Mul(&zInv, &zInv, &zInvSq) + p256Mul(yOut, y, &zInv) +} + +// p256ToAffine returns a pair of *big.Int containing the affine representation +// of {x,y,z}. +func p256ToAffine(x, y, z *[p256Limbs]uint32) (xOut, yOut *big.Int) { + var xx, yy [p256Limbs]uint32 + p256PointToAffine(&xx, &yy, x, y, z) + return p256ToBig(&xx), p256ToBig(&yy) +} + +// p256ScalarMult sets {xOut,yOut,zOut} = scalar*{x,y}. +func p256ScalarMult(xOut, yOut, zOut, x, y *[p256Limbs]uint32, scalar *[32]uint8) { + var px, py, pz, tx, ty, tz [p256Limbs]uint32 + var precomp [16][3][p256Limbs]uint32 + var nIsInfinityMask, index, pIsNoninfiniteMask, mask uint32 + + // We precompute 0,1,2,... times {x,y}. + precomp[1][0] = *x + precomp[1][1] = *y + precomp[1][2] = p256One + + for i := 2; i < 16; i += 2 { + p256PointDouble(&precomp[i][0], &precomp[i][1], &precomp[i][2], &precomp[i/2][0], &precomp[i/2][1], &precomp[i/2][2]) + p256PointAddMixed(&precomp[i+1][0], &precomp[i+1][1], &precomp[i+1][2], &precomp[i][0], &precomp[i][1], &precomp[i][2], x, y) + } + + for i := range xOut { + xOut[i] = 0 + } + for i := range yOut { + yOut[i] = 0 + } + for i := range zOut { + zOut[i] = 0 + } + nIsInfinityMask = ^uint32(0) + + // We add in a window of four bits each iteration and do this 64 times. + for i := 0; i < 64; i++ { + if i != 0 { + p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut) + p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut) + p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut) + p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut) + } + + index = uint32(scalar[31-i/2]) + if (i & 1) == 1 { + index &= 15 + } else { + index >>= 4 + } + + // See the comments in scalarBaseMult about handling infinities. + p256SelectJacobianPoint(&px, &py, &pz, &precomp, index) + p256PointAdd(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py, &pz) + p256CopyConditional(xOut, &px, nIsInfinityMask) + p256CopyConditional(yOut, &py, nIsInfinityMask) + p256CopyConditional(zOut, &pz, nIsInfinityMask) + + pIsNoninfiniteMask = nonZeroToAllOnes(index) + mask = pIsNoninfiniteMask & ^nIsInfinityMask + p256CopyConditional(xOut, &tx, mask) + p256CopyConditional(yOut, &ty, mask) + p256CopyConditional(zOut, &tz, mask) + nIsInfinityMask &^= pIsNoninfiniteMask + } +} + +// p256FromBig sets out = R*in. +func p256FromBig(out *[p256Limbs]uint32, in *big.Int) { + tmp := new(big.Int).Lsh(in, 257) + tmp.Mod(tmp, p256Params.P) + + for i := 0; i < p256Limbs; i++ { + if bits := tmp.Bits(); len(bits) > 0 { + out[i] = uint32(bits[0]) & bottom29Bits + } else { + out[i] = 0 + } + tmp.Rsh(tmp, 29) + + i++ + if i == p256Limbs { + break + } + + if bits := tmp.Bits(); len(bits) > 0 { + out[i] = uint32(bits[0]) & bottom28Bits + } else { + out[i] = 0 + } + tmp.Rsh(tmp, 28) + } +} + +// p256ToBig returns a *big.Int containing the value of in. +func p256ToBig(in *[p256Limbs]uint32) *big.Int { + result, tmp := new(big.Int), new(big.Int) + + result.SetInt64(int64(in[p256Limbs-1])) + for i := p256Limbs - 2; i >= 0; i-- { + if (i & 1) == 0 { + result.Lsh(result, 29) + } else { + result.Lsh(result, 28) + } + tmp.SetInt64(int64(in[i])) + result.Add(result, tmp) + } + + result.Mul(result, p256RInverse) + result.Mod(result, p256Params.P) + return result +} diff --git a/src/crypto/elliptic/p256_asm.go b/src/crypto/elliptic/p256_asm.go new file mode 100644 index 0000000..9a808f2 --- /dev/null +++ b/src/crypto/elliptic/p256_asm.go @@ -0,0 +1,539 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains the Go wrapper for the constant-time, 64-bit assembly +// implementation of P256. The optimizations performed here are described in +// detail in: +// S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with +// 256-bit primes" +// https://link.springer.com/article/10.1007%2Fs13389-014-0090-x +// https://eprint.iacr.org/2013/816.pdf + +//go:build amd64 || arm64 +// +build amd64 arm64 + +package elliptic + +import ( + "math/big" +) + +type ( + p256Curve struct { + *CurveParams + } + + p256Point struct { + xyz [12]uint64 + } +) + +var p256 p256Curve + +func initP256() { + // See FIPS 186-3, section D.2.3 + p256.CurveParams = &CurveParams{Name: "P-256"} + p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10) + p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10) + p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16) + p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16) + p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16) + p256.BitSize = 256 +} + +func (curve p256Curve) Params() *CurveParams { + return curve.CurveParams +} + +// Functions implemented in p256_asm_*64.s +// Montgomery multiplication modulo P256 +//go:noescape +func p256Mul(res, in1, in2 []uint64) + +// Montgomery square modulo P256, repeated n times (n >= 1) +//go:noescape +func p256Sqr(res, in []uint64, n int) + +// Montgomery multiplication by 1 +//go:noescape +func p256FromMont(res, in []uint64) + +// iff cond == 1 val <- -val +//go:noescape +func p256NegCond(val []uint64, cond int) + +// if cond == 0 res <- b; else res <- a +//go:noescape +func p256MovCond(res, a, b []uint64, cond int) + +// Endianness swap +//go:noescape +func p256BigToLittle(res []uint64, in []byte) + +//go:noescape +func p256LittleToBig(res []byte, in []uint64) + +// Constant time table access +//go:noescape +func p256Select(point, table []uint64, idx int) + +//go:noescape +func p256SelectBase(point, table []uint64, idx int) + +// Montgomery multiplication modulo Ord(G) +//go:noescape +func p256OrdMul(res, in1, in2 []uint64) + +// Montgomery square modulo Ord(G), repeated n times +//go:noescape +func p256OrdSqr(res, in []uint64, n int) + +// Point add with in2 being affine point +// If sign == 1 -> in2 = -in2 +// If sel == 0 -> res = in1 +// if zero == 0 -> res = in2 +//go:noescape +func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int) + +// Point add. Returns one if the two input points were equal and zero +// otherwise. (Note that, due to the way that the equations work out, some +// representations of ∞ are considered equal to everything by this function.) +//go:noescape +func p256PointAddAsm(res, in1, in2 []uint64) int + +// Point double +//go:noescape +func p256PointDoubleAsm(res, in []uint64) + +func (curve p256Curve) Inverse(k *big.Int) *big.Int { + if k.Sign() < 0 { + // This should never happen. + k = new(big.Int).Neg(k) + } + + if k.Cmp(p256.N) >= 0 { + // This should never happen. + k = new(big.Int).Mod(k, p256.N) + } + + // table will store precomputed powers of x. + var table [4 * 9]uint64 + var ( + _1 = table[4*0 : 4*1] + _11 = table[4*1 : 4*2] + _101 = table[4*2 : 4*3] + _111 = table[4*3 : 4*4] + _1111 = table[4*4 : 4*5] + _10101 = table[4*5 : 4*6] + _101111 = table[4*6 : 4*7] + x = table[4*7 : 4*8] + t = table[4*8 : 4*9] + ) + + fromBig(x[:], k) + // This code operates in the Montgomery domain where R = 2^256 mod n + // and n is the order of the scalar field. (See initP256 for the + // value.) Elements in the Montgomery domain take the form a×R and + // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR + // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, + // i.e. converts x into the Montgomery domain. + // Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion + RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620} + p256OrdMul(_1, x, RR) // _1 + p256OrdSqr(x, _1, 1) // _10 + p256OrdMul(_11, x, _1) // _11 + p256OrdMul(_101, x, _11) // _101 + p256OrdMul(_111, x, _101) // _111 + p256OrdSqr(x, _101, 1) // _1010 + p256OrdMul(_1111, _101, x) // _1111 + + p256OrdSqr(t, x, 1) // _10100 + p256OrdMul(_10101, t, _1) // _10101 + p256OrdSqr(x, _10101, 1) // _101010 + p256OrdMul(_101111, _101, x) // _101111 + p256OrdMul(x, _10101, x) // _111111 = x6 + p256OrdSqr(t, x, 2) // _11111100 + p256OrdMul(t, t, _11) // _11111111 = x8 + p256OrdSqr(x, t, 8) // _ff00 + p256OrdMul(x, x, t) // _ffff = x16 + p256OrdSqr(t, x, 16) // _ffff0000 + p256OrdMul(t, t, x) // _ffffffff = x32 + + p256OrdSqr(x, t, 64) + p256OrdMul(x, x, t) + p256OrdSqr(x, x, 32) + p256OrdMul(x, x, t) + + sqrs := []uint8{ + 6, 5, 4, 5, 5, + 4, 3, 3, 5, 9, + 6, 2, 5, 6, 5, + 4, 5, 5, 3, 10, + 2, 5, 5, 3, 7, 6} + muls := [][]uint64{ + _101111, _111, _11, _1111, _10101, + _101, _101, _101, _111, _101111, + _1111, _1, _1, _1111, _111, + _111, _111, _101, _11, _101111, + _11, _11, _11, _1, _10101, _1111} + + for i, s := range sqrs { + p256OrdSqr(x, x, int(s)) + p256OrdMul(x, x, muls[i]) + } + + // Multiplying by one in the Montgomery domain converts a Montgomery + // value out of the domain. + one := []uint64{1, 0, 0, 0} + p256OrdMul(x, x, one) + + xOut := make([]byte, 32) + p256LittleToBig(xOut, x) + return new(big.Int).SetBytes(xOut) +} + +// fromBig converts a *big.Int into a format used by this code. +func fromBig(out []uint64, big *big.Int) { + for i := range out { + out[i] = 0 + } + + for i, v := range big.Bits() { + out[i] = uint64(v) + } +} + +// p256GetScalar endian-swaps the big-endian scalar value from in and writes it +// to out. If the scalar is equal or greater than the order of the group, it's +// reduced modulo that order. +func p256GetScalar(out []uint64, in []byte) { + n := new(big.Int).SetBytes(in) + + if n.Cmp(p256.N) >= 0 { + n.Mod(n, p256.N) + } + fromBig(out, n) +} + +// p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the +// underlying field of the curve. (See initP256 for the value.) Thus rr here is +// R×R mod p. See comment in Inverse about how this is used. +var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd} + +func maybeReduceModP(in *big.Int) *big.Int { + if in.Cmp(p256.P) < 0 { + return in + } + return new(big.Int).Mod(in, p256.P) +} + +func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { + scalarReversed := make([]uint64, 4) + var r1, r2 p256Point + p256GetScalar(scalarReversed, baseScalar) + r1IsInfinity := scalarIsZero(scalarReversed) + r1.p256BaseMult(scalarReversed) + + p256GetScalar(scalarReversed, scalar) + r2IsInfinity := scalarIsZero(scalarReversed) + fromBig(r2.xyz[0:4], maybeReduceModP(bigX)) + fromBig(r2.xyz[4:8], maybeReduceModP(bigY)) + p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:]) + p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:]) + + // This sets r2's Z value to 1, in the Montgomery domain. + r2.xyz[8] = 0x0000000000000001 + r2.xyz[9] = 0xffffffff00000000 + r2.xyz[10] = 0xffffffffffffffff + r2.xyz[11] = 0x00000000fffffffe + + r2.p256ScalarMult(scalarReversed) + + var sum, double p256Point + pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:]) + p256PointDoubleAsm(double.xyz[:], r1.xyz[:]) + sum.CopyConditional(&double, pointsEqual) + sum.CopyConditional(&r1, r2IsInfinity) + sum.CopyConditional(&r2, r1IsInfinity) + + return sum.p256PointToAffine() +} + +func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { + scalarReversed := make([]uint64, 4) + p256GetScalar(scalarReversed, scalar) + + var r p256Point + r.p256BaseMult(scalarReversed) + return r.p256PointToAffine() +} + +func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { + scalarReversed := make([]uint64, 4) + p256GetScalar(scalarReversed, scalar) + + var r p256Point + fromBig(r.xyz[0:4], maybeReduceModP(bigX)) + fromBig(r.xyz[4:8], maybeReduceModP(bigY)) + p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:]) + p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:]) + // This sets r2's Z value to 1, in the Montgomery domain. + r.xyz[8] = 0x0000000000000001 + r.xyz[9] = 0xffffffff00000000 + r.xyz[10] = 0xffffffffffffffff + r.xyz[11] = 0x00000000fffffffe + + r.p256ScalarMult(scalarReversed) + return r.p256PointToAffine() +} + +// uint64IsZero returns 1 if x is zero and zero otherwise. +func uint64IsZero(x uint64) int { + x = ^x + x &= x >> 32 + x &= x >> 16 + x &= x >> 8 + x &= x >> 4 + x &= x >> 2 + x &= x >> 1 + return int(x & 1) +} + +// scalarIsZero returns 1 if scalar represents the zero value, and zero +// otherwise. +func scalarIsZero(scalar []uint64) int { + return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3]) +} + +func (p *p256Point) p256PointToAffine() (x, y *big.Int) { + zInv := make([]uint64, 4) + zInvSq := make([]uint64, 4) + p256Inverse(zInv, p.xyz[8:12]) + p256Sqr(zInvSq, zInv, 1) + p256Mul(zInv, zInv, zInvSq) + + p256Mul(zInvSq, p.xyz[0:4], zInvSq) + p256Mul(zInv, p.xyz[4:8], zInv) + + p256FromMont(zInvSq, zInvSq) + p256FromMont(zInv, zInv) + + xOut := make([]byte, 32) + yOut := make([]byte, 32) + p256LittleToBig(xOut, zInvSq) + p256LittleToBig(yOut, zInv) + + return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut) +} + +// CopyConditional copies overwrites p with src if v == 1, and leaves p +// unchanged if v == 0. +func (p *p256Point) CopyConditional(src *p256Point, v int) { + pMask := uint64(v) - 1 + srcMask := ^pMask + + for i, n := range p.xyz { + p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask) + } +} + +// p256Inverse sets out to in^-1 mod p. +func p256Inverse(out, in []uint64) { + var stack [6 * 4]uint64 + p2 := stack[4*0 : 4*0+4] + p4 := stack[4*1 : 4*1+4] + p8 := stack[4*2 : 4*2+4] + p16 := stack[4*3 : 4*3+4] + p32 := stack[4*4 : 4*4+4] + + p256Sqr(out, in, 1) + p256Mul(p2, out, in) // 3*p + + p256Sqr(out, p2, 2) + p256Mul(p4, out, p2) // f*p + + p256Sqr(out, p4, 4) + p256Mul(p8, out, p4) // ff*p + + p256Sqr(out, p8, 8) + p256Mul(p16, out, p8) // ffff*p + + p256Sqr(out, p16, 16) + p256Mul(p32, out, p16) // ffffffff*p + + p256Sqr(out, p32, 32) + p256Mul(out, out, in) + + p256Sqr(out, out, 128) + p256Mul(out, out, p32) + + p256Sqr(out, out, 32) + p256Mul(out, out, p32) + + p256Sqr(out, out, 16) + p256Mul(out, out, p16) + + p256Sqr(out, out, 8) + p256Mul(out, out, p8) + + p256Sqr(out, out, 4) + p256Mul(out, out, p4) + + p256Sqr(out, out, 2) + p256Mul(out, out, p2) + + p256Sqr(out, out, 2) + p256Mul(out, out, in) +} + +func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) { + copy(r[index*12:], p.xyz[:]) +} + +func boothW5(in uint) (int, int) { + var s uint = ^((in >> 5) - 1) + var d uint = (1 << 6) - in - 1 + d = (d & s) | (in & (^s)) + d = (d >> 1) + (d & 1) + return int(d), int(s & 1) +} + +func boothW6(in uint) (int, int) { + var s uint = ^((in >> 6) - 1) + var d uint = (1 << 7) - in - 1 + d = (d & s) | (in & (^s)) + d = (d >> 1) + (d & 1) + return int(d), int(s & 1) +} + +func (p *p256Point) p256BaseMult(scalar []uint64) { + wvalue := (scalar[0] << 1) & 0x7f + sel, sign := boothW6(uint(wvalue)) + p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel) + p256NegCond(p.xyz[4:8], sign) + + // (This is one, in the Montgomery domain.) + p.xyz[8] = 0x0000000000000001 + p.xyz[9] = 0xffffffff00000000 + p.xyz[10] = 0xffffffffffffffff + p.xyz[11] = 0x00000000fffffffe + + var t0 p256Point + // (This is one, in the Montgomery domain.) + t0.xyz[8] = 0x0000000000000001 + t0.xyz[9] = 0xffffffff00000000 + t0.xyz[10] = 0xffffffffffffffff + t0.xyz[11] = 0x00000000fffffffe + + index := uint(5) + zero := sel + + for i := 1; i < 43; i++ { + if index < 192 { + wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f + } else { + wvalue = (scalar[index/64] >> (index % 64)) & 0x7f + } + index += 6 + sel, sign = boothW6(uint(wvalue)) + p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel) + p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero) + zero |= sel + } +} + +func (p *p256Point) p256ScalarMult(scalar []uint64) { + // precomp is a table of precomputed points that stores powers of p + // from p^1 to p^16. + var precomp [16 * 4 * 3]uint64 + var t0, t1, t2, t3 p256Point + + // Prepare the table + p.p256StorePoint(&precomp, 0) // 1 + + p256PointDoubleAsm(t0.xyz[:], p.xyz[:]) + p256PointDoubleAsm(t1.xyz[:], t0.xyz[:]) + p256PointDoubleAsm(t2.xyz[:], t1.xyz[:]) + p256PointDoubleAsm(t3.xyz[:], t2.xyz[:]) + t0.p256StorePoint(&precomp, 1) // 2 + t1.p256StorePoint(&precomp, 3) // 4 + t2.p256StorePoint(&precomp, 7) // 8 + t3.p256StorePoint(&precomp, 15) // 16 + + p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) + p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) + p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) + t0.p256StorePoint(&precomp, 2) // 3 + t1.p256StorePoint(&precomp, 4) // 5 + t2.p256StorePoint(&precomp, 8) // 9 + + p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) + p256PointDoubleAsm(t1.xyz[:], t1.xyz[:]) + t0.p256StorePoint(&precomp, 5) // 6 + t1.p256StorePoint(&precomp, 9) // 10 + + p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:]) + p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) + t2.p256StorePoint(&precomp, 6) // 7 + t1.p256StorePoint(&precomp, 10) // 11 + + p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) + p256PointDoubleAsm(t2.xyz[:], t2.xyz[:]) + t0.p256StorePoint(&precomp, 11) // 12 + t2.p256StorePoint(&precomp, 13) // 14 + + p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) + p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) + t0.p256StorePoint(&precomp, 12) // 13 + t2.p256StorePoint(&precomp, 14) // 15 + + // Start scanning the window from top bit + index := uint(254) + var sel, sign int + + wvalue := (scalar[index/64] >> (index % 64)) & 0x3f + sel, _ = boothW5(uint(wvalue)) + + p256Select(p.xyz[0:12], precomp[0:], sel) + zero := sel + + for index > 4 { + index -= 5 + p256PointDoubleAsm(p.xyz[:], p.xyz[:]) + p256PointDoubleAsm(p.xyz[:], p.xyz[:]) + p256PointDoubleAsm(p.xyz[:], p.xyz[:]) + p256PointDoubleAsm(p.xyz[:], p.xyz[:]) + p256PointDoubleAsm(p.xyz[:], p.xyz[:]) + + if index < 192 { + wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f + } else { + wvalue = (scalar[index/64] >> (index % 64)) & 0x3f + } + + sel, sign = boothW5(uint(wvalue)) + + p256Select(t0.xyz[0:], precomp[0:], sel) + p256NegCond(t0.xyz[4:8], sign) + p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) + p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) + p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) + zero |= sel + } + + p256PointDoubleAsm(p.xyz[:], p.xyz[:]) + p256PointDoubleAsm(p.xyz[:], p.xyz[:]) + p256PointDoubleAsm(p.xyz[:], p.xyz[:]) + p256PointDoubleAsm(p.xyz[:], p.xyz[:]) + p256PointDoubleAsm(p.xyz[:], p.xyz[:]) + + wvalue = (scalar[0] << 1) & 0x3f + sel, sign = boothW5(uint(wvalue)) + + p256Select(t0.xyz[0:], precomp[0:], sel) + p256NegCond(t0.xyz[4:8], sign) + p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) + p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) + p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) +} diff --git a/src/crypto/elliptic/p256_asm_amd64.s b/src/crypto/elliptic/p256_asm_amd64.s new file mode 100644 index 0000000..c77b11b --- /dev/null +++ b/src/crypto/elliptic/p256_asm_amd64.s @@ -0,0 +1,2347 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains constant-time, 64-bit assembly implementation of +// P256. The optimizations performed here are described in detail in: +// S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with +// 256-bit primes" +// https://link.springer.com/article/10.1007%2Fs13389-014-0090-x +// https://eprint.iacr.org/2013/816.pdf + +#include "textflag.h" + +#define res_ptr DI +#define x_ptr SI +#define y_ptr CX + +#define acc0 R8 +#define acc1 R9 +#define acc2 R10 +#define acc3 R11 +#define acc4 R12 +#define acc5 R13 +#define t0 R14 +#define t1 R15 + +DATA p256const0<>+0x00(SB)/8, $0x00000000ffffffff +DATA p256const1<>+0x00(SB)/8, $0xffffffff00000001 +DATA p256ordK0<>+0x00(SB)/8, $0xccd1c8aaee00bc4f +DATA p256ord<>+0x00(SB)/8, $0xf3b9cac2fc632551 +DATA p256ord<>+0x08(SB)/8, $0xbce6faada7179e84 +DATA p256ord<>+0x10(SB)/8, $0xffffffffffffffff +DATA p256ord<>+0x18(SB)/8, $0xffffffff00000000 +DATA p256one<>+0x00(SB)/8, $0x0000000000000001 +DATA p256one<>+0x08(SB)/8, $0xffffffff00000000 +DATA p256one<>+0x10(SB)/8, $0xffffffffffffffff +DATA p256one<>+0x18(SB)/8, $0x00000000fffffffe +GLOBL p256const0<>(SB), 8, $8 +GLOBL p256const1<>(SB), 8, $8 +GLOBL p256ordK0<>(SB), 8, $8 +GLOBL p256ord<>(SB), 8, $32 +GLOBL p256one<>(SB), 8, $32 + +/* ---------------------------------------*/ +// func p256LittleToBig(res []byte, in []uint64) +TEXT ·p256LittleToBig(SB),NOSPLIT,$0 + JMP ·p256BigToLittle(SB) +/* ---------------------------------------*/ +// func p256BigToLittle(res []uint64, in []byte) +TEXT ·p256BigToLittle(SB),NOSPLIT,$0 + MOVQ res+0(FP), res_ptr + MOVQ in+24(FP), x_ptr + + MOVQ (8*0)(x_ptr), acc0 + MOVQ (8*1)(x_ptr), acc1 + MOVQ (8*2)(x_ptr), acc2 + MOVQ (8*3)(x_ptr), acc3 + + BSWAPQ acc0 + BSWAPQ acc1 + BSWAPQ acc2 + BSWAPQ acc3 + + MOVQ acc3, (8*0)(res_ptr) + MOVQ acc2, (8*1)(res_ptr) + MOVQ acc1, (8*2)(res_ptr) + MOVQ acc0, (8*3)(res_ptr) + + RET +/* ---------------------------------------*/ +// func p256MovCond(res, a, b []uint64, cond int) +// If cond == 0 res=b, else res=a +TEXT ·p256MovCond(SB),NOSPLIT,$0 + MOVQ res+0(FP), res_ptr + MOVQ a+24(FP), x_ptr + MOVQ b+48(FP), y_ptr + MOVQ cond+72(FP), X12 + + PXOR X13, X13 + PSHUFD $0, X12, X12 + PCMPEQL X13, X12 + + MOVOU X12, X0 + MOVOU (16*0)(x_ptr), X6 + PANDN X6, X0 + MOVOU X12, X1 + MOVOU (16*1)(x_ptr), X7 + PANDN X7, X1 + MOVOU X12, X2 + MOVOU (16*2)(x_ptr), X8 + PANDN X8, X2 + MOVOU X12, X3 + MOVOU (16*3)(x_ptr), X9 + PANDN X9, X3 + MOVOU X12, X4 + MOVOU (16*4)(x_ptr), X10 + PANDN X10, X4 + MOVOU X12, X5 + MOVOU (16*5)(x_ptr), X11 + PANDN X11, X5 + + MOVOU (16*0)(y_ptr), X6 + MOVOU (16*1)(y_ptr), X7 + MOVOU (16*2)(y_ptr), X8 + MOVOU (16*3)(y_ptr), X9 + MOVOU (16*4)(y_ptr), X10 + MOVOU (16*5)(y_ptr), X11 + + PAND X12, X6 + PAND X12, X7 + PAND X12, X8 + PAND X12, X9 + PAND X12, X10 + PAND X12, X11 + + PXOR X6, X0 + PXOR X7, X1 + PXOR X8, X2 + PXOR X9, X3 + PXOR X10, X4 + PXOR X11, X5 + + MOVOU X0, (16*0)(res_ptr) + MOVOU X1, (16*1)(res_ptr) + MOVOU X2, (16*2)(res_ptr) + MOVOU X3, (16*3)(res_ptr) + MOVOU X4, (16*4)(res_ptr) + MOVOU X5, (16*5)(res_ptr) + + RET +/* ---------------------------------------*/ +// func p256NegCond(val []uint64, cond int) +TEXT ·p256NegCond(SB),NOSPLIT,$0 + MOVQ val+0(FP), res_ptr + MOVQ cond+24(FP), t0 + // acc = poly + MOVQ $-1, acc0 + MOVQ p256const0<>(SB), acc1 + MOVQ $0, acc2 + MOVQ p256const1<>(SB), acc3 + // Load the original value + MOVQ (8*0)(res_ptr), acc5 + MOVQ (8*1)(res_ptr), x_ptr + MOVQ (8*2)(res_ptr), y_ptr + MOVQ (8*3)(res_ptr), t1 + // Speculatively subtract + SUBQ acc5, acc0 + SBBQ x_ptr, acc1 + SBBQ y_ptr, acc2 + SBBQ t1, acc3 + // If condition is 0, keep original value + TESTQ t0, t0 + CMOVQEQ acc5, acc0 + CMOVQEQ x_ptr, acc1 + CMOVQEQ y_ptr, acc2 + CMOVQEQ t1, acc3 + // Store result + MOVQ acc0, (8*0)(res_ptr) + MOVQ acc1, (8*1)(res_ptr) + MOVQ acc2, (8*2)(res_ptr) + MOVQ acc3, (8*3)(res_ptr) + + RET +/* ---------------------------------------*/ +// func p256Sqr(res, in []uint64, n int) +TEXT ·p256Sqr(SB),NOSPLIT,$0 + MOVQ res+0(FP), res_ptr + MOVQ in+24(FP), x_ptr + MOVQ n+48(FP), BX + +sqrLoop: + + // y[1:] * y[0] + MOVQ (8*0)(x_ptr), t0 + + MOVQ (8*1)(x_ptr), AX + MULQ t0 + MOVQ AX, acc1 + MOVQ DX, acc2 + + MOVQ (8*2)(x_ptr), AX + MULQ t0 + ADDQ AX, acc2 + ADCQ $0, DX + MOVQ DX, acc3 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, acc4 + // y[2:] * y[1] + MOVQ (8*1)(x_ptr), t0 + + MOVQ (8*2)(x_ptr), AX + MULQ t0 + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ t1, acc4 + ADCQ $0, DX + ADDQ AX, acc4 + ADCQ $0, DX + MOVQ DX, acc5 + // y[3] * y[2] + MOVQ (8*2)(x_ptr), t0 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ AX, acc5 + ADCQ $0, DX + MOVQ DX, y_ptr + XORQ t1, t1 + // *2 + ADDQ acc1, acc1 + ADCQ acc2, acc2 + ADCQ acc3, acc3 + ADCQ acc4, acc4 + ADCQ acc5, acc5 + ADCQ y_ptr, y_ptr + ADCQ $0, t1 + // Missing products + MOVQ (8*0)(x_ptr), AX + MULQ AX + MOVQ AX, acc0 + MOVQ DX, t0 + + MOVQ (8*1)(x_ptr), AX + MULQ AX + ADDQ t0, acc1 + ADCQ AX, acc2 + ADCQ $0, DX + MOVQ DX, t0 + + MOVQ (8*2)(x_ptr), AX + MULQ AX + ADDQ t0, acc3 + ADCQ AX, acc4 + ADCQ $0, DX + MOVQ DX, t0 + + MOVQ (8*3)(x_ptr), AX + MULQ AX + ADDQ t0, acc5 + ADCQ AX, y_ptr + ADCQ DX, t1 + MOVQ t1, x_ptr + // First reduction step + MOVQ acc0, AX + MOVQ acc0, t1 + SHLQ $32, acc0 + MULQ p256const1<>(SB) + SHRQ $32, t1 + ADDQ acc0, acc1 + ADCQ t1, acc2 + ADCQ AX, acc3 + ADCQ $0, DX + MOVQ DX, acc0 + // Second reduction step + MOVQ acc1, AX + MOVQ acc1, t1 + SHLQ $32, acc1 + MULQ p256const1<>(SB) + SHRQ $32, t1 + ADDQ acc1, acc2 + ADCQ t1, acc3 + ADCQ AX, acc0 + ADCQ $0, DX + MOVQ DX, acc1 + // Third reduction step + MOVQ acc2, AX + MOVQ acc2, t1 + SHLQ $32, acc2 + MULQ p256const1<>(SB) + SHRQ $32, t1 + ADDQ acc2, acc3 + ADCQ t1, acc0 + ADCQ AX, acc1 + ADCQ $0, DX + MOVQ DX, acc2 + // Last reduction step + XORQ t0, t0 + MOVQ acc3, AX + MOVQ acc3, t1 + SHLQ $32, acc3 + MULQ p256const1<>(SB) + SHRQ $32, t1 + ADDQ acc3, acc0 + ADCQ t1, acc1 + ADCQ AX, acc2 + ADCQ $0, DX + MOVQ DX, acc3 + // Add bits [511:256] of the sqr result + ADCQ acc4, acc0 + ADCQ acc5, acc1 + ADCQ y_ptr, acc2 + ADCQ x_ptr, acc3 + ADCQ $0, t0 + + MOVQ acc0, acc4 + MOVQ acc1, acc5 + MOVQ acc2, y_ptr + MOVQ acc3, t1 + // Subtract p256 + SUBQ $-1, acc0 + SBBQ p256const0<>(SB) ,acc1 + SBBQ $0, acc2 + SBBQ p256const1<>(SB), acc3 + SBBQ $0, t0 + + CMOVQCS acc4, acc0 + CMOVQCS acc5, acc1 + CMOVQCS y_ptr, acc2 + CMOVQCS t1, acc3 + + MOVQ acc0, (8*0)(res_ptr) + MOVQ acc1, (8*1)(res_ptr) + MOVQ acc2, (8*2)(res_ptr) + MOVQ acc3, (8*3)(res_ptr) + MOVQ res_ptr, x_ptr + DECQ BX + JNE sqrLoop + + RET +/* ---------------------------------------*/ +// func p256Mul(res, in1, in2 []uint64) +TEXT ·p256Mul(SB),NOSPLIT,$0 + MOVQ res+0(FP), res_ptr + MOVQ in1+24(FP), x_ptr + MOVQ in2+48(FP), y_ptr + // x * y[0] + MOVQ (8*0)(y_ptr), t0 + + MOVQ (8*0)(x_ptr), AX + MULQ t0 + MOVQ AX, acc0 + MOVQ DX, acc1 + + MOVQ (8*1)(x_ptr), AX + MULQ t0 + ADDQ AX, acc1 + ADCQ $0, DX + MOVQ DX, acc2 + + MOVQ (8*2)(x_ptr), AX + MULQ t0 + ADDQ AX, acc2 + ADCQ $0, DX + MOVQ DX, acc3 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, acc4 + XORQ acc5, acc5 + // First reduction step + MOVQ acc0, AX + MOVQ acc0, t1 + SHLQ $32, acc0 + MULQ p256const1<>(SB) + SHRQ $32, t1 + ADDQ acc0, acc1 + ADCQ t1, acc2 + ADCQ AX, acc3 + ADCQ DX, acc4 + ADCQ $0, acc5 + XORQ acc0, acc0 + // x * y[1] + MOVQ (8*1)(y_ptr), t0 + + MOVQ (8*0)(x_ptr), AX + MULQ t0 + ADDQ AX, acc1 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*1)(x_ptr), AX + MULQ t0 + ADDQ t1, acc2 + ADCQ $0, DX + ADDQ AX, acc2 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*2)(x_ptr), AX + MULQ t0 + ADDQ t1, acc3 + ADCQ $0, DX + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ t1, acc4 + ADCQ $0, DX + ADDQ AX, acc4 + ADCQ DX, acc5 + ADCQ $0, acc0 + // Second reduction step + MOVQ acc1, AX + MOVQ acc1, t1 + SHLQ $32, acc1 + MULQ p256const1<>(SB) + SHRQ $32, t1 + ADDQ acc1, acc2 + ADCQ t1, acc3 + ADCQ AX, acc4 + ADCQ DX, acc5 + ADCQ $0, acc0 + XORQ acc1, acc1 + // x * y[2] + MOVQ (8*2)(y_ptr), t0 + + MOVQ (8*0)(x_ptr), AX + MULQ t0 + ADDQ AX, acc2 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*1)(x_ptr), AX + MULQ t0 + ADDQ t1, acc3 + ADCQ $0, DX + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*2)(x_ptr), AX + MULQ t0 + ADDQ t1, acc4 + ADCQ $0, DX + ADDQ AX, acc4 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ t1, acc5 + ADCQ $0, DX + ADDQ AX, acc5 + ADCQ DX, acc0 + ADCQ $0, acc1 + // Third reduction step + MOVQ acc2, AX + MOVQ acc2, t1 + SHLQ $32, acc2 + MULQ p256const1<>(SB) + SHRQ $32, t1 + ADDQ acc2, acc3 + ADCQ t1, acc4 + ADCQ AX, acc5 + ADCQ DX, acc0 + ADCQ $0, acc1 + XORQ acc2, acc2 + // x * y[3] + MOVQ (8*3)(y_ptr), t0 + + MOVQ (8*0)(x_ptr), AX + MULQ t0 + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*1)(x_ptr), AX + MULQ t0 + ADDQ t1, acc4 + ADCQ $0, DX + ADDQ AX, acc4 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*2)(x_ptr), AX + MULQ t0 + ADDQ t1, acc5 + ADCQ $0, DX + ADDQ AX, acc5 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ t1, acc0 + ADCQ $0, DX + ADDQ AX, acc0 + ADCQ DX, acc1 + ADCQ $0, acc2 + // Last reduction step + MOVQ acc3, AX + MOVQ acc3, t1 + SHLQ $32, acc3 + MULQ p256const1<>(SB) + SHRQ $32, t1 + ADDQ acc3, acc4 + ADCQ t1, acc5 + ADCQ AX, acc0 + ADCQ DX, acc1 + ADCQ $0, acc2 + // Copy result [255:0] + MOVQ acc4, x_ptr + MOVQ acc5, acc3 + MOVQ acc0, t0 + MOVQ acc1, t1 + // Subtract p256 + SUBQ $-1, acc4 + SBBQ p256const0<>(SB) ,acc5 + SBBQ $0, acc0 + SBBQ p256const1<>(SB), acc1 + SBBQ $0, acc2 + + CMOVQCS x_ptr, acc4 + CMOVQCS acc3, acc5 + CMOVQCS t0, acc0 + CMOVQCS t1, acc1 + + MOVQ acc4, (8*0)(res_ptr) + MOVQ acc5, (8*1)(res_ptr) + MOVQ acc0, (8*2)(res_ptr) + MOVQ acc1, (8*3)(res_ptr) + + RET +/* ---------------------------------------*/ +// func p256FromMont(res, in []uint64) +TEXT ·p256FromMont(SB),NOSPLIT,$0 + MOVQ res+0(FP), res_ptr + MOVQ in+24(FP), x_ptr + + MOVQ (8*0)(x_ptr), acc0 + MOVQ (8*1)(x_ptr), acc1 + MOVQ (8*2)(x_ptr), acc2 + MOVQ (8*3)(x_ptr), acc3 + XORQ acc4, acc4 + + // Only reduce, no multiplications are needed + // First stage + MOVQ acc0, AX + MOVQ acc0, t1 + SHLQ $32, acc0 + MULQ p256const1<>(SB) + SHRQ $32, t1 + ADDQ acc0, acc1 + ADCQ t1, acc2 + ADCQ AX, acc3 + ADCQ DX, acc4 + XORQ acc5, acc5 + // Second stage + MOVQ acc1, AX + MOVQ acc1, t1 + SHLQ $32, acc1 + MULQ p256const1<>(SB) + SHRQ $32, t1 + ADDQ acc1, acc2 + ADCQ t1, acc3 + ADCQ AX, acc4 + ADCQ DX, acc5 + XORQ acc0, acc0 + // Third stage + MOVQ acc2, AX + MOVQ acc2, t1 + SHLQ $32, acc2 + MULQ p256const1<>(SB) + SHRQ $32, t1 + ADDQ acc2, acc3 + ADCQ t1, acc4 + ADCQ AX, acc5 + ADCQ DX, acc0 + XORQ acc1, acc1 + // Last stage + MOVQ acc3, AX + MOVQ acc3, t1 + SHLQ $32, acc3 + MULQ p256const1<>(SB) + SHRQ $32, t1 + ADDQ acc3, acc4 + ADCQ t1, acc5 + ADCQ AX, acc0 + ADCQ DX, acc1 + + MOVQ acc4, x_ptr + MOVQ acc5, acc3 + MOVQ acc0, t0 + MOVQ acc1, t1 + + SUBQ $-1, acc4 + SBBQ p256const0<>(SB), acc5 + SBBQ $0, acc0 + SBBQ p256const1<>(SB), acc1 + + CMOVQCS x_ptr, acc4 + CMOVQCS acc3, acc5 + CMOVQCS t0, acc0 + CMOVQCS t1, acc1 + + MOVQ acc4, (8*0)(res_ptr) + MOVQ acc5, (8*1)(res_ptr) + MOVQ acc0, (8*2)(res_ptr) + MOVQ acc1, (8*3)(res_ptr) + + RET +/* ---------------------------------------*/ +// Constant time point access to arbitrary point table. +// Indexed from 1 to 15, with -1 offset +// (index 0 is implicitly point at infinity) +// func p256Select(point, table []uint64, idx int) +TEXT ·p256Select(SB),NOSPLIT,$0 + MOVQ idx+48(FP),AX + MOVQ table+24(FP),DI + MOVQ point+0(FP),DX + + PXOR X15, X15 // X15 = 0 + PCMPEQL X14, X14 // X14 = -1 + PSUBL X14, X15 // X15 = 1 + MOVL AX, X14 + PSHUFD $0, X14, X14 + + PXOR X0, X0 + PXOR X1, X1 + PXOR X2, X2 + PXOR X3, X3 + PXOR X4, X4 + PXOR X5, X5 + MOVQ $16, AX + + MOVOU X15, X13 + +loop_select: + + MOVOU X13, X12 + PADDL X15, X13 + PCMPEQL X14, X12 + + MOVOU (16*0)(DI), X6 + MOVOU (16*1)(DI), X7 + MOVOU (16*2)(DI), X8 + MOVOU (16*3)(DI), X9 + MOVOU (16*4)(DI), X10 + MOVOU (16*5)(DI), X11 + ADDQ $(16*6), DI + + PAND X12, X6 + PAND X12, X7 + PAND X12, X8 + PAND X12, X9 + PAND X12, X10 + PAND X12, X11 + + PXOR X6, X0 + PXOR X7, X1 + PXOR X8, X2 + PXOR X9, X3 + PXOR X10, X4 + PXOR X11, X5 + + DECQ AX + JNE loop_select + + MOVOU X0, (16*0)(DX) + MOVOU X1, (16*1)(DX) + MOVOU X2, (16*2)(DX) + MOVOU X3, (16*3)(DX) + MOVOU X4, (16*4)(DX) + MOVOU X5, (16*5)(DX) + + RET +/* ---------------------------------------*/ +// Constant time point access to base point table. +// func p256SelectBase(point, table []uint64, idx int) +TEXT ·p256SelectBase(SB),NOSPLIT,$0 + MOVQ idx+48(FP),AX + MOVQ table+24(FP),DI + MOVQ point+0(FP),DX + + PXOR X15, X15 // X15 = 0 + PCMPEQL X14, X14 // X14 = -1 + PSUBL X14, X15 // X15 = 1 + MOVL AX, X14 + PSHUFD $0, X14, X14 + + PXOR X0, X0 + PXOR X1, X1 + PXOR X2, X2 + PXOR X3, X3 + MOVQ $16, AX + + MOVOU X15, X13 + +loop_select_base: + + MOVOU X13, X12 + PADDL X15, X13 + PCMPEQL X14, X12 + + MOVOU (16*0)(DI), X4 + MOVOU (16*1)(DI), X5 + MOVOU (16*2)(DI), X6 + MOVOU (16*3)(DI), X7 + + MOVOU (16*4)(DI), X8 + MOVOU (16*5)(DI), X9 + MOVOU (16*6)(DI), X10 + MOVOU (16*7)(DI), X11 + + ADDQ $(16*8), DI + + PAND X12, X4 + PAND X12, X5 + PAND X12, X6 + PAND X12, X7 + + MOVOU X13, X12 + PADDL X15, X13 + PCMPEQL X14, X12 + + PAND X12, X8 + PAND X12, X9 + PAND X12, X10 + PAND X12, X11 + + PXOR X4, X0 + PXOR X5, X1 + PXOR X6, X2 + PXOR X7, X3 + + PXOR X8, X0 + PXOR X9, X1 + PXOR X10, X2 + PXOR X11, X3 + + DECQ AX + JNE loop_select_base + + MOVOU X0, (16*0)(DX) + MOVOU X1, (16*1)(DX) + MOVOU X2, (16*2)(DX) + MOVOU X3, (16*3)(DX) + + RET +/* ---------------------------------------*/ +// func p256OrdMul(res, in1, in2 []uint64) +TEXT ·p256OrdMul(SB),NOSPLIT,$0 + MOVQ res+0(FP), res_ptr + MOVQ in1+24(FP), x_ptr + MOVQ in2+48(FP), y_ptr + // x * y[0] + MOVQ (8*0)(y_ptr), t0 + + MOVQ (8*0)(x_ptr), AX + MULQ t0 + MOVQ AX, acc0 + MOVQ DX, acc1 + + MOVQ (8*1)(x_ptr), AX + MULQ t0 + ADDQ AX, acc1 + ADCQ $0, DX + MOVQ DX, acc2 + + MOVQ (8*2)(x_ptr), AX + MULQ t0 + ADDQ AX, acc2 + ADCQ $0, DX + MOVQ DX, acc3 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, acc4 + XORQ acc5, acc5 + // First reduction step + MOVQ acc0, AX + MULQ p256ordK0<>(SB) + MOVQ AX, t0 + + MOVQ p256ord<>+0x00(SB), AX + MULQ t0 + ADDQ AX, acc0 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x08(SB), AX + MULQ t0 + ADDQ t1, acc1 + ADCQ $0, DX + ADDQ AX, acc1 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x10(SB), AX + MULQ t0 + ADDQ t1, acc2 + ADCQ $0, DX + ADDQ AX, acc2 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x18(SB), AX + MULQ t0 + ADDQ t1, acc3 + ADCQ $0, DX + ADDQ AX, acc3 + ADCQ DX, acc4 + ADCQ $0, acc5 + // x * y[1] + MOVQ (8*1)(y_ptr), t0 + + MOVQ (8*0)(x_ptr), AX + MULQ t0 + ADDQ AX, acc1 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*1)(x_ptr), AX + MULQ t0 + ADDQ t1, acc2 + ADCQ $0, DX + ADDQ AX, acc2 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*2)(x_ptr), AX + MULQ t0 + ADDQ t1, acc3 + ADCQ $0, DX + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ t1, acc4 + ADCQ $0, DX + ADDQ AX, acc4 + ADCQ DX, acc5 + ADCQ $0, acc0 + // Second reduction step + MOVQ acc1, AX + MULQ p256ordK0<>(SB) + MOVQ AX, t0 + + MOVQ p256ord<>+0x00(SB), AX + MULQ t0 + ADDQ AX, acc1 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x08(SB), AX + MULQ t0 + ADDQ t1, acc2 + ADCQ $0, DX + ADDQ AX, acc2 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x10(SB), AX + MULQ t0 + ADDQ t1, acc3 + ADCQ $0, DX + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x18(SB), AX + MULQ t0 + ADDQ t1, acc4 + ADCQ $0, DX + ADDQ AX, acc4 + ADCQ DX, acc5 + ADCQ $0, acc0 + // x * y[2] + MOVQ (8*2)(y_ptr), t0 + + MOVQ (8*0)(x_ptr), AX + MULQ t0 + ADDQ AX, acc2 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*1)(x_ptr), AX + MULQ t0 + ADDQ t1, acc3 + ADCQ $0, DX + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*2)(x_ptr), AX + MULQ t0 + ADDQ t1, acc4 + ADCQ $0, DX + ADDQ AX, acc4 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ t1, acc5 + ADCQ $0, DX + ADDQ AX, acc5 + ADCQ DX, acc0 + ADCQ $0, acc1 + // Third reduction step + MOVQ acc2, AX + MULQ p256ordK0<>(SB) + MOVQ AX, t0 + + MOVQ p256ord<>+0x00(SB), AX + MULQ t0 + ADDQ AX, acc2 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x08(SB), AX + MULQ t0 + ADDQ t1, acc3 + ADCQ $0, DX + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x10(SB), AX + MULQ t0 + ADDQ t1, acc4 + ADCQ $0, DX + ADDQ AX, acc4 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x18(SB), AX + MULQ t0 + ADDQ t1, acc5 + ADCQ $0, DX + ADDQ AX, acc5 + ADCQ DX, acc0 + ADCQ $0, acc1 + // x * y[3] + MOVQ (8*3)(y_ptr), t0 + + MOVQ (8*0)(x_ptr), AX + MULQ t0 + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*1)(x_ptr), AX + MULQ t0 + ADDQ t1, acc4 + ADCQ $0, DX + ADDQ AX, acc4 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*2)(x_ptr), AX + MULQ t0 + ADDQ t1, acc5 + ADCQ $0, DX + ADDQ AX, acc5 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ t1, acc0 + ADCQ $0, DX + ADDQ AX, acc0 + ADCQ DX, acc1 + ADCQ $0, acc2 + // Last reduction step + MOVQ acc3, AX + MULQ p256ordK0<>(SB) + MOVQ AX, t0 + + MOVQ p256ord<>+0x00(SB), AX + MULQ t0 + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x08(SB), AX + MULQ t0 + ADDQ t1, acc4 + ADCQ $0, DX + ADDQ AX, acc4 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x10(SB), AX + MULQ t0 + ADDQ t1, acc5 + ADCQ $0, DX + ADDQ AX, acc5 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x18(SB), AX + MULQ t0 + ADDQ t1, acc0 + ADCQ $0, DX + ADDQ AX, acc0 + ADCQ DX, acc1 + ADCQ $0, acc2 + // Copy result [255:0] + MOVQ acc4, x_ptr + MOVQ acc5, acc3 + MOVQ acc0, t0 + MOVQ acc1, t1 + // Subtract p256 + SUBQ p256ord<>+0x00(SB), acc4 + SBBQ p256ord<>+0x08(SB) ,acc5 + SBBQ p256ord<>+0x10(SB), acc0 + SBBQ p256ord<>+0x18(SB), acc1 + SBBQ $0, acc2 + + CMOVQCS x_ptr, acc4 + CMOVQCS acc3, acc5 + CMOVQCS t0, acc0 + CMOVQCS t1, acc1 + + MOVQ acc4, (8*0)(res_ptr) + MOVQ acc5, (8*1)(res_ptr) + MOVQ acc0, (8*2)(res_ptr) + MOVQ acc1, (8*3)(res_ptr) + + RET +/* ---------------------------------------*/ +// func p256OrdSqr(res, in []uint64, n int) +TEXT ·p256OrdSqr(SB),NOSPLIT,$0 + MOVQ res+0(FP), res_ptr + MOVQ in+24(FP), x_ptr + MOVQ n+48(FP), BX + +ordSqrLoop: + + // y[1:] * y[0] + MOVQ (8*0)(x_ptr), t0 + + MOVQ (8*1)(x_ptr), AX + MULQ t0 + MOVQ AX, acc1 + MOVQ DX, acc2 + + MOVQ (8*2)(x_ptr), AX + MULQ t0 + ADDQ AX, acc2 + ADCQ $0, DX + MOVQ DX, acc3 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, acc4 + // y[2:] * y[1] + MOVQ (8*1)(x_ptr), t0 + + MOVQ (8*2)(x_ptr), AX + MULQ t0 + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ t1, acc4 + ADCQ $0, DX + ADDQ AX, acc4 + ADCQ $0, DX + MOVQ DX, acc5 + // y[3] * y[2] + MOVQ (8*2)(x_ptr), t0 + + MOVQ (8*3)(x_ptr), AX + MULQ t0 + ADDQ AX, acc5 + ADCQ $0, DX + MOVQ DX, y_ptr + XORQ t1, t1 + // *2 + ADDQ acc1, acc1 + ADCQ acc2, acc2 + ADCQ acc3, acc3 + ADCQ acc4, acc4 + ADCQ acc5, acc5 + ADCQ y_ptr, y_ptr + ADCQ $0, t1 + // Missing products + MOVQ (8*0)(x_ptr), AX + MULQ AX + MOVQ AX, acc0 + MOVQ DX, t0 + + MOVQ (8*1)(x_ptr), AX + MULQ AX + ADDQ t0, acc1 + ADCQ AX, acc2 + ADCQ $0, DX + MOVQ DX, t0 + + MOVQ (8*2)(x_ptr), AX + MULQ AX + ADDQ t0, acc3 + ADCQ AX, acc4 + ADCQ $0, DX + MOVQ DX, t0 + + MOVQ (8*3)(x_ptr), AX + MULQ AX + ADDQ t0, acc5 + ADCQ AX, y_ptr + ADCQ DX, t1 + MOVQ t1, x_ptr + // First reduction step + MOVQ acc0, AX + MULQ p256ordK0<>(SB) + MOVQ AX, t0 + + MOVQ p256ord<>+0x00(SB), AX + MULQ t0 + ADDQ AX, acc0 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x08(SB), AX + MULQ t0 + ADDQ t1, acc1 + ADCQ $0, DX + ADDQ AX, acc1 + + MOVQ t0, t1 + ADCQ DX, acc2 + ADCQ $0, t1 + SUBQ t0, acc2 + SBBQ $0, t1 + + MOVQ t0, AX + MOVQ t0, DX + MOVQ t0, acc0 + SHLQ $32, AX + SHRQ $32, DX + + ADDQ t1, acc3 + ADCQ $0, acc0 + SUBQ AX, acc3 + SBBQ DX, acc0 + // Second reduction step + MOVQ acc1, AX + MULQ p256ordK0<>(SB) + MOVQ AX, t0 + + MOVQ p256ord<>+0x00(SB), AX + MULQ t0 + ADDQ AX, acc1 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x08(SB), AX + MULQ t0 + ADDQ t1, acc2 + ADCQ $0, DX + ADDQ AX, acc2 + + MOVQ t0, t1 + ADCQ DX, acc3 + ADCQ $0, t1 + SUBQ t0, acc3 + SBBQ $0, t1 + + MOVQ t0, AX + MOVQ t0, DX + MOVQ t0, acc1 + SHLQ $32, AX + SHRQ $32, DX + + ADDQ t1, acc0 + ADCQ $0, acc1 + SUBQ AX, acc0 + SBBQ DX, acc1 + // Third reduction step + MOVQ acc2, AX + MULQ p256ordK0<>(SB) + MOVQ AX, t0 + + MOVQ p256ord<>+0x00(SB), AX + MULQ t0 + ADDQ AX, acc2 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x08(SB), AX + MULQ t0 + ADDQ t1, acc3 + ADCQ $0, DX + ADDQ AX, acc3 + + MOVQ t0, t1 + ADCQ DX, acc0 + ADCQ $0, t1 + SUBQ t0, acc0 + SBBQ $0, t1 + + MOVQ t0, AX + MOVQ t0, DX + MOVQ t0, acc2 + SHLQ $32, AX + SHRQ $32, DX + + ADDQ t1, acc1 + ADCQ $0, acc2 + SUBQ AX, acc1 + SBBQ DX, acc2 + // Last reduction step + MOVQ acc3, AX + MULQ p256ordK0<>(SB) + MOVQ AX, t0 + + MOVQ p256ord<>+0x00(SB), AX + MULQ t0 + ADDQ AX, acc3 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ p256ord<>+0x08(SB), AX + MULQ t0 + ADDQ t1, acc0 + ADCQ $0, DX + ADDQ AX, acc0 + ADCQ $0, DX + MOVQ DX, t1 + + MOVQ t0, t1 + ADCQ DX, acc1 + ADCQ $0, t1 + SUBQ t0, acc1 + SBBQ $0, t1 + + MOVQ t0, AX + MOVQ t0, DX + MOVQ t0, acc3 + SHLQ $32, AX + SHRQ $32, DX + + ADDQ t1, acc2 + ADCQ $0, acc3 + SUBQ AX, acc2 + SBBQ DX, acc3 + XORQ t0, t0 + // Add bits [511:256] of the sqr result + ADCQ acc4, acc0 + ADCQ acc5, acc1 + ADCQ y_ptr, acc2 + ADCQ x_ptr, acc3 + ADCQ $0, t0 + + MOVQ acc0, acc4 + MOVQ acc1, acc5 + MOVQ acc2, y_ptr + MOVQ acc3, t1 + // Subtract p256 + SUBQ p256ord<>+0x00(SB), acc0 + SBBQ p256ord<>+0x08(SB) ,acc1 + SBBQ p256ord<>+0x10(SB), acc2 + SBBQ p256ord<>+0x18(SB), acc3 + SBBQ $0, t0 + + CMOVQCS acc4, acc0 + CMOVQCS acc5, acc1 + CMOVQCS y_ptr, acc2 + CMOVQCS t1, acc3 + + MOVQ acc0, (8*0)(res_ptr) + MOVQ acc1, (8*1)(res_ptr) + MOVQ acc2, (8*2)(res_ptr) + MOVQ acc3, (8*3)(res_ptr) + MOVQ res_ptr, x_ptr + DECQ BX + JNE ordSqrLoop + + RET +/* ---------------------------------------*/ +#undef res_ptr +#undef x_ptr +#undef y_ptr + +#undef acc0 +#undef acc1 +#undef acc2 +#undef acc3 +#undef acc4 +#undef acc5 +#undef t0 +#undef t1 +/* ---------------------------------------*/ +#define mul0 AX +#define mul1 DX +#define acc0 BX +#define acc1 CX +#define acc2 R8 +#define acc3 R9 +#define acc4 R10 +#define acc5 R11 +#define acc6 R12 +#define acc7 R13 +#define t0 R14 +#define t1 R15 +#define t2 DI +#define t3 SI +#define hlp BP +/* ---------------------------------------*/ +TEXT p256SubInternal(SB),NOSPLIT,$0 + XORQ mul0, mul0 + SUBQ t0, acc4 + SBBQ t1, acc5 + SBBQ t2, acc6 + SBBQ t3, acc7 + SBBQ $0, mul0 + + MOVQ acc4, acc0 + MOVQ acc5, acc1 + MOVQ acc6, acc2 + MOVQ acc7, acc3 + + ADDQ $-1, acc4 + ADCQ p256const0<>(SB), acc5 + ADCQ $0, acc6 + ADCQ p256const1<>(SB), acc7 + ANDQ $1, mul0 + + CMOVQEQ acc0, acc4 + CMOVQEQ acc1, acc5 + CMOVQEQ acc2, acc6 + CMOVQEQ acc3, acc7 + + RET +/* ---------------------------------------*/ +TEXT p256MulInternal(SB),NOSPLIT,$8 + MOVQ acc4, mul0 + MULQ t0 + MOVQ mul0, acc0 + MOVQ mul1, acc1 + + MOVQ acc4, mul0 + MULQ t1 + ADDQ mul0, acc1 + ADCQ $0, mul1 + MOVQ mul1, acc2 + + MOVQ acc4, mul0 + MULQ t2 + ADDQ mul0, acc2 + ADCQ $0, mul1 + MOVQ mul1, acc3 + + MOVQ acc4, mul0 + MULQ t3 + ADDQ mul0, acc3 + ADCQ $0, mul1 + MOVQ mul1, acc4 + + MOVQ acc5, mul0 + MULQ t0 + ADDQ mul0, acc1 + ADCQ $0, mul1 + MOVQ mul1, hlp + + MOVQ acc5, mul0 + MULQ t1 + ADDQ hlp, acc2 + ADCQ $0, mul1 + ADDQ mul0, acc2 + ADCQ $0, mul1 + MOVQ mul1, hlp + + MOVQ acc5, mul0 + MULQ t2 + ADDQ hlp, acc3 + ADCQ $0, mul1 + ADDQ mul0, acc3 + ADCQ $0, mul1 + MOVQ mul1, hlp + + MOVQ acc5, mul0 + MULQ t3 + ADDQ hlp, acc4 + ADCQ $0, mul1 + ADDQ mul0, acc4 + ADCQ $0, mul1 + MOVQ mul1, acc5 + + MOVQ acc6, mul0 + MULQ t0 + ADDQ mul0, acc2 + ADCQ $0, mul1 + MOVQ mul1, hlp + + MOVQ acc6, mul0 + MULQ t1 + ADDQ hlp, acc3 + ADCQ $0, mul1 + ADDQ mul0, acc3 + ADCQ $0, mul1 + MOVQ mul1, hlp + + MOVQ acc6, mul0 + MULQ t2 + ADDQ hlp, acc4 + ADCQ $0, mul1 + ADDQ mul0, acc4 + ADCQ $0, mul1 + MOVQ mul1, hlp + + MOVQ acc6, mul0 + MULQ t3 + ADDQ hlp, acc5 + ADCQ $0, mul1 + ADDQ mul0, acc5 + ADCQ $0, mul1 + MOVQ mul1, acc6 + + MOVQ acc7, mul0 + MULQ t0 + ADDQ mul0, acc3 + ADCQ $0, mul1 + MOVQ mul1, hlp + + MOVQ acc7, mul0 + MULQ t1 + ADDQ hlp, acc4 + ADCQ $0, mul1 + ADDQ mul0, acc4 + ADCQ $0, mul1 + MOVQ mul1, hlp + + MOVQ acc7, mul0 + MULQ t2 + ADDQ hlp, acc5 + ADCQ $0, mul1 + ADDQ mul0, acc5 + ADCQ $0, mul1 + MOVQ mul1, hlp + + MOVQ acc7, mul0 + MULQ t3 + ADDQ hlp, acc6 + ADCQ $0, mul1 + ADDQ mul0, acc6 + ADCQ $0, mul1 + MOVQ mul1, acc7 + // First reduction step + MOVQ acc0, mul0 + MOVQ acc0, hlp + SHLQ $32, acc0 + MULQ p256const1<>(SB) + SHRQ $32, hlp + ADDQ acc0, acc1 + ADCQ hlp, acc2 + ADCQ mul0, acc3 + ADCQ $0, mul1 + MOVQ mul1, acc0 + // Second reduction step + MOVQ acc1, mul0 + MOVQ acc1, hlp + SHLQ $32, acc1 + MULQ p256const1<>(SB) + SHRQ $32, hlp + ADDQ acc1, acc2 + ADCQ hlp, acc3 + ADCQ mul0, acc0 + ADCQ $0, mul1 + MOVQ mul1, acc1 + // Third reduction step + MOVQ acc2, mul0 + MOVQ acc2, hlp + SHLQ $32, acc2 + MULQ p256const1<>(SB) + SHRQ $32, hlp + ADDQ acc2, acc3 + ADCQ hlp, acc0 + ADCQ mul0, acc1 + ADCQ $0, mul1 + MOVQ mul1, acc2 + // Last reduction step + MOVQ acc3, mul0 + MOVQ acc3, hlp + SHLQ $32, acc3 + MULQ p256const1<>(SB) + SHRQ $32, hlp + ADDQ acc3, acc0 + ADCQ hlp, acc1 + ADCQ mul0, acc2 + ADCQ $0, mul1 + MOVQ mul1, acc3 + MOVQ $0, BP + // Add bits [511:256] of the result + ADCQ acc0, acc4 + ADCQ acc1, acc5 + ADCQ acc2, acc6 + ADCQ acc3, acc7 + ADCQ $0, hlp + // Copy result + MOVQ acc4, acc0 + MOVQ acc5, acc1 + MOVQ acc6, acc2 + MOVQ acc7, acc3 + // Subtract p256 + SUBQ $-1, acc4 + SBBQ p256const0<>(SB) ,acc5 + SBBQ $0, acc6 + SBBQ p256const1<>(SB), acc7 + SBBQ $0, hlp + // If the result of the subtraction is negative, restore the previous result + CMOVQCS acc0, acc4 + CMOVQCS acc1, acc5 + CMOVQCS acc2, acc6 + CMOVQCS acc3, acc7 + + RET +/* ---------------------------------------*/ +TEXT p256SqrInternal(SB),NOSPLIT,$8 + + MOVQ acc4, mul0 + MULQ acc5 + MOVQ mul0, acc1 + MOVQ mul1, acc2 + + MOVQ acc4, mul0 + MULQ acc6 + ADDQ mul0, acc2 + ADCQ $0, mul1 + MOVQ mul1, acc3 + + MOVQ acc4, mul0 + MULQ acc7 + ADDQ mul0, acc3 + ADCQ $0, mul1 + MOVQ mul1, t0 + + MOVQ acc5, mul0 + MULQ acc6 + ADDQ mul0, acc3 + ADCQ $0, mul1 + MOVQ mul1, hlp + + MOVQ acc5, mul0 + MULQ acc7 + ADDQ hlp, t0 + ADCQ $0, mul1 + ADDQ mul0, t0 + ADCQ $0, mul1 + MOVQ mul1, t1 + + MOVQ acc6, mul0 + MULQ acc7 + ADDQ mul0, t1 + ADCQ $0, mul1 + MOVQ mul1, t2 + XORQ t3, t3 + // *2 + ADDQ acc1, acc1 + ADCQ acc2, acc2 + ADCQ acc3, acc3 + ADCQ t0, t0 + ADCQ t1, t1 + ADCQ t2, t2 + ADCQ $0, t3 + // Missing products + MOVQ acc4, mul0 + MULQ mul0 + MOVQ mul0, acc0 + MOVQ DX, acc4 + + MOVQ acc5, mul0 + MULQ mul0 + ADDQ acc4, acc1 + ADCQ mul0, acc2 + ADCQ $0, DX + MOVQ DX, acc4 + + MOVQ acc6, mul0 + MULQ mul0 + ADDQ acc4, acc3 + ADCQ mul0, t0 + ADCQ $0, DX + MOVQ DX, acc4 + + MOVQ acc7, mul0 + MULQ mul0 + ADDQ acc4, t1 + ADCQ mul0, t2 + ADCQ DX, t3 + // First reduction step + MOVQ acc0, mul0 + MOVQ acc0, hlp + SHLQ $32, acc0 + MULQ p256const1<>(SB) + SHRQ $32, hlp + ADDQ acc0, acc1 + ADCQ hlp, acc2 + ADCQ mul0, acc3 + ADCQ $0, mul1 + MOVQ mul1, acc0 + // Second reduction step + MOVQ acc1, mul0 + MOVQ acc1, hlp + SHLQ $32, acc1 + MULQ p256const1<>(SB) + SHRQ $32, hlp + ADDQ acc1, acc2 + ADCQ hlp, acc3 + ADCQ mul0, acc0 + ADCQ $0, mul1 + MOVQ mul1, acc1 + // Third reduction step + MOVQ acc2, mul0 + MOVQ acc2, hlp + SHLQ $32, acc2 + MULQ p256const1<>(SB) + SHRQ $32, hlp + ADDQ acc2, acc3 + ADCQ hlp, acc0 + ADCQ mul0, acc1 + ADCQ $0, mul1 + MOVQ mul1, acc2 + // Last reduction step + MOVQ acc3, mul0 + MOVQ acc3, hlp + SHLQ $32, acc3 + MULQ p256const1<>(SB) + SHRQ $32, hlp + ADDQ acc3, acc0 + ADCQ hlp, acc1 + ADCQ mul0, acc2 + ADCQ $0, mul1 + MOVQ mul1, acc3 + MOVQ $0, BP + // Add bits [511:256] of the result + ADCQ acc0, t0 + ADCQ acc1, t1 + ADCQ acc2, t2 + ADCQ acc3, t3 + ADCQ $0, hlp + // Copy result + MOVQ t0, acc4 + MOVQ t1, acc5 + MOVQ t2, acc6 + MOVQ t3, acc7 + // Subtract p256 + SUBQ $-1, acc4 + SBBQ p256const0<>(SB) ,acc5 + SBBQ $0, acc6 + SBBQ p256const1<>(SB), acc7 + SBBQ $0, hlp + // If the result of the subtraction is negative, restore the previous result + CMOVQCS t0, acc4 + CMOVQCS t1, acc5 + CMOVQCS t2, acc6 + CMOVQCS t3, acc7 + + RET +/* ---------------------------------------*/ +#define p256MulBy2Inline\ + XORQ mul0, mul0;\ + ADDQ acc4, acc4;\ + ADCQ acc5, acc5;\ + ADCQ acc6, acc6;\ + ADCQ acc7, acc7;\ + ADCQ $0, mul0;\ + MOVQ acc4, t0;\ + MOVQ acc5, t1;\ + MOVQ acc6, t2;\ + MOVQ acc7, t3;\ + SUBQ $-1, t0;\ + SBBQ p256const0<>(SB), t1;\ + SBBQ $0, t2;\ + SBBQ p256const1<>(SB), t3;\ + SBBQ $0, mul0;\ + CMOVQCS acc4, t0;\ + CMOVQCS acc5, t1;\ + CMOVQCS acc6, t2;\ + CMOVQCS acc7, t3; +/* ---------------------------------------*/ +#define p256AddInline \ + XORQ mul0, mul0;\ + ADDQ t0, acc4;\ + ADCQ t1, acc5;\ + ADCQ t2, acc6;\ + ADCQ t3, acc7;\ + ADCQ $0, mul0;\ + MOVQ acc4, t0;\ + MOVQ acc5, t1;\ + MOVQ acc6, t2;\ + MOVQ acc7, t3;\ + SUBQ $-1, t0;\ + SBBQ p256const0<>(SB), t1;\ + SBBQ $0, t2;\ + SBBQ p256const1<>(SB), t3;\ + SBBQ $0, mul0;\ + CMOVQCS acc4, t0;\ + CMOVQCS acc5, t1;\ + CMOVQCS acc6, t2;\ + CMOVQCS acc7, t3; +/* ---------------------------------------*/ +#define LDacc(src) MOVQ src(8*0), acc4; MOVQ src(8*1), acc5; MOVQ src(8*2), acc6; MOVQ src(8*3), acc7 +#define LDt(src) MOVQ src(8*0), t0; MOVQ src(8*1), t1; MOVQ src(8*2), t2; MOVQ src(8*3), t3 +#define ST(dst) MOVQ acc4, dst(8*0); MOVQ acc5, dst(8*1); MOVQ acc6, dst(8*2); MOVQ acc7, dst(8*3) +#define STt(dst) MOVQ t0, dst(8*0); MOVQ t1, dst(8*1); MOVQ t2, dst(8*2); MOVQ t3, dst(8*3) +#define acc2t MOVQ acc4, t0; MOVQ acc5, t1; MOVQ acc6, t2; MOVQ acc7, t3 +#define t2acc MOVQ t0, acc4; MOVQ t1, acc5; MOVQ t2, acc6; MOVQ t3, acc7 +/* ---------------------------------------*/ +#define x1in(off) (32*0 + off)(SP) +#define y1in(off) (32*1 + off)(SP) +#define z1in(off) (32*2 + off)(SP) +#define x2in(off) (32*3 + off)(SP) +#define y2in(off) (32*4 + off)(SP) +#define xout(off) (32*5 + off)(SP) +#define yout(off) (32*6 + off)(SP) +#define zout(off) (32*7 + off)(SP) +#define s2(off) (32*8 + off)(SP) +#define z1sqr(off) (32*9 + off)(SP) +#define h(off) (32*10 + off)(SP) +#define r(off) (32*11 + off)(SP) +#define hsqr(off) (32*12 + off)(SP) +#define rsqr(off) (32*13 + off)(SP) +#define hcub(off) (32*14 + off)(SP) +#define rptr (32*15)(SP) +#define sel_save (32*15 + 8)(SP) +#define zero_save (32*15 + 8 + 4)(SP) + +// func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int) +TEXT ·p256PointAddAffineAsm(SB),0,$512-96 + // Move input to stack in order to free registers + MOVQ res+0(FP), AX + MOVQ in1+24(FP), BX + MOVQ in2+48(FP), CX + MOVQ sign+72(FP), DX + MOVQ sel+80(FP), t1 + MOVQ zero+88(FP), t2 + + MOVOU (16*0)(BX), X0 + MOVOU (16*1)(BX), X1 + MOVOU (16*2)(BX), X2 + MOVOU (16*3)(BX), X3 + MOVOU (16*4)(BX), X4 + MOVOU (16*5)(BX), X5 + + MOVOU X0, x1in(16*0) + MOVOU X1, x1in(16*1) + MOVOU X2, y1in(16*0) + MOVOU X3, y1in(16*1) + MOVOU X4, z1in(16*0) + MOVOU X5, z1in(16*1) + + MOVOU (16*0)(CX), X0 + MOVOU (16*1)(CX), X1 + + MOVOU X0, x2in(16*0) + MOVOU X1, x2in(16*1) + // Store pointer to result + MOVQ mul0, rptr + MOVL t1, sel_save + MOVL t2, zero_save + // Negate y2in based on sign + MOVQ (16*2 + 8*0)(CX), acc4 + MOVQ (16*2 + 8*1)(CX), acc5 + MOVQ (16*2 + 8*2)(CX), acc6 + MOVQ (16*2 + 8*3)(CX), acc7 + MOVQ $-1, acc0 + MOVQ p256const0<>(SB), acc1 + MOVQ $0, acc2 + MOVQ p256const1<>(SB), acc3 + XORQ mul0, mul0 + // Speculatively subtract + SUBQ acc4, acc0 + SBBQ acc5, acc1 + SBBQ acc6, acc2 + SBBQ acc7, acc3 + SBBQ $0, mul0 + MOVQ acc0, t0 + MOVQ acc1, t1 + MOVQ acc2, t2 + MOVQ acc3, t3 + // Add in case the operand was > p256 + ADDQ $-1, acc0 + ADCQ p256const0<>(SB), acc1 + ADCQ $0, acc2 + ADCQ p256const1<>(SB), acc3 + ADCQ $0, mul0 + CMOVQNE t0, acc0 + CMOVQNE t1, acc1 + CMOVQNE t2, acc2 + CMOVQNE t3, acc3 + // If condition is 0, keep original value + TESTQ DX, DX + CMOVQEQ acc4, acc0 + CMOVQEQ acc5, acc1 + CMOVQEQ acc6, acc2 + CMOVQEQ acc7, acc3 + // Store result + MOVQ acc0, y2in(8*0) + MOVQ acc1, y2in(8*1) + MOVQ acc2, y2in(8*2) + MOVQ acc3, y2in(8*3) + // Begin point add + LDacc (z1in) + CALL p256SqrInternal(SB) // z1ˆ2 + ST (z1sqr) + + LDt (x2in) + CALL p256MulInternal(SB) // x2 * z1ˆ2 + + LDt (x1in) + CALL p256SubInternal(SB) // h = u2 - u1 + ST (h) + + LDt (z1in) + CALL p256MulInternal(SB) // z3 = h * z1 + ST (zout) + + LDacc (z1sqr) + CALL p256MulInternal(SB) // z1ˆ3 + + LDt (y2in) + CALL p256MulInternal(SB) // s2 = y2 * z1ˆ3 + ST (s2) + + LDt (y1in) + CALL p256SubInternal(SB) // r = s2 - s1 + ST (r) + + CALL p256SqrInternal(SB) // rsqr = rˆ2 + ST (rsqr) + + LDacc (h) + CALL p256SqrInternal(SB) // hsqr = hˆ2 + ST (hsqr) + + LDt (h) + CALL p256MulInternal(SB) // hcub = hˆ3 + ST (hcub) + + LDt (y1in) + CALL p256MulInternal(SB) // y1 * hˆ3 + ST (s2) + + LDacc (x1in) + LDt (hsqr) + CALL p256MulInternal(SB) // u1 * hˆ2 + ST (h) + + p256MulBy2Inline // u1 * hˆ2 * 2, inline + LDacc (rsqr) + CALL p256SubInternal(SB) // rˆ2 - u1 * hˆ2 * 2 + + LDt (hcub) + CALL p256SubInternal(SB) + ST (xout) + + MOVQ acc4, t0 + MOVQ acc5, t1 + MOVQ acc6, t2 + MOVQ acc7, t3 + LDacc (h) + CALL p256SubInternal(SB) + + LDt (r) + CALL p256MulInternal(SB) + + LDt (s2) + CALL p256SubInternal(SB) + ST (yout) + // Load stored values from stack + MOVQ rptr, AX + MOVL sel_save, BX + MOVL zero_save, CX + // The result is not valid if (sel == 0), conditional choose + MOVOU xout(16*0), X0 + MOVOU xout(16*1), X1 + MOVOU yout(16*0), X2 + MOVOU yout(16*1), X3 + MOVOU zout(16*0), X4 + MOVOU zout(16*1), X5 + + MOVL BX, X6 + MOVL CX, X7 + + PXOR X8, X8 + PCMPEQL X9, X9 + + PSHUFD $0, X6, X6 + PSHUFD $0, X7, X7 + + PCMPEQL X8, X6 + PCMPEQL X8, X7 + + MOVOU X6, X15 + PANDN X9, X15 + + MOVOU x1in(16*0), X9 + MOVOU x1in(16*1), X10 + MOVOU y1in(16*0), X11 + MOVOU y1in(16*1), X12 + MOVOU z1in(16*0), X13 + MOVOU z1in(16*1), X14 + + PAND X15, X0 + PAND X15, X1 + PAND X15, X2 + PAND X15, X3 + PAND X15, X4 + PAND X15, X5 + + PAND X6, X9 + PAND X6, X10 + PAND X6, X11 + PAND X6, X12 + PAND X6, X13 + PAND X6, X14 + + PXOR X9, X0 + PXOR X10, X1 + PXOR X11, X2 + PXOR X12, X3 + PXOR X13, X4 + PXOR X14, X5 + // Similarly if zero == 0 + PCMPEQL X9, X9 + MOVOU X7, X15 + PANDN X9, X15 + + MOVOU x2in(16*0), X9 + MOVOU x2in(16*1), X10 + MOVOU y2in(16*0), X11 + MOVOU y2in(16*1), X12 + MOVOU p256one<>+0x00(SB), X13 + MOVOU p256one<>+0x10(SB), X14 + + PAND X15, X0 + PAND X15, X1 + PAND X15, X2 + PAND X15, X3 + PAND X15, X4 + PAND X15, X5 + + PAND X7, X9 + PAND X7, X10 + PAND X7, X11 + PAND X7, X12 + PAND X7, X13 + PAND X7, X14 + + PXOR X9, X0 + PXOR X10, X1 + PXOR X11, X2 + PXOR X12, X3 + PXOR X13, X4 + PXOR X14, X5 + // Finally output the result + MOVOU X0, (16*0)(AX) + MOVOU X1, (16*1)(AX) + MOVOU X2, (16*2)(AX) + MOVOU X3, (16*3)(AX) + MOVOU X4, (16*4)(AX) + MOVOU X5, (16*5)(AX) + MOVQ $0, rptr + + RET +#undef x1in +#undef y1in +#undef z1in +#undef x2in +#undef y2in +#undef xout +#undef yout +#undef zout +#undef s2 +#undef z1sqr +#undef h +#undef r +#undef hsqr +#undef rsqr +#undef hcub +#undef rptr +#undef sel_save +#undef zero_save + +// p256IsZero returns 1 in AX if [acc4..acc7] represents zero and zero +// otherwise. It writes to [acc4..acc7], t0 and t1. +TEXT p256IsZero(SB),NOSPLIT,$0 + // AX contains a flag that is set if the input is zero. + XORQ AX, AX + MOVQ $1, t1 + + // Check whether [acc4..acc7] are all zero. + MOVQ acc4, t0 + ORQ acc5, t0 + ORQ acc6, t0 + ORQ acc7, t0 + + // Set the zero flag if so. (CMOV of a constant to a register doesn't + // appear to be supported in Go. Thus t1 = 1.) + CMOVQEQ t1, AX + + // XOR [acc4..acc7] with P and compare with zero again. + XORQ $-1, acc4 + XORQ p256const0<>(SB), acc5 + XORQ p256const1<>(SB), acc7 + ORQ acc5, acc4 + ORQ acc6, acc4 + ORQ acc7, acc4 + + // Set the zero flag if so. + CMOVQEQ t1, AX + RET + +/* ---------------------------------------*/ +#define x1in(off) (32*0 + off)(SP) +#define y1in(off) (32*1 + off)(SP) +#define z1in(off) (32*2 + off)(SP) +#define x2in(off) (32*3 + off)(SP) +#define y2in(off) (32*4 + off)(SP) +#define z2in(off) (32*5 + off)(SP) + +#define xout(off) (32*6 + off)(SP) +#define yout(off) (32*7 + off)(SP) +#define zout(off) (32*8 + off)(SP) + +#define u1(off) (32*9 + off)(SP) +#define u2(off) (32*10 + off)(SP) +#define s1(off) (32*11 + off)(SP) +#define s2(off) (32*12 + off)(SP) +#define z1sqr(off) (32*13 + off)(SP) +#define z2sqr(off) (32*14 + off)(SP) +#define h(off) (32*15 + off)(SP) +#define r(off) (32*16 + off)(SP) +#define hsqr(off) (32*17 + off)(SP) +#define rsqr(off) (32*18 + off)(SP) +#define hcub(off) (32*19 + off)(SP) +#define rptr (32*20)(SP) +#define points_eq (32*20+8)(SP) + +//func p256PointAddAsm(res, in1, in2 []uint64) int +TEXT ·p256PointAddAsm(SB),0,$680-80 + // See https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl + // Move input to stack in order to free registers + MOVQ res+0(FP), AX + MOVQ in1+24(FP), BX + MOVQ in2+48(FP), CX + + MOVOU (16*0)(BX), X0 + MOVOU (16*1)(BX), X1 + MOVOU (16*2)(BX), X2 + MOVOU (16*3)(BX), X3 + MOVOU (16*4)(BX), X4 + MOVOU (16*5)(BX), X5 + + MOVOU X0, x1in(16*0) + MOVOU X1, x1in(16*1) + MOVOU X2, y1in(16*0) + MOVOU X3, y1in(16*1) + MOVOU X4, z1in(16*0) + MOVOU X5, z1in(16*1) + + MOVOU (16*0)(CX), X0 + MOVOU (16*1)(CX), X1 + MOVOU (16*2)(CX), X2 + MOVOU (16*3)(CX), X3 + MOVOU (16*4)(CX), X4 + MOVOU (16*5)(CX), X5 + + MOVOU X0, x2in(16*0) + MOVOU X1, x2in(16*1) + MOVOU X2, y2in(16*0) + MOVOU X3, y2in(16*1) + MOVOU X4, z2in(16*0) + MOVOU X5, z2in(16*1) + // Store pointer to result + MOVQ AX, rptr + // Begin point add + LDacc (z2in) + CALL p256SqrInternal(SB) // z2ˆ2 + ST (z2sqr) + LDt (z2in) + CALL p256MulInternal(SB) // z2ˆ3 + LDt (y1in) + CALL p256MulInternal(SB) // s1 = z2ˆ3*y1 + ST (s1) + + LDacc (z1in) + CALL p256SqrInternal(SB) // z1ˆ2 + ST (z1sqr) + LDt (z1in) + CALL p256MulInternal(SB) // z1ˆ3 + LDt (y2in) + CALL p256MulInternal(SB) // s2 = z1ˆ3*y2 + ST (s2) + + LDt (s1) + CALL p256SubInternal(SB) // r = s2 - s1 + ST (r) + CALL p256IsZero(SB) + MOVQ AX, points_eq + + LDacc (z2sqr) + LDt (x1in) + CALL p256MulInternal(SB) // u1 = x1 * z2ˆ2 + ST (u1) + LDacc (z1sqr) + LDt (x2in) + CALL p256MulInternal(SB) // u2 = x2 * z1ˆ2 + ST (u2) + + LDt (u1) + CALL p256SubInternal(SB) // h = u2 - u1 + ST (h) + CALL p256IsZero(SB) + ANDQ points_eq, AX + MOVQ AX, points_eq + + LDacc (r) + CALL p256SqrInternal(SB) // rsqr = rˆ2 + ST (rsqr) + + LDacc (h) + CALL p256SqrInternal(SB) // hsqr = hˆ2 + ST (hsqr) + + LDt (h) + CALL p256MulInternal(SB) // hcub = hˆ3 + ST (hcub) + + LDt (s1) + CALL p256MulInternal(SB) + ST (s2) + + LDacc (z1in) + LDt (z2in) + CALL p256MulInternal(SB) // z1 * z2 + LDt (h) + CALL p256MulInternal(SB) // z1 * z2 * h + ST (zout) + + LDacc (hsqr) + LDt (u1) + CALL p256MulInternal(SB) // hˆ2 * u1 + ST (u2) + + p256MulBy2Inline // u1 * hˆ2 * 2, inline + LDacc (rsqr) + CALL p256SubInternal(SB) // rˆ2 - u1 * hˆ2 * 2 + + LDt (hcub) + CALL p256SubInternal(SB) + ST (xout) + + MOVQ acc4, t0 + MOVQ acc5, t1 + MOVQ acc6, t2 + MOVQ acc7, t3 + LDacc (u2) + CALL p256SubInternal(SB) + + LDt (r) + CALL p256MulInternal(SB) + + LDt (s2) + CALL p256SubInternal(SB) + ST (yout) + + MOVOU xout(16*0), X0 + MOVOU xout(16*1), X1 + MOVOU yout(16*0), X2 + MOVOU yout(16*1), X3 + MOVOU zout(16*0), X4 + MOVOU zout(16*1), X5 + // Finally output the result + MOVQ rptr, AX + MOVQ $0, rptr + MOVOU X0, (16*0)(AX) + MOVOU X1, (16*1)(AX) + MOVOU X2, (16*2)(AX) + MOVOU X3, (16*3)(AX) + MOVOU X4, (16*4)(AX) + MOVOU X5, (16*5)(AX) + + MOVQ points_eq, AX + MOVQ AX, ret+72(FP) + + RET +#undef x1in +#undef y1in +#undef z1in +#undef x2in +#undef y2in +#undef z2in +#undef xout +#undef yout +#undef zout +#undef s1 +#undef s2 +#undef u1 +#undef u2 +#undef z1sqr +#undef z2sqr +#undef h +#undef r +#undef hsqr +#undef rsqr +#undef hcub +#undef rptr +/* ---------------------------------------*/ +#define x(off) (32*0 + off)(SP) +#define y(off) (32*1 + off)(SP) +#define z(off) (32*2 + off)(SP) + +#define s(off) (32*3 + off)(SP) +#define m(off) (32*4 + off)(SP) +#define zsqr(off) (32*5 + off)(SP) +#define tmp(off) (32*6 + off)(SP) +#define rptr (32*7)(SP) + +//func p256PointDoubleAsm(res, in []uint64) +TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$256-48 + // Move input to stack in order to free registers + MOVQ res+0(FP), AX + MOVQ in+24(FP), BX + + MOVOU (16*0)(BX), X0 + MOVOU (16*1)(BX), X1 + MOVOU (16*2)(BX), X2 + MOVOU (16*3)(BX), X3 + MOVOU (16*4)(BX), X4 + MOVOU (16*5)(BX), X5 + + MOVOU X0, x(16*0) + MOVOU X1, x(16*1) + MOVOU X2, y(16*0) + MOVOU X3, y(16*1) + MOVOU X4, z(16*0) + MOVOU X5, z(16*1) + // Store pointer to result + MOVQ AX, rptr + // Begin point double + LDacc (z) + CALL p256SqrInternal(SB) + ST (zsqr) + + LDt (x) + p256AddInline + STt (m) + + LDacc (z) + LDt (y) + CALL p256MulInternal(SB) + p256MulBy2Inline + MOVQ rptr, AX + // Store z + MOVQ t0, (16*4 + 8*0)(AX) + MOVQ t1, (16*4 + 8*1)(AX) + MOVQ t2, (16*4 + 8*2)(AX) + MOVQ t3, (16*4 + 8*3)(AX) + + LDacc (x) + LDt (zsqr) + CALL p256SubInternal(SB) + LDt (m) + CALL p256MulInternal(SB) + ST (m) + // Multiply by 3 + p256MulBy2Inline + LDacc (m) + p256AddInline + STt (m) + //////////////////////// + LDacc (y) + p256MulBy2Inline + t2acc + CALL p256SqrInternal(SB) + ST (s) + CALL p256SqrInternal(SB) + // Divide by 2 + XORQ mul0, mul0 + MOVQ acc4, t0 + MOVQ acc5, t1 + MOVQ acc6, t2 + MOVQ acc7, t3 + + ADDQ $-1, acc4 + ADCQ p256const0<>(SB), acc5 + ADCQ $0, acc6 + ADCQ p256const1<>(SB), acc7 + ADCQ $0, mul0 + TESTQ $1, t0 + + CMOVQEQ t0, acc4 + CMOVQEQ t1, acc5 + CMOVQEQ t2, acc6 + CMOVQEQ t3, acc7 + ANDQ t0, mul0 + + SHRQ $1, acc5, acc4 + SHRQ $1, acc6, acc5 + SHRQ $1, acc7, acc6 + SHRQ $1, mul0, acc7 + ST (y) + ///////////////////////// + LDacc (x) + LDt (s) + CALL p256MulInternal(SB) + ST (s) + p256MulBy2Inline + STt (tmp) + + LDacc (m) + CALL p256SqrInternal(SB) + LDt (tmp) + CALL p256SubInternal(SB) + + MOVQ rptr, AX + // Store x + MOVQ acc4, (16*0 + 8*0)(AX) + MOVQ acc5, (16*0 + 8*1)(AX) + MOVQ acc6, (16*0 + 8*2)(AX) + MOVQ acc7, (16*0 + 8*3)(AX) + + acc2t + LDacc (s) + CALL p256SubInternal(SB) + + LDt (m) + CALL p256MulInternal(SB) + + LDt (y) + CALL p256SubInternal(SB) + MOVQ rptr, AX + // Store y + MOVQ acc4, (16*2 + 8*0)(AX) + MOVQ acc5, (16*2 + 8*1)(AX) + MOVQ acc6, (16*2 + 8*2)(AX) + MOVQ acc7, (16*2 + 8*3)(AX) + /////////////////////// + MOVQ $0, rptr + + RET +/* ---------------------------------------*/ diff --git a/src/crypto/elliptic/p256_asm_arm64.s b/src/crypto/elliptic/p256_asm_arm64.s new file mode 100644 index 0000000..f571c50 --- /dev/null +++ b/src/crypto/elliptic/p256_asm_arm64.s @@ -0,0 +1,1529 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains constant-time, 64-bit assembly implementation of +// P256. The optimizations performed here are described in detail in: +// S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with +// 256-bit primes" +// http://link.springer.com/article/10.1007%2Fs13389-014-0090-x +// https://eprint.iacr.org/2013/816.pdf + +#include "textflag.h" + +#define res_ptr R0 +#define a_ptr R1 +#define b_ptr R2 + +#define acc0 R3 +#define acc1 R4 +#define acc2 R5 +#define acc3 R6 + +#define acc4 R7 +#define acc5 R8 +#define acc6 R9 +#define acc7 R10 +#define t0 R11 +#define t1 R12 +#define t2 R13 +#define t3 R14 +#define const0 R15 +#define const1 R16 + +#define hlp0 R17 +#define hlp1 res_ptr + +#define x0 R19 +#define x1 R20 +#define x2 R21 +#define x3 R22 +#define y0 R23 +#define y1 R24 +#define y2 R25 +#define y3 R26 + +#define const2 t2 +#define const3 t3 + +DATA p256const0<>+0x00(SB)/8, $0x00000000ffffffff +DATA p256const1<>+0x00(SB)/8, $0xffffffff00000001 +DATA p256ordK0<>+0x00(SB)/8, $0xccd1c8aaee00bc4f +DATA p256ord<>+0x00(SB)/8, $0xf3b9cac2fc632551 +DATA p256ord<>+0x08(SB)/8, $0xbce6faada7179e84 +DATA p256ord<>+0x10(SB)/8, $0xffffffffffffffff +DATA p256ord<>+0x18(SB)/8, $0xffffffff00000000 +DATA p256one<>+0x00(SB)/8, $0x0000000000000001 +DATA p256one<>+0x08(SB)/8, $0xffffffff00000000 +DATA p256one<>+0x10(SB)/8, $0xffffffffffffffff +DATA p256one<>+0x18(SB)/8, $0x00000000fffffffe +GLOBL p256const0<>(SB), 8, $8 +GLOBL p256const1<>(SB), 8, $8 +GLOBL p256ordK0<>(SB), 8, $8 +GLOBL p256ord<>(SB), 8, $32 +GLOBL p256one<>(SB), 8, $32 + +/* ---------------------------------------*/ +// func p256LittleToBig(res []byte, in []uint64) +TEXT ·p256LittleToBig(SB),NOSPLIT,$0 + JMP ·p256BigToLittle(SB) +/* ---------------------------------------*/ +// func p256BigToLittle(res []uint64, in []byte) +TEXT ·p256BigToLittle(SB),NOSPLIT,$0 + MOVD res+0(FP), res_ptr + MOVD in+24(FP), a_ptr + + LDP 0*16(a_ptr), (acc0, acc1) + LDP 1*16(a_ptr), (acc2, acc3) + + REV acc0, acc0 + REV acc1, acc1 + REV acc2, acc2 + REV acc3, acc3 + + STP (acc3, acc2), 0*16(res_ptr) + STP (acc1, acc0), 1*16(res_ptr) + RET +/* ---------------------------------------*/ +// func p256MovCond(res, a, b []uint64, cond int) +// If cond == 0 res=b, else res=a +TEXT ·p256MovCond(SB),NOSPLIT,$0 + MOVD res+0(FP), res_ptr + MOVD a+24(FP), a_ptr + MOVD b+48(FP), b_ptr + MOVD cond+72(FP), R3 + + CMP $0, R3 + // Two remarks: + // 1) Will want to revisit NEON, when support is better + // 2) CSEL might not be constant time on all ARM processors + LDP 0*16(a_ptr), (R4, R5) + LDP 1*16(a_ptr), (R6, R7) + LDP 2*16(a_ptr), (R8, R9) + LDP 0*16(b_ptr), (R16, R17) + LDP 1*16(b_ptr), (R19, R20) + LDP 2*16(b_ptr), (R21, R22) + CSEL EQ, R16, R4, R4 + CSEL EQ, R17, R5, R5 + CSEL EQ, R19, R6, R6 + CSEL EQ, R20, R7, R7 + CSEL EQ, R21, R8, R8 + CSEL EQ, R22, R9, R9 + STP (R4, R5), 0*16(res_ptr) + STP (R6, R7), 1*16(res_ptr) + STP (R8, R9), 2*16(res_ptr) + + LDP 3*16(a_ptr), (R4, R5) + LDP 4*16(a_ptr), (R6, R7) + LDP 5*16(a_ptr), (R8, R9) + LDP 3*16(b_ptr), (R16, R17) + LDP 4*16(b_ptr), (R19, R20) + LDP 5*16(b_ptr), (R21, R22) + CSEL EQ, R16, R4, R4 + CSEL EQ, R17, R5, R5 + CSEL EQ, R19, R6, R6 + CSEL EQ, R20, R7, R7 + CSEL EQ, R21, R8, R8 + CSEL EQ, R22, R9, R9 + STP (R4, R5), 3*16(res_ptr) + STP (R6, R7), 4*16(res_ptr) + STP (R8, R9), 5*16(res_ptr) + + RET +/* ---------------------------------------*/ +// func p256NegCond(val []uint64, cond int) +TEXT ·p256NegCond(SB),NOSPLIT,$0 + MOVD val+0(FP), a_ptr + MOVD cond+24(FP), hlp0 + MOVD a_ptr, res_ptr + // acc = poly + MOVD $-1, acc0 + MOVD p256const0<>(SB), acc1 + MOVD $0, acc2 + MOVD p256const1<>(SB), acc3 + // Load the original value + LDP 0*16(a_ptr), (t0, t1) + LDP 1*16(a_ptr), (t2, t3) + // Speculatively subtract + SUBS t0, acc0 + SBCS t1, acc1 + SBCS t2, acc2 + SBC t3, acc3 + // If condition is 0, keep original value + CMP $0, hlp0 + CSEL EQ, t0, acc0, acc0 + CSEL EQ, t1, acc1, acc1 + CSEL EQ, t2, acc2, acc2 + CSEL EQ, t3, acc3, acc3 + // Store result + STP (acc0, acc1), 0*16(res_ptr) + STP (acc2, acc3), 1*16(res_ptr) + + RET +/* ---------------------------------------*/ +// func p256Sqr(res, in []uint64, n int) +TEXT ·p256Sqr(SB),NOSPLIT,$0 + MOVD res+0(FP), res_ptr + MOVD in+24(FP), a_ptr + MOVD n+48(FP), b_ptr + + MOVD p256const0<>(SB), const0 + MOVD p256const1<>(SB), const1 + + LDP 0*16(a_ptr), (x0, x1) + LDP 1*16(a_ptr), (x2, x3) + +sqrLoop: + SUB $1, b_ptr + CALL p256SqrInternal<>(SB) + MOVD y0, x0 + MOVD y1, x1 + MOVD y2, x2 + MOVD y3, x3 + CBNZ b_ptr, sqrLoop + + STP (y0, y1), 0*16(res_ptr) + STP (y2, y3), 1*16(res_ptr) + RET +/* ---------------------------------------*/ +// func p256Mul(res, in1, in2 []uint64) +TEXT ·p256Mul(SB),NOSPLIT,$0 + MOVD res+0(FP), res_ptr + MOVD in1+24(FP), a_ptr + MOVD in2+48(FP), b_ptr + + MOVD p256const0<>(SB), const0 + MOVD p256const1<>(SB), const1 + + LDP 0*16(a_ptr), (x0, x1) + LDP 1*16(a_ptr), (x2, x3) + + LDP 0*16(b_ptr), (y0, y1) + LDP 1*16(b_ptr), (y2, y3) + + CALL p256MulInternal<>(SB) + + STP (y0, y1), 0*16(res_ptr) + STP (y2, y3), 1*16(res_ptr) + RET +/* ---------------------------------------*/ +// func p256FromMont(res, in []uint64) +TEXT ·p256FromMont(SB),NOSPLIT,$0 + MOVD res+0(FP), res_ptr + MOVD in+24(FP), a_ptr + + MOVD p256const0<>(SB), const0 + MOVD p256const1<>(SB), const1 + + LDP 0*16(a_ptr), (acc0, acc1) + LDP 1*16(a_ptr), (acc2, acc3) + // Only reduce, no multiplications are needed + // First reduction step + ADDS acc0<<32, acc1, acc1 + LSR $32, acc0, t0 + MUL acc0, const1, t1 + UMULH acc0, const1, acc0 + ADCS t0, acc2 + ADCS t1, acc3 + ADC $0, acc0 + // Second reduction step + ADDS acc1<<32, acc2, acc2 + LSR $32, acc1, t0 + MUL acc1, const1, t1 + UMULH acc1, const1, acc1 + ADCS t0, acc3 + ADCS t1, acc0 + ADC $0, acc1 + // Third reduction step + ADDS acc2<<32, acc3, acc3 + LSR $32, acc2, t0 + MUL acc2, const1, t1 + UMULH acc2, const1, acc2 + ADCS t0, acc0 + ADCS t1, acc1 + ADC $0, acc2 + // Last reduction step + ADDS acc3<<32, acc0, acc0 + LSR $32, acc3, t0 + MUL acc3, const1, t1 + UMULH acc3, const1, acc3 + ADCS t0, acc1 + ADCS t1, acc2 + ADC $0, acc3 + + SUBS $-1, acc0, t0 + SBCS const0, acc1, t1 + SBCS $0, acc2, t2 + SBCS const1, acc3, t3 + + CSEL CS, t0, acc0, acc0 + CSEL CS, t1, acc1, acc1 + CSEL CS, t2, acc2, acc2 + CSEL CS, t3, acc3, acc3 + + STP (acc0, acc1), 0*16(res_ptr) + STP (acc2, acc3), 1*16(res_ptr) + + RET +/* ---------------------------------------*/ +// Constant time point access to arbitrary point table. +// Indexed from 1 to 15, with -1 offset +// (index 0 is implicitly point at infinity) +// func p256Select(point, table []uint64, idx int) +TEXT ·p256Select(SB),NOSPLIT,$0 + MOVD idx+48(FP), const0 + MOVD table+24(FP), b_ptr + MOVD point+0(FP), res_ptr + + EOR x0, x0, x0 + EOR x1, x1, x1 + EOR x2, x2, x2 + EOR x3, x3, x3 + EOR y0, y0, y0 + EOR y1, y1, y1 + EOR y2, y2, y2 + EOR y3, y3, y3 + EOR t0, t0, t0 + EOR t1, t1, t1 + EOR t2, t2, t2 + EOR t3, t3, t3 + + MOVD $0, const1 + +loop_select: + ADD $1, const1 + CMP const0, const1 + LDP.P 16(b_ptr), (acc0, acc1) + CSEL EQ, acc0, x0, x0 + CSEL EQ, acc1, x1, x1 + LDP.P 16(b_ptr), (acc2, acc3) + CSEL EQ, acc2, x2, x2 + CSEL EQ, acc3, x3, x3 + LDP.P 16(b_ptr), (acc4, acc5) + CSEL EQ, acc4, y0, y0 + CSEL EQ, acc5, y1, y1 + LDP.P 16(b_ptr), (acc6, acc7) + CSEL EQ, acc6, y2, y2 + CSEL EQ, acc7, y3, y3 + LDP.P 16(b_ptr), (acc0, acc1) + CSEL EQ, acc0, t0, t0 + CSEL EQ, acc1, t1, t1 + LDP.P 16(b_ptr), (acc2, acc3) + CSEL EQ, acc2, t2, t2 + CSEL EQ, acc3, t3, t3 + + CMP $16, const1 + BNE loop_select + + STP (x0, x1), 0*16(res_ptr) + STP (x2, x3), 1*16(res_ptr) + STP (y0, y1), 2*16(res_ptr) + STP (y2, y3), 3*16(res_ptr) + STP (t0, t1), 4*16(res_ptr) + STP (t2, t3), 5*16(res_ptr) + RET +/* ---------------------------------------*/ +// Constant time point access to base point table. +// func p256SelectBase(point, table []uint64, idx int) +TEXT ·p256SelectBase(SB),NOSPLIT,$0 + MOVD idx+48(FP), t0 + MOVD table+24(FP), t1 + MOVD point+0(FP), res_ptr + + EOR x0, x0, x0 + EOR x1, x1, x1 + EOR x2, x2, x2 + EOR x3, x3, x3 + EOR y0, y0, y0 + EOR y1, y1, y1 + EOR y2, y2, y2 + EOR y3, y3, y3 + + MOVD $0, t2 + +loop_select: + ADD $1, t2 + CMP t0, t2 + LDP.P 16(t1), (acc0, acc1) + CSEL EQ, acc0, x0, x0 + CSEL EQ, acc1, x1, x1 + LDP.P 16(t1), (acc2, acc3) + CSEL EQ, acc2, x2, x2 + CSEL EQ, acc3, x3, x3 + LDP.P 16(t1), (acc4, acc5) + CSEL EQ, acc4, y0, y0 + CSEL EQ, acc5, y1, y1 + LDP.P 16(t1), (acc6, acc7) + CSEL EQ, acc6, y2, y2 + CSEL EQ, acc7, y3, y3 + + CMP $32, t2 + BNE loop_select + + STP (x0, x1), 0*16(res_ptr) + STP (x2, x3), 1*16(res_ptr) + STP (y0, y1), 2*16(res_ptr) + STP (y2, y3), 3*16(res_ptr) + RET +/* ---------------------------------------*/ +// func p256OrdSqr(res, in []uint64, n int) +TEXT ·p256OrdSqr(SB),NOSPLIT,$0 + MOVD in+24(FP), a_ptr + MOVD n+48(FP), b_ptr + + MOVD p256ordK0<>(SB), hlp1 + LDP p256ord<>+0x00(SB), (const0, const1) + LDP p256ord<>+0x10(SB), (const2, const3) + + LDP 0*16(a_ptr), (x0, x1) + LDP 1*16(a_ptr), (x2, x3) + +ordSqrLoop: + SUB $1, b_ptr + + // x[1:] * x[0] + MUL x0, x1, acc1 + UMULH x0, x1, acc2 + + MUL x0, x2, t0 + ADDS t0, acc2, acc2 + UMULH x0, x2, acc3 + + MUL x0, x3, t0 + ADCS t0, acc3, acc3 + UMULH x0, x3, acc4 + ADC $0, acc4, acc4 + // x[2:] * x[1] + MUL x1, x2, t0 + ADDS t0, acc3 + UMULH x1, x2, t1 + ADCS t1, acc4 + ADC $0, ZR, acc5 + + MUL x1, x3, t0 + ADDS t0, acc4 + UMULH x1, x3, t1 + ADC t1, acc5 + // x[3] * x[2] + MUL x2, x3, t0 + ADDS t0, acc5 + UMULH x2, x3, acc6 + ADC $0, acc6 + + MOVD $0, acc7 + // *2 + ADDS acc1, acc1 + ADCS acc2, acc2 + ADCS acc3, acc3 + ADCS acc4, acc4 + ADCS acc5, acc5 + ADCS acc6, acc6 + ADC $0, acc7 + // Missing products + MUL x0, x0, acc0 + UMULH x0, x0, t0 + ADDS t0, acc1, acc1 + + MUL x1, x1, t0 + ADCS t0, acc2, acc2 + UMULH x1, x1, t1 + ADCS t1, acc3, acc3 + + MUL x2, x2, t0 + ADCS t0, acc4, acc4 + UMULH x2, x2, t1 + ADCS t1, acc5, acc5 + + MUL x3, x3, t0 + ADCS t0, acc6, acc6 + UMULH x3, x3, t1 + ADC t1, acc7, acc7 + // First reduction step + MUL acc0, hlp1, hlp0 + + MUL const0, hlp1, t0 + ADDS t0, acc0, acc0 + UMULH const0, hlp0, t1 + + MUL const1, hlp0, t0 + ADCS t0, acc1, acc1 + UMULH const1, hlp0, y0 + + MUL const2, hlp0, t0 + ADCS t0, acc2, acc2 + UMULH const2, hlp0, acc0 + + MUL const3, hlp0, t0 + ADCS t0, acc3, acc3 + + UMULH const3, hlp0, hlp0 + ADC $0, hlp0 + + ADDS t1, acc1, acc1 + ADCS y0, acc2, acc2 + ADCS acc0, acc3, acc3 + ADC $0, hlp0, acc0 + // Second reduction step + MUL acc1, hlp1, hlp0 + + MUL const0, hlp1, t0 + ADDS t0, acc1, acc1 + UMULH const0, hlp0, t1 + + MUL const1, hlp0, t0 + ADCS t0, acc2, acc2 + UMULH const1, hlp0, y0 + + MUL const2, hlp0, t0 + ADCS t0, acc3, acc3 + UMULH const2, hlp0, acc1 + + MUL const3, hlp0, t0 + ADCS t0, acc0, acc0 + + UMULH const3, hlp0, hlp0 + ADC $0, hlp0 + + ADDS t1, acc2, acc2 + ADCS y0, acc3, acc3 + ADCS acc1, acc0, acc0 + ADC $0, hlp0, acc1 + // Third reduction step + MUL acc2, hlp1, hlp0 + + MUL const0, hlp1, t0 + ADDS t0, acc2, acc2 + UMULH const0, hlp0, t1 + + MUL const1, hlp0, t0 + ADCS t0, acc3, acc3 + UMULH const1, hlp0, y0 + + MUL const2, hlp0, t0 + ADCS t0, acc0, acc0 + UMULH const2, hlp0, acc2 + + MUL const3, hlp0, t0 + ADCS t0, acc1, acc1 + + UMULH const3, hlp0, hlp0 + ADC $0, hlp0 + + ADDS t1, acc3, acc3 + ADCS y0, acc0, acc0 + ADCS acc2, acc1, acc1 + ADC $0, hlp0, acc2 + + // Last reduction step + MUL acc3, hlp1, hlp0 + + MUL const0, hlp1, t0 + ADDS t0, acc3, acc3 + UMULH const0, hlp0, t1 + + MUL const1, hlp0, t0 + ADCS t0, acc0, acc0 + UMULH const1, hlp0, y0 + + MUL const2, hlp0, t0 + ADCS t0, acc1, acc1 + UMULH const2, hlp0, acc3 + + MUL const3, hlp0, t0 + ADCS t0, acc2, acc2 + + UMULH const3, hlp0, hlp0 + ADC $0, acc7 + + ADDS t1, acc0, acc0 + ADCS y0, acc1, acc1 + ADCS acc3, acc2, acc2 + ADC $0, hlp0, acc3 + + ADDS acc4, acc0, acc0 + ADCS acc5, acc1, acc1 + ADCS acc6, acc2, acc2 + ADCS acc7, acc3, acc3 + ADC $0, ZR, acc4 + + SUBS const0, acc0, y0 + SBCS const1, acc1, y1 + SBCS const2, acc2, y2 + SBCS const3, acc3, y3 + SBCS $0, acc4, acc4 + + CSEL CS, y0, acc0, x0 + CSEL CS, y1, acc1, x1 + CSEL CS, y2, acc2, x2 + CSEL CS, y3, acc3, x3 + + CBNZ b_ptr, ordSqrLoop + + MOVD res+0(FP), res_ptr + STP (x0, x1), 0*16(res_ptr) + STP (x2, x3), 1*16(res_ptr) + + RET +/* ---------------------------------------*/ +// func p256OrdMul(res, in1, in2 []uint64) +TEXT ·p256OrdMul(SB),NOSPLIT,$0 + MOVD in1+24(FP), a_ptr + MOVD in2+48(FP), b_ptr + + MOVD p256ordK0<>(SB), hlp1 + LDP p256ord<>+0x00(SB), (const0, const1) + LDP p256ord<>+0x10(SB), (const2, const3) + + LDP 0*16(a_ptr), (x0, x1) + LDP 1*16(a_ptr), (x2, x3) + LDP 0*16(b_ptr), (y0, y1) + LDP 1*16(b_ptr), (y2, y3) + + // y[0] * x + MUL y0, x0, acc0 + UMULH y0, x0, acc1 + + MUL y0, x1, t0 + ADDS t0, acc1 + UMULH y0, x1, acc2 + + MUL y0, x2, t0 + ADCS t0, acc2 + UMULH y0, x2, acc3 + + MUL y0, x3, t0 + ADCS t0, acc3 + UMULH y0, x3, acc4 + ADC $0, acc4 + // First reduction step + MUL acc0, hlp1, hlp0 + + MUL const0, hlp1, t0 + ADDS t0, acc0, acc0 + UMULH const0, hlp0, t1 + + MUL const1, hlp0, t0 + ADCS t0, acc1, acc1 + UMULH const1, hlp0, y0 + + MUL const2, hlp0, t0 + ADCS t0, acc2, acc2 + UMULH const2, hlp0, acc0 + + MUL const3, hlp0, t0 + ADCS t0, acc3, acc3 + + UMULH const3, hlp0, hlp0 + ADC $0, acc4 + + ADDS t1, acc1, acc1 + ADCS y0, acc2, acc2 + ADCS acc0, acc3, acc3 + ADC $0, hlp0, acc0 + // y[1] * x + MUL y1, x0, t0 + ADDS t0, acc1 + UMULH y1, x0, t1 + + MUL y1, x1, t0 + ADCS t0, acc2 + UMULH y1, x1, hlp0 + + MUL y1, x2, t0 + ADCS t0, acc3 + UMULH y1, x2, y0 + + MUL y1, x3, t0 + ADCS t0, acc4 + UMULH y1, x3, y1 + ADC $0, ZR, acc5 + + ADDS t1, acc2 + ADCS hlp0, acc3 + ADCS y0, acc4 + ADC y1, acc5 + // Second reduction step + MUL acc1, hlp1, hlp0 + + MUL const0, hlp1, t0 + ADDS t0, acc1, acc1 + UMULH const0, hlp0, t1 + + MUL const1, hlp0, t0 + ADCS t0, acc2, acc2 + UMULH const1, hlp0, y0 + + MUL const2, hlp0, t0 + ADCS t0, acc3, acc3 + UMULH const2, hlp0, acc1 + + MUL const3, hlp0, t0 + ADCS t0, acc0, acc0 + + UMULH const3, hlp0, hlp0 + ADC $0, acc5 + + ADDS t1, acc2, acc2 + ADCS y0, acc3, acc3 + ADCS acc1, acc0, acc0 + ADC $0, hlp0, acc1 + // y[2] * x + MUL y2, x0, t0 + ADDS t0, acc2 + UMULH y2, x0, t1 + + MUL y2, x1, t0 + ADCS t0, acc3 + UMULH y2, x1, hlp0 + + MUL y2, x2, t0 + ADCS t0, acc4 + UMULH y2, x2, y0 + + MUL y2, x3, t0 + ADCS t0, acc5 + UMULH y2, x3, y1 + ADC $0, ZR, acc6 + + ADDS t1, acc3 + ADCS hlp0, acc4 + ADCS y0, acc5 + ADC y1, acc6 + // Third reduction step + MUL acc2, hlp1, hlp0 + + MUL const0, hlp1, t0 + ADDS t0, acc2, acc2 + UMULH const0, hlp0, t1 + + MUL const1, hlp0, t0 + ADCS t0, acc3, acc3 + UMULH const1, hlp0, y0 + + MUL const2, hlp0, t0 + ADCS t0, acc0, acc0 + UMULH const2, hlp0, acc2 + + MUL const3, hlp0, t0 + ADCS t0, acc1, acc1 + + UMULH const3, hlp0, hlp0 + ADC $0, acc6 + + ADDS t1, acc3, acc3 + ADCS y0, acc0, acc0 + ADCS acc2, acc1, acc1 + ADC $0, hlp0, acc2 + // y[3] * x + MUL y3, x0, t0 + ADDS t0, acc3 + UMULH y3, x0, t1 + + MUL y3, x1, t0 + ADCS t0, acc4 + UMULH y3, x1, hlp0 + + MUL y3, x2, t0 + ADCS t0, acc5 + UMULH y3, x2, y0 + + MUL y3, x3, t0 + ADCS t0, acc6 + UMULH y3, x3, y1 + ADC $0, ZR, acc7 + + ADDS t1, acc4 + ADCS hlp0, acc5 + ADCS y0, acc6 + ADC y1, acc7 + // Last reduction step + MUL acc3, hlp1, hlp0 + + MUL const0, hlp1, t0 + ADDS t0, acc3, acc3 + UMULH const0, hlp0, t1 + + MUL const1, hlp0, t0 + ADCS t0, acc0, acc0 + UMULH const1, hlp0, y0 + + MUL const2, hlp0, t0 + ADCS t0, acc1, acc1 + UMULH const2, hlp0, acc3 + + MUL const3, hlp0, t0 + ADCS t0, acc2, acc2 + + UMULH const3, hlp0, hlp0 + ADC $0, acc7 + + ADDS t1, acc0, acc0 + ADCS y0, acc1, acc1 + ADCS acc3, acc2, acc2 + ADC $0, hlp0, acc3 + + ADDS acc4, acc0, acc0 + ADCS acc5, acc1, acc1 + ADCS acc6, acc2, acc2 + ADCS acc7, acc3, acc3 + ADC $0, ZR, acc4 + + SUBS const0, acc0, t0 + SBCS const1, acc1, t1 + SBCS const2, acc2, t2 + SBCS const3, acc3, t3 + SBCS $0, acc4, acc4 + + CSEL CS, t0, acc0, acc0 + CSEL CS, t1, acc1, acc1 + CSEL CS, t2, acc2, acc2 + CSEL CS, t3, acc3, acc3 + + MOVD res+0(FP), res_ptr + STP (acc0, acc1), 0*16(res_ptr) + STP (acc2, acc3), 1*16(res_ptr) + + RET +/* ---------------------------------------*/ +TEXT p256SubInternal<>(SB),NOSPLIT,$0 + SUBS x0, y0, acc0 + SBCS x1, y1, acc1 + SBCS x2, y2, acc2 + SBCS x3, y3, acc3 + SBC $0, ZR, t0 + + ADDS $-1, acc0, acc4 + ADCS const0, acc1, acc5 + ADCS $0, acc2, acc6 + ADC const1, acc3, acc7 + + ANDS $1, t0 + CSEL EQ, acc0, acc4, x0 + CSEL EQ, acc1, acc5, x1 + CSEL EQ, acc2, acc6, x2 + CSEL EQ, acc3, acc7, x3 + + RET +/* ---------------------------------------*/ +TEXT p256SqrInternal<>(SB),NOSPLIT,$0 + // x[1:] * x[0] + MUL x0, x1, acc1 + UMULH x0, x1, acc2 + + MUL x0, x2, t0 + ADDS t0, acc2, acc2 + UMULH x0, x2, acc3 + + MUL x0, x3, t0 + ADCS t0, acc3, acc3 + UMULH x0, x3, acc4 + ADC $0, acc4, acc4 + // x[2:] * x[1] + MUL x1, x2, t0 + ADDS t0, acc3 + UMULH x1, x2, t1 + ADCS t1, acc4 + ADC $0, ZR, acc5 + + MUL x1, x3, t0 + ADDS t0, acc4 + UMULH x1, x3, t1 + ADC t1, acc5 + // x[3] * x[2] + MUL x2, x3, t0 + ADDS t0, acc5 + UMULH x2, x3, acc6 + ADC $0, acc6 + + MOVD $0, acc7 + // *2 + ADDS acc1, acc1 + ADCS acc2, acc2 + ADCS acc3, acc3 + ADCS acc4, acc4 + ADCS acc5, acc5 + ADCS acc6, acc6 + ADC $0, acc7 + // Missing products + MUL x0, x0, acc0 + UMULH x0, x0, t0 + ADDS t0, acc1, acc1 + + MUL x1, x1, t0 + ADCS t0, acc2, acc2 + UMULH x1, x1, t1 + ADCS t1, acc3, acc3 + + MUL x2, x2, t0 + ADCS t0, acc4, acc4 + UMULH x2, x2, t1 + ADCS t1, acc5, acc5 + + MUL x3, x3, t0 + ADCS t0, acc6, acc6 + UMULH x3, x3, t1 + ADCS t1, acc7, acc7 + // First reduction step + ADDS acc0<<32, acc1, acc1 + LSR $32, acc0, t0 + MUL acc0, const1, t1 + UMULH acc0, const1, acc0 + ADCS t0, acc2, acc2 + ADCS t1, acc3, acc3 + ADC $0, acc0, acc0 + // Second reduction step + ADDS acc1<<32, acc2, acc2 + LSR $32, acc1, t0 + MUL acc1, const1, t1 + UMULH acc1, const1, acc1 + ADCS t0, acc3, acc3 + ADCS t1, acc0, acc0 + ADC $0, acc1, acc1 + // Third reduction step + ADDS acc2<<32, acc3, acc3 + LSR $32, acc2, t0 + MUL acc2, const1, t1 + UMULH acc2, const1, acc2 + ADCS t0, acc0, acc0 + ADCS t1, acc1, acc1 + ADC $0, acc2, acc2 + // Last reduction step + ADDS acc3<<32, acc0, acc0 + LSR $32, acc3, t0 + MUL acc3, const1, t1 + UMULH acc3, const1, acc3 + ADCS t0, acc1, acc1 + ADCS t1, acc2, acc2 + ADC $0, acc3, acc3 + // Add bits [511:256] of the sqr result + ADDS acc4, acc0, acc0 + ADCS acc5, acc1, acc1 + ADCS acc6, acc2, acc2 + ADCS acc7, acc3, acc3 + ADC $0, ZR, acc4 + + SUBS $-1, acc0, t0 + SBCS const0, acc1, t1 + SBCS $0, acc2, t2 + SBCS const1, acc3, t3 + SBCS $0, acc4, acc4 + + CSEL CS, t0, acc0, y0 + CSEL CS, t1, acc1, y1 + CSEL CS, t2, acc2, y2 + CSEL CS, t3, acc3, y3 + RET +/* ---------------------------------------*/ +TEXT p256MulInternal<>(SB),NOSPLIT,$0 + // y[0] * x + MUL y0, x0, acc0 + UMULH y0, x0, acc1 + + MUL y0, x1, t0 + ADDS t0, acc1 + UMULH y0, x1, acc2 + + MUL y0, x2, t0 + ADCS t0, acc2 + UMULH y0, x2, acc3 + + MUL y0, x3, t0 + ADCS t0, acc3 + UMULH y0, x3, acc4 + ADC $0, acc4 + // First reduction step + ADDS acc0<<32, acc1, acc1 + LSR $32, acc0, t0 + MUL acc0, const1, t1 + UMULH acc0, const1, acc0 + ADCS t0, acc2 + ADCS t1, acc3 + ADC $0, acc0 + // y[1] * x + MUL y1, x0, t0 + ADDS t0, acc1 + UMULH y1, x0, t1 + + MUL y1, x1, t0 + ADCS t0, acc2 + UMULH y1, x1, t2 + + MUL y1, x2, t0 + ADCS t0, acc3 + UMULH y1, x2, t3 + + MUL y1, x3, t0 + ADCS t0, acc4 + UMULH y1, x3, hlp0 + ADC $0, ZR, acc5 + + ADDS t1, acc2 + ADCS t2, acc3 + ADCS t3, acc4 + ADC hlp0, acc5 + // Second reduction step + ADDS acc1<<32, acc2, acc2 + LSR $32, acc1, t0 + MUL acc1, const1, t1 + UMULH acc1, const1, acc1 + ADCS t0, acc3 + ADCS t1, acc0 + ADC $0, acc1 + // y[2] * x + MUL y2, x0, t0 + ADDS t0, acc2 + UMULH y2, x0, t1 + + MUL y2, x1, t0 + ADCS t0, acc3 + UMULH y2, x1, t2 + + MUL y2, x2, t0 + ADCS t0, acc4 + UMULH y2, x2, t3 + + MUL y2, x3, t0 + ADCS t0, acc5 + UMULH y2, x3, hlp0 + ADC $0, ZR, acc6 + + ADDS t1, acc3 + ADCS t2, acc4 + ADCS t3, acc5 + ADC hlp0, acc6 + // Third reduction step + ADDS acc2<<32, acc3, acc3 + LSR $32, acc2, t0 + MUL acc2, const1, t1 + UMULH acc2, const1, acc2 + ADCS t0, acc0 + ADCS t1, acc1 + ADC $0, acc2 + // y[3] * x + MUL y3, x0, t0 + ADDS t0, acc3 + UMULH y3, x0, t1 + + MUL y3, x1, t0 + ADCS t0, acc4 + UMULH y3, x1, t2 + + MUL y3, x2, t0 + ADCS t0, acc5 + UMULH y3, x2, t3 + + MUL y3, x3, t0 + ADCS t0, acc6 + UMULH y3, x3, hlp0 + ADC $0, ZR, acc7 + + ADDS t1, acc4 + ADCS t2, acc5 + ADCS t3, acc6 + ADC hlp0, acc7 + // Last reduction step + ADDS acc3<<32, acc0, acc0 + LSR $32, acc3, t0 + MUL acc3, const1, t1 + UMULH acc3, const1, acc3 + ADCS t0, acc1 + ADCS t1, acc2 + ADC $0, acc3 + // Add bits [511:256] of the mul result + ADDS acc4, acc0, acc0 + ADCS acc5, acc1, acc1 + ADCS acc6, acc2, acc2 + ADCS acc7, acc3, acc3 + ADC $0, ZR, acc4 + + SUBS $-1, acc0, t0 + SBCS const0, acc1, t1 + SBCS $0, acc2, t2 + SBCS const1, acc3, t3 + SBCS $0, acc4, acc4 + + CSEL CS, t0, acc0, y0 + CSEL CS, t1, acc1, y1 + CSEL CS, t2, acc2, y2 + CSEL CS, t3, acc3, y3 + RET +/* ---------------------------------------*/ +#define p256MulBy2Inline \ + ADDS y0, y0, x0; \ + ADCS y1, y1, x1; \ + ADCS y2, y2, x2; \ + ADCS y3, y3, x3; \ + ADC $0, ZR, hlp0; \ + SUBS $-1, x0, t0; \ + SBCS const0, x1, t1;\ + SBCS $0, x2, t2; \ + SBCS const1, x3, t3;\ + SBCS $0, hlp0, hlp0;\ + CSEL CC, x0, t0, x0;\ + CSEL CC, x1, t1, x1;\ + CSEL CC, x2, t2, x2;\ + CSEL CC, x3, t3, x3; +/* ---------------------------------------*/ +#define x1in(off) (off)(a_ptr) +#define y1in(off) (off + 32)(a_ptr) +#define z1in(off) (off + 64)(a_ptr) +#define x2in(off) (off)(b_ptr) +#define z2in(off) (off + 64)(b_ptr) +#define x3out(off) (off)(res_ptr) +#define y3out(off) (off + 32)(res_ptr) +#define z3out(off) (off + 64)(res_ptr) +#define LDx(src) LDP src(0), (x0, x1); LDP src(16), (x2, x3) +#define LDy(src) LDP src(0), (y0, y1); LDP src(16), (y2, y3) +#define STx(src) STP (x0, x1), src(0); STP (x2, x3), src(16) +#define STy(src) STP (y0, y1), src(0); STP (y2, y3), src(16) +/* ---------------------------------------*/ +#define y2in(off) (32*0 + 8 + off)(RSP) +#define s2(off) (32*1 + 8 + off)(RSP) +#define z1sqr(off) (32*2 + 8 + off)(RSP) +#define h(off) (32*3 + 8 + off)(RSP) +#define r(off) (32*4 + 8 + off)(RSP) +#define hsqr(off) (32*5 + 8 + off)(RSP) +#define rsqr(off) (32*6 + 8 + off)(RSP) +#define hcub(off) (32*7 + 8 + off)(RSP) + +#define z2sqr(off) (32*8 + 8 + off)(RSP) +#define s1(off) (32*9 + 8 + off)(RSP) +#define u1(off) (32*10 + 8 + off)(RSP) +#define u2(off) (32*11 + 8 + off)(RSP) + +// func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int) +TEXT ·p256PointAddAffineAsm(SB),0,$264-96 + MOVD in1+24(FP), a_ptr + MOVD in2+48(FP), b_ptr + MOVD sign+72(FP), hlp0 + MOVD sel+80(FP), hlp1 + MOVD zero+88(FP), t2 + + MOVD $1, t0 + CMP $0, t2 + CSEL EQ, ZR, t0, t2 + CMP $0, hlp1 + CSEL EQ, ZR, t0, hlp1 + + MOVD p256const0<>(SB), const0 + MOVD p256const1<>(SB), const1 + EOR t2<<1, hlp1 + + // Negate y2in based on sign + LDP 2*16(b_ptr), (y0, y1) + LDP 3*16(b_ptr), (y2, y3) + MOVD $-1, acc0 + + SUBS y0, acc0, acc0 + SBCS y1, const0, acc1 + SBCS y2, ZR, acc2 + SBCS y3, const1, acc3 + SBC $0, ZR, t0 + + ADDS $-1, acc0, acc4 + ADCS const0, acc1, acc5 + ADCS $0, acc2, acc6 + ADCS const1, acc3, acc7 + ADC $0, t0, t0 + + CMP $0, t0 + CSEL EQ, acc4, acc0, acc0 + CSEL EQ, acc5, acc1, acc1 + CSEL EQ, acc6, acc2, acc2 + CSEL EQ, acc7, acc3, acc3 + // If condition is 0, keep original value + CMP $0, hlp0 + CSEL EQ, y0, acc0, y0 + CSEL EQ, y1, acc1, y1 + CSEL EQ, y2, acc2, y2 + CSEL EQ, y3, acc3, y3 + // Store result + STy(y2in) + // Begin point add + LDx(z1in) + CALL p256SqrInternal<>(SB) // z1ˆ2 + STy(z1sqr) + + LDx(x2in) + CALL p256MulInternal<>(SB) // x2 * z1ˆ2 + + LDx(x1in) + CALL p256SubInternal<>(SB) // h = u2 - u1 + STx(h) + + LDy(z1in) + CALL p256MulInternal<>(SB) // z3 = h * z1 + + LDP 4*16(a_ptr), (acc0, acc1)// iff select[0] == 0, z3 = z1 + LDP 5*16(a_ptr), (acc2, acc3) + ANDS $1, hlp1, ZR + CSEL EQ, acc0, y0, y0 + CSEL EQ, acc1, y1, y1 + CSEL EQ, acc2, y2, y2 + CSEL EQ, acc3, y3, y3 + LDP p256one<>+0x00(SB), (acc0, acc1) + LDP p256one<>+0x10(SB), (acc2, acc3) + ANDS $2, hlp1, ZR // iff select[1] == 0, z3 = 1 + CSEL EQ, acc0, y0, y0 + CSEL EQ, acc1, y1, y1 + CSEL EQ, acc2, y2, y2 + CSEL EQ, acc3, y3, y3 + LDx(z1in) + MOVD res+0(FP), t0 + STP (y0, y1), 4*16(t0) + STP (y2, y3), 5*16(t0) + + LDy(z1sqr) + CALL p256MulInternal<>(SB) // z1 ^ 3 + + LDx(y2in) + CALL p256MulInternal<>(SB) // s2 = y2 * z1ˆ3 + STy(s2) + + LDx(y1in) + CALL p256SubInternal<>(SB) // r = s2 - s1 + STx(r) + + CALL p256SqrInternal<>(SB) // rsqr = rˆ2 + STy (rsqr) + + LDx(h) + CALL p256SqrInternal<>(SB) // hsqr = hˆ2 + STy(hsqr) + + CALL p256MulInternal<>(SB) // hcub = hˆ3 + STy(hcub) + + LDx(y1in) + CALL p256MulInternal<>(SB) // y1 * hˆ3 + STy(s2) + + LDP hsqr(0*8), (x0, x1) + LDP hsqr(2*8), (x2, x3) + LDP 0*16(a_ptr), (y0, y1) + LDP 1*16(a_ptr), (y2, y3) + CALL p256MulInternal<>(SB) // u1 * hˆ2 + STP (y0, y1), h(0*8) + STP (y2, y3), h(2*8) + + p256MulBy2Inline // u1 * hˆ2 * 2, inline + + LDy(rsqr) + CALL p256SubInternal<>(SB) // rˆ2 - u1 * hˆ2 * 2 + + MOVD x0, y0 + MOVD x1, y1 + MOVD x2, y2 + MOVD x3, y3 + LDx(hcub) + CALL p256SubInternal<>(SB) + + LDP 0*16(a_ptr), (acc0, acc1) + LDP 1*16(a_ptr), (acc2, acc3) + ANDS $1, hlp1, ZR // iff select[0] == 0, x3 = x1 + CSEL EQ, acc0, x0, x0 + CSEL EQ, acc1, x1, x1 + CSEL EQ, acc2, x2, x2 + CSEL EQ, acc3, x3, x3 + LDP 0*16(b_ptr), (acc0, acc1) + LDP 1*16(b_ptr), (acc2, acc3) + ANDS $2, hlp1, ZR // iff select[1] == 0, x3 = x2 + CSEL EQ, acc0, x0, x0 + CSEL EQ, acc1, x1, x1 + CSEL EQ, acc2, x2, x2 + CSEL EQ, acc3, x3, x3 + MOVD res+0(FP), t0 + STP (x0, x1), 0*16(t0) + STP (x2, x3), 1*16(t0) + + LDP h(0*8), (y0, y1) + LDP h(2*8), (y2, y3) + CALL p256SubInternal<>(SB) + + LDP r(0*8), (y0, y1) + LDP r(2*8), (y2, y3) + CALL p256MulInternal<>(SB) + + LDP s2(0*8), (x0, x1) + LDP s2(2*8), (x2, x3) + CALL p256SubInternal<>(SB) + LDP 2*16(a_ptr), (acc0, acc1) + LDP 3*16(a_ptr), (acc2, acc3) + ANDS $1, hlp1, ZR // iff select[0] == 0, y3 = y1 + CSEL EQ, acc0, x0, x0 + CSEL EQ, acc1, x1, x1 + CSEL EQ, acc2, x2, x2 + CSEL EQ, acc3, x3, x3 + LDP y2in(0*8), (acc0, acc1) + LDP y2in(2*8), (acc2, acc3) + ANDS $2, hlp1, ZR // iff select[1] == 0, y3 = y2 + CSEL EQ, acc0, x0, x0 + CSEL EQ, acc1, x1, x1 + CSEL EQ, acc2, x2, x2 + CSEL EQ, acc3, x3, x3 + MOVD res+0(FP), t0 + STP (x0, x1), 2*16(t0) + STP (x2, x3), 3*16(t0) + + RET + +#define p256AddInline \ + ADDS y0, x0, x0; \ + ADCS y1, x1, x1; \ + ADCS y2, x2, x2; \ + ADCS y3, x3, x3; \ + ADC $0, ZR, hlp0; \ + SUBS $-1, x0, t0; \ + SBCS const0, x1, t1;\ + SBCS $0, x2, t2; \ + SBCS const1, x3, t3;\ + SBCS $0, hlp0, hlp0;\ + CSEL CC, x0, t0, x0;\ + CSEL CC, x1, t1, x1;\ + CSEL CC, x2, t2, x2;\ + CSEL CC, x3, t3, x3; + +#define s(off) (32*0 + 8 + off)(RSP) +#define m(off) (32*1 + 8 + off)(RSP) +#define zsqr(off) (32*2 + 8 + off)(RSP) +#define tmp(off) (32*3 + 8 + off)(RSP) + +//func p256PointDoubleAsm(res, in []uint64) +TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$136-48 + MOVD res+0(FP), res_ptr + MOVD in+24(FP), a_ptr + + MOVD p256const0<>(SB), const0 + MOVD p256const1<>(SB), const1 + + // Begin point double + LDP 4*16(a_ptr), (x0, x1) + LDP 5*16(a_ptr), (x2, x3) + CALL p256SqrInternal<>(SB) + STP (y0, y1), zsqr(0*8) + STP (y2, y3), zsqr(2*8) + + LDP 0*16(a_ptr), (x0, x1) + LDP 1*16(a_ptr), (x2, x3) + p256AddInline + STx(m) + + LDx(z1in) + LDy(y1in) + CALL p256MulInternal<>(SB) + p256MulBy2Inline + STx(z3out) + + LDy(x1in) + LDx(zsqr) + CALL p256SubInternal<>(SB) + LDy(m) + CALL p256MulInternal<>(SB) + + // Multiply by 3 + p256MulBy2Inline + p256AddInline + STx(m) + + LDy(y1in) + p256MulBy2Inline + CALL p256SqrInternal<>(SB) + STy(s) + MOVD y0, x0 + MOVD y1, x1 + MOVD y2, x2 + MOVD y3, x3 + CALL p256SqrInternal<>(SB) + + // Divide by 2 + ADDS $-1, y0, t0 + ADCS const0, y1, t1 + ADCS $0, y2, t2 + ADCS const1, y3, t3 + ADC $0, ZR, hlp0 + + ANDS $1, y0, ZR + CSEL EQ, y0, t0, t0 + CSEL EQ, y1, t1, t1 + CSEL EQ, y2, t2, t2 + CSEL EQ, y3, t3, t3 + AND y0, hlp0, hlp0 + + EXTR $1, t0, t1, y0 + EXTR $1, t1, t2, y1 + EXTR $1, t2, t3, y2 + EXTR $1, t3, hlp0, y3 + STy(y3out) + + LDx(x1in) + LDy(s) + CALL p256MulInternal<>(SB) + STy(s) + p256MulBy2Inline + STx(tmp) + + LDx(m) + CALL p256SqrInternal<>(SB) + LDx(tmp) + CALL p256SubInternal<>(SB) + + STx(x3out) + + LDy(s) + CALL p256SubInternal<>(SB) + + LDy(m) + CALL p256MulInternal<>(SB) + + LDx(y3out) + CALL p256SubInternal<>(SB) + STx(y3out) + RET +/* ---------------------------------------*/ +#undef y2in +#undef x3out +#undef y3out +#undef z3out +#define y2in(off) (off + 32)(b_ptr) +#define x3out(off) (off)(b_ptr) +#define y3out(off) (off + 32)(b_ptr) +#define z3out(off) (off + 64)(b_ptr) +//func p256PointAddAsm(res, in1, in2 []uint64) int +TEXT ·p256PointAddAsm(SB),0,$392-80 + // See https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl + // Move input to stack in order to free registers + MOVD in1+24(FP), a_ptr + MOVD in2+48(FP), b_ptr + + MOVD p256const0<>(SB), const0 + MOVD p256const1<>(SB), const1 + + // Begin point add + LDx(z2in) + CALL p256SqrInternal<>(SB) // z2^2 + STy(z2sqr) + + CALL p256MulInternal<>(SB) // z2^3 + + LDx(y1in) + CALL p256MulInternal<>(SB) // s1 = z2ˆ3*y1 + STy(s1) + + LDx(z1in) + CALL p256SqrInternal<>(SB) // z1^2 + STy(z1sqr) + + CALL p256MulInternal<>(SB) // z1^3 + + LDx(y2in) + CALL p256MulInternal<>(SB) // s2 = z1ˆ3*y2 + + LDx(s1) + CALL p256SubInternal<>(SB) // r = s2 - s1 + STx(r) + + MOVD $1, t2 + ORR x0, x1, t0 // Check if zero mod p256 + ORR x2, x3, t1 + ORR t1, t0, t0 + CMP $0, t0 + CSEL EQ, t2, ZR, hlp1 + + EOR $-1, x0, t0 + EOR const0, x1, t1 + EOR const1, x3, t3 + + ORR t0, t1, t0 + ORR x2, t3, t1 + ORR t1, t0, t0 + CMP $0, t0 + CSEL EQ, t2, hlp1, hlp1 + + LDx(z2sqr) + LDy(x1in) + CALL p256MulInternal<>(SB) // u1 = x1 * z2ˆ2 + STy(u1) + + LDx(z1sqr) + LDy(x2in) + CALL p256MulInternal<>(SB) // u2 = x2 * z1ˆ2 + STy(u2) + + LDx(u1) + CALL p256SubInternal<>(SB) // h = u2 - u1 + STx(h) + + MOVD $1, t2 + ORR x0, x1, t0 // Check if zero mod p256 + ORR x2, x3, t1 + ORR t1, t0, t0 + CMP $0, t0 + CSEL EQ, t2, ZR, hlp0 + + EOR $-1, x0, t0 + EOR const0, x1, t1 + EOR const1, x3, t3 + + ORR t0, t1, t0 + ORR x2, t3, t1 + ORR t1, t0, t0 + CMP $0, t0 + CSEL EQ, t2, hlp0, hlp0 + + AND hlp0, hlp1, hlp1 + + LDx(r) + CALL p256SqrInternal<>(SB) // rsqr = rˆ2 + STy(rsqr) + + LDx(h) + CALL p256SqrInternal<>(SB) // hsqr = hˆ2 + STy(hsqr) + + LDx(h) + CALL p256MulInternal<>(SB) // hcub = hˆ3 + STy(hcub) + + LDx(s1) + CALL p256MulInternal<>(SB) + STy(s2) + + LDx(z1in) + LDy(z2in) + CALL p256MulInternal<>(SB) // z1 * z2 + LDx(h) + CALL p256MulInternal<>(SB) // z1 * z2 * h + MOVD res+0(FP), b_ptr + STy(z3out) + + LDx(hsqr) + LDy(u1) + CALL p256MulInternal<>(SB) // hˆ2 * u1 + STy(u2) + + p256MulBy2Inline // u1 * hˆ2 * 2, inline + LDy(rsqr) + CALL p256SubInternal<>(SB) // rˆ2 - u1 * hˆ2 * 2 + + MOVD x0, y0 + MOVD x1, y1 + MOVD x2, y2 + MOVD x3, y3 + LDx(hcub) + CALL p256SubInternal<>(SB) + STx(x3out) + + LDy(u2) + CALL p256SubInternal<>(SB) + + LDy(r) + CALL p256MulInternal<>(SB) + + LDx(s2) + CALL p256SubInternal<>(SB) + STx(y3out) + + MOVD hlp1, R0 + MOVD R0, ret+72(FP) + + RET diff --git a/src/crypto/elliptic/p256_asm_ppc64le.s b/src/crypto/elliptic/p256_asm_ppc64le.s new file mode 100644 index 0000000..69e96e2 --- /dev/null +++ b/src/crypto/elliptic/p256_asm_ppc64le.s @@ -0,0 +1,2494 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// This is a port of the s390x asm implementation. +// to ppc64le. + +// Some changes were needed due to differences in +// the Go opcodes and/or available instructions +// between s390x and ppc64le. + +// 1. There were operand order differences in the +// VSUBUQM, VSUBCUQ, and VSEL instructions. + +// 2. ppc64 does not have a multiply high and low +// like s390x, so those were implemented using +// macros to compute the equivalent values. + +// 3. The LVX, STVX instructions on ppc64 require +// 16 byte alignment of the data. To avoid that +// requirement, data is loaded using LXVD2X and +// STXVD2X with VPERM to reorder bytes correctly. + +// I have identified some areas where I believe +// changes would be needed to make this work for big +// endian; however additional changes beyond what I +// have noted are most likely needed to make it work. +// - The string used with VPERM to swap the byte order +// for loads and stores. +// - The EXTRACT_HI and EXTRACT_LO strings. +// - The constants that are loaded from CPOOL. +// + +// Permute string used by VPERM to reorder bytes +// loaded or stored using LXVD2X or STXVD2X +// on little endian. +DATA byteswap<>+0(SB)/8, $0x08090a0b0c0d0e0f +DATA byteswap<>+8(SB)/8, $0x0001020304050607 + +// The following constants are defined in an order +// that is correct for use with LXVD2X/STXVD2X +// on little endian. +DATA p256<>+0x00(SB)/8, $0xffffffff00000001 // P256 +DATA p256<>+0x08(SB)/8, $0x0000000000000000 // P256 +DATA p256<>+0x10(SB)/8, $0x00000000ffffffff // P256 +DATA p256<>+0x18(SB)/8, $0xffffffffffffffff // P256 +DATA p256<>+0x20(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0 +DATA p256<>+0x28(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0 +DATA p256<>+0x30(SB)/8, $0x0000000010111213 // SEL 0 d1 d0 0 +DATA p256<>+0x38(SB)/8, $0x1415161700000000 // SEL 0 d1 d0 0 +DATA p256<>+0x40(SB)/8, $0x18191a1b1c1d1e1f // SEL d1 d0 d1 d0 +DATA p256<>+0x48(SB)/8, $0x18191a1b1c1d1e1f // SEL d1 d0 d1 d0 +DATA p256mul<>+0x00(SB)/8, $0x00000000ffffffff // P256 original +DATA p256mul<>+0x08(SB)/8, $0xffffffffffffffff // P256 +DATA p256mul<>+0x10(SB)/8, $0xffffffff00000001 // P256 original +DATA p256mul<>+0x18(SB)/8, $0x0000000000000000 // P256 +DATA p256mul<>+0x20(SB)/8, $0x1c1d1e1f00000000 // SEL d0 0 0 d0 +DATA p256mul<>+0x28(SB)/8, $0x000000001c1d1e1f // SEL d0 0 0 d0 +DATA p256mul<>+0x30(SB)/8, $0x0001020304050607 // SEL d0 0 d1 d0 +DATA p256mul<>+0x38(SB)/8, $0x1c1d1e1f0c0d0e0f // SEL d0 0 d1 d0 +DATA p256mul<>+0x40(SB)/8, $0x040506071c1d1e1f // SEL 0 d1 d0 d1 +DATA p256mul<>+0x48(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL 0 d1 d0 d1 +DATA p256mul<>+0x50(SB)/8, $0x0405060704050607 // SEL 0 0 d1 d0 +DATA p256mul<>+0x58(SB)/8, $0x1c1d1e1f0c0d0e0f // SEL 0 0 d1 d0 +DATA p256mul<>+0x60(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0 +DATA p256mul<>+0x68(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0 +DATA p256mul<>+0x70(SB)/8, $0x141516170c0d0e0f // SEL 0 d1 d0 0 +DATA p256mul<>+0x78(SB)/8, $0x1c1d1e1f14151617 // SEL 0 d1 d0 0 +DATA p256mul<>+0x80(SB)/8, $0xffffffff00000000 // (1*2^256)%P256 +DATA p256mul<>+0x88(SB)/8, $0x0000000000000001 // (1*2^256)%P256 +DATA p256mul<>+0x90(SB)/8, $0x00000000fffffffe // (1*2^256)%P256 +DATA p256mul<>+0x98(SB)/8, $0xffffffffffffffff // (1*2^256)%P256 + +// The following are used with VPERM to extract the high and low +// values from the intermediate results of a vector multiply. +// They are used in the VMULTxxx macros. These have been tested +// only on little endian, I think they would have to be different +// for big endian. +DATA p256permhilo<>+0x00(SB)/8, $0x0405060714151617 // least significant +DATA p256permhilo<>+0x08(SB)/8, $0x0c0d0e0f1c1d1e1f +DATA p256permhilo<>+0x10(SB)/8, $0x0001020310111213 // most significant +DATA p256permhilo<>+0x18(SB)/8, $0x08090a0b18191A1B + +// External declarations for constants +GLOBL p256ord<>(SB), 8, $32 +GLOBL p256<>(SB), 8, $80 +GLOBL p256mul<>(SB), 8, $160 +GLOBL p256permhilo<>(SB), 8, $32 +GLOBL byteswap<>+0(SB), RODATA, $16 + +// The following macros are used to implement the ppc64le +// equivalent function from the corresponding s390x +// instruction for vector multiply high, low, and add, +// since there aren't exact equivalent instructions. +// The corresponding s390x instructions appear in the +// comments. +// Implementation for big endian would have to be +// investigated, I think it would be different. +// +// Vector multiply low word +// +// VMLF x0, x1, out_low +#define VMULT_LOW(x1, x2, out_low) \ + VMULUWM x1, x2, out_low + +// +// Vector multiply high word +// +// VMLHF x0, x1, out_hi +#define VMULT_HI(x1, x2, out_hi) \ + VMULEUW x1, x2, TMP1; \ + VMULOUW x1, x2, TMP2; \ + VPERM TMP1, TMP2, EXTRACT_HI, out_hi + +// +// Vector multiply word +// +// VMLF x0, x1, out_low +// VMLHF x0, x1, out_hi +#define VMULT(x1, x2, out_low, out_hi) \ + VMULEUW x1, x2, TMP1; \ + VMULOUW x1, x2, TMP2; \ + VPERM TMP1, TMP2, EXTRACT_LO, out_low; \ + VPERM TMP1, TMP2, EXTRACT_HI, out_hi + +// +// Vector multiply add word +// +// VMALF x0, x1, y, out_low +// VMALHF x0, x1, y, out_hi +#define VMULT_ADD(x1, x2, y, out_low, out_hi) \ + VSPLTISW $1, TMP1; \ + VMULEUW y, TMP1, TMP2; \ + VMULOUW y, TMP1, TMP1; \ + VMULEUW x1, x2, out_low; \ + VMULOUW x1, x2, out_hi; \ + VADDUDM TMP1, out_hi, TMP1; \ + VADDUDM TMP2, out_low, TMP2; \ + VPERM TMP2, TMP1, EXTRACT_LO, out_low; \ + VPERM TMP2, TMP1, EXTRACT_HI, out_hi + +// +// Vector multiply add high word +// +// VMALF x0, x1, y, out_low +// VMALHF x0, x1, y, out_hi +#define VMULT_ADD_HI(x1, x2, y, out_low, out_hi) \ + VSPLTISW $1, TMP1; \ + VMULOUW y, TMP1, TMP2; \ + VMULEUW y, TMP1, TMP1; \ + VMULEUW x1, x2, out_hi; \ + VMULOUW x1, x2, out_low; \ + VADDUDM TMP1, out_hi, TMP1; \ + VADDUDM TMP2, out_low, TMP2; \ + VPERM TMP2, TMP1, EXTRACT_HI, out_hi + +// +// Vector multiply add low word +// +// VMALF s0, x1, y, out_low +#define VMULT_ADD_LOW(x1, x2, y, out_low) \ + VMULUWM x1, x2, out_low; \ + VADDUWM out_low, y, out_low + +#define res_ptr R3 +#define a_ptr R4 + +#undef res_ptr +#undef a_ptr + +// func p256NegCond(val *p256Point, cond int) +#define P1ptr R3 +#define CPOOL R7 + +#define Y1L V0 +#define Y1L_ VS32 +#define Y1H V1 +#define Y1H_ VS33 +#define T1L V2 +#define T1L_ VS34 +#define T1H V3 +#define T1H_ VS35 + +#define SWAP V28 +#define SWAP_ VS60 + +#define PL V30 +#define PL_ VS62 +#define PH V31 +#define PH_ VS63 + +#define SEL1 V5 +#define SEL1_ VS37 +#define CAR1 V6 +// +// iff cond == 1 val <- -val +// +TEXT ·p256NegCond(SB), NOSPLIT, $0-16 + MOVD val+0(FP), P1ptr + MOVD $16, R16 + MOVD $32, R17 + MOVD $48, R18 + MOVD $40, R19 + + MOVD cond+8(FP), R6 + CMP $0, R6 + BC 12, 2, LR // just return if cond == 0 + + MOVD $p256mul<>+0x00(SB), CPOOL + + MOVD $byteswap<>+0x00(SB), R8 + LXVD2X (R8)(R0), SWAP_ + + LXVD2X (P1ptr)(R17), Y1L_ + LXVD2X (P1ptr)(R18), Y1H_ + + VPERM Y1H, Y1H, SWAP, Y1H + VPERM Y1L, Y1L, SWAP, Y1L + + LXVD2X (CPOOL)(R0), PL_ + LXVD2X (CPOOL)(R16), PH_ + + VSUBCUQ PL, Y1L, CAR1 // subtract part2 giving carry + VSUBUQM PL, Y1L, T1L // subtract part2 giving result + VSUBEUQM PH, Y1H, CAR1, T1H // subtract part1 using carry from part2 + + VPERM T1H, T1H, SWAP, T1H + VPERM T1L, T1L, SWAP, T1L + + STXVD2X T1L_, (R17+P1ptr) + STXVD2X T1H_, (R18+P1ptr) + RET + +#undef P1ptr +#undef CPOOL +#undef Y1L +#undef Y1L_ +#undef Y1H +#undef Y1H_ +#undef T1L +#undef T1L_ +#undef T1H +#undef T1H_ +#undef PL +#undef PL_ +#undef PH +#undef PH_ +#undef SEL1 +#undef SEL1_ +#undef CAR1 + +// +// if cond == 0 res <-b else res <-a +// +// func p256MovCond(res, a, b *p256Point, cond int) +#define P3ptr R3 +#define P1ptr R4 +#define P2ptr R5 + +#define FROMptr R7 +#define X1L V0 +#define X1H V1 +#define Y1L V2 +#define Y1H V3 +#define Z1L V4 +#define Z1H V5 +#define X1L_ VS32 +#define X1H_ VS33 +#define Y1L_ VS34 +#define Y1H_ VS35 +#define Z1L_ VS36 +#define Z1H_ VS37 + +// This function uses LXVD2X and STXVD2X to avoid the +// data alignment requirement for LVX, STVX. Since +// this code is just moving bytes and not doing arithmetic, +// order of the bytes doesn't matter. +// +TEXT ·p256MovCond(SB), NOSPLIT, $0-32 + MOVD res+0(FP), P3ptr + MOVD a+8(FP), P1ptr + MOVD b+16(FP), P2ptr + MOVD cond+24(FP), R6 + MOVD $16, R16 + MOVD $32, R17 + MOVD $48, R18 + MOVD $56, R21 + MOVD $64, R19 + MOVD $80, R20 + + // Check the condition + CMP $0, R6 + + // If 0, use b as the source + BEQ FROMB + + // Not 0, use a as the source + MOVD P1ptr, FROMptr + BR LOADVALS + +FROMB: + MOVD P2ptr, FROMptr + +LOADVALS: + // Load from a or b depending on the setting + // of FROMptr + LXVW4X (FROMptr+R0), X1H_ + LXVW4X (FROMptr+R16), X1L_ + LXVW4X (FROMptr+R17), Y1H_ + LXVW4X (FROMptr+R18), Y1L_ + LXVW4X (FROMptr+R19), Z1H_ + LXVW4X (FROMptr+R20), Z1L_ + + STXVW4X X1H_, (P3ptr+R0) + STXVW4X X1L_, (P3ptr+R16) + STXVW4X Y1H_, (P3ptr+R17) + STXVW4X Y1L_, (P3ptr+R18) + STXVW4X Z1H_, (P3ptr+R19) + STXVW4X Z1L_, (P3ptr+R20) + + RET + +#undef P3ptr +#undef P1ptr +#undef P2ptr +#undef FROMptr +#undef X1L +#undef X1H +#undef Y1L +#undef Y1H +#undef Z1L +#undef Z1H +#undef X1L_ +#undef X1H_ +#undef Y1L_ +#undef Y1H_ +#undef Z1L_ +#undef Z1H_ +// +// Select the point from the table for idx +// +// func p256Select(point *p256Point, table []p256Point, idx int) +#define P3ptr R3 +#define P1ptr R4 +#define COUNT R5 + +#define X1L V0 +#define X1H V1 +#define Y1L V2 +#define Y1H V3 +#define Z1L V4 +#define Z1H V5 +#define X1L_ VS32 +#define X1H_ VS33 +#define Y1L_ VS34 +#define Y1H_ VS35 +#define Z1L_ VS36 +#define Z1H_ VS37 +#define X2L V6 +#define X2H V7 +#define Y2L V8 +#define Y2H V9 +#define Z2L V10 +#define Z2H V11 +#define X2L_ VS38 +#define X2H_ VS39 +#define Y2L_ VS40 +#define Y2H_ VS41 +#define Z2L_ VS42 +#define Z2H_ VS43 + +#define ONE V18 +#define IDX V19 +#define SEL1 V20 +#define SEL1_ VS52 +#define SEL2 V21 +// +TEXT ·p256Select(SB), NOSPLIT, $0-40 + MOVD point+0(FP), P3ptr + MOVD table+8(FP), P1ptr + MOVD $16, R16 + MOVD $32, R17 + MOVD $48, R18 + MOVD $64, R19 + MOVD $80, R20 + + LXVDSX (R1)(R19), SEL1_ // VLREPG idx+32(FP), SEL1 + VSPLTB $7, SEL1, IDX // splat byte + VSPLTISB $1, ONE // VREPIB $1, ONE + VSPLTISB $1, SEL2 // VREPIB $1, SEL2 + MOVD $17, COUNT + MOVD COUNT, CTR // set up ctr + + VSPLTISB $0, X1H // VZERO X1H + VSPLTISB $0, X1L // VZERO X1L + VSPLTISB $0, Y1H // VZERO Y1H + VSPLTISB $0, Y1L // VZERO Y1L + VSPLTISB $0, Z1H // VZERO Z1H + VSPLTISB $0, Z1L // VZERO Z1L + +loop_select: + + // LVXD2X is used here since data alignment doesn't + // matter. + + LXVD2X (P1ptr+R0), X2H_ + LXVD2X (P1ptr+R16), X2L_ + LXVD2X (P1ptr+R17), Y2H_ + LXVD2X (P1ptr+R18), Y2L_ + LXVD2X (P1ptr+R19), Z2H_ + LXVD2X (P1ptr+R20), Z2L_ + + VCMPEQUD SEL2, IDX, SEL1 // VCEQG SEL2, IDX, SEL1 OK + + // This will result in SEL1 being all 0s or 1s, meaning + // the result is either X1L or X2L, no individual byte + // selection. + + VSEL X1L, X2L, SEL1, X1L + VSEL X1H, X2H, SEL1, X1H + VSEL Y1L, Y2L, SEL1, Y1L + VSEL Y1H, Y2H, SEL1, Y1H + VSEL Z1L, Z2L, SEL1, Z1L + VSEL Z1H, Z2H, SEL1, Z1H + + // Add 1 to all bytes in SEL2 + VADDUBM SEL2, ONE, SEL2 // VAB SEL2, ONE, SEL2 OK + ADD $96, P1ptr + BC 16, 0, loop_select + + // STXVD2X is used here so that alignment doesn't + // need to be verified. Since values were loaded + // using LXVD2X this is OK. + STXVD2X X1H_, (P3ptr+R0) + STXVD2X X1L_, (P3ptr+R16) + STXVD2X Y1H_, (P3ptr+R17) + STXVD2X Y1L_, (P3ptr+R18) + STXVD2X Z1H_, (P3ptr+R19) + STXVD2X Z1L_, (P3ptr+R20) + RET + +#undef P3ptr +#undef P1ptr +#undef COUNT +#undef X1L +#undef X1H +#undef Y1L +#undef Y1H +#undef Z1L +#undef Z1H +#undef X2L +#undef X2H +#undef Y2L +#undef Y2H +#undef Z2L +#undef Z2H +#undef X2L_ +#undef X2H_ +#undef Y2L_ +#undef Y2H_ +#undef Z2L_ +#undef Z2H_ +#undef ONE +#undef IDX +#undef SEL1 +#undef SEL1_ +#undef SEL2 + +// func p256SelectBase(point, table []uint64, idx int) +#define P3ptr R3 +#define P1ptr R4 +#define COUNT R5 + +#define X1L V0 +#define X1H V1 +#define Y1L V2 +#define Y1H V3 +#define Z1L V4 +#define Z1H V5 +#define X2L V6 +#define X2H V7 +#define Y2L V8 +#define Y2H V9 +#define Z2L V10 +#define Z2H V11 +#define X2L_ VS38 +#define X2H_ VS39 +#define Y2L_ VS40 +#define Y2H_ VS41 +#define Z2L_ VS42 +#define Z2H_ VS43 + +#define ONE V18 +#define IDX V19 +#define SEL1 V20 +#define SEL1_ VS52 +#define SEL2 V21 +TEXT ·p256SelectBase(SB), NOSPLIT, $0-40 + MOVD point+0(FP), P3ptr + MOVD table+8(FP), P1ptr + MOVD $16, R16 + MOVD $32, R17 + MOVD $48, R18 + MOVD $64, R19 + MOVD $80, R20 + MOVD $56, R21 + + LXVDSX (R1)(R19), SEL1_ + VSPLTB $7, SEL1, IDX // splat byte + + VSPLTISB $1, ONE // Vector with byte 1s + VSPLTISB $1, SEL2 // Vector with byte 1s + MOVD $65, COUNT + MOVD COUNT, CTR // loop count + + VSPLTISB $0, X1H // VZERO X1H + VSPLTISB $0, X1L // VZERO X1L + VSPLTISB $0, Y1H // VZERO Y1H + VSPLTISB $0, Y1L // VZERO Y1L + VSPLTISB $0, Z1H // VZERO Z1H + VSPLTISB $0, Z1L // VZERO Z1L + +loop_select: + LXVD2X (P1ptr+R0), X2H_ + LXVD2X (P1ptr+R16), X2L_ + LXVD2X (P1ptr+R17), Y2H_ + LXVD2X (P1ptr+R18), Y2L_ + LXVD2X (P1ptr+R19), Z2H_ + LXVD2X (P1ptr+R20), Z2L_ + + VCMPEQUD SEL2, IDX, SEL1 // Compare against idx + + VSEL X1L, X2L, SEL1, X1L // Select if idx matched + VSEL X1H, X2H, SEL1, X1H + VSEL Y1L, Y2L, SEL1, Y1L + VSEL Y1H, Y2H, SEL1, Y1H + VSEL Z1L, Z2L, SEL1, Z1L + VSEL Z1H, Z2H, SEL1, Z1H + + VADDUBM SEL2, ONE, SEL2 // Increment SEL2 bytes by 1 + ADD $96, P1ptr // Next chunk + BC 16, 0, loop_select + + STXVD2X X1H_, (P3ptr+R0) + STXVD2X X1L_, (P3ptr+R16) + STXVD2X Y1H_, (P3ptr+R17) + STXVD2X Y1L_, (P3ptr+R18) + STXVD2X Z1H_, (P3ptr+R19) + STXVD2X Z1L_, (P3ptr+R20) + RET + +#undef P3ptr +#undef P1ptr +#undef COUNT +#undef X1L +#undef X1H +#undef Y1L +#undef Y1H +#undef Z1L +#undef Z1H +#undef X2L +#undef X2H +#undef Y2L +#undef Y2H +#undef Z2L +#undef Z2H +#undef X1L_ +#undef X1H_ +#undef X2L_ +#undef X2H_ +#undef Y1L_ +#undef Y1H_ +#undef Y2L_ +#undef Y2H_ +#undef Z1L_ +#undef Z1H_ +#undef Z2L_ +#undef Z2H_ +#undef ONE +#undef IDX +#undef SEL1 +#undef SEL1_ +#undef SEL2 +#undef SWAP +#undef SWAP_ + +// --------------------------------------- +// func p256FromMont(res, in []byte) +#define res_ptr R3 +#define x_ptr R4 +#define CPOOL R7 + +#define T0 V0 +#define T0_ VS32 +#define T1 V1 +#define T1_ VS33 +#define T2 V2 +#define TT0 V3 +#define TT1 V4 +#define TT0_ VS35 +#define TT1_ VS36 + +#define ZER V6 +#define SEL1 V7 +#define SEL1_ VS39 +#define SEL2 V8 +#define SEL2_ VS40 +#define CAR1 V9 +#define CAR2 V10 +#define RED1 V11 +#define RED2 V12 +#define PL V13 +#define PL_ VS45 +#define PH V14 +#define PH_ VS46 +#define SWAP V28 +#define SWAP_ VS57 + +TEXT ·p256FromMont(SB), NOSPLIT, $0-48 + MOVD res+0(FP), res_ptr + MOVD in+24(FP), x_ptr + + MOVD $16, R16 + MOVD $32, R17 + MOVD $48, R18 + MOVD $64, R19 + MOVD $p256<>+0x00(SB), CPOOL + MOVD $byteswap<>+0x00(SB), R15 + + VSPLTISB $0, T2 // VZERO T2 + VSPLTISB $0, ZER // VZERO ZER + + // Constants are defined so that the LXVD2X is correct + LXVD2X (CPOOL+R0), PH_ + LXVD2X (CPOOL+R16), PL_ + + // VPERM byte selections + LXVD2X (CPOOL+R18), SEL2_ + LXVD2X (CPOOL+R19), SEL1_ + + LXVD2X (R15)(R0), SWAP_ + + LXVD2X (R16)(x_ptr), T1_ + LXVD2X (R0)(x_ptr), T0_ + + // Put in true little endian order + VPERM T0, T0, SWAP, T0 + VPERM T1, T1, SWAP, T1 + + // First round + VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0 + VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0 + VSUBUQM RED2, RED1, RED2 // VSQ RED1, RED2, RED2 // Guaranteed not to underflow + + VSLDOI $8, T1, T0, T0 // VSLDB $8, T1, T0, T0 + VSLDOI $8, T2, T1, T1 // VSLDB $8, T2, T1, T1 + + VADDCUQ T0, RED1, CAR1 // VACCQ T0, RED1, CAR1 + VADDUQM T0, RED1, T0 // VAQ T0, RED1, T0 + VADDECUQ T1, RED2, CAR1, CAR2 // VACCCQ T1, RED2, CAR1, CAR2 + VADDEUQM T1, RED2, CAR1, T1 // VACQ T1, RED2, CAR1, T1 + VADDUQM T2, CAR2, T2 // VAQ T2, CAR2, T2 + + // Second round + VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0 + VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0 + VSUBUQM RED2, RED1, RED2 // VSQ RED1, RED2, RED2 // Guaranteed not to underflow + + VSLDOI $8, T1, T0, T0 // VSLDB $8, T1, T0, T0 + VSLDOI $8, T2, T1, T1 // VSLDB $8, T2, T1, T1 + + VADDCUQ T0, RED1, CAR1 // VACCQ T0, RED1, CAR1 + VADDUQM T0, RED1, T0 // VAQ T0, RED1, T0 + VADDECUQ T1, RED2, CAR1, CAR2 // VACCCQ T1, RED2, CAR1, CAR2 + VADDEUQM T1, RED2, CAR1, T1 // VACQ T1, RED2, CAR1, T1 + VADDUQM T2, CAR2, T2 // VAQ T2, CAR2, T2 + + // Third round + VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0 + VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0 + VSUBUQM RED2, RED1, RED2 // VSQ RED1, RED2, RED2 // Guaranteed not to underflow + + VSLDOI $8, T1, T0, T0 // VSLDB $8, T1, T0, T0 + VSLDOI $8, T2, T1, T1 // VSLDB $8, T2, T1, T1 + + VADDCUQ T0, RED1, CAR1 // VACCQ T0, RED1, CAR1 + VADDUQM T0, RED1, T0 // VAQ T0, RED1, T0 + VADDECUQ T1, RED2, CAR1, CAR2 // VACCCQ T1, RED2, CAR1, CAR2 + VADDEUQM T1, RED2, CAR1, T1 // VACQ T1, RED2, CAR1, T1 + VADDUQM T2, CAR2, T2 // VAQ T2, CAR2, T2 + + // Last round + VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0 + VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0 + VSUBUQM RED2, RED1, RED2 // VSQ RED1, RED2, RED2 // Guaranteed not to underflow + + VSLDOI $8, T1, T0, T0 // VSLDB $8, T1, T0, T0 + VSLDOI $8, T2, T1, T1 // VSLDB $8, T2, T1, T1 + + VADDCUQ T0, RED1, CAR1 // VACCQ T0, RED1, CAR1 + VADDUQM T0, RED1, T0 // VAQ T0, RED1, T0 + VADDECUQ T1, RED2, CAR1, CAR2 // VACCCQ T1, RED2, CAR1, CAR2 + VADDEUQM T1, RED2, CAR1, T1 // VACQ T1, RED2, CAR1, T1 + VADDUQM T2, CAR2, T2 // VAQ T2, CAR2, T2 + + // --------------------------------------------------- + + VSUBCUQ T0, PL, CAR1 // VSCBIQ PL, T0, CAR1 + VSUBUQM T0, PL, TT0 // VSQ PL, T0, TT0 + VSUBECUQ T1, PH, CAR1, CAR2 // VSBCBIQ T1, PH, CAR1, CAR2 + VSUBEUQM T1, PH, CAR1, TT1 // VSBIQ T1, PH, CAR1, TT1 + VSUBEUQM T2, ZER, CAR2, T2 // VSBIQ T2, ZER, CAR2, T2 + + VSEL TT0, T0, T2, T0 + VSEL TT1, T1, T2, T1 + + // Reorder the bytes so STXVD2X can be used. + // TT0, TT1 used for VPERM result in case + // the caller expects T0, T1 to be good. + VPERM T0, T0, SWAP, TT0 + VPERM T1, T1, SWAP, TT1 + + STXVD2X TT0_, (R0)(res_ptr) + STXVD2X TT1_, (R16)(res_ptr) + RET + +#undef res_ptr +#undef x_ptr +#undef CPOOL +#undef T0 +#undef T0_ +#undef T1 +#undef T1_ +#undef T2 +#undef TT0 +#undef TT1 +#undef ZER +#undef SEL1 +#undef SEL1_ +#undef SEL2 +#undef SEL2_ +#undef CAR1 +#undef CAR2 +#undef RED1 +#undef RED2 +#undef PL +#undef PL_ +#undef PH +#undef PH_ +#undef SWAP +#undef SWAP_ + +// --------------------------------------- +// p256MulInternal +// V0-V3 V30,V31 - Not Modified +// V4-V15 V27-V29 - Volatile + +#define CPOOL R7 + +// Parameters +#define X0 V0 // Not modified +#define X1 V1 // Not modified +#define Y0 V2 // Not modified +#define Y1 V3 // Not modified +#define T0 V4 // Result +#define T1 V5 // Result +#define P0 V30 // Not modified +#define P1 V31 // Not modified + +// Temporaries: lots of reused vector regs +#define YDIG V6 // Overloaded with CAR2 +#define ADD1H V7 // Overloaded with ADD3H +#define ADD2H V8 // Overloaded with ADD4H +#define ADD3 V9 // Overloaded with SEL2,SEL5 +#define ADD4 V10 // Overloaded with SEL3,SEL6 +#define RED1 V11 // Overloaded with CAR2 +#define RED2 V12 +#define RED3 V13 // Overloaded with SEL1 +#define T2 V14 +// Overloaded temporaries +#define ADD1 V4 // Overloaded with T0 +#define ADD2 V5 // Overloaded with T1 +#define ADD3H V7 // Overloaded with ADD1H +#define ADD4H V8 // Overloaded with ADD2H +#define ZER V28 // Overloaded with TMP1 +#define CAR1 V6 // Overloaded with YDIG +#define CAR2 V11 // Overloaded with RED1 +// Constant Selects +#define SEL1 V13 // Overloaded with RED3 +#define SEL2 V9 // Overloaded with ADD3,SEL5 +#define SEL3 V10 // Overloaded with ADD4,SEL6 +#define SEL4 V6 // Overloaded with YDIG,CAR1 +#define SEL5 V9 // Overloaded with ADD3,SEL2 +#define SEL6 V10 // Overloaded with ADD4,SEL3 +#define SEL1_ VS45 +#define SEL2_ VS41 +#define SEL3_ VS42 +#define SEL4_ VS38 +#define SEL5_ VS41 +#define SEL6_ VS42 + +// TMP1, TMP2, EXTRACT_LO, EXTRACT_HI used in +// VMULT macros +#define TMP1 V13 // Overloaded with RED3 +#define TMP2 V27 +#define EVENODD R5 +#define EXTRACT_LO V28 +#define EXTRACT_LO_ VS60 +#define EXTRACT_HI V29 +#define EXTRACT_HI_ VS61 + +/* * + * To follow the flow of bits, for your own sanity a stiff drink, need you shall. + * Of a single round, a 'helpful' picture, here is. Meaning, column position has. + * With you, SIMD be... + * + * +--------+--------+ + * +--------| RED2 | RED1 | + * | +--------+--------+ + * | ---+--------+--------+ + * | +---- T2| T1 | T0 |--+ + * | | ---+--------+--------+ | + * | | | + * | | ======================= | + * | | | + * | | +--------+--------+<-+ + * | +-------| ADD2 | ADD1 |--|-----+ + * | | +--------+--------+ | | + * | | +--------+--------+<---+ | + * | | | ADD2H | ADD1H |--+ | + * | | +--------+--------+ | | + * | | +--------+--------+<-+ | + * | | | ADD4 | ADD3 |--|-+ | + * | | +--------+--------+ | | | + * | | +--------+--------+<---+ | | + * | | | ADD4H | ADD3H |------|-+ |(+vzero) + * | | +--------+--------+ | | V + * | | ------------------------ | | +--------+ + * | | | | | RED3 | [d0 0 0 d0] + * | | | | +--------+ + * | +---->+--------+--------+ | | | + * (T2[1w]||ADD2[4w]||ADD1[3w]) +--------| T1 | T0 | | | | + * | +--------+--------+ | | | + * +---->---+--------+--------+ | | | + * T2| T1 | T0 |----+ | | + * ---+--------+--------+ | | | + * ---+--------+--------+<---+ | | + * +--- T2| T1 | T0 |----------+ + * | ---+--------+--------+ | | + * | +--------+--------+<-------------+ + * | | RED2 | RED1 |-----+ | | [0 d1 d0 d1] [d0 0 d1 d0] + * | +--------+--------+ | | | + * | +--------+<----------------------+ + * | | RED3 |--------------+ | [0 0 d1 d0] + * | +--------+ | | + * +--->+--------+--------+ | | + * | T1 | T0 |--------+ + * +--------+--------+ | | + * --------------------------- | | + * | | + * +--------+--------+<----+ | + * | RED2 | RED1 | | + * +--------+--------+ | + * ---+--------+--------+<-------+ + * T2| T1 | T0 | (H1P-H1P-H00RRAY!) + * ---+--------+--------+ + * + * *Mi obra de arte de siglo XXI @vpaprots + * + * + * First group is special, doesn't get the two inputs: + * +--------+--------+<-+ + * +-------| ADD2 | ADD1 |--|-----+ + * | +--------+--------+ | | + * | +--------+--------+<---+ | + * | | ADD2H | ADD1H |--+ | + * | +--------+--------+ | | + * | +--------+--------+<-+ | + * | | ADD4 | ADD3 |--|-+ | + * | +--------+--------+ | | | + * | +--------+--------+<---+ | | + * | | ADD4H | ADD3H |------|-+ |(+vzero) + * | +--------+--------+ | | V + * | ------------------------ | | +--------+ + * | | | | RED3 | [d0 0 0 d0] + * | | | +--------+ + * +---->+--------+--------+ | | | + * (T2[1w]||ADD2[4w]||ADD1[3w]) | T1 | T0 |----+ | | + * +--------+--------+ | | | + * ---+--------+--------+<---+ | | + * +--- T2| T1 | T0 |----------+ + * | ---+--------+--------+ | | + * | +--------+--------+<-------------+ + * | | RED2 | RED1 |-----+ | | [0 d1 d0 d1] [d0 0 d1 d0] + * | +--------+--------+ | | | + * | +--------+<----------------------+ + * | | RED3 |--------------+ | [0 0 d1 d0] + * | +--------+ | | + * +--->+--------+--------+ | | + * | T1 | T0 |--------+ + * +--------+--------+ | | + * --------------------------- | | + * | | + * +--------+--------+<----+ | + * | RED2 | RED1 | | + * +--------+--------+ | + * ---+--------+--------+<-------+ + * T2| T1 | T0 | (H1P-H1P-H00RRAY!) + * ---+--------+--------+ + * + * Last 'group' needs to RED2||RED1 shifted less + */ +TEXT p256MulInternal<>(SB), NOSPLIT, $0-16 + // CPOOL loaded from caller + MOVD $16, R16 + MOVD $32, R17 + MOVD $48, R18 + MOVD $64, R19 + MOVD $80, R20 + MOVD $96, R21 + MOVD $112, R22 + + MOVD $p256permhilo<>+0x00(SB), EVENODD + + // These values are used by the VMULTxxx macros to + // extract the high and low portions of the intermediate + // result. + LXVD2X (R0)(EVENODD), EXTRACT_LO_ + LXVD2X (R16)(EVENODD), EXTRACT_HI_ + + // --------------------------------------------------- + + VSPLTW $3, Y0, YDIG // VREPF Y0 is input + + // VMLHF X0, YDIG, ADD1H + // VMLHF X1, YDIG, ADD2H + // VMLF X0, YDIG, ADD1 + // VMLF X1, YDIG, ADD2 + // + VMULT(X0, YDIG, ADD1, ADD1H) + VMULT(X1, YDIG, ADD2, ADD2H) + + VSPLTW $2, Y0, YDIG // VREPF + + // VMALF X0, YDIG, ADD1H, ADD3 + // VMALF X1, YDIG, ADD2H, ADD4 + // VMALHF X0, YDIG, ADD1H, ADD3H // ADD1H Free + // VMALHF X1, YDIG, ADD2H, ADD4H // ADD2H Free + VMULT_ADD(X0, YDIG, ADD1H, ADD3, ADD3H) + VMULT_ADD(X1, YDIG, ADD2H, ADD4, ADD4H) + + LXVD2X (R17)(CPOOL), SEL1_ + VSPLTISB $0, ZER // VZERO ZER + VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0] + + VSLDOI $12, ADD2, ADD1, T0 // ADD1 Free // VSLDB + VSLDOI $12, ZER, ADD2, T1 // ADD2 Free // VSLDB + + VADDCUQ T0, ADD3, CAR1 // VACCQ + VADDUQM T0, ADD3, T0 // ADD3 Free // VAQ + VADDECUQ T1, ADD4, CAR1, T2 // VACCCQ + VADDEUQM T1, ADD4, CAR1, T1 // ADD4 Free // VACQ + + LXVD2X (R18)(CPOOL), SEL2_ + LXVD2X (R19)(CPOOL), SEL3_ + LXVD2X (R20)(CPOOL), SEL4_ + VPERM RED3, T0, SEL2, RED1 // [d0 0 d1 d0] + VPERM RED3, T0, SEL3, RED2 // [ 0 d1 d0 d1] + VPERM RED3, T0, SEL4, RED3 // [ 0 0 d1 d0] + VSUBUQM RED2, RED3, RED2 // Guaranteed not to underflow -->? // VSQ + + VSLDOI $12, T1, T0, T0 // VSLDB + VSLDOI $12, T2, T1, T1 // VSLDB + + VADDCUQ T0, ADD3H, CAR1 // VACCQ + VADDUQM T0, ADD3H, T0 // VAQ + VADDECUQ T1, ADD4H, CAR1, T2 // VACCCQ + VADDEUQM T1, ADD4H, CAR1, T1 // VACQ + + // --------------------------------------------------- + + VSPLTW $1, Y0, YDIG // VREPF + LXVD2X (R0)(EVENODD), EXTRACT_LO_ + LXVD2X (R16)(EVENODD), EXTRACT_HI_ + + // VMALHF X0, YDIG, T0, ADD1H + // VMALHF X1, YDIG, T1, ADD2H + // VMALF X0, YDIG, T0, ADD1 // T0 Free->ADD1 + // VMALF X1, YDIG, T1, ADD2 // T1 Free->ADD2 + VMULT_ADD(X0, YDIG, T0, ADD1, ADD1H) + VMULT_ADD(X1, YDIG, T1, ADD2, ADD2H) + + VSPLTW $0, Y0, YDIG // VREPF + + // VMALF X0, YDIG, ADD1H, ADD3 + // VMALF X1, YDIG, ADD2H, ADD4 + // VMALHF X0, YDIG, ADD1H, ADD3H // ADD1H Free->ADD3H + // VMALHF X1, YDIG, ADD2H, ADD4H // ADD2H Free->ADD4H , YDIG Free->ZER + VMULT_ADD(X0, YDIG, ADD1H, ADD3, ADD3H) + VMULT_ADD(X1, YDIG, ADD2H, ADD4, ADD4H) + + VSPLTISB $0, ZER // VZERO ZER + LXVD2X (R17)(CPOOL), SEL1_ + VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0] + + VSLDOI $12, ADD2, ADD1, T0 // ADD1 Free->T0 // VSLDB + VSLDOI $12, T2, ADD2, T1 // ADD2 Free->T1, T2 Free // VSLDB + + VADDCUQ T0, RED1, CAR1 // VACCQ + VADDUQM T0, RED1, T0 // VAQ + VADDECUQ T1, RED2, CAR1, T2 // VACCCQ + VADDEUQM T1, RED2, CAR1, T1 // VACQ + + VADDCUQ T0, ADD3, CAR1 // VACCQ + VADDUQM T0, ADD3, T0 // VAQ + VADDECUQ T1, ADD4, CAR1, CAR2 // VACCCQ + VADDEUQM T1, ADD4, CAR1, T1 // VACQ + VADDUQM T2, CAR2, T2 // VAQ + + LXVD2X (R18)(CPOOL), SEL2_ + LXVD2X (R19)(CPOOL), SEL3_ + LXVD2X (R20)(CPOOL), SEL4_ + VPERM RED3, T0, SEL2, RED1 // [d0 0 d1 d0] + VPERM RED3, T0, SEL3, RED2 // [ 0 d1 d0 d1] + VPERM RED3, T0, SEL4, RED3 // [ 0 0 d1 d0] + VSUBUQM RED2, RED3, RED2 // Guaranteed not to underflow // VSQ + + VSLDOI $12, T1, T0, T0 // VSLDB + VSLDOI $12, T2, T1, T1 // VSLDB + + VADDCUQ T0, ADD3H, CAR1 // VACCQ + VADDUQM T0, ADD3H, T0 // VAQ + VADDECUQ T1, ADD4H, CAR1, T2 // VACCCQ + VADDEUQM T1, ADD4H, CAR1, T1 // VACQ + + // --------------------------------------------------- + + VSPLTW $3, Y1, YDIG // VREPF + LXVD2X (R0)(EVENODD), EXTRACT_LO_ + LXVD2X (R16)(EVENODD), EXTRACT_HI_ + + // VMALHF X0, YDIG, T0, ADD1H + // VMALHF X1, YDIG, T1, ADD2H + // VMALF X0, YDIG, T0, ADD1 + // VMALF X1, YDIG, T1, ADD2 + VMULT_ADD(X0, YDIG, T0, ADD1, ADD1H) + VMULT_ADD(X1, YDIG, T1, ADD2, ADD2H) + + VSPLTW $2, Y1, YDIG // VREPF + + // VMALF X0, YDIG, ADD1H, ADD3 + // VMALF X1, YDIG, ADD2H, ADD4 + // VMALHF X0, YDIG, ADD1H, ADD3H // ADD1H Free + // VMALHF X1, YDIG, ADD2H, ADD4H // ADD2H Free + VMULT_ADD(X0, YDIG, ADD1H, ADD3, ADD3H) + VMULT_ADD(X1, YDIG, ADD2H, ADD4, ADD4H) + + LXVD2X (R17)(CPOOL), SEL1_ + VSPLTISB $0, ZER // VZERO ZER + LXVD2X (R17)(CPOOL), SEL1_ + VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0] + + VSLDOI $12, ADD2, ADD1, T0 // ADD1 Free // VSLDB + VSLDOI $12, T2, ADD2, T1 // ADD2 Free // VSLDB + + VADDCUQ T0, RED1, CAR1 // VACCQ + VADDUQM T0, RED1, T0 // VAQ + VADDECUQ T1, RED2, CAR1, T2 // VACCCQ + VADDEUQM T1, RED2, CAR1, T1 // VACQ + + VADDCUQ T0, ADD3, CAR1 // VACCQ + VADDUQM T0, ADD3, T0 // VAQ + VADDECUQ T1, ADD4, CAR1, CAR2 // VACCCQ + VADDEUQM T1, ADD4, CAR1, T1 // VACQ + VADDUQM T2, CAR2, T2 // VAQ + + LXVD2X (R18)(CPOOL), SEL2_ + LXVD2X (R19)(CPOOL), SEL3_ + LXVD2X (R20)(CPOOL), SEL4_ + VPERM RED3, T0, SEL2, RED1 // [d0 0 d1 d0] + VPERM RED3, T0, SEL3, RED2 // [ 0 d1 d0 d1] + VPERM RED3, T0, SEL4, RED3 // [ 0 0 d1 d0] + VSUBUQM RED2, RED3, RED2 // Guaranteed not to underflow // VSQ + + VSLDOI $12, T1, T0, T0 // VSLDB + VSLDOI $12, T2, T1, T1 // VSLDB + + VADDCUQ T0, ADD3H, CAR1 // VACCQ + VADDUQM T0, ADD3H, T0 // VAQ + VADDECUQ T1, ADD4H, CAR1, T2 // VACCCQ + VADDEUQM T1, ADD4H, CAR1, T1 // VACQ + + // --------------------------------------------------- + + VSPLTW $1, Y1, YDIG // VREPF + LXVD2X (R0)(EVENODD), EXTRACT_LO_ + LXVD2X (R16)(EVENODD), EXTRACT_HI_ + + // VMALHF X0, YDIG, T0, ADD1H + // VMALHF X1, YDIG, T1, ADD2H + // VMALF X0, YDIG, T0, ADD1 + // VMALF X1, YDIG, T1, ADD2 + VMULT_ADD(X0, YDIG, T0, ADD1, ADD1H) + VMULT_ADD(X1, YDIG, T1, ADD2, ADD2H) + + VSPLTW $0, Y1, YDIG // VREPF + + // VMALF X0, YDIG, ADD1H, ADD3 + // VMALF X1, YDIG, ADD2H, ADD4 + // VMALHF X0, YDIG, ADD1H, ADD3H + // VMALHF X1, YDIG, ADD2H, ADD4H + VMULT_ADD(X0, YDIG, ADD1H, ADD3, ADD3H) + VMULT_ADD(X1, YDIG, ADD2H, ADD4, ADD4H) + + VSPLTISB $0, ZER // VZERO ZER + LXVD2X (R17)(CPOOL), SEL1_ + VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0] + + VSLDOI $12, ADD2, ADD1, T0 // VSLDB + VSLDOI $12, T2, ADD2, T1 // VSLDB + + VADDCUQ T0, RED1, CAR1 // VACCQ + VADDUQM T0, RED1, T0 // VAQ + VADDECUQ T1, RED2, CAR1, T2 // VACCCQ + VADDEUQM T1, RED2, CAR1, T1 // VACQ + + VADDCUQ T0, ADD3, CAR1 // VACCQ + VADDUQM T0, ADD3, T0 // VAQ + VADDECUQ T1, ADD4, CAR1, CAR2 // VACCCQ + VADDEUQM T1, ADD4, CAR1, T1 // VACQ + VADDUQM T2, CAR2, T2 // VAQ + + LXVD2X (R21)(CPOOL), SEL5_ + LXVD2X (R22)(CPOOL), SEL6_ + VPERM T0, RED3, SEL5, RED2 // [d1 d0 d1 d0] + VPERM T0, RED3, SEL6, RED1 // [ 0 d1 d0 0] + VSUBUQM RED2, RED1, RED2 // Guaranteed not to underflow // VSQ + + VSLDOI $12, T1, T0, T0 // VSLDB + VSLDOI $12, T2, T1, T1 // VSLDB + + VADDCUQ T0, ADD3H, CAR1 // VACCQ + VADDUQM T0, ADD3H, T0 // VAQ + VADDECUQ T1, ADD4H, CAR1, T2 // VACCCQ + VADDEUQM T1, ADD4H, CAR1, T1 // VACQ + + VADDCUQ T0, RED1, CAR1 // VACCQ + VADDUQM T0, RED1, T0 // VAQ + VADDECUQ T1, RED2, CAR1, CAR2 // VACCCQ + VADDEUQM T1, RED2, CAR1, T1 // VACQ + VADDUQM T2, CAR2, T2 // VAQ + + // --------------------------------------------------- + + VSPLTISB $0, RED3 // VZERO RED3 + VSUBCUQ T0, P0, CAR1 // VSCBIQ + VSUBUQM T0, P0, ADD1H // VSQ + VSUBECUQ T1, P1, CAR1, CAR2 // VSBCBIQ + VSUBEUQM T1, P1, CAR1, ADD2H // VSBIQ + VSUBEUQM T2, RED3, CAR2, T2 // VSBIQ + + // what output to use, ADD2H||ADD1H or T1||T0? + VSEL ADD1H, T0, T2, T0 + VSEL ADD2H, T1, T2, T1 + RET + +#undef CPOOL + +#undef X0 +#undef X1 +#undef Y0 +#undef Y1 +#undef T0 +#undef T1 +#undef P0 +#undef P1 + +#undef SEL1 +#undef SEL2 +#undef SEL3 +#undef SEL4 +#undef SEL5 +#undef SEL6 +#undef SEL1_ +#undef SEL2_ +#undef SEL3_ +#undef SEL4_ +#undef SEL5_ +#undef SEL6_ + +#undef YDIG +#undef ADD1H +#undef ADD2H +#undef ADD3 +#undef ADD4 +#undef RED1 +#undef RED2 +#undef RED3 +#undef T2 +#undef ADD1 +#undef ADD2 +#undef ADD3H +#undef ADD4H +#undef ZER +#undef CAR1 +#undef CAR2 + +#undef TMP1 +#undef TMP2 +#undef EVENODD +#undef EXTRACT_HI +#undef EXTRACT_HI_ +#undef EXTRACT_LO +#undef EXTRACT_LO_ + +#define p256SubInternal(T1, T0, X1, X0, Y1, Y0) \ + VSPLTISB $0, ZER \ // VZERO + VSUBCUQ X0, Y0, CAR1 \ + VSUBUQM X0, Y0, T0 \ + VSUBECUQ X1, Y1, CAR1, SEL1 \ + VSUBEUQM X1, Y1, CAR1, T1 \ + VSUBUQM ZER, SEL1, SEL1 \ // VSQ + \ + VADDCUQ T0, PL, CAR1 \ // VACCQ + VADDUQM T0, PL, TT0 \ // VAQ + VADDEUQM T1, PH, CAR1, TT1 \ // VACQ + \ + VSEL TT0, T0, SEL1, T0 \ + VSEL TT1, T1, SEL1, T1 \ + +#define p256AddInternal(T1, T0, X1, X0, Y1, Y0) \ + VADDCUQ X0, Y0, CAR1 \ + VADDUQM X0, Y0, T0 \ + VADDECUQ X1, Y1, CAR1, T2 \ // VACCCQ + VADDEUQM X1, Y1, CAR1, T1 \ + \ + VSPLTISB $0, ZER \ + VSUBCUQ T0, PL, CAR1 \ // VSCBIQ + VSUBUQM T0, PL, TT0 \ + VSUBECUQ T1, PH, CAR1, CAR2 \ // VSBCBIQ + VSUBEUQM T1, PH, CAR1, TT1 \ // VSBIQ + VSUBEUQM T2, ZER, CAR2, SEL1 \ + \ + VSEL TT0, T0, SEL1, T0 \ + VSEL TT1, T1, SEL1, T1 + +#define p256HalfInternal(T1, T0, X1, X0) \ + VSPLTISB $0, ZER \ + VSUBEUQM ZER, ZER, X0, SEL1 \ + \ + VADDCUQ X0, PL, CAR1 \ + VADDUQM X0, PL, T0 \ + VADDECUQ X1, PH, CAR1, T2 \ + VADDEUQM X1, PH, CAR1, T1 \ + \ + VSEL T0, X0, SEL1, T0 \ + VSEL T1, X1, SEL1, T1 \ + VSEL T2, ZER, SEL1, T2 \ + \ + VSLDOI $15, T2, ZER, TT1 \ + VSLDOI $15, T1, ZER, TT0 \ + VSPLTISB $1, SEL1 \ + VSR T0, SEL1, T0 \ // VSRL + VSR T1, SEL1, T1 \ + VSPLTISB $7, SEL1 \ // VREPIB + VSL TT0, SEL1, TT0 \ + VSL TT1, SEL1, TT1 \ + VOR T0, TT0, T0 \ + VOR T1, TT1, T1 + +// --------------------------------------- +// func p256MulAsm(res, in1, in2 []byte) +#define res_ptr R3 +#define x_ptr R4 +#define y_ptr R5 +#define CPOOL R7 +#define TEMP R8 + +// Parameters +#define X0 V0 +#define X1 V1 +#define Y0 V2 +#define Y1 V3 +#define T0 V4 +#define T1 V5 +#define X0_ VS32 +#define X1_ VS33 +#define Y0_ VS34 +#define Y1_ VS35 +#define T0_ VS36 +#define T1_ VS37 +#define SWAP V28 +#define SWAP_ VS60 + +// Constants +#define P0 V30 +#define P1 V31 +#define P0_ VS62 +#define P1_ VS63 +// +// Montgomery multiplication modulo P256 +// +TEXT ·p256MulAsm(SB), NOSPLIT, $0-72 + MOVD res+0(FP), res_ptr + MOVD in1+24(FP), x_ptr + MOVD in2+48(FP), y_ptr + MOVD $16, R16 + MOVD $32, R17 + + MOVD $p256mul<>+0x00(SB), CPOOL + MOVD $byteswap<>+0x00(SB), R8 + + LXVD2X (R8)(R0), SWAP_ + + LXVD2X (R0)(x_ptr), X0_ + LXVD2X (R16)(x_ptr), X1_ + + VPERM X0, X0, SWAP, X0 + VPERM X1, X1, SWAP, X1 + + LXVD2X (R0)(y_ptr), Y0_ + LXVD2X (R16)(y_ptr), Y1_ + + VPERM Y0, Y0, SWAP, Y0 + VPERM Y1, Y1, SWAP, Y1 + + LXVD2X (R16)(CPOOL), P1_ + LXVD2X (R0)(CPOOL), P0_ + + CALL p256MulInternal<>(SB) + + MOVD $p256mul<>+0x00(SB), CPOOL + MOVD $byteswap<>+0x00(SB), R8 + + LXVD2X (R8)(R0), SWAP_ + + VPERM T0, T0, SWAP, T0 + VPERM T1, T1, SWAP, T1 + STXVD2X T0_, (R0)(res_ptr) + STXVD2X T1_, (R16)(res_ptr) + RET + +#undef res_ptr +#undef x_ptr +#undef y_ptr +#undef CPOOL + +#undef X0 +#undef X1 +#undef Y0 +#undef Y1 +#undef T0 +#undef T1 +#undef P0 +#undef P1 +#undef X0_ +#undef X1_ +#undef Y0_ +#undef Y1_ +#undef T0_ +#undef T1_ +#undef P0_ +#undef P1_ + +// Point add with P2 being affine point +// If sign == 1 -> P2 = -P2 +// If sel == 0 -> P3 = P1 +// if zero == 0 -> P3 = P2 +// p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int) +#define P3ptr R3 +#define P1ptr R4 +#define P2ptr R5 +#define CPOOL R7 + +// Temporaries in REGs +#define Y2L V15 +#define Y2H V16 +#define Y2L_ VS47 +#define Y2H_ VS48 +#define T1L V17 +#define T1H V18 +#define T2L V19 +#define T2H V20 +#define T3L V21 +#define T3H V22 +#define T4L V23 +#define T4H V24 + +// Temps for Sub and Add +#define TT0 V11 +#define TT1 V12 +#define T2 V13 + +// p256MulAsm Parameters +#define X0 V0 +#define X1 V1 +#define X0_ VS32 +#define X1_ VS33 +#define Y0 V2 +#define Y1 V3 +#define Y0_ VS34 +#define Y1_ VS35 +#define T0 V4 +#define T1 V5 + +#define PL V30 +#define PH V31 +#define PL_ VS62 +#define PH_ VS63 + +// Names for zero/sel selects +#define X1L V0 +#define X1H V1 +#define X1L_ VS32 +#define X1H_ VS33 +#define Y1L V2 // p256MulAsmParmY +#define Y1H V3 // p256MulAsmParmY +#define Y1L_ VS34 +#define Y1H_ VS35 +#define Z1L V4 +#define Z1H V5 +#define Z1L_ VS36 +#define Z1H_ VS37 +#define X2L V0 +#define X2H V1 +#define X2L_ VS32 +#define X2H_ VS33 +#define Z2L V4 +#define Z2H V5 +#define Z2L_ VS36 +#define Z2H_ VS37 +#define X3L V17 // T1L +#define X3H V18 // T1H +#define Y3L V21 // T3L +#define Y3H V22 // T3H +#define Z3L V25 +#define Z3H V26 +#define X3L_ VS49 +#define X3H_ VS50 +#define Y3L_ VS53 +#define Y3H_ VS54 +#define Z3L_ VS57 +#define Z3H_ VS58 + +#define ZER V6 +#define SEL1 V7 +#define SEL1_ VS39 +#define CAR1 V8 +#define CAR2 V9 +/* * + * Three operand formula: + * Source: 2004 Hankerson–Menezes–Vanstone, page 91. + * T1 = Z1² + * T2 = T1*Z1 + * T1 = T1*X2 + * T2 = T2*Y2 + * T1 = T1-X1 + * T2 = T2-Y1 + * Z3 = Z1*T1 + * T3 = T1² + * T4 = T3*T1 + * T3 = T3*X1 + * T1 = 2*T3 + * X3 = T2² + * X3 = X3-T1 + * X3 = X3-T4 + * T3 = T3-X3 + * T3 = T3*T2 + * T4 = T4*Y1 + * Y3 = T3-T4 + + * Three operand formulas, but with MulInternal X,Y used to store temps +X=Z1; Y=Z1; MUL;T- // T1 = Z1² T1 +X=T ; Y- ; MUL;T2=T // T2 = T1*Z1 T1 T2 +X- ; Y=X2; MUL;T1=T // T1 = T1*X2 T1 T2 +X=T2; Y=Y2; MUL;T- // T2 = T2*Y2 T1 T2 +SUB(T2<T-Y1) // T2 = T2-Y1 T1 T2 +SUB(Y<T1-X1) // T1 = T1-X1 T1 T2 +X=Z1; Y- ; MUL;Z3:=T// Z3 = Z1*T1 T2 +X=Y; Y- ; MUL;X=T // T3 = T1*T1 T2 +X- ; Y- ; MUL;T4=T // T4 = T3*T1 T2 T4 +X- ; Y=X1; MUL;T3=T // T3 = T3*X1 T2 T3 T4 +ADD(T1<T+T) // T1 = T3+T3 T1 T2 T3 T4 +X=T2; Y=T2; MUL;T- // X3 = T2*T2 T1 T2 T3 T4 +SUB(T<T-T1) // X3 = X3-T1 T1 T2 T3 T4 +SUB(T<T-T4) X3:=T // X3 = X3-T4 T2 T3 T4 +SUB(X<T3-T) // T3 = T3-X3 T2 T3 T4 +X- ; Y- ; MUL;T3=T // T3 = T3*T2 T2 T3 T4 +X=T4; Y=Y1; MUL;T- // T4 = T4*Y1 T3 T4 +SUB(T<T3-T) Y3:=T // Y3 = T3-T4 T3 T4 + + */ +// +// V27 is clobbered by p256MulInternal so must be +// saved in a temp. +// +TEXT ·p256PointAddAffineAsm(SB), NOSPLIT, $16-48 + MOVD res+0(FP), P3ptr + MOVD in1+8(FP), P1ptr + MOVD in2+16(FP), P2ptr + + MOVD $p256mul<>+0x00(SB), CPOOL + + MOVD $16, R16 + MOVD $32, R17 + MOVD $48, R18 + MOVD $64, R19 + MOVD $80, R20 + MOVD $96, R21 + MOVD $112, R22 + MOVD $128, R23 + MOVD $144, R24 + MOVD $160, R25 + MOVD $104, R26 // offset of sign+24(FP) + + MOVD $byteswap<>+0+00(SB), R8 + LXVD2X (R16)(CPOOL), PH_ + LXVD2X (R0)(CPOOL), PL_ + + // if (sign == 1) { + // Y2 = fromBig(new(big.Int).Mod(new(big.Int).Sub(p256.P, new(big.Int).SetBytes(Y2)), p256.P)) // Y2 = P-Y2 + // } + + LXVD2X (R8)(R0), SWAP_ + LXVD2X (R17)(P2ptr), Y2L_ + LXVD2X (R18)(P2ptr), Y2H_ + VPERM Y2H, Y2H, SWAP, Y2H + VPERM Y2L, Y2L, SWAP, Y2L + + // Equivalent of VLREPG sign+24(FP), SEL1 + LXVDSX (R1)(R26), SEL1_ + VSPLTISB $0, ZER + VCMPEQUD SEL1, ZER, SEL1 + + VSUBCUQ PL, Y2L, CAR1 + VSUBUQM PL, Y2L, T1L + VSUBEUQM PH, Y2H, CAR1, T1H + + VSEL T1L, Y2L, SEL1, Y2L + VSEL T1H, Y2H, SEL1, Y2H + +/* * + * Three operand formula: + * Source: 2004 Hankerson–Menezes–Vanstone, page 91. + */ + // X=Z1; Y=Z1; MUL; T- // T1 = Z1² T1 + LXVD2X (R8)(R0), SWAP_ + LXVD2X (R19)(P1ptr), X0_ // Z1H + LXVD2X (R20)(P1ptr), X1_ // Z1L + VPERM X0, X0, SWAP, X0 + VPERM X1, X1, SWAP, X1 + VOR X0, X0, Y0 + VOR X1, X1, Y1 + CALL p256MulInternal<>(SB) + + // X=T ; Y- ; MUL; T2=T // T2 = T1*Z1 T1 T2 + VOR T0, T0, X0 + VOR T1, T1, X1 + CALL p256MulInternal<>(SB) + VOR T0, T0, T2L + VOR T1, T1, T2H + + // X- ; Y=X2; MUL; T1=T // T1 = T1*X2 T1 T2 + MOVD in2+16(FP), P2ptr + LXVD2X (R8)(R0), SWAP_ + LXVD2X (R0)(P2ptr), Y0_ // X2H + LXVD2X (R16)(P2ptr), Y1_ // X2L + VPERM Y0, Y0, SWAP, Y0 + VPERM Y1, Y1, SWAP, Y1 + CALL p256MulInternal<>(SB) + VOR T0, T0, T1L + VOR T1, T1, T1H + + // X=T2; Y=Y2; MUL; T- // T2 = T2*Y2 T1 T2 + VOR T2L, T2L, X0 + VOR T2H, T2H, X1 + VOR Y2L, Y2L, Y0 + VOR Y2H, Y2H, Y1 + CALL p256MulInternal<>(SB) + + // SUB(T2<T-Y1) // T2 = T2-Y1 T1 T2 + MOVD in1+8(FP), P1ptr + LXVD2X (R8)(R0), SWAP_ + LXVD2X (R17)(P1ptr), Y1L_ + LXVD2X (R18)(P1ptr), Y1H_ + VPERM Y1H, Y1H, SWAP, Y1H + VPERM Y1L, Y1L, SWAP, Y1L + p256SubInternal(T2H,T2L,T1,T0,Y1H,Y1L) + + // SUB(Y<T1-X1) // T1 = T1-X1 T1 T2 + LXVD2X (R0)(P1ptr), X1L_ + LXVD2X (R16)(P1ptr), X1H_ + VPERM X1H, X1H, SWAP, X1H + VPERM X1L, X1L, SWAP, X1L + p256SubInternal(Y1,Y0,T1H,T1L,X1H,X1L) + + // X=Z1; Y- ; MUL; Z3:=T// Z3 = Z1*T1 T2 + LXVD2X (R19)(P1ptr), X0_ // Z1H + LXVD2X (R20)(P1ptr), X1_ // Z1L + VPERM X0, X0, SWAP, X0 + VPERM X1, X1, SWAP, X1 + CALL p256MulInternal<>(SB) + + VOR T0, T0, Z3L + VOR T1, T1, Z3H + + // X=Y; Y- ; MUL; X=T // T3 = T1*T1 T2 + VOR Y0, Y0, X0 + VOR Y1, Y1, X1 + CALL p256MulInternal<>(SB) + VOR T0, T0, X0 + VOR T1, T1, X1 + + // X- ; Y- ; MUL; T4=T // T4 = T3*T1 T2 T4 + CALL p256MulInternal<>(SB) + VOR T0, T0, T4L + VOR T1, T1, T4H + + // X- ; Y=X1; MUL; T3=T // T3 = T3*X1 T2 T3 T4 + MOVD in1+8(FP), P1ptr + LXVD2X (R8)(R0), SWAP_ + LXVD2X (R0)(P1ptr), Y0_ // X1H + LXVD2X (R16)(P1ptr), Y1_ // X1L + VPERM Y1, Y1, SWAP, Y1 + VPERM Y0, Y0, SWAP, Y0 + CALL p256MulInternal<>(SB) + VOR T0, T0, T3L + VOR T1, T1, T3H + + // ADD(T1<T+T) // T1 = T3+T3 T1 T2 T3 T4 + p256AddInternal(T1H,T1L, T1,T0,T1,T0) + + // X=T2; Y=T2; MUL; T- // X3 = T2*T2 T1 T2 T3 T4 + VOR T2L, T2L, X0 + VOR T2H, T2H, X1 + VOR T2L, T2L, Y0 + VOR T2H, T2H, Y1 + CALL p256MulInternal<>(SB) + + // SUB(T<T-T1) // X3 = X3-T1 T1 T2 T3 T4 (T1 = X3) + p256SubInternal(T1,T0,T1,T0,T1H,T1L) + + // SUB(T<T-T4) X3:=T // X3 = X3-T4 T2 T3 T4 + p256SubInternal(T1,T0,T1,T0,T4H,T4L) + VOR T0, T0, X3L + VOR T1, T1, X3H + + // SUB(X<T3-T) // T3 = T3-X3 T2 T3 T4 + p256SubInternal(X1,X0,T3H,T3L,T1,T0) + + // X- ; Y- ; MUL; T3=T // T3 = T3*T2 T2 T3 T4 + CALL p256MulInternal<>(SB) + VOR T0, T0, T3L + VOR T1, T1, T3H + + // X=T4; Y=Y1; MUL; T- // T4 = T4*Y1 T3 T4 + VOR T4L, T4L, X0 + VOR T4H, T4H, X1 + MOVD in1+8(FP), P1ptr + LXVD2X (R8)(R0), SWAP_ + LXVD2X (R17)(P1ptr), Y0_ // Y1H + LXVD2X (R18)(P1ptr), Y1_ // Y1L + VPERM Y0, Y0, SWAP, Y0 + VPERM Y1, Y1, SWAP, Y1 + CALL p256MulInternal<>(SB) + + // SUB(T<T3-T) Y3:=T // Y3 = T3-T4 T3 T4 (T3 = Y3) + p256SubInternal(Y3H,Y3L,T3H,T3L,T1,T0) + + // if (sel == 0) { + // copy(P3.x[:], X1) + // copy(P3.y[:], Y1) + // copy(P3.z[:], Z1) + // } + + LXVD2X (R8)(R0), SWAP_ + LXVD2X (R0)(P1ptr), X1L_ + LXVD2X (R16)(P1ptr), X1H_ + VPERM X1H, X1H, SWAP, X1H + VPERM X1L, X1L, SWAP, X1L + + // Y1 already loaded, left over from addition + LXVD2X (R19)(P1ptr), Z1L_ + LXVD2X (R20)(P1ptr), Z1H_ + VPERM Z1H, Z1H, SWAP, Z1H + VPERM Z1L, Z1L, SWAP, Z1L + + MOVD $112, R26 // Get offset to sel+32 + LXVDSX (R1)(R26), SEL1_ + VSPLTISB $0, ZER + VCMPEQUD SEL1, ZER, SEL1 + + VSEL X3L, X1L, SEL1, X3L + VSEL X3H, X1H, SEL1, X3H + VSEL Y3L, Y1L, SEL1, Y3L + VSEL Y3H, Y1H, SEL1, Y3H + VSEL Z3L, Z1L, SEL1, Z3L + VSEL Z3H, Z1H, SEL1, Z3H + + // if (zero == 0) { + // copy(P3.x[:], X2) + // copy(P3.y[:], Y2) + // copy(P3.z[:], []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + // 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}) //(p256.z*2^256)%p + // } + MOVD in2+16(FP), P2ptr + LXVD2X (R0)(P2ptr), X2L_ + LXVD2X (R16)(P2ptr), X2H_ + VPERM X2H, X2H, SWAP, X2H + VPERM X2L, X2L, SWAP, X2L + + // Y2 already loaded + LXVD2X (R23)(CPOOL), Z2L_ + LXVD2X (R24)(CPOOL), Z2H_ + + MOVD $120, R26 // Get the value from zero+40(FP) + LXVDSX (R1)(R26), SEL1_ + VSPLTISB $0, ZER + VCMPEQUD SEL1, ZER, SEL1 + + VSEL X3L, X2L, SEL1, X3L + VSEL X3H, X2H, SEL1, X3H + VSEL Y3L, Y2L, SEL1, Y3L + VSEL Y3H, Y2H, SEL1, Y3H + VSEL Z3L, Z2L, SEL1, Z3L + VSEL Z3H, Z2H, SEL1, Z3H + + // Reorder the bytes so they can be stored using STXVD2X. + MOVD res+0(FP), P3ptr + VPERM X3H, X3H, SWAP, X3H + VPERM X3L, X3L, SWAP, X3L + VPERM Y3H, Y3H, SWAP, Y3H + VPERM Y3L, Y3L, SWAP, Y3L + VPERM Z3H, Z3H, SWAP, Z3H + VPERM Z3L, Z3L, SWAP, Z3L + STXVD2X X3L_, (R0)(P3ptr) + STXVD2X X3H_, (R16)(P3ptr) + STXVD2X Y3L_, (R17)(P3ptr) + STXVD2X Y3H_, (R18)(P3ptr) + STXVD2X Z3L_, (R19)(P3ptr) + STXVD2X Z3H_, (R20)(P3ptr) + + RET + +#undef P3ptr +#undef P1ptr +#undef P2ptr +#undef CPOOL +#undef SWAP +#undef SWAP_ + +#undef Y2L +#undef Y2H +#undef Y2L_ +#undef Y2H_ +#undef T1L +#undef T1H +#undef T2L +#undef T2H +#undef T3L +#undef T3H +#undef T4L +#undef T4H + +#undef TT0 +#undef TT1 +#undef TT0_ +#undef TT1_ +#undef T2 + +#undef X0 +#undef X1 +#undef X0_ +#undef X1_ +#undef Y0 +#undef Y1 +#undef Y0_ +#undef Y1_ +#undef T0 +#undef T1 + +#undef PL +#undef PH +#undef PL_ +#undef PH_ + +#undef X1L +#undef X1H +#undef X1L_ +#undef X1H_ +#undef Y1L +#undef Y1H +#undef Y1L_ +#undef Y1H_ +#undef Z1L +#undef Z1H +#undef Z1L_ +#undef Z1H_ +#undef X2L +#undef X2H +#undef X2L_ +#undef X2H_ +#undef Z2L +#undef Z2H +#undef Z2L_ +#undef Z2H_ +#undef X3L +#undef X3H +#undef X3L_ +#undef X3H_ +#undef Y3L +#undef Y3H +#undef Y3L_ +#undef Y3H_ +#undef Z3L +#undef Z3H +#undef Z3L_ +#undef Z3H_ + +#undef ZER +#undef SEL1 +#undef SEL1_ +#undef CAR1 +#undef CAR2 + +// p256PointDoubleAsm(P3, P1 *p256Point) +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-2007-bl +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw.html +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-projective-3.html +#define P3ptr R3 +#define P1ptr R4 +#define CPOOL R7 + +// Temporaries in REGs +#define X3L V15 +#define X3H V16 +#define X3L_ VS47 +#define X3H_ VS48 +#define Y3L V17 +#define Y3H V18 +#define Y3L_ VS49 +#define Y3H_ VS50 +#define T1L V19 +#define T1H V20 +#define T2L V21 +#define T2H V22 +#define T3L V23 +#define T3H V24 + +#define X1L V6 +#define X1H V7 +#define X1L_ VS38 +#define X1H_ VS39 +#define Y1L V8 +#define Y1H V9 +#define Y1L_ VS40 +#define Y1H_ VS41 +#define Z1L V10 +#define Z1H V11 + +// Temps for Sub and Add +#define TT0 V11 +#define TT1 V12 +#define TT0_ VS43 +#define TT1_ VS44 +#define T2 V13 + +// p256MulAsm Parameters +#define X0 V0 +#define X1 V1 +#define X0_ VS32 +#define X1_ VS33 +#define Y0 V2 +#define Y1 V3 +#define Y0_ VS34 +#define Y1_ VS35 +#define T0 V4 +#define T1 V5 +#define T0_ VS36 +#define T1_ VS37 + +#define PL V30 +#define PH V31 +#define PL_ VS62 +#define PH_ VS63 + +#define Z3L V23 +#define Z3H V24 + +#define SWAP V25 +#define SWAP_ VS57 +#define ZER V26 +#define SEL1 V27 +#define CAR1 V28 +#define CAR2 V29 +/* + * http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2004-hmv + * Cost: 4M + 4S + 1*half + 5add + 2*2 + 1*3. + * Source: 2004 Hankerson–Menezes–Vanstone, page 91. + * A = 3(X₁-Z₁²)×(X₁+Z₁²) + * B = 2Y₁ + * Z₃ = B×Z₁ + * C = B² + * D = C×X₁ + * X₃ = A²-2D + * Y₃ = (D-X₃)×A-C²/2 + * + * Three-operand formula: + * T1 = Z1² + * T2 = X1-T1 + * T1 = X1+T1 + * T2 = T2*T1 + * T2 = 3*T2 + * Y3 = 2*Y1 + * Z3 = Y3*Z1 + * Y3 = Y3² + * T3 = Y3*X1 + * Y3 = Y3² + * Y3 = half*Y3 + * X3 = T2² + * T1 = 2*T3 + * X3 = X3-T1 + * T1 = T3-X3 + * T1 = T1*T2 + * Y3 = T1-Y3 + */ + +TEXT ·p256PointDoubleAsm(SB), NOSPLIT, $0-16 + MOVD res+0(FP), P3ptr + MOVD in+8(FP), P1ptr + + MOVD $p256mul<>+0x00(SB), CPOOL + MOVD $byteswap<>+0x00(SB), R15 + + MOVD $16, R16 + MOVD $32, R17 + MOVD $48, R18 + MOVD $64, R19 + MOVD $80, R20 + + LXVD2X (R16)(CPOOL), PH_ + LXVD2X (R0)(CPOOL), PL_ + + LXVD2X (R15)(R0), SWAP_ + + // X=Z1; Y=Z1; MUL; T- // T1 = Z1² + LXVD2X (R19)(P1ptr), X0_ // Z1H + LXVD2X (R20)(P1ptr), X1_ // Z1L + + VPERM X0, X0, SWAP, X0 + VPERM X1, X1, SWAP, X1 + + VOR X0, X0, Y0 + VOR X1, X1, Y1 + CALL p256MulInternal<>(SB) + + // SUB(X<X1-T) // T2 = X1-T1 + LXVD2X (R0)(P1ptr), X1L_ + LXVD2X (R16)(P1ptr), X1H_ + VPERM X1L, X1L, SWAP, X1L + VPERM X1H, X1H, SWAP, X1H + + p256SubInternal(X1,X0,X1H,X1L,T1,T0) + + // ADD(Y<X1+T) // T1 = X1+T1 + p256AddInternal(Y1,Y0,X1H,X1L,T1,T0) + + // X- ; Y- ; MUL; T- // T2 = T2*T1 + CALL p256MulInternal<>(SB) + + // ADD(T2<T+T); ADD(T2<T2+T) // T2 = 3*T2 + p256AddInternal(T2H,T2L,T1,T0,T1,T0) + p256AddInternal(T2H,T2L,T2H,T2L,T1,T0) + + // ADD(X<Y1+Y1) // Y3 = 2*Y1 + LXVD2X (R15)(R0), SWAP_ + LXVD2X (R17)(P1ptr), Y1L_ + LXVD2X (R18)(P1ptr), Y1H_ + VPERM Y1L, Y1L, SWAP, Y1L + VPERM Y1H, Y1H, SWAP, Y1H + + p256AddInternal(X1,X0,Y1H,Y1L,Y1H,Y1L) + + // X- ; Y=Z1; MUL; Z3:=T // Z3 = Y3*Z1 + LXVD2X (R15)(R0), SWAP_ + LXVD2X (R19)(P1ptr), Y0_ + LXVD2X (R20)(P1ptr), Y1_ + VPERM Y0, Y0, SWAP, Y0 + VPERM Y1, Y1, SWAP, Y1 + + CALL p256MulInternal<>(SB) + + LXVD2X (R15)(R0), SWAP_ + + // Leave T0, T1 as is. + VPERM T0, T0, SWAP, TT0 + VPERM T1, T1, SWAP, TT1 + STXVD2X TT0_, (R19)(P3ptr) + STXVD2X TT1_, (R20)(P3ptr) + + // X- ; Y=X ; MUL; T- // Y3 = Y3² + VOR X0, X0, Y0 + VOR X1, X1, Y1 + CALL p256MulInternal<>(SB) + + // X=T ; Y=X1; MUL; T3=T // T3 = Y3*X1 + VOR T0, T0, X0 + VOR T1, T1, X1 + LXVD2X (R15)(R0), SWAP_ + LXVD2X (R0)(P1ptr), Y0_ + LXVD2X (R16)(P1ptr), Y1_ + VPERM Y0, Y0, SWAP, Y0 + VPERM Y1, Y1, SWAP, Y1 + CALL p256MulInternal<>(SB) + VOR T0, T0, T3L + VOR T1, T1, T3H + + // X- ; Y=X ; MUL; T- // Y3 = Y3² + VOR X0, X0, Y0 + VOR X1, X1, Y1 + CALL p256MulInternal<>(SB) + + // HAL(Y3<T) // Y3 = half*Y3 + p256HalfInternal(Y3H,Y3L, T1,T0) + + // X=T2; Y=T2; MUL; T- // X3 = T2² + VOR T2L, T2L, X0 + VOR T2H, T2H, X1 + VOR T2L, T2L, Y0 + VOR T2H, T2H, Y1 + CALL p256MulInternal<>(SB) + + // ADD(T1<T3+T3) // T1 = 2*T3 + p256AddInternal(T1H,T1L,T3H,T3L,T3H,T3L) + + // SUB(X3<T-T1) X3:=X3 // X3 = X3-T1 + p256SubInternal(X3H,X3L,T1,T0,T1H,T1L) + + LXVD2X (R15)(R0), SWAP_ + VPERM X3L, X3L, SWAP, TT0 + VPERM X3H, X3H, SWAP, TT1 + STXVD2X TT0_, (R0)(P3ptr) + STXVD2X TT1_, (R16)(P3ptr) + + // SUB(X<T3-X3) // T1 = T3-X3 + p256SubInternal(X1,X0,T3H,T3L,X3H,X3L) + + // X- ; Y- ; MUL; T- // T1 = T1*T2 + CALL p256MulInternal<>(SB) + + // SUB(Y3<T-Y3) // Y3 = T1-Y3 + p256SubInternal(Y3H,Y3L,T1,T0,Y3H,Y3L) + + LXVD2X (R15)(R0), SWAP_ + VPERM Y3L, Y3L, SWAP, Y3L + VPERM Y3H, Y3H, SWAP, Y3H + STXVD2X Y3L_, (R17)(P3ptr) + STXVD2X Y3H_, (R18)(P3ptr) + RET + +#undef P3ptr +#undef P1ptr +#undef CPOOL +#undef X3L +#undef X3H +#undef X3L_ +#undef X3H_ +#undef Y3L +#undef Y3H +#undef Y3L_ +#undef Y3H_ +#undef T1L +#undef T1H +#undef T2L +#undef T2H +#undef T3L +#undef T3H +#undef X1L +#undef X1H +#undef X1L_ +#undef X1H_ +#undef Y1L +#undef Y1H +#undef Y1L_ +#undef Y1H_ +#undef Z1L +#undef Z1H +#undef TT0 +#undef TT1 +#undef TT0_ +#undef TT1_ +#undef T2 +#undef X0 +#undef X1 +#undef X0_ +#undef X1_ +#undef Y0 +#undef Y1 +#undef Y0_ +#undef Y1_ +#undef T0 +#undef T1 +#undef T0_ +#undef T1_ +#undef PL +#undef PH +#undef PL_ +#undef PH_ +#undef Z3L +#undef Z3H +#undef ZER +#undef SEL1 +#undef CAR1 +#undef CAR2 +#undef SWAP +#undef SWAP_ + +// p256PointAddAsm(P3, P1, P2 *p256Point) +#define P3ptr R3 +#define P1ptr R4 +#define P2ptr R5 +#define CPOOL R7 +#define TRUE R14 +#define RES1 R9 +#define RES2 R10 + +// Temporaries in REGs +#define T1L V16 +#define T1H V17 +#define T2L V18 +#define T2H V19 +#define U1L V20 +#define U1H V21 +#define S1L V22 +#define S1H V23 +#define HL V24 +#define HH V25 +#define RL V26 +#define RH V27 +#define RH_ VS59 + +// Temps for Sub and Add +#define ZER V6 +#define SEL1 V7 +#define CAR1 V8 +#define CAR2 V9 +#define TT0 V11 +#define TT0_ VS43 +#define TT1 V12 +#define TT1_ VS44 +#define T2 V13 + +#define SWAP V28 +#define SWAP_ VS60 + +// p256MulAsm Parameters +#define X0 V0 +#define X1 V1 +#define X0_ VS32 +#define X1_ VS33 +#define Y0 V2 +#define Y1 V3 +#define Y0_ VS34 +#define Y1_ VS35 +#define T0 V4 +#define T1 V5 +#define T0_ VS36 +#define T1_ VS37 + +#define PL V30 +#define PH V31 +#define PL_ VS62 +#define PH_ VS63 +/* + * https://choucroutage.com/Papers/SideChannelAttacks/ctrsa-2011-brown.pdf "Software Implementation of the NIST Elliptic Curves Over Prime Fields" + * + * A = X₁×Z₂² + * B = Y₁×Z₂³ + * C = X₂×Z₁²-A + * D = Y₂×Z₁³-B + * X₃ = D² - 2A×C² - C³ + * Y₃ = D×(A×C² - X₃) - B×C³ + * Z₃ = Z₁×Z₂×C + * + * Three-operand formula (adopted): http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-1998-cmo-2 + * Temp storage: T1,T2,U1,H,Z3=X3=Y3,S1,R + * + * T1 = Z1*Z1 + * T2 = Z2*Z2 + * U1 = X1*T2 + * H = X2*T1 + * H = H-U1 + * Z3 = Z1*Z2 + * Z3 = Z3*H << store-out Z3 result reg.. could override Z1, if slices have same backing array + * + * S1 = Z2*T2 + * S1 = Y1*S1 + * R = Z1*T1 + * R = Y2*R + * R = R-S1 + * + * T1 = H*H + * T2 = H*T1 + * U1 = U1*T1 + * + * X3 = R*R + * X3 = X3-T2 + * T1 = 2*U1 + * X3 = X3-T1 << store-out X3 result reg + * + * T2 = S1*T2 + * Y3 = U1-X3 + * Y3 = R*Y3 + * Y3 = Y3-T2 << store-out Y3 result reg + + // X=Z1; Y=Z1; MUL; T- // T1 = Z1*Z1 + // X- ; Y=T ; MUL; R=T // R = Z1*T1 + // X=X2; Y- ; MUL; H=T // H = X2*T1 + // X=Z2; Y=Z2; MUL; T- // T2 = Z2*Z2 + // X- ; Y=T ; MUL; S1=T // S1 = Z2*T2 + // X=X1; Y- ; MUL; U1=T // U1 = X1*T2 + // SUB(H<H-T) // H = H-U1 + // X=Z1; Y=Z2; MUL; T- // Z3 = Z1*Z2 + // X=T ; Y=H ; MUL; Z3:=T// Z3 = Z3*H << store-out Z3 result reg.. could override Z1, if slices have same backing array + // X=Y1; Y=S1; MUL; S1=T // S1 = Y1*S1 + // X=Y2; Y=R ; MUL; T- // R = Y2*R + // SUB(R<T-S1) // R = R-S1 + // X=H ; Y=H ; MUL; T- // T1 = H*H + // X- ; Y=T ; MUL; T2=T // T2 = H*T1 + // X=U1; Y- ; MUL; U1=T // U1 = U1*T1 + // X=R ; Y=R ; MUL; T- // X3 = R*R + // SUB(T<T-T2) // X3 = X3-T2 + // ADD(X<U1+U1) // T1 = 2*U1 + // SUB(T<T-X) X3:=T // X3 = X3-T1 << store-out X3 result reg + // SUB(Y<U1-T) // Y3 = U1-X3 + // X=R ; Y- ; MUL; U1=T // Y3 = R*Y3 + // X=S1; Y=T2; MUL; T- // T2 = S1*T2 + // SUB(T<U1-T); Y3:=T // Y3 = Y3-T2 << store-out Y3 result reg + */ +TEXT ·p256PointAddAsm(SB), NOSPLIT, $16-32 + MOVD res+0(FP), P3ptr + MOVD in1+8(FP), P1ptr + MOVD $p256mul<>+0x00(SB), CPOOL + MOVD $16, R16 + MOVD $32, R17 + MOVD $48, R18 + MOVD $64, R19 + MOVD $80, R20 + + MOVD $byteswap<>+0x00(SB), R8 + LXVD2X (R16)(CPOOL), PH_ + LXVD2X (R0)(CPOOL), PL_ + + // X=Z1; Y=Z1; MUL; T- // T1 = Z1*Z1 + LXVD2X (R8)(R0), SWAP_ + LXVD2X (R19)(P1ptr), X0_ // Z1L + LXVD2X (R20)(P1ptr), X1_ // Z1H + VPERM X0, X0, SWAP, X0 + VPERM X1, X1, SWAP, X1 + VOR X0, X0, Y0 + VOR X1, X1, Y1 + CALL p256MulInternal<>(SB) + + // X- ; Y=T ; MUL; R=T // R = Z1*T1 + VOR T0, T0, Y0 + VOR T1, T1, Y1 + CALL p256MulInternal<>(SB) + VOR T0, T0, RL // SAVE: RL + VOR T1, T1, RH // SAVE: RH + + STXVD2X RH_, (R1)(R17) // V27 has to be saved + + // X=X2; Y- ; MUL; H=T // H = X2*T1 + MOVD in2+16(FP), P2ptr + LXVD2X (R8)(R0), SWAP_ + LXVD2X (R0)(P2ptr), X0_ // X2L + LXVD2X (R16)(P2ptr), X1_ // X2H + VPERM X0, X0, SWAP, X0 + VPERM X1, X1, SWAP, X1 + CALL p256MulInternal<>(SB) + VOR T0, T0, HL // SAVE: HL + VOR T1, T1, HH // SAVE: HH + + // X=Z2; Y=Z2; MUL; T- // T2 = Z2*Z2 + MOVD in2+16(FP), P2ptr + LXVD2X (R8)(R0), SWAP_ + LXVD2X (R19)(P2ptr), X0_ // Z2L + LXVD2X (R20)(P2ptr), X1_ // Z2H + VPERM X0, X0, SWAP, X0 + VPERM X1, X1, SWAP, X1 + VOR X0, X0, Y0 + VOR X1, X1, Y1 + CALL p256MulInternal<>(SB) + + // X- ; Y=T ; MUL; S1=T // S1 = Z2*T2 + VOR T0, T0, Y0 + VOR T1, T1, Y1 + CALL p256MulInternal<>(SB) + VOR T0, T0, S1L // SAVE: S1L + VOR T1, T1, S1H // SAVE: S1H + + // X=X1; Y- ; MUL; U1=T // U1 = X1*T2 + MOVD in1+8(FP), P1ptr + LXVD2X (R8)(R0), SWAP_ + LXVD2X (R0)(P1ptr), X0_ // X1L + LXVD2X (R16)(P1ptr), X1_ // X1H + VPERM X0, X0, SWAP, X0 + VPERM X1, X1, SWAP, X1 + CALL p256MulInternal<>(SB) + VOR T0, T0, U1L // SAVE: U1L + VOR T1, T1, U1H // SAVE: U1H + + // SUB(H<H-T) // H = H-U1 + p256SubInternal(HH,HL,HH,HL,T1,T0) + + // if H == 0 or H^P == 0 then ret=1 else ret=0 + // clobbers T1H and T1L + MOVD $1, TRUE + VSPLTISB $0, ZER + VOR HL, HH, T1H + VCMPEQUDCC ZER, T1H, T1H + + // 26 = CR6 NE + ISEL $26, R0, TRUE, RES1 + VXOR HL, PL, T1L // SAVE: T1L + VXOR HH, PH, T1H // SAVE: T1H + VOR T1L, T1H, T1H + VCMPEQUDCC ZER, T1H, T1H + + // 26 = CR6 NE + ISEL $26, R0, TRUE, RES2 + OR RES2, RES1, RES1 + MOVD RES1, ret+24(FP) + + // X=Z1; Y=Z2; MUL; T- // Z3 = Z1*Z2 + MOVD $byteswap<>+0x00(SB), R8 + MOVD in1+8(FP), P1ptr + MOVD in2+16(FP), P2ptr + LXVD2X (R8)(R0), SWAP_ + LXVD2X (R19)(P1ptr), X0_ // Z1L + LXVD2X (R20)(P1ptr), X1_ // Z1H + VPERM X0, X0, SWAP, X0 + VPERM X1, X1, SWAP, X1 + LXVD2X (R19)(P2ptr), Y0_ // Z2L + LXVD2X (R20)(P2ptr), Y1_ // Z2H + VPERM Y0, Y0, SWAP, Y0 + VPERM Y1, Y1, SWAP, Y1 + CALL p256MulInternal<>(SB) + + // X=T ; Y=H ; MUL; Z3:=T// Z3 = Z3*H + VOR T0, T0, X0 + VOR T1, T1, X1 + VOR HL, HL, Y0 + VOR HH, HH, Y1 + CALL p256MulInternal<>(SB) + MOVD res+0(FP), P3ptr + LXVD2X (R8)(R0), SWAP_ + VPERM T1, T1, SWAP, TT1 + VPERM T0, T0, SWAP, TT0 + STXVD2X TT0_, (R19)(P3ptr) + STXVD2X TT1_, (R20)(P3ptr) + + // X=Y1; Y=S1; MUL; S1=T // S1 = Y1*S1 + MOVD in1+8(FP), P1ptr + LXVD2X (R17)(P1ptr), X0_ + LXVD2X (R18)(P1ptr), X1_ + VPERM X0, X0, SWAP, X0 + VPERM X1, X1, SWAP, X1 + VOR S1L, S1L, Y0 + VOR S1H, S1H, Y1 + CALL p256MulInternal<>(SB) + VOR T0, T0, S1L + VOR T1, T1, S1H + + // X=Y2; Y=R ; MUL; T- // R = Y2*R + MOVD in2+16(FP), P2ptr + LXVD2X (R8)(R0), SWAP_ + LXVD2X (R17)(P2ptr), X0_ + LXVD2X (R18)(P2ptr), X1_ + VPERM X0, X0, SWAP, X0 + VPERM X1, X1, SWAP, X1 + VOR RL, RL, Y0 + + // VOR RH, RH, Y1 RH was saved above in D2X format + LXVD2X (R1)(R17), Y1_ + CALL p256MulInternal<>(SB) + + // SUB(R<T-S1) // R = T-S1 + p256SubInternal(RH,RL,T1,T0,S1H,S1L) + + STXVD2X RH_, (R1)(R17) // Save RH + + // if R == 0 or R^P == 0 then ret=ret else ret=0 + // clobbers T1H and T1L + // Redo this using ISEL?? + MOVD $1, TRUE + VSPLTISB $0, ZER + VOR RL, RH, T1H + VCMPEQUDCC ZER, T1H, T1H + + // 24 = CR6 NE + ISEL $26, R0, TRUE, RES1 + VXOR RL, PL, T1L + VXOR RH, PH, T1H // SAVE: T1L + VOR T1L, T1H, T1H + VCMPEQUDCC ZER, T1H, T1H + + // 26 = CR6 NE + ISEL $26, R0, TRUE, RES2 + OR RES2, RES1, RES1 + MOVD ret+24(FP), RES2 + AND RES2, RES1, RES1 + MOVD RES1, ret+24(FP) + + // X=H ; Y=H ; MUL; T- // T1 = H*H + VOR HL, HL, X0 + VOR HH, HH, X1 + VOR HL, HL, Y0 + VOR HH, HH, Y1 + CALL p256MulInternal<>(SB) + + // X- ; Y=T ; MUL; T2=T // T2 = H*T1 + VOR T0, T0, Y0 + VOR T1, T1, Y1 + CALL p256MulInternal<>(SB) + VOR T0, T0, T2L + VOR T1, T1, T2H + + // X=U1; Y- ; MUL; U1=T // U1 = U1*T1 + VOR U1L, U1L, X0 + VOR U1H, U1H, X1 + CALL p256MulInternal<>(SB) + VOR T0, T0, U1L + VOR T1, T1, U1H + + // X=R ; Y=R ; MUL; T- // X3 = R*R + VOR RL, RL, X0 + + // VOR RH, RH, X1 + VOR RL, RL, Y0 + + // RH was saved above using STXVD2X + LXVD2X (R1)(R17), X1_ + VOR X1, X1, Y1 + + // VOR RH, RH, Y1 + CALL p256MulInternal<>(SB) + + // SUB(T<T-T2) // X3 = X3-T2 + p256SubInternal(T1,T0,T1,T0,T2H,T2L) + + // ADD(X<U1+U1) // T1 = 2*U1 + p256AddInternal(X1,X0,U1H,U1L,U1H,U1L) + + // SUB(T<T-X) X3:=T // X3 = X3-T1 << store-out X3 result reg + p256SubInternal(T1,T0,T1,T0,X1,X0) + MOVD res+0(FP), P3ptr + LXVD2X (R8)(R0), SWAP_ + VPERM T1, T1, SWAP, TT1 + VPERM T0, T0, SWAP, TT0 + STXVD2X TT0_, (R0)(P3ptr) + STXVD2X TT1_, (R16)(P3ptr) + + // SUB(Y<U1-T) // Y3 = U1-X3 + p256SubInternal(Y1,Y0,U1H,U1L,T1,T0) + + // X=R ; Y- ; MUL; U1=T // Y3 = R*Y3 + VOR RL, RL, X0 + + // VOR RH, RH, X1 + LXVD2X (R1)(R17), X1_ + CALL p256MulInternal<>(SB) + VOR T0, T0, U1L + VOR T1, T1, U1H + + // X=S1; Y=T2; MUL; T- // T2 = S1*T2 + VOR S1L, S1L, X0 + VOR S1H, S1H, X1 + VOR T2L, T2L, Y0 + VOR T2H, T2H, Y1 + CALL p256MulInternal<>(SB) + + // SUB(T<U1-T); Y3:=T // Y3 = Y3-T2 << store-out Y3 result reg + p256SubInternal(T1,T0,U1H,U1L,T1,T0) + MOVD res+0(FP), P3ptr + LXVD2X (R8)(R0), SWAP_ + VPERM T1, T1, SWAP, TT1 + VPERM T0, T0, SWAP, TT0 + STXVD2X TT0_, (R17)(P3ptr) + STXVD2X TT1_, (R18)(P3ptr) + + RET diff --git a/src/crypto/elliptic/p256_asm_s390x.s b/src/crypto/elliptic/p256_asm_s390x.s new file mode 100644 index 0000000..cf37e20 --- /dev/null +++ b/src/crypto/elliptic/p256_asm_s390x.s @@ -0,0 +1,2714 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" +#include "go_asm.h" + + +DATA p256ordK0<>+0x00(SB)/4, $0xee00bc4f +DATA p256ord<>+0x00(SB)/8, $0xffffffff00000000 +DATA p256ord<>+0x08(SB)/8, $0xffffffffffffffff +DATA p256ord<>+0x10(SB)/8, $0xbce6faada7179e84 +DATA p256ord<>+0x18(SB)/8, $0xf3b9cac2fc632551 +DATA p256<>+0x00(SB)/8, $0xffffffff00000001 // P256 +DATA p256<>+0x08(SB)/8, $0x0000000000000000 // P256 +DATA p256<>+0x10(SB)/8, $0x00000000ffffffff // P256 +DATA p256<>+0x18(SB)/8, $0xffffffffffffffff // P256 +DATA p256<>+0x20(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0 +DATA p256<>+0x28(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0 +DATA p256<>+0x30(SB)/8, $0x0000000010111213 // SEL 0 d1 d0 0 +DATA p256<>+0x38(SB)/8, $0x1415161700000000 // SEL 0 d1 d0 0 +DATA p256<>+0x40(SB)/8, $0x18191a1b1c1d1e1f // SEL d1 d0 d1 d0 +DATA p256<>+0x48(SB)/8, $0x18191a1b1c1d1e1f // SEL d1 d0 d1 d0 +DATA p256mul<>+0x00(SB)/8, $0xffffffff00000001 // P256 +DATA p256mul<>+0x08(SB)/8, $0x0000000000000000 // P256 +DATA p256mul<>+0x10(SB)/8, $0x00000000ffffffff // P256 +DATA p256mul<>+0x18(SB)/8, $0xffffffffffffffff // P256 +DATA p256mul<>+0x20(SB)/8, $0x1c1d1e1f00000000 // SEL d0 0 0 d0 +DATA p256mul<>+0x28(SB)/8, $0x000000001c1d1e1f // SEL d0 0 0 d0 +DATA p256mul<>+0x30(SB)/8, $0x0001020304050607 // SEL d0 0 d1 d0 +DATA p256mul<>+0x38(SB)/8, $0x1c1d1e1f0c0d0e0f // SEL d0 0 d1 d0 +DATA p256mul<>+0x40(SB)/8, $0x040506071c1d1e1f // SEL 0 d1 d0 d1 +DATA p256mul<>+0x48(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL 0 d1 d0 d1 +DATA p256mul<>+0x50(SB)/8, $0x0405060704050607 // SEL 0 0 d1 d0 +DATA p256mul<>+0x58(SB)/8, $0x1c1d1e1f0c0d0e0f // SEL 0 0 d1 d0 +DATA p256mul<>+0x60(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0 +DATA p256mul<>+0x68(SB)/8, $0x0c0d0e0f1c1d1e1f // SEL d1 d0 d1 d0 +DATA p256mul<>+0x70(SB)/8, $0x141516170c0d0e0f // SEL 0 d1 d0 0 +DATA p256mul<>+0x78(SB)/8, $0x1c1d1e1f14151617 // SEL 0 d1 d0 0 +DATA p256mul<>+0x80(SB)/8, $0x00000000fffffffe // (1*2^256)%P256 +DATA p256mul<>+0x88(SB)/8, $0xffffffffffffffff // (1*2^256)%P256 +DATA p256mul<>+0x90(SB)/8, $0xffffffff00000000 // (1*2^256)%P256 +DATA p256mul<>+0x98(SB)/8, $0x0000000000000001 // (1*2^256)%P256 +GLOBL p256ordK0<>(SB), 8, $4 +GLOBL p256ord<>(SB), 8, $32 +GLOBL p256<>(SB), 8, $80 +GLOBL p256mul<>(SB), 8, $160 + +DATA p256vmsl<>+0x0(SB)/8, $0x0012131415161718 +DATA p256vmsl<>+0x8(SB)/8, $0x00191a1b1c1d1e1f +DATA p256vmsl<>+0x10(SB)/8, $0x0012131415161718 +DATA p256vmsl<>+0x18(SB)/8, $0x000b0c0d0e0f1011 +DATA p256vmsl<>+0x20(SB)/8, $0x00191a1b1c1d1e1f +DATA p256vmsl<>+0x28(SB)/8, $0x0012131415161718 +DATA p256vmsl<>+0x30(SB)/8, $0x000b0c0d0e0f1011 +DATA p256vmsl<>+0x38(SB)/8, $0x0012131415161718 +DATA p256vmsl<>+0x40(SB)/8, $0x000405060708090a +DATA p256vmsl<>+0x48(SB)/8, $0x000b0c0d0e0f1011 +DATA p256vmsl<>+0x50(SB)/8, $0x000b0c0d0e0f1011 +DATA p256vmsl<>+0x58(SB)/8, $0x000405060708090a +DATA p256vmsl<>+0x60(SB)/8, $0x1010101000010203 +DATA p256vmsl<>+0x68(SB)/8, $0x100405060708090a +DATA p256vmsl<>+0x70(SB)/8, $0x100405060708090a +DATA p256vmsl<>+0x78(SB)/8, $0x1010101000010203 +GLOBL p256vmsl<>(SB), 8, $128 + +// --------------------------------------- +// iff cond == 1 val <- -val +// func p256NegCond(val *p256Point, cond int) +#define P1ptr R1 +#define CPOOL R4 + +#define Y1L V0 +#define Y1H V1 +#define T1L V2 +#define T1H V3 + +#define PL V30 +#define PH V31 + +#define ZER V4 +#define SEL1 V5 +#define CAR1 V6 +TEXT ·p256NegCond(SB), NOSPLIT, $0 + MOVD val+0(FP), P1ptr + + MOVD $p256mul<>+0x00(SB), CPOOL + VL 16(CPOOL), PL + VL 0(CPOOL), PH + + VL 32(P1ptr), Y1H + VL 48(P1ptr), Y1L + + VLREPG cond+8(FP), SEL1 + VZERO ZER + VCEQG SEL1, ZER, SEL1 + + VSCBIQ Y1L, PL, CAR1 + VSQ Y1L, PL, T1L + VSBIQ PH, Y1H, CAR1, T1H + + VSEL Y1L, T1L, SEL1, Y1L + VSEL Y1H, T1H, SEL1, Y1H + + VST Y1H, 32(P1ptr) + VST Y1L, 48(P1ptr) + RET + +#undef P1ptr +#undef CPOOL +#undef Y1L +#undef Y1H +#undef T1L +#undef T1H +#undef PL +#undef PH +#undef ZER +#undef SEL1 +#undef CAR1 + +// --------------------------------------- +// if cond == 0 res <- b; else res <- a +// func p256MovCond(res, a, b *p256Point, cond int) +#define P3ptr R1 +#define P1ptr R2 +#define P2ptr R3 + +#define X1L V0 +#define X1H V1 +#define Y1L V2 +#define Y1H V3 +#define Z1L V4 +#define Z1H V5 +#define X2L V6 +#define X2H V7 +#define Y2L V8 +#define Y2H V9 +#define Z2L V10 +#define Z2H V11 + +#define ZER V18 +#define SEL1 V19 +TEXT ·p256MovCond(SB), NOSPLIT, $0 + MOVD res+0(FP), P3ptr + MOVD a+8(FP), P1ptr + MOVD b+16(FP), P2ptr + VLREPG cond+24(FP), SEL1 + VZERO ZER + VCEQG SEL1, ZER, SEL1 + + VL 0(P1ptr), X1H + VL 16(P1ptr), X1L + VL 32(P1ptr), Y1H + VL 48(P1ptr), Y1L + VL 64(P1ptr), Z1H + VL 80(P1ptr), Z1L + + VL 0(P2ptr), X2H + VL 16(P2ptr), X2L + VL 32(P2ptr), Y2H + VL 48(P2ptr), Y2L + VL 64(P2ptr), Z2H + VL 80(P2ptr), Z2L + + VSEL X2L, X1L, SEL1, X1L + VSEL X2H, X1H, SEL1, X1H + VSEL Y2L, Y1L, SEL1, Y1L + VSEL Y2H, Y1H, SEL1, Y1H + VSEL Z2L, Z1L, SEL1, Z1L + VSEL Z2H, Z1H, SEL1, Z1H + + VST X1H, 0(P3ptr) + VST X1L, 16(P3ptr) + VST Y1H, 32(P3ptr) + VST Y1L, 48(P3ptr) + VST Z1H, 64(P3ptr) + VST Z1L, 80(P3ptr) + + RET + +#undef P3ptr +#undef P1ptr +#undef P2ptr +#undef X1L +#undef X1H +#undef Y1L +#undef Y1H +#undef Z1L +#undef Z1H +#undef X2L +#undef X2H +#undef Y2L +#undef Y2H +#undef Z2L +#undef Z2H +#undef ZER +#undef SEL1 + +// --------------------------------------- +// Constant time table access +// Indexed from 1 to 15, with -1 offset +// (index 0 is implicitly point at infinity) +// func p256Select(point *p256Point, table []p256Point, idx int) +#define P3ptr R1 +#define P1ptr R2 +#define COUNT R4 + +#define X1L V0 +#define X1H V1 +#define Y1L V2 +#define Y1H V3 +#define Z1L V4 +#define Z1H V5 +#define X2L V6 +#define X2H V7 +#define Y2L V8 +#define Y2H V9 +#define Z2L V10 +#define Z2H V11 + +#define ONE V18 +#define IDX V19 +#define SEL1 V20 +#define SEL2 V21 +TEXT ·p256Select(SB), NOSPLIT, $0 + MOVD point+0(FP), P3ptr + MOVD table+8(FP), P1ptr + VLREPB idx+(32+7)(FP), IDX + VREPIB $1, ONE + VREPIB $1, SEL2 + MOVD $1, COUNT + + VZERO X1H + VZERO X1L + VZERO Y1H + VZERO Y1L + VZERO Z1H + VZERO Z1L + +loop_select: + VL 0(P1ptr), X2H + VL 16(P1ptr), X2L + VL 32(P1ptr), Y2H + VL 48(P1ptr), Y2L + VL 64(P1ptr), Z2H + VL 80(P1ptr), Z2L + + VCEQG SEL2, IDX, SEL1 + + VSEL X2L, X1L, SEL1, X1L + VSEL X2H, X1H, SEL1, X1H + VSEL Y2L, Y1L, SEL1, Y1L + VSEL Y2H, Y1H, SEL1, Y1H + VSEL Z2L, Z1L, SEL1, Z1L + VSEL Z2H, Z1H, SEL1, Z1H + + VAB SEL2, ONE, SEL2 + ADDW $1, COUNT + ADD $96, P1ptr + CMPW COUNT, $17 + BLT loop_select + + VST X1H, 0(P3ptr) + VST X1L, 16(P3ptr) + VST Y1H, 32(P3ptr) + VST Y1L, 48(P3ptr) + VST Z1H, 64(P3ptr) + VST Z1L, 80(P3ptr) + RET + +#undef P3ptr +#undef P1ptr +#undef COUNT +#undef X1L +#undef X1H +#undef Y1L +#undef Y1H +#undef Z1L +#undef Z1H +#undef X2L +#undef X2H +#undef Y2L +#undef Y2H +#undef Z2L +#undef Z2H +#undef ONE +#undef IDX +#undef SEL1 +#undef SEL2 + +// --------------------------------------- +// Constant time table access +// Indexed from 1 to 15, with -1 offset +// (index 0 is implicitly point at infinity) +// func p256SelectBase(point *p256Point, table []p256Point, idx int) +#define P3ptr R1 +#define P1ptr R2 +#define COUNT R4 + +#define X1L V0 +#define X1H V1 +#define Y1L V2 +#define Y1H V3 +#define Z1L V4 +#define Z1H V5 +#define X2L V6 +#define X2H V7 +#define Y2L V8 +#define Y2H V9 +#define Z2L V10 +#define Z2H V11 + +#define ONE V18 +#define IDX V19 +#define SEL1 V20 +#define SEL2 V21 +TEXT ·p256SelectBase(SB), NOSPLIT, $0 + MOVD point+0(FP), P3ptr + MOVD table+8(FP), P1ptr + VLREPB idx+(32+7)(FP), IDX + VREPIB $1, ONE + VREPIB $1, SEL2 + MOVD $1, COUNT + + VZERO X1H + VZERO X1L + VZERO Y1H + VZERO Y1L + VZERO Z1H + VZERO Z1L + +loop_select: + VL 0(P1ptr), X2H + VL 16(P1ptr), X2L + VL 32(P1ptr), Y2H + VL 48(P1ptr), Y2L + VL 64(P1ptr), Z2H + VL 80(P1ptr), Z2L + + VCEQG SEL2, IDX, SEL1 + + VSEL X2L, X1L, SEL1, X1L + VSEL X2H, X1H, SEL1, X1H + VSEL Y2L, Y1L, SEL1, Y1L + VSEL Y2H, Y1H, SEL1, Y1H + VSEL Z2L, Z1L, SEL1, Z1L + VSEL Z2H, Z1H, SEL1, Z1H + + VAB SEL2, ONE, SEL2 + ADDW $1, COUNT + ADD $96, P1ptr + CMPW COUNT, $65 + BLT loop_select + + VST X1H, 0(P3ptr) + VST X1L, 16(P3ptr) + VST Y1H, 32(P3ptr) + VST Y1L, 48(P3ptr) + VST Z1H, 64(P3ptr) + VST Z1L, 80(P3ptr) + RET + +#undef P3ptr +#undef P1ptr +#undef COUNT +#undef X1L +#undef X1H +#undef Y1L +#undef Y1H +#undef Z1L +#undef Z1H +#undef X2L +#undef X2H +#undef Y2L +#undef Y2H +#undef Z2L +#undef Z2H +#undef ONE +#undef IDX +#undef SEL1 +#undef SEL2 + +// --------------------------------------- +// func p256FromMont(res, in []byte) +#define res_ptr R1 +#define x_ptr R2 +#define CPOOL R4 + +#define T0 V0 +#define T1 V1 +#define T2 V2 +#define TT0 V3 +#define TT1 V4 + +#define ZER V6 +#define SEL1 V7 +#define SEL2 V8 +#define CAR1 V9 +#define CAR2 V10 +#define RED1 V11 +#define RED2 V12 +#define PL V13 +#define PH V14 + +TEXT ·p256FromMont(SB), NOSPLIT, $0 + MOVD res+0(FP), res_ptr + MOVD in+24(FP), x_ptr + + VZERO T2 + VZERO ZER + MOVD $p256<>+0x00(SB), CPOOL + VL 16(CPOOL), PL + VL 0(CPOOL), PH + VL 48(CPOOL), SEL2 + VL 64(CPOOL), SEL1 + + VL (1*16)(x_ptr), T0 + VL (0*16)(x_ptr), T1 + + // First round + VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0 + VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0 + VSQ RED1, RED2, RED2 // Guaranteed not to underflow + + VSLDB $8, T1, T0, T0 + VSLDB $8, T2, T1, T1 + + VACCQ T0, RED1, CAR1 + VAQ T0, RED1, T0 + VACCCQ T1, RED2, CAR1, CAR2 + VACQ T1, RED2, CAR1, T1 + VAQ T2, CAR2, T2 + + // Second round + VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0 + VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0 + VSQ RED1, RED2, RED2 // Guaranteed not to underflow + + VSLDB $8, T1, T0, T0 + VSLDB $8, T2, T1, T1 + + VACCQ T0, RED1, CAR1 + VAQ T0, RED1, T0 + VACCCQ T1, RED2, CAR1, CAR2 + VACQ T1, RED2, CAR1, T1 + VAQ T2, CAR2, T2 + + // Third round + VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0 + VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0 + VSQ RED1, RED2, RED2 // Guaranteed not to underflow + + VSLDB $8, T1, T0, T0 + VSLDB $8, T2, T1, T1 + + VACCQ T0, RED1, CAR1 + VAQ T0, RED1, T0 + VACCCQ T1, RED2, CAR1, CAR2 + VACQ T1, RED2, CAR1, T1 + VAQ T2, CAR2, T2 + + // Last round + VPERM T1, T0, SEL1, RED2 // d1 d0 d1 d0 + VPERM ZER, RED2, SEL2, RED1 // 0 d1 d0 0 + VSQ RED1, RED2, RED2 // Guaranteed not to underflow + + VSLDB $8, T1, T0, T0 + VSLDB $8, T2, T1, T1 + + VACCQ T0, RED1, CAR1 + VAQ T0, RED1, T0 + VACCCQ T1, RED2, CAR1, CAR2 + VACQ T1, RED2, CAR1, T1 + VAQ T2, CAR2, T2 + + // --------------------------------------------------- + + VSCBIQ PL, T0, CAR1 + VSQ PL, T0, TT0 + VSBCBIQ T1, PH, CAR1, CAR2 + VSBIQ T1, PH, CAR1, TT1 + VSBIQ T2, ZER, CAR2, T2 + + // what output to use, TT1||TT0 or T1||T0? + VSEL T0, TT0, T2, T0 + VSEL T1, TT1, T2, T1 + + VST T0, (1*16)(res_ptr) + VST T1, (0*16)(res_ptr) + RET + +#undef res_ptr +#undef x_ptr +#undef CPOOL +#undef T0 +#undef T1 +#undef T2 +#undef TT0 +#undef TT1 +#undef ZER +#undef SEL1 +#undef SEL2 +#undef CAR1 +#undef CAR2 +#undef RED1 +#undef RED2 +#undef PL +#undef PH + +// --------------------------------------- +// func p256OrdMul(res, in1, in2 []byte) +#define res_ptr R1 +#define x_ptr R2 +#define y_ptr R3 +#define X0 V0 +#define X1 V1 +#define Y0 V2 +#define Y1 V3 +#define M0 V4 +#define M1 V5 +#define T0 V6 +#define T1 V7 +#define T2 V8 +#define YDIG V9 + +#define ADD1 V16 +#define ADD1H V17 +#define ADD2 V18 +#define ADD2H V19 +#define RED1 V20 +#define RED1H V21 +#define RED2 V22 +#define RED2H V23 +#define CAR1 V24 +#define CAR1M V25 + +#define MK0 V30 +#define K0 V31 +TEXT ·p256OrdMul(SB), NOSPLIT, $0 + MOVD res+0(FP), res_ptr + MOVD in1+24(FP), x_ptr + MOVD in2+48(FP), y_ptr + + VZERO T2 + MOVD $p256ordK0<>+0x00(SB), R4 + + // VLEF $3, 0(R4), K0 + WORD $0xE7F40000 + BYTE $0x38 + BYTE $0x03 + MOVD $p256ord<>+0x00(SB), R4 + VL 16(R4), M0 + VL 0(R4), M1 + + VL (1*16)(x_ptr), X0 + VL (0*16)(x_ptr), X1 + VL (1*16)(y_ptr), Y0 + VL (0*16)(y_ptr), Y1 + + // ---------------------------------------------------------------------------/ + VREPF $3, Y0, YDIG + VMLF X0, YDIG, ADD1 + VMLF ADD1, K0, MK0 + VREPF $3, MK0, MK0 + + VMLF X1, YDIG, ADD2 + VMLHF X0, YDIG, ADD1H + VMLHF X1, YDIG, ADD2H + + VMALF M0, MK0, ADD1, RED1 + VMALHF M0, MK0, ADD1, RED1H + VMALF M1, MK0, ADD2, RED2 + VMALHF M1, MK0, ADD2, RED2H + + VSLDB $12, RED2, RED1, RED1 + VSLDB $12, T2, RED2, RED2 + + VACCQ RED1, ADD1H, CAR1 + VAQ RED1, ADD1H, T0 + VACCQ RED1H, T0, CAR1M + VAQ RED1H, T0, T0 + + // << ready for next MK0 + + VACQ RED2, ADD2H, CAR1, T1 + VACCCQ RED2, ADD2H, CAR1, CAR1 + VACCCQ RED2H, T1, CAR1M, T2 + VACQ RED2H, T1, CAR1M, T1 + VAQ CAR1, T2, T2 + + // --------------------------------------------------- +/* * + * ---+--------+--------+ + * T2| T1 | T0 | + * ---+--------+--------+ + * *(add)* + * +--------+--------+ + * | X1 | X0 | + * +--------+--------+ + * *(mul)* + * +--------+--------+ + * | YDIG | YDIG | + * +--------+--------+ + * *(add)* + * +--------+--------+ + * | M1 | M0 | + * +--------+--------+ + * *(mul)* + * +--------+--------+ + * | MK0 | MK0 | + * +--------+--------+ + * + * --------------------- + * + * +--------+--------+ + * | ADD2 | ADD1 | + * +--------+--------+ + * +--------+--------+ + * | ADD2H | ADD1H | + * +--------+--------+ + * +--------+--------+ + * | RED2 | RED1 | + * +--------+--------+ + * +--------+--------+ + * | RED2H | RED1H | + * +--------+--------+ + */ + VREPF $2, Y0, YDIG + VMALF X0, YDIG, T0, ADD1 + VMLF ADD1, K0, MK0 + VREPF $3, MK0, MK0 + + VMALF X1, YDIG, T1, ADD2 + VMALHF X0, YDIG, T0, ADD1H + VMALHF X1, YDIG, T1, ADD2H + + VMALF M0, MK0, ADD1, RED1 + VMALHF M0, MK0, ADD1, RED1H + VMALF M1, MK0, ADD2, RED2 + VMALHF M1, MK0, ADD2, RED2H + + VSLDB $12, RED2, RED1, RED1 + VSLDB $12, T2, RED2, RED2 + + VACCQ RED1, ADD1H, CAR1 + VAQ RED1, ADD1H, T0 + VACCQ RED1H, T0, CAR1M + VAQ RED1H, T0, T0 + + // << ready for next MK0 + + VACQ RED2, ADD2H, CAR1, T1 + VACCCQ RED2, ADD2H, CAR1, CAR1 + VACCCQ RED2H, T1, CAR1M, T2 + VACQ RED2H, T1, CAR1M, T1 + VAQ CAR1, T2, T2 + + // --------------------------------------------------- + VREPF $1, Y0, YDIG + VMALF X0, YDIG, T0, ADD1 + VMLF ADD1, K0, MK0 + VREPF $3, MK0, MK0 + + VMALF X1, YDIG, T1, ADD2 + VMALHF X0, YDIG, T0, ADD1H + VMALHF X1, YDIG, T1, ADD2H + + VMALF M0, MK0, ADD1, RED1 + VMALHF M0, MK0, ADD1, RED1H + VMALF M1, MK0, ADD2, RED2 + VMALHF M1, MK0, ADD2, RED2H + + VSLDB $12, RED2, RED1, RED1 + VSLDB $12, T2, RED2, RED2 + + VACCQ RED1, ADD1H, CAR1 + VAQ RED1, ADD1H, T0 + VACCQ RED1H, T0, CAR1M + VAQ RED1H, T0, T0 + + // << ready for next MK0 + + VACQ RED2, ADD2H, CAR1, T1 + VACCCQ RED2, ADD2H, CAR1, CAR1 + VACCCQ RED2H, T1, CAR1M, T2 + VACQ RED2H, T1, CAR1M, T1 + VAQ CAR1, T2, T2 + + // --------------------------------------------------- + VREPF $0, Y0, YDIG + VMALF X0, YDIG, T0, ADD1 + VMLF ADD1, K0, MK0 + VREPF $3, MK0, MK0 + + VMALF X1, YDIG, T1, ADD2 + VMALHF X0, YDIG, T0, ADD1H + VMALHF X1, YDIG, T1, ADD2H + + VMALF M0, MK0, ADD1, RED1 + VMALHF M0, MK0, ADD1, RED1H + VMALF M1, MK0, ADD2, RED2 + VMALHF M1, MK0, ADD2, RED2H + + VSLDB $12, RED2, RED1, RED1 + VSLDB $12, T2, RED2, RED2 + + VACCQ RED1, ADD1H, CAR1 + VAQ RED1, ADD1H, T0 + VACCQ RED1H, T0, CAR1M + VAQ RED1H, T0, T0 + + // << ready for next MK0 + + VACQ RED2, ADD2H, CAR1, T1 + VACCCQ RED2, ADD2H, CAR1, CAR1 + VACCCQ RED2H, T1, CAR1M, T2 + VACQ RED2H, T1, CAR1M, T1 + VAQ CAR1, T2, T2 + + // --------------------------------------------------- + VREPF $3, Y1, YDIG + VMALF X0, YDIG, T0, ADD1 + VMLF ADD1, K0, MK0 + VREPF $3, MK0, MK0 + + VMALF X1, YDIG, T1, ADD2 + VMALHF X0, YDIG, T0, ADD1H + VMALHF X1, YDIG, T1, ADD2H + + VMALF M0, MK0, ADD1, RED1 + VMALHF M0, MK0, ADD1, RED1H + VMALF M1, MK0, ADD2, RED2 + VMALHF M1, MK0, ADD2, RED2H + + VSLDB $12, RED2, RED1, RED1 + VSLDB $12, T2, RED2, RED2 + + VACCQ RED1, ADD1H, CAR1 + VAQ RED1, ADD1H, T0 + VACCQ RED1H, T0, CAR1M + VAQ RED1H, T0, T0 + + // << ready for next MK0 + + VACQ RED2, ADD2H, CAR1, T1 + VACCCQ RED2, ADD2H, CAR1, CAR1 + VACCCQ RED2H, T1, CAR1M, T2 + VACQ RED2H, T1, CAR1M, T1 + VAQ CAR1, T2, T2 + + // --------------------------------------------------- + VREPF $2, Y1, YDIG + VMALF X0, YDIG, T0, ADD1 + VMLF ADD1, K0, MK0 + VREPF $3, MK0, MK0 + + VMALF X1, YDIG, T1, ADD2 + VMALHF X0, YDIG, T0, ADD1H + VMALHF X1, YDIG, T1, ADD2H + + VMALF M0, MK0, ADD1, RED1 + VMALHF M0, MK0, ADD1, RED1H + VMALF M1, MK0, ADD2, RED2 + VMALHF M1, MK0, ADD2, RED2H + + VSLDB $12, RED2, RED1, RED1 + VSLDB $12, T2, RED2, RED2 + + VACCQ RED1, ADD1H, CAR1 + VAQ RED1, ADD1H, T0 + VACCQ RED1H, T0, CAR1M + VAQ RED1H, T0, T0 + + // << ready for next MK0 + + VACQ RED2, ADD2H, CAR1, T1 + VACCCQ RED2, ADD2H, CAR1, CAR1 + VACCCQ RED2H, T1, CAR1M, T2 + VACQ RED2H, T1, CAR1M, T1 + VAQ CAR1, T2, T2 + + // --------------------------------------------------- + VREPF $1, Y1, YDIG + VMALF X0, YDIG, T0, ADD1 + VMLF ADD1, K0, MK0 + VREPF $3, MK0, MK0 + + VMALF X1, YDIG, T1, ADD2 + VMALHF X0, YDIG, T0, ADD1H + VMALHF X1, YDIG, T1, ADD2H + + VMALF M0, MK0, ADD1, RED1 + VMALHF M0, MK0, ADD1, RED1H + VMALF M1, MK0, ADD2, RED2 + VMALHF M1, MK0, ADD2, RED2H + + VSLDB $12, RED2, RED1, RED1 + VSLDB $12, T2, RED2, RED2 + + VACCQ RED1, ADD1H, CAR1 + VAQ RED1, ADD1H, T0 + VACCQ RED1H, T0, CAR1M + VAQ RED1H, T0, T0 + + // << ready for next MK0 + + VACQ RED2, ADD2H, CAR1, T1 + VACCCQ RED2, ADD2H, CAR1, CAR1 + VACCCQ RED2H, T1, CAR1M, T2 + VACQ RED2H, T1, CAR1M, T1 + VAQ CAR1, T2, T2 + + // --------------------------------------------------- + VREPF $0, Y1, YDIG + VMALF X0, YDIG, T0, ADD1 + VMLF ADD1, K0, MK0 + VREPF $3, MK0, MK0 + + VMALF X1, YDIG, T1, ADD2 + VMALHF X0, YDIG, T0, ADD1H + VMALHF X1, YDIG, T1, ADD2H + + VMALF M0, MK0, ADD1, RED1 + VMALHF M0, MK0, ADD1, RED1H + VMALF M1, MK0, ADD2, RED2 + VMALHF M1, MK0, ADD2, RED2H + + VSLDB $12, RED2, RED1, RED1 + VSLDB $12, T2, RED2, RED2 + + VACCQ RED1, ADD1H, CAR1 + VAQ RED1, ADD1H, T0 + VACCQ RED1H, T0, CAR1M + VAQ RED1H, T0, T0 + + // << ready for next MK0 + + VACQ RED2, ADD2H, CAR1, T1 + VACCCQ RED2, ADD2H, CAR1, CAR1 + VACCCQ RED2H, T1, CAR1M, T2 + VACQ RED2H, T1, CAR1M, T1 + VAQ CAR1, T2, T2 + + // --------------------------------------------------- + + VZERO RED1 + VSCBIQ M0, T0, CAR1 + VSQ M0, T0, ADD1 + VSBCBIQ T1, M1, CAR1, CAR1M + VSBIQ T1, M1, CAR1, ADD2 + VSBIQ T2, RED1, CAR1M, T2 + + // what output to use, ADD2||ADD1 or T1||T0? + VSEL T0, ADD1, T2, T0 + VSEL T1, ADD2, T2, T1 + + VST T0, (1*16)(res_ptr) + VST T1, (0*16)(res_ptr) + RET + +#undef res_ptr +#undef x_ptr +#undef y_ptr +#undef X0 +#undef X1 +#undef Y0 +#undef Y1 +#undef M0 +#undef M1 +#undef T0 +#undef T1 +#undef T2 +#undef YDIG + +#undef ADD1 +#undef ADD1H +#undef ADD2 +#undef ADD2H +#undef RED1 +#undef RED1H +#undef RED2 +#undef RED2H +#undef CAR1 +#undef CAR1M + +#undef MK0 +#undef K0 + +// --------------------------------------- +// p256MulInternalVX +// V0-V3,V30,V31 - Not Modified +// V4-V15 - Volatile + +#define CPOOL R4 + +// Parameters +#define X0 V0 // Not modified +#define X1 V1 // Not modified +#define Y0 V2 // Not modified +#define Y1 V3 // Not modified +#define T0 V4 +#define T1 V5 +#define P0 V30 // Not modified +#define P1 V31 // Not modified + +// Temporaries +#define YDIG V6 // Overloaded with CAR2, ZER +#define ADD1H V7 // Overloaded with ADD3H +#define ADD2H V8 // Overloaded with ADD4H +#define ADD3 V9 // Overloaded with SEL2,SEL5 +#define ADD4 V10 // Overloaded with SEL3,SEL6 +#define RED1 V11 // Overloaded with CAR2 +#define RED2 V12 +#define RED3 V13 // Overloaded with SEL1 +#define T2 V14 +// Overloaded temporaries +#define ADD1 V4 // Overloaded with T0 +#define ADD2 V5 // Overloaded with T1 +#define ADD3H V7 // Overloaded with ADD1H +#define ADD4H V8 // Overloaded with ADD2H +#define ZER V6 // Overloaded with YDIG, CAR2 +#define CAR1 V6 // Overloaded with YDIG, ZER +#define CAR2 V11 // Overloaded with RED1 +// Constant Selects +#define SEL1 V13 // Overloaded with RED3 +#define SEL2 V9 // Overloaded with ADD3,SEL5 +#define SEL3 V10 // Overloaded with ADD4,SEL6 +#define SEL4 V6 // Overloaded with YDIG,CAR2,ZER +#define SEL5 V9 // Overloaded with ADD3,SEL2 +#define SEL6 V10 // Overloaded with ADD4,SEL3 + +/* * + * To follow the flow of bits, for your own sanity a stiff drink, need you shall. + * Of a single round, a 'helpful' picture, here is. Meaning, column position has. + * With you, SIMD be... + * + * +--------+--------+ + * +--------| RED2 | RED1 | + * | +--------+--------+ + * | ---+--------+--------+ + * | +---- T2| T1 | T0 |--+ + * | | ---+--------+--------+ | + * | | | + * | | ======================= | + * | | | + * | | +--------+--------+<-+ + * | +-------| ADD2 | ADD1 |--|-----+ + * | | +--------+--------+ | | + * | | +--------+--------+<---+ | + * | | | ADD2H | ADD1H |--+ | + * | | +--------+--------+ | | + * | | +--------+--------+<-+ | + * | | | ADD4 | ADD3 |--|-+ | + * | | +--------+--------+ | | | + * | | +--------+--------+<---+ | | + * | | | ADD4H | ADD3H |------|-+ |(+vzero) + * | | +--------+--------+ | | V + * | | ------------------------ | | +--------+ + * | | | | | RED3 | [d0 0 0 d0] + * | | | | +--------+ + * | +---->+--------+--------+ | | | + * (T2[1w]||ADD2[4w]||ADD1[3w]) +--------| T1 | T0 | | | | + * | +--------+--------+ | | | + * +---->---+--------+--------+ | | | + * T2| T1 | T0 |----+ | | + * ---+--------+--------+ | | | + * ---+--------+--------+<---+ | | + * +--- T2| T1 | T0 |----------+ + * | ---+--------+--------+ | | + * | +--------+--------+<-------------+ + * | | RED2 | RED1 |-----+ | | [0 d1 d0 d1] [d0 0 d1 d0] + * | +--------+--------+ | | | + * | +--------+<----------------------+ + * | | RED3 |--------------+ | [0 0 d1 d0] + * | +--------+ | | + * +--->+--------+--------+ | | + * | T1 | T0 |--------+ + * +--------+--------+ | | + * --------------------------- | | + * | | + * +--------+--------+<----+ | + * | RED2 | RED1 | | + * +--------+--------+ | + * ---+--------+--------+<-------+ + * T2| T1 | T0 | (H1P-H1P-H00RRAY!) + * ---+--------+--------+ + * + * *Mi obra de arte de siglo XXI @vpaprots + * + * + * First group is special, doesn't get the two inputs: + * +--------+--------+<-+ + * +-------| ADD2 | ADD1 |--|-----+ + * | +--------+--------+ | | + * | +--------+--------+<---+ | + * | | ADD2H | ADD1H |--+ | + * | +--------+--------+ | | + * | +--------+--------+<-+ | + * | | ADD4 | ADD3 |--|-+ | + * | +--------+--------+ | | | + * | +--------+--------+<---+ | | + * | | ADD4H | ADD3H |------|-+ |(+vzero) + * | +--------+--------+ | | V + * | ------------------------ | | +--------+ + * | | | | RED3 | [d0 0 0 d0] + * | | | +--------+ + * +---->+--------+--------+ | | | + * (T2[1w]||ADD2[4w]||ADD1[3w]) | T1 | T0 |----+ | | + * +--------+--------+ | | | + * ---+--------+--------+<---+ | | + * +--- T2| T1 | T0 |----------+ + * | ---+--------+--------+ | | + * | +--------+--------+<-------------+ + * | | RED2 | RED1 |-----+ | | [0 d1 d0 d1] [d0 0 d1 d0] + * | +--------+--------+ | | | + * | +--------+<----------------------+ + * | | RED3 |--------------+ | [0 0 d1 d0] + * | +--------+ | | + * +--->+--------+--------+ | | + * | T1 | T0 |--------+ + * +--------+--------+ | | + * --------------------------- | | + * | | + * +--------+--------+<----+ | + * | RED2 | RED1 | | + * +--------+--------+ | + * ---+--------+--------+<-------+ + * T2| T1 | T0 | (H1P-H1P-H00RRAY!) + * ---+--------+--------+ + * + * Last 'group' needs to RED2||RED1 shifted less + */ +TEXT ·p256MulInternalVX(SB), NOSPLIT, $0-0 + VL 32(CPOOL), SEL1 + VL 48(CPOOL), SEL2 + VL 64(CPOOL), SEL3 + VL 80(CPOOL), SEL4 + + // --------------------------------------------------- + + VREPF $3, Y0, YDIG + VMLHF X0, YDIG, ADD1H + VMLHF X1, YDIG, ADD2H + VMLF X0, YDIG, ADD1 + VMLF X1, YDIG, ADD2 + + VREPF $2, Y0, YDIG + VMALF X0, YDIG, ADD1H, ADD3 + VMALF X1, YDIG, ADD2H, ADD4 + VMALHF X0, YDIG, ADD1H, ADD3H // ADD1H Free + VMALHF X1, YDIG, ADD2H, ADD4H // ADD2H Free + + VZERO ZER + VL 32(CPOOL), SEL1 + VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0] + + VSLDB $12, ADD2, ADD1, T0 // ADD1 Free + VSLDB $12, ZER, ADD2, T1 // ADD2 Free + + VACCQ T0, ADD3, CAR1 + VAQ T0, ADD3, T0 // ADD3 Free + VACCCQ T1, ADD4, CAR1, T2 + VACQ T1, ADD4, CAR1, T1 // ADD4 Free + + VL 48(CPOOL), SEL2 + VL 64(CPOOL), SEL3 + VL 80(CPOOL), SEL4 + VPERM RED3, T0, SEL2, RED1 // [d0 0 d1 d0] + VPERM RED3, T0, SEL3, RED2 // [ 0 d1 d0 d1] + VPERM RED3, T0, SEL4, RED3 // [ 0 0 d1 d0] + VSQ RED3, RED2, RED2 // Guaranteed not to underflow + + VSLDB $12, T1, T0, T0 + VSLDB $12, T2, T1, T1 + + VACCQ T0, ADD3H, CAR1 + VAQ T0, ADD3H, T0 + VACCCQ T1, ADD4H, CAR1, T2 + VACQ T1, ADD4H, CAR1, T1 + + // --------------------------------------------------- + + VREPF $1, Y0, YDIG + VMALHF X0, YDIG, T0, ADD1H + VMALHF X1, YDIG, T1, ADD2H + VMALF X0, YDIG, T0, ADD1 // T0 Free->ADD1 + VMALF X1, YDIG, T1, ADD2 // T1 Free->ADD2 + + VREPF $0, Y0, YDIG + VMALF X0, YDIG, ADD1H, ADD3 + VMALF X1, YDIG, ADD2H, ADD4 + VMALHF X0, YDIG, ADD1H, ADD3H // ADD1H Free->ADD3H + VMALHF X1, YDIG, ADD2H, ADD4H // ADD2H Free->ADD4H , YDIG Free->ZER + + VZERO ZER + VL 32(CPOOL), SEL1 + VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0] + + VSLDB $12, ADD2, ADD1, T0 // ADD1 Free->T0 + VSLDB $12, T2, ADD2, T1 // ADD2 Free->T1, T2 Free + + VACCQ T0, RED1, CAR1 + VAQ T0, RED1, T0 + VACCCQ T1, RED2, CAR1, T2 + VACQ T1, RED2, CAR1, T1 + + VACCQ T0, ADD3, CAR1 + VAQ T0, ADD3, T0 + VACCCQ T1, ADD4, CAR1, CAR2 + VACQ T1, ADD4, CAR1, T1 + VAQ T2, CAR2, T2 + + VL 48(CPOOL), SEL2 + VL 64(CPOOL), SEL3 + VL 80(CPOOL), SEL4 + VPERM RED3, T0, SEL2, RED1 // [d0 0 d1 d0] + VPERM RED3, T0, SEL3, RED2 // [ 0 d1 d0 d1] + VPERM RED3, T0, SEL4, RED3 // [ 0 0 d1 d0] + VSQ RED3, RED2, RED2 // Guaranteed not to underflow + + VSLDB $12, T1, T0, T0 + VSLDB $12, T2, T1, T1 + + VACCQ T0, ADD3H, CAR1 + VAQ T0, ADD3H, T0 + VACCCQ T1, ADD4H, CAR1, T2 + VACQ T1, ADD4H, CAR1, T1 + + // --------------------------------------------------- + + VREPF $3, Y1, YDIG + VMALHF X0, YDIG, T0, ADD1H + VMALHF X1, YDIG, T1, ADD2H + VMALF X0, YDIG, T0, ADD1 + VMALF X1, YDIG, T1, ADD2 + + VREPF $2, Y1, YDIG + VMALF X0, YDIG, ADD1H, ADD3 + VMALF X1, YDIG, ADD2H, ADD4 + VMALHF X0, YDIG, ADD1H, ADD3H // ADD1H Free + VMALHF X1, YDIG, ADD2H, ADD4H // ADD2H Free + + VZERO ZER + VL 32(CPOOL), SEL1 + VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0] + + VSLDB $12, ADD2, ADD1, T0 // ADD1 Free + VSLDB $12, T2, ADD2, T1 // ADD2 Free + + VACCQ T0, RED1, CAR1 + VAQ T0, RED1, T0 + VACCCQ T1, RED2, CAR1, T2 + VACQ T1, RED2, CAR1, T1 + + VACCQ T0, ADD3, CAR1 + VAQ T0, ADD3, T0 + VACCCQ T1, ADD4, CAR1, CAR2 + VACQ T1, ADD4, CAR1, T1 + VAQ T2, CAR2, T2 + + VL 48(CPOOL), SEL2 + VL 64(CPOOL), SEL3 + VL 80(CPOOL), SEL4 + VPERM RED3, T0, SEL2, RED1 // [d0 0 d1 d0] + VPERM RED3, T0, SEL3, RED2 // [ 0 d1 d0 d1] + VPERM RED3, T0, SEL4, RED3 // [ 0 0 d1 d0] + VSQ RED3, RED2, RED2 // Guaranteed not to underflow + + VSLDB $12, T1, T0, T0 + VSLDB $12, T2, T1, T1 + + VACCQ T0, ADD3H, CAR1 + VAQ T0, ADD3H, T0 + VACCCQ T1, ADD4H, CAR1, T2 + VACQ T1, ADD4H, CAR1, T1 + + // --------------------------------------------------- + + VREPF $1, Y1, YDIG + VMALHF X0, YDIG, T0, ADD1H + VMALHF X1, YDIG, T1, ADD2H + VMALF X0, YDIG, T0, ADD1 + VMALF X1, YDIG, T1, ADD2 + + VREPF $0, Y1, YDIG + VMALF X0, YDIG, ADD1H, ADD3 + VMALF X1, YDIG, ADD2H, ADD4 + VMALHF X0, YDIG, ADD1H, ADD3H + VMALHF X1, YDIG, ADD2H, ADD4H + + VZERO ZER + VL 32(CPOOL), SEL1 + VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0] + + VSLDB $12, ADD2, ADD1, T0 + VSLDB $12, T2, ADD2, T1 + + VACCQ T0, RED1, CAR1 + VAQ T0, RED1, T0 + VACCCQ T1, RED2, CAR1, T2 + VACQ T1, RED2, CAR1, T1 + + VACCQ T0, ADD3, CAR1 + VAQ T0, ADD3, T0 + VACCCQ T1, ADD4, CAR1, CAR2 + VACQ T1, ADD4, CAR1, T1 + VAQ T2, CAR2, T2 + + VL 96(CPOOL), SEL5 + VL 112(CPOOL), SEL6 + VPERM T0, RED3, SEL5, RED2 // [d1 d0 d1 d0] + VPERM T0, RED3, SEL6, RED1 // [ 0 d1 d0 0] + VSQ RED1, RED2, RED2 // Guaranteed not to underflow + + VSLDB $12, T1, T0, T0 + VSLDB $12, T2, T1, T1 + + VACCQ T0, ADD3H, CAR1 + VAQ T0, ADD3H, T0 + VACCCQ T1, ADD4H, CAR1, T2 + VACQ T1, ADD4H, CAR1, T1 + + VACCQ T0, RED1, CAR1 + VAQ T0, RED1, T0 + VACCCQ T1, RED2, CAR1, CAR2 + VACQ T1, RED2, CAR1, T1 + VAQ T2, CAR2, T2 + + // --------------------------------------------------- + + VZERO RED3 + VSCBIQ P0, T0, CAR1 + VSQ P0, T0, ADD1H + VSBCBIQ T1, P1, CAR1, CAR2 + VSBIQ T1, P1, CAR1, ADD2H + VSBIQ T2, RED3, CAR2, T2 + + // what output to use, ADD2H||ADD1H or T1||T0? + VSEL T0, ADD1H, T2, T0 + VSEL T1, ADD2H, T2, T1 + RET + +#undef CPOOL + +#undef X0 +#undef X1 +#undef Y0 +#undef Y1 +#undef T0 +#undef T1 +#undef P0 +#undef P1 + +#undef SEL1 +#undef SEL2 +#undef SEL3 +#undef SEL4 +#undef SEL5 +#undef SEL6 + +#undef YDIG +#undef ADD1H +#undef ADD2H +#undef ADD3 +#undef ADD4 +#undef RED1 +#undef RED2 +#undef RED3 +#undef T2 +#undef ADD1 +#undef ADD2 +#undef ADD3H +#undef ADD4H +#undef ZER +#undef CAR1 +#undef CAR2 + +// --------------------------------------- +// p256MulInternalVMSL +// V0-V3,V30,V31 - Not Modified +// V4-V14 - Volatile + +#define CPOOL R4 +#define SCRATCH R9 + +// Parameters +#define X0 V0 // Not modified +#define X1 V1 // Not modified +#define Y0 V2 // Not modified +#define Y1 V3 // Not modified +#define T0 V4 +#define T1 V5 +#define T2 V6 +#define P0 V30 // Not modified +#define P1 V31 // Not modified + +// input: d0 +// output: h0, h1 +// temp: TEMP, ZERO, BORROW +#define OBSERVATION3(d0, h0, h1, TEMP, ZERO, BORROW) \ + VZERO ZERO \ + VSLDB $4, d0, ZERO, h0 \ + VLR h0, BORROW \ + VSLDB $12, ZERO, h0, TEMP \ + VSQ TEMP, h0, h0 \ + VSLDB $12, d0, BORROW, h1 \ + VSLDB $8, ZERO, BORROW, TEMP \ + VAQ TEMP, h0, h0 \ + +#define OBSERVATION3A(d2, h0, h1, TEMP, ZERO) \ + VZERO ZERO \ + VSLDB $8, d2, ZERO, TEMP \ + VSLDB $8, d2, TEMP, h0 \ + VSLDB $12, ZERO, TEMP, h1 \ + VSQ h1, h0, h0 \ + +TEXT ·p256MulInternalVMSL(SB), NOFRAME|NOSPLIT, $0-0 + VSTM V16, V19, (SCRATCH) + + MOVD $p256vmsl<>+0x00(SB), CPOOL + + // Divide input1 into 5 limbs + VGBM $0x007f, V14 + VZERO V12 + VSLDB $2, X1, X0, V13 + VSLDB $2, Y1, Y0, V8 + VSLDB $4, V12, X1, V11 // V11(X1): 4 bytes limb + VSLDB $4, V12, Y1, V6 // V6: 4 bytes limb + + VN V14, X0, V5 // V5: first 7 bytes limb + VN V14, Y0, V10 // V10: first 7 bytes limb + VN V14, V13, V13 // v13: third 7 bytes limb + VN V14, V8, V8 // V8: third 7 bytes limb + + VMSLG V10, V5, V12, V10 // v10: l10 x l5 (column 1) + VMSLG V8, V5, V12, V8 // v8: l8 x l5 + VMSLG V6, V13, V12, V13 // v13: l6 x l3 + VMSLG V6, V11, V12, V11 // v11: l6 x l1 (column 9) + VMSLG V6, V5, V12, V6 // v6: l6 x l5 + + MOVD $p256vmsl<>+0x00(SB), CPOOL + VGBM $0x7f7f, V14 + + VL 0(CPOOL), V4 + VL 16(CPOOL), V7 + VL 32(CPOOL), V9 + VL 48(CPOOL), V5 + VLM 64(CPOOL), V16, V19 + + VPERM V12, X0, V4, V4 // v4: limb4 | limb5 + VPERM Y1, Y0, V7, V7 + VPERM V12, Y0, V9, V9 // v9: limb10 | limb9 + VPERM X1, X0, V5, V5 + VPERM X1, X0, V16, V16 + VPERM Y1, Y0, V17, V17 + VPERM X1, V12, V18, V18 // v18: limb1 | limb2 + VPERM Y1, V12, V19, V19 // v19: limb7 | limb6 + VN V14, V7, V7 // v7: limb9 | limb8 + VN V14, V5, V5 // v5: limb3 | limb4 + VN V14, V16, V16 // v16: limb2 | limb3 + VN V14, V17, V17 // v17: limb8 | limb7 + + VMSLG V9, V4, V12, V14 // v14: l10 x l4 + l9 x l5 (column 2) + VMSLG V9, V5, V8, V8 // v8: l10 x l9 + l3 x l4 + l8 x l5 (column 3) + VMSLG V9, V16, V12, V16 // v16: l10 x l9 + l2 x l3 + VMSLG V9, V18, V12, V9 // v9: l10 x l1 + l9 x l2 + VMSLG V7, V18, V12, V7 // v7: l9 x l1 + l8 x l2 + VMSLG V17, V4, V16, V16 // v16: l8 x l4 + l7 x l5 + l10 x l9 + l2 x l3 (column 4) + VMSLG V17, V5, V9, V9 // v9: l10 x l1 + l9 x l2 + l8 x l3 + l7 x l4 + VMSLG V17, V18, V12, V17 // v18: l8 x l1 + l7 x l2 + VMSLG V19, V5, V7, V7 // v7: l9 x l1 + l8 x l2 + l7 x l3 + l6 x l4 (column 6) + VMSLG V19, V18, V12, V19 // v19: l7 x l1 + l6 x l2 (column 8) + VAQ V9, V6, V9 // v9: l10 x l1 + l9 x l2 + l8 x l3 + l7 x l4 + l6 x l5 (column 5) + VAQ V17, V13, V13 // v13: l8 x l1 + l7 x l2 + l6 x l3 (column 7) + + VSLDB $9, V12, V10, V4 + VSLDB $9, V12, V7, V5 + VAQ V4, V14, V14 + VAQ V5, V13, V13 + + VSLDB $9, V12, V14, V4 + VSLDB $9, V12, V13, V5 + VAQ V4, V8, V8 + VAQ V5, V19, V19 + + VSLDB $9, V12, V8, V4 + VSLDB $9, V12, V19, V5 + VAQ V4, V16, V16 + VAQ V5, V11, V11 + + VSLDB $9, V12, V16, V4 + VAQ V4, V9, V17 + + VGBM $0x007f, V4 + VGBM $0x00ff, V5 + + VN V10, V4, V10 + VN V14, V4, V14 + VN V8, V4, V8 + VN V16, V4, V16 + VN V17, V4, V9 + VN V7, V4, V7 + VN V13, V4, V13 + VN V19, V4, V19 + VN V11, V5, V11 + + VSLDB $7, V14, V14, V14 + VSLDB $14, V8, V12, V4 + VSLDB $14, V12, V8, V8 + VSLDB $5, V16, V16, V16 + VSLDB $12, V9, V12, V5 + + VO V14, V10, V10 + VO V8, V16, V16 + VO V4, V10, V10 // first rightmost 128bits of the multiplication result + VO V5, V16, V16 // second rightmost 128bits of the multiplication result + + // adjust v7, v13, v19, v11 + VSLDB $7, V13, V13, V13 + VSLDB $14, V19, V12, V4 + VSLDB $14, V12, V19, V19 + VSLDB $5, V11, V12, V5 + VO V13, V7, V7 + VO V4, V7, V7 + VO V19, V5, V11 + + VSLDB $9, V12, V17, V14 + VSLDB $12, V12, V9, V9 + VACCQ V7, V14, V13 + VAQ V7, V14, V7 + VAQ V11, V13, V11 + + // First reduction, 96 bits + VSLDB $4, V16, V10, T0 + VSLDB $4, V12, V16, T1 + VSLDB $3, V11, V7, V11 // fourth rightmost 128bits of the multiplication result + VSLDB $3, V7, V12, V7 + OBSERVATION3(V10, V8, T2, V17, V18, V19)// results V8 | T2 + VO V7, V9, V7 // third rightmost 128bits of the multiplication result + VACCQ T0, T2, V9 + VAQ T0, T2, T2 + VACQ T1, V8, V9, V8 + + // Second reduction 96 bits + VSLDB $4, V8, T2, T0 + VSLDB $4, V12, V8, T1 + OBSERVATION3(T2, V9, V8, V17, V18, V19)// results V9 | V8 + VACCQ T0, V8, T2 + VAQ T0, V8, V8 + VACQ T1, V9, T2, V9 + + // Third reduction 64 bits + VSLDB $8, V9, V8, T0 + VSLDB $8, V12, V9, T1 + OBSERVATION3A(V8, V14, V13, V17, V18)// results V14 | V13 + VACCQ T0, V13, V12 + VAQ T0, V13, V13 + VACQ T1, V14, V12, V14 + VACCQ V13, V7, V12 + VAQ V13, V7, T0 + VACCCQ V14, V11, V12, T2 + VACQ V14, V11, V12, T1 // results T2 | T1 | T0 + + // --------------------------------------------------- + MOVD $p256mul<>+0x00(SB), CPOOL + + VZERO V12 + VSCBIQ P0, T0, V8 + VSQ P0, T0, V7 + VSBCBIQ T1, P1, V8, V10 + VSBIQ T1, P1, V8, V9 + VSBIQ T2, V12, V10, T2 + + // what output to use, V9||V7 or T1||T0? + VSEL T0, V7, T2, T0 + VSEL T1, V9, T2, T1 + + VLM (SCRATCH), V16, V19 + + RET + +// --------------------------------------- +// p256SqrInternalVMSL +// V0-V1,V30,V31 - Not Modified +// V4-V14 - Volatile + +TEXT ·p256SqrInternalVMSL(SB), NOFRAME|NOSPLIT, $0-0 + VSTM V16, V18, (SCRATCH) + + MOVD $p256vmsl<>+0x00(SB), CPOOL + // Divide input into limbs + VGBM $0x007f, V14 + VZERO V12 + VSLDB $2, X1, X0, V13 + VSLDB $4, V12, X1, V11 // V11(X1): 4 bytes limb + + VN V14, X0, V10 // V10: first 7 bytes limb + VN V14, V13, V13 // v13: third 7 bytes limb + + VMSLG V10, V10, V12, V10 // v10: l10 x l5 (column 1) + VMSLG V13, V13, V12, V13 // v13: l8 x l3 + VMSLG V11, V11, V12, V11 // v11: l6 x l1 (column 9) + + MOVD $p256vmsl<>+0x00(SB), CPOOL + VGBM $0x7f7f, V14 + + VL 0(CPOOL), V4 + VL 16(CPOOL), V7 + VL 32(CPOOL), V9 + VL 48(CPOOL), V5 + VLM 64(CPOOL), V16, V18 + VL 112(CPOOL), V8 + + VPERM V12, X0, V4, V4 // v4: limb4 | limb5 + VPERM X1, X0, V7, V7 + VPERM V12, X0, V9, V9 // v9: limb10 | limb9 + VPERM X1, X0, V5, V5 + VPERM X1, X0, V16, V16 + VPERM X1, X0, V17, V17 + VPERM X1, V12, V18, V18 // v18: limb1 | limb2 + VPERM X1, V12, V8, V8 // v8: limb7 | limb6 + VN V14, V7, V7 // v7: limb9 | limb8 + VN V14, V5, V5 // v5: limb3 | limb4 + VN V14, V16, V16 // v16: limb2 | limb3 + VN V14, V17, V17 // v17: limb8 | limb7 + + VMSLEOG V9, V18, V13, V6 // v6: l10 x l1 + l9 x l2 + l8 x l3 + l7 x l4 + l6 x l5 (column 5) + VMSLG V9, V4, V12, V14 // v14: l10 x l4 + l9 x l5 (column 2) + VMSLEOG V9, V16, V12, V16 // v16: l10 x l2 + l9 x l3 + l8 x l4 + l7 x l5 (column 4) + VMSLEOG V7, V18, V12, V7 // v7: l9 x l1 + l8 x l2 (column 6) + VMSLEG V17, V18, V12, V13 // v13: l8 x l1 + l7 x l2 + l6 x l3 (column 7) + VMSLG V8, V18, V12, V8 // v8: l7 x l1 + l6 x l2 (column 8) + VMSLEG V9, V5, V12, V18 // v18: l10 x l3 + l9 x l4 + l8 x l5 (column 3) + + VSLDB $9, V12, V10, V4 + VSLDB $9, V12, V7, V5 + VAQ V4, V14, V14 + VAQ V5, V13, V13 + + VSLDB $9, V12, V14, V4 + VSLDB $9, V12, V13, V5 + VAQ V4, V18, V18 + VAQ V5, V8, V8 + + VSLDB $9, V12, V18, V4 + VSLDB $9, V12, V8, V5 + VAQ V4, V16, V16 + VAQ V5, V11, V11 + + VSLDB $9, V12, V16, V4 + VAQ V4, V6, V17 + + VGBM $0x007f, V4 + VGBM $0x00ff, V5 + + VN V10, V4, V10 + VN V14, V4, V14 + VN V18, V4, V18 + VN V16, V4, V16 + VN V17, V4, V9 + VN V7, V4, V7 + VN V13, V4, V13 + VN V8, V4, V8 + VN V11, V5, V11 + + VSLDB $7, V14, V14, V14 + VSLDB $14, V18, V12, V4 + VSLDB $14, V12, V18, V18 + VSLDB $5, V16, V16, V16 + VSLDB $12, V9, V12, V5 + + VO V14, V10, V10 + VO V18, V16, V16 + VO V4, V10, V10 // first rightmost 128bits of the multiplication result + VO V5, V16, V16 // second rightmost 128bits of the multiplication result + + // adjust v7, v13, v8, v11 + VSLDB $7, V13, V13, V13 + VSLDB $14, V8, V12, V4 + VSLDB $14, V12, V8, V8 + VSLDB $5, V11, V12, V5 + VO V13, V7, V7 + VO V4, V7, V7 + VO V8, V5, V11 + + VSLDB $9, V12, V17, V14 + VSLDB $12, V12, V9, V9 + VACCQ V7, V14, V13 + VAQ V7, V14, V7 + VAQ V11, V13, V11 + + // First reduction, 96 bits + VSLDB $4, V16, V10, T0 + VSLDB $4, V12, V16, T1 + VSLDB $3, V11, V7, V11 // fourth rightmost 128bits of the multiplication result + VSLDB $3, V7, V12, V7 + OBSERVATION3(V10, V8, T2, V16, V17, V18)// results V8 | T2 + VO V7, V9, V7 // third rightmost 128bits of the multiplication result + VACCQ T0, T2, V9 + VAQ T0, T2, T2 + VACQ T1, V8, V9, V8 + + // Second reduction 96 bits + VSLDB $4, V8, T2, T0 + VSLDB $4, V12, V8, T1 + OBSERVATION3(T2, V9, V8, V16, V17, V18)// results V9 | V8 + VACCQ T0, V8, T2 + VAQ T0, V8, V8 + VACQ T1, V9, T2, V9 + + // Third reduction 64 bits + VSLDB $8, V9, V8, T0 + VSLDB $8, V12, V9, T1 + OBSERVATION3A(V8, V14, V13, V17, V18)// results V14 | V13 + VACCQ T0, V13, V12 + VAQ T0, V13, V13 + VACQ T1, V14, V12, V14 + VACCQ V13, V7, V12 + VAQ V13, V7, T0 + VACCCQ V14, V11, V12, T2 + VACQ V14, V11, V12, T1 // results T2 | T1 | T0 + + // --------------------------------------------------- + MOVD $p256mul<>+0x00(SB), CPOOL + + VZERO V12 + VSCBIQ P0, T0, V8 + VSQ P0, T0, V7 + VSBCBIQ T1, P1, V8, V10 + VSBIQ T1, P1, V8, V9 + VSBIQ T2, V12, V10, T2 + + // what output to use, V9||V7 or T1||T0? + VSEL T0, V7, T2, T0 + VSEL T1, V9, T2, T1 + + VLM (SCRATCH), V16, V18 + RET + + + +#undef CPOOL +#undef SCRATCH +#undef X0 +#undef X1 +#undef Y0 +#undef Y1 +#undef T0 +#undef T1 +#undef T2 +#undef P0 +#undef P1 + +#define SCRATCH R9 + +TEXT p256MulInternal<>(SB),NOSPLIT,$64-0 + MOVD $scratch-64(SP), SCRATCH + MOVD ·p256MulInternalFacility+0x00(SB),R7 + CALL (R7) + RET + +TEXT ·p256MulInternalTrampolineSetup(SB),NOSPLIT|NOFRAME, $0 + MOVBZ internal∕cpu·S390X+const_offsetS390xHasVE1(SB), R0 + MOVD $·p256MulInternalFacility+0x00(SB), R7 + MOVD $·p256MulInternalVX(SB), R8 + CMPBEQ R0, $0, novmsl // VE1 facility = 1, VMSL supported + MOVD $·p256MulInternalVMSL(SB), R8 +novmsl: + MOVD R8, 0(R7) + BR (R8) + +GLOBL ·p256MulInternalFacility+0x00(SB), NOPTR, $8 +DATA ·p256MulInternalFacility+0x00(SB)/8, $·p256MulInternalTrampolineSetup(SB) + +// Parameters +#define X0 V0 +#define X1 V1 +#define Y0 V2 +#define Y1 V3 + +TEXT ·p256SqrInternalVX(SB), NOFRAME|NOSPLIT, $0 + VLR X0, Y0 + VLR X1, Y1 + BR ·p256MulInternalVX(SB) + +#undef X0 +#undef X1 +#undef Y0 +#undef Y1 + + +TEXT p256SqrInternal<>(SB),NOSPLIT,$48-0 + MOVD $scratch-48(SP), SCRATCH + MOVD ·p256SqrInternalFacility+0x00(SB),R7 + CALL (R7) + RET + +TEXT ·p256SqrInternalTrampolineSetup(SB),NOSPLIT|NOFRAME, $0 + MOVBZ internal∕cpu·S390X+const_offsetS390xHasVE1(SB), R0 + MOVD $·p256SqrInternalFacility+0x00(SB), R7 + MOVD $·p256SqrInternalVX(SB), R8 + CMPBEQ R0, $0, novmsl // VE1 facility = 1, VMSL supported + MOVD $·p256SqrInternalVMSL(SB), R8 +novmsl: + MOVD R8, 0(R7) + BR (R8) + + +GLOBL ·p256SqrInternalFacility+0x00(SB), NOPTR, $8 +DATA ·p256SqrInternalFacility+0x00(SB)/8, $·p256SqrInternalTrampolineSetup(SB) + +#undef SCRATCH + + +#define p256SubInternal(T1, T0, X1, X0, Y1, Y0) \ + VZERO ZER \ + VSCBIQ Y0, X0, CAR1 \ + VSQ Y0, X0, T0 \ + VSBCBIQ X1, Y1, CAR1, SEL1 \ + VSBIQ X1, Y1, CAR1, T1 \ + VSQ SEL1, ZER, SEL1 \ + \ + VACCQ T0, PL, CAR1 \ + VAQ T0, PL, TT0 \ + VACQ T1, PH, CAR1, TT1 \ + \ + VSEL T0, TT0, SEL1, T0 \ + VSEL T1, TT1, SEL1, T1 \ + +#define p256AddInternal(T1, T0, X1, X0, Y1, Y0) \ + VACCQ X0, Y0, CAR1 \ + VAQ X0, Y0, T0 \ + VACCCQ X1, Y1, CAR1, T2 \ + VACQ X1, Y1, CAR1, T1 \ + \ + VZERO ZER \ + VSCBIQ PL, T0, CAR1 \ + VSQ PL, T0, TT0 \ + VSBCBIQ T1, PH, CAR1, CAR2 \ + VSBIQ T1, PH, CAR1, TT1 \ + VSBIQ T2, ZER, CAR2, SEL1 \ + \ + VSEL T0, TT0, SEL1, T0 \ + VSEL T1, TT1, SEL1, T1 + +#define p256HalfInternal(T1, T0, X1, X0) \ + VZERO ZER \ + VSBIQ ZER, ZER, X0, SEL1 \ + \ + VACCQ X0, PL, CAR1 \ + VAQ X0, PL, T0 \ + VACCCQ X1, PH, CAR1, T2 \ + VACQ X1, PH, CAR1, T1 \ + \ + VSEL X0, T0, SEL1, T0 \ + VSEL X1, T1, SEL1, T1 \ + VSEL ZER, T2, SEL1, T2 \ + \ + VSLDB $15, T2, ZER, TT1 \ + VSLDB $15, T1, ZER, TT0 \ + VREPIB $1, SEL1 \ + VSRL SEL1, T0, T0 \ + VSRL SEL1, T1, T1 \ + VREPIB $7, SEL1 \ + VSL SEL1, TT0, TT0 \ + VSL SEL1, TT1, TT1 \ + VO T0, TT0, T0 \ + VO T1, TT1, T1 + +// --------------------------------------- +// func p256MulAsm(res, in1, in2 []byte) +#define res_ptr R1 +#define x_ptr R2 +#define y_ptr R3 +#define CPOOL R4 + +// Parameters +#define X0 V0 +#define X1 V1 +#define Y0 V2 +#define Y1 V3 +#define T0 V4 +#define T1 V5 + +// Constants +#define P0 V30 +#define P1 V31 +TEXT ·p256MulAsm(SB), NOSPLIT, $0 + MOVD res+0(FP), res_ptr + MOVD in1+24(FP), x_ptr + MOVD in2+48(FP), y_ptr + + VL (1*16)(x_ptr), X0 + VL (0*16)(x_ptr), X1 + VL (1*16)(y_ptr), Y0 + VL (0*16)(y_ptr), Y1 + + MOVD $p256mul<>+0x00(SB), CPOOL + VL 16(CPOOL), P0 + VL 0(CPOOL), P1 + + CALL p256MulInternal<>(SB) + + VST T0, (1*16)(res_ptr) + VST T1, (0*16)(res_ptr) + RET + +#undef res_ptr +#undef x_ptr +#undef y_ptr +#undef CPOOL + +#undef X0 +#undef X1 +#undef Y0 +#undef Y1 +#undef T0 +#undef T1 +#undef P0 +#undef P1 + +// --------------------------------------- +// func p256SqrAsm(res, in1 []byte) +#define res_ptr R1 +#define x_ptr R2 +#define y_ptr R3 +#define CPOOL R4 + +// Parameters +#define X0 V0 +#define X1 V1 +#define T0 V4 +#define T1 V5 + +// Constants +#define P0 V30 +#define P1 V31 +TEXT ·p256SqrAsm(SB), NOSPLIT, $0 + MOVD res+0(FP), res_ptr + MOVD in1+24(FP), x_ptr + + VL (1*16)(x_ptr), X0 + VL (0*16)(x_ptr), X1 + + MOVD $p256mul<>+0x00(SB), CPOOL + VL 16(CPOOL), P0 + VL 0(CPOOL), P1 + + CALL p256SqrInternal<>(SB) + + VST T0, (1*16)(res_ptr) + VST T1, (0*16)(res_ptr) + RET + +#undef res_ptr +#undef x_ptr +#undef y_ptr +#undef CPOOL + +#undef X0 +#undef X1 +#undef T0 +#undef T1 +#undef P0 +#undef P1 + + +// Point add with P2 being affine point +// If sign == 1 -> P2 = -P2 +// If sel == 0 -> P3 = P1 +// if zero == 0 -> P3 = P2 +// p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int) +#define P3ptr R1 +#define P1ptr R2 +#define P2ptr R3 +#define CPOOL R4 + +// Temporaries in REGs +#define Y2L V15 +#define Y2H V16 +#define T1L V17 +#define T1H V18 +#define T2L V19 +#define T2H V20 +#define T3L V21 +#define T3H V22 +#define T4L V23 +#define T4H V24 + +// Temps for Sub and Add +#define TT0 V11 +#define TT1 V12 +#define T2 V13 + +// p256MulAsm Parameters +#define X0 V0 +#define X1 V1 +#define Y0 V2 +#define Y1 V3 +#define T0 V4 +#define T1 V5 + +#define PL V30 +#define PH V31 + +// Names for zero/sel selects +#define X1L V0 +#define X1H V1 +#define Y1L V2 // p256MulAsmParmY +#define Y1H V3 // p256MulAsmParmY +#define Z1L V4 +#define Z1H V5 +#define X2L V0 +#define X2H V1 +#define Z2L V4 +#define Z2H V5 +#define X3L V17 // T1L +#define X3H V18 // T1H +#define Y3L V21 // T3L +#define Y3H V22 // T3H +#define Z3L V28 +#define Z3H V29 + +#define ZER V6 +#define SEL1 V7 +#define CAR1 V8 +#define CAR2 V9 +/* * + * Three operand formula: + * Source: 2004 Hankerson–Menezes–Vanstone, page 91. + * T1 = Z1² + * T2 = T1*Z1 + * T1 = T1*X2 + * T2 = T2*Y2 + * T1 = T1-X1 + * T2 = T2-Y1 + * Z3 = Z1*T1 + * T3 = T1² + * T4 = T3*T1 + * T3 = T3*X1 + * T1 = 2*T3 + * X3 = T2² + * X3 = X3-T1 + * X3 = X3-T4 + * T3 = T3-X3 + * T3 = T3*T2 + * T4 = T4*Y1 + * Y3 = T3-T4 + + * Three operand formulas, but with MulInternal X,Y used to store temps +X=Z1; Y=Z1; MUL;T- // T1 = Z1² T1 +X=T ; Y- ; MUL;T2=T // T2 = T1*Z1 T1 T2 +X- ; Y=X2; MUL;T1=T // T1 = T1*X2 T1 T2 +X=T2; Y=Y2; MUL;T- // T2 = T2*Y2 T1 T2 +SUB(T2<T-Y1) // T2 = T2-Y1 T1 T2 +SUB(Y<T1-X1) // T1 = T1-X1 T1 T2 +X=Z1; Y- ; MUL;Z3:=T// Z3 = Z1*T1 T2 +X=Y; Y- ; MUL;X=T // T3 = T1*T1 T2 +X- ; Y- ; MUL;T4=T // T4 = T3*T1 T2 T4 +X- ; Y=X1; MUL;T3=T // T3 = T3*X1 T2 T3 T4 +ADD(T1<T+T) // T1 = T3+T3 T1 T2 T3 T4 +X=T2; Y=T2; MUL;T- // X3 = T2*T2 T1 T2 T3 T4 +SUB(T<T-T1) // X3 = X3-T1 T1 T2 T3 T4 +SUB(T<T-T4) X3:=T // X3 = X3-T4 T2 T3 T4 +SUB(X<T3-T) // T3 = T3-X3 T2 T3 T4 +X- ; Y- ; MUL;T3=T // T3 = T3*T2 T2 T3 T4 +X=T4; Y=Y1; MUL;T- // T4 = T4*Y1 T3 T4 +SUB(T<T3-T) Y3:=T // Y3 = T3-T4 T3 T4 + + */ +TEXT ·p256PointAddAffineAsm(SB), NOSPLIT, $0 + MOVD P3+0(FP), P3ptr + MOVD P1+8(FP), P1ptr + MOVD P2+16(FP), P2ptr + + MOVD $p256mul<>+0x00(SB), CPOOL + VL 16(CPOOL), PL + VL 0(CPOOL), PH + + // if (sign == 1) { + // Y2 = fromBig(new(big.Int).Mod(new(big.Int).Sub(p256.P, new(big.Int).SetBytes(Y2)), p256.P)) // Y2 = P-Y2 + // } + + VL 32(P2ptr), Y2H + VL 48(P2ptr), Y2L + + VLREPG sign+24(FP), SEL1 + VZERO ZER + VCEQG SEL1, ZER, SEL1 + + VSCBIQ Y2L, PL, CAR1 + VSQ Y2L, PL, T1L + VSBIQ PH, Y2H, CAR1, T1H + + VSEL Y2L, T1L, SEL1, Y2L + VSEL Y2H, T1H, SEL1, Y2H + +/* * + * Three operand formula: + * Source: 2004 Hankerson–Menezes–Vanstone, page 91. + */ + // X=Z1; Y=Z1; MUL; T- // T1 = Z1² T1 + VL 64(P1ptr), X1 // Z1H + VL 80(P1ptr), X0 // Z1L + VLR X0, Y0 + VLR X1, Y1 + CALL p256SqrInternal<>(SB) + + // X=T ; Y- ; MUL; T2=T // T2 = T1*Z1 T1 T2 + VLR T0, X0 + VLR T1, X1 + CALL p256MulInternal<>(SB) + VLR T0, T2L + VLR T1, T2H + + // X- ; Y=X2; MUL; T1=T // T1 = T1*X2 T1 T2 + VL 0(P2ptr), Y1 // X2H + VL 16(P2ptr), Y0 // X2L + CALL p256MulInternal<>(SB) + VLR T0, T1L + VLR T1, T1H + + // X=T2; Y=Y2; MUL; T- // T2 = T2*Y2 T1 T2 + VLR T2L, X0 + VLR T2H, X1 + VLR Y2L, Y0 + VLR Y2H, Y1 + CALL p256MulInternal<>(SB) + + // SUB(T2<T-Y1) // T2 = T2-Y1 T1 T2 + VL 32(P1ptr), Y1H + VL 48(P1ptr), Y1L + p256SubInternal(T2H,T2L,T1,T0,Y1H,Y1L) + + // SUB(Y<T1-X1) // T1 = T1-X1 T1 T2 + VL 0(P1ptr), X1H + VL 16(P1ptr), X1L + p256SubInternal(Y1,Y0,T1H,T1L,X1H,X1L) + + // X=Z1; Y- ; MUL; Z3:=T// Z3 = Z1*T1 T2 + VL 64(P1ptr), X1 // Z1H + VL 80(P1ptr), X0 // Z1L + CALL p256MulInternal<>(SB) + + // VST T1, 64(P3ptr) + // VST T0, 80(P3ptr) + VLR T0, Z3L + VLR T1, Z3H + + // X=Y; Y- ; MUL; X=T // T3 = T1*T1 T2 + VLR Y0, X0 + VLR Y1, X1 + CALL p256SqrInternal<>(SB) + VLR T0, X0 + VLR T1, X1 + + // X- ; Y- ; MUL; T4=T // T4 = T3*T1 T2 T4 + CALL p256MulInternal<>(SB) + VLR T0, T4L + VLR T1, T4H + + // X- ; Y=X1; MUL; T3=T // T3 = T3*X1 T2 T3 T4 + VL 0(P1ptr), Y1 // X1H + VL 16(P1ptr), Y0 // X1L + CALL p256MulInternal<>(SB) + VLR T0, T3L + VLR T1, T3H + + // ADD(T1<T+T) // T1 = T3+T3 T1 T2 T3 T4 + p256AddInternal(T1H,T1L, T1,T0,T1,T0) + + // X=T2; Y=T2; MUL; T- // X3 = T2*T2 T1 T2 T3 T4 + VLR T2L, X0 + VLR T2H, X1 + VLR T2L, Y0 + VLR T2H, Y1 + CALL p256SqrInternal<>(SB) + + // SUB(T<T-T1) // X3 = X3-T1 T1 T2 T3 T4 (T1 = X3) + p256SubInternal(T1,T0,T1,T0,T1H,T1L) + + // SUB(T<T-T4) X3:=T // X3 = X3-T4 T2 T3 T4 + p256SubInternal(T1,T0,T1,T0,T4H,T4L) + VLR T0, X3L + VLR T1, X3H + + // SUB(X<T3-T) // T3 = T3-X3 T2 T3 T4 + p256SubInternal(X1,X0,T3H,T3L,T1,T0) + + // X- ; Y- ; MUL; T3=T // T3 = T3*T2 T2 T3 T4 + CALL p256MulInternal<>(SB) + VLR T0, T3L + VLR T1, T3H + + // X=T4; Y=Y1; MUL; T- // T4 = T4*Y1 T3 T4 + VLR T4L, X0 + VLR T4H, X1 + VL 32(P1ptr), Y1 // Y1H + VL 48(P1ptr), Y0 // Y1L + CALL p256MulInternal<>(SB) + + // SUB(T<T3-T) Y3:=T // Y3 = T3-T4 T3 T4 (T3 = Y3) + p256SubInternal(Y3H,Y3L,T3H,T3L,T1,T0) + + // if (sel == 0) { + // copy(P3.x[:], X1) + // copy(P3.y[:], Y1) + // copy(P3.z[:], Z1) + // } + + VL 0(P1ptr), X1H + VL 16(P1ptr), X1L + + // Y1 already loaded, left over from addition + VL 64(P1ptr), Z1H + VL 80(P1ptr), Z1L + + VLREPG sel+32(FP), SEL1 + VZERO ZER + VCEQG SEL1, ZER, SEL1 + + VSEL X1L, X3L, SEL1, X3L + VSEL X1H, X3H, SEL1, X3H + VSEL Y1L, Y3L, SEL1, Y3L + VSEL Y1H, Y3H, SEL1, Y3H + VSEL Z1L, Z3L, SEL1, Z3L + VSEL Z1H, Z3H, SEL1, Z3H + + // if (zero == 0) { + // copy(P3.x[:], X2) + // copy(P3.y[:], Y2) + // copy(P3.z[:], []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + // 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}) //(p256.z*2^256)%p + // } + VL 0(P2ptr), X2H + VL 16(P2ptr), X2L + + // Y2 already loaded + VL 128(CPOOL), Z2H + VL 144(CPOOL), Z2L + + VLREPG zero+40(FP), SEL1 + VZERO ZER + VCEQG SEL1, ZER, SEL1 + + VSEL X2L, X3L, SEL1, X3L + VSEL X2H, X3H, SEL1, X3H + VSEL Y2L, Y3L, SEL1, Y3L + VSEL Y2H, Y3H, SEL1, Y3H + VSEL Z2L, Z3L, SEL1, Z3L + VSEL Z2H, Z3H, SEL1, Z3H + + // All done, store out the result!!! + VST X3H, 0(P3ptr) + VST X3L, 16(P3ptr) + VST Y3H, 32(P3ptr) + VST Y3L, 48(P3ptr) + VST Z3H, 64(P3ptr) + VST Z3L, 80(P3ptr) + + RET + +#undef P3ptr +#undef P1ptr +#undef P2ptr +#undef CPOOL + +#undef Y2L +#undef Y2H +#undef T1L +#undef T1H +#undef T2L +#undef T2H +#undef T3L +#undef T3H +#undef T4L +#undef T4H + +#undef TT0 +#undef TT1 +#undef T2 + +#undef X0 +#undef X1 +#undef Y0 +#undef Y1 +#undef T0 +#undef T1 + +#undef PL +#undef PH + +#undef X1L +#undef X1H +#undef Y1L +#undef Y1H +#undef Z1L +#undef Z1H +#undef X2L +#undef X2H +#undef Z2L +#undef Z2H +#undef X3L +#undef X3H +#undef Y3L +#undef Y3H +#undef Z3L +#undef Z3H + +#undef ZER +#undef SEL1 +#undef CAR1 +#undef CAR2 + +// p256PointDoubleAsm(P3, P1 *p256Point) +// https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-2007-bl +// https://www.hyperelliptic.org/EFD/g1p/auto-shortw.html +// https://www.hyperelliptic.org/EFD/g1p/auto-shortw-projective-3.html +#define P3ptr R1 +#define P1ptr R2 +#define CPOOL R4 + +// Temporaries in REGs +#define X3L V15 +#define X3H V16 +#define Y3L V17 +#define Y3H V18 +#define T1L V19 +#define T1H V20 +#define T2L V21 +#define T2H V22 +#define T3L V23 +#define T3H V24 + +#define X1L V6 +#define X1H V7 +#define Y1L V8 +#define Y1H V9 +#define Z1L V10 +#define Z1H V11 + +// Temps for Sub and Add +#define TT0 V11 +#define TT1 V12 +#define T2 V13 + +// p256MulAsm Parameters +#define X0 V0 +#define X1 V1 +#define Y0 V2 +#define Y1 V3 +#define T0 V4 +#define T1 V5 + +#define PL V30 +#define PH V31 + +#define Z3L V23 +#define Z3H V24 + +#define ZER V26 +#define SEL1 V27 +#define CAR1 V28 +#define CAR2 V29 +/* + * https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2004-hmv + * Cost: 4M + 4S + 1*half + 5add + 2*2 + 1*3. + * Source: 2004 Hankerson–Menezes–Vanstone, page 91. + * A = 3(X₁-Z₁²)×(X₁+Z₁²) + * B = 2Y₁ + * Z₃ = B×Z₁ + * C = B² + * D = C×X₁ + * X₃ = A²-2D + * Y₃ = (D-X₃)×A-C²/2 + * + * Three-operand formula: + * T1 = Z1² + * T2 = X1-T1 + * T1 = X1+T1 + * T2 = T2*T1 + * T2 = 3*T2 + * Y3 = 2*Y1 + * Z3 = Y3*Z1 + * Y3 = Y3² + * T3 = Y3*X1 + * Y3 = Y3² + * Y3 = half*Y3 + * X3 = T2² + * T1 = 2*T3 + * X3 = X3-T1 + * T1 = T3-X3 + * T1 = T1*T2 + * Y3 = T1-Y3 + */ + +TEXT ·p256PointDoubleAsm(SB), NOSPLIT, $0 + MOVD P3+0(FP), P3ptr + MOVD P1+8(FP), P1ptr + + MOVD $p256mul<>+0x00(SB), CPOOL + VL 16(CPOOL), PL + VL 0(CPOOL), PH + + // X=Z1; Y=Z1; MUL; T- // T1 = Z1² + VL 64(P1ptr), X1 // Z1H + VL 80(P1ptr), X0 // Z1L + VLR X0, Y0 + VLR X1, Y1 + CALL p256SqrInternal<>(SB) + + // SUB(X<X1-T) // T2 = X1-T1 + VL 0(P1ptr), X1H + VL 16(P1ptr), X1L + p256SubInternal(X1,X0,X1H,X1L,T1,T0) + + // ADD(Y<X1+T) // T1 = X1+T1 + p256AddInternal(Y1,Y0,X1H,X1L,T1,T0) + + // X- ; Y- ; MUL; T- // T2 = T2*T1 + CALL p256MulInternal<>(SB) + + // ADD(T2<T+T); ADD(T2<T2+T) // T2 = 3*T2 + p256AddInternal(T2H,T2L,T1,T0,T1,T0) + p256AddInternal(T2H,T2L,T2H,T2L,T1,T0) + + // ADD(X<Y1+Y1) // Y3 = 2*Y1 + VL 32(P1ptr), Y1H + VL 48(P1ptr), Y1L + p256AddInternal(X1,X0,Y1H,Y1L,Y1H,Y1L) + + // X- ; Y=Z1; MUL; Z3:=T // Z3 = Y3*Z1 + VL 64(P1ptr), Y1 // Z1H + VL 80(P1ptr), Y0 // Z1L + CALL p256MulInternal<>(SB) + VST T1, 64(P3ptr) + VST T0, 80(P3ptr) + + // X- ; Y=X ; MUL; T- // Y3 = Y3² + VLR X0, Y0 + VLR X1, Y1 + CALL p256SqrInternal<>(SB) + + // X=T ; Y=X1; MUL; T3=T // T3 = Y3*X1 + VLR T0, X0 + VLR T1, X1 + VL 0(P1ptr), Y1 + VL 16(P1ptr), Y0 + CALL p256MulInternal<>(SB) + VLR T0, T3L + VLR T1, T3H + + // X- ; Y=X ; MUL; T- // Y3 = Y3² + VLR X0, Y0 + VLR X1, Y1 + CALL p256SqrInternal<>(SB) + + // HAL(Y3<T) // Y3 = half*Y3 + p256HalfInternal(Y3H,Y3L, T1,T0) + + // X=T2; Y=T2; MUL; T- // X3 = T2² + VLR T2L, X0 + VLR T2H, X1 + VLR T2L, Y0 + VLR T2H, Y1 + CALL p256SqrInternal<>(SB) + + // ADD(T1<T3+T3) // T1 = 2*T3 + p256AddInternal(T1H,T1L,T3H,T3L,T3H,T3L) + + // SUB(X3<T-T1) X3:=X3 // X3 = X3-T1 + p256SubInternal(X3H,X3L,T1,T0,T1H,T1L) + VST X3H, 0(P3ptr) + VST X3L, 16(P3ptr) + + // SUB(X<T3-X3) // T1 = T3-X3 + p256SubInternal(X1,X0,T3H,T3L,X3H,X3L) + + // X- ; Y- ; MUL; T- // T1 = T1*T2 + CALL p256MulInternal<>(SB) + + // SUB(Y3<T-Y3) // Y3 = T1-Y3 + p256SubInternal(Y3H,Y3L,T1,T0,Y3H,Y3L) + + VST Y3H, 32(P3ptr) + VST Y3L, 48(P3ptr) + RET + +#undef P3ptr +#undef P1ptr +#undef CPOOL +#undef X3L +#undef X3H +#undef Y3L +#undef Y3H +#undef T1L +#undef T1H +#undef T2L +#undef T2H +#undef T3L +#undef T3H +#undef X1L +#undef X1H +#undef Y1L +#undef Y1H +#undef Z1L +#undef Z1H +#undef TT0 +#undef TT1 +#undef T2 +#undef X0 +#undef X1 +#undef Y0 +#undef Y1 +#undef T0 +#undef T1 +#undef PL +#undef PH +#undef Z3L +#undef Z3H +#undef ZER +#undef SEL1 +#undef CAR1 +#undef CAR2 + +// p256PointAddAsm(P3, P1, P2 *p256Point) +#define P3ptr R1 +#define P1ptr R2 +#define P2ptr R3 +#define CPOOL R4 +#define ISZERO R5 +#define TRUE R6 + +// Temporaries in REGs +#define T1L V16 +#define T1H V17 +#define T2L V18 +#define T2H V19 +#define U1L V20 +#define U1H V21 +#define S1L V22 +#define S1H V23 +#define HL V24 +#define HH V25 +#define RL V26 +#define RH V27 + +// Temps for Sub and Add +#define ZER V6 +#define SEL1 V7 +#define CAR1 V8 +#define CAR2 V9 +#define TT0 V11 +#define TT1 V12 +#define T2 V13 + +// p256MulAsm Parameters +#define X0 V0 +#define X1 V1 +#define Y0 V2 +#define Y1 V3 +#define T0 V4 +#define T1 V5 + +#define PL V30 +#define PH V31 +/* + * https://delta.cs.cinvestav.mx/~francisco/arith/julio.pdf "Software Implementation of the NIST Elliptic Curves Over Prime Fields" + * + * A = X₁×Z₂² + * B = Y₁×Z₂³ + * C = X₂×Z₁²-A + * D = Y₂×Z₁³-B + * X₃ = D² - 2A×C² - C³ + * Y₃ = D×(A×C² - X₃) - B×C³ + * Z₃ = Z₁×Z₂×C + * + * Three-operand formula (adopted): https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-1998-cmo-2 + * Temp storage: T1,T2,U1,H,Z3=X3=Y3,S1,R + * + * T1 = Z1*Z1 + * T2 = Z2*Z2 + * U1 = X1*T2 + * H = X2*T1 + * H = H-U1 + * Z3 = Z1*Z2 + * Z3 = Z3*H << store-out Z3 result reg.. could override Z1, if slices have same backing array + * + * S1 = Z2*T2 + * S1 = Y1*S1 + * R = Z1*T1 + * R = Y2*R + * R = R-S1 + * + * T1 = H*H + * T2 = H*T1 + * U1 = U1*T1 + * + * X3 = R*R + * X3 = X3-T2 + * T1 = 2*U1 + * X3 = X3-T1 << store-out X3 result reg + * + * T2 = S1*T2 + * Y3 = U1-X3 + * Y3 = R*Y3 + * Y3 = Y3-T2 << store-out Y3 result reg + + // X=Z1; Y=Z1; MUL; T- // T1 = Z1*Z1 + // X- ; Y=T ; MUL; R=T // R = Z1*T1 + // X=X2; Y- ; MUL; H=T // H = X2*T1 + // X=Z2; Y=Z2; MUL; T- // T2 = Z2*Z2 + // X- ; Y=T ; MUL; S1=T // S1 = Z2*T2 + // X=X1; Y- ; MUL; U1=T // U1 = X1*T2 + // SUB(H<H-T) // H = H-U1 + // X=Z1; Y=Z2; MUL; T- // Z3 = Z1*Z2 + // X=T ; Y=H ; MUL; Z3:=T// Z3 = Z3*H << store-out Z3 result reg.. could override Z1, if slices have same backing array + // X=Y1; Y=S1; MUL; S1=T // S1 = Y1*S1 + // X=Y2; Y=R ; MUL; T- // R = Y2*R + // SUB(R<T-S1) // R = R-S1 + // X=H ; Y=H ; MUL; T- // T1 = H*H + // X- ; Y=T ; MUL; T2=T // T2 = H*T1 + // X=U1; Y- ; MUL; U1=T // U1 = U1*T1 + // X=R ; Y=R ; MUL; T- // X3 = R*R + // SUB(T<T-T2) // X3 = X3-T2 + // ADD(X<U1+U1) // T1 = 2*U1 + // SUB(T<T-X) X3:=T // X3 = X3-T1 << store-out X3 result reg + // SUB(Y<U1-T) // Y3 = U1-X3 + // X=R ; Y- ; MUL; U1=T // Y3 = R*Y3 + // X=S1; Y=T2; MUL; T- // T2 = S1*T2 + // SUB(T<U1-T); Y3:=T // Y3 = Y3-T2 << store-out Y3 result reg + */ +TEXT ·p256PointAddAsm(SB), NOSPLIT, $0 + MOVD P3+0(FP), P3ptr + MOVD P1+8(FP), P1ptr + MOVD P2+16(FP), P2ptr + + MOVD $p256mul<>+0x00(SB), CPOOL + VL 16(CPOOL), PL + VL 0(CPOOL), PH + + // X=Z1; Y=Z1; MUL; T- // T1 = Z1*Z1 + VL 64(P1ptr), X1 // Z1H + VL 80(P1ptr), X0 // Z1L + VLR X0, Y0 + VLR X1, Y1 + CALL p256SqrInternal<>(SB) + + // X- ; Y=T ; MUL; R=T // R = Z1*T1 + VLR T0, Y0 + VLR T1, Y1 + CALL p256MulInternal<>(SB) + VLR T0, RL + VLR T1, RH + + // X=X2; Y- ; MUL; H=T // H = X2*T1 + VL 0(P2ptr), X1 // X2H + VL 16(P2ptr), X0 // X2L + CALL p256MulInternal<>(SB) + VLR T0, HL + VLR T1, HH + + // X=Z2; Y=Z2; MUL; T- // T2 = Z2*Z2 + VL 64(P2ptr), X1 // Z2H + VL 80(P2ptr), X0 // Z2L + VLR X0, Y0 + VLR X1, Y1 + CALL p256SqrInternal<>(SB) + + // X- ; Y=T ; MUL; S1=T // S1 = Z2*T2 + VLR T0, Y0 + VLR T1, Y1 + CALL p256MulInternal<>(SB) + VLR T0, S1L + VLR T1, S1H + + // X=X1; Y- ; MUL; U1=T // U1 = X1*T2 + VL 0(P1ptr), X1 // X1H + VL 16(P1ptr), X0 // X1L + CALL p256MulInternal<>(SB) + VLR T0, U1L + VLR T1, U1H + + // SUB(H<H-T) // H = H-U1 + p256SubInternal(HH,HL,HH,HL,T1,T0) + + // if H == 0 or H^P == 0 then ret=1 else ret=0 + // clobbers T1H and T1L + MOVD $0, ISZERO + MOVD $1, TRUE + VZERO ZER + VO HL, HH, T1H + VCEQGS ZER, T1H, T1H + MOVDEQ TRUE, ISZERO + VX HL, PL, T1L + VX HH, PH, T1H + VO T1L, T1H, T1H + VCEQGS ZER, T1H, T1H + MOVDEQ TRUE, ISZERO + MOVD ISZERO, ret+24(FP) + + // X=Z1; Y=Z2; MUL; T- // Z3 = Z1*Z2 + VL 64(P1ptr), X1 // Z1H + VL 80(P1ptr), X0 // Z1L + VL 64(P2ptr), Y1 // Z2H + VL 80(P2ptr), Y0 // Z2L + CALL p256MulInternal<>(SB) + + // X=T ; Y=H ; MUL; Z3:=T// Z3 = Z3*H + VLR T0, X0 + VLR T1, X1 + VLR HL, Y0 + VLR HH, Y1 + CALL p256MulInternal<>(SB) + VST T1, 64(P3ptr) + VST T0, 80(P3ptr) + + // X=Y1; Y=S1; MUL; S1=T // S1 = Y1*S1 + VL 32(P1ptr), X1 + VL 48(P1ptr), X0 + VLR S1L, Y0 + VLR S1H, Y1 + CALL p256MulInternal<>(SB) + VLR T0, S1L + VLR T1, S1H + + // X=Y2; Y=R ; MUL; T- // R = Y2*R + VL 32(P2ptr), X1 + VL 48(P2ptr), X0 + VLR RL, Y0 + VLR RH, Y1 + CALL p256MulInternal<>(SB) + + // SUB(R<T-S1) // R = T-S1 + p256SubInternal(RH,RL,T1,T0,S1H,S1L) + + // if R == 0 or R^P == 0 then ret=ret else ret=0 + // clobbers T1H and T1L + MOVD $0, ISZERO + MOVD $1, TRUE + VZERO ZER + VO RL, RH, T1H + VCEQGS ZER, T1H, T1H + MOVDEQ TRUE, ISZERO + VX RL, PL, T1L + VX RH, PH, T1H + VO T1L, T1H, T1H + VCEQGS ZER, T1H, T1H + MOVDEQ TRUE, ISZERO + AND ret+24(FP), ISZERO + MOVD ISZERO, ret+24(FP) + + // X=H ; Y=H ; MUL; T- // T1 = H*H + VLR HL, X0 + VLR HH, X1 + VLR HL, Y0 + VLR HH, Y1 + CALL p256SqrInternal<>(SB) + + // X- ; Y=T ; MUL; T2=T // T2 = H*T1 + VLR T0, Y0 + VLR T1, Y1 + CALL p256MulInternal<>(SB) + VLR T0, T2L + VLR T1, T2H + + // X=U1; Y- ; MUL; U1=T // U1 = U1*T1 + VLR U1L, X0 + VLR U1H, X1 + CALL p256MulInternal<>(SB) + VLR T0, U1L + VLR T1, U1H + + // X=R ; Y=R ; MUL; T- // X3 = R*R + VLR RL, X0 + VLR RH, X1 + VLR RL, Y0 + VLR RH, Y1 + CALL p256SqrInternal<>(SB) + + // SUB(T<T-T2) // X3 = X3-T2 + p256SubInternal(T1,T0,T1,T0,T2H,T2L) + + // ADD(X<U1+U1) // T1 = 2*U1 + p256AddInternal(X1,X0,U1H,U1L,U1H,U1L) + + // SUB(T<T-X) X3:=T // X3 = X3-T1 << store-out X3 result reg + p256SubInternal(T1,T0,T1,T0,X1,X0) + VST T1, 0(P3ptr) + VST T0, 16(P3ptr) + + // SUB(Y<U1-T) // Y3 = U1-X3 + p256SubInternal(Y1,Y0,U1H,U1L,T1,T0) + + // X=R ; Y- ; MUL; U1=T // Y3 = R*Y3 + VLR RL, X0 + VLR RH, X1 + CALL p256MulInternal<>(SB) + VLR T0, U1L + VLR T1, U1H + + // X=S1; Y=T2; MUL; T- // T2 = S1*T2 + VLR S1L, X0 + VLR S1H, X1 + VLR T2L, Y0 + VLR T2H, Y1 + CALL p256MulInternal<>(SB) + + // SUB(T<U1-T); Y3:=T // Y3 = Y3-T2 << store-out Y3 result reg + p256SubInternal(T1,T0,U1H,U1L,T1,T0) + VST T1, 32(P3ptr) + VST T0, 48(P3ptr) + + RET diff --git a/src/crypto/elliptic/p256_asm_table.go b/src/crypto/elliptic/p256_asm_table.go new file mode 100644 index 0000000..16a4b4f --- /dev/null +++ b/src/crypto/elliptic/p256_asm_table.go @@ -0,0 +1,1473 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build amd64 || arm64 +// +build amd64 arm64 + +package elliptic + +var p256Precomputed = &[43][32 * 8]uint64{ + { + 8784043285714375740, 8483257759279461889, 8789745728267363600, 1770019616739251654, 15992936863339206154, 10037038012062884956, 15197544864945402661, 9615747158586711429, + 9583737883674400333, 12279877754802111101, 8296198976379850969, 17778859909846088251, 3401986641240187301, 1525831644595056632, 1849003687033449918, 8702493044913179195, + 18423170064697770279, 12693387071620743675, 7398701556189346968, 2779682216903406718, 12703629940499916779, 6358598532389273114, 8683512038509439374, 15415938252666293255, + 8408419572923862476, 5066733120953500019, 926242532005776114, 6301489109130024811, 3285079390283344806, 1685054835664548935, 7740622190510199342, 9561507292862134371, + 13698695174800826869, 10442832251048252285, 10672604962207744524, 14485711676978308040, 16947216143812808464, 8342189264337602603, 3837253281927274344, 8331789856935110934, + 4627808394696681034, 6174000022702321214, 15351247319787348909, 1371147458593240691, 10651965436787680331, 2998319090323362997, 17592419471314886417, 11874181791118522207, + 524165018444839759, 3157588572894920951, 17599692088379947784, 1421537803477597699, 2902517390503550285, 7440776657136679901, 17263207614729765269, 16928425260420958311, + 2878166099891431311, 5056053391262430293, 10345032411278802027, 13214556496570163981, 17698482058276194679, 2441850938900527637, 1314061001345252336, 6263402014353842038, + 8487436533858443496, 12386798851261442113, 3224748875345095424, 16166568617729909099, 2213369110503306004, 6246347469485852131, 3129440554298978074, 605269941184323483, + 3177531230451277512, 11022989490494865721, 8321856985295555401, 14727273563873821327, 876865438755954294, 14139765236890058248, 6880705719513638354, 8678887646434118325, + 16896703203004244996, 11377226897030111200, 2302364246994590389, 4499255394192625779, 1906858144627445384, 2670515414718439880, 868537809054295101, 7535366755622172814, + 339769604981749608, 12384581172556225075, 2596838235904096350, 5684069910326796630, 913125548148611907, 1661497269948077623, 2892028918424825190, 9220412792897768138, + 14754959387565938441, 1023838193204581133, 13599978343236540433, 8323909593307920217, 3852032956982813055, 7526785533690696419, 8993798556223495105, 18140648187477079959, + 11692087196810962506, 1328079167955601379, 1664008958165329504, 18063501818261063470, 2861243404839114859, 13702578580056324034, 16781565866279299035, 1524194541633674171, + 8267721299596412251, 273633183929630283, 17164190306640434032, 16332882679719778825, 4663567915067622493, 15521151801790569253, 7273215397645141911, 2324445691280731636, + 9262509587234749312, 6377906816499461739, 15415592259881792841, 6113140067088447267, 17505803833889144731, 3922849788840338823, 6180172597177093790, 7272741317181956640, + 10883262735810583709, 17399134577381480315, 12750035195491397119, 13953097270363313998, 2485196748577282439, 393430759246706702, 3408702096065201083, 16993508724217005008, + 6863012803048745338, 5403358912802668606, 12029756355518540153, 7447472859343023066, 7395681837001497117, 5416243410959797052, 2749045697176435642, 7813683613416962381, + 18350218749545913064, 7509253999860752750, 13295789896366834092, 18326374111226872828, 3258048562491844971, 6342797029666289605, 9612474478300039361, 17807494771134060923, + 9986610307523639857, 6419466267862117340, 324428332031706042, 16344106992423140174, 9434790811884847197, 11733262517030341550, 10475871084940451430, 6836751077061091536, + 6209985515928007176, 8422208926220809341, 7866188125456775984, 17071696982741312252, 17482363193291892153, 1949968206030126177, 8823961512355572132, 13873010936602132387, + 1136614876084383862, 14528605954969250458, 7641208364604111421, 18286716654189762724, 10711679476846124804, 7209187038097309348, 11378652158954714549, 8736575389915992180, + 17651988839102959720, 4019365809979531118, 15024676458991206505, 12622721674297982786, 243375780477240745, 16572583194372185930, 5189991834753707889, 9498206494038386287, + 7870185149723322393, 7858065912377481781, 7050328012468471455, 17977001023138181397, 11912539230960339911, 1290924823368819542, 8532615064753763585, 6439520053123840906, + 11863354073576605599, 8052916203396229040, 16399182063316628399, 10837704607074036270, 5444229620349428264, 10866168931038804304, 1298875461760659950, 8904898233335519592, + 7053953350607273992, 16629865408105641272, 1929728580699289339, 11046110151029488400, 9569890690274721816, 6501378768832742664, 1512621765961532661, 17084264658609527495, + 300662505135603380, 15234062183651308307, 5319536121171934679, 11496182741145732970, 10520610739189276272, 4310163027157056344, 3285324287508696251, 3261468597637865009, + 5427469457023618335, 8296698312021944466, 10882999905197052907, 16825284839961451415, 2106903142647479713, 12008841566652934760, 5361688498494289694, 12228623597691228792, + 6799789383171367218, 17419525584795621504, 10685476378747845090, 16314000862057872712, 11014509959317588121, 10396318154903722147, 9986284919279346079, 4005974039875805723, + 12980531573372269379, 11790791377897350561, 14264262096731994829, 14999393269362129980, 15405842559289002684, 13147503876582120266, 15449556979080143937, 5047767509783027570, + 16354606995222167433, 15941889353688553446, 10301254747221876749, 10704428461746587572, 15702361407127630489, 3283718562982354984, 18081088275079496700, 17549094608770385733, + 15620168851912893400, 13762314693487747007, 13577625382054330775, 4116976667540664996, 712200332640207605, 9098568002202933512, 8905476951567380032, 7075673514150314660, + }, + { + 15811459837282651008, 4038049993244767161, 9054218236388056577, 10807376403937048775, 11113869110590438959, 10336442539492421506, 9228056645601479672, 2983388754829658480, + 7189376137127712298, 9147574701720272724, 3983921661449444789, 12479790317447783959, 13221020976679959405, 13001868979553711359, 17918176319017234744, 18030393246529651080, + 13903368497828271814, 8050546449078305498, 6932373216915935575, 16010012759059004304, 15451708272653450375, 211711129247219149, 10435723642185827585, 17790507800712341232, + 5218985848865375996, 5835476376320976237, 9096499658845542933, 8952489298840360492, 16605895184424459659, 505731104315931813, 17784248872812610986, 4151555707609374291, + 2650786429127545939, 15186825834659456704, 6983758297535957561, 1268007525252235269, 11433489869282443481, 12352824209154665189, 14555720778982121964, 3578659908492218328, + 4038120750249579469, 8304001815287976437, 251700810512667658, 17170873737273596918, 12809642145843734737, 5765073376623244019, 7923401246629348462, 5401634486288115286, + 12830329023522940651, 1302336131164509751, 7011936976469244509, 8446680306499168806, 1386043952588989461, 9057741950701319559, 6719607413978812889, 17371065020414280890, + 4963953120977916566, 6585250545846301109, 4490972754776862709, 10674499592300479120, 17494988318349821345, 1814042777264686189, 8938395423478059768, 9062966010283426056, + 4683683785459765575, 8782133909400988812, 7201726172249963453, 16855412996123591560, 12016536526053703526, 6004686128951599125, 4323130180079993082, 7578596978943754924, + 13272384473390546409, 15052793429857194942, 3597944793474963009, 5659515835454510338, 18013102402609680655, 15096936721974473248, 9096406212198787674, 948405832772983882, + 7533214627823316056, 8009521942734148665, 15370181949218639901, 17395801240657257383, 13217990616373936619, 16323510521919428293, 10737557422966235859, 11113837024463125316, + 3879469818714011415, 11493820457829716643, 10091807027724827301, 5032134849078736052, 18015749204009276046, 9667758283246223357, 4629693773460610802, 3807196436109718946, + 3214144211600931504, 9454007581819416365, 15275469051811991033, 18196204074031746058, 14853793468372596948, 7707818604949749658, 3266782539036976530, 12810995382868348472, + 17461133192060813135, 16870745208936545685, 5903437034120721004, 9683544852742219696, 7903581348090948266, 13461951083275541722, 2454715926022970060, 11781268272016425522, + 3026443193966185195, 8027125782547146632, 17998666917527163386, 17214321347454260227, 10230244243647560419, 8728893474426972780, 16205038544687771303, 13771356055080873468, + 5724217930909760221, 15420221591912350415, 9910465013739250802, 12511683773790779491, 2841532862707547306, 16614426471009646589, 407574220261191228, 17422057170120224031, + 17151505459068312604, 2041124098660595410, 3221606782120899146, 1382742171848704850, 17222534050501711510, 3195635935311449393, 17295393886051199723, 6296241746423654238, + 16510537668852450348, 15650518534057329304, 475704671950317376, 782416820485820540, 10970369699990900408, 16601298147057372792, 1599283701215392453, 8464446359559369186, + 602848212987973622, 9275953159572178443, 15133444738457385545, 7264639569553811785, 14831517002293569763, 4277078717758211028, 12880437544322007232, 16339691585101354857, + 3725101809714764432, 2995111022132376824, 9338883729618021504, 8824923738291347476, 3745497072215538317, 3576446898425965872, 2350077748663612773, 15793255155885543355, + 8020669822852194733, 5865488997462325879, 13508617967776537887, 12934930135071898613, 11708185641249600577, 14265801329217514207, 10451671488560352734, 13251349632343744236, + 10155392082337432799, 16725751190167983672, 17791070776350679280, 6653785531179322348, 10087876256887835780, 8422365820961469204, 13614411778251983480, 11687935452237305960, + 6094298050768263419, 13949448434836272414, 14194554169090099223, 2625524360834828424, 2823844915913169919, 14077211807091283985, 15592587505996634401, 6562949920171518305, + 12904339001067417585, 1972265247859388742, 8908590936681392405, 10320719400504224158, 8298368064038407143, 2105745729886511647, 16509751074757847095, 15156289751671616565, + 13922601558154971179, 10541544592627074316, 9996345160543671471, 2956094548427827774, 3384122842786043431, 7003441038438960180, 11945730502649377204, 14797331669618433927, + 533287349958250211, 13929395807878461602, 10036169742264959424, 18402398814900480008, 8793706913568807269, 4438954128288889267, 13996827050589647592, 6637236830429213437, + 4308464751705576538, 12193581114161194913, 7322672375339219540, 329737085274484939, 15312435499764491079, 1530264129038944128, 1545025965426980152, 14559433923601957161, + 885617946245226760, 17871561572095909264, 16413308527330544768, 2938750343525834336, 6332736376778563648, 16772687696658236231, 9760470695949399178, 17420556823805232882, + 15040671962589188567, 5841393164221253498, 8923415548773111750, 17449640982747951473, 541686588805328549, 9579487672246868564, 12793813917570150236, 5744918442591934533, + 12406806484060734474, 4655292039647906135, 5745141229449178147, 12923621608907306732, 1329768221522217568, 16293616079741201891, 16309603918079351385, 8282635243638078478, + 10842935462043546322, 9397166878596340773, 5807145729199489911, 17083242467628861919, 11473470924584853882, 15220945465200609206, 10470712526179658906, 8129601297261864071, + 4407657041250297528, 13832960959347315977, 15056546959967740939, 4350443362134191429, 8013419913016572733, 8888684099801298477, 15401479253620745715, 18279947201669400847, + }, + { + 7454214833719424418, 2128900810399984335, 8254953962723841408, 5341529923812819418, 11486532916552139238, 4528331821405917357, 5048485034448492541, 4189495710753944825, + 9223539587116638677, 879142711399260546, 2878829560233905572, 13081522393987952773, 3067261963348061547, 16008150780594711643, 5466468631453287640, 16659147898971349582, + 8047766502132608624, 8715815570878148809, 9093649079884580138, 3437267783531715968, 17562225708466062266, 3512667182749067601, 9223059995161026858, 1366690634827047376, + 17929132376737482163, 5565926714491294456, 5252303555134107501, 8169975563698132305, 1829326230943509100, 13780918661853274468, 17736665596871232627, 4246797342334307384, + 5113556452592134569, 7304867793218750141, 6016631926489320290, 16471860203547627727, 3444471410714850041, 16571738096215232488, 2003912224952639024, 4203884000388032492, + 16858425223672505353, 3192567884201479579, 1688923188642613263, 4159042130811153987, 14900413503869744297, 7521485042788722327, 14038093805562042641, 14713051866364662650, + 3738774192924465076, 14037618828827556145, 12882915396530927286, 10882862194263941815, 13115222579444494605, 18244214260845794346, 17217867059738274194, 2458870331386500577, + 8768788172344064509, 2761897497254272960, 1400167175024837359, 2591319596670212133, 1499652044223484974, 6820326453941021707, 9701009499879906773, 6897435972338565418, + 8314355711464256163, 8435944020779763783, 1814803522962187872, 1875253356755380905, 8214588085484192137, 18062045700553127821, 6649947905482043494, 3119726140944397531, + 11881571666857493523, 6300786026612414187, 4928573492087752294, 18343550313462089203, 6770642120472453960, 11296815027803718740, 17312916793783562794, 13028515123652649961, + 583508939637547757, 3195115082527873085, 8497784022800549923, 15135719419829981016, 1180291993378114150, 5447281494912347899, 4710517655176242215, 606503081693490000, + 17732293765863716052, 853971165248416821, 2022886284923413326, 7522564752651732633, 1417954193520965954, 5046659528429172066, 11426939225844711305, 17687206690616539318, + 6518552374736169438, 7940416740434530770, 15228615103587329007, 10662504584317997513, 18370800756791469891, 5564085264882124489, 51043856061444814, 11996026931884728651, + 7009195269496826002, 18330778751427846129, 7903886241001925539, 8198930484643879628, 5453546027419466587, 10378692433982210944, 2242412603379120569, 14762161396393277464, + 6146718865488327808, 8559779132928137805, 14001994895150170346, 9915701789711858841, 17119408219089522083, 4345363585985782988, 7559492126634131602, 17074565022279033371, + 1081257522368061155, 16144982029632101790, 2280210790959566458, 5000326950136078873, 10441086845859171982, 5717080014740953823, 16816253740467637151, 5185838557034755093, + 5835900675852976641, 14850646160621635240, 7784412869482958370, 2550282385659033114, 5102457400165153071, 16480738116286616710, 10726825595752219296, 3297564208881065856, + 1801069609044825811, 12940364451588241698, 11725236374887225564, 4731241880784054295, 8874695411069669852, 10719836334830130110, 12983549127094653526, 3498884940869443564, + 5044718241380586525, 9152928988998314956, 13274353997717834972, 5731942246653336838, 9453579625340936273, 6013700107129234092, 8535405089994224035, 12024982282827680252, + 8296339816955798913, 17352704937535250450, 11768204246645005024, 263391795317009517, 6622964609352808474, 14563414288208376628, 17887389278697258458, 1552357122641071119, + 13193447428787140357, 6701071088186218635, 17090465712567125397, 13844922841372559745, 4526664599449630663, 11321311100342816810, 2490532976442445147, 12506579957986846905, + 15880983134955361922, 12396434033732989105, 16113831675045409693, 2288598422068078002, 2992425583703267972, 5056435264545116829, 12961420259857080621, 3600697676136115398, + 12817090821406743193, 18109694174173693756, 17050997692981853333, 15796663317536421728, 15462568940214327094, 15151767385082666402, 17711108405932879054, 4030072071760424157, + 15725012875727498089, 7622633481063722008, 2241559275264867670, 4018985714167034871, 6507661741395512701, 13435865773135624096, 9564553262368888543, 2283482276790194513, + 2968718708369505078, 5001470555537559034, 985353756026354700, 3145435598301843880, 16410370922030326027, 13981261173663429189, 12565590835494971812, 222252377400865123, + 3096505383227168071, 9129898186148780825, 13266305466206099845, 16135309871416543685, 17993464782237414425, 8124413037611692960, 8497190057115012501, 16568064870411415344, + 7703394153241465824, 2740794419858297964, 4228931976096177316, 11207299659959318490, 3319492324515199082, 15611937984154016609, 3431896575019701337, 10686234123352138088, + 9979039962499030832, 11759414020433216431, 15164411293105859178, 2834464820255498467, 10716371968480406389, 2722055037405581557, 9240657586762413776, 267811388229104916, + 8385388272348876655, 16035999427286017211, 11255427708348651444, 18402964994101916340, 17964277099260928049, 16377129731672778885, 12034488763192562427, 15854195461740399790, + 14134352329607480415, 2406779189916280288, 1148342927890148438, 15859735140823564382, 12055096645840568482, 16044304241162505738, 7466809979198503617, 4519925955244504432, + 576546655711744206, 7729660311499709518, 16864252562359023573, 10537888397995064025, 11828992107618549830, 875843272290362538, 5328675038336932433, 3162100370899364658, + 3897734650128449985, 3892478283047854402, 153850176068596076, 4448963435449755938, 9911011406822436713, 6379744721342779297, 360324666931028426, 8002086405015565067, + }, + { + 18068291112584812890, 10288580446655316879, 16866690514199445805, 15458664010538199871, 17175925202246173890, 12208778610361755998, 927739404697616036, 10330183209424493139, + 17095781462759061506, 13984950766342782369, 7038608479806172868, 7073471193601880894, 9212126909381119712, 15865851343492198107, 3874039137971369196, 15366505242302572319, + 3235058267792568155, 8030551941244615429, 12851363585510555514, 9786973022560471711, 4901414898060251968, 13240438628487941170, 4707852389766057435, 6503805666511566831, + 12081034000518110752, 2773740877021737013, 6429475399936543899, 6804412599792308979, 16082860502313571182, 10013838454082912732, 7176778953927883654, 8065751639278576331, + 2577328127707636877, 17299987330349518428, 15104751346159263209, 15422444943335124822, 7934601464470854115, 2803659700354182364, 186161376356596699, 10049927908892587253, + 14226302346438044875, 11259466681405724608, 3945860952925292329, 3287876564263331089, 9461394118536996000, 12404803552046013762, 16569223065554335449, 15881973563254196893, + 6665552777203645232, 5302850683863152336, 7778922028047389493, 14212904863357452615, 15966527672828304861, 3802704091320278344, 10517530714917238300, 3742971938824724276, + 17741525051518363144, 1374267330336191942, 4677891169679898540, 9639823611685431337, 5482431056894095950, 6629146695374718376, 3884574854221819712, 13266790928096813258, + 16028684877412556061, 13405436055762225966, 12268280467958284214, 17956874998292211440, 8610745336676661311, 2945210828327041653, 7889492068658434371, 8032335517518163423, + 17096678157067198548, 11751872841896846778, 14951591239724148110, 14946797103632354342, 15001812923533485850, 1354666869219009884, 15934246013528228877, 17674533676709494379, + 5120889077991995845, 1349360562851394781, 7965752316528770369, 16276575673105864366, 16613052983770261287, 11454051240568463780, 16146905806612132291, 15806571503343623359, + 17505459029376928465, 14087658194607915364, 12240552390931686885, 1809009366037859941, 9468395484396723291, 9446052656478984520, 3720796795453397330, 18035355127900871187, + 10937782812576841853, 2203021845094443505, 9622750764486021477, 5518948515641487288, 8021555402800950130, 8884074473434139094, 3925095588129480413, 6199533424090268508, + 3824657730049729815, 4646911366274335188, 5283751816651713473, 16396970607630079440, 9102362834410101425, 3641728106404110497, 5923630184251416230, 13179374259571264822, + 656781601862662976, 13284523818709348938, 14255949685810234815, 17662122984572331094, 14005272187244763785, 2769973460677437621, 9745740172208222044, 4399769039990314590, + 18221138344920380175, 16761112742545021198, 14421639991115588619, 9419167563943683547, 6237248361283446238, 889754338720059891, 2289880386545166424, 4280148734121099084, + 8456687949810928719, 6224977199672540221, 9826855484829367341, 10345583292656523462, 715642447678891171, 10791247680314230673, 4390153264475025171, 6710036254773432312, + 11434352627810350015, 2720291857616706675, 15035363195148805272, 2358906741960696657, 9439888337759970778, 9654563570832962181, 6013570511081660799, 12742404245236356978, + 16538557679183724426, 6137267799072203525, 13879005145532324670, 14569289845201353615, 13784903266526414562, 1051135786146476033, 9343275973150690259, 18338317230916899584, + 5259165122317589354, 6306061166879114159, 2622163270351284906, 15212813592118086979, 11497359168652342849, 7085380391293263482, 11799059272489792659, 4912160901181298022, + 10956239956289671191, 12649697329483184719, 17174497942073298839, 16454040570484021598, 1964314354536023134, 15533681501854792750, 11384243972598433754, 14387731385073008825, + 1627256545355657442, 9010013556811110994, 14616120598404631231, 8391972858400276155, 244817907132802237, 1484397967210729699, 2663560284299448875, 4820353472962713985, + 6567121006858416952, 17119959301239619281, 3965303761563489817, 10211688306206007722, 724618943950031080, 16749575186941231354, 9181397347361086360, 470616631211702666, + 10512473611770099290, 11735037909076331019, 2922606398008539984, 2599799834378751770, 10226702495047936424, 6109026252412854338, 15572556822827169237, 11993701786788749663, + 12951512375864063487, 9270845327881823041, 13198171264747338719, 9240466821667660686, 14918848310897324032, 10111918274408213257, 9869937817792431868, 11316967879062014936, + 5691364106926740802, 2782311978438620206, 13067832493996441150, 12682941211395745221, 10515041347274706648, 9910726429809603026, 10540072260993990483, 15078586909891471233, + 10892036011159420575, 9833543566604313963, 15176473361412960115, 14504845496732895720, 15845034940445278100, 5364672156984348678, 308167090528488415, 12390239901799634313, + 5106897453764491321, 10540479232768400094, 3938246597140945859, 13156157265138481529, 4932443191001849637, 11374218582626530502, 2907557293167002437, 6807344711257391317, + 5413712686694380670, 13147753106081951274, 2960292187746417883, 13977097845105551070, 17642067093469099040, 7699556298723812469, 11597002672863564976, 12392265705126521509, + 4607947077536257852, 17763906282816835996, 11288463953418287893, 18299911648497821660, 3194698850612319839, 13371221457376435780, 10047718724723936081, 4488868185988157408, + 13742985513958274859, 14201726926009534105, 2183997556320958457, 2564950343266657518, 8846625471673389624, 10147673280504740463, 186524308060896613, 16409126738088317791, + 3993197157612782947, 14810882170056329119, 3476738916713041107, 1512933700383846542, 17820889751141311776, 9132720567660500233, 17598924894608972696, 9704537908665702455, + }, + { + 15695265447782578013, 15506558718447241194, 12341861439298989232, 18400958156300100845, 13704811286967591150, 5802260047239067846, 3266013253411255445, 9892957742319181954, + 6053458788987550519, 3243549259991905443, 17990025476026266205, 3918149716414968130, 562225565996136148, 12175287017902713154, 16317970834262634990, 5431198806775607012, + 15123186170178367264, 3054913246068400920, 8130663466272395280, 2852812622149318730, 6747630551931917110, 18302049774259156971, 3663865061939346221, 12340091610704678811, + 17453312277789785069, 1148609799196055657, 5931758962926317381, 5297597381576544508, 9136842287749269744, 3007653173299649609, 12364677607049679091, 3656932227466449489, + 17732801126130995365, 16093023291975796560, 17028512234142609413, 6293028118993952199, 4019599708379609715, 3557632583507755601, 16737140015288442165, 7041678499288317736, + 5187842645752673199, 14914432544387315508, 4944360914161982641, 4177329533692612281, 18423887817846407126, 11935898134550505219, 9191754399457615042, 962223885189306707, + 5467152559317256227, 13407023128339380091, 656806207084558176, 10823228592509573890, 282802378108427873, 13003319652664166176, 11501612671901244594, 8595770155487539951, + 1805255869285898834, 11676967700876044626, 3486630203654875772, 8366336693945605431, 17874084888474647879, 8803709150655606891, 680993178667234696, 1603785777136148315, + 3637712241808318043, 1673622542455477465, 14468899013751477571, 6607241816357171779, 2332200248940843815, 14077155355848738174, 17298155509321457563, 13624623885136011130, + 12550073596584561722, 13898852145577133829, 31360866049197404, 12629522544897401454, 4563446134761510332, 7272813487383198495, 16767195814334178362, 13600498026689482359, + 5170414136067325373, 10875290278133558518, 7331268231382198947, 4082234313337835514, 11725295114019619895, 6488017511603508543, 11399466652931338200, 2898113599278666684, + 7553418756944942759, 2324485560009697358, 16004009784734101843, 8345371056076551939, 7377284629098981027, 11049718262343754276, 8021605820736410979, 8175349759714152494, + 805625092558946929, 11996029844853502332, 1725029464967101849, 8189609560660709539, 10396089572662865065, 1756007811373736605, 7645529329516777410, 2608132938055260111, + 17014595621244604813, 1574499958190108567, 8684546755761322906, 9997538531915911005, 5652402150243592363, 6315865356369667235, 8687115108443972047, 12369176255146729475, + 5117004045853299762, 2238407612548002914, 12392667457861900490, 10614312910576266709, 6446577503758597410, 3767883614425995197, 17954966454260719539, 5233322901463099, + 9194523390605857831, 2335161171394626054, 12290490336543843159, 12464172604156286188, 15842196578033029974, 9081285134201585379, 6123406461662038242, 5504260508195082396, + 5345718195770885739, 10397124463462860788, 13439280718993427040, 15556816356929214653, 17286785976489019906, 13891416634083443852, 16346644312573564447, 917629419222512011, + 7892626957952656865, 11229871638807701267, 5249460237065799805, 9529842808650392581, 11803106868201977918, 4600784567377577444, 7178870186583927975, 13829108773971861655, + 1289703927368007546, 16175369390684210381, 2195306032582966573, 11284704298115036296, 4385727852306374408, 6915138269030006111, 13440775052829788601, 7742359656861485339, + 1418252737937945274, 10664598666472920539, 5146625453081549320, 7716408631061669307, 15481216546649435253, 9087869057854823101, 8438781502742166926, 16066665509570633845, + 13091912700111633391, 15443255616678375884, 11338466119311799740, 9641919445273031117, 12359044485240987114, 7665168466660756789, 13845183791692700257, 10700410793152785934, + 8528578803314347534, 18126789418189181716, 6819229450418907940, 10542951903574211724, 10335954505229790439, 773396809431100386, 763679854573261611, 7782804492252573047, + 14102889564247144327, 15479515297699126743, 6400257822506917934, 12400211936189844586, 8190162441518233318, 16489757979266243333, 11074854544980782334, 5650848136444824725, + 49299995316533601, 1658812626798186796, 14321322055030354, 1062750633396640947, 11458188738562323084, 10319462683954522885, 15398455357851813321, 9473384090828584828, + 17723857482947114516, 17563952877363564604, 3160294749187111891, 15485369139883303866, 7487527166185553604, 9661144290316674030, 16387431126208996705, 7499766964252214921, + 18331281993562501191, 16034836629143884193, 11149238385567891503, 1413880911197321245, 513884082842080653, 462183409527465946, 404977499728878854, 1459875951423441439, + 17249202826373649180, 11151860661862313351, 13606113651347187963, 13653298062695357606, 12346841931589648448, 10659825520992339734, 17857177794617595021, 12846976930859175746, + 7421386827645898229, 4567141110780940726, 9022716764262477732, 7287282267953917487, 12950322172330872807, 2675202024861592519, 18339036521791481203, 7003909904344391740, + 13539440097944399044, 7108271818419613260, 13229113021954558930, 14920952138366964142, 11082363944167072746, 16156446445495173622, 15515349862819133984, 12359827931233605745, + 17013104138257181849, 6982996767213561282, 16259302607976830987, 1579942890487532721, 8635464047344487404, 10435012080572068738, 2976844365866972753, 9471858478345924179, + 2347775782107812070, 311367032040459687, 10852128873404113998, 1338612296059509077, 1599132038614432432, 11533828746166635650, 14264087792956755961, 13004615331499537807, + 9599159247384506427, 13011559959971256245, 6331802547989957841, 2642320882309956905, 9140255367820630480, 6883541786494880896, 9560822438260750771, 9273862677291701694, + }, + { + 6185067974942684922, 6452325785873503452, 15247257292915812252, 3689324664200339196, 3264129576054154137, 14977721630214750038, 10720913377937894256, 15914487080763816378, + 5498621600689515064, 11498885804037952971, 12697073118321478539, 13999008478737234463, 4324885099420360642, 14021182496192299466, 10763088992583373843, 12898585157666243299, + 11622402383521841588, 18271726976204770291, 4805501248621435402, 13483122453682180759, 9288143526654965614, 4688427612851085031, 3280806057528512375, 2249349779106893174, + 2317249352525369754, 15030797481081533168, 12237590576308284360, 15660076992611440873, 11341015819700735887, 8280257602754365719, 9212501234696283343, 9673375202141178099, + 1576552382781604867, 14644873134348781157, 9860879950368557094, 12941927431704225849, 11959666739052502184, 16171152369226594315, 18364294470656574060, 15172859394935215584, + 7521577570681641021, 410124023045579310, 13806534589183327176, 2731801491329207875, 9959087943014269549, 16205635445474657221, 12069145915768935630, 14906463509471682144, + 13803661539609326912, 3233094883493439444, 13409742688319755923, 4298169788674760921, 12847636671560067434, 6702365884489797170, 5650404576647289813, 7380374625644226212, + 9762534010701623728, 15349113802811029024, 8305297169667044246, 18013018210154476198, 18414050009163335761, 2190126859433916996, 5444481274452218807, 13415988647365667724, + 12538763466834894269, 1385338113050483955, 17489001033590767373, 12734464941837886525, 5762578105750980174, 1466528690684075875, 4231060025049573530, 11218444849081704286, + 13520439252664960951, 3770430920651384098, 18274623217805904547, 11545267616883260257, 4084783424859113779, 13863968977657663867, 10237835480648574698, 7395611844854084274, + 12891185742908997266, 2889148203703402614, 663579373261619221, 7616112456816883701, 9002890885722034437, 10753730807791931375, 3553894671829778441, 4368240432504457389, + 11641532224340210206, 71261001011952801, 2321745415903209397, 6595984444855093239, 10341299657770272718, 11529878689049370930, 14381510695605158203, 6869272773579747365, + 18407769999912340511, 13801873415230385184, 8080153703167941484, 14143463895182697871, 5248006623352376562, 9271982788122934275, 11225161179021692620, 5076519449784098178, + 2388868518767189674, 381125256886459269, 11316805024982236485, 11671355336111917711, 3564177948445768908, 5245093046097324328, 16144139891410041039, 1201010948447708436, + 16591032462856670656, 533850935988668408, 2263232738784171066, 5572124915367282948, 12583782854491678471, 15015562207264407653, 1167486851113549964, 2987176661273439670, + 1073778278665127828, 301289653767944933, 11482324780950168317, 10626716202328004967, 7258024014655363102, 12597648090458567878, 2617914106162603499, 12324652644266476584, + 3803745429517922765, 1699256829990081513, 2179986933466617785, 4330084096014112280, 10403896221616386733, 11682369673591069651, 3879392495596713886, 17249085256521020624, + 13938687445393525296, 13822790166673492712, 13216892716897321740, 2165870280409061482, 9147830602451471293, 16339144108911449834, 17837507766320968023, 15340710005345158245, + 4485838034764604283, 8426970493386126530, 7709201418726935942, 13468796449943783583, 16441091014185628843, 2374269830239737237, 13423059086148485716, 17132218391020520330, + 13957732574177025801, 1100344278733481546, 12156946932617203965, 2007756425747006287, 16454508610548016441, 18058668363064769203, 15619001054652658323, 1201476243910931863, + 1292412381963708933, 10908631677673353277, 15138649576688192753, 10741579086621436813, 16534963566169304684, 10228646729525192055, 4256608246548223655, 16176814322718522963, + 3012125536603284881, 5466178753014623970, 16598422155702380175, 9902071731736699653, 13861556771123036741, 9534711573278556274, 9805898455743897538, 14683694395093520838, + 9125214754744730198, 143353181770418225, 8976042597473555858, 13405481215779362056, 2863125481657826330, 12333764090388731260, 15250359513836914002, 5086706595778559056, + 16341334571424533804, 4657494880554316004, 18092783076286521475, 19832895869027143, 8667365497283429486, 11501953273367138162, 5832487631687776985, 664754160781757174, + 13604774408440609080, 12660956565879820101, 4535452877362385626, 8063311919592303330, 4591511734775484245, 5164119991271568303, 5282209033000495913, 15181017218549722334, + 5070228782679699429, 277881850788922387, 17424210585510719837, 13586569401109299583, 16661809787463692974, 11543434536078774133, 2062889565117549061, 14261113517104795690, + 17683343658262519228, 13727986689138309987, 9352982220536939533, 7578301410191413286, 7970586266869249410, 14357824150096386968, 4381700479561442130, 13217807763329274501, + 13524719886605363170, 11553593166914861649, 12034020205580665813, 985175984881624722, 11040631342644137311, 12107496606245418129, 7328748654066365904, 14741972847454326011, + 9790251609380109406, 2449588380962781369, 4694893010228180943, 5172145489692928848, 5411277023198532852, 12752098113265127406, 4559500999103209672, 14921033543082064351, + 2248082728820243864, 8720404853152831367, 12431505213690884811, 14883031570121462817, 2278273022936639560, 14988529405400451352, 16732922988142595529, 1071232319627396164, + 8793441611217807824, 13198147137434605751, 8167936709747999601, 7685928882032231744, 8779182451658438210, 8059106577297479923, 18394590909567718507, 15508315617300242773, + 12828567388121500364, 2264959796189127526, 6222205589523394088, 8827682270015065013, 2619170858745203321, 14835030117099398518, 5960028394532808010, 2233397097850889639, + }, + { + 16319476863352440541, 10283930715856640343, 17675054544532098447, 11261132162985242084, 13712716897981761400, 2681907143459288706, 6930256922080133347, 1445069157579547822, + 16928574868467385886, 166417019993787654, 5882811520342817815, 14106304179344008065, 3747123724781081800, 197109533566874475, 14303280595714789450, 15457633026018307066, + 10773597511592584859, 12552868588431074640, 13500771767160426835, 8002499270056378440, 13792839099998553174, 12949371255843262119, 1713974340992291550, 16150173130483658061, + 14745984256428057001, 9333707338036985191, 15365925315303462, 11789129028059619744, 7873100217437235208, 5289763977161829145, 17731215200358323788, 8876377479309635703, + 265950821974454804, 5047467530470542278, 17523044368516619801, 10054436503372765176, 10321185867287373431, 1212061937729015591, 15311258419138633926, 11236518538207084768, + 10904693956407098222, 9013418755007070130, 15510005599846320670, 995378633446250700, 12234095025518917836, 6689106237771514188, 2809193993744369126, 548124799387888260, + 267350765778774337, 2853877351938464055, 3677119082593358544, 7685335121077514739, 6491980094762370981, 1384870316654548002, 10568872345228996498, 241629538659623878, + 5839400175629113441, 5238299193248381245, 16787876416022887315, 6051613843621920008, 9219569873668974552, 5916753090530931032, 13390630214643423749, 3265335490455191268, + 1507475744849985303, 2597075068965622770, 14968669113917266926, 597606442423659891, 44293923912414886, 3832651144752540816, 17438860065613195810, 782112340603343331, + 9050896199196733903, 6427608033444590004, 13787696679536621857, 9682087046920409188, 4519093754155995429, 13564098392055667371, 10512507082236058799, 5289934424008191746, + 3477191607403300916, 18283244229745029067, 8462159792484018099, 3056576498976014760, 7259283167233712785, 12530251965039903998, 10232104933720625111, 14190745998092156987, + 15759362035410895911, 16075598437961371531, 4651513528876405575, 7694151626503869614, 14468862724637972284, 1838601521755586245, 4168957446435305706, 8694905613027663664, + 14825552838983151434, 8639609968952840931, 15547914584352392016, 2313507499500708287, 6902543375698033684, 9987468886016348918, 9068175779860656258, 6899641689193116297, + 7449110402827616954, 5689206471789540768, 12722069021950813669, 16017131401246233663, 2240977975275954837, 4794705713606614946, 5734937900461809607, 118285984764445639, + 13917685647531721740, 2432096911145792817, 17733637484624159521, 3848601825403209903, 8192433211051054683, 17388066421914150767, 14336306308847565282, 7501625553608785022, + 9844707592410952215, 8167497709587672711, 18115463401502884199, 8471563375691441535, 17525405040226359165, 2460181803833371342, 3375780580907171205, 3024376384135966552, + 15446204978718816914, 10131769697256428720, 10937168660849553757, 12174257400309926241, 4774490771219074715, 13764876998736771340, 14299051476546774038, 11409551629093945893, + 5471366060803425265, 5927845172343647238, 13321967429805195545, 1981489848211999240, 3657997933879283852, 1341488083977381854, 11001000434984172675, 12666440712128450303, + 2127920575549495889, 8861781778185702064, 10501256952986753134, 8967441017586595593, 12224364761159418844, 11856305587178745520, 8176487286367052930, 4988165299911239066, + 12279712962388998309, 7552494563001503282, 7388800608466198128, 15704167671257743517, 17781278773093632973, 6688613358271814698, 12261263098366698895, 2585757657252460656, + 17064077609211627595, 14437064866605257652, 14386408901468621215, 14935685266532365200, 16350265985491822570, 5195856143474093260, 4623415440658404410, 2936315929072503688, + 18399447684263070219, 11294713679768342649, 1600242588657342501, 2841752419388937826, 15111313732750279467, 8179471361906256467, 10611923788752807866, 10823997080784081293, + 15607034385960553579, 6934034624930683639, 13318618594774430459, 7552865293216027192, 13692640408995992185, 11143313220936824014, 16721999332340829387, 6993089466645938014, + 1749509641415861857, 8597876492914031095, 17942224190138398306, 1760222146515426983, 1030117883582412628, 6655464189981098460, 14222900139576809037, 13041171493967200019, + 12415333505074758067, 16988669193263876033, 10482573641416551070, 368049292090348794, 16109255925523895167, 4949934055388640387, 12038479532506656790, 10829864449693781036, + 8355401100656399994, 16963696462538033761, 10131652431024537917, 4166156454906166144, 12888240412190236596, 8396312458335953166, 445297814986468978, 10512295830234020744, + 14405236772348157690, 6950094157338825845, 13379457125020057613, 11396720859761560260, 14063367153572826959, 4476298515903789197, 12708437873915498860, 8163633089770520638, + 6432708226771955694, 7524978005081948959, 6442046223716866789, 4082077252385606982, 13626979819841658005, 4264573835620574342, 14788317690584235128, 4710414518172449402, + 9592766374657389840, 3206081438027351157, 16013702689312533501, 15788528722601567812, 18314198451066123850, 13236688692074994659, 3229858297074852748, 2968037605940641303, + 148470760067500612, 10319937899190593880, 7185467485673911733, 13041030391924988650, 17119842693824847662, 10989053172849710331, 16048294708726813099, 6612710923681979611, + 17408606709137505005, 4263600430695220256, 8941719936291239640, 6115030817534063286, 838024703679813150, 9772071103323550178, 14394690179107421621, 8621372196164819917, + 18138724984190926809, 4154767406604950048, 5209478936276366634, 15469512148904762872, 3613429290324777670, 12631812917445459751, 10280179062385049041, 19656860716268566, + }, + { + 12030193727051241373, 13981191132445467472, 18205169646206905593, 12162181194046583886, 10929925499200074058, 15291772758590352491, 8452344520852919320, 8199821440451878776, + 9876482463501409337, 17331045017548147052, 17542726936580732548, 8299195336372136083, 12891767721269002608, 14062358772374813347, 4165127680073507756, 16350672737379495431, + 7595271959296927018, 15286771623821390629, 980612661413312852, 5738158184597378995, 6972117550390447038, 6246497134979772260, 2935831443692029059, 12837371529160496660, + 10228366644420455452, 12084265406352277347, 10265869676493821892, 3560143954870272139, 14368775225608592689, 13173522067191372913, 3592475039513249949, 11243554718624931135, + 15959892157814064696, 1882528744932347842, 3354709518288681174, 14147909176429982637, 12969888425264095514, 12509610747258602648, 10437226619684632037, 7605683691113295811, + 5697384515599291245, 14896081221061315030, 4329502046122948975, 16301954510440819492, 5191621061031169335, 12944691824816066027, 8080452646904372453, 793075614214301908, + 2662373477940179766, 8950125653339535995, 15717799081080898304, 7088593450282143511, 11723351531494404904, 38673945663198753, 5289769381065195334, 1742280067441486108, + 6543255501112359319, 6063422531520498671, 17228726254778244409, 7570754775709645369, 18222396492501523182, 14903937974816691813, 1216646642383641441, 7602781158453005929, + 3007861146695764376, 8267227185027440857, 3177625538211272080, 3144345182124760000, 9215753228267875296, 1342253891473772992, 8185239004636232247, 14951188427081280984, + 10569785547248986188, 3109435167722572449, 13661123218513357442, 4646373703143839998, 9712828426076586682, 11389514152975496268, 9796826623781200498, 234067627740956181, + 2188380882990542468, 9914251329861595119, 13616219875099353594, 16135919125023820218, 10732193797566088293, 7172996278550642736, 8531638916719074975, 14749121691457005349, + 14568782576170814716, 17582848432122737506, 1079387064102681806, 17278567874821675408, 3094302191119669899, 3730448048895347901, 3508894590334961422, 14091660942919228106, + 8414395549498864629, 370884714015114625, 9859130251236103342, 17120282094084970181, 15213286581037020029, 2737545283554254673, 12422648607044275574, 10601123825482591606, + 7332173975514924017, 6689389428877729175, 15162654177270425320, 17333672210015736765, 10717615330136876266, 15245080807947386155, 9830331968044142933, 16156512094344185621, + 9151817936103586393, 3495709317444898385, 10881665308947458223, 7087755261386867019, 18372828768905212167, 12873774245490293443, 12750361906770763255, 17688389897350887716, + 13329033749048978810, 4765695903433858377, 4452680451529163940, 5638854187414651620, 7897503022043776206, 17187635538005569019, 17766753195497526986, 13201529234556380285, + 3638940913491383692, 16533217388164981609, 4668956005988168087, 14053696531704823982, 12029006663244142834, 12043309283447175601, 7840964186868745782, 5691369519440953069, + 8500871974974230561, 6371833132372813539, 4002756625312698938, 7281624589760047276, 8813214993155334737, 7738842112797657395, 12616064385239668311, 11875709199633531753, + 13080663957403320059, 7305658810888170771, 12002632153917673214, 14713338572031413494, 11629178662016777533, 11669230410217642326, 6563025945717173284, 15889630204666511658, + 8934119620399766972, 4226481091885914732, 16000709195004445297, 14687868180928846933, 16269913392757896907, 18294005838441325342, 17625669613551023126, 10864307042201644469, + 1539179629859629979, 3357767843775461146, 2101360073251832751, 17234081431682678130, 10847284171622250162, 2314478515498414576, 17530153382031867644, 16210890762511619844, + 16932765725476234252, 17974885539458965712, 40892050255466590, 13934027942808790590, 6621543269907021363, 11796852321134174354, 16673775773384613137, 3370296079966463186, + 16962514259432373133, 4639769064050336100, 16965148588326727680, 2922022742538957808, 16752379982369550296, 12896050752492664079, 2708685629936199207, 17894058559833424187, + 14722530431277419104, 12333728409662476827, 4162946257347548625, 7052045338975163666, 16435875989376103923, 527141841571514699, 8480360258792897962, 4322786951393806781, + 17366230017999184054, 3121469063237497918, 16903979375392804254, 8204447732627807538, 10469007462907301028, 1938243801077680483, 10156223751968769092, 14290141776757271674, + 2576841693080637380, 17952976066680364360, 4927209893946473523, 12406446420948698970, 18104668447274679200, 11321546081935789882, 11763004760954725540, 7488613143173641394, + 7566233859107262691, 14065899052643539237, 17156645465457086529, 11936850757897686493, 17529993825395532749, 17145245039398670236, 17597009579379584432, 3472628483897995694, + 14380803034817149259, 11042134597430846555, 9992762805140200547, 9895143575678511242, 3228768306004664915, 16153114680219541243, 12036181641761325912, 491369524302296324, + 6378343037075212083, 4634622317247307316, 3415247351446435530, 5939760313513758783, 9540802244218497398, 2134082884335789009, 15623633842188443713, 6824600547587227473, + 2021901489047050575, 4365578779519871721, 8603845379677060773, 8929020214929242215, 13010370953605249273, 8071113541575997356, 10681914788594101574, 13369522272664029829, + 4241327321249984516, 12810477007632864750, 16880581634097860698, 3630333431846970294, 3439918565119627078, 6613942714960548236, 8647431674406387604, 13352599355545805355, + 12320933424684960076, 8906954611339502145, 16931708732547376267, 6446897311644431583, 13749563256713530203, 11522599770043814384, 14455556215959342791, 9022955938835504923, + }, + { + 14734199666349771791, 4159865781008764547, 4413963548436270853, 4489431649937855880, 2824856157476907519, 5111945927271461159, 9825486821405155220, 7657332565522279292, + 8798422177670065832, 1250388939918922012, 5145324733132743026, 3716299737175574168, 14387202892738514375, 10255681359895407489, 8563822744122397080, 18430417713920116013, + 7857399215288147901, 2593284350589299949, 10086687882410147351, 14407837835956327624, 4469878646287387405, 6845712208092605310, 3019047164798827279, 2760591274816994522, + 10930605176025081830, 5850835312463207607, 9603163404664606781, 15623771803942139684, 4096748595672894276, 13060617166821837740, 11222939712741281346, 18445178894593253710, + 7976335583419810431, 16568954524837045521, 5581116346602990350, 18369129318179031997, 1930478946186473631, 14255806344323769850, 5302512325798050097, 10008570767203424472, + 929118050943456183, 445163760697913504, 7208246135487165437, 13565354217377841149, 3835063466770003968, 8214155842021576753, 2078717035803259674, 12502657820500987763, + 17741004372977419914, 16320851348541958575, 11853008901690968430, 9954777341381304447, 14274892887030297417, 10721339848307167540, 8731332510371325649, 13676383433520290197, + 6251805914672095769, 1063060143452094078, 16647397170554915088, 15454955736319919321, 11509235104607349098, 8441647766544589978, 956121464218389431, 10752793912033728149, + 14879603883666664508, 11158422912437249402, 8667988090392051507, 12702057193823135575, 13234479167199208223, 13752955092077643608, 1117550756888145931, 1184967682872965330, + 7004802666964716824, 395433501626287206, 16674151908279399004, 2279551165611331131, 18032555740803644790, 3573953642384243989, 9892480504006025100, 2397759375236735831, + 15032723931727013905, 4176673226777913017, 7270920367565154969, 8867675920919464620, 8174119413251699506, 15030539680120439131, 12325981936900309518, 2932283844656871802, + 3227890727136855788, 10772277744280338071, 10065219931092397730, 17233706446636791341, 1372997838851103811, 14430098741859995846, 3290287064497023581, 13054133915521430696, + 3404625123065318735, 11884832674715736469, 16513753127836546671, 2355202492840722899, 2182039060332057784, 17942518439766613327, 16731345075928062112, 5501869092504492134, + 15704211025163561149, 14596679078416770266, 10262177146233936833, 12443005669252298316, 1541294166390346144, 2952804996209068760, 15358481016273272880, 1321729586231860999, + 6862865331233391228, 15106279167311639122, 3283008666555627541, 13405610967904725343, 9845239597399801983, 10764942720093673542, 6506992344666983370, 2483131164245259278, + 9508384588323002196, 4786531415304518048, 2690306308552249535, 12208815515689370850, 5145548243685871201, 1536915039528215765, 709289946127949333, 7152734178531880864, + 1555309551759198315, 13156609441579560529, 3264147002083956017, 13629162110016732793, 10295903344220946201, 13835148808782379287, 15646840234722388037, 5850505683234532491, + 13348528763144385986, 8174978337870000956, 261860806345683810, 4561309263594732686, 973813267913726588, 5812176512716041479, 14361848397565071235, 9009461390894364897, + 4179825720483458009, 3146850391122074949, 16612045757277946305, 5253713786583408340, 5426943015912173764, 17275032103121420023, 8467860410750483621, 18150546576917688519, + 12762630566060098764, 9762645120146998728, 3635705124656800121, 3295681695622364165, 1764654124266477879, 16610065552429288987, 12182970194869512424, 1144643512390613083, + 10286763540130161498, 3400614528650953395, 16801086752981339314, 5746334081386875705, 4489937467701177898, 9249554406360134002, 8258038249111147700, 1922443676251769427, + 5981893063992017591, 11780456280008557800, 6865399120670384939, 6192294124015342683, 12388377965276278882, 754882660516751393, 13476094069874573591, 1000347292494612314, + 12753947115797128456, 1864834899756446490, 12873792436322150068, 15420857687427067065, 6214698061215951246, 11174167031334850459, 16378107708672199269, 13073812080094584194, + 3531275996684891316, 11764951512033875164, 12239314899306115751, 12701224435273087197, 152051042384837402, 47099568832954431, 766394328496626388, 666084094458729910, + 145223485883456390, 2445564470200462527, 6482144922631612251, 2316071592874761856, 16548688346252111305, 342430477320199814, 17819268336019413198, 9112976726726029531, + 8272548370610798443, 13910434751574602849, 4252192981011400377, 9590519351704710983, 4172183866405471444, 5567646223402949380, 6059725713353637560, 7169982539312028092, + 8422701298619809502, 5749403137850675219, 16696307763582818287, 7683947097864552851, 8685161999810728314, 15931494353777534155, 21629333141204286, 14374464903502564030, + 2354260355218724624, 2822702976206523812, 15817484232297668127, 8389777239766813455, 7962301702220496539, 8489354871723216960, 6637394823950686331, 5853452396705295777, + 5949957329602421631, 18059540496274524007, 301304345046470155, 15775279105880040289, 15979581939040500554, 7142046514802237176, 9965619645220287167, 14369754349691428035, + 8777708254369249245, 17882304730732025188, 3127583543205765491, 17274466425160820249, 12870181470212293464, 5881181695848312122, 15338161852429951632, 7245877162563127935, + 2634466441339054819, 13057158409330115380, 7410310056430957860, 16362149248469660371, 1435827663585128427, 8005375008235415486, 10865470829924734253, 3177600727860863968, + 9627905753364576437, 12776456422166281355, 2851899644189834214, 16998174196920379994, 9001602524188761177, 8960294999163663712, 6580949973363012183, 7568306074815116797, + }, + { + 13345381996769253014, 6278048669175242345, 15990349685835870151, 16358261962292186792, 8505261070637963021, 9396725065098564446, 15331953351049782155, 16194796339208321205, + 7076257175157660597, 13221730879315432464, 4499548735964119085, 7559253754663705944, 10196808438730350168, 2391718471837563931, 914409564589897387, 6649744842278078706, + 5502356275170104522, 12750322860129485966, 9581277511584938955, 3872282844937972630, 13668854758739140874, 15020438121087800965, 4581631138384430657, 13296613956093963588, + 14380343880230315251, 6834909096379668102, 15975839936772635678, 7738230945027714388, 7971648870467745046, 8320128435052726628, 10820026056424631461, 7058118462476293754, + 7264036154081699231, 1304060579978677195, 18166928140052040308, 12593450560996175288, 16027666629366472835, 3677101504316839585, 1477956412235870359, 11566116495906585190, + 5990066097615398579, 2092999434824845091, 774626818169813708, 12815491981614523669, 6993666706115136402, 17707947501239782961, 4907798919001453899, 8644498548438963907, + 10893744790193607860, 5425031394266373727, 7829758685118664025, 11448863214220512751, 12538690882574195219, 5021176303769859871, 5820297470465439264, 5119144562842414323, + 11165438517321289842, 8661037418831619311, 4401480924846865713, 9355650368902126256, 14352422544185763829, 12268031594529744663, 17607497268577344783, 12698954426769962214, + 5383780168621648368, 3613813760948797485, 5757643543708300439, 11434310585555684688, 2345981080034027974, 17817292757133028083, 18327982597027759229, 3812144950722314021, + 8792386854586041066, 18039588893139878329, 5501406599000558473, 17117814302557684702, 7341401626090786647, 13233670006052290433, 2192568639640447778, 8946253989965365943, + 16557600339807647183, 3024845786116641335, 415867109882960964, 5024769423994345740, 6707880221250244075, 1889203596997453159, 2063004021323938442, 15149146696320496954, + 5091534986326281709, 8005812102849517353, 17366496686564185717, 3658878980438307796, 8455810244524637026, 7649393961871195909, 8629386422022530934, 507133797409068858, + 8757568869745664865, 18237318628906764926, 3340648953867756474, 561136277959994100, 7271097445297070958, 2011054670254573698, 5452809131990689118, 13469888024447314143, + 609640447772904263, 18299734743899574512, 9082773418653747144, 17312950936823165983, 8565594704932296493, 366163155689429929, 12596423288683198842, 6787120926470876217, + 18404298100226778075, 8425329164435445437, 7431919288426595458, 10680902470574406179, 7944931189168061880, 9822761000408762356, 794258562727155839, 14627123549563592012, + 10098955144981013765, 4095621933458866623, 313357785962326778, 14880249056209398407, 10605627454111004262, 7841561005378789988, 5015399193497762029, 5818383539901203899, + 486788682374900660, 17535646661984222270, 10786441143551223569, 9744277172896797168, 3600223448942144865, 8847805713765285452, 15255336563751266306, 14792236445425212896, + 1569380763811592357, 16850350231819425451, 14974316825768144065, 2974893813338477794, 16726324408135381195, 15000665607479486464, 886632233152598796, 3783793991897659851, + 17267037371866619240, 16498595171305365582, 6453906744948258368, 17267405673163971478, 1676214913570622682, 5051819157163581842, 4315279707026326708, 15525840981760267164, + 829322392873083372, 3459913121476606612, 4547714508342797986, 5801286056105229312, 11591136160675314269, 17965845487226778560, 7022309609363885527, 5990586084398664157, + 17415594387520595717, 1453256163777911406, 8242859112004661961, 17561205531134219898, 4881570838375824387, 8392772608437744175, 8797957464506597080, 3142154188155936579, + 6248409301753564092, 912147011448492506, 10413108318416902948, 3349445377400918439, 15887909530347096066, 5138859116738406557, 10990929831134698266, 5911273060564729281, + 5197581879950405305, 3001975514736631403, 1919678782193534342, 16811100109848336275, 13086769249238533805, 4351245282425706500, 18281601243717625371, 16799537683941038764, + 6288277991521986035, 12231480617236286340, 8633990423689180869, 13535190275290488690, 13419223868164389899, 17622732596713666209, 10071779220640400024, 9451660464882303005, + 14029940454474238624, 4957972603307996503, 6311878080193524221, 7485203809274562760, 14450074580092843779, 16845073066403496624, 15870387052570816676, 15700883809468229388, + 6623653867803819118, 1037643671564549885, 1236506073247459642, 2997396263933967582, 6182371056378366305, 9773995232878719062, 3356495419149379609, 10710833980055523526, + 9965766500412107699, 16895652751136174589, 7310937934566947318, 17649841370865305295, 5052693369483848536, 8093060855248113928, 15777336335257114359, 4642979535698186085, + 11761084765182152401, 9226969173242076459, 5445719591917489914, 9371133174853825566, 18381867235964583187, 17262888224001222346, 1934253649802391267, 3574888052111089863, + 7862401495691359736, 674970329153306547, 14447324101549742216, 15408908436196428001, 5339840774639473099, 15542473999923245378, 17353380999887736706, 17787369898686553525, + 2301964326759931592, 17775852221342579469, 345763078096690431, 5079606662887837532, 14863722102334084051, 8905941255852264870, 3806200576416892831, 6668327519801544161, + 3672519063263749482, 11898207454593385701, 2364050348970898536, 3761861796987022528, 8159458273489411574, 3231262679605967088, 17179214124164291006, 8770284079122589350, + 15270079546607712667, 8471883658965202468, 10333206241569264742, 7414741159725097798, 7709713706668701850, 4461310100162632915, 10099194304024685274, 4760203118683828795, + }, + { + 7237168574323016859, 17908810107504968137, 16225708127838178121, 11127095986977103648, 9346291308317410251, 7381283458277047010, 15168602815371846943, 9193719743691734294, + 9607254959726458411, 16837156817186931862, 16178143308706847393, 10970721795101931539, 10840376278605273092, 8298765077094727469, 15520543037910966843, 11484620381947762562, + 17395070963094117711, 7608132718922490881, 6531178971071758016, 1302814667007331361, 4002629363862769670, 14649492495298089937, 6868174210142585697, 15456170652716048683, + 3688308493559609536, 5123828159813683938, 6245436817220183971, 13062234263274605325, 1313395302496832996, 4961840797708687855, 4990192311341859196, 5283131006216350959, + 16952735659396950704, 1249482796824289329, 17859504101404809981, 13260403650830168625, 1583343855370236084, 11389120333403185826, 13676599385745755008, 5066667130137739346, + 4799264864906918301, 18347145714884655971, 17034125441088681346, 11828378178487279470, 1759038296522081901, 9582407833575609381, 4195817009050503354, 12618256395967210055, + 6486755100213398364, 5270923715575268322, 9268255346880651103, 3147724493117082267, 16710318779269985633, 1240462642985509644, 2400811796254219369, 13670743068809614358, + 11437779990773246594, 13609113239383378152, 31268927496646090, 11618772705165035208, 3078024460365036167, 12138983306908962712, 4779157919015181380, 11901698850260672002, + 14642787507945832432, 16258081639927585935, 2560321462542164242, 16286882196975458174, 4328159372518768969, 14313553865397841147, 13769372656320240568, 8950675126435331579, + 1309709344837422378, 18403288028608342631, 17102350491414677993, 1133523394849437315, 12526496222782003755, 5447309058259018864, 17311120581405946631, 3760058661125144368, + 17891474172606596407, 320094264574086461, 93927075222964142, 14355817838636249140, 1777882427773785068, 8237021421587560510, 11520559469641852991, 10567910693910023157, + 9164300764173563614, 12322279811717869943, 2768902015676238485, 7761799112151133995, 5740162933856370894, 8043946608495226840, 8923704438836381869, 15189322655373943849, + 12046275966378825708, 5909386969214935556, 1201990704907881028, 12128549009950617306, 10773478878317625320, 4554854625564648958, 14029736945871573056, 1353247251064221090, + 11909123603272307101, 5517949035179831783, 3295386484950432959, 5605137795524476293, 10718654547602473221, 4984952409581272095, 883267724006996723, 17328431094026829348, + 13671210467347025770, 9980617617519781731, 8339868735163579231, 2150724314757661915, 8571153021065445899, 9146903697769397191, 9122246102814640373, 7744779513085605450, + 5733697799130763963, 962857090393715865, 10538932452511329419, 3413281437172590207, 6635428480179771287, 4404718679994229138, 329074170931436370, 17746708206910902139, + 465623292609655544, 12740391094094156750, 15600221933467635716, 10712500242632508096, 4760789577759901270, 408323003115848014, 464706673403009712, 11801112998381624421, + 5545681129476190458, 8725689673989867696, 2143812364199804063, 12133065347666807143, 14072918710585286097, 9027361966934517640, 12608281386782336413, 2053972384317260152, + 11862793362110657447, 2615766363986534825, 5065170482898519596, 2878903691735623036, 5204682974850023849, 3382494556217615091, 15421437169807703253, 14936317525837021790, + 12363228588136583724, 15035239525394825264, 16312817933687103988, 5294641777817034798, 1197907056067071148, 13084871057903003628, 5240492218216802394, 13798739245031559476, + 16167519903361647970, 13106036157861122437, 2112866661358918425, 941609107359251234, 7240286582201258992, 17783018133711208779, 10433192833270769862, 4303514224506582113, + 1549168540255262874, 7958641388293572779, 17867057222043259063, 1752211085999288773, 11828273580240483781, 11712913016012991853, 2373360154803731890, 6779610934665321824, + 6633997178003959408, 145323909693817907, 2198452664824754574, 2920221908918689777, 11047294234593305837, 16840329825128198329, 5479733463453508239, 14054694221361300990, + 7908792292118068471, 2703859967221946473, 6268006869726038724, 9406994672213611166, 12424098071734234719, 11900750387777467409, 10724482893673149484, 3191937460539390989, + 7635348735946750413, 15019610708452651937, 107671898858364780, 16808612119435524257, 6120109999123152232, 2251481693156523545, 10792076820325151003, 12924551065805570215, + 11225852067204798645, 7665324911993544151, 10402215331683389905, 2856131542917118235, 14152013119539822346, 12731746180172488022, 16402457225039188820, 11129045951416314666, + 13009066459618795885, 9636541685854928780, 4783713672495759319, 13224924194053497016, 8044695420996588716, 4054951559323242873, 8718047667110955699, 13479622910466265834, + 4333570709684826136, 3188483692974505359, 17976757824852085147, 12585927226835238762, 16665346689006249776, 14033419552450595393, 12216049696243171604, 447536030797427204, + 14100792221456700414, 11757964686555543320, 3971513799111193530, 224065891283865758, 1282520657830780512, 9120233956471705454, 13711191981326748211, 3725543277190035504, + 12094220323799678341, 16876894723130932646, 14806566188177921577, 2283878370700345373, 12254260799985999571, 12764268124075139223, 2425316412794046071, 2088679341162220742, + 172600248248909168, 4338060849169481673, 12046085844143054569, 16220699277291497444, 10358794035289405365, 6739081270984238196, 16079387007561085178, 13471688054064884318, + 179238781994802586, 9017860544212172426, 16179187983079961859, 1380105261896053495, 6187003332254463653, 9896593306091975718, 17181949235068293496, 6820143159816507763, + }, + { + 11156900805411912784, 1737275748212365168, 16993486692205800040, 12538938066475152524, 13167476167764338790, 17373319366642541595, 6054831428930905282, 7118452124011953000, + 743288751022377974, 9226164090128370715, 17669221687181604232, 1117842260351396541, 17697784451833850722, 14382209306891991103, 16951372733687268016, 188390802712613066, + 13268209255833199938, 16633146482265079265, 6044233937395578074, 1733872861371247406, 14239198956565602758, 4036306057099495106, 1778912298053970082, 6174082695927204648, + 6390361653475754940, 1093882175267357438, 3784079074949811167, 17343872969822169769, 3359715652121662979, 16808529787251986583, 5169730697018536870, 52766949850965558, + 1403521870654810721, 11668153767441183906, 12890534559297323775, 13255750643204945388, 8838011266491753399, 8488733891344199335, 7468199299966346977, 10645543722243906719, + 17019046551094178629, 3055711824867115856, 10841870683594132444, 382072777978364200, 725450988418371589, 12501774248954630111, 10130164067258601487, 7823254091037946722, + 1866199290196467180, 554488389245696218, 15654194385091454776, 6131785419830586829, 17856850737824439617, 17602611196814472081, 18244498160243832872, 5819736122106091373, + 13147212495082910072, 5628914243538540648, 439989514095330941, 7620472329041817834, 4272251824771633288, 16564977601507013119, 5441117136715882317, 5622571730905974389, + 8706277077547369587, 4848592786700984858, 11350018329458740004, 143688324288835298, 16536982722430328270, 6522962371020150117, 9511378862712142567, 18221873727623821967, + 3985496688414455558, 11118602672015528143, 5383196298446424580, 6116994806363767576, 4051800833472889243, 12236802048340231756, 2619572929682123355, 6008593592518397571, + 18159379849046931840, 11283007389905902377, 7465803490016928282, 7730454089227756796, 3569936088077890000, 2804205711716327520, 14107136042131448408, 12050401455009236937, + 4207421531032215145, 7924589143024019458, 5496622772676523695, 7061817399179278712, 7371689806847905735, 8282663810521090335, 11422487361383982601, 6397294642072086677, + 15924576759579234055, 9923512276109397968, 6142441356128988365, 11189756906743802872, 17790684027455851021, 14143953977451478354, 4002828116425422775, 7521923183240026608, + 8349314793194266723, 8850246337652136896, 16635419760398709839, 12466509089914036008, 2686508850003621675, 15822032015692543407, 2211302095278204426, 12550915236378020964, + 3844358527917324404, 15045102416646491178, 558560187822263769, 119801275176287297, 15593003307431898979, 12817356186703401527, 531419248069516921, 7957931139148417935, + 960381383357168877, 16774709963697143275, 17896181076874992240, 1150415168189819777, 5498481130530161530, 6895900060791069290, 2079514944189747822, 3428689507902936354, + 8525743843643076774, 6014251588524085384, 16423924202355808346, 2605496565158241796, 16676209604549923678, 4870342652354143869, 8337072895802905281, 2478197702680510974, + 17076629945963768258, 7647523525256473652, 3999707005939039144, 9578933944626939348, 10469359763046002307, 17515033080264359276, 14272203721184987484, 9979666177276073145, + 3896303254672567310, 17167309259158926479, 13231865857998180315, 2260217573257466310, 15465674863730076330, 4460025914284098438, 718707264700157542, 7215235109325196153, + 12056920178557394302, 5131253251712581899, 16251877995882633963, 10883064494054484051, 9473274815269607947, 8874308005470196739, 4412709653190846630, 16469366225886156991, + 13309932065155297376, 11504232979548718747, 14366229841750030472, 12797870458094162590, 12894694064456675527, 16977997035748966590, 15080606548583635864, 9292471759789693596, + 1226107676031264801, 2683510924831720521, 4344623677417870872, 6581244316153602113, 12516966236091151915, 6758725397469564403, 3190083750600883646, 13018714111448482165, + 877612159223846777, 1550479938877814973, 11850498481046386377, 18144061425551419288, 6811504328538189985, 16351446194603329949, 14495016749549056145, 15394017693349630053, + 3547198835521135552, 9940690805769919242, 15170549880149605690, 10701681269101530298, 5914809102954412895, 15790219349994701461, 13664049285300230863, 5735249081148743651, + 12395092509541605535, 2320592554345067500, 14213227630916688382, 12847883717490005600, 10192971941898238362, 18392117086623245662, 14369915478097042954, 9636190568462370402, + 13308009076055104849, 9679299541833059768, 1512845037409468381, 18211703514703978816, 8337549295001075127, 15882458005321062689, 1212364284844818969, 13856780176955551126, + 7602636613718406141, 2678224565451628558, 6976331354727208830, 7414684104929828967, 13536889690553324858, 13086024708711030952, 3748455179863046304, 1853373561223967487, + 14753645881295168970, 9410794713982813264, 13485977523338061503, 14267793994973767571, 15813187206361118954, 16449603969001878152, 17705410933007144731, 10960511750491378082, + 42380674434071200, 10068881872338405083, 14327149378182272744, 18264050153181932597, 8600790433943161385, 2952021171158508784, 13461070885471956301, 9633226987835715039, + 10360728550079953555, 8471248683929284652, 6673003382103023756, 8972723925695500528, 5941683993959582947, 13500073059240140624, 17856131583218769324, 16799784995825923732, + 7292102941225494742, 4807816512907139693, 11271815982004383122, 1065441269547643077, 5279915279332961503, 12100441009657772185, 10448964764090871372, 2944169874578456259, + 4669722990542640772, 10017368027073540045, 13392819319228640800, 1070815632453829996, 7274130058405640448, 4460835865761538026, 15283885107956150892, 5815884817303156137, + }, + { + 987127652983079722, 6872208513691823695, 10110578719299008714, 16457381243184327116, 10779072738879347999, 5965282398006505989, 18251577267494363268, 9321323379108563422, + 6119049170407437179, 12938407708038792521, 10105420405825346383, 3416306788177016826, 330648587825212481, 125568991290568141, 13857127346570918649, 4376080986981308805, + 5930434126597036589, 11240994623469578584, 839346533006694594, 10015087329724081709, 6285097815938761787, 17244606485954860090, 11523249727882012981, 7178451948850151803, + 16733615335426327004, 14833564736134873977, 15736159435317472941, 17560193966711884894, 4670172300306758374, 9477713670983496835, 14243955354032737980, 6459864769649694970, + 16053517698989562256, 2069464650596796670, 18305730134729303993, 1090863611964008044, 10595928817546105363, 11743183506441120484, 8584767248013363683, 9187452830514379903, + 6938579711827578880, 16268282168771714072, 4567473305742437221, 754548046014059669, 7773636946295303572, 8132380485366854656, 5054542590195827722, 6074347500754248128, + 3955857908381474930, 11852678900999621852, 13783017203403417097, 13616745777931267488, 13189924588414196918, 14519927295809952876, 9334100965942760501, 15172548734242885286, + 4869852132020793901, 14921212823114724347, 14342559113353033349, 13336827701782657309, 11168346789878684452, 9790480725902426052, 9171116079841529936, 9268902731763220711, + 4417372365016540659, 17280138652899703220, 6650972319709075180, 11252221422799491438, 6825784270383052414, 18308562203755695191, 4237445205082731447, 8485872406483449772, + 11718726714045105993, 2624508790343569765, 5794111482300008213, 12819216567095091385, 285329842793152354, 17257707863943097374, 16912487373614458842, 9501952033607497057, + 13074643344738491288, 8505193022029238118, 9932652098552590323, 267408114280737600, 14802861246220332716, 17506028737327736269, 15929083591675282371, 10937680132245212187, + 16688733766460131050, 12777635326299184071, 7769405825716531061, 12435979606514844725, 2767987141420672349, 6723940414217341603, 16114182069580717377, 4541113259200434040, + 8506371389960537559, 18031198550689409305, 11510251852995503442, 1923472385053944821, 6189904387119195613, 345664413118550356, 6439003191582953600, 18344385760549575361, + 5320548958745018096, 8341561189829530680, 3327828110998403156, 17981346269177513223, 14254804273025688074, 11111147827704077234, 9093333670517437191, 7272552282364063165, + 7587268797387514088, 4402481408866840263, 11260815389095334885, 14391444701764948710, 4224880318604689771, 2472148458660279030, 961218007313548745, 13826356220879831691, + 4477325369345373420, 299141931133875776, 6787652129916359301, 2498016036564260464, 13342319943892943548, 11124287719737492205, 2456333599771808442, 5084719026388755304, + 11208298545009109072, 9361672558653085726, 11597473974277116433, 9123409098329262113, 2315750487079392743, 14788891796020803799, 16477373474431052867, 6394267218565388055, + 15099803091969065643, 7815556206466442264, 10304177785190927723, 6493522556494567776, 14386173575053274652, 4399768637448990408, 2996083572119906777, 9762267038999270167, + 204205141115459509, 15145142808374731170, 1678672167166598389, 12032976094920574395, 7472791648310888425, 3687068899306742850, 2661021646792225638, 13705804163602287184, + 9320611303424530507, 7295004960403065170, 15749376924513664173, 11543669941217924178, 3710258597217352196, 11371260298937340150, 3902566672028686120, 17983841473999282668, + 11814188798674654097, 13017429106091059634, 11079902218909650978, 9064898043690914606, 12023954026861289442, 3592879256613061321, 9997625973577562433, 16073808232708438680, + 15365030910001289574, 8789654976305207659, 3460532362538090307, 12992239135084766157, 11449242863169591803, 5418502644558230048, 7981056000757137731, 13021725301900406539, + 7782773734814780443, 9390379677704432300, 7130223982341575476, 6496906770393189740, 5095392170846288421, 11058865093605824851, 11021902438459302094, 6088242418938968652, + 5966014717657529537, 5136588784988696205, 6360472729290909457, 4447169061266082500, 12506934948427595318, 16433395045878253504, 5116318868244944213, 6640832565780621049, + 4132920736998081103, 11827975663170283136, 379030309731223785, 3149051804072184938, 12035317616979166548, 9642536337674500310, 15531497803043399747, 14399049832547796531, + 5493097462539196248, 1629980144571354138, 13641173578972112594, 11096917099468924214, 10934169048860132281, 11269991112518485434, 11206050457519414207, 7939067479736521906, + 3806755078949823895, 15988975666336042818, 8246267395645554105, 9703363749164024623, 7795606269507901456, 451102251659512683, 5865831186929065346, 81064915983990550, + 5810137226573964106, 8424253088401255706, 4543883508937971494, 7386148745257823512, 1348150366217548727, 2103092839740150931, 8353010273955146981, 16831854467863536537, + 5095778754322493440, 16577416628492146129, 12288076721910512064, 10563487622054214075, 14620553808255752273, 8159958565993701874, 6712100880095874267, 13203797993773281759, + 9640010815039549413, 17460773950401928956, 18229977265837445092, 9252634318278265623, 16798979334068219458, 11731856767337519419, 1551975378671663669, 14320102155496153821, + 10528988339881191052, 3808685834232068820, 1064376960475785008, 6466670187430364599, 16860839938395945578, 8876458447995837267, 7897541112736965607, 10283235741250763328, + 12246932755942887931, 10842213418139067847, 1827392608284960926, 6961756951208563550, 14626966634382828071, 12653808277900379805, 3359368990539585596, 1190255644328095727, + }, + { + 10836955050615624183, 6228108794259714192, 3769609579318360527, 1481673673659326756, 301329772189110029, 3574710782304999603, 3037244905687722509, 14442941761397229485, + 7739110373965523311, 11019153332996240080, 7346010155518751797, 14853737521111213548, 2542883237207113209, 11015136828834421504, 1998601666328316353, 1540417751495800469, + 599034134015360884, 11263317530774995092, 6667501365379084069, 9844276013103312691, 7566306886180438990, 13702768345615509363, 12525031963170717968, 8463786212988210464, + 16430555432520738762, 11225321696360643709, 11777973654353154098, 13133115227828884393, 17678738157022850345, 1586660156856830642, 840647763693221003, 8645742354464326466, + 8067725618510810414, 6229041032854057836, 8025364837156161898, 1290443646630661359, 10991323655842003728, 9346782840170054789, 8939059450584871902, 4772347025802041241, + 8187379062828795656, 14744366698824202673, 17014663804743197103, 7704635785366672415, 17162645929628468457, 12942442527592695216, 13291710923558633295, 5800071247768119960, + 3552067877931717181, 590108493628737243, 497390871097272557, 12785795808441396281, 5561451613221796230, 14263823913216836845, 10481729699001112354, 7818094022046194698, + 3598736754916501501, 4827022662021197854, 8203180748141933720, 10283251600632599041, 16186680658340956808, 15183393398087319142, 8714402370450665212, 13217902016653171217, + 13766344083396522552, 3843491774288631026, 1174164282483810421, 6398024882371290634, 5459580537585379131, 5871096222069618377, 9611875286525007701, 1940221124144536452, + 11598781954647517543, 15695098933552070478, 14976427118847622464, 7147358677856872160, 17135524627354123951, 8338407198954935663, 18099467407613897492, 9025371359994313919, + 11451018285787631498, 11710749546511767589, 12969419493657624536, 2338911230088800542, 17492614411321910750, 15274296072504110502, 1258801445108685875, 15178229640559232741, + 11743016372336018414, 6125536020759642301, 16563280170798047733, 14529052521060111789, 15253502329174644388, 1556763737841306861, 3041940064290508531, 18020881359462953336, + 3035547704692483898, 462291414338050452, 4209186193642705826, 929930084709639338, 5246348806539841852, 16407585218848843770, 6076996892610687611, 16547395936238439809, + 7781914900266335582, 17967080065814694611, 14677062197599091614, 6667353926398988267, 5658765839564756939, 15807817354312473921, 7476371642206655552, 1194561222317134506, + 10645607622061780068, 11583886495890779582, 6879987349345299140, 5119348262235800038, 6546217876212430196, 5534845503308945424, 2003680599532879233, 2824480091030475274, + 2297585344913389180, 13965959247788377604, 11341378863608387800, 9890953382689640379, 10443523266835246852, 9631856696811288063, 6944503116553031744, 10030824122780966395, + 2699238910420203023, 13176131497943324759, 2961070701879546988, 1511208990650977146, 13153451530298458944, 254163587562261430, 6668642257003911712, 9040693998431169077, + 5596789796292416136, 10505785820531892681, 3803057245175025164, 13601446877323203658, 7939638174866660891, 14141848241206986005, 13782885912650943563, 6287188921995938212, + 12999700407775352603, 2664314004284450753, 7085329252118415126, 17472483363754271126, 15550759073378819069, 7572177503239633527, 15705617684783232308, 8586972133880335199, + 6636244234467769744, 18284390012667441869, 722929822219329257, 3274166388651987221, 13177791837760949480, 2568122970128111127, 3477167651270374316, 11371867850093288299, + 4065583279544717744, 15575999213135020408, 1319417044854511604, 14713082471342741360, 9399053450949627084, 8900637792268080986, 13511414538589410393, 601556817111475086, + 9243142430859561073, 14638657859832212223, 15324499241122936340, 14477761753578941934, 14699832840191454785, 5114873517253108658, 7427057898915535537, 12536800031182849473, + 11775949216482616439, 9736654184925478086, 2941126957776086651, 11331303510371804917, 3535865528819665754, 2488682969529955908, 922100556888217739, 17904958255578472855, + 12876460969864886954, 4692925517843901006, 17390372170975588763, 14692578774429957295, 1759864963511373267, 12016553435172103063, 13015881292003590351, 16187569590073958370, + 15154092100094191059, 8103665941237580228, 3265383210728928809, 8341136973870067643, 17281635732314163959, 16433737305161442677, 9130496888060417111, 7070932927684908859, + 2129695226164849659, 15163429615374108928, 2225893409909150746, 7870945454511315288, 11525457118388411301, 10519485231888976311, 17601979933176863246, 5257431126828903605, + 8898009465920717074, 9418463821940622934, 3166596280515383312, 14529914354665032916, 17743784337460844568, 15964972678726654742, 17116339264937332927, 17098426671353029801, + 5868176722765418764, 8429364877332796959, 7785756492654137899, 14975349488807886278, 3272010964035218775, 8946396491617515063, 3774503492701710541, 2113917870331282160, + 8185651664029868180, 7343199417670698200, 16586532029502206227, 1531724981369431299, 5596507566645246686, 1033673521199123313, 6075312368002387319, 6988496924054716781, + 4604669688530289732, 1867259042549585024, 16180237500272383882, 3296394050993129267, 4326520583229998770, 6557569103657179065, 4126356691591689646, 8674328613473447843, + 18057076842177382833, 13020419432300857029, 13646402493577420410, 16962268104464192344, 5358581430340252761, 16443104373870300795, 10099216829282619724, 14890543634568831960, + 13276631120161396750, 16968574704982977753, 2376454925072971568, 9477134218094425043, 13784622991820528988, 2074505569780291922, 14294803056569335889, 8759684036994631222, + }, + { + 16549155412115374305, 75977962737081600, 8870825488941031605, 7403557657089993256, 5825394234709429518, 11978746413791918409, 1876293130790608838, 9416631623519692610, + 12346821858361333862, 6352722970730511156, 9920942104315084988, 10492373916708072181, 11163323626947992791, 17035282992056675772, 835685192553490421, 12792459312570503983, + 10447922451077327640, 1515090998837129975, 13498807582085718212, 1212623301098019779, 11573518650162536086, 4823461685803772444, 7289704908733783271, 13167881392748327839, + 16959954449957105311, 11051594981465026138, 16062472412226411858, 1976004115345602080, 3630495248560019222, 12698191555196404138, 10107923614229901155, 1597725222698864118, + 3416019089997140849, 3011417601086561495, 6275242351124667286, 14197557183081060761, 5633711018871057458, 9723496411345030748, 13910057756335530832, 7285051335492395815, + 16774605620279328247, 10223834398868957807, 15781733027568799739, 2607095622729296315, 16499590296954181006, 7576646116121533069, 16638313875339315529, 14296194207652471696, + 12874411821611501068, 1998417862388106640, 13882642435422318946, 5791129521978463837, 7512934139125398199, 6966283273373526248, 5475728301311080129, 13542211842758381589, + 7798993150530415596, 3288770257171840452, 13373286069558329631, 9784279046496345014, 15725032660238528813, 16038931923358478835, 6921788930154218900, 6093507035435214187, + 12194839527972802101, 8376199250979271449, 9055995864116790217, 8125030124567662718, 1070812422878398727, 3950906148308170707, 10121516919638719072, 813471384549762068, + 7610605259962047827, 14617655900985224117, 16089669078196617440, 10594009761533811214, 3425971360520359121, 1245021768718221594, 16690372892586114152, 9933179766195044175, + 15142732720676276463, 17435583143726694663, 6030257647680338377, 243683009289621849, 3064238900429394982, 7655019927497549159, 4484400084400458429, 11650860345018651068, + 16203650576183697977, 17614744533772112337, 1119292890050870301, 4574885263986729787, 8108207498737924504, 9708434439316056191, 10676823369888368482, 1078311554675991883, + 6876202174636220158, 18057502510666060150, 7821730895845187783, 8882321799602877790, 16220624224336230256, 15562800387021280583, 3043825293109091520, 697130162105054983, + 5394549491776704087, 14795348078061113966, 14206080764200897220, 4623073019242398493, 5850602047109930316, 13783531993760979209, 14764346975698048641, 8951680172224205890, + 7302725075484262769, 12572584665857403749, 6499691362933905099, 14476353114356185371, 7381716148644532401, 1573798302334388880, 8128390307766282880, 14547454176248589604, + 12109367644744157376, 18037756988516427980, 814572839386760074, 913616818805172815, 12974490864633240943, 5228203788953366069, 11458421156905635350, 14534725545904442520, + 7717994192616957513, 12204841747568567286, 3911093871228418198, 11236736110419819217, 3283907124458179474, 11742518530965881218, 15033771683298452989, 5201173013910789542, + 13950307561490203211, 2109530041534182225, 16966608361725322273, 17050013487213560977, 6582524050666171954, 8348662591649610062, 18257068567217206885, 12084557407109908164, + 18134052713971906645, 13522834551049471624, 11796035797088472898, 4408384297249203940, 13184035659663315829, 10318594606381671372, 43784547091343099, 5864759723778872652, + 273751009786971880, 17433500308695913052, 14095169299451888252, 9384697581863180516, 3670310635026975567, 7512955195757960238, 1061441390592395566, 8687976820410979119, + 5847434943730572861, 6139588162124989313, 16640425042872222986, 2868461478718351559, 4582082172412426839, 15999623457772192080, 3911979819012561187, 2157661418664981538, + 1679437503202036379, 3357369754420384657, 14227038694942012893, 10849243658636382261, 14698996273665673323, 15473144682787301630, 12886363027445513063, 4218522843946321369, + 4305571301049736163, 7569433164869749160, 9286896219015408706, 3061813620853218759, 13836461093317197944, 12893856993043908140, 17368798334120212403, 3507360881370918016, + 17887381848254057882, 5080557806347717197, 10977337278063191668, 13725230338861893934, 12618602415751234016, 7913767807930932665, 7119689779124353718, 14253582440914432409, + 16180079775228731323, 7952167928403988663, 17529082815577219791, 14568667835887326401, 80331144542697157, 16190739578357987369, 8688615216852085496, 16668129019590270615, + 7430615013050690552, 16490141830322172211, 9793941851666815428, 12678643730925993658, 248986629347794843, 14250412400606998841, 7671639316274744213, 198890803142183275, + 14099347485199617889, 10490379547566026579, 2979540189595374423, 1947868558918413664, 14807112327541983728, 2275659819146309375, 14464189275380358394, 11448352451702208893, + 6508543885912264241, 11768683250765503158, 10952591955476802447, 13816638967602836229, 15422608700909569544, 2270147616630940233, 11179267547770249728, 10448962907285827304, + 16734703714440778657, 5221611440144584595, 14517195248502569256, 17256223341936104100, 13260411672715241049, 15406026631864099983, 13737383554048659263, 15455560302156741675, + 1751310744149771156, 13066379976997527874, 3353407191478085130, 1488844206746128853, 5630044290904406795, 14205180196532348798, 10021323252656730909, 4177092513333669169, + 16120743770319899284, 12307096057555293266, 15459231763664176764, 889710173957193674, 4419259815443372538, 17256024223837463177, 6105822529166486288, 14459703836399905847, + 13918155529832261002, 12953608416498491778, 4285102780598248527, 15853935066983430239, 2610351403194505517, 14905969693015035277, 12043976761492019990, 3883507340167876094, + }, + { + 10994470105201067642, 16033352651464686189, 15774427704233149170, 4596106024708523955, 8854068685612048031, 13529126595187822090, 11509928293490788309, 11620850709434334690, + 2068337952936280662, 6856444059219131741, 17213926210819196459, 513413453513211652, 5520465604253824995, 12970886588014924605, 12296275668437451137, 16808287251356954962, + 3413637388791433603, 16937929882486384885, 11215805046681294327, 11559398419268924015, 11697258172757450737, 6137795229387768085, 3190984692021653574, 15347236891154706739, + 17955247196574331290, 285928599571507431, 11902987704297070981, 2841167725097799100, 9281167410796848363, 16444340476026142274, 3423202423051178709, 9101021305726472246, + 7129792976024548817, 11116052691850916690, 13765136177550464799, 15815419857669199536, 3490265904262366633, 3075105190264147815, 13090337405102529529, 3772589223509323910, + 1865569627339845981, 14405510401062639678, 16548248891153314376, 18134729633170718083, 7342289204282809135, 13678804825671892797, 11309594694670555920, 6482941613725556691, + 15861081392207208208, 8373665453512079014, 1489270058265748447, 3594001972161344915, 1115942979837741628, 11080512787291988167, 8129036993725083225, 3358115086982889477, + 8869273893561412239, 10349812081309865197, 8913824872487209090, 3148808685596180274, 530908416884501807, 10284103744075805979, 13606889938905754722, 2418062398586758364, + 12413212040240282991, 7478703750955930147, 1623142090654002287, 3800351594986125089, 483782039790230646, 7823394007049468275, 7643246613581822171, 15001087565682987468, + 4027740097060141621, 11299467443664320327, 11151439614540606708, 866223440636939151, 3440384894029167420, 3873155505711856537, 5954029035616033594, 15381861683078360474, + 536906646315848696, 4762701494256722953, 3244970900590282998, 13809219136128336072, 6259009038800226590, 15448401588227595780, 5228638796256712654, 52817644221507266, + 2298603653884322501, 601221199968240092, 1285112712738261659, 13882709760355308533, 13563254433762438170, 13447718334658435053, 13493687958886238103, 11032778298151679283, + 17130548285854289846, 13389656739579236890, 11825243553071488462, 3295650559395515332, 5681593832990094932, 1986112566735995333, 6900923912677810734, 12885634718618182763, + 5991348986251844787, 10652959958005297329, 4727138097613835700, 11665606181484659859, 17818486388489237050, 17381457056943650121, 11483713831771420728, 14346336385265122549, + 6806904066777263417, 13464056321884738018, 6982639430167685380, 7669355554060154277, 5859444273628884163, 13073052226541367148, 9082949662405155834, 12390918296713433257, + 8626760324117733843, 16403951745962159364, 5837394731212322914, 6847167719031297053, 2888142337636247281, 3409818743436360750, 1677113113408664401, 1222157560539217476, + 2099068961815044779, 12562181036171628906, 16101890713848274666, 6017764396716978009, 13064414845499489050, 8419967306079989759, 18212218073753486044, 1008986649080828268, + 14883940931343463293, 12319376897638542821, 12367185247284838739, 5861252096374912155, 10018798299303262817, 6422772579819453330, 2070123940915480886, 12141302052520620302, + 7347211048650261526, 4324486111460763452, 1238262417801832007, 7620393213816116173, 13629115925647807355, 17975185632155863015, 6751253893269214002, 3530207280414557051, + 2294668553308381923, 17165287249067859297, 9897724011823072967, 17409851874231284314, 13888575318617508639, 12092766090379658603, 8412268183255186961, 8046572777745318483, + 4493498909328627892, 14652253245762410953, 5345262207345951514, 3970988687199074488, 2208768899312608620, 1510839623981662369, 642580537527653605, 14940785106713466054, + 12480576687390709541, 11249304546887636239, 6075047530003410694, 6121669232248820705, 10309256543424460776, 101796817907499847, 2110451750984093184, 10778335615512351243, + 2885395260584376454, 612553588199732392, 17978615338185945515, 10414937982597482385, 6078565000469652304, 11381998957121196965, 1536418226678270090, 15114446343888525079, + 13178439025428734594, 16388038968589028071, 16165750006742628176, 17903639397633243161, 9560319088110737392, 7888974492854359310, 16116323630343158496, 10177829649622711030, + 12866004572891901059, 10190228418101288877, 4717913135317392922, 13460871068228811018, 4756401774767581561, 7711771325649205249, 13695132008344052711, 13361851345229950778, + 938191787235564236, 14328703178349042628, 10217257468952800498, 10415924210674864240, 7171739159943819214, 6502538915112206091, 3300616451202954689, 9576887970605726107, + 17259872866446195810, 3694244894150718459, 3214322135693408389, 15571503654130046642, 2332159362399220208, 14087696453363417437, 14318693886833071484, 6131092646618026480, + 2378363646789710539, 494954399989334038, 6915496914984330558, 757197055392642378, 9097171378505823650, 12104288338329241488, 1905673690814481722, 13011078677095066032, + 8123217305309052526, 2527408826476194625, 10072980201182378784, 15489255866670747761, 12978131276673936002, 8312787524887306087, 13561211528259491689, 6311660759462081662, + 16247831307849916845, 8654394682758927217, 1180998443932568506, 3847327552938166714, 17658215545137701161, 11975413369861829206, 10679985684889599173, 14928266362876119222, + 7605162785889302718, 5718899840342120733, 6057249397801266401, 9486440247354542717, 10618370743126190918, 16153494357430969850, 16785070420314699787, 11277000340020413902, + 12712274459771886808, 15748114764509451764, 1994630377849062253, 564390855806287868, 135493276425567450, 4305883520257541088, 4842008929542073044, 7035106970720249617, + }, + { + 5757832545805155636, 17609080533053427627, 13855662030602089892, 9622498683452213297, 12281586442064581946, 610529441714948581, 14740357477019436626, 16983190111725558863, + 8470173855499872166, 15915210069852758660, 12368087246231545517, 3867174158581293223, 9082358201271846042, 16663463402757176964, 13740908859955866819, 7748969412274962298, + 13375424826931892789, 8508645806391261552, 18359272328758476903, 16433208398005282080, 13071423254987294580, 7999383023651164492, 14514701556577587750, 4848295889377673720, + 1020519568493754982, 10467781020270669620, 17607771250627508499, 6620300538436640218, 3858527175523168333, 13798129597334232823, 12495322651937391008, 3926713306087152023, + 5116038770086590715, 9943789276328255827, 13715439407309914230, 4887040860130243503, 18288477612078228131, 7640050573721350378, 14537362862524718973, 1658575506924200900, + 18400486168653423966, 18026904540891591040, 2037921821724043966, 18010956304511658647, 3751340308708032971, 16275459031633957091, 17177866831427164840, 1635431811339422016, + 3897330996014812355, 14347736534650088376, 16862404119462964612, 12216005411754788835, 11561909516001453735, 6261639657930189859, 9101978764314050064, 12280333151331067982, + 910276569977357717, 2872810615152156462, 1942106118995935911, 8734004121981822024, 13862118436432879698, 1912639548162720505, 13060137938875924732, 10642216911059510120, + 5132232031613377471, 3053931945527415993, 2695169719602504430, 11855046366025891084, 16462491222215366712, 16664416596202206835, 3077124688577987843, 7109495326459366311, + 3858762384693274897, 7520194928987242278, 4496744297661574884, 16159343214247089321, 6441167128044578145, 3604951495694157848, 3072075325567814428, 1964798869371174506, + 13689578474000942304, 8998724267038772595, 3350174507425430811, 15015413657749135511, 4848907595494161497, 7935644131326131098, 385849363817229511, 10087353746314516691, + 6067717601858189468, 10438404859927821308, 9991403870358837471, 17728358586134467771, 13476949968946830198, 7311235997042381570, 4429835371823736972, 17463216455983091974, + 3446799583712077997, 6763499387263152328, 9231082163128267527, 7733182914440272884, 17225092062157610409, 8163842076250628108, 14230171836718858439, 8195740716303697476, + 57759226194841354, 14962480748390190557, 2047254208877381876, 2830450574091058648, 14905486147404440754, 15792397930637469157, 16383409053580142957, 8686506861607761226, + 11120178572139812895, 11534611317577597022, 10437856602660752754, 13925966850017814278, 16776862023867799947, 17362554889424346260, 3408825299914455645, 2606150045870067742, + 1119490156854665483, 16772836556735128526, 8607715508817325509, 9931017263385440911, 8799481055779587022, 11740562709785257943, 1956849872282864970, 595288629437737005, + 2281206023740505682, 9926719762453358166, 4061056930877891291, 15576657447058286418, 2614863633636977404, 2206334532799385250, 3474210873232711865, 8477734925846980028, + 3340305349427486363, 5188523629566097463, 6160790887356413747, 9863988255192204305, 14066826366866912046, 11932779567129411871, 12863116515340321220, 15222969978942513045, + 12758642615291604404, 3209096496454847183, 11182564090025111575, 1931382908005123647, 15151007707308352245, 5954868822601995640, 15742433078711209582, 3347972315985628205, + 16490337555999239547, 8990885971520264447, 10360258506616249745, 6630482203535351500, 16999397485942558889, 6788531264412870316, 7229988042452941955, 9856917486277085502, + 10734804718395003162, 14385908274213668346, 6022876926209575383, 371182857232839872, 7412466195447558077, 7961511694540082231, 8030758761844457080, 8046696009323575767, + 6399666206029163609, 17663811097734928631, 11470778017516778751, 17702646621253134508, 14876192013866833082, 12935280981291312476, 1467399042175927183, 8848908916563618579, + 14359906986235888744, 4587839368531771442, 10497105860390751463, 14244616431904836676, 14906421794748569105, 7955760173743833219, 17152550002849021490, 138081467951201370, + 2349932829525749283, 14749355610935637731, 14751838563606808702, 18058355029195133983, 8461199524627795693, 4219910612621001507, 12776964042319070094, 15905069451612157329, + 1355498131832429384, 15975585519420207244, 15757549940022310945, 11357394570768845273, 8197697208897202922, 1901621009956576265, 4290101364365692172, 11947148614241269523, + 5840762872471677309, 7514977007446174348, 1527060545570586419, 16609099121469101859, 9320342325870097548, 13965331598495025936, 9683433385601741715, 8845290689278472178, + 14119395813630029665, 8634138112335652518, 4658233985292819043, 9622846971166516113, 996353694479629398, 7404001202059682426, 18145845493517798921, 6449664057746980028, + 16521775738375440178, 14142357232938693849, 10990147202113110230, 9959066954415780230, 13034621383041722634, 5025797339352139456, 10322111864643366459, 2614961497496501105, + 12557910439954509999, 12230084394391362713, 8427020226737465235, 12518021396913122088, 7134271418642259845, 3905501389650646356, 15707807001177039585, 2909191175572717037, + 769225015796913609, 8753059944613840368, 1611869096690868003, 11449828005116201197, 11517191665323762104, 2697717476879599241, 11188965056705006770, 6249359019890699508, + 15594613737645580981, 3037156291483237329, 5604071908229825169, 1604475019228431078, 7766918222574600204, 9339455406279970318, 16266564745812517195, 4799424886612563329, + 11601570184515356415, 11247678357869162156, 8415095559389686315, 9625816053586466047, 11807876143163335372, 15690209134241397733, 8684637224468015605, 15974968144193406020, + }, + { + 11275984449259340674, 14922638629881602958, 17186426309348732477, 2094500549052461127, 1245672419927595636, 7529921141299797945, 3181089597449057707, 7749529014968942143, + 12842581296013847382, 7845624487558348664, 8747439372999364825, 849305823897878133, 2217292264321907974, 8173534404179332054, 3588643190593022889, 12047811416592660080, + 6959527445237219799, 17170778245769606301, 807762173031474321, 9151445233487017286, 15103411489939446333, 7976669669591851659, 14824323986579638641, 1535817133122800221, + 2365455792963674834, 2249234228455309648, 5237136544555745182, 8814246419234451463, 13017813702607013347, 2241193992640629733, 17982226236989862510, 9800528153239873634, + 14106286229675768856, 13210380707737857536, 8049798856134302097, 9734560649539497957, 1623205786926824588, 14287958304696241706, 1800854015662052826, 1212925137572581937, + 3026947385483276729, 13169758424754896088, 6834174387382969128, 7206544572805916922, 6934064062491731757, 10620406409375153308, 5194048681860016706, 1469688754160716493, + 12993558037376222988, 4669474265806332952, 4832324179281001408, 4413085485243863075, 5405837906169635761, 10091776239975644239, 5290719555562061144, 4213669820858776415, + 17271890190678399664, 284117971978206043, 14999643475704678808, 6264699496666119083, 8859934981351804429, 8300880062060532031, 3667839168820458685, 17133003164910522197, + 2997449698566957466, 7884833253198684528, 12498695470674374226, 3115564300965505022, 13376506794876055601, 17895331353362057251, 16990581103090492269, 4674862843410794096, + 5006430925084708381, 16053115139414023422, 7049813739763202517, 6676629839394309180, 2410666182773729148, 8945309185393628712, 17674821778351929167, 2683163960153153804, + 14505459891908974624, 1849503921080779239, 13990490450160111440, 5181610498614712941, 8653117877167770474, 8814499350344722617, 5343755460620184433, 243401361005198353, + 17791633957402883820, 5938849497629314459, 8356801465373786047, 3163889445599730677, 9723709910898285412, 2841967670768995554, 10896757438566636700, 7434996874241771203, + 7372440365321345784, 2862880403174214663, 8420116322934238724, 2818552325655576635, 111342786192022359, 74705672003306783, 6591511741514218962, 13124391319573921957, + 9931074113040228488, 15362193958291437280, 6613002366394890474, 5258448208225327677, 5126245224005997831, 8755434042915330962, 4405830479557957055, 13343591337768921462, + 9242810916685205316, 2838356272544913798, 10099476622515682546, 15551100165305852662, 737838832976059770, 11188026425234857295, 10166119013055947704, 8628233674684143419, + 14086272070125896854, 12009239107306499026, 6592816798466831203, 11371087141421707743, 16277593907837607793, 16449564916599232060, 17471042072158213549, 11953382410698485455, + 823047236880870350, 7830286704456953572, 6464058012664717582, 15521380082330968365, 10182202815901986450, 18143527955004661429, 3207725716803325445, 3193887560605396493, + 15569872930692175382, 5346306686087630019, 6632365629111827330, 15865883909120927399, 7200040798106687333, 5723169387900917854, 15027071603483375844, 11018037359110138454, + 741009114382240162, 13879961246490269823, 4790141947869766711, 4479737908710468090, 4979904969974036721, 634413885118778288, 101727670723732801, 3160663692354720401, + 14613304240735929191, 7658770973133328064, 3028731168325633068, 1486737152001467114, 16715835021406106944, 15214200823642287507, 18441898204110447697, 5669963154424393607, + 4313074171267006471, 16217697149498819074, 2027993870023154350, 18018610229517524263, 117205923522590215, 1784380715025232199, 307956084870608716, 4140912860495129528, + 3938313265660748839, 13018885466029447230, 12461354215832959401, 14495299413345669177, 16234805331742830462, 1578184208291171741, 17168468892394781152, 12575003872292801090, + 10424668870965277693, 8952352824982702795, 18017430252932614600, 14101949308713937413, 16342352613992153502, 6862842966642668923, 7238655696295295374, 11216037980129705353, + 8381651800836461905, 275661552864412688, 5548629401959193001, 7174991431295718071, 7441251258233899268, 14052832476485122412, 301250917807487282, 17797202428295526090, + 5173263645506172639, 7621852612520351243, 4670271322283433581, 8663786927624336026, 14433502423455668413, 6926265712671351075, 9776903322968672826, 542207238644018218, + 9588056942118198784, 14024040464098859998, 17712446698584877455, 17945455593410056285, 18104078100936060946, 9565731894604896153, 13213845893767817606, 16043575675737450101, + 3436179376183792489, 8124476612946977721, 2169106928994773725, 8589833084708985039, 15137137534993478994, 12789920909097626916, 6767184505234754407, 8369097443031908183, + 16448089571044203401, 1396460397790470601, 9796571033221459545, 8162511506983092708, 13893406727199803081, 16188892369453405032, 16023105002157964343, 7061289566170423383, + 10448117565485426152, 8145533940921335838, 1579413779055052019, 14761309857236492983, 9815666466018583851, 15131106965446764630, 16986514923298225288, 3751380281663155649, + 2224143503200162255, 9410177163066991107, 7157489123570995878, 13420515130397093319, 2675717750075873421, 6702938999067232257, 394050047437199204, 11922372120263518221, + 9930837889018815244, 11397955329795684905, 6931891256005909739, 516538601500853702, 923444815286512108, 16665363217941733257, 1377231042965759902, 8439923996651310778, + 8743817608749172620, 7804846024944300667, 507107926259802795, 5258061329638283805, 15071070675288472598, 1848743106176521296, 16295570828486620412, 7409462376590083349, + }, + { + 10223936123327786454, 2321887803300626936, 13607747528117965743, 10456421630388400123, 3266519591187534953, 1053691207967834494, 7151443045753788549, 18124688636664828399, + 6946488664257064464, 9775328903143754150, 58700362449753229, 7901817271072273406, 5074738678159003249, 18397239731906005785, 15911102965988125888, 9277654580204145348, + 1874700306906525099, 7337351570334852732, 7954195906114335343, 17230226427261850070, 5181329498860909261, 107167242193237170, 4364856176182710332, 14611171841443342914, + 10990327814895720476, 16588412771027628390, 2850177278998422188, 11569813740328484871, 3922148781422941186, 3581659216490446242, 3646872304052351376, 10887870617200051198, + 8175121141368087217, 15079393830876652065, 16766987879267543828, 10582796406546815217, 16832256009371376870, 10042650470778064892, 17867163453935683780, 7123418345572641116, + 1782651072016504775, 11033882508408412333, 11105300284232190625, 3276372405089292929, 6399472468041061588, 15319690780388315076, 4928530863063375966, 11596222198340277418, + 5452424185619067445, 12050433097586021281, 15018766900844679217, 273234254926088220, 15426383635547379531, 5213762666043737626, 2829185842069118470, 3230799283770935127, + 280030342772428106, 17354882606189694746, 17455445295460080284, 18076709382926018941, 9895982528034548913, 13681172423928175330, 1656433131309057012, 11935757350437721114, + 14437472586336929440, 17387041914242263390, 14861577074379746687, 5146556788377906297, 13036814755814090195, 15108836859245573802, 1667563994642521213, 14722845469958372258, + 7781061995240980266, 2066078418484154391, 4089376589847114892, 2434843609506038666, 18376509832460312721, 7187709121160627756, 2455436656049651823, 14419116837518875372, + 1270185079976403265, 9214436628411717184, 15450450827683432913, 2880014804806096082, 15448799931519826757, 10080240282127928735, 10673974088219619287, 12998944469117087518, + 18028691745165748687, 8931519595829275195, 18379893031942930865, 12120980098075498008, 9596371470855895261, 4133427878071274570, 13159312399982591699, 1639865074052258019, + 1661696187746428348, 2656198437014978712, 13769477291975851694, 12512848714492869444, 5980926616932344693, 15821983893144953005, 5816015630031200526, 15887565799959254415, + 16463929919291729278, 14920112063826664828, 10056471508435050704, 10696267144984026763, 12049530292963090991, 11926086636123189182, 9464890378472966538, 9719194510330239601, + 18110249193861022779, 5612784685592189906, 11360454032996393150, 6795401682138043705, 1961398978248902107, 8999847457729547905, 8941741619641401180, 6086938661427918241, + 10153847769616409178, 16591913677701651064, 11369151065084080884, 9493662847124813178, 14299126668179023903, 2451525064998641745, 16055696673728703463, 7600736037142850105, + 3331094657189179723, 8579232659787725167, 11007434086335446933, 7488529659250917009, 17217317841029490639, 7202184784979805220, 10955913831459448069, 9000508720500651545, + 12966352809478736412, 9643078159289928329, 1577471789984999485, 3065864601417367655, 13683627106599504109, 12782093364215596525, 12883456287502344466, 384144415368165580, + 2010495664660209855, 15520744321638216967, 5385447087428594566, 12897496516555487055, 7343279016502952747, 11323820480524965410, 3414484402519450292, 2497721392671865580, + 16065061175628739123, 13981641072236579791, 6942197664821630380, 1667302033233547783, 11254994815452647404, 5265777633437443003, 1242523090981771320, 11333562630675377559, + 13967113064180443992, 10679621597408504672, 16223869620849920309, 8813286726524283182, 2000697484240530777, 5417120625530117302, 12109041278382745291, 13257693171778388584, + 11737174095432286870, 13459661128718998448, 4655386724404378836, 3119616752887818891, 13324285182058113375, 1078137740985006431, 1610820303976884986, 10433300853468291587, + 13552049979590080822, 4379357932546858489, 7594644709024650108, 7268811721525297059, 6728671136192921869, 8183021491392757516, 6569378188404015319, 6481294988812800823, + 6071501855022377789, 3805172128285889052, 6802689634429908933, 11713310861536728752, 18291420299754708339, 7582277238191809023, 6183429490206797252, 10452668103640934631, + 16083971675285095468, 2965035896159234764, 9379493912871845724, 13109394112987303523, 231416620515766991, 630445729708227114, 10015412267172773636, 18104521419340860951, + 13169770634232644390, 12462964563528956096, 2689434882719314440, 5936289129999384022, 17773436345927091892, 2631892834965929605, 8167719122803587721, 2458223071457452931, + 10744932251818515465, 13367480890778627284, 15909410072235544918, 9252559547875570290, 16355895225285344183, 5115003811789024567, 7369371640924666845, 4586532908333332092, + 15999516281080615520, 10972478396135936147, 9002100275598932738, 9043399249857967875, 4975188939801806064, 14240375126510021504, 4497218436093285729, 899858725260616391, + 4788026362409273347, 8940774760122210910, 66616358927988247, 13054723206705508289, 6256911511259454580, 589353691478007084, 17831732841616807303, 10847300659030294141, + 3016120727980669767, 340560476433708073, 10516500208515875331, 14933811541787518485, 9941997565508314935, 9752941942651093949, 1928238223054143159, 16147782066840428568, + 8953947187833770878, 6627103431982157573, 1279114735725952920, 5378128106441279737, 15447712974588571571, 10552843735673449739, 14956126644414569830, 1994513099978508850, + 3339871688614476269, 2937156932445032796, 2071344655111136329, 509603454456841298, 10929109734329673813, 4531629375826317764, 1802431569220697099, 7971315018970882891, + }, + { + 4307546666223047416, 13914498777427309061, 6812156124789414732, 1680174913762530491, 6756325174405757110, 2535144759174364990, 11263393915003278429, 14767595249676286403, + 6981289090623415465, 8930069945493884157, 8223216504978132408, 796477795300774057, 3120155459209743472, 14825603392456550470, 3711879425019804830, 11041054473536223927, + 14135544350792474553, 6171419888118563156, 12495035851387242130, 2843310747449412555, 5668766087526340552, 10117858698838396246, 5181027414039980750, 13247490124270426, + 10847249691051757241, 13332329214721571156, 12457628412284474075, 17044657510482764070, 5218221397478351943, 6922630493693541156, 6207927102810766143, 5644846665334649748, + 3169200533952511826, 13123046990387029463, 10303462306703730135, 3315008874027436174, 12427067008037332041, 13190195608614554888, 11930408619792340463, 12881268563913740396, + 6700154625941523096, 796110091507466427, 5388163944686450548, 3113104467109529928, 12930558649635882086, 7536049463980825715, 11851825635055645132, 2654888532632283214, + 17530163750522835336, 9722612350417778827, 13244522480616169491, 13740964907388846002, 793235549667461791, 1998252773625399730, 9697911491069876538, 3365596945186794622, + 6506371904134856034, 9541373683805144018, 3149037850125132851, 7111810601994573897, 16732445693192247884, 4994069681898274622, 7223719243730104348, 5649174084086150712, + 4640203560336739679, 9900565149463137003, 1093715346388527458, 17169722430093991108, 14911871124664548213, 8091096017016862097, 2911569128494944136, 10192934055727569503, + 13251895658744991883, 18083873142202267221, 8241707078409323349, 5240943892375002838, 7329419846032881810, 17784005510850539551, 14034072243705748841, 10459194866755836024, + 16396304393792022741, 510358226971662693, 2002127643935690257, 14681425695950046262, 481902649451190280, 1717807040845239061, 3358903321435525144, 12361252150277163759, + 16562962997045986629, 1977585352264948582, 15588655103333244279, 253344970359156714, 12939363813291366775, 12548224526376199974, 14450582788883748364, 5869541533939881460, + 15346378007864735078, 9904337403373774864, 17933855382993937953, 9391248304932723816, 2250334275383247202, 5978804056136420670, 1528238852592364099, 6491566547950141733, + 8439860020828526478, 8674532462170555517, 13889716766953024111, 1166696901054897271, 3599113965286446905, 3276801695553601157, 6631284861504552600, 757022358659145685, + 1117291222568805017, 7148845117809308441, 4044258438241699262, 16593433123496068718, 13992202310165134218, 2998204381658279116, 11137382784909083266, 11972581616612094469, + 13970586842647892965, 3680317572634289692, 3416494470839852757, 8341497967750607131, 5522860734580749818, 6847292760525096386, 9317394708872196559, 8522252330730808934, + 7951036477225560362, 3718789111033607646, 11667291286820266731, 6927579620295878614, 6796746225018555970, 2070061401518338268, 2452626412419851405, 6130352643925327785, + 128317967134069640, 18241952544586374251, 10689311587315723515, 16537482248073764435, 15914436258832414511, 15518344078557945167, 15883120902045970449, 16251109112716994957, + 11957637677355945602, 8965882791601648704, 9390723380463051523, 15025038333096274711, 14983844148312808973, 14488574793452236487, 15795526425513195718, 11651978879566144360, + 6838341670149165895, 12326189699069923388, 9258908232339348843, 17297038041466207989, 5242182985453021593, 9648294165629935142, 9715128265517308395, 2662662536493743894, + 11289660454922592938, 470300170724369955, 4571871646010841181, 8724229358245178735, 10173959190953118810, 3611286966550223594, 10810806064515103403, 13571898391208639897, + 6782514579206442308, 893150666351200058, 2099535232722538223, 300696716419619813, 4778737709473194628, 16786610674618434170, 1751624209281870237, 1677055321500839173, + 8950755642341912951, 1974922263791844623, 5333377426024439800, 13909803085973342843, 7639604505882407937, 16195074617613125552, 8954461446680048430, 6823359994465290754, + 6790869670806738567, 4499989093713728199, 17496748050437205626, 7194829324969293638, 18432532588985091708, 12041300075343806904, 7788543067151959941, 13703334460383638224, + 15088053026772564466, 18172984562199594382, 3119317310593433742, 13835498717175822656, 10015747176747842576, 11008571041665092033, 9377877781626879674, 10465443099481589185, + 8933481722931715231, 10811948483433850165, 16314247813274714890, 503147444786527544, 2533443680646505069, 7617720894972668760, 12477410920355308950, 422423516920457876, + 1501843276089632258, 18180777013307233967, 2535426997299341256, 5318958493284100856, 16758265544916918856, 8118598628492446123, 2511013049335776232, 11181447982017705826, + 5996381448168274884, 8108757135223610234, 8067369725584584297, 7184925262427750366, 8199341143188441284, 13031376443169557016, 16036606349050125404, 11958720833222295864, + 15336831835795822898, 6806921200736832713, 5959923980569572002, 16835674225684329982, 9985579070716031367, 6517360043741716503, 16609518082836868591, 15878184747124924573, + 4786224758360153905, 10283234398763800487, 13695837902615591472, 7260571034197533442, 9225062953237806971, 5969706575516748564, 6537573986766117532, 15754484267739133126, + 5004791824134844360, 4971286291135250424, 1811371968353107289, 15772458387007519716, 17381493577383518445, 14326903440006940586, 14237922492359728234, 6234116464684325431, + 9293668142017263584, 17134656749295164870, 5879480323258506465, 4788695233340705697, 17809277710857366810, 15157733645094688870, 14143869691462709080, 3162286338158090229, + }, + { + 4471613312912718060, 13601785177495103328, 8083849806888905297, 9053545412554844461, 1876605639999952155, 6124537911475118673, 16877891074567085688, 16541720065079132344, + 18138305348849897995, 11498878557125400853, 14125346336359365201, 11991638807788804369, 13935355764550912568, 16209147067155729660, 11086190285777019292, 16727288863943596410, + 8873152377611229668, 6260821826956744083, 11489383533808918262, 6585112195310961861, 7365821041919648819, 1936165695028211587, 16164747634991415711, 17957320776652490392, + 13065030057172289846, 7278765836407486850, 383827150208081081, 7832055893191054601, 5249323286889945617, 1631661912887095912, 4431088469837785451, 2866263172993336891, + 6140213328062333551, 16704025329777067037, 5839832043209342792, 9196354634862869885, 2735762290983671190, 5323501742548818139, 3199127466610501482, 6719610946615763192, + 4032564124345275293, 240208626666701335, 10264845378648897042, 15608289126091352415, 17866864241918154829, 6207477318890336372, 491708931325462670, 7002400618323099260, + 11507747783030541234, 13683043390668171754, 9924966309328785763, 5549516466627385909, 3724001483041449858, 2408778220985427590, 10510707601023430105, 12862520912519186496, + 5012372868660694231, 12033652755433827667, 5960732801180177919, 6089487525155575261, 18400192045204062277, 12754175136438960528, 6170965736016609163, 10141374482047067946, + 11781193922347227447, 16937876011211881637, 2982972111293427964, 1728244726890919596, 3327997232304084768, 4754833121145407334, 5871027825507908591, 15772493911709624273, + 5862944033912881884, 11132730978666088350, 12167180155548291888, 16449838286695349787, 14399676610730016345, 17071083123672650337, 13727728787831175379, 7507765123422748291, + 4883057314704644529, 6606742732149784343, 16070555141087563722, 8408294891669305261, 4298316003054547788, 1573237855902568238, 5357176885445018289, 5855732582172939202, + 11721126538980357625, 7216004194033429913, 12765889884935948315, 6003399149613653734, 4571046290325663920, 15023470461830485075, 11463513436288928362, 16135217522406374542, + 14300855104129153291, 11491527690078786162, 5674183453812271017, 5044682546652747711, 9153978680942338831, 400454762074409409, 12351470849648616331, 10559229320903248832, + 11839166519633311143, 16489041153719856002, 4820713204385149226, 13768001962143023630, 13107713406478328396, 15471245828032402460, 6733652296473831544, 7665657127090105428, + 6114372925011600760, 13061069979571997963, 12156991017966837826, 12403009591138383583, 5761958709898299258, 1907771304051897263, 3007439379929412593, 18410854883912163962, + 14408045416865494124, 2442199381796557002, 10475326303775862953, 13637445125954159957, 3778313603630758744, 9247460369874272772, 17572237597883145087, 11338350096360776602, + 16486104352829691584, 11856560607954368882, 7574789609701925021, 12240083183242127118, 9872957337376856275, 11508929285199799229, 12084460294902152451, 5537978390111837838, + 17987471707896681874, 13038244305326284081, 6130570860032709721, 13697749322780057427, 15250226813583295859, 11214437899623632196, 11227424166765094355, 16073863048822462466, + 12399110995748365941, 8259021120978314016, 13249466772279340115, 11201772702582865107, 6680428295197296613, 14825065014187212998, 11638499193934854444, 13944483483619391536, + 12792956896156056673, 17521293339475028823, 5738513258925006248, 7070317930523546296, 11175557000762820214, 15356683721815441509, 3038847573301969409, 3712495274059632476, + 9703339902904468931, 6903532388259237787, 6598718994389707957, 3524767540333384210, 17230409287786477986, 398508689507908728, 12956323124513250470, 3050411724956364887, + 9936181795041159997, 16502578966693139346, 12331385558134766329, 16368402633574222539, 17549179704234998175, 16732353056786161257, 7018642409136068141, 13907639522324102590, + 3202436334590923936, 10521323531132496064, 17087322194145514933, 2557934785422493682, 1054672647095378624, 13197226692868722325, 1089249862965154915, 6479925079582112772, + 3880905381896902231, 16702686708922296685, 9815623322445801304, 3275100382267252573, 6882533095211022198, 11000715667767797665, 17615808190378608947, 7418883369171949489, + 15109802007060416273, 6172716471862133951, 10675428021956965140, 5719399515177959211, 13414568332401128475, 15437234656470291881, 15174397947571718085, 15093134682110367825, + 12767354263003560893, 9423701226679222965, 15980976879563466685, 16370172902785037004, 8843543841582241065, 16817827501960591898, 1524531339915785994, 11624580276885598157, + 17147068462804570503, 413038056092618151, 5178874804057490245, 6267545065511948977, 17573054445714024932, 7120476833739422223, 5707115123830922617, 10971108972695387841, + 12546478152711337402, 3213958910433616233, 1066524299473173792, 6276669942370566610, 14816818963020520773, 7372806616674468946, 12220278123497641030, 4953625488417522128, + 9443607421568920607, 2583030461169574647, 9900905053299561307, 15892418329702922907, 1184158607153919924, 17842123060489837980, 2681791639846598155, 14394984710894548047, + 17363678829105605076, 3907856148144141388, 7535915338133892356, 4354053943074081189, 9644954727645503986, 6816908226661921388, 15867806610099035654, 13874346146744695721, + 2654416582410062512, 9232324386948815478, 13864516581612906345, 17132638924861000235, 16875312639179645137, 11536057026563830119, 1090137303802556590, 15667188983774010898, + 4336697789732072031, 16054848662858822197, 17934449813415035557, 14518380622432694666, 14100582434152190048, 6262866632984449325, 18069160441815400774, 9798342103009260692, + }, + { + 687104890149081183, 1938816743979859301, 11342071981175913904, 4270322127259097184, 4964776270155503160, 17849333582861534767, 15814479527332819692, 8083943565773123408, + 3986890035154628803, 15316660330065019741, 11560940574529263780, 12576851520091035878, 10883944418224886092, 9908329441525486205, 11238684990962507809, 12900626758810151662, + 4077800412543228484, 7173833470188867493, 4604439809988262876, 8090706047559948234, 6117166546062912279, 15748537214854486946, 16719594636199001564, 563850925691991999, + 7109145612337154341, 10502845970727626855, 6527587047878193251, 7049690136888681718, 16750703006415156202, 2559311261501721445, 1222802194566919230, 18019462938525560766, + 322982839910423931, 2590270731903609206, 14012397059804449007, 4254569016772908483, 8508420263334539908, 9175848619353159735, 9777676085150569067, 51565612432741329, + 10233009747926770582, 4858065366607044068, 17837971428933010861, 10016544790726166742, 15260359301638740999, 5101999662344938158, 8809855615759524306, 15369468763003019252, + 4549989329927399868, 4866130696079291940, 2784467031729859937, 12617898982021887868, 2721301974598150044, 1014232218549205942, 5446756626431953863, 7315134179045561903, + 2592184418896595488, 1723751460114310463, 11198512454461769811, 6203707848404803677, 2601731234271436477, 18186700975610533226, 4757569424831899615, 13359873383138312209, + 7339520839328992093, 3349084091338116256, 16532710170417471614, 4425958052714174548, 16833748535746803845, 12665172019028452441, 11274971974705134171, 6504206221872290467, + 17921256304121537300, 16573867650874642543, 18439217559900658467, 2917797231317530012, 2415475159165597656, 2027542924210178369, 13542535558170959524, 9428931592662127870, + 17821420552135083528, 332847653840014838, 11832506312426064436, 8314663852639826188, 5744960486691350299, 8069724388577179929, 7949740770152910898, 7431049632429535950, + 13904640252203718708, 10747313134117021792, 14667070697514384141, 2600977347774743546, 15498986940124021079, 14740645313274577011, 17403811028932224808, 8060102076475954685, + 3517334016249952489, 10826317205305313746, 9664766330892822745, 4145357045596983962, 123566849370572650, 6452080295201963409, 15558941970553263818, 1191650177320171029, + 13303794914626036653, 12135860919529132656, 3620217653879289461, 5443663950311568399, 9020744500355505374, 11953955628401776228, 2101814526784564259, 18218598416910848525, + 306945432061418036, 3150552227215797902, 12928374347433953916, 13299101042862573370, 13702883318367407795, 4879985767563062391, 17591048490944890593, 5088664549487213106, + 12990649636107730851, 13002224935892458030, 4380799638612122791, 7935443787987894924, 10438069978940053126, 302419699431148366, 551508078777729872, 366155636877501719, + 1657628676376133571, 11398060401420699070, 15376723048512638551, 9875623386041366116, 9978954704196888739, 13768323245406799962, 3568095027938899541, 11120740210192996096, + 15561317952331530901, 10479759083279078649, 16536004935523752634, 2928518661531554130, 736056663469575928, 6131440330717529370, 11275149343392851570, 9601030074480228347, + 2397968519718544666, 7793046606567840651, 8971792946996316712, 16581860514259510747, 9067804351225143661, 12322425857061932421, 10680196669251962457, 7226521395530060461, + 14734735764746786753, 1378251478826083755, 13509550209468602124, 14458158239096821346, 5681313138148713219, 9054039627425609375, 4235536482830620712, 1502753755874778370, + 10053374816829223121, 5956370573575798562, 6842782625116195714, 6646143553705917790, 10487617951080656190, 3930886288654953669, 13141858447548326172, 6966039733711006320, + 3565931266726110263, 10142260659118036168, 18315406451571961983, 16857333990691080595, 6889519058362658033, 11812045051361479600, 12503117299646153207, 7223531498406875484, + 15243128568488310595, 13798032762330794140, 4456440673105474920, 17896260893739107970, 5519537045179649847, 11908888770632658821, 5444504459295913107, 5588728973482930155, + 8914021876030553161, 9071747156187788379, 14974736924255494885, 17070198435868341999, 3643991391224035758, 260637732328275649, 17609644917816078225, 2302996326941944665, + 1293835161492069734, 5795029109259991590, 6978986253519330084, 10360755458408067486, 9467742127861782141, 9197981630801498774, 6297600026339958279, 1375635441408481019, + 16875086338187280639, 18188722331528885957, 2047282716697647836, 17808297209051779725, 4756860180475663362, 16103863616340614471, 4416311024167936572, 6340715229216403079, + 14261984008515151673, 8778722476581699445, 1624620854396291363, 8490568310899028272, 6147263592092458803, 3717414274397202281, 3449154725620836174, 13526832985450537957, + 5888744936142942064, 17277140802638358763, 10290028650289820825, 1940657436863195045, 10796401239257150661, 7788083050548605852, 13474393003015663489, 8961936186303685357, + 81364364291228199, 11636155122637141142, 927710893705810893, 1403208391464987879, 4332234662194589819, 7913319387647351094, 2589089673930123808, 5387282875974124558, + 10611669207938469024, 4491327257859270050, 14129391883837344111, 13924810848449766461, 8792187899775727862, 15048142623570073200, 3933861866144269656, 1162055633060412126, + 6883724061714130987, 10416279294253729103, 12247033224345499651, 10291073256885105234, 4480789666471481188, 6022169887991948086, 3778647272347441051, 9527380466585130391, + 16407225233842256953, 13423683834633513951, 17454464203550700633, 6033758555290610449, 5063673994339442907, 12250392784929419939, 5954485814444410119, 10099095370725442279, + }, + { + 7669790431656261769, 2776517632462593286, 11694025667665633506, 9533794089008895277, 2895611631120558023, 11551410655448788956, 10026541900772270925, 6243136875017000843, + 9434484992529186384, 16435748047660609525, 16190660694150989986, 7083965852434071085, 4216306437741910756, 2698742177539497614, 5199793642504009017, 17298424769819019160, + 4041169394784902550, 5699517344009382913, 5306272267462320559, 15846674482556330025, 2606351264228884283, 4162585980422107281, 3715151019132039005, 6607223447043258693, + 8168579295855409503, 16727569921530031841, 6182114460261837773, 8940603165471931374, 6081572078077526926, 5890840443923124563, 11215305828294759727, 2875117534361804712, + 9045974983664041829, 905036705033699463, 6962033946652121779, 3027264198782618401, 13786415307358010100, 3643342525745067473, 13641958783381681886, 15675065537779359584, + 7507377696839752642, 1061259379811757443, 10276590160392917813, 6889137095037822679, 16373913505782725550, 12287733095803524526, 5695917172259210496, 6360958736925918808, + 7459854586357006968, 702429387211615855, 8231296036461604410, 628323703857882004, 1059802628602873385, 12517208225099517507, 2368172548334856593, 4792344750094708709, + 18352334786472886037, 7096021570596476676, 17045407505128867173, 2467670847537319400, 2225663888244226889, 1876713214939672742, 5329943993157142620, 12168650975912182188, + 6639850268372381409, 2284514769224558945, 15390110317831975716, 13933785559694229008, 12787641603764341997, 793886210532292741, 3222136169196008839, 11104892506868626444, + 12660547967989039787, 2109392063597767300, 9889743271997754037, 11803327596624324036, 6940409327849314286, 1466399127363116843, 2572333022681966275, 4216097356402537802, + 16858757460581672909, 5838407119753001493, 4453405435911954669, 2828665451507432751, 13657966632733241760, 6875986784381874300, 2390233934255482553, 17386653779125555159, + 2976756344404126744, 17032556609402836559, 16348907464011574182, 2196781021202618892, 8270822867494340805, 4738372358350764889, 6088256422707334932, 17121369334507507505, + 2081729301541563384, 5577186154173396778, 1865152567701500436, 1422284589642218069, 2489023909725140903, 276494395149046869, 17420927169263487236, 8292343688801608074, + 7819174654675104600, 12778055482430336726, 14615848543490171611, 17498415175263742825, 4785899184222234034, 5227136636239697699, 1570704808469050246, 2858953380860123257, + 4323577007857413306, 10524228743339400397, 5418808897687027557, 5939367271366264075, 3569359126353670574, 12961495213964770607, 8906990808988099905, 261084295374207271, + 13937553904380032591, 9165760362893872288, 14159606902675650669, 7794101517494576908, 11994596892880464164, 16211278212275417034, 1568324133241165712, 6579463633356173526, + 3784436423124943604, 14504130598322564217, 13343318845201913758, 2617267381187565844, 10063945174501910869, 8584806992850354016, 17494557973113338328, 16987450461117846543, + 3191556545349441897, 4780077336463115599, 102121356289129288, 18353108394754479957, 10099934050411322327, 10323433488737585897, 1480832277301232858, 8304664509429164326, + 16494820005379860880, 1719281141269927630, 2983529566257876158, 15928721259545722550, 3714203000118655889, 15922105900709017310, 17662263539397115758, 1577461381995722723, + 3426958921435818822, 395886748655323570, 280632942609884625, 4361473570148050146, 6600885194277340683, 16668323952296831943, 10057749804882861513, 5701592287343868424, + 6951585494335471689, 15680855848517030170, 9776921814538436791, 17640230976984644473, 5777838393869379647, 2176689364164694052, 10884627395386092313, 17656310368333301757, + 215205608065278414, 15361057989137229644, 6756215047166430628, 12215084297877052825, 7185815532109454500, 3394755837885079236, 1778980761557523977, 1861969072415300655, + 1535235777601157298, 8875662482214652105, 15404846185148709157, 6538418983879634448, 8341745508628160868, 7582603669233291629, 13003716464169800826, 13309194956036155778, + 4791947847339351337, 15204996809755844932, 11672964318683763318, 13067259871945801803, 2139419248113049376, 18195694858089271989, 7208559250123396675, 4671149208567820584, + 7276586425415793787, 14549539461274532692, 7381249250982432115, 3215735370879771405, 9197833835152018601, 17236122023193655371, 5029275100388702506, 10829687080866991082, + 15533362570521057331, 6942829144635570952, 9572990445468151820, 6294042535380467396, 9052929167426522790, 18293430694900525497, 9076155092724374912, 6410242671697688127, + 16930844972313552415, 9009509357611549679, 10722442939705348974, 4686888495512108749, 6326319203058274828, 2590592779822711917, 12876493112068819735, 12372064322375408146, + 11938822647496400962, 15333527667419199710, 2385985954862343680, 14776415112244140169, 12817753724474105460, 5129368568396757194, 607898027127847045, 3917240453172744497, + 47211231428738650, 9333275559955619158, 7534434473427098199, 8557232404317963052, 11597011861056629671, 15926835677212702441, 17943276685960195536, 2513989430679125485, + 2903417389682946131, 15905578940823611094, 4910425097773753648, 10817415382143821768, 16356392378927186116, 1174713154793914656, 9176413825070952738, 17779105524945038711, + 18302140065591767046, 6334261306636966429, 4203057092695998640, 2740754645452686340, 10020911575632378009, 5906713384548665207, 13902791548718280892, 2505591245136304093, + 369660229032730089, 11851915833529805698, 13485269529443969890, 1270120511649175022, 9128783725626301238, 6173048611666804214, 14151404531169195672, 18241008099921643437, + }, + { + 16226789363155700046, 13734705589524056856, 9629995513492843231, 2636144377026081617, 1233405076007260313, 2119672919365858964, 4932766292290315579, 4185117177904214366, + 16996170392430604056, 2476294162951949891, 6677439492116109133, 1649550562276235959, 7307278264575968221, 17128676264244010843, 9768062299436691373, 3825325732056225433, + 5861627228365499618, 5011684805137210454, 1579621475012951778, 11535371650014354624, 12847902922833318241, 4997014846790385136, 9875988184618650628, 13094082946822736823, + 17224650011872599657, 11430188086182976152, 12892063618843720029, 5912993124689510433, 7069617917494051681, 1298012103142698333, 7943359430865057535, 9329776843721719539, + 9091412025380293893, 8622355764644651307, 8718353412518918238, 12338548766610785709, 5241721208413391572, 6315749100915178625, 17919741546105181107, 13677847475856526539, + 17388254638056933809, 5422523708003180288, 16851423629233109043, 5504298365310420423, 15145549220326540137, 5184279399795985462, 2201159255780420815, 12506121988996036804, + 4460258780042860751, 1816027820333214126, 7778555198679811350, 8155451943744426012, 18392325810806023014, 3955295139981696248, 17141295608995204052, 14206938573418983337, + 9464085603669036454, 7848014338005901108, 2368914226155404065, 11578416111848517850, 6323647866244499941, 14399589996703947996, 2824522520842155768, 391310842153371124, + 17489550078249576530, 13418700716683405568, 8265132837103985034, 8386855948619700037, 4642739669076635265, 1941217860224507563, 7402267177254210690, 9863923110146355168, + 6176679707488254741, 4307819088229734202, 7082712522654871200, 14603822721598353371, 4115674343542702663, 4132348830250348245, 5992042367418590636, 15937819366199058986, + 11008669567537833607, 14841150752801539889, 5253470025479549456, 5665837575328478629, 4601810548582757669, 6173003636576141170, 6899193615399367692, 968231402917393055, + 9948650801233133770, 5836115460383443197, 11404170696974449207, 4791020823967734476, 423024247809958592, 11251668387880260801, 5562546584223492723, 578572156618131794, + 13456676488167240303, 17121749146829956377, 17370555398500311209, 5041791432274350681, 4009412063225388868, 2407629981571159579, 13052555280728877038, 11483117063845312302, + 13421089948800544371, 5001604265596626568, 15312985179876036482, 16032805766802337150, 12508684642650694186, 10422897636519090240, 3970071218688010847, 17419166239676042564, + 400453640980208069, 15220637399286523210, 17321057148174216619, 14860227640908306879, 8047994708253288483, 10174040071518845057, 3280567772472213687, 5651529697807929627, + 13988854693674964723, 10817154032069031941, 3171651998645961547, 5518712225783687705, 14706905091132091970, 13960758460934966876, 12141035142079693145, 3149939820435101269, + 17047631647660916802, 11327141967045825261, 6773994317182166280, 5064726840218426586, 11921340002806613391, 9067034595359929709, 16499225867422892436, 9333282251383805811, + 16851791401266483516, 3629537449474963381, 9616439067014312078, 17691280261538133313, 13915291913519800282, 4229243823856128517, 5998682815524220834, 18293451622818510596, + 13740317056016164038, 15410121987858832401, 12080671504470100936, 11635316325777860129, 2584464163154495637, 9849046399519426256, 7991103215010751716, 2109987257001640948, + 2505301881938098126, 9962323478923652140, 9312199523950444342, 12957822502673689705, 8060810607849163143, 737408553843844193, 8143568733429881786, 18003168270780769476, + 18361695650419575705, 15964191128341330249, 14834821085064012697, 12512940671502931238, 11592194993753547188, 14358438239394059598, 2876909840522954216, 13154864917311376528, + 1919485747906370662, 5250174270363591951, 6270889482486194520, 726576673961648732, 5067680336545953003, 3152041783219682891, 15674051618916413369, 12438966377512424334, + 7755405178378312585, 2001220826274540277, 13276328134279740680, 5464764824709243977, 5374818714652367118, 7988129049563289676, 13996688354532099099, 14091540329663446637, + 4460927228453411137, 16058422703785787527, 8016770980039986075, 11450941100964860520, 2742597710491507274, 16940752978671801273, 5897441526901650840, 5926672707007682446, + 11513132526446047688, 2371334524276946445, 2461552185903242316, 2195417192784351355, 8821293936856062699, 17392142102281287994, 4066797133115509650, 10704373174879556404, + 15778536817306082589, 5755579035239870289, 2105368957367433938, 16043807797277162048, 14990960263520315004, 15618512283659189876, 1020703273079622517, 9735594589053383212, + 11529096471626785524, 13217557548494128192, 6973227637542883184, 17901194406865837244, 2414269359614891938, 9165268075797348071, 1013041849562373861, 17525420894350269712, + 5772778048593061508, 15424756817708323928, 11842041259277183807, 17669952685051160523, 5405007811515938363, 4415223809135251038, 2223192286456742546, 15742105795795272774, + 2844957165101842208, 5188198001787563362, 17071130520794983821, 6691797472842348899, 4294126956896299534, 5788827614077663481, 16267876196920424765, 10795461545625002360, + 4405006158507706423, 8343317561528812223, 7851955346465616798, 14568481075913348758, 4391527082463073614, 13997921112025001396, 15259963045788570485, 6787776208020191083, + 4002627348223514978, 16231148554725644698, 9578235186329928114, 14741034641204805809, 14651073646349829984, 1797163177099497214, 982043220105120238, 9027434646723092211, + 11809342655313500273, 16822704101813284237, 13097372075606737923, 5463281428453250615, 11967866991673525562, 6828790575157689223, 12568831962097805646, 8385980093725157349, + }, + { + 9246769514475702626, 5788284997389507365, 15857897362850327403, 13864676801095094218, 12881526722328523267, 2620841421323221200, 12599981775316397124, 12820989846844560040, + 13066648645100247757, 16727492115601921665, 15749471761440498106, 937594351871983779, 8381111811209734349, 10884357918725639691, 781591570669492079, 12778743765298428266, + 1797915488494232282, 17274756142259274463, 14207994319179101777, 9405903752538915237, 9001743317941152797, 15048752608061590843, 4925745663463425863, 17143694017177138485, + 11613437975225156304, 8619196433402562266, 11907033287998837424, 5056904365610561965, 7637552956459333558, 2827449950719061527, 9998507085256853501, 9238562885525900325, + 13246436769495027717, 5445015753017977819, 16266739009580878036, 15566606095998238977, 12196078605380432771, 4854324313875295137, 10974170498288217052, 6425550765546527417, + 14574824159194628650, 16992289415800830701, 3499062703303067574, 11220872042664151226, 18135860701984580530, 13630928571014923388, 2913126838766947469, 15869180955632349704, + 10621010745328025245, 14637275395423748577, 12928463178963493980, 3270672471462681194, 11765440832075775157, 8138439106739837848, 10004644076263261924, 12582897670868871780, + 9605391611675301152, 14014632424042523224, 4638465078692570733, 3013469722048440965, 10972600060210514686, 4497572559400894366, 2652629676209366276, 15827846806499715082, + 216774559418075105, 7056732230404238374, 4342481467357759950, 16143194646968227790, 12304857685680249595, 5021239809847286634, 745754913624924064, 1603290801266214539, + 2784418922013631805, 3554650219010546629, 8896448905401216908, 13445015286698819482, 6508982623352460996, 3322529327934132311, 18417788670080975365, 17554108945303789353, + 17620122226715347133, 9738429733440215024, 15577771434516492888, 12422193389576942718, 18383756008252443605, 12736926759685644351, 16981429110294392086, 11999528928951986433, + 14489177617081844909, 15995916840320959685, 6361381313838994395, 11146176143708648759, 17190512001934479207, 8406914945663974955, 18073221191428103088, 14075266763636071788, + 11493260321718935244, 7134447477334726345, 10489281872281557152, 6145540581503915475, 4002857892747271740, 5167943945955725680, 2892864850826359384, 16887114279977647596, + 16493058314197913501, 14642843949567816202, 8421201635021034279, 3645913604138483317, 6127272877310948153, 3660286308390076870, 14343325047340052700, 5355450922054278073, + 8426383571761073988, 17627146217941553397, 7396223609641674418, 2402241526082789613, 4067059813672963823, 10306840429023537942, 18342506396531908477, 7249869764848707701, + 17055119974261943875, 6266720893662254540, 13102139953381148874, 5249314164321788611, 7157525732140787612, 15381277459195914226, 17265025258266710353, 17573500432599548120, + 11703325491493519624, 6756922948215553472, 10849241105791329449, 4442124988071574400, 13193138084274485850, 11408181130174894537, 16602978650637017500, 4282478513876287613, + 15849352004835856215, 7022332658299393045, 2753527422472509447, 2508636546397160815, 18156018186852519542, 13445036260276495365, 6279372959444826452, 4487789154840943220, + 6912674041294744283, 18177825269025993506, 2129493844567951509, 6833453501368177831, 1878514166200306112, 12873954541639117785, 7984716702294892278, 1590286870058148425, + 13310953099825480439, 942949035351116482, 7073926138905956784, 7700580694019269362, 3135382872993338421, 14860196563555722941, 10662854588851556233, 5736680857908686906, + 3285730769783013255, 5648284582450022694, 16712547815126338303, 16508335566005209527, 12272617736405465053, 6690178916455692547, 3481505327914964440, 9148348679521508925, + 5445751308732564169, 15671537507319596886, 17721221564106627243, 379590599139121791, 11923849517192812255, 11794879354344258105, 4802202540817083733, 9502952511912834734, + 10387170863530151921, 5325952605083195756, 9255511257002170523, 413412942592261158, 2693041233704004652, 14135620291598275670, 11595337134421837671, 1965085859594934829, + 10567751951694288416, 10466024270444644770, 17138727632883910786, 12652567654757797118, 13864111438265559205, 2552332838857882141, 3739570066357761968, 1439381288436968019, + 14926469836092482784, 1974273958550553776, 10770915481273902280, 4033929759715842024, 15461883773070197251, 5631323369668498153, 13319268013678629907, 16483196656661792008, + 5825598074470439590, 15514383036808278765, 836600073173284669, 17006029483641514808, 2253041172677940136, 10109282146738215012, 2050330973627959134, 13783991480192211446, + 7361422649689322835, 16012782899552425604, 10324124765039044621, 12319908523309013390, 17065808121376215422, 14844145564675640794, 2754581529304803758, 1149534055082731343, + 5371192675994106473, 15892724499541682278, 3862864290653799361, 15759456296270265029, 12018492462347307225, 11503979417352537931, 1894695486127847219, 15184833349947874986, + 14711143160318482298, 15515618319260581947, 10015362293284733112, 1169298538214120845, 14481541537099551545, 10304547737174682912, 9351439436434829996, 15807708290724844107, + 16946643895530263911, 6110142153011856496, 6857516759047242219, 2026267735072107620, 10338469980804248611, 8224240981205716101, 12739986350836865860, 6828408496869642385, + 18199296108774357398, 10154672256889745380, 1190269705881674139, 2317654455984851716, 6948704988682993246, 7864433505284017226, 12371653445644439372, 16432795124971446509, + 940679263188661008, 14653357165718140350, 15439920797347843673, 9415587255510557494, 18115037054677722678, 8165849408798239027, 17369387099667572407, 17619066457126678815, + }, + { + 11047138821675365077, 8918121441180891972, 26692501222219426, 15248413052700323978, 7305399157133896461, 7065644238942265367, 3949166834387057064, 14178248206708015607, + 69860179712677448, 15464996655458230480, 15095351218136595554, 3792171085643513009, 3187131848664503636, 12713332789590787064, 6278731049699775392, 1597379696359884724, + 3952205381894147962, 11371446109407143937, 7231851263356642380, 11612512368538303138, 5227214975837085115, 10737565048439707166, 6333629542995218640, 12386725921557807638, + 3631886121917081375, 14286411187847401959, 1546298555066841633, 4790996540037176549, 9541492152219486842, 12304868498960755609, 11426775785781090329, 10793898029125361155, + 16970032026045908366, 4739834790205788500, 11604777595337607879, 7222013010088766249, 5472916641327283117, 4172262758310864960, 5724466588558015820, 1116300519629843659, + 15795014995613238211, 14290583239597353888, 2490895887909073495, 11130992987297321872, 17554999787604367088, 3420129662095546057, 11472786435923282727, 16869713476687852227, + 15035061229930585935, 453017670704522761, 13269498238801130975, 9381945743239523058, 2560842189862328725, 6461599464735341542, 12067824007347016987, 254830492191366350, + 10932352970046201499, 5875690226460998551, 3699681560657289718, 8665743594825198441, 10232089728487302472, 15664572298733091969, 7267222702489958195, 10552093353886118696, + 15492342672675114391, 415437554733257587, 12890781990283079094, 13204935186176658494, 16394061615709463286, 5115734945762570776, 10421418042968619739, 5523853293632283162, + 11813086056645720947, 8830149648607040713, 12579056778157665738, 8379445845509747387, 11968812374998926933, 7579079245215672446, 18392522237039663466, 14209436550919930092, + 10567400467353853420, 12039111362360014956, 9752698954159464580, 974455144078370581, 16749750902684349652, 7431880327875076520, 16572811640260132367, 197121614392940749, + 4008236965647744142, 8547116629188387980, 13874587567755370880, 12696453513706541959, 10752386832421030566, 365775083075323708, 14295272684753058141, 11024897725724291722, + 9389823976782902582, 12987864137510618346, 14351996708263571593, 9397276818616280108, 8943695845699072910, 16761881794918812428, 14395546039428720745, 9608138420173912436, + 1876375509629583057, 3290820252719995988, 488255726920613097, 12524692306565281959, 1588481057230489293, 2737168321842385117, 3787136521155591587, 2297952115270241626, + 14002797112021507101, 15941913959379016886, 13373428680779564378, 905956693320605119, 14911807685762669378, 13046351903511292109, 5010776430112230246, 13569296078340893444, + 2399232949134869057, 16408469118941953277, 17229778225562184411, 15990256768949242309, 10694906341686584836, 2816242351659628719, 10794163145486835660, 15601904974333104002, + 11025280186920123511, 2638544612389113264, 14138782464677956897, 8182901095979048555, 11304014986311338849, 5418181013691186545, 4702887342992202530, 10901371551431733592, + 7470028071605003558, 15552389775050890646, 8285175978626575302, 11755261053723122966, 10951094485216236078, 5507384267235142469, 13239757172982680465, 16153920989213321821, + 18058993449364903238, 13898361855323379776, 4830208064835445821, 2108287244297002782, 403367259355957679, 5208097191763320114, 5105753392567780820, 6874403921011777407, + 1158521281204350113, 6232098057848694897, 12517518152351190341, 17852831094628421272, 11653774695260269881, 4420518556213490728, 13835371272800239099, 12119809446886544909, + 13202632776652821767, 12444449950521044637, 11128589812117041029, 13640351754206364654, 12210309609173025143, 2998033188714623978, 2790558847086549679, 1412882974937756344, + 8872627396983903111, 13010748252899511409, 9503554222158928531, 17266043773425260257, 14299273669130129568, 8206060595465248374, 2391269797717981426, 11490348680137544215, + 5625434013536513462, 11006046905736534335, 12974260386406529669, 9092861657954444193, 9064784322822166467, 388172762639483312, 7975459140038679785, 16519143691087724914, + 5143546864201185527, 6400878864024957342, 15513803558106064076, 6712861984922181478, 15623960351786428150, 3094277713920675576, 6184647748693682810, 7857605928718754519, + 17306606255667524897, 5112741148301782470, 12807238528895999857, 15137864759480529337, 14604908573844046691, 17945466774813936069, 8840495514948749443, 11197321446299638637, + 17220811598578674405, 4438480777123093236, 3563461541688367236, 3111403934731489046, 16319666149508644048, 11775149819643332554, 6467838040542164752, 1567553021477840718, + 5384669083154912160, 17619132083131362037, 8938889855576647487, 9107335704092137106, 10077424566800668390, 15550874273916351174, 15171215175982142697, 15277375103802490853, + 2219152199405421709, 5367881810186787185, 9606063680551349934, 18233690822105957780, 9941441403610160291, 14996958158691873775, 4936282940888570414, 16608409936766385729, + 3344228690528365773, 14741103791830625697, 12105806333035047780, 4814603778329751458, 15692542571489717296, 4074467482166609769, 11583878004635312071, 15057041878895842153, + 12046059187840675577, 1106267261822046486, 4800396230944794513, 6983782857932290361, 12630891077786998995, 18331269049237387531, 8843236455524152484, 8233277245902259517, + 4072376012468301087, 15956856707868364136, 6651399164294705117, 9169559249495366525, 8774931051408681195, 7328702764302633441, 3821510551720221090, 13559321821383641691, + 13270099407878039596, 11569032926557804346, 7759575992876193458, 8239088820951102451, 9633607727685959064, 9775909232917811341, 12297921731498614906, 14756100221590046549, + }, + { + 5228416958727036186, 12534300056259911378, 6859045937063682340, 16561718753727911412, 9427589074776024847, 4167904055656501509, 10156691045253563236, 17557096561606923049, + 12847261474293104380, 15935635664155479706, 11956861064550631146, 11743590506948225647, 15973092866215748716, 8269726904881958353, 15639962392523441528, 15171417818360069012, + 2605212343977737441, 18393471024186189813, 2302707671753008158, 8606549841034095192, 3842822953634987820, 3094721493917442423, 6408313759447502937, 13486364200254727287, + 2191808101092881088, 128992526124656216, 738676021426139131, 10157323147642681558, 11221959943853120586, 18255489816550713347, 10885231659068427649, 12104397395119665023, + 7707807226411417919, 16609863548699265350, 17639371636697428128, 8755472387723764172, 164779854477783265, 9714199241756765614, 3491355372893948450, 17683742455036967163, + 13595758632462338296, 14515163666150177917, 6720823780841221770, 15071435664343609336, 9016075014492765983, 16881277609836316886, 6969993988115170067, 15419704786434737070, + 14933348768149415725, 8499210770493553168, 6778840616148281272, 13282837866452012488, 12007326272174861053, 11172739862019218274, 15202495365302649711, 8797477675597086120, + 17862558746132168231, 4941846130090682869, 17131557654945008226, 5312800819142473968, 5818269467205924209, 13458582047859447022, 2683428091382447153, 12956887954464730664, + 11820752998821943867, 5623379642132478491, 11666807493120740820, 7241997274572162616, 17165010508995490690, 1769225877906480182, 3814296306467163522, 1913823003062817434, + 11936813336110488997, 3878433838529606580, 6540053284493149566, 10610279324743393563, 14079852809920102066, 9176732841330794388, 14287311909822853963, 7146204303626670196, + 1222343790928490335, 2199405396044383670, 14080919887213592148, 7341303626667347624, 11784881532696657518, 17307742911086150556, 6036132721599132043, 12167106497065306941, + 13817073203406999359, 7220729284169210756, 14908407498603601482, 13536224989620701632, 1615171540711527931, 2063856048260214664, 10622581435417474559, 16378505765730768032, + 6855676470217754770, 9517149712286624325, 11080380031068971680, 667425509348698033, 6136243307299269825, 5326577850303193160, 16120190278345757447, 1982981965726975383, + 15399454106099176868, 3988744407816581672, 3596277710300384050, 15129113714633923498, 1554582462382333698, 3164553872715651710, 12729140363982748426, 16366728035709811784, + 11647936211409428873, 14704462653811533865, 10005129575282387925, 1526194796943187368, 10906326807488904308, 18256878690674435807, 13093574545395154003, 12352810226542367406, + 15290817014272444879, 8012864091692295774, 7591940515496590516, 9299619125326026848, 7297256232167521068, 17861204372399797627, 3100022958796565106, 15313770879200204613, + 10190931844127971311, 4449277895844944513, 18060903840299422620, 5786504870079014862, 13877878835839147302, 17536010013969819055, 12266576574650233794, 3344384108564085445, + 10406231172694378191, 12785551100938284935, 12210880209800471137, 16615829503150405150, 11718424737227538631, 2245674303359461903, 9283401225508960391, 7699506693893764319, + 2278745583218618906, 10638058115573097116, 5685140621618740987, 7758115175926730915, 9043786871908073988, 11442680114439652875, 15805407285446479015, 17615595308217319903, + 17127838629068819444, 14692086977823146619, 2491742894643214379, 3238012827598604955, 4066194902402629617, 3613111175737094005, 3296022969960608152, 4860674720288543209, + 3753308721541654311, 13160824061458819791, 11298768140351857216, 14042645015841972579, 10518697187157549363, 9641851844202952198, 9254393371864075771, 11429987449073904258, + 2566815986599406395, 7824829323439900657, 14465315986665451243, 2957504741164666523, 2280672126115151165, 14999624386740200468, 6090063205272921919, 3478652904863279960, + 18131467168461028363, 16471039226156513736, 14133684792391617163, 7137732710914086892, 17299189471163637576, 17270681183882183871, 9046519354210103502, 16091721986926209161, + 6572875037492082093, 18318192911446580882, 18107990043035854610, 13573958681720538720, 2821064027974708708, 13000417359062035623, 18286430707379660800, 13070676610179458046, + 8521670721910745896, 7440450230382417926, 13794050882540606099, 3657777690979179248, 15466400576048586018, 7802926536345945651, 11932698778931413693, 4291944108972289013, + 16729016738422946915, 4082443597391481873, 2238453914102498318, 1806710580451019071, 18305396377745213463, 2262209146853870557, 17240603302299977012, 1540474707615687112, + 16013022710115947086, 8748688582020024424, 10921431579160628425, 2228053349060750948, 7024441100951863599, 15816203310189470753, 9078141084316010447, 9841344410499582933, + 13377311905703150010, 6352753896568161822, 11086576920416079356, 7629848369702522073, 5472036379742160857, 6535781794177956975, 7671272114437903042, 5056286897725243410, + 14579350500102276494, 5882738271691348624, 7188200631561491668, 3605286077046581049, 8957701412704914688, 10347270272487533128, 5995825272041784807, 18321189862831103872, + 2376178340116743056, 2263593380938083722, 1990228394694589285, 13477447633165073333, 9023956677421314778, 7820910817739139038, 5288765314200405868, 13070699273589738968, + 6765952613794178887, 347733275422001747, 7640026333627494061, 12925707101320796635, 5670472934143686904, 15041817307271371058, 18176041895529109656, 6838891343875575060, + 9902363248877274636, 15776195244792726061, 1935342462634574355, 16572507349421626602, 7503944294807242065, 7454248748618678609, 2837392985175901475, 16494567631149775512, + }, + { + 12299242278498309265, 117303082144788574, 18382936515565701803, 7941141688858846015, 14923823358299367958, 15681089039073030752, 16878721085421212785, 3933697483394042011, + 2878536922988449879, 8819365314325664735, 2696760638950720974, 11357179421800829605, 14730809809256560583, 16277258726425730599, 13189689275745970592, 13769784951430309444, + 10292669945637125918, 13595563061674492799, 18301330068714272653, 16981073598172019270, 4022713583718555211, 847970754090163894, 12867993617169467387, 10021141621864633865, + 15254713655124736464, 6283719686609422435, 5417864180357938798, 14010638809605797246, 10873690600705871523, 15014239467130590294, 15765460219545280655, 6048061577845125750, + 13465802402879986040, 12777495579796377113, 11251142258511736751, 8312965145580158560, 223338793373356823, 813747086573652373, 534790745256052224, 3658554932030960904, + 223170064814687698, 12938716002978051117, 3848816111228150666, 17463649319617382466, 12886402189488467296, 9605942357133455486, 12272689261734151433, 9916808458308982839, + 2879267873712146919, 1770061136847768757, 3497337219301780129, 17065005588401306579, 7675751531915094916, 4337540836976665798, 12505868586426061855, 16929961945120117126, + 12012729503226546642, 4811951115754240681, 2652621264600062520, 1108958391967017223, 2508980100759035797, 9023233544151778447, 11183557587641545146, 10726727280840759519, + 7189877122763725514, 14656082750550993666, 13508306914749420120, 4564443784575285318, 15450566991657647015, 14019683331552104148, 7751003035652012225, 960642801291306739, + 4621870789074053275, 2699296606556701282, 17510056683561737074, 14554735156054608866, 9556940770704438829, 10801168482998872433, 2280142648958056556, 4982572404720159279, + 14621729774738200671, 16850208728798178903, 2650832773786164628, 8325244333065591089, 17692120343646193143, 2242845836784335580, 9386711085286369304, 517979059098779699, + 396296174223428093, 16763788748988621834, 10606803701784640909, 2399987183687936561, 3641562114746393614, 8536687357022564742, 14532545286961725758, 14658331097531946901, + 13237433907493396869, 4990918623816750268, 17673032790739307275, 15907935024302716062, 8613470231259845273, 9346326052910147032, 17747177708802561938, 4490130127498754807, + 13867405496809412712, 7002613759387491643, 13033935552806967378, 67437088702121131, 1528641041740753990, 7001198041125832592, 6102137699349551632, 11527483925092226802, + 5535652777947676324, 1216916140919282417, 343617165825197489, 10806931059627863289, 3212291315152120591, 3351900116486367965, 8595828725191802110, 15621077401736893558, + 2386026063595321071, 9144724461940169335, 13121187420621518554, 17351696676224980491, 143495028693925520, 4590642171152119027, 8655406817079059692, 10005507077955932112, + 6982427644490593208, 5211219672384700779, 14348546115682624722, 5876199850746516331, 18396840586595761484, 1465795856184232635, 9374516147960853174, 11803025633575966436, + 16374709377982692229, 2545295070586009694, 17366393861359867323, 18067336234259281315, 9181658491221440805, 16188192004088261717, 13669686513572099673, 2044092029751770484, + 7308293641399229883, 9813539775451467496, 4837144575100457648, 14073521030008335794, 12371724228966966456, 9185074727660313894, 7387427551441329556, 1866580457929037678, + 2047964090310465728, 14133690277069748515, 8287749071957055844, 16821270580974431218, 10324684883587585565, 9652498187344172497, 2461802306013063247, 15048850312040680643, + 9659170831875656345, 4251587871822599596, 12391279033133774782, 7346556824831912968, 2133114137534468833, 8694105814921565301, 1000087733076615381, 3478818915231998954, + 15832320183998834886, 898251865651859288, 16874504161383651033, 18425166917936355349, 13387271833996394553, 808054572728974308, 2281828182196149100, 14620221246129351423, + 2755242181069093228, 16155597320469161129, 17375128328249992589, 13376690667170223367, 12825454052191060221, 13625129606409283552, 5627148592782580041, 3092789333954796605, + 105705035058376953, 7541598459498134635, 6627685529861836787, 5724525771392188178, 13865897118300842617, 627652561624109853, 167084371112939132, 14192831153344015422, + 15618434900514973465, 14267977447705810206, 8823759714826430108, 7918740515193061263, 1664424041430494212, 9076733744947861202, 12010737794701759719, 13920688631446302868, + 14823160144680924028, 15690572793903331709, 1101762466949115887, 19462496059060771, 6099422315003219574, 4300077459158116870, 1282001407902323746, 5792772971400374848, + 11488321122368726901, 14632273370344181937, 12984850848826263218, 1857014998357880446, 13897135914532207935, 15925630222053677390, 11992050787349110965, 10072349923584966035, + 12837696619944805389, 13470316523008526719, 3074966674353053986, 2630814767089382743, 13353767941686870870, 11405513520090968568, 3071969598389884693, 8212421686942839224, + 7866179127856055582, 11874624506147226333, 5793384221044709375, 4411967221310302622, 3004913667339358722, 12230878672003882045, 7395980261120250376, 5188846147165419817, + 16160221386626609076, 2876994290210606255, 6065036384672004173, 14696333348949573335, 2667009584785381424, 12100192661844794919, 12401945669155059163, 12847772853650923808, + 2286437117905535902, 3130250324287833414, 3662253455746154641, 13229041365675585947, 3663813013822489770, 3093300797589723687, 11507645566517710232, 3715061633580408995, + 3464453985305889088, 7486900500389940957, 8944303108517222303, 18319933556403836918, 6782476025105836770, 12822840946923377510, 2799592944286097404, 8737598030273298051, + }, + { + 2754563581284692865, 11257360722362829387, 8254261291934606879, 10023569920325096229, 7980898855627864603, 17162063449612285703, 9275759455936104363, 15936804738099638644, + 303387668469489035, 7978391052264888020, 15413722057350324494, 508608987485166058, 5585753287556892522, 1344134516582665443, 15144279405777509214, 17014353449841567149, + 12518577022215294774, 11347384788122950558, 17122718824958020364, 5431993665805279177, 1445831254446028321, 9845654210244803480, 18315778529692612284, 930563922336183289, + 4894710987714215219, 15704993603878996107, 1606204036324478223, 14308635149530198932, 5026736228773269251, 2911689442372084137, 9004077539360196849, 7770049130277452000, + 2745120518194234905, 17255944807561408883, 7371907591838942770, 11781525288383871227, 7814952754785494862, 15022715213812536212, 4112388658963418656, 13703771908397991335, + 17455440140812341569, 16738670164475421762, 13259904130186215994, 2168106064304872507, 1969289843772256992, 9025317999701057831, 5835661391798891929, 3826587698101558069, + 9149648374608464235, 7248346993716635643, 17283919168525322365, 10107681064815728795, 12176813938029507719, 15110337574289033068, 2436453685316043712, 5876967059744625564, + 6197919059881189313, 3300993078638782623, 9357667752372201437, 17688129755135009447, 5477090746558113696, 12602024521188880300, 15889961935507293355, 7135039746373941272, + 12561063426893244305, 3079971284510744316, 15695857190375815873, 16730673956207425338, 12065477821845465116, 13846158368121919228, 2126156187526559500, 3005167441915916768, + 3858987859895327040, 17043903959888117395, 5861237635520080595, 3292646198413575902, 14286644557048422360, 14346409530388980974, 12583555046601155161, 4665981927428063991, + 7657686120974201842, 2388299767028319466, 17934808017791217639, 16708640707026372313, 14122341266134976486, 9547124998857374491, 11194069918436025923, 14657538980930727877, + 18115789712912297420, 6543740714573413570, 1955886376702349613, 14736853369719086334, 3914330395250768396, 7607848156418885173, 18220633528980056514, 11930952850158858930, + 17010681753474701341, 6961088634819162945, 18395881317121515295, 11611411242007267179, 7996717433149311639, 2687736005475404867, 15340214362731923149, 7116441414289074727, + 14302198697220662403, 11915041511943922518, 12363437164440173650, 4047410026894059083, 11439743031696133447, 8776097395071907030, 6851061783357383945, 16484733576581941012, + 2861660066704264713, 16453202148771053225, 10039035813504063627, 6275446616881871868, 7150609984920482782, 11100941755544354363, 6219760433202580646, 2875377482232912143, + 16477698187919702699, 2757160833848759781, 5976267476122243443, 7375951277779829055, 6525111513935493528, 9969722922172879681, 12582700753056539952, 3861958159147836631, + 4207324524057007052, 13478421263392765917, 13318275642734768006, 8504333045658265535, 3960720256367501697, 1601661680373853468, 7704205542213854353, 1552780884856863914, + 11788483659211580527, 8739274750400900816, 17562450707643849072, 2113004873311762141, 8542099133996522813, 5273956141012313478, 15661081976505113338, 2290009483289921559, + 7730337285529332076, 910421998453976119, 1242962796308775978, 15931313183507922513, 296811920445331257, 5821874870466240960, 3843405649348506130, 9007082448812181532, + 7200777231000974420, 15816416455380831370, 12932038056359512011, 16422801186386616731, 489979647738684819, 8269568123908387407, 11617965045895762979, 10069799398667455631, + 8245819835927284113, 5237630684564909794, 16572245571184457585, 13396211619915630497, 9177789877937729597, 7598297211302983411, 3133511073496333246, 16553020037702090576, + 13441991323065749602, 15441570013111011986, 291172418860690999, 765089869758770890, 15943751395897588683, 1887511823199710563, 15452188663399676440, 15028027691170546376, + 11009923302693800320, 5465227554242327455, 6511134004216481185, 2187999546010819375, 8387421071023131689, 174295266381833460, 6010935117399493668, 3340537243300033200, + 270646782953286316, 6635640130829923909, 12717598479778596267, 3198506535831522029, 2605714695474737827, 14548588937126505914, 10448279824323025574, 16008560590494788290, + 11460241048816176102, 6557818897531989327, 6948658872009630691, 12506263743501199667, 1813031030961999507, 18320107449706503757, 16783076453582799039, 10113937529079967843, + 3096618551325580575, 16427990761832670936, 11792441105446619028, 682188988823121368, 15606025696371121549, 15395362449248397023, 10158149307437310549, 4442375503593769727, + 5486499432056098253, 3811951715772590831, 3856863699598204250, 559724736042732093, 13251752681229681959, 3173330346541355882, 3597530263899943500, 11567248716558420280, + 12822024566847931229, 14487146762942136806, 1361688279016339671, 2776885758026394514, 13616222293131162973, 4629388276054227930, 8304132204812769256, 15188275451670510949, + 3676452641243730693, 10839008248003066479, 7676518604603740317, 18384275325050378660, 7827069556104560937, 14335741841451672783, 1145975213382205474, 13691620069378415495, + 2513451633312793643, 17331457978938147928, 11588204658848186728, 8824497684232939293, 5956154520546906549, 14434958079241686709, 11798870987920360447, 5875117843056165993, + 11313908033360767461, 14678575871510948417, 8479250768537158849, 13854183783158898926, 9413525381201650053, 10077438283140087199, 11413783222317729281, 4418417663988371635, + 6001837395299394988, 7545846298204709215, 6662102781970969966, 12696054550672669874, 7436995610194789060, 9042605089262227561, 11621508125069628605, 7289479057360149973, + }, + { + 15555649500689005370, 7857014165815014600, 4664069234996337119, 8058230521837307501, 18090421626009183450, 9278179697462444898, 17138879353097908770, 18285223269807746171, + 12741225973381753432, 5461579923265799106, 4348015554411389960, 14970536173310927872, 11568371199139699619, 11748662751675667230, 15495664823061891700, 670495577617908409, + 9865772269601493999, 7325690007062740244, 18437964667185475354, 9546360111100619473, 6266116854923135821, 295905071295541234, 16037468634731642095, 16602609756229843150, + 13266892287530449189, 2232961926857263527, 5934654989155432442, 5415975493246016158, 17117784386882463158, 3875398271691758986, 15755743581177209843, 11380746535012415659, + 2047267291755864302, 5637045342080349843, 15779859211873634959, 2415175423642772217, 8664113127182901077, 3032959071705433205, 4250761979529656864, 8729259992491925346, + 16559648310139278090, 4796731467175572814, 9267555165184254828, 1400425639375650333, 5199035239208036114, 7994153995816640929, 15677746297407711038, 16338895562326287438, + 1740700879164022194, 8442598604870387302, 14969965175291161450, 3086786406508018865, 12097766404705693713, 3199218739761578935, 1744290654371906763, 3672816528005113908, + 10162459499000477259, 7534168794800651527, 6320985751131807192, 9263840233901040421, 12178625283764017250, 13143797113231152006, 13089053551699608219, 13480464010111258056, + 6572275813002629593, 15328678880838895837, 4281489161088939969, 17996597009627631572, 10405019085582914256, 4773602940383417103, 1455403777441353089, 14766164051850429812, + 3886207141264898075, 4215113531127552656, 14283840761174092490, 11161507022796293343, 11235510295695006534, 2260082644337974884, 3781681413610409735, 5577127823856153222, + 15609337727165147542, 5725010295752332028, 10091444858515990726, 16162855978360002823, 2478705273652882251, 11394669968372227148, 512641267828429208, 581410050368207889, + 11426681347395118622, 17135867298562042628, 10870734720453621112, 13803364322975322217, 9740527283062276630, 13765238480893165263, 4792349632705804116, 18359029140065847048, + 15215482935039834459, 3553551735757600990, 2471323651039083728, 11355669733892999598, 15830640053604899661, 1625382463038435804, 11969247519895019474, 14372668755409420943, + 11113773299077372683, 16618589175113863833, 12580653821785550146, 8264195478884998326, 13569468260761711850, 16539804400793525042, 14464540796871846551, 7710175783216723569, + 17565888320840294282, 12018783137972195797, 12159604578082686310, 3210316551614996638, 7700243787046971168, 8307491254792612954, 18022719331150542511, 15890872243361196004, + 1420251412241033168, 6449255753238336368, 7468624722117319576, 5716072259994197219, 8165015263320602150, 4757529151848960259, 2040364517493350115, 11069642333484542749, + 6861520668253090597, 14998859189233167760, 4851545815068976365, 594940747993801912, 8473464707442482665, 2316305389548210767, 3995416902578245152, 15770229481630179441, + 7316766449463697051, 1410976281384361962, 2878678490331118371, 17638505404208445694, 1130528985192221086, 7544558337870473835, 12071909234624857373, 436426448500115731, + 17436263074416557505, 4339627812592344328, 1076653379358004720, 4996950998108333398, 8528954985898069924, 8259369611572362613, 7869239207308554342, 3592003501289265534, + 9434687670958330812, 4808705744496679962, 13910334628239183613, 2628594509406050235, 10055911465166862298, 8069256921143792190, 11886968715847498191, 6351594160398684406, + 9865961381837093259, 9590584894285774485, 10674927452501376239, 13861388033657872936, 808108663597281834, 16581187679405825998, 490312642991375167, 11352102610468872412, + 12888468148530619065, 542846015600866106, 2086677842713697080, 2758534484095407635, 15767123419143795297, 7077908316515201321, 10297306681753168866, 4641656730233236179, + 104445550853777927, 3829969899624802332, 4713997522991885393, 677661257782477991, 12383280183673921457, 4373178143111769979, 6375181113677777933, 18335077047046332699, + 5472832021993175857, 6120966193470238758, 13156950075464061259, 14833372202848630444, 5760885723102571027, 18022572190906170343, 9870918995920897221, 9546055667466056471, + 9516299398994303577, 11139435630263588724, 7364473923787049608, 15378275072059759415, 7378721410182635028, 2461490895435109307, 2579531356642691202, 11061693189962996686, + 17380308085259417126, 12584808641030185141, 15485460243194852460, 16172215807782473427, 690761937231979275, 17570908286361380250, 3347059970688467328, 10404397759606846636, + 1302709369035452282, 7149637102192386111, 11659118804203241786, 13909758997057192683, 4361167997834561257, 2300569874961985919, 1609719848856066643, 9866395233709297503, + 17124591880405051324, 3379747334483646957, 2084957978131092947, 2413497199486596627, 2867893699156486423, 1357836849049583382, 13607307985344221035, 18039194570940763745, + 831259058391834318, 12782538711785697535, 12640197944141330321, 11004378265133697847, 13622812183191119169, 5878590957416415562, 4810392157669422629, 11128662835376944411, + 2248775486830407244, 1550340755758127506, 8399436463450785695, 11063895076440303538, 2168511054719571959, 12238498894267955838, 10824266376477587807, 1073243669335517712, + 13568560319245254019, 17615770913976878569, 13214159705850440913, 7542574047209989510, 7392331519546111377, 2685908847786414772, 14835825677444128105, 11347648758435372538, + 13230815692170010235, 10662664037814374845, 10120384214921531334, 8892863196093018212, 4443096401408136417, 1896901619181196514, 739574867683694091, 11233952771600927581, + }, + { + 10708602749843635238, 8046488591192123621, 11391158257016334553, 2512908912806916201, 14760224624269838038, 2631910198880826809, 2387247811669872033, 2048193899742148806, + 17392983665205025720, 12603331233691760840, 3752711326309791976, 8281544857330094164, 17004651346561270106, 10399322540256775923, 9721299450284863176, 3725182263994944884, + 13643198694788276032, 8127425924819487897, 14165945065582850538, 16520977003164405917, 1941948502806705647, 12390949208067741787, 17112117376594486408, 10232906951259490042, + 74190110112564319, 11227229657066745164, 10151080599595091944, 16591051376074591375, 8256332495682655293, 7127985739945537626, 5708687346183367016, 18310921046455023281, + 10040565835468209502, 3986586816478505371, 7957179672896506016, 13347112914739722590, 14099734293320932556, 13921950344400895005, 17478414833301838037, 961445463330402797, + 10553899634420153414, 4215979268474142438, 3643081140297634139, 11620906704125380460, 17733628678553828301, 137473878710304143, 6211837577294956591, 13926110616399428016, + 15203257341351417031, 11597829209697937782, 17317571979811895272, 15929491640570317745, 5731227916276913121, 17093153916149088230, 3908757069480556294, 18211818355208735794, + 6887629766078662088, 15330755394432598442, 14144608177273202986, 13258654996547018856, 8975611270007504091, 16869834856279102361, 12707970875852047823, 8520055680055874514, + 1643407997152867899, 5400650551136488814, 14663391437159245064, 4743209938058826866, 1728998201888511561, 11238331920657195520, 11617984741897247805, 13858029141809651878, + 14003691155137683825, 14641672309228817657, 4031783205073356111, 7414359968614421153, 8166814052414482491, 5209260157266028169, 17555696996149558694, 9780166780476833956, + 16029688335186691281, 4244772808651443261, 6450459413414527821, 695875191412722741, 11104054226249884864, 146461617619843327, 17480214580411209436, 11285418463967817315, + 4922928863857449168, 7226901725606965824, 7568276305032368752, 6253457805758596797, 14345040877576203078, 1718759302498746946, 3544444746772280646, 17880302727399449566, + 10693864548794383214, 736148233485985101, 5241869061152863453, 7166954323782542739, 16861482585847848251, 11483636939866570861, 5950529069765100144, 700887039653157350, + 3906969739692830848, 5546431121705298614, 10491799900914853406, 5871175286230239238, 2851574662174517162, 5511397532379144624, 17409985391224790540, 319012646822749823, + 15269778527761027251, 16471737837294177924, 8830398856035267114, 8206807757132984992, 1421214226977370752, 14233751335943482739, 9668238787333455452, 8700964319357541954, + 17561670470565298976, 9524122142014309347, 12619871254022879995, 4098610542031538614, 8338691110372464881, 4378598564044777665, 16435680634249352743, 12067828180476756532, + 17102475410743425567, 16244272017711449252, 17598420420289880464, 17858587557620312875, 7275673248698358659, 13952444068457447282, 14004356855576602555, 15776401868125266614, + 9848210305813205280, 13777844361911806961, 874096498600831552, 6286216140019341540, 1229781121487503670, 1221366673976886811, 4534525466071627057, 12966848050780717673, + 14890015174575641658, 7082696407720868596, 14546372439063914434, 18306301153433516531, 14360952638570231274, 5387283652784509262, 7246876761413034655, 4917620254108676720, + 16572428185201836512, 15535161956998457706, 3118508753345802652, 1036719175817464914, 9929541174185476740, 17658764424885337186, 3038747611544544975, 9299826573437432057, + 4068144934809412921, 10422353259330469599, 10820596685747357573, 18174251490011716747, 14838047097904304200, 6494279871501263445, 10908514990777375627, 2395220126873880030, + 8963350163641627638, 7331858013030906816, 15047142507680204097, 6113080618647404016, 7520779807516814683, 10255234845683676901, 12010823325949652354, 4005195924244758895, + 14455661618497189876, 980858168361168621, 5057114595071838088, 9447128440518577562, 17115933446433165191, 2063106073840638313, 5944852338857186064, 14601740796836275216, + 12750698494453238590, 822220438401342753, 12735466470750534773, 15168985403101776640, 17837672477921333079, 5413174218886573483, 8340507718774735027, 3796235681866322764, + 3366536195244539993, 13901131415620358879, 9146606194544531067, 7568835872371146677, 7101767951356426039, 1958938171972212156, 3871412404872561111, 6437054924013233533, + 12849284342518738377, 6061751293342567452, 15168528907847693260, 2071752334274992986, 338461453751559028, 17270401493472349305, 14152214298057413781, 14074152302209367061, + 3814267663972523036, 16047424572869508620, 328411057776532579, 2321721854548154476, 14950895796162035951, 2381867458906113095, 3121132178592915484, 11019696935452271225, + 17929201703034060485, 15478512769360046037, 11652099424221007544, 8436785170867890275, 16340404489137765730, 13705192502407972039, 9464924753010269658, 17372624212933856135, + 11237648987477338397, 850109691127167972, 6566350160877169296, 8028073340740368222, 4445151747345891893, 7753144690511205475, 7551118086710031441, 16785474804106433438, + 3919851124685189663, 16028223238454025712, 9008783533564802058, 15647028843636411071, 2029953422298604024, 361326896057539567, 7653333452646403127, 9211397293968688243, + 1817762546790624760, 15025271080704836746, 5761603182050280401, 10355268146728370750, 12900583599325924868, 1488036927944469046, 3405173852255315327, 5600132277974719103, + 12574920924242276218, 18146097977073890534, 18281543194137545725, 4432773597060264867, 797962164984225968, 13432184750641235201, 12394976215803938078, 3054137600383896411, + }, + { + 18270562314314829224, 17574550722906577917, 15522987576586641271, 4200899284963891561, 15719788437321868967, 18390235318178988753, 17371818886753265385, 4583380351670429228, + 10102001197913431240, 6211688443233768420, 1986665099237206297, 9248907879269017005, 3081890249882755206, 7828527646248386813, 12676034092230326487, 14655968027461080131, + 11464797300361906821, 1444449385286554508, 12433005157477847389, 964895564896662942, 8457290958728547067, 7044433897330872257, 12292673928275053405, 13679355097865192207, + 5841260956815214452, 9461234662498869948, 4761949640006529525, 18229823346336752572, 16055957459773168413, 4272222475092786627, 18249888117586812770, 4531982814021269346, + 6528332072572425694, 15678657170585498065, 14462305190193144746, 10159286562616704237, 8919172138248087686, 13247073064443362933, 3056992278576665488, 8216417459286092432, + 15095644792163717355, 9931529712396800935, 8608932369590222253, 17019087735655287783, 17120516184457264144, 16412592485673181957, 11221555966650765276, 6815162077372858604, + 2239262553722461896, 16518231021003046485, 12875479040073845800, 5243141733462514317, 12216971859495819530, 8917663622006068692, 2606734295032613674, 11237688605489138628, + 7270238601276019194, 7318279377964882079, 5226788506940171017, 16838449902078450273, 3375184937745100689, 16320655850583231520, 2173229791272659780, 9816381971766204380, + 11061652864259684005, 3625039563689831268, 4435107405432574197, 17570841169180914890, 13981724030179216670, 4992487505540472327, 12893808204753555435, 12899171569920731024, + 12002788024483301555, 629439982201171214, 13749214763651595011, 7164215996082938159, 63895081825959000, 7006379700570403491, 11929054672290963822, 14290981753366049931, + 13554823979828564217, 15627748053816803157, 12924111102804720301, 3165135223777020148, 16345805559363705538, 17656153856991335888, 5211305829410935273, 17846236012427517420, + 17113448198982970082, 8879656883528014268, 15215762680430094206, 15232212751792759071, 13699137317730183483, 11815206710130297569, 12298409691055054593, 2192455093917691024, + 6973243752017383665, 15334919534387330350, 12203459507012560294, 1880762698376948842, 13008085926447766632, 11695961991328184563, 1311309249708428072, 11981691262998313807, + 9209757463013140118, 6782250604095077195, 7896297860376631775, 11500117438976638113, 9099678680126826621, 11908588510950302487, 16863479726536938776, 6368086884375751995, + 6220904427249143299, 4501361418756965744, 13907877383736656540, 4499586229279778873, 7233050316216252450, 2775027076708404645, 5669561106187108564, 13951461080947060780, + 9742819322069712752, 8779224519997661749, 2591205872359340025, 9144644967919881415, 10495832103710459914, 4557264342687723360, 935865282128150338, 7120324256648044882, + 10780007491891304021, 14382731247791652187, 3296810302419712004, 12567831906993651128, 13933396378746555124, 9754754796778422405, 2905077741505886450, 1522919492210831578, + 7282056954663740402, 4915867300034445627, 234865561845141598, 8874426225092097899, 196269165389812019, 8951904020835594063, 6908903359164319029, 14062039535595890356, + 17922518107542393156, 17388335427727081004, 9077910363057082196, 15157438705071651749, 5615196861086779767, 17404239261681991687, 9772161835068093639, 8380484042808490346, + 4683169301229531061, 14940390399739549639, 13128742567354398401, 15222231511210090535, 7352694898928671332, 1221555526903686446, 8697118143614336514, 14515400523185205597, + 1119145169329993209, 3453487026721044641, 13723232966162256192, 15282444658208236515, 17806849942057939434, 158876409507122147, 11954364486113732203, 17566268785944006600, + 1443798830844295330, 5744920153630226551, 15739845606128975055, 10458513157123726949, 14711103578835123416, 13886988522344422595, 4454455988362290346, 3507977514728491242, + 15328194945269186082, 16398971046683442289, 16293780909421539295, 14488637138080135219, 1108438550767062231, 16291467684905586857, 11891067917592351245, 10459212739896579624, + 10239864229747754958, 12477284975467600510, 14054830413782031834, 17752968481181892755, 16082713391521411762, 17191329716867535304, 11245514405315337909, 15159733745232554536, + 4102022129275956496, 9361580273813141387, 13824110615577377618, 17115977884930866990, 15058705994968389537, 8039281695081359531, 7179945727911625126, 17628825480910826356, + 2477336341361304271, 18030759410783456479, 1164757311756325043, 14195508307731677040, 9794350654156795137, 1135232100348022155, 16693007636113380448, 17770308235375345497, + 7064407865619227176, 13284912648471743270, 9099541805618279168, 7383718461788433067, 3311987567268438113, 10943963018398042859, 8038765968977300792, 11039205462473399718, + 15444441705629456398, 12030687282161152512, 16500102437404896225, 11467410246278043769, 14031300093087427598, 14458597378298191386, 9434445586505481169, 18221119130471329822, + 2269899977501467759, 9674818903084386596, 11781166634280447465, 11752600152694102168, 14011747753002945247, 1084585411243442928, 15614695372212325691, 17196679090968022414, + 5062812993852092949, 13193601915773858975, 12910718608861476282, 447390754333146274, 18393090736120037653, 13231946117349235609, 13365030203093499221, 9724404925244523782, + 18064151108813428175, 3928735035534454387, 186686833744587171, 18406101378699384605, 16477405641027895528, 697258480673566897, 8958117283476321666, 6970167016227017231, + 12694666558858009121, 10992313774737315187, 6046508760845509477, 16818819584649823054, 10464090160691032219, 1221502600750419478, 8658907655588546849, 16110628890984509877, + }, + { + 6266830413981331818, 10890859480409122410, 738708598357264321, 9241173142142102299, 9152169484987657321, 12122430118903337055, 14270062735458703743, 13877635605034675649, + 4572419849863661964, 16330741525817240677, 5456771007607263420, 8798510164381810403, 16547183593064549865, 11355135595370359779, 17581910791451710925, 11684604415749005488, + 2766130068233451740, 15909689412657372214, 3272828023364980896, 12907534005596438003, 7065629942275705493, 10609982651084321649, 17330343421617897811, 1746246157222856918, + 13168953859078137190, 9072500190247791267, 11090199374278127099, 3503847851309060744, 14395788825673846582, 15073993563711333791, 12879874391706003251, 15417028700542288171, + 2686132933630431595, 334448852204016400, 5636563341880055087, 3897420469445246068, 10569177829735084588, 8244586887721364305, 17973493209847502315, 18083956436923498083, + 15514509130323617046, 9833490355170899102, 12028021566105512521, 6349458533674010018, 13470024118569549588, 7867188201997717819, 6110792121234766402, 9917858515766574695, + 8273062939911446888, 1936638151089648185, 44602505720759914, 7839132833110815763, 13045756168264521992, 5375972549477973022, 16964151563535745964, 2871770426902555432, + 14164643693563803212, 6386904235704679249, 931435629437735510, 4496152555914735639, 820099532545743339, 13419988718062417698, 8948965620484833159, 7238410427920554862, + 4387941928212462673, 8023190031094282215, 12322462122895470016, 17311901330042910195, 16445894183838823370, 690424095855074674, 12820742226551488738, 227935274674223825, + 17437729542205914525, 8439533239207225162, 2862942422640746698, 7340866642549630610, 6770451500989398335, 17262212260581413381, 2156505157624055602, 12195119268846383898, + 13685370053507732180, 8304030489296890211, 7042719123662732989, 12115154796394040286, 10496877583013829980, 8909625177349683785, 5268451666475070682, 8910488385056259560, + 8874918984373509240, 6652428549855789281, 14866714872885601270, 12093494664302554643, 10149999253136441007, 1611873554177023520, 10271627298853910516, 15171882630355491729, + 17085353800722834618, 3271271583189511718, 9338466116389304614, 4685545953421459012, 5059702655119414343, 201476627889215522, 6388612869673101372, 8357217932305360896, + 1836621694151238424, 11438765101326043062, 5537182303007083642, 2657011195641217678, 15370176245454161308, 1478361893117141052, 8729368992402422055, 4807782542738120272, + 5084555197036037383, 500829604818691390, 8128181767026348964, 17895639808733462562, 4138748420878083709, 267892430674902987, 8954358455330696651, 1946224964877667668, + 9440495138081785154, 17328162383695942099, 1870751290505230781, 17987066187778049275, 9952223038481207705, 10126409487298126876, 1181369784146909444, 8158422380540037915, + 13005094963168549814, 7254364757817833162, 3708127625424981603, 13214729399033107803, 14306349749849146051, 6983270854272831328, 14340019638575491596, 704963281855585401, + 14561080905697443279, 7640393429462623622, 15685656608213407004, 6317405842945074950, 4586197269496115618, 4352991463721324184, 18421865732314202847, 13679296167045126011, + 5024751682397066348, 16464187416447039066, 17561916915150311869, 1553872229528720506, 3971081625989469183, 16363381852589102144, 270446294712200212, 4668332646727035585, + 4623922898967168175, 14927682190770218553, 18082407698612059245, 8156136453512346914, 17256761355350416250, 16185397881787027880, 15277309552707416345, 5991448876439153037, + 1334994484802978518, 6876293985013000163, 4439518198423905531, 18287119869714109760, 10942138263490204506, 13981840082343935999, 186527448590996253, 11759869552919496944, + 12652601656062843187, 3947181446273381189, 6235196886760030384, 10531746495897900463, 13249616872107073217, 7937663398525899211, 8701780810784627798, 5901496026068341468, + 1059285256835989485, 18284969105568513488, 8222225088165109003, 1178299600673068913, 16068875143685216551, 17849596766342630525, 1648308197824141718, 6344465813776674352, + 13850247359965870279, 17391106451201459411, 9882112511974412840, 9027485913083134379, 2654737707750698953, 2829872619210764000, 10765067664204983675, 6008367494734883394, + 3596754664049660092, 4049565277418079196, 14294713401434678004, 682517403208603499, 13234861305501540306, 2878761138137340816, 7563571058666806838, 10752984583868770507, + 16399575624290910947, 4698193514990054887, 16222170007882213172, 6300376482027477465, 11587700570984519002, 4408509191665524069, 8909599698892352923, 15110169407475815879, + 13814725956725513903, 11675366986709837701, 8008867936756292473, 9627238521208818281, 500674066787171963, 7307892989752288627, 16701476208533897826, 13745712031279374379, + 14988991093658383268, 6586109441484875615, 1456715231981080716, 10587865765103526958, 7439504550868615754, 430398141621608230, 17710495017725824018, 1267747579289674850, + 7939033276966801601, 2952159422351169438, 10439776924885737877, 13630489377585028212, 1779349587540144503, 17209739673050012100, 4174716919431642272, 4986592555205663040, + 7110204405213656469, 5225260473696365908, 13544564684169074596, 10807318789744351361, 16301252682064971136, 5046748520130689465, 7049174921499294642, 433785658536711265, + 14622798677938552282, 10254953149954221954, 2609148400899865119, 8197897294400569748, 11784700166516996756, 18107483989994737564, 5332550435198791193, 13389061302706010047, + 10892870009313407934, 5709686117526627488, 16343828906887149047, 16063790174205876315, 9076070192754075075, 4639156357669822592, 4904064385440242264, 5942264027390072320, + }, + { + 17271984572727555145, 2097715321690717305, 5328035183708920909, 13206781766168781008, 5915693606819932446, 3870849056297265622, 18070792155150730518, 7786839743829304936, + 7939886940063870939, 3597929036186322512, 12732772396498766041, 13300425267730247515, 2345112231224079293, 12327614629296314761, 17106102937162407683, 1702694317559034053, + 8821632341545057929, 16708916351043551467, 9171560460858577216, 5711973041149612575, 15808302665077457285, 8412173943543358851, 12602476370021215716, 4778674887660148537, + 2707032230021934480, 14943945588296447659, 13333962501998857701, 10603744732015256400, 3386903941135274495, 8507451908469049716, 9231176235773710950, 995464562185179329, + 1251633503842168929, 8646405730060859011, 7525758827556699239, 12781503299321323139, 3762059070509735358, 12904588764985510882, 518456268741331092, 2752400776174997580, + 2965193395248314453, 17005383961540689211, 2510904953800463288, 14582093408745785683, 14930278391247460535, 15319775861672772086, 7915822626743762260, 10481055458185028303, + 750419144665626202, 4009275022270511551, 4810013488865977907, 13090697535304919393, 17408171061427940566, 13469810562978287715, 3952372983235223818, 12596642250700034956, + 1497127512412966214, 11292269970549257426, 13920714718237672232, 1363993524078475237, 3687544717271789391, 14097532154250414203, 8367320351336033564, 11089672460564007700, + 3927746316404254574, 9973240942982210953, 14122301504960485144, 13249357693517290445, 4259646940372475995, 13156585130751616020, 424701295260668232, 13006066175253059189, + 10308406270137410351, 18444509448482991837, 12509509048086126555, 8343613342319774737, 7902938427788958534, 9924925722974851885, 3762983399809982874, 3397026083592781307, + 2116063253535766674, 12792173758770945620, 11357854367169339661, 4502375174987631263, 11374536277081101446, 718846097450449491, 6555869089578359747, 8571702699293548804, + 16993953276024768829, 12025590727803527383, 2672535181364381995, 2183111709986676538, 14822521615475063339, 10618745739454592027, 9957515072444869664, 4797521918890127374, + 4582720743315312164, 18280419827142252016, 13380629421749133990, 16786942173448683921, 5929601960379909803, 6715621236552343797, 11192523517069751956, 228125323996215945, + 11861176681909235996, 830114321507110720, 13132205092670772295, 15318287322892364044, 9027539547229346224, 17629383881225964174, 17167841317952050345, 2169650970520181403, + 11366374573920831312, 81590945660392134, 3339836125719372590, 6176339191258224503, 13498384602634172923, 17380518646558253069, 6505222573402454298, 3404196177841425416, + 18097777493026370528, 398882723487597643, 10462164796529820357, 7704526739683689547, 12438059168025171393, 8163001730991802064, 3319372757738711275, 11518015261707325261, + 11235814273447729629, 17827792960634604769, 10951458440780869321, 13957342734404907932, 7344502969085321880, 15052563297577817908, 9276707332138855229, 16077312904856138176, + 10945312442787809330, 8353703025227023259, 2523779015225839136, 5997809042199398051, 5595238423081509301, 1901941038793828090, 983256298544496123, 929860142706986099, + 1859865867925261002, 7171610585558961405, 1708769301690637951, 1669524125723022855, 1126574755172968332, 2284167219201068318, 14925928881489847288, 4349400324295146347, + 7809927122217534998, 6003738786706135597, 16541272992243715653, 11352602417510156921, 6070703545889058045, 3742828100079104877, 9416263310824977145, 1406892456840131842, + 15907861713154660116, 1789008941779432664, 1086909323248385272, 5337878342158043460, 16405416607888113885, 8262005873603178297, 12775015438669465500, 18394022127507526579, + 9832954710628449545, 14612341389215489179, 8860090710725054989, 501948203387806263, 9969784653624607958, 8524029089859194791, 18028069685553315869, 7863046312587606086, + 940114737935470576, 15661699274328042324, 2496303902262203344, 17900779334646110602, 10928989106385540880, 11160326060698033088, 6591628982832414200, 12860480197090465356, + 4005088727670892586, 10215872293486093012, 7634075494800881158, 7707144163086802537, 7085702388211248163, 12168208652772836726, 16347414703478252736, 9386623607672009940, + 10720163442173876242, 185901255673735804, 189524655270248212, 17937598483325200462, 9672767027914841820, 10911520298114954717, 17238190140596785520, 12302850034563751398, + 1819872496749425461, 5856173707582412777, 8519855762092470428, 16315722646681962500, 11554620938156340375, 11822224388003836169, 8221825485736638318, 8413608007099273503, + 16265754830488408124, 13716061973016396515, 6050948709307262401, 7654402024031530787, 149712175032162058, 8319305312043679388, 6115793437696694706, 9669523344165801010, + 16530167766100505402, 14672843572926881623, 10752276706209340624, 6888427606311779724, 9060900993058482991, 10439396339124215720, 17640982730425880863, 12916411341252677116, + 13182242421100666863, 5267519287044706312, 1780278156705475292, 12839545421803851420, 9724873776343820617, 11742602593528879231, 7598735248774560112, 4512374343657017031, + 15080601286098047668, 6570749562833033851, 17494147260115946401, 4878559727303047850, 2259644365338981526, 4232993783633913284, 3035566741498855042, 4306414296080197928, + 3878740698199360708, 1631626027682263021, 10479957065093850273, 1678395573475248815, 9964111965340163993, 8992208313996775841, 8147773940555378986, 6205594793300838098, + 11776245660806714247, 18129100488268011560, 15389929343761297429, 14544292652959164154, 18063319363114104433, 13433144948667100183, 462314076175867487, 1862686058702915115, + }, + { + 17596113920958619489, 11920824479658301378, 3786905428099710821, 11112764289322906111, 8817260450068629272, 6518847791514184438, 14593687325057339783, 18018581321332807475, + 2056254186648603335, 12143016575009965669, 3290271983033386616, 4338404082685850854, 12660444972475266123, 6345409453044955404, 13278869140778657354, 10063005167581762786, + 5303950317664091150, 17409642188139937758, 12135894348302287058, 16444741243419918751, 13935885817311324323, 8496908197501732703, 12163317723288843510, 16891353436915314685, + 4168420842704770545, 338440394515115377, 2102734009665481092, 9772725684473416465, 1247616337419130000, 4222072446972129729, 4836898447189504007, 9154346000276664999, + 14649401370738987318, 10044081678892811795, 1306517740393103656, 16586067283450489622, 8211832777731897494, 12849393859171286949, 17327526552807783018, 14171704619529498238, + 12849053390245557890, 12693731077061619694, 5891211584023409476, 1008960642219209210, 15593393872187214387, 7381248773473326001, 16559656086795428559, 2106025108820858934, + 4347667984894353308, 2129883711661222690, 1076488804494872856, 15059968377755299195, 12380937310378496242, 16425480883270944108, 4769192332535557551, 10388305045430499744, + 8553482596123775980, 12627020622203381515, 10774788686070344669, 6271789114731522097, 6270586844349642345, 5584690155569770537, 9478526526319537174, 3405597782572827563, + 10255408065071335198, 2384996481315588922, 10679447117297819075, 3734157461055440416, 1635042286365643780, 11321809677772938996, 2373960964139986040, 4869129159740273704, + 16681657814739017038, 2045405432090866328, 9473434018957974472, 5549808619751464269, 3650191693776946828, 15805634549736708741, 16764187297356548667, 6849534466635874363, + 15295973549776084894, 8150873746601283651, 8017271980921908229, 4899654767305073834, 15257501950484641691, 17365331272978440066, 14582514748324426172, 14727798284858071666, + 3180828552573045472, 17698570278585440462, 8986722854533057232, 4518665117523514004, 1718699978312382231, 16973074530347269964, 5684175730451960274, 17228649521631238473, + 2897180423141326703, 13691273076814343144, 2967066815351306232, 1032540647518716635, 16365716239989474776, 9503666134295232767, 4599017511227366879, 14475021161770955579, + 4512626226592149712, 13575110216387069176, 13488662643497927183, 9673147324484250604, 143936220816719815, 2307325398241477161, 468315224038772027, 17743582630124600591, + 1476494936296433813, 3408612621327299295, 11632756669813257564, 14296719897220703576, 2241234207239321749, 12237974399755684219, 16253635131573079566, 11892871179872526079, + 8492025373682814159, 9341562763134230960, 10104358220305514878, 17545404790574939459, 15737064134098795026, 3880597485411313924, 388663918823974597, 15032915458906101183, + 965170717686924337, 17890763357610871180, 17588999527212997589, 13975763809893342952, 5109669798963414493, 7891916061794044612, 4789971769354067287, 13049661741280223048, + 13047125712251887103, 6463859133854403980, 12163714608926575519, 1018022589444930561, 1736262463418943433, 15665908585957261569, 8814123147308650486, 4798595000205497714, + 7506024921976923590, 5624217973379693717, 1727881279508834884, 6843292805721229768, 14721392495779901462, 15739256886195814859, 4955780690636679890, 7941019297099640360, + 1679075860853192334, 11512607379850785303, 8606478479964129549, 11355506676698290554, 8638706560448426692, 11314948425220125007, 192055228531777598, 1407009579456262795, + 13286048494307428802, 9003344338654830342, 14153483128501604993, 18443414499226619410, 1307798988667786279, 4422280407538765508, 1295189585361403916, 13026087054487841568, + 2784400402546744378, 15890015749743051151, 2721137876244951114, 5096506081391833837, 7797931926335359630, 11689452583898581132, 14104784079353186010, 3188967683935096888, + 11330231400343362000, 12540174755074499088, 10360616188128925049, 3458938550662172118, 9983547038346979353, 14986318734459319551, 6345635939240236679, 6320686366534231970, + 14746231842102190633, 3622710091833101572, 12626649920759262265, 7005023704205322451, 8825692535315999015, 4515463777365688013, 9604411949030284505, 440641657645532650, + 15551627195111259367, 17849357920181562395, 12870839822102241674, 13056924324197787476, 15109676201265834798, 12974964680406025093, 12841612415554457877, 2986633967268013902, + 16172780290314506283, 6024364507532794579, 11981052290745212925, 14235100278052078327, 12912505872017936022, 4228821405313065812, 2097279962856941481, 5542690235350104224, + 6815123236701278368, 6160442936745831892, 8260371745376005388, 715614132817606668, 7741929556783214361, 6266561134278568728, 13996537792746468394, 13116379830815883328, + 2212637711439965528, 14217650183061928416, 540869510933311267, 6195450915592159668, 14904187357299823445, 5215400274033728644, 6907285779833076435, 15804222685871187391, + 11124404141552478131, 15318338152189072517, 4067893425297764249, 1697503410612817642, 219476584157335935, 13106826093753036522, 13049745208482785760, 11832231199490391669, + 10493328169052594634, 10627081887813813659, 11388663495964140754, 1823051796271862577, 12476400584366298768, 13709148086980681224, 4023418280354022922, 7805776130903851852, + 11989040498741265687, 10985482710426001838, 598034896692782488, 12230542700168560619, 1496676485546951232, 6833695919100964828, 527254545105454187, 6276811762216173921, + 9452334891928331309, 15207477416486363718, 8081596400773698461, 7528187414539292243, 4773782664413845594, 9435822414884353799, 2722451711480791323, 13642248100035410269, + }, + { + 16570512961992288509, 15784840936011612960, 17797638261217375128, 10589124475671334830, 16848440972121709899, 697679021479737566, 1456738239410270672, 7214966028857473701, + 11434157439953722433, 9090860151661100670, 8407869262026581159, 17882547383849407986, 5277257988019769029, 11463853580825342183, 13607489835500075467, 16878800997498950751, + 16059297404709367406, 8426877510924536482, 15269952608796785555, 6256768900860981984, 13142655793714702073, 14566340391957173918, 17147065283704191109, 879106195264399, + 1169887957113173798, 4936710663791789838, 8029349457377894665, 3317691225106441967, 3622769674498678594, 1954144360900150482, 1432045977341997899, 934737636789214593, + 3519438486034584001, 16935008858078961013, 11197550931955434437, 14100007087636813411, 11096181469792598416, 1355325916242161182, 3003143504356161310, 5855778258533244174, + 3469569649879539425, 2734409579230557820, 11243101718460963222, 14449395552625174865, 2832531264658408884, 12736053595733579053, 8514265014627300817, 4646556172848350801, + 1035318057221356651, 16671259128991069733, 933190532640544326, 10073090919720429370, 16298191828003546946, 89465711115503243, 1726024123088964037, 6293688775358665990, + 9646181593082190523, 16068017411165381498, 13319927590045936565, 6113055520205251774, 5576373032696512038, 13880344501714804196, 12022206929479211261, 7387979357081975315, + 14353322331225872624, 14137906560696321406, 9597841809127146330, 14479164716385450316, 18390885446667453601, 13789601367024187697, 10599569472420906075, 2183893278236533867, + 17984045804937170919, 3008713646989855019, 11485907798025297413, 14380740919052976941, 3447658178143557676, 9095585806057103587, 698302327819557146, 7922185712178340950, + 12592522203651044465, 1491459000495558958, 16988036704095888812, 4620491143241098446, 3378886739158384486, 4601357523445825298, 7994566595487207192, 4507705687201434902, + 13553890993911526641, 1237937561304731611, 11053651340854998814, 8737874920379363783, 15414508226001886918, 9311523317130687807, 12080742822716708234, 4715194040021784977, + 11413808093931937231, 8917864246709968658, 13554307490245863526, 2827250138133094370, 7637229863299622584, 6947884496333162143, 4815638417787805223, 10291803792760846879, + 12634048591085853546, 14321728045924893987, 12328747445286182403, 436433860116637979, 5910966881246245784, 11478152401127002379, 15956760875311888867, 9580341487614973173, + 5405380974793160831, 17240616607222875763, 18407061562244315632, 1777973921117407170, 6264824459040652928, 18267228171835175724, 1278124974392461846, 16099414790431742315, + 17182132836049601530, 1486820395924546410, 14089047182364760490, 5375431386740917568, 2352352269256972682, 6220398075470064021, 11444733678216077372, 8173002234516562536, + 2862732059898640615, 16485439906762588046, 8431038226899400253, 8931540904642954995, 792388953489761723, 9296018986570869365, 406447163304028453, 13270392174836298982, + 7772216409762031099, 334915652033124323, 9705063888720244418, 9829080877134570370, 13800256788887079067, 6121678265481939341, 11747955988727867922, 15561904257519022219, + 11919247213871390231, 15058232650937578711, 16509435050946309716, 4585646383560577121, 7109079914182671169, 11070600764446172401, 13212464399509938674, 18057628120939602025, + 7896339057146013415, 17613745684396593735, 4238258305077796259, 15438639564771394288, 1482402451684594916, 3104492350488980260, 5195487229736262641, 1361150797580203482, + 16473144777827648385, 1226441665397311636, 11587851779002173636, 5723433682141598491, 3818559725806959172, 9177247615754603193, 9531025756553277601, 1716800965379459594, + 10276432666086031385, 14627452075834666747, 18000674623336885291, 15328386422432799237, 18139556051577750412, 2213209867616444127, 9597214736611625280, 2458176131336755868, + 17117806110792423322, 15887422186644000204, 18208397717053970445, 7361598408210891485, 13818554723430207838, 12816010025806087198, 17883064936092688211, 1716893191215908273, + 15949855327936545010, 15892613064802301412, 7776232754624386979, 2193423430757249310, 10751855334513902678, 13772090376350944100, 5644047706587449960, 370992037484721524, + 10797607675141630710, 16625594903133516890, 4419277245854893028, 2258389314023537326, 16758443963306613299, 15289730903996513589, 17124263661185245991, 15862017582286958991, + 12272869926038995590, 4569089761627301260, 6495211060884501138, 11124576322292919277, 18314798224925096163, 13232331582712038213, 4402693571738173484, 9384301049965683996, + 9611026793910805538, 11152002731283498747, 12528844591327054806, 1485961507414531975, 12820893906402770618, 16248407567081091370, 15794078730898691457, 18231302945982292403, + 16329953561108738383, 14553061366216182492, 5112198484879130320, 1893048255589705004, 11469678860036344015, 8992526488267920066, 18208245559304821518, 8980963391041920447, + 17694621707784163170, 16317870945177246022, 9173265111786762052, 761657177655159269, 4876597178189658673, 8185879737091523833, 7520213092991049248, 6827963030155074898, + 4104149897263226969, 1008830393844825523, 1692548451180187046, 10984924503299463085, 3526468436911084942, 18012043951628458646, 15181223987778535698, 18016160796635284569, + 1524865955503524054, 7479709667052337482, 1064851658558537015, 14404181438374010311, 192685278892740648, 38546070651855709, 17006752915941498851, 7120034574635007914, + 5783711159088718530, 3503029393593618754, 10736192136662142256, 15443859608128978668, 271805305647187927, 8244170003029416464, 17017071503038388446, 2536084345379259313, + }, + { + 11535283160083833975, 15102917952496589829, 12909095696891909745, 4611119212602854744, 4195756524232932570, 3754603965149529000, 11539276962358607816, 4401424391553230481, + 6842993463058627120, 2708745114617181146, 3832382691810282630, 10333834458057320812, 15028197197004077388, 7681293343255119354, 16227537691626587485, 6703363869832495607, + 17567108285065737529, 8693245930330183070, 4025672038042952121, 3491067119247522844, 2100799837993193702, 9579542950705774054, 5091525285265581997, 1806757015709820088, + 7757354840118131559, 16227348978414023262, 5800861674805516343, 16486235803248700961, 15646700163023677309, 504258765210816549, 13724147899924699697, 13588899538317277096, + 15895596198730294923, 12513494396374459899, 13552575153453342654, 11312160635199213507, 12446618129881485796, 11286186116805515445, 1584191291800429567, 10188741008566636830, + 10157626021970320469, 6580688164585715207, 17158416694471177787, 2873484006255074946, 14852216457444863041, 7463041347566136496, 2190030775722958628, 1116223035803602728, + 8598875756176012541, 2146658618778432566, 17575736667490551813, 3602654209723135985, 7223347129734689099, 7256921024940026570, 12249118963297692963, 18168532365040819099, + 7585174044375463988, 2491923406404093282, 9355733262044036112, 11484079611349577325, 13145030009254943291, 1234932884591406048, 479845670619308618, 87735010308690865, + 17307641427543377277, 5862898330718319545, 485176909118483750, 9845223919324484076, 4293549583740204024, 12765124043303931186, 17870894781132657382, 6508172378961793849, + 14234156373312856400, 16210939788814051708, 6478880037923038392, 10435512103812909561, 18142844321802359753, 3243996803126554757, 395603619159523019, 10213589535656704607, + 13256075137060741204, 991285459654295161, 7857174835799942102, 16201105040554625727, 18398612085349994216, 4512860776717176840, 3521452275695011919, 10622137733045360311, + 14993897771652736376, 3320388672992783302, 18421556740970114213, 7438967127794745135, 15048397725789819354, 12721899159423777001, 1135125102123667189, 6637485660805148630, + 205606553319362431, 15484041062449893984, 12896720585684759529, 16675624014916513672, 7579112430564108390, 4602460356900251082, 2677787525524280838, 16723483619407414479, + 17981939084373319995, 3989926231875116241, 15342509934701840045, 12999252495311233519, 15389919024942778009, 6136335679373143315, 4117381415379544275, 13342256463740646775, + 7622769999043907199, 15227992053145045634, 2719412067263856950, 17811161335980566241, 15036003689029442301, 4919601685231030712, 6756893600905532338, 18029567067112075531, + 15804957839814547154, 7924193945337173678, 1080401129055171214, 10816444561878528576, 13171610473229361771, 8099842733764406841, 9284741070210451667, 9245195215372025289, + 7352706423771400643, 11698139248518535945, 12836997796963939813, 5747451345763534394, 7572033904536738295, 17752330422795919668, 16404760947165813434, 13477251724246705199, + 13700661979310397040, 11172816633936827025, 7087232997755437759, 601317374152115944, 16028751590837491967, 2749566595186093629, 4445248645671399873, 14714182958477634997, + 9630153311383545953, 6436885924177487891, 2150319761793765423, 3799049242167098053, 13902640603080497957, 13121685068446422901, 10099985117244683905, 10692630753099650856, + 15680946025356791729, 10935239212262708062, 15255596602338430716, 15502989798538458360, 906862139952472300, 16128362872146694687, 9038121313842477036, 18304772087255114926, + 16961332967315316395, 2872796507989583018, 18145413156795763341, 6377517922859730669, 3543621377523624979, 10701607653242580646, 17747200374013876449, 15987594633077120628, + 3487728992359467218, 11271201736509774853, 5968332690756574092, 3519182420015237550, 17144214368301394415, 1805926423542721773, 13696269518209159134, 942126126861202483, + 16211326119697754410, 9043490763746629445, 13119640043033532883, 9720862297941999125, 12662669665140843415, 2188499520755532048, 2671631563874698813, 621164898587943254, + 4804461736506692241, 10698651491947798952, 3524135800884470917, 11451653751986650957, 10424912129082359813, 6912080226482239953, 10357808719970986526, 11757540241185558439, + 1841157911016607198, 17522398974331009019, 17392843956059453556, 10510758656389762948, 7666344868273530034, 10739409534839380580, 18010518965344703872, 14478457810678500914, + 1617286442811984718, 4008909641875477702, 9328654790542709745, 12015676532404599710, 1763241944305573436, 9348325841675336084, 4072126498552363080, 17245688939115723821, + 17277555838655940279, 16604852268187595492, 11674181070138727191, 9847423407697994070, 13562477602392929772, 8181792789545038731, 6999326623175714057, 1227784795779857129, + 14923095901189736698, 90760593075377861, 11732533481504731510, 6000651333412910877, 7847865090059184838, 16859539404919901595, 2709105957383645337, 10263820965340474390, + 3161036642361826348, 5744405044938832483, 8939741099437920817, 9766097724147073614, 12117283480449144394, 14632688836103128438, 9491651214417380913, 13676064073123352224, + 4673738700738845848, 2758750364605631228, 16596306154038523740, 4124917668203310922, 2170509910405778957, 4178077301639210933, 5109055439238993109, 9627760120514410208, + 7259236533647228872, 9608619771663043870, 4148523959415435806, 8831501534166306825, 7919036824652396994, 13625969543100731176, 2546426055029800579, 14852443398434563210, + 7185947197544988797, 13306809398790059447, 11440959253278285287, 15084384140482244570, 5059824327869316332, 7585354587200615362, 6801884469610571467, 6427162399794357267, + }, + { + 1453348772281274490, 13028088570940491471, 16100623112750251840, 5210244400259879874, 9443916155735416174, 16869520919318450506, 7479400208826706574, 14374970913352602458, + 10149597276931117465, 5778043705364022356, 13888443605241209218, 6889225091714429810, 16209363638493043975, 8516350006446457646, 9214830076046105138, 8894115046323798175, + 13026208197163596446, 1935567165776196957, 17296090833026336866, 8461284827078097590, 14545382879887213077, 3302103559307506085, 16026840048628844803, 4599071908983273604, + 16375505785562641226, 4903074653735993511, 10355787845763031761, 3188849033041803587, 3145147452182864049, 13878747873108378466, 2798650520315922121, 2325962694441538199, + 3587824003578414733, 14942115314012282784, 5312737955169054323, 12478174640832111702, 7866537714619856949, 1076672758151595875, 12163513815501875048, 8343003998956997868, + 1392855790923554429, 928317148609633070, 18064469091536763974, 13446092366984636072, 1975222540660444795, 15443703204026028192, 4016450768356916736, 16656367700821047808, + 11505659892372808762, 10322665279031813234, 16948868760451861492, 13706671779256832515, 1968671033447691229, 2446599110590827903, 13721045634744350023, 11053432744256776013, + 11530583608023224967, 15979155166527001287, 592931868138800645, 5361781029549479856, 11246418752830406096, 11882251984248471436, 12026389461944629423, 12289077922798223820, + 7685868633977346753, 12528120620943035447, 18394735788313062922, 8523101916703102094, 14261596878655792929, 6364753027040158382, 6433482955677830548, 11538026548937002170, + 16221339092185577802, 2285289075757313223, 1151823739515800834, 1010537560193450047, 10651328720714490261, 426599792903611671, 7181136719074122463, 15350479907287263858, + 7559201271967630136, 3561344874280275357, 16379496429080959327, 9662226613694502966, 10995850494788192620, 14875349277152338391, 730989400822814422, 14352408679782573731, + 1328339467113622233, 13374824154992864223, 12057132015324007824, 8507647484414695512, 9286016370798146986, 9083798709648036612, 4312959727577073260, 3732271624824420608, + 13007262390446754395, 5631627784562092966, 2644488352265945851, 15340614364643014664, 6692152810984634488, 3903120388658969320, 2513009101887784188, 11999681442010831364, + 8301343575202991577, 7108394080005777057, 3477086484634015274, 8772644795325835021, 14747670046873500269, 11068536909552811695, 12349879357573378133, 8898863927320049679, + 5231654281125841638, 5181061730027800171, 10657176101406080919, 2548241936122768256, 17140253194139059526, 17961047717804458883, 12407260567549658707, 1464243167234637194, + 14129872794455315922, 16354810591314135702, 3177417010197743958, 16494351130927698692, 16557765056328447818, 3139394196704738651, 16891813998271303650, 7374470997276173905, + 18244279644122204437, 10097660526910786400, 9270181538098732492, 362243376823712218, 12369863141977485111, 2550957813014523686, 7768963959853738595, 470330801228165919, + 14632133560934390222, 13844843545192224059, 12232948338847787315, 18223343336729121487, 5219716370019749202, 2729037467284418866, 4546118556621109595, 12191675540034649671, + 15868991745937371466, 12686820201922367283, 14799186526268126508, 1241626948050017911, 7080072468753824165, 10793537086099229143, 7855065677501047369, 9747396305344084667, + 1547814412223404746, 14194709348405912167, 15771028929623393801, 10945048172868684389, 11234483878611528405, 9542605377580701681, 4555391547268120796, 14916872217451960022, + 398873176015355134, 17609838001051405497, 3752252377312468061, 10441993652683449973, 5218696730429184844, 6514361533063264268, 16024112124035725701, 13453525984055386124, + 3452753233449684767, 8937677805622461008, 15346651759545553711, 17055791435062851045, 7025089132368408239, 7345356702153522862, 17760529714295220737, 10772604293313163303, + 9940223451501501487, 13671810088603813912, 1432075826781639138, 8145669182511487125, 852664959675237493, 11640273224943853609, 16220508798123490173, 12787926729706993802, + 15490131882364039637, 5140304620645573547, 9403655757498826823, 9824850954989176893, 6366139388903157264, 1981838834216345294, 14659870286877979556, 12091578124492612524, + 9672198938588933438, 1848525214318852627, 6500329435429402369, 8547555101599185897, 4282139019453982058, 13391782461875853462, 956960429753408730, 1865393350316759140, + 9155847368519787501, 17397005067762861574, 9943535970799289809, 16365250691105460494, 13595148040475375224, 8300668237654861875, 7149654787413440141, 9226755501324961219, + 9359301870380673800, 9344500253098672841, 282669288080771734, 1996492472861301505, 4777949892950526502, 1374151996531969836, 1529729047243522545, 3371309923165192567, + 6832171775190531128, 7903951882621813138, 13717573220828409267, 15400575122727850054, 6101307565293576434, 17226772767105983314, 3485283905347036760, 18439321272895583847, + 5095761409418784463, 15484673138064200869, 7743962634971666154, 9119597914470879266, 17978461537972396088, 11490423388734028778, 6537551181013090408, 11706370534703673937, + 6635008658379780815, 11428304571924124070, 392068559161657302, 16314310970878215293, 13769492864572584674, 16880907685897496637, 2877983831559338706, 10391660973242662710, + 16861266984653213659, 1968749328591656132, 13301499995824894911, 14523175695360471336, 15130688872832023531, 1794208537894703529, 10525607764904270704, 14288604199111912452, + 6289130721434350290, 16129337673129720846, 6648351866142453445, 7705983837166554199, 8120136556582544814, 7924417299222764443, 7821995671300500892, 17697662726179084739, + }, + { + 14086745261650819748, 17953107869776717888, 1777725846494438428, 2765314406831814212, 2603997194371852991, 2223517025105045134, 15579901335531749634, 6571733615332171148, + 5478465698516556930, 287443454357750768, 12201204663663129285, 782849770204658255, 11270045035009204251, 18152683972363144337, 10463300037926642739, 14363641589598863851, + 3790592075869665284, 5120320091554329756, 15361581282394028709, 14360464660911164915, 15249656958991931940, 2563751872827049233, 13745792006936646733, 14572943674933168173, + 3167679014977537749, 3417445004776702290, 1709246158507866266, 5861367897285174360, 8437429877561342262, 17231607446243478748, 16047596399306766948, 17656438585829685897, + 12977336347094692192, 13723871955002954132, 17399919575273038847, 16163973860244779675, 14997921584659740746, 6543247361992425067, 4675926398794487109, 17580512382853194054, + 10778828023580771765, 9812611440536367405, 14168149065325522570, 6692229110872358750, 11010962724400581057, 13055203605065073902, 11396463140731730116, 16969789651736772232, + 3285258013801327520, 14114268241282882888, 5367399533198476395, 17277410244918727028, 2751895479568588421, 751309667192066167, 13038472116718581917, 16013586354334596135, + 12364168614988940500, 13194367252463423118, 12223406606527741280, 5014139294736037086, 11244699482831816093, 5336498572491451990, 14268404582568856142, 14502318820969599316, + 13295673091232554814, 313981772853781347, 11829785736449464030, 13341198421333360997, 16787450350663111566, 6525407229623764159, 2012189087039636323, 2110887590404698758, + 16984895766311549717, 1057465499230843916, 13565444352859773343, 1019268943988786852, 2190009140530404855, 10045598732295352970, 4789803496780829836, 10700158388954091321, + 14840255112988420500, 10066628133368048408, 6527354835873855384, 16106607076894419125, 3989024604987083046, 10713316697020150694, 16811389107269552264, 11294596059797019619, + 622403023838932676, 11423409007234191765, 17086830255702454786, 1213655124481948199, 3976593910085142945, 6315875935390432543, 17947502153968859628, 18031540937011531543, + 342675887578148289, 18366454547938469828, 3853530442390999217, 7499455395579634652, 768597405171336502, 1636271570683567151, 16786205337983572309, 13056241332863199715, + 15991153618414855143, 803325462494472241, 5709347424437559249, 10138502962472290727, 4675124205115228106, 9599016746550345197, 2769443894695609085, 12311020551315051087, + 11470964304318784059, 17313372553735202469, 10686997186364536871, 9228453594798060121, 57933324335302024, 8931389936300346353, 17823596478938529898, 1126996688113949835, + 10457697409306668434, 7830477435299250296, 7772379952622394912, 6109610138112185532, 4382803614711332911, 286231563340803504, 14408860285425706036, 9445290615796219321, + 17536503470234411585, 10755841521760916925, 15905055604395954066, 10842889504280336728, 10780689817551616331, 12490302644719656914, 14399349211478895553, 6458867531104408547, + 18263696007238300109, 14344529985676987842, 12351733186419515951, 14648750128188787544, 3817061805140294292, 13460439592176294955, 2651602395003150964, 17613970291099197676, + 1107162420032075035, 14270134754120385897, 4090584027509443635, 16066949131129832709, 10961641187652236678, 5480069976143996625, 8549185711954755645, 3125346629140152790, + 18337941236561689695, 2899707239319273137, 10519944305927777790, 6913088845356962577, 13796383125935684169, 2756403850320695444, 10018647980137529204, 15185795467001072323, + 678732289042224133, 4682655677430115166, 4806112367076139288, 7101473987708012826, 9907594813802528406, 5572757906434657546, 17819688309930892783, 8734990255058630746, + 4831187103439262344, 8323117365987697459, 10690753367486223100, 8744079000061869982, 12737295198832254982, 8667406752954112119, 7942221205780634447, 12612313636795726189, + 16544756691464009983, 3413196415113664237, 18442955254149959801, 10193439941306925215, 15178607176290875326, 10962146035584651400, 11757614026658646304, 16241528460556234026, + 9593194029396792148, 6376573619820601945, 14694667798662125868, 473733746941891591, 662607401339043361, 12105576520816361855, 16629740468317868384, 12416206001206121129, + 6655696780366945398, 12313066500177107654, 15308290012322636383, 2159880960357061128, 12374858353753840133, 11331956329091160239, 11794840094276473821, 14170553661155653327, + 3336445037682266403, 3746885541049520962, 12688476435907418892, 7399089272376853858, 4625608774712059624, 1535040330210000507, 6878676093546636363, 14533846142831376645, + 6250279356724130049, 3901672557903829499, 4884397030724371499, 17385695093365573379, 1240675257703364805, 9703149291204612490, 10295741452056015824, 7249917314998921510, + 3336800790505577062, 319907795530586958, 15852045830964927276, 5538471989976998401, 16558766045222360422, 3135574045680866961, 7590873965560972285, 17191236425969884848, + 542161999237385999, 11197034618748267807, 9595673564043235773, 16351524685431643264, 17127456926935032059, 15448416181819692483, 2035065584045934249, 10976830957530152324, + 3230012351101746458, 1662948417350421518, 16062645290454019168, 6021730506322388306, 9903225699788133023, 8549278993611431648, 16267339446315716903, 9759051179942016620, + 14281696762614162345, 14970002486407845832, 14940104843292093805, 7021313216295048550, 18131586729996841512, 13957742754718859034, 14322894803959804982, 12861731196696133597, + 8838210194055479741, 16190279688839291456, 16520121358582757556, 16768959728701970634, 17279896910204983538, 3582816187015369256, 372137484481226748, 12972230207718993241, + }, + { + 11480031272463364121, 2525856087688686375, 5875255653057452580, 5606696614467440464, 12094715534218447962, 6173479337351577758, 18244015404482482803, 13199349349511499695, + 6097225965006328457, 2630336000831717722, 7049638698786526134, 4203465038263485233, 4677974565964393904, 16044036678123636261, 7266049959100300634, 11339674070148504929, + 1979992348876614621, 7494083465935166973, 16035723662722831744, 6820849908885211777, 16740735239901023053, 2017530162004533409, 3087262288875984227, 9072904925376793991, + 1830819081327272165, 11951292080860646547, 18002500803418629938, 13317986695217403840, 5005574200470766330, 10138179785738652504, 1610518863105278224, 13309443265578956954, + 8383241685301818332, 6629521234420338478, 8997316634951392497, 9976633096395028594, 5171570735720385772, 8129153749198218776, 10596302915145665067, 3726383961188926740, + 14494271933981109919, 9056473564311980628, 10557807473422306929, 1549747977888697589, 4534184223615087698, 12977118933167042558, 11930457930026683843, 8346041661496792064, + 7756665146227128459, 15140824521273386708, 693427108108819556, 14819189462601222526, 4507393341357475468, 8710020646873218244, 4535796033535410044, 11884504632761707946, + 14192194428653714479, 11578618567931657764, 6032668189933768881, 15981535562386193924, 8925792218631314923, 9408714146004183681, 8859017894477140398, 1675606340537058506, + 13447287279335601133, 7792625320709870803, 5347956615552953057, 335662974452886013, 892825083599137885, 1017154417323307505, 11923254597349925339, 13818364932306372202, + 15945578153358602744, 6304522416189552358, 2545503746231186920, 14563857419242252397, 9711931929315762229, 7949505077082864287, 8780199792032437102, 10109661610848827442, + 1628958128919487289, 9818701341087940523, 16480240737841185501, 12633108361335093100, 7982147509828265471, 17959437456356197928, 14370416793872407673, 3577532534573396628, + 3265119697568866078, 13753322853248061974, 15435163376825672274, 13646180516491714938, 15620545622955090250, 18241486585498394565, 459566729126467878, 9728217493753855944, + 3187267754295885454, 7928049460349917700, 737484733736740002, 16459695106656291329, 13000947522838718027, 11933371771659873912, 4720706464557996274, 7997020276574823784, + 12655797542121172150, 16574741514838026101, 13532097913031715162, 13578832971779600350, 18171552525916232035, 1516234640889581592, 13625693062693291889, 3869929504544890755, + 5134589228918179333, 5186584327613952853, 4114726195152833642, 578924708883784650, 9128473117392713984, 17390547319520768186, 15077700332150016764, 8707446714183162237, + 9883406233903270985, 17184231210468300584, 5498903678702785900, 6903833261382863389, 11489138095102981636, 9021901931786177471, 3227433089132150317, 16639455550797627042, + 2873811051246262545, 12759149028054319339, 17920917684786397179, 2576455203420292139, 3181713670456310447, 1109589489775958300, 8920119044838278090, 16056330702164227666, + 14277712977137303378, 3896132440952049994, 13252040000848248790, 7844779060499908154, 8651912177362107995, 5527615699397299219, 17688189207278018869, 14560189235973121390, + 10861951673021959044, 16820848382034483641, 6761027571816469843, 3555385088072119757, 9363319138244920099, 14172742116400795253, 9535965929584057049, 15628517769546199992, + 839045372102318861, 2032615141683118500, 1921816835840256924, 504869150737570005, 15932184001422473505, 7849475855313305697, 1189994394722618598, 9874798350548229684, + 11973862230325381876, 7670835358390713178, 2113918814334481709, 2289514490388170400, 4570586695077036349, 9449360682811675838, 16506880875578470116, 13723444740920888736, + 14210428465641691143, 15330786835024186142, 14173742373807858721, 15041216914246450688, 6027140311333696225, 17929849776014561356, 3955935517651718925, 12080548003455651805, + 1974142217308765297, 2347591555456293560, 9029256828390326937, 15561672734848339972, 4719329765359230610, 1924866103348851009, 4950258359754995181, 3245952633715281425, + 4636201671610663345, 16662311204732135584, 12398897461787695675, 1355753199357412657, 14695655628527768564, 1692838776852698754, 7346906782826540912, 14869028424564984700, + 7242751356102425321, 7770223857340282376, 14976092066160539655, 389604327668352376, 3472336298605227213, 11456496863560248128, 14597769228654187922, 17838672631901123288, + 10997764239928568735, 16648935573721932916, 13735964592293716412, 1364388817619955667, 2511134998006929958, 811167458534120344, 9371523327214092569, 9359553283016173792, + 15311627918891973770, 17170652974122329165, 12620036374776436604, 17439165699070911984, 18124482726761033947, 18390083820175794165, 5492125340791896125, 16506113603670143446, + 14240080119980315297, 4751983902385717684, 7941412103662252178, 4796803816695225123, 9327986654018000413, 15111345676387005726, 3582558555283734415, 11913156735178831102, + 2000999613038609412, 1070613390669459171, 595743672188217500, 9169968884291187666, 17960105939867090032, 13927107353795575459, 17872593051171658925, 11087013461789649235, + 2280207936513065312, 11704149285124951522, 2138093032627822381, 6816805886026674440, 12141709925796155041, 12164596600353477920, 5711177504477877148, 17211856235678509153, + 6061025098309340877, 4060747063380827182, 15186975689243681841, 18380882388949786706, 927262408801716090, 7447789770541113239, 14074172896745727729, 15372475239996145400, + 12915868875134296756, 3071143582305760275, 10848717856747030010, 10973183887638314967, 5983269487036236823, 9885184706933333413, 14509493669845681951, 9669608065801379977, + }, + { + 1190044221726873109, 732025666411072563, 7781096808432277694, 13079879637655900365, 9048680577986693793, 12528346194104542124, 10575369576322629835, 14068913038053465964, + 3297775431071502652, 7947266993949467566, 6161673381820779563, 10604626734847205996, 17669018066064502925, 2386071939136502354, 1076023928806956187, 16075459682742916440, + 12387028018538599950, 5791681577936683396, 7536922194386052138, 15377770247350254336, 6564316170002440575, 10629371267149545956, 16125760269901494382, 16051398044897962024, + 6237354803374899565, 1782400080026955610, 16115027077529639342, 17907259814836219524, 13179234383592426160, 7302978467158243198, 13607720993463749225, 6693531817169120736, + 9659467198848065929, 13765104107473534739, 820057324758632016, 16604163594878267405, 15321524555426147294, 14524167419333362957, 157291234996957911, 3689488130722730671, + 13896531645245568605, 13598412639648980920, 3875086557856143664, 9242105512956135595, 17410265178284161091, 14904754836127450463, 2354327747158525902, 3858493849578874654, + 17288438225198818690, 15275190140700748774, 14532856272428522409, 2192294551011758591, 4275966798014984695, 13252812474545742114, 8604705074440591847, 504054699541924162, + 3623027910997185067, 15957466839356832933, 3504133700071704955, 4135728439139397021, 1160600455691338871, 4431928072859292239, 1542956305318065546, 14109181184369694855, + 9135041378474229537, 15626265487594878380, 2645257479784082878, 8105679948445763432, 8579715781576202730, 13437359533149316300, 4344754720520585005, 13326372852934103673, + 11341622086474305983, 17104958945811183278, 8154944722722765678, 6696975609844549775, 10895966360082878017, 2277686037575837054, 12236842881540344526, 2086470279064067791, + 2942668817124399651, 11439025398519255779, 17477264434671471242, 9606507094132585461, 1771475584689465869, 10791261379080046759, 12901998265727968789, 16423426725595035468, + 555419805011031621, 13715683285513290290, 15526654591995532751, 7023761423093873354, 15093366433178982304, 8625749648874372840, 6412297471587129206, 12161114204724955762, + 9362623422820895267, 6491356159483508885, 8543224082493101588, 17290929683278258587, 17348088672546490894, 3323350644823701753, 398525379151271104, 17794417664139368677, + 5880914635028486784, 2898124605786107494, 7953506026198179840, 2189477357380899574, 11207122442953694293, 9288539232660208671, 13527697625010979453, 448462453367651162, + 11262205719433170585, 7539606872920196479, 492385789142086954, 16036594120332851267, 8084547797138041354, 10063336189771527714, 2484395715469739911, 4879298610504146231, + 194633990068625151, 1028128699653523895, 7436102537486268753, 1855520454959348468, 5098690958966904466, 3342676718901330604, 6942212660412942406, 16510188838209674083, + 8847926418142642416, 13342814936333556870, 7082971165114489981, 9307541560233171550, 3013250253632606412, 289291445038948658, 2146586262066552853, 17331519863134074479, + 13747350589043267381, 6342477793304440894, 10087817302474912962, 18076409030930122816, 240893790090129194, 16364025156922886538, 13518167316464590136, 8034023137744309866, + 14013962864860194917, 5900268096329749317, 578665371390927342, 1407477069341362927, 15142181569643383319, 5217173950329651840, 7999234477517614232, 14722759409199120103, + 857809728583972424, 12785911478120621636, 11351522624307635957, 6288237507210579634, 15030282678317951238, 5521516375915920991, 1243458350248005764, 3567973108073272461, + 2834660673354035229, 10728451420162645120, 13396052377701115268, 5730788601228223475, 1616452754395208882, 18236419625072278846, 1179103743668306981, 9179882267624091153, + 14087439523312437665, 7814060325531094731, 16777382811045908592, 16916500419494378884, 16312259370378381991, 15848836870251345077, 16352246673954213424, 5228629139249919486, + 16889782864065172884, 4486233301301317202, 12424854171669138635, 12490430652425390015, 17335301435940798099, 9685877269515541824, 8728054940414220990, 12781228698496150916, + 15949574575791239459, 10394047428873141717, 15915386168603191086, 11400139144838475744, 17198578050070482368, 10505386377385262682, 11269920026804647377, 6796664536515856824, + 3285292208372623455, 3132524240856145976, 9712372580979649457, 10523781046601368064, 4797168047433466290, 11748687702182632788, 4177442610271848350, 8549047039673741703, + 6897143822689085584, 12727238507037725105, 12692443686524496083, 1187606781546544862, 11016254758711515442, 15622643445771261724, 16669571628491989170, 7099030384210775042, + 3541342827520921014, 12596176563952906070, 5209816272233406534, 17158219989390045881, 9096207539532607151, 12291463442559161708, 5188945441827912424, 12721126884401346746, + 8137171114112655022, 6980210601071561830, 10156197453074241467, 4481350962546197264, 1902625885079226055, 18310954340184922140, 10624840172920793227, 121304340390125189, + 5261036238619130144, 1418912875050883205, 4741122621962383610, 17995804738511375239, 15493968366750472633, 8417505178158780382, 14685111207483504618, 9729483826733360559, + 13434828863770210135, 2940287993177646896, 18241509923298296610, 7613631401252601295, 16815829390381900683, 8566309323681786413, 11094402941539790077, 8442864581771174862, + 17650059733379916650, 4260524072883785227, 8351659371377110706, 710338300032970720, 5512358883882849029, 15533759462039713914, 14352344066268876816, 16708588250954382269, + 7963995066277321795, 14986230345162551366, 10616366460602472778, 16186943329215681149, 9522855644164647723, 9858336437877021178, 16344942402271072884, 18027353494090590022, + }, + { + 13175608910844763921, 11168387468381338510, 5555873864224342205, 17780559602571640345, 1582669533588472814, 8145133762789622706, 1625372887980692455, 4576809104068554285, + 2900978533555060931, 1749946807863203576, 11705734511931072459, 4446980691693419424, 14437524160594907241, 12795912566640000773, 10400036254697939154, 8356113088668925361, + 8900726473285671992, 890209742292473781, 16059468388719373167, 16577940938542233221, 17381766864962227733, 231915274132518769, 16477817024012878700, 15646021286310268545, + 17511864284462479051, 4391315454785363822, 16633389246499252566, 4290325975494843430, 15912670649662225191, 14172733821603328367, 12709802627494541709, 9648238681838878635, + 4196883387800999444, 11261069840079702254, 7135968036494594517, 14537849011631244070, 1930221875891546479, 3029378442249928630, 6219774718328347890, 5922417028077454069, + 12470404961188516783, 10420863874252801995, 1369334550763670666, 4781925915058005676, 13086538845357988031, 11031029468188842766, 15252022305121561855, 10497770692868889659, + 8808726441417268278, 8201526072266158452, 1136378216494101481, 15014143365043073153, 9752793532008788265, 18423606089906807282, 12708377698545978413, 13697405528655748691, + 12840624483048662115, 13614226123606379619, 15087571995173849344, 1395488309690794621, 7173958427217959878, 1198561064176820332, 16821886260165681626, 10019479169661217935, + 8089335244564816024, 4652396396372834438, 8473523796721514770, 4493469844513366216, 5448577418901909194, 13487502520341193870, 17978888920023381084, 13125222810420128122, + 11369140159803973135, 8391820975236191093, 4865145203995472561, 14899728882581231165, 11169032718524058736, 14532440521453562763, 7464998467530340358, 14981246381757614771, + 1242992129726403300, 2014928170416925311, 6341058783517690890, 11690291498253019630, 15951586247597589423, 14033993236702971559, 16963725291567059464, 15075124616754319226, + 9901328250515532535, 12796305315632203942, 10819069447660403847, 12542238242508661240, 17077777737462854238, 4364294351375777648, 8335244153165894685, 8863541604979427311, + 12648774127830238318, 5576880401343711606, 11786867566889606858, 7421038047404952574, 1037620897405863409, 16680872357246325645, 10443290127564886223, 9718182896013830581, + 15199007294286606824, 10225517367964809030, 12827778649823141649, 16046039393887815140, 284741135347255597, 4015036609703147909, 3902771329647468314, 5128707935422024078, + 16953148044626345363, 5101833106541998042, 7655361325113312133, 15948906743291877728, 11619404274756349415, 7521254459857915559, 14978040810149334918, 18392994517091959591, + 11029520456752683158, 2297291237551781454, 7934683676555855479, 1563954733146546128, 2252805128528536255, 10523375695822939112, 4598652721256818062, 17493379105069761435, + 1717382065475436420, 12444276194160330384, 3605273785657402256, 17503422605669121015, 12066457002913396306, 18109827202111882684, 11555238022219952400, 1993655497207863497, + 14852223665677610379, 4743851404709775958, 8525514757892437524, 10402777181498591725, 17096034999928466604, 4289543339828736175, 4306145672261807290, 15366124999227707930, + 9159996652502914864, 10301349423202296392, 13530090903323000632, 4082840779249202050, 8824726775176314063, 16033767772359951785, 9181204573557660412, 7582747879080399183, + 2747551692916263132, 3151123035150683281, 13938314333309683791, 11970864255483579560, 7894236725861110998, 10515613623232567716, 10445557830757066839, 11155097727742050955, + 511773909731247800, 2463012923954369656, 15859141716591041471, 4940781664379556792, 12435624343534748284, 10278084527750129328, 13178135781125832492, 5930328346313533490, + 9122100956633732655, 4007876149211873275, 8339438668849688737, 7791075492023424820, 1771434463088769532, 17441254147665904787, 9470298561063283873, 16552350513326886351, + 2842235074681099572, 6045955093828210410, 12534027406207583719, 9023829328537353058, 1661393615224515538, 3265308219651631507, 14825441620546235307, 6190486519065164472, + 9569762631377921701, 10341967309273270300, 4697570564829842013, 16684762125903667303, 5281929071085172097, 13027961546412671127, 18398764602992176314, 3872796039504642974, + 3625046733403850478, 18072436321985678348, 12926724831909767455, 17260889391923475120, 2205546675974800411, 4823605748727161314, 11756378320036258430, 12725625915588142865, + 3620769638269480452, 531680461898170, 10213227252513668871, 15446816035938502695, 15189868868814686765, 8689079054027125562, 10767300823767661010, 12185259819280811201, + 2954211398425891878, 887045055614004603, 17024961552904496644, 11183183814128062689, 2275500228939836642, 8454621353771976628, 13706050871317440521, 13152782683288296426, + 5035609627965208105, 6338722048255995169, 11594969687968475335, 5639340633771742838, 6928934109570365590, 11589609913085167707, 18353534295019770247, 6879412911232288750, + 13223374340976842384, 5803903047180226505, 10382392856292138950, 6807712794865416975, 7273350714698980002, 16071615116869102753, 11130104866208195846, 17099222713467268806, + 4288395624114587260, 10186593737807119648, 5970871638851383523, 17274628153010076591, 11083355019391773968, 15571651381840416650, 7269503864920933911, 34314525388851272, + 13518907013113787435, 13799188114963266222, 10949327997049617616, 12851874981220611149, 14245323937486085594, 18324803746062213911, 13224741762315864830, 15910543010675195930, + 10042008593756684008, 6713471518385846030, 10961845838926538977, 855334359173719954, 7598642325962297361, 18006511265377032901, 6844179774752820363, 248341098560552116, + }, + { + 9259451175701532574, 15597985540941612547, 4541553034987877619, 8927802479718285609, 14672230714928802576, 14284617950605306700, 13339970812390782046, 787078889494395850, + 18165439414433063919, 14693282888419586094, 14992616289789993230, 13232208150801553912, 16850347485914337693, 223364064845089224, 7302899015436300563, 4391216311540396088, + 12558388403188680537, 10687978569513354810, 9373570856564649772, 4079327722512691465, 5372437374337150740, 3297421622932790851, 9326853593823854687, 8305371884938201251, + 13121472331750731303, 323745485980480220, 15110308702528119977, 4696302365805118372, 9859094500834571602, 5315656270700787939, 4187063204400651801, 1695695129564818327, + 12817586844019849804, 15425488216915741929, 15293562913126043054, 11770898970765978555, 6806103820345670787, 17789094149138630459, 3807781571713999192, 5823254011289457198, + 10112524230916015604, 13571143908656270599, 6031089010487808103, 5774558171390644848, 1808959243244841573, 14071312530426692155, 9241399242704553402, 9836150050655369231, + 14083007972403535491, 16009534514129505275, 12481249112231139039, 10309390659262581143, 5782517315325366794, 16717384553294759530, 4578392002424884134, 239005024949492645, + 1836857790164349346, 11263074778654917115, 4786985732596838094, 10525445084936097674, 1484357665966453236, 3110871407036679558, 1668700683093601768, 2736269131339849752, + 9249362594190920246, 8088172462140746331, 4080107866285682736, 16205599538438551353, 18312943145606312277, 9100632964181080418, 3969991115337154345, 5340098959750368315, + 12122448407048035645, 2891250339380982359, 12794212052932523400, 7166035112141500392, 15777441312724841162, 4552528682229856831, 2210391301762911476, 16410254680957321386, + 6910044784455706429, 11951245774670922855, 207795885948948810, 3884359690300962405, 4578772752480772344, 2948797025787714138, 16774444535658202808, 1388183483009000463, + 16561979129019905420, 1881862411760441386, 15441950514711322010, 12933020223458438856, 5570341451312029630, 18172638959728259059, 9074261777549091198, 6171200053222754816, + 10126435102176037237, 1545112930055760251, 11563139991870561593, 10766754172364938834, 5116781688621052838, 14471715119500781375, 4724882615229687192, 10031687030722046325, + 15016829467434780812, 6777508689306592732, 3811682306063049290, 11475361222871876387, 6080948654298953274, 11612742274762615033, 5268311600240325047, 9147351973710519861, + 12130203770930472759, 3880998309361490881, 8807462332494735511, 9291388074337055133, 18165312304248982432, 12733144434841535979, 18090195697688236271, 5825342884125782172, + 13034385644450400616, 16498936039876211302, 13019014556329727588, 17911110193068084415, 2034190024798401134, 1526884555531436223, 8108814142164064795, 4075908755148182733, + 6404489603451030675, 16601749455344583377, 12907740667428956072, 8446383351289364752, 698035388979141580, 3593914367189873571, 6744392605914028800, 18168078953147292456, + 14484703498492722699, 6030798911934842161, 15931002029256202288, 13491601992324491240, 8585568434702372483, 9113555433818303018, 6375455514164501736, 1476639189214882947, + 5209683814158663472, 16895526973128357178, 10457488910391635027, 17530003042232440701, 4030542539553991812, 9445656287872984963, 4212489101708176016, 837886727556721620, + 2983790309644332360, 11276772732994425347, 1310621139116895737, 3131558545024164284, 3529698629777893989, 13271773334458143370, 8171825923423406479, 305718858162497191, + 12347543988562572522, 4315642616923374215, 17586477762860427847, 1037334149869395772, 18311071546005132724, 1489505092175764848, 8926409949339059078, 10260307609945070716, + 1253128470325829265, 917180526219408600, 10471425074479693158, 12658678264459355332, 8502334428811982364, 3789675096428271759, 4531393894096863308, 12176495757901795513, + 15081766794595471636, 3944913291385975654, 14507832069904091484, 3624606168811123851, 11476968507677098110, 1056328247986567681, 15246608002200052960, 6566558379177875405, + 652315099041528631, 4287214618428297946, 11730560225433863449, 7935915607867430686, 15979159466809939008, 9209839549459543141, 9501045239358734673, 15490773728258884277, + 14281001444503113096, 9253803478538227873, 3842669056581745692, 18399990656305827164, 11841387504710944221, 16052361836589007383, 831115622826254867, 8219898649601530197, + 2339128600785774471, 1426373723429876004, 3299581455481810683, 16966544600853047157, 8419320331267538738, 8376963013864643226, 1996128805959565963, 2376766970819731241, + 10963611841695254911, 2914831079289063424, 15884256460390936367, 8724520571578171796, 329402220949313078, 8905943304651682982, 1280899166177109409, 6980213851983339340, + 5524321102331478758, 2149164875175284006, 10897639484514200325, 14605181792035620644, 316829907946955098, 13776780818927038502, 18167912441745301047, 16845612593885106452, + 15817940025107036888, 14405005201957074368, 11647077937228007305, 13795438792777593442, 9813339145840642083, 13196196205982926907, 5981408724994946624, 12303812818084690125, + 11821409230485627325, 1934768678832528678, 15798397330147151183, 2089418311766868031, 16206123311846842647, 10276976147555604745, 14858750786699873336, 2500482310681473276, + 9502861285700748804, 12163827978530279544, 9383068907817479934, 5717811606328725851, 12416198015020836733, 15689586164720677894, 14337921697724349521, 9392403465642971279, + 6696074876763293236, 10302972695440286968, 16647054763285662245, 7841147641486000427, 10612636854719975349, 3592516733641215266, 14172459823963607855, 1290213042793222575, + }, +} diff --git a/src/crypto/elliptic/p256_asm_table_test.go b/src/crypto/elliptic/p256_asm_table_test.go new file mode 100644 index 0000000..6b64f26 --- /dev/null +++ b/src/crypto/elliptic/p256_asm_table_test.go @@ -0,0 +1,59 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build amd64 || arm64 +// +build amd64 arm64 + +package elliptic + +import ( + "reflect" + "testing" +) + +func TestP256PrecomputedTable(t *testing.T) { + + basePoint := []uint64{ + 0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6, + 0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85, + 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe, + } + t1 := make([]uint64, 12) + t2 := make([]uint64, 12) + copy(t2, basePoint) + + zInv := make([]uint64, 4) + zInvSq := make([]uint64, 4) + for j := 0; j < 32; j++ { + copy(t1, t2) + for i := 0; i < 43; i++ { + // The window size is 6 so we need to double 6 times. + if i != 0 { + for k := 0; k < 6; k++ { + p256PointDoubleAsm(t1, t1) + } + } + // Convert the point to affine form. (Its values are + // still in Montgomery form however.) + p256Inverse(zInv, t1[8:12]) + p256Sqr(zInvSq, zInv, 1) + p256Mul(zInv, zInv, zInvSq) + + p256Mul(t1[:4], t1[:4], zInvSq) + p256Mul(t1[4:8], t1[4:8], zInv) + + copy(t1[8:12], basePoint[8:12]) + + if got, want := p256Precomputed[i][j*8:(j*8)+8], t1[:8]; !reflect.DeepEqual(got, want) { + t.Fatalf("Unexpected table entry at [%d][%d:%d]: got %v, want %v", i, j*8, (j*8)+8, got, want) + } + } + if j == 0 { + p256PointDoubleAsm(t2, basePoint) + } else { + p256PointAddAsm(t2, t2, basePoint) + } + } + +} diff --git a/src/crypto/elliptic/p256_generic.go b/src/crypto/elliptic/p256_generic.go new file mode 100644 index 0000000..25762a8 --- /dev/null +++ b/src/crypto/elliptic/p256_generic.go @@ -0,0 +1,15 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !amd64 && !s390x && !arm64 && !ppc64le +// +build !amd64,!s390x,!arm64,!ppc64le + +package elliptic + +var p256 p256Curve + +func initP256Arch() { + // Use pure Go implementation. + p256 = p256Curve{p256Params} +} diff --git a/src/crypto/elliptic/p256_ppc64le.go b/src/crypto/elliptic/p256_ppc64le.go new file mode 100644 index 0000000..40d9ed9 --- /dev/null +++ b/src/crypto/elliptic/p256_ppc64le.go @@ -0,0 +1,522 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ppc64le +// +build ppc64le + +package elliptic + +import ( + "crypto/subtle" + "encoding/binary" + "math/big" +) + +// This was ported from the s390x implementation for ppc64le. +// Some hints are included here for changes that should be +// in the big endian ppc64 implementation, however more +// investigation and testing is needed for the ppc64 big +// endian version to work. +type p256CurveFast struct { + *CurveParams +} + +type p256Point struct { + x [32]byte + y [32]byte + z [32]byte +} + +var ( + p256 Curve + p256PreFast *[37][64]p256Point +) + +func initP256Arch() { + p256 = p256CurveFast{p256Params} + initTable() + return +} + +func (curve p256CurveFast) Params() *CurveParams { + return curve.CurveParams +} + +// Functions implemented in p256_asm_ppc64le.s +// Montgomery multiplication modulo P256 +// +//go:noescape +func p256MulAsm(res, in1, in2 []byte) + +// Montgomery square modulo P256 +// +func p256Sqr(res, in []byte) { + p256MulAsm(res, in, in) +} + +// Montgomery multiplication by 1 +// +//go:noescape +func p256FromMont(res, in []byte) + +// iff cond == 1 val <- -val +// +//go:noescape +func p256NegCond(val *p256Point, cond int) + +// if cond == 0 res <- b; else res <- a +// +//go:noescape +func p256MovCond(res, a, b *p256Point, cond int) + +// Constant time table access +// +//go:noescape +func p256Select(point *p256Point, table []p256Point, idx int) + +// +//go:noescape +func p256SelectBase(point *p256Point, table []p256Point, idx int) + +// Point add with P2 being affine point +// If sign == 1 -> P2 = -P2 +// If sel == 0 -> P3 = P1 +// if zero == 0 -> P3 = P2 +// +//go:noescape +func p256PointAddAffineAsm(res, in1, in2 *p256Point, sign, sel, zero int) + +// Point add +// +//go:noescape +func p256PointAddAsm(res, in1, in2 *p256Point) int + +// +//go:noescape +func p256PointDoubleAsm(res, in *p256Point) + +// The result should be a slice in LE order, but the slice +// from big.Bytes is in BE order. +// TODO: For big endian implementation, do not reverse bytes. +// +func fromBig(big *big.Int) []byte { + // This could be done a lot more efficiently... + res := big.Bytes() + t := make([]byte, 32) + if len(res) < 32 { + copy(t[32-len(res):], res) + } else if len(res) == 32 { + copy(t, res) + } else { + copy(t, res[len(res)-32:]) + } + p256ReverseBytes(t, t) + return t +} + +// p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar +// is equal or greater than the order of the group, it's reduced modulo that order. +func p256GetMultiplier(in []byte) []byte { + n := new(big.Int).SetBytes(in) + + if n.Cmp(p256Params.N) >= 0 { + n.Mod(n, p256Params.N) + } + return fromBig(n) +} + +// p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the +// underlying field of the curve. (See initP256 for the value.) Thus rr here is +// R×R mod p. See comment in Inverse about how this is used. +// TODO: For big endian implementation, the bytes in these slices should be in reverse order, +// as found in the s390x implementation. +var rr = []byte{0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0, 0xff, 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00} + +// (This is one, in the Montgomery domain.) +var one = []byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00} + +func maybeReduceModP(in *big.Int) *big.Int { + if in.Cmp(p256Params.P) < 0 { + return in + } + return new(big.Int).Mod(in, p256Params.P) +} + +// p256ReverseBytes copies the first 32 bytes from in to res in reverse order. +func p256ReverseBytes(res, in []byte) { + // remove bounds check + in = in[:32] + res = res[:32] + + // Load in reverse order + a := binary.BigEndian.Uint64(in[0:]) + b := binary.BigEndian.Uint64(in[8:]) + c := binary.BigEndian.Uint64(in[16:]) + d := binary.BigEndian.Uint64(in[24:]) + + // Store in normal order + binary.LittleEndian.PutUint64(res[0:], d) + binary.LittleEndian.PutUint64(res[8:], c) + binary.LittleEndian.PutUint64(res[16:], b) + binary.LittleEndian.PutUint64(res[24:], a) +} + +func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { + var r1, r2 p256Point + + scalarReduced := p256GetMultiplier(baseScalar) + r1IsInfinity := scalarIsZero(scalarReduced) + r1.p256BaseMult(scalarReduced) + + copy(r2.x[:], fromBig(maybeReduceModP(bigX))) + copy(r2.y[:], fromBig(maybeReduceModP(bigY))) + copy(r2.z[:], one) + p256MulAsm(r2.x[:], r2.x[:], rr[:]) + p256MulAsm(r2.y[:], r2.y[:], rr[:]) + + scalarReduced = p256GetMultiplier(scalar) + r2IsInfinity := scalarIsZero(scalarReduced) + r2.p256ScalarMult(scalarReduced) + + var sum, double p256Point + pointsEqual := p256PointAddAsm(&sum, &r1, &r2) + p256PointDoubleAsm(&double, &r1) + p256MovCond(&sum, &double, &sum, pointsEqual) + p256MovCond(&sum, &r1, &sum, r2IsInfinity) + p256MovCond(&sum, &r2, &sum, r1IsInfinity) + return sum.p256PointToAffine() +} + +func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) { + var r p256Point + reducedScalar := p256GetMultiplier(scalar) + r.p256BaseMult(reducedScalar) + return r.p256PointToAffine() +} + +func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { + scalarReduced := p256GetMultiplier(scalar) + var r p256Point + copy(r.x[:], fromBig(maybeReduceModP(bigX))) + copy(r.y[:], fromBig(maybeReduceModP(bigY))) + copy(r.z[:], one) + p256MulAsm(r.x[:], r.x[:], rr[:]) + p256MulAsm(r.y[:], r.y[:], rr[:]) + r.p256ScalarMult(scalarReduced) + return r.p256PointToAffine() +} + +func scalarIsZero(scalar []byte) int { + // If any byte is not zero, return 0. + // Check for -0.... since that appears to compare to 0. + b := byte(0) + for _, s := range scalar { + b |= s + } + return subtle.ConstantTimeByteEq(b, 0) +} + +func (p *p256Point) p256PointToAffine() (x, y *big.Int) { + zInv := make([]byte, 32) + zInvSq := make([]byte, 32) + + p256Inverse(zInv, p.z[:]) + p256Sqr(zInvSq, zInv) + p256MulAsm(zInv, zInv, zInvSq) + + p256MulAsm(zInvSq, p.x[:], zInvSq) + p256MulAsm(zInv, p.y[:], zInv) + + p256FromMont(zInvSq, zInvSq) + p256FromMont(zInv, zInv) + + // SetBytes expects a slice in big endian order, + // since ppc64le is little endian, reverse the bytes. + // TODO: For big endian, bytes don't need to be reversed. + p256ReverseBytes(zInvSq, zInvSq) + p256ReverseBytes(zInv, zInv) + rx := new(big.Int).SetBytes(zInvSq) + ry := new(big.Int).SetBytes(zInv) + return rx, ry +} + +// p256Inverse sets out to in^-1 mod p. +func p256Inverse(out, in []byte) { + var stack [6 * 32]byte + p2 := stack[32*0 : 32*0+32] + p4 := stack[32*1 : 32*1+32] + p8 := stack[32*2 : 32*2+32] + p16 := stack[32*3 : 32*3+32] + p32 := stack[32*4 : 32*4+32] + + p256Sqr(out, in) + p256MulAsm(p2, out, in) // 3*p + + p256Sqr(out, p2) + p256Sqr(out, out) + p256MulAsm(p4, out, p2) // f*p + + p256Sqr(out, p4) + p256Sqr(out, out) + p256Sqr(out, out) + p256Sqr(out, out) + p256MulAsm(p8, out, p4) // ff*p + + p256Sqr(out, p8) + + for i := 0; i < 7; i++ { + p256Sqr(out, out) + } + p256MulAsm(p16, out, p8) // ffff*p + + p256Sqr(out, p16) + for i := 0; i < 15; i++ { + p256Sqr(out, out) + } + p256MulAsm(p32, out, p16) // ffffffff*p + + p256Sqr(out, p32) + + for i := 0; i < 31; i++ { + p256Sqr(out, out) + } + p256MulAsm(out, out, in) + + for i := 0; i < 32*4; i++ { + p256Sqr(out, out) + } + p256MulAsm(out, out, p32) + + for i := 0; i < 32; i++ { + p256Sqr(out, out) + } + p256MulAsm(out, out, p32) + + for i := 0; i < 16; i++ { + p256Sqr(out, out) + } + p256MulAsm(out, out, p16) + + for i := 0; i < 8; i++ { + p256Sqr(out, out) + } + p256MulAsm(out, out, p8) + + p256Sqr(out, out) + p256Sqr(out, out) + p256Sqr(out, out) + p256Sqr(out, out) + p256MulAsm(out, out, p4) + + p256Sqr(out, out) + p256Sqr(out, out) + p256MulAsm(out, out, p2) + + p256Sqr(out, out) + p256Sqr(out, out) + p256MulAsm(out, out, in) +} + +func boothW5(in uint) (int, int) { + var s uint = ^((in >> 5) - 1) + var d uint = (1 << 6) - in - 1 + d = (d & s) | (in & (^s)) + d = (d >> 1) + (d & 1) + return int(d), int(s & 1) +} + +func boothW6(in uint) (int, int) { + var s uint = ^((in >> 6) - 1) + var d uint = (1 << 7) - in - 1 + d = (d & s) | (in & (^s)) + d = (d >> 1) + (d & 1) + return int(d), int(s & 1) +} + +func boothW7(in uint) (int, int) { + var s uint = ^((in >> 7) - 1) + var d uint = (1 << 8) - in - 1 + d = (d & s) | (in & (^s)) + d = (d >> 1) + (d & 1) + return int(d), int(s & 1) +} + +func initTable() { + + p256PreFast = new([37][64]p256Point) + + // TODO: For big endian, these slices should be in reverse byte order, + // as found in the s390x implementation. + basePoint := p256Point{ + x: [32]byte{0x3c, 0x14, 0xa9, 0x18, 0xd4, 0x30, 0xe7, 0x79, 0x01, 0xb6, 0xed, 0x5f, 0xfc, 0x95, 0xba, 0x75, + 0x10, 0x25, 0x62, 0x77, 0x2b, 0x73, 0xfb, 0x79, 0xc6, 0x55, 0x37, 0xa5, 0x76, 0x5f, 0x90, 0x18}, //(p256.x*2^256)%p + y: [32]byte{0x0a, 0x56, 0x95, 0xce, 0x57, 0x53, 0xf2, 0xdd, 0x5c, 0xe4, 0x19, 0xba, 0xe4, 0xb8, 0x4a, 0x8b, + 0x25, 0xf3, 0x21, 0xdd, 0x88, 0x86, 0xe8, 0xd2, 0x85, 0x5d, 0x88, 0x25, 0x18, 0xff, 0x71, 0x85}, //(p256.y*2^256)%p + z: [32]byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, //(p256.z*2^256)%p + + } + + t1 := new(p256Point) + t2 := new(p256Point) + *t2 = basePoint + + zInv := make([]byte, 32) + zInvSq := make([]byte, 32) + for j := 0; j < 64; j++ { + *t1 = *t2 + for i := 0; i < 37; i++ { + // The window size is 7 so we need to double 7 times. + if i != 0 { + for k := 0; k < 7; k++ { + p256PointDoubleAsm(t1, t1) + } + } + // Convert the point to affine form. (Its values are + // still in Montgomery form however.) + p256Inverse(zInv, t1.z[:]) + p256Sqr(zInvSq, zInv) + p256MulAsm(zInv, zInv, zInvSq) + + p256MulAsm(t1.x[:], t1.x[:], zInvSq) + p256MulAsm(t1.y[:], t1.y[:], zInv) + + copy(t1.z[:], basePoint.z[:]) + // Update the table entry + copy(p256PreFast[i][j].x[:], t1.x[:]) + copy(p256PreFast[i][j].y[:], t1.y[:]) + } + if j == 0 { + p256PointDoubleAsm(t2, &basePoint) + } else { + p256PointAddAsm(t2, t2, &basePoint) + } + } +} + +func (p *p256Point) p256BaseMult(scalar []byte) { + // TODO: For big endian, the index should be 31 not 0. + wvalue := (uint(scalar[0]) << 1) & 0xff + sel, sign := boothW7(uint(wvalue)) + p256SelectBase(p, p256PreFast[0][:], sel) + p256NegCond(p, sign) + + copy(p.z[:], one[:]) + var t0 p256Point + + copy(t0.z[:], one[:]) + + index := uint(6) + zero := sel + for i := 1; i < 37; i++ { + // TODO: For big endian, use the same index values as found + // in the s390x implementation. + if index < 247 { + wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0xff + } else { + wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0xff + } + index += 7 + sel, sign = boothW7(uint(wvalue)) + p256SelectBase(&t0, p256PreFast[i][:], sel) + p256PointAddAffineAsm(p, p, &t0, sign, sel, zero) + zero |= sel + } +} + +func (p *p256Point) p256ScalarMult(scalar []byte) { + // precomp is a table of precomputed points that stores powers of p + // from p^1 to p^16. + var precomp [16]p256Point + var t0, t1, t2, t3 p256Point + + *&precomp[0] = *p + p256PointDoubleAsm(&t0, p) + p256PointDoubleAsm(&t1, &t0) + p256PointDoubleAsm(&t2, &t1) + p256PointDoubleAsm(&t3, &t2) + *&precomp[1] = t0 + *&precomp[3] = t1 + *&precomp[7] = t2 + *&precomp[15] = t3 + + p256PointAddAsm(&t0, &t0, p) + p256PointAddAsm(&t1, &t1, p) + p256PointAddAsm(&t2, &t2, p) + + *&precomp[2] = t0 + *&precomp[4] = t1 + *&precomp[8] = t2 + + p256PointDoubleAsm(&t0, &t0) + p256PointDoubleAsm(&t1, &t1) + *&precomp[5] = t0 + *&precomp[9] = t1 + + p256PointAddAsm(&t2, &t0, p) + p256PointAddAsm(&t1, &t1, p) + *&precomp[6] = t2 + *&precomp[10] = t1 + + p256PointDoubleAsm(&t0, &t0) + p256PointDoubleAsm(&t2, &t2) + *&precomp[11] = t0 + *&precomp[13] = t2 + + p256PointAddAsm(&t0, &t0, p) + p256PointAddAsm(&t2, &t2, p) + *&precomp[12] = t0 + *&precomp[14] = t2 + + // Start scanning the window from top bit + index := uint(254) + var sel, sign int + + // TODO: For big endian, use index found in s390x implementation. + wvalue := (uint(scalar[index/8]) >> (index % 8)) & 0x3f + sel, _ = boothW5(uint(wvalue)) + p256Select(p, precomp[:], sel) + zero := sel + + for index > 4 { + index -= 5 + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + + // TODO: For big endian, use index values as found in s390x implementation. + if index < 247 { + wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0x3f + } else { + wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0x3f + } + + sel, sign = boothW5(uint(wvalue)) + + p256Select(&t0, precomp[:], sel) + p256NegCond(&t0, sign) + p256PointAddAsm(&t1, p, &t0) + p256MovCond(&t1, &t1, p, sel) + p256MovCond(p, &t1, &t0, zero) + zero |= sel + } + + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + + // TODO: Use index for big endian as found in s390x implementation. + wvalue = (uint(scalar[0]) << 1) & 0x3f + sel, sign = boothW5(uint(wvalue)) + + p256Select(&t0, precomp[:], sel) + p256NegCond(&t0, sign) + p256PointAddAsm(&t1, p, &t0) + p256MovCond(&t1, &t1, p, sel) + p256MovCond(p, &t1, &t0, zero) +} diff --git a/src/crypto/elliptic/p256_s390x.go b/src/crypto/elliptic/p256_s390x.go new file mode 100644 index 0000000..91e613b --- /dev/null +++ b/src/crypto/elliptic/p256_s390x.go @@ -0,0 +1,577 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build s390x +// +build s390x + +package elliptic + +import ( + "crypto/subtle" + "internal/cpu" + "math/big" + "unsafe" +) + +const ( + offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX) + offsetS390xHasVE1 = unsafe.Offsetof(cpu.S390X.HasVXE) +) + +type p256CurveFast struct { + *CurveParams +} + +type p256Point struct { + x [32]byte + y [32]byte + z [32]byte +} + +var ( + p256 Curve + p256PreFast *[37][64]p256Point +) + +//go:noescape +func p256MulInternalTrampolineSetup() + +//go:noescape +func p256SqrInternalTrampolineSetup() + +//go:noescape +func p256MulInternalVX() + +//go:noescape +func p256MulInternalVMSL() + +//go:noescape +func p256SqrInternalVX() + +//go:noescape +func p256SqrInternalVMSL() + +func initP256Arch() { + if cpu.S390X.HasVX { + p256 = p256CurveFast{p256Params} + initTable() + return + } + + // No vector support, use pure Go implementation. + p256 = p256Curve{p256Params} + return +} + +func (curve p256CurveFast) Params() *CurveParams { + return curve.CurveParams +} + +// Functions implemented in p256_asm_s390x.s +// Montgomery multiplication modulo P256 +// +//go:noescape +func p256SqrAsm(res, in1 []byte) + +//go:noescape +func p256MulAsm(res, in1, in2 []byte) + +// Montgomery square modulo P256 +func p256Sqr(res, in []byte) { + p256SqrAsm(res, in) +} + +// Montgomery multiplication by 1 +// +//go:noescape +func p256FromMont(res, in []byte) + +// iff cond == 1 val <- -val +// +//go:noescape +func p256NegCond(val *p256Point, cond int) + +// if cond == 0 res <- b; else res <- a +// +//go:noescape +func p256MovCond(res, a, b *p256Point, cond int) + +// Constant time table access +// +//go:noescape +func p256Select(point *p256Point, table []p256Point, idx int) + +//go:noescape +func p256SelectBase(point *p256Point, table []p256Point, idx int) + +// Montgomery multiplication modulo Ord(G) +// +//go:noescape +func p256OrdMul(res, in1, in2 []byte) + +// Montgomery square modulo Ord(G), repeated n times +func p256OrdSqr(res, in []byte, n int) { + copy(res, in) + for i := 0; i < n; i += 1 { + p256OrdMul(res, res, res) + } +} + +// Point add with P2 being affine point +// If sign == 1 -> P2 = -P2 +// If sel == 0 -> P3 = P1 +// if zero == 0 -> P3 = P2 +// +//go:noescape +func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int) + +// Point add +// +//go:noescape +func p256PointAddAsm(P3, P1, P2 *p256Point) int + +//go:noescape +func p256PointDoubleAsm(P3, P1 *p256Point) + +func (curve p256CurveFast) Inverse(k *big.Int) *big.Int { + if k.Cmp(p256Params.N) >= 0 { + // This should never happen. + reducedK := new(big.Int).Mod(k, p256Params.N) + k = reducedK + } + + // table will store precomputed powers of x. The 32 bytes at index + // i store x^(i+1). + var table [15][32]byte + + x := fromBig(k) + // This code operates in the Montgomery domain where R = 2^256 mod n + // and n is the order of the scalar field. (See initP256 for the + // value.) Elements in the Montgomery domain take the form a×R and + // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR + // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, + // i.e. converts x into the Montgomery domain. Stored in BigEndian form + RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59, + 0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2} + + p256OrdMul(table[0][:], x, RR) + + // Prepare the table, no need in constant time access, because the + // power is not a secret. (Entry 0 is never used.) + for i := 2; i < 16; i += 2 { + p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1) + p256OrdMul(table[i][:], table[i-1][:], table[0][:]) + } + + copy(x, table[14][:]) // f + + p256OrdSqr(x[0:32], x[0:32], 4) + p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff + t := make([]byte, 32) + copy(t, x) + + p256OrdSqr(x, x, 8) + p256OrdMul(x, x, t) // ffff + copy(t, x) + + p256OrdSqr(x, x, 16) + p256OrdMul(x, x, t) // ffffffff + copy(t, x) + + p256OrdSqr(x, x, 64) // ffffffff0000000000000000 + p256OrdMul(x, x, t) // ffffffff00000000ffffffff + p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000 + p256OrdMul(x, x, t) // ffffffff00000000ffffffffffffffff + + // Remaining 32 windows + expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, + 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf} + for i := 0; i < 32; i++ { + p256OrdSqr(x, x, 4) + p256OrdMul(x, x, table[expLo[i]-1][:]) + } + + // Multiplying by one in the Montgomery domain converts a Montgomery + // value out of the domain. + one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1} + p256OrdMul(x, x, one) + + return new(big.Int).SetBytes(x) +} + +// fromBig converts a *big.Int into a format used by this code. +func fromBig(big *big.Int) []byte { + // This could be done a lot more efficiently... + res := big.Bytes() + if 32 == len(res) { + return res + } + t := make([]byte, 32) + offset := 32 - len(res) + for i := len(res) - 1; i >= 0; i-- { + t[i+offset] = res[i] + } + return t +} + +// p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar +// is equal or greater than the order of the group, it's reduced modulo that order. +func p256GetMultiplier(in []byte) []byte { + n := new(big.Int).SetBytes(in) + + if n.Cmp(p256Params.N) >= 0 { + n.Mod(n, p256Params.N) + } + return fromBig(n) +} + +// p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the +// underlying field of the curve. (See initP256 for the value.) Thus rr here is +// R×R mod p. See comment in Inverse about how this is used. +var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, + 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03} + +// (This is one, in the Montgomery domain.) +var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01} + +func maybeReduceModP(in *big.Int) *big.Int { + if in.Cmp(p256Params.P) < 0 { + return in + } + return new(big.Int).Mod(in, p256Params.P) +} + +func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { + var r1, r2 p256Point + scalarReduced := p256GetMultiplier(baseScalar) + r1IsInfinity := scalarIsZero(scalarReduced) + r1.p256BaseMult(scalarReduced) + + copy(r2.x[:], fromBig(maybeReduceModP(bigX))) + copy(r2.y[:], fromBig(maybeReduceModP(bigY))) + copy(r2.z[:], one) + p256MulAsm(r2.x[:], r2.x[:], rr[:]) + p256MulAsm(r2.y[:], r2.y[:], rr[:]) + + scalarReduced = p256GetMultiplier(scalar) + r2IsInfinity := scalarIsZero(scalarReduced) + r2.p256ScalarMult(p256GetMultiplier(scalar)) + + var sum, double p256Point + pointsEqual := p256PointAddAsm(&sum, &r1, &r2) + p256PointDoubleAsm(&double, &r1) + p256MovCond(&sum, &double, &sum, pointsEqual) + p256MovCond(&sum, &r1, &sum, r2IsInfinity) + p256MovCond(&sum, &r2, &sum, r1IsInfinity) + return sum.p256PointToAffine() +} + +func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) { + var r p256Point + r.p256BaseMult(p256GetMultiplier(scalar)) + return r.p256PointToAffine() +} + +func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { + var r p256Point + copy(r.x[:], fromBig(maybeReduceModP(bigX))) + copy(r.y[:], fromBig(maybeReduceModP(bigY))) + copy(r.z[:], one) + p256MulAsm(r.x[:], r.x[:], rr[:]) + p256MulAsm(r.y[:], r.y[:], rr[:]) + r.p256ScalarMult(p256GetMultiplier(scalar)) + return r.p256PointToAffine() +} + +// scalarIsZero returns 1 if scalar represents the zero value, and zero +// otherwise. +func scalarIsZero(scalar []byte) int { + b := byte(0) + for _, s := range scalar { + b |= s + } + return subtle.ConstantTimeByteEq(b, 0) +} + +func (p *p256Point) p256PointToAffine() (x, y *big.Int) { + zInv := make([]byte, 32) + zInvSq := make([]byte, 32) + + p256Inverse(zInv, p.z[:]) + p256Sqr(zInvSq, zInv) + p256MulAsm(zInv, zInv, zInvSq) + + p256MulAsm(zInvSq, p.x[:], zInvSq) + p256MulAsm(zInv, p.y[:], zInv) + + p256FromMont(zInvSq, zInvSq) + p256FromMont(zInv, zInv) + + return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv) +} + +// p256Inverse sets out to in^-1 mod p. +func p256Inverse(out, in []byte) { + var stack [6 * 32]byte + p2 := stack[32*0 : 32*0+32] + p4 := stack[32*1 : 32*1+32] + p8 := stack[32*2 : 32*2+32] + p16 := stack[32*3 : 32*3+32] + p32 := stack[32*4 : 32*4+32] + + p256Sqr(out, in) + p256MulAsm(p2, out, in) // 3*p + + p256Sqr(out, p2) + p256Sqr(out, out) + p256MulAsm(p4, out, p2) // f*p + + p256Sqr(out, p4) + p256Sqr(out, out) + p256Sqr(out, out) + p256Sqr(out, out) + p256MulAsm(p8, out, p4) // ff*p + + p256Sqr(out, p8) + + for i := 0; i < 7; i++ { + p256Sqr(out, out) + } + p256MulAsm(p16, out, p8) // ffff*p + + p256Sqr(out, p16) + for i := 0; i < 15; i++ { + p256Sqr(out, out) + } + p256MulAsm(p32, out, p16) // ffffffff*p + + p256Sqr(out, p32) + + for i := 0; i < 31; i++ { + p256Sqr(out, out) + } + p256MulAsm(out, out, in) + + for i := 0; i < 32*4; i++ { + p256Sqr(out, out) + } + p256MulAsm(out, out, p32) + + for i := 0; i < 32; i++ { + p256Sqr(out, out) + } + p256MulAsm(out, out, p32) + + for i := 0; i < 16; i++ { + p256Sqr(out, out) + } + p256MulAsm(out, out, p16) + + for i := 0; i < 8; i++ { + p256Sqr(out, out) + } + p256MulAsm(out, out, p8) + + p256Sqr(out, out) + p256Sqr(out, out) + p256Sqr(out, out) + p256Sqr(out, out) + p256MulAsm(out, out, p4) + + p256Sqr(out, out) + p256Sqr(out, out) + p256MulAsm(out, out, p2) + + p256Sqr(out, out) + p256Sqr(out, out) + p256MulAsm(out, out, in) +} + +func boothW5(in uint) (int, int) { + var s uint = ^((in >> 5) - 1) + var d uint = (1 << 6) - in - 1 + d = (d & s) | (in & (^s)) + d = (d >> 1) + (d & 1) + return int(d), int(s & 1) +} + +func boothW7(in uint) (int, int) { + var s uint = ^((in >> 7) - 1) + var d uint = (1 << 8) - in - 1 + d = (d & s) | (in & (^s)) + d = (d >> 1) + (d & 1) + return int(d), int(s & 1) +} + +func initTable() { + p256PreFast = new([37][64]p256Point) //z coordinate not used + basePoint := p256Point{ + x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10, + 0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p + y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25, + 0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p + z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p + } + + t1 := new(p256Point) + t2 := new(p256Point) + *t2 = basePoint + + zInv := make([]byte, 32) + zInvSq := make([]byte, 32) + for j := 0; j < 64; j++ { + *t1 = *t2 + for i := 0; i < 37; i++ { + // The window size is 7 so we need to double 7 times. + if i != 0 { + for k := 0; k < 7; k++ { + p256PointDoubleAsm(t1, t1) + } + } + // Convert the point to affine form. (Its values are + // still in Montgomery form however.) + p256Inverse(zInv, t1.z[:]) + p256Sqr(zInvSq, zInv) + p256MulAsm(zInv, zInv, zInvSq) + + p256MulAsm(t1.x[:], t1.x[:], zInvSq) + p256MulAsm(t1.y[:], t1.y[:], zInv) + + copy(t1.z[:], basePoint.z[:]) + // Update the table entry + copy(p256PreFast[i][j].x[:], t1.x[:]) + copy(p256PreFast[i][j].y[:], t1.y[:]) + } + if j == 0 { + p256PointDoubleAsm(t2, &basePoint) + } else { + p256PointAddAsm(t2, t2, &basePoint) + } + } +} + +func (p *p256Point) p256BaseMult(scalar []byte) { + wvalue := (uint(scalar[31]) << 1) & 0xff + sel, sign := boothW7(uint(wvalue)) + p256SelectBase(p, p256PreFast[0][:], sel) + p256NegCond(p, sign) + + copy(p.z[:], one[:]) + var t0 p256Point + + copy(t0.z[:], one[:]) + + index := uint(6) + zero := sel + + for i := 1; i < 37; i++ { + if index < 247 { + wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff + } else { + wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff + } + index += 7 + sel, sign = boothW7(uint(wvalue)) + p256SelectBase(&t0, p256PreFast[i][:], sel) + p256PointAddAffineAsm(p, p, &t0, sign, sel, zero) + zero |= sel + } +} + +func (p *p256Point) p256ScalarMult(scalar []byte) { + // precomp is a table of precomputed points that stores powers of p + // from p^1 to p^16. + var precomp [16]p256Point + var t0, t1, t2, t3 p256Point + + // Prepare the table + *&precomp[0] = *p + + p256PointDoubleAsm(&t0, p) + p256PointDoubleAsm(&t1, &t0) + p256PointDoubleAsm(&t2, &t1) + p256PointDoubleAsm(&t3, &t2) + *&precomp[1] = t0 // 2 + *&precomp[3] = t1 // 4 + *&precomp[7] = t2 // 8 + *&precomp[15] = t3 // 16 + + p256PointAddAsm(&t0, &t0, p) + p256PointAddAsm(&t1, &t1, p) + p256PointAddAsm(&t2, &t2, p) + *&precomp[2] = t0 // 3 + *&precomp[4] = t1 // 5 + *&precomp[8] = t2 // 9 + + p256PointDoubleAsm(&t0, &t0) + p256PointDoubleAsm(&t1, &t1) + *&precomp[5] = t0 // 6 + *&precomp[9] = t1 // 10 + + p256PointAddAsm(&t2, &t0, p) + p256PointAddAsm(&t1, &t1, p) + *&precomp[6] = t2 // 7 + *&precomp[10] = t1 // 11 + + p256PointDoubleAsm(&t0, &t0) + p256PointDoubleAsm(&t2, &t2) + *&precomp[11] = t0 // 12 + *&precomp[13] = t2 // 14 + + p256PointAddAsm(&t0, &t0, p) + p256PointAddAsm(&t2, &t2, p) + *&precomp[12] = t0 // 13 + *&precomp[14] = t2 // 15 + + // Start scanning the window from top bit + index := uint(254) + var sel, sign int + + wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f + sel, _ = boothW5(uint(wvalue)) + p256Select(p, precomp[:], sel) + zero := sel + + for index > 4 { + index -= 5 + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + + if index < 247 { + wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f + } else { + wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f + } + + sel, sign = boothW5(uint(wvalue)) + + p256Select(&t0, precomp[:], sel) + p256NegCond(&t0, sign) + p256PointAddAsm(&t1, p, &t0) + p256MovCond(&t1, &t1, p, sel) + p256MovCond(p, &t1, &t0, zero) + zero |= sel + } + + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + p256PointDoubleAsm(p, p) + + wvalue = (uint(scalar[31]) << 1) & 0x3f + sel, sign = boothW5(uint(wvalue)) + + p256Select(&t0, precomp[:], sel) + p256NegCond(&t0, sign) + p256PointAddAsm(&t1, p, &t0) + p256MovCond(&t1, &t1, p, sel) + p256MovCond(p, &t1, &t0, zero) +} diff --git a/src/crypto/elliptic/p256_test.go b/src/crypto/elliptic/p256_test.go new file mode 100644 index 0000000..694186d --- /dev/null +++ b/src/crypto/elliptic/p256_test.go @@ -0,0 +1,169 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package elliptic + +import ( + "math/big" + "testing" +) + +type scalarMultTest struct { + k string + xIn, yIn string + xOut, yOut string +} + +var p256MultTests = []scalarMultTest{ + { + "2a265f8bcbdcaf94d58519141e578124cb40d64a501fba9c11847b28965bc737", + "023819813ac969847059028ea88a1f30dfbcde03fc791d3a252c6b41211882ea", + "f93e4ae433cc12cf2a43fc0ef26400c0e125508224cdb649380f25479148a4ad", + "4d4de80f1534850d261075997e3049321a0864082d24a917863366c0724f5ae3", + "a22d2b7f7818a3563e0f7a76c9bf0921ac55e06e2e4d11795b233824b1db8cc0", + }, + { + "313f72ff9fe811bf573176231b286a3bdb6f1b14e05c40146590727a71c3bccd", + "cc11887b2d66cbae8f4d306627192522932146b42f01d3c6f92bd5c8ba739b06", + "a2f08a029cd06b46183085bae9248b0ed15b70280c7ef13a457f5af382426031", + "831c3f6b5f762d2f461901577af41354ac5f228c2591f84f8a6e51e2e3f17991", + "93f90934cd0ef2c698cc471c60a93524e87ab31ca2412252337f364513e43684", + }, +} + +func TestP256BaseMult(t *testing.T) { + p256 := P256() + p256Generic := p256.Params() + + scalars := make([]*big.Int, 0, len(p224BaseMultTests)+1) + for _, e := range p224BaseMultTests { + k, _ := new(big.Int).SetString(e.k, 10) + scalars = append(scalars, k) + } + k := new(big.Int).SetInt64(1) + k.Lsh(k, 500) + scalars = append(scalars, k) + + for i, k := range scalars { + x, y := p256.ScalarBaseMult(k.Bytes()) + x2, y2 := p256Generic.ScalarBaseMult(k.Bytes()) + if x.Cmp(x2) != 0 || y.Cmp(y2) != 0 { + t.Errorf("#%d: got (%x, %x), want (%x, %x)", i, x, y, x2, y2) + } + + if testing.Short() && i > 5 { + break + } + } +} + +func TestP256Mult(t *testing.T) { + p256 := P256() + p256Generic := p256.Params() + + for i, e := range p224BaseMultTests { + x, _ := new(big.Int).SetString(e.x, 16) + y, _ := new(big.Int).SetString(e.y, 16) + k, _ := new(big.Int).SetString(e.k, 10) + + xx, yy := p256.ScalarMult(x, y, k.Bytes()) + xx2, yy2 := p256Generic.ScalarMult(x, y, k.Bytes()) + if xx.Cmp(xx2) != 0 || yy.Cmp(yy2) != 0 { + t.Errorf("#%d: got (%x, %x), want (%x, %x)", i, xx, yy, xx2, yy2) + } + if testing.Short() && i > 5 { + break + } + } + + for i, e := range p256MultTests { + x, _ := new(big.Int).SetString(e.xIn, 16) + y, _ := new(big.Int).SetString(e.yIn, 16) + k, _ := new(big.Int).SetString(e.k, 16) + expectedX, _ := new(big.Int).SetString(e.xOut, 16) + expectedY, _ := new(big.Int).SetString(e.yOut, 16) + + xx, yy := p256.ScalarMult(x, y, k.Bytes()) + if xx.Cmp(expectedX) != 0 || yy.Cmp(expectedY) != 0 { + t.Errorf("#%d: got (%x, %x), want (%x, %x)", i, xx, yy, expectedX, expectedY) + } + } +} + +type synthCombinedMult struct { + Curve +} + +func (s synthCombinedMult) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { + x1, y1 := s.ScalarBaseMult(baseScalar) + x2, y2 := s.ScalarMult(bigX, bigY, scalar) + return s.Add(x1, y1, x2, y2) +} + +func TestP256CombinedMult(t *testing.T) { + type combinedMult interface { + Curve + CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) + } + + p256, ok := P256().(combinedMult) + if !ok { + p256 = &synthCombinedMult{P256()} + } + + gx := p256.Params().Gx + gy := p256.Params().Gy + + zero := make([]byte, 32) + one := make([]byte, 32) + one[31] = 1 + two := make([]byte, 32) + two[31] = 2 + + // 0×G + 0×G = ∞ + x, y := p256.CombinedMult(gx, gy, zero, zero) + if x.Sign() != 0 || y.Sign() != 0 { + t.Errorf("0×G + 0×G = (%d, %d), should be ∞", x, y) + } + + // 1×G + 0×G = G + x, y = p256.CombinedMult(gx, gy, one, zero) + if x.Cmp(gx) != 0 || y.Cmp(gy) != 0 { + t.Errorf("1×G + 0×G = (%d, %d), should be (%d, %d)", x, y, gx, gy) + } + + // 0×G + 1×G = G + x, y = p256.CombinedMult(gx, gy, zero, one) + if x.Cmp(gx) != 0 || y.Cmp(gy) != 0 { + t.Errorf("0×G + 1×G = (%d, %d), should be (%d, %d)", x, y, gx, gy) + } + + // 1×G + 1×G = 2×G + x, y = p256.CombinedMult(gx, gy, one, one) + ggx, ggy := p256.ScalarBaseMult(two) + if x.Cmp(ggx) != 0 || y.Cmp(ggy) != 0 { + t.Errorf("1×G + 1×G = (%d, %d), should be (%d, %d)", x, y, ggx, ggy) + } + + minusOne := new(big.Int).Sub(p256.Params().N, big.NewInt(1)) + // 1×G + (-1)×G = ∞ + x, y = p256.CombinedMult(gx, gy, one, minusOne.Bytes()) + if x.Sign() != 0 || y.Sign() != 0 { + t.Errorf("1×G + (-1)×G = (%d, %d), should be ∞", x, y) + } +} + +func TestIssue52075(t *testing.T) { + Gx, Gy := P256().Params().Gx, P256().Params().Gy + scalar := make([]byte, 33) + scalar[32] = 1 + x, y := P256().ScalarBaseMult(scalar) + if x.Cmp(Gx) != 0 || y.Cmp(Gy) != 0 { + t.Errorf("unexpected output (%v,%v)", x, y) + } + x, y = P256().ScalarMult(Gx, Gy, scalar) + if x.Cmp(Gx) != 0 || y.Cmp(Gy) != 0 { + t.Errorf("unexpected output (%v,%v)", x, y) + } +} diff --git a/src/crypto/elliptic/p521.go b/src/crypto/elliptic/p521.go new file mode 100644 index 0000000..587991e --- /dev/null +++ b/src/crypto/elliptic/p521.go @@ -0,0 +1,264 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package elliptic + +import ( + "crypto/elliptic/internal/fiat" + "math/big" +) + +type p521Curve struct { + *CurveParams +} + +var p521 p521Curve +var p521Params *CurveParams + +func initP521() { + // See FIPS 186-3, section D.2.5 + p521.CurveParams = &CurveParams{Name: "P-521"} + p521.P, _ = new(big.Int).SetString("6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151", 10) + p521.N, _ = new(big.Int).SetString("6864797660130609714981900799081393217269435300143305409394463459185543183397655394245057746333217197532963996371363321113864768612440380340372808892707005449", 10) + p521.B, _ = new(big.Int).SetString("051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00", 16) + p521.Gx, _ = new(big.Int).SetString("c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66", 16) + p521.Gy, _ = new(big.Int).SetString("11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650", 16) + p521.BitSize = 521 +} + +func (curve p521Curve) Params() *CurveParams { + return curve.CurveParams +} + +func (curve p521Curve) IsOnCurve(x, y *big.Int) bool { + if x.Sign() < 0 || x.Cmp(curve.P) >= 0 || + y.Sign() < 0 || y.Cmp(curve.P) >= 0 { + return false + } + + x1 := bigIntToFiatP521(x) + y1 := bigIntToFiatP521(y) + b := bigIntToFiatP521(curve.B) // TODO: precompute this value. + + // x³ - 3x + b. + x3 := new(fiat.P521Element).Square(x1) + x3.Mul(x3, x1) + + threeX := new(fiat.P521Element).Add(x1, x1) + threeX.Add(threeX, x1) + + x3.Sub(x3, threeX) + x3.Add(x3, b) + + // y² = x³ - 3x + b + y2 := new(fiat.P521Element).Square(y1) + + return x3.Equal(y2) == 1 +} + +type p521Point struct { + x, y, z *fiat.P521Element +} + +func fiatP521ToBigInt(x *fiat.P521Element) *big.Int { + xBytes := x.Bytes() + for i := range xBytes[:len(xBytes)/2] { + xBytes[i], xBytes[len(xBytes)-i-1] = xBytes[len(xBytes)-i-1], xBytes[i] + } + return new(big.Int).SetBytes(xBytes) +} + +// affineFromJacobian brings a point in Jacobian coordinates back to affine +// coordinates, with (0, 0) representing infinity by convention. It also goes +// back to big.Int values to match the exposed API. +func (curve p521Curve) affineFromJacobian(p *p521Point) (x, y *big.Int) { + if p.z.IsZero() == 1 { + return new(big.Int), new(big.Int) + } + + zinv := new(fiat.P521Element).Invert(p.z) + zinvsq := new(fiat.P521Element).Mul(zinv, zinv) + + xx := new(fiat.P521Element).Mul(p.x, zinvsq) + zinvsq.Mul(zinvsq, zinv) + yy := new(fiat.P521Element).Mul(p.y, zinvsq) + + return fiatP521ToBigInt(xx), fiatP521ToBigInt(yy) +} + +func bigIntToFiatP521(x *big.Int) *fiat.P521Element { + xBytes := new(big.Int).Mod(x, p521.P).FillBytes(make([]byte, 66)) + for i := range xBytes[:len(xBytes)/2] { + xBytes[i], xBytes[len(xBytes)-i-1] = xBytes[len(xBytes)-i-1], xBytes[i] + } + x1, err := new(fiat.P521Element).SetBytes(xBytes) + if err != nil { + // The input is reduced modulo P and encoded in a fixed size bytes + // slice, this should be impossible. + panic("internal error: bigIntToFiatP521") + } + return x1 +} + +// jacobianFromAffine converts (x, y) affine coordinates into (x, y, z) Jacobian +// coordinates. It also converts from big.Int to fiat, which is necessarily a +// messy and variable-time operation, which we can't avoid due to the exposed API. +func (curve p521Curve) jacobianFromAffine(x, y *big.Int) *p521Point { + // (0, 0) is by convention the point at infinity, which can't be represented + // in affine coordinates, but is (0, 0, 0) in Jacobian. + if x.Sign() == 0 && y.Sign() == 0 { + return &p521Point{ + x: new(fiat.P521Element), + y: new(fiat.P521Element), + z: new(fiat.P521Element), + } + } + return &p521Point{ + x: bigIntToFiatP521(x), + y: bigIntToFiatP521(y), + z: new(fiat.P521Element).One(), + } +} + +func (curve p521Curve) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) { + p1 := curve.jacobianFromAffine(x1, y1) + p2 := curve.jacobianFromAffine(x2, y2) + return curve.affineFromJacobian(p1.addJacobian(p1, p2)) +} + +// addJacobian sets q = p1 + p2, and returns q. The points may overlap. +func (q *p521Point) addJacobian(p1, p2 *p521Point) *p521Point { + // https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl + z1IsZero := p1.z.IsZero() + z2IsZero := p2.z.IsZero() + + z1z1 := new(fiat.P521Element).Square(p1.z) + z2z2 := new(fiat.P521Element).Square(p2.z) + + u1 := new(fiat.P521Element).Mul(p1.x, z2z2) + u2 := new(fiat.P521Element).Mul(p2.x, z1z1) + h := new(fiat.P521Element).Sub(u2, u1) + xEqual := h.IsZero() == 1 + i := new(fiat.P521Element).Add(h, h) + i.Square(i) + j := new(fiat.P521Element).Mul(h, i) + + s1 := new(fiat.P521Element).Mul(p1.y, p2.z) + s1.Mul(s1, z2z2) + s2 := new(fiat.P521Element).Mul(p2.y, p1.z) + s2.Mul(s2, z1z1) + r := new(fiat.P521Element).Sub(s2, s1) + yEqual := r.IsZero() == 1 + if xEqual && yEqual && z1IsZero == 0 && z2IsZero == 0 { + return q.doubleJacobian(p1) + } + r.Add(r, r) + v := new(fiat.P521Element).Mul(u1, i) + + x := new(fiat.P521Element).Set(r) + x.Square(x) + x.Sub(x, j) + x.Sub(x, v) + x.Sub(x, v) + + y := new(fiat.P521Element).Set(r) + v.Sub(v, x) + y.Mul(y, v) + s1.Mul(s1, j) + s1.Add(s1, s1) + y.Sub(y, s1) + + z := new(fiat.P521Element).Add(p1.z, p2.z) + z.Square(z) + z.Sub(z, z1z1) + z.Sub(z, z2z2) + z.Mul(z, h) + + x.Select(p2.x, x, z1IsZero) + x.Select(p1.x, x, z2IsZero) + y.Select(p2.y, y, z1IsZero) + y.Select(p1.y, y, z2IsZero) + z.Select(p2.z, z, z1IsZero) + z.Select(p1.z, z, z2IsZero) + + q.x.Set(x) + q.y.Set(y) + q.z.Set(z) + return q +} + +func (curve p521Curve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) { + p := curve.jacobianFromAffine(x1, y1) + return curve.affineFromJacobian(p.doubleJacobian(p)) +} + +// doubleJacobian sets q = p + p, and returns q. The points may overlap. +func (q *p521Point) doubleJacobian(p *p521Point) *p521Point { + // https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b + delta := new(fiat.P521Element).Square(p.z) + gamma := new(fiat.P521Element).Square(p.y) + alpha := new(fiat.P521Element).Sub(p.x, delta) + alpha2 := new(fiat.P521Element).Add(p.x, delta) + alpha.Mul(alpha, alpha2) + alpha2.Set(alpha) + alpha.Add(alpha, alpha) + alpha.Add(alpha, alpha2) + + beta := alpha2.Mul(p.x, gamma) + + q.x.Square(alpha) + beta8 := new(fiat.P521Element).Add(beta, beta) + beta8.Add(beta8, beta8) + beta8.Add(beta8, beta8) + q.x.Sub(q.x, beta8) + + q.z.Add(p.y, p.z) + q.z.Square(q.z) + q.z.Sub(q.z, gamma) + q.z.Sub(q.z, delta) + + beta.Add(beta, beta) + beta.Add(beta, beta) + beta.Sub(beta, q.x) + q.y.Mul(alpha, beta) + + gamma.Square(gamma) + gamma.Add(gamma, gamma) + gamma.Add(gamma, gamma) + gamma.Add(gamma, gamma) + + q.y.Sub(q.y, gamma) + + return q +} + +func (curve p521Curve) ScalarMult(Bx, By *big.Int, scalar []byte) (*big.Int, *big.Int) { + B := curve.jacobianFromAffine(Bx, By) + p, t := &p521Point{ + x: new(fiat.P521Element), + y: new(fiat.P521Element), + z: new(fiat.P521Element), + }, &p521Point{ + x: new(fiat.P521Element), + y: new(fiat.P521Element), + z: new(fiat.P521Element), + } + + for _, byte := range scalar { + for bitNum := 0; bitNum < 8; bitNum++ { + p.doubleJacobian(p) + bit := (byte >> (7 - bitNum)) & 1 + t.addJacobian(p, B) + p.x.Select(t.x, p.x, int(bit)) + p.y.Select(t.y, p.y, int(bit)) + p.z.Select(t.z, p.z, int(bit)) + } + } + + return curve.affineFromJacobian(p) +} + +func (curve p521Curve) ScalarBaseMult(k []byte) (*big.Int, *big.Int) { + return curve.ScalarMult(curve.Gx, curve.Gy, k) +} |