diff options
Diffstat (limited to 'src/crypto/internal/bigmod/nat_asm.go')
-rw-r--r-- | src/crypto/internal/bigmod/nat_asm.go | 28 |
1 files changed, 28 insertions, 0 deletions
diff --git a/src/crypto/internal/bigmod/nat_asm.go b/src/crypto/internal/bigmod/nat_asm.go
new file mode 100644
index 0000000..5eb91e1
--- /dev/null
+++ b/src/crypto/internal/bigmod/nat_asm.go
@@ -0,0 +1,28 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !purego && (386 || amd64 || arm || arm64 || ppc64 || ppc64le || s390x)
+
+package bigmod
+
+import "internal/cpu"
+
+// amd64 assembly uses ADCX/ADOX/MULX if ADX is available to run two carry
+// chains in the flags in parallel across the whole operation, and aggressively
+// unrolls loops. arm64 processes four words at a time.
+//
+// It's unclear why the assembly for all other architectures, as well as for
+// amd64 without ADX, performs better than the compiler output.
+// TODO(filippo): file cmd/compile performance issue.
+
+// supportADX is true only when the CPU reports both the ADX and the BMI2
+// feature flags. ADCX/ADOX belong to ADX and MULX belongs to BMI2, so both
+// are needed for the instruction mix described above.
+// NOTE(review): presumably read by the amd64 assembly to pick the
+// ADCX/ADOX/MULX code path; confirm in the corresponding .s file.
+var supportADX = cpu.X86.HasADX && cpu.X86.HasBMI2
+
+// addMulVVW1024 has no Go body; its implementation must be supplied
+// externally, here by the per-architecture assembly that the build constraint
+// at the top of this file selects. The go:noescape directive tells the
+// compiler that the z and x pointers do not escape through the call.
+// NOTE(review): judging by the name, this presumably computes z += x * y over
+// 1024-bit operands and returns the carry-out in c; confirm against the
+// assembly and the callers.
+//
+//go:noescape
+func addMulVVW1024(z, x *uint, y uint) (c uint)
+
+// addMulVVW1536 has the same signature as addMulVVW1024 and is likewise
+// declared without a Go body, with go:noescape applied to its pointer
+// arguments.
+// NOTE(review): presumably the 1536-bit variant of the same operation;
+// confirm against the assembly.
+//
+//go:noescape
+func addMulVVW1536(z, x *uint, y uint) (c uint)
+
+// addMulVVW2048 has the same signature as addMulVVW1024 and is likewise
+// declared without a Go body, with go:noescape applied to its pointer
+// arguments.
+// NOTE(review): presumably the 2048-bit variant of the same operation;
+// confirm against the assembly.
+//
+//go:noescape
+func addMulVVW2048(z, x *uint, y uint) (c uint) |