summaryrefslogtreecommitdiffstats
path: root/src/crypto/internal/bigmod/nat_asm.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/crypto/internal/bigmod/nat_asm.go')
-rw-r--r--src/crypto/internal/bigmod/nat_asm.go28
1 files changed, 28 insertions, 0 deletions
diff --git a/src/crypto/internal/bigmod/nat_asm.go b/src/crypto/internal/bigmod/nat_asm.go
new file mode 100644
index 0000000..5eb91e1
--- /dev/null
+++ b/src/crypto/internal/bigmod/nat_asm.go
@@ -0,0 +1,28 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !purego && (386 || amd64 || arm || arm64 || ppc64 || ppc64le || s390x)
+
+package bigmod
+
+import "internal/cpu"
+
+// amd64 assembly uses ADCX/ADOX/MULX if ADX is available to run two carry
+// chains in the flags in parallel across the whole operation, and aggressively
+// unrolls loops. arm64 processes four words at a time.
+//
+// It's unclear why the assembly for all other architectures, as well as for
+// amd64 without ADX, perform better than the compiler output.
+// TODO(filippo): file cmd/compile performance issue.
+
+var supportADX = cpu.X86.HasADX && cpu.X86.HasBMI2
+
+//go:noescape
+func addMulVVW1024(z, x *uint, y uint) (c uint)
+
+//go:noescape
+func addMulVVW1536(z, x *uint, y uint) (c uint)
+
+//go:noescape
+func addMulVVW2048(z, x *uint, y uint) (c uint)