summaryrefslogtreecommitdiffstats
path: root/src/crypto/internal/bigmod/nat_riscv64.s
blob: c1d9cc0dd488ac394d1669c42792cf17f187757f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"

// func addMulVVW1024(z, x *uint, y uint) (c uint)
//
// Entry point for 1024-bit operands. It places the word count
// (1024 bits / 64 bits per word = 16) in X30 and jumps to addMulVVWx.
// The frame size is zero and addMulVVWx is NOFRAME, so the shared routine
// reads z, x and y from this function's argument area and stores c into
// its result slot.
TEXT ·addMulVVW1024(SB),$0-32
	MOV	$(1024/64), X30
	JMP	addMulVVWx(SB)

// func addMulVVW1536(z, x *uint, y uint) (c uint)
//
// Entry point for 1536-bit operands. It places the word count
// (1536 bits / 64 bits per word = 24) in X30 and jumps to addMulVVWx.
// The frame size is zero and addMulVVWx is NOFRAME, so the shared routine
// reads z, x and y from this function's argument area and stores c into
// its result slot.
TEXT ·addMulVVW1536(SB),$0-32
	MOV	$(1536/64), X30
	JMP	addMulVVWx(SB)

// func addMulVVW2048(z, x *uint, y uint) (c uint)
//
// Entry point for 2048-bit operands. It places the word count
// (2048 bits / 64 bits per word = 32) in X30 and jumps to addMulVVWx.
// The frame size is zero and addMulVVWx is NOFRAME, so the shared routine
// reads z, x and y from this function's argument area and stores c into
// its result slot.
TEXT ·addMulVVW2048(SB),$0-32
	MOV	$(2048/64), X30
	JMP	addMulVVWx(SB)

// MULADD1(xw, hi, zw) processes a single word and threads the carry in X29:
//	hi:xw = xw * X6                    (high and low halves of the product)
//	zw    = low 64 bits of xw + zw + X29
//	X29   = hi + the carry bits out of the two additions
// xw and hi are overwritten; X21 (partial sum) and X22 (carry bit) are scratch.
#define MULADD1(xw, hi, zw) \
	MULHU	xw, X6, hi; \
	MUL	xw, X6, xw; \
	ADD	xw, zw, X21; \
	SLTU	xw, X21, X22; \
	ADD	hi, X22, hi; \
	ADD	X21, X29, zw; \
	SLTU	X21, zw, X22; \
	ADD	hi, X22, X29

// addMulVVWx is the shared body of the fixed-size addMulVVW entry points.
// It computes z += x * y over X30 words, four words per loop iteration, and
// writes the final carry word to the result slot c.
//
// The routine is reached by JMP and is NOFRAME, so FP-relative names below
// refer to the arguments and result of the entry point that jumped here.
//
// Register roles:
//	X30      words still to process; the loop only exits when it hits
//	         exactly zero, so it must be a multiple of 4
//	X5       cursor into z
//	X7       cursor into x
//	X6       multiplier y
//	X29      carry word passed from one word to the next
//	X8-X19   x word / high product / z word triples for the four words
//	X21, X22 scratch used by MULADD1
TEXT addMulVVWx(SB),NOFRAME|NOSPLIT,$0
	MOV	z+0(FP), X5
	MOV	x+8(FP), X7
	MOV	y+16(FP), X6
	MOV	$0, X29		// carry starts at zero

	BEQZ	X30, finish	// nothing to do for a zero word count
fourwords:
	// Load the next four words of z ...
	MOV	0(X5), X10
	MOV	8(X5), X13
	MOV	16(X5), X16
	MOV	24(X5), X19

	// ... and the matching four words of x.
	MOV	0(X7), X8
	MOV	8(X7), X11
	MOV	16(X7), X14
	MOV	24(X7), X17

	// Multiply-accumulate each word in order; X29 links them together.
	MULADD1(X8, X9, X10)
	MULADD1(X11, X12, X13)
	MULADD1(X14, X15, X16)
	MULADD1(X17, X18, X19)

	// Write the updated words back to z.
	MOV	X10, 0(X5)
	MOV	X13, 8(X5)
	MOV	X16, 16(X5)
	MOV	X19, 24(X5)

	// Step both cursors past the four words (4 * 8 bytes) just handled.
	ADD	$32, X5
	ADD	$32, X7

	SUB	$4, X30
	BNEZ	X30, fourwords

finish:
	MOV	X29, c+24(FP)	// return the outgoing carry
	RET