Adding upstream version 18.2.2.upstream/18.2.2

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
commit: e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree: 64f88b554b444a49f656b6c656111a145cbbaa28 /src/crypto/isa-l/isa-l_crypto/sha1_mb/sha1_ni_x1.asm
parent: Initial commit. (diff)
download: ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
1 files changed, 318 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/sha1_ni_x1.asm b/src/crypto/isa-l/isa-l_crypto/sha1_mb/sha1_ni_x1.asm
new file mode 100644
index 000000000..86d09e303
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/sha1_ni_x1.asm
@@ -0,0 +1,318 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;  Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+;
+;  Redistribution and use in source and binary forms, with or without
+;  modification, are permitted provided that the following conditions
+;  are met:
+;    * Redistributions of source code must retain the above copyright
+;      notice, this list of conditions and the following disclaimer.
+;    * Redistributions in binary form must reproduce the above copyright
+;      notice, this list of conditions and the following disclaimer in
+;      the documentation and/or other materials provided with the
+;      distribution.
+;    * Neither the name of Intel Corporation nor the names of its
+;      contributors may be used to endorse or promote products derived
+;      from this software without specific prior written permission.
+;
+;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "sha1_mb_mgr_datastruct.asm"
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_SHANI
+
+[bits 64]
+default rel
+section .text
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0  rdi
+ %define arg1  rsi
+%else
+ ; Windows
+ %define arg0   rcx
+ %define arg1   rdx
+%endif
+
+;; FRAMESZ plus pushes must be an odd multiple of 8
+%define FRAMESZ	32	; space for ABCDE
+%define RSPSAVE	rax
+
+%define ABCD		xmm0
+; two E's b/c for ping-pong
+%define E0		xmm1
+%define E1		xmm2
+%define MSG0		xmm3
+%define MSG1		xmm4
+%define MSG2		xmm5
+%define MSG3		xmm6
+%define SHUF_MASK	xmm7
+
+; arg index is start from 0 while mgr_flush/submit is from 1
+%define MGR	arg0
+%define NBLK	arg1
+%define NLANX4	r10	; consistent with caller
+%define IDX	r8	; local variable -- consistent with caller
+%define DPTR	r11	; local variable -- input buffer pointer
+%define TMP	r9	; local variable -- assistant to address digest
+;%define TMP2	r8	; local variable -- assistant to address digest
+align 32
+
+; void sha1_ni_x1(SHA1_MB_ARGS_Xn *args, uint32_t size_in_blocks);
+; arg 0 : MGR : pointer to args (only 4 of the 16 lanes used)
+; arg 1 : NBLK : size (in blocks) ;; assumed to be >= 1
+; invisibile arg 2 : IDX : hash on which lane
+; invisibile arg 3 : NLANX4 : max lanes*4 for this arch (digest is placed by it)
+; 		 (sse/avx is 4, avx2 is 8, avx512 is 16)
+;
+; Clobbers registers: rax, r9~r11, xmm0-xmm7
+;
+mk_global sha1_ni_x1, function, internal
+sha1_ni_x1:
+	endbranch
+	mov	RSPSAVE, rsp
+	sub     rsp, FRAMESZ
+	and	rsp, ~0xF	; Align 16Bytes downward
+
+	shl	NBLK, 6		; transform blk amount into bytes
+	jz	backto_mgr
+
+	; detach idx from nlanx4
+	mov	IDX, NLANX4
+	shr	NLANX4, 8
+	and	IDX, 0xff
+
+	lea	TMP, [MGR + 4*IDX]
+	;; Initialize digest
+	pinsrd	ABCD, [TMP + 0*NLANX4], 3
+	pinsrd	ABCD, [TMP + 1*NLANX4], 2
+	pinsrd	ABCD, [TMP + 2*NLANX4], 1
+	lea	TMP, [TMP + 2*NLANX4]	; MGR + 4*IDX + 2*NLANX4
+	pinsrd	ABCD, [TMP + 1*NLANX4], 0
+	pinsrd	E0, [TMP + 2*NLANX4], 3
+	pand	E0, [IDX3_WORD_MASK]
+
+	movdqa	SHUF_MASK, [PSHUFFLE_SHANI_MASK]
+
+	;; Load input pointers
+	mov     DPTR, [MGR + _data_ptr + IDX*8]
+	;; nblk is used to indicate data end
+	add	NBLK, DPTR
+
+lloop:
+	; Save hash values for addition after rounds
+	movdqa		[rsp + 0*16], E0
+	movdqa		[rsp + 1*16], ABCD
+
+	; do rounds 0-3
+	movdqu		MSG0, [DPTR + 0*16]
+	pshufb		MSG0, SHUF_MASK
+		paddd		E0, MSG0
+		movdqa		E1, ABCD
+		sha1rnds4	ABCD, E0, 0
+
+	; do rounds 4-7
+	movdqu		MSG1, [DPTR + 1*16]
+	pshufb		MSG1, SHUF_MASK
+		sha1nexte	E1, MSG1
+		movdqa		E0, ABCD
+		sha1rnds4	ABCD, E1, 0
+	sha1msg1	MSG0, MSG1
+
+	; do rounds 8-11
+	movdqu		MSG2, [DPTR + 2*16]
+	pshufb		MSG2, SHUF_MASK
+		sha1nexte	E0, MSG2
+		movdqa		E1, ABCD
+		sha1rnds4	ABCD, E0, 0
+	sha1msg1	MSG1, MSG2
+	pxor		MSG0, MSG2
+
+	; do rounds 12-15
+	movdqu		MSG3, [DPTR + 3*16]
+	pshufb		MSG3, SHUF_MASK
+		sha1nexte	E1, MSG3
+		movdqa		E0, ABCD
+	sha1msg2	MSG0, MSG3
+		sha1rnds4	ABCD, E1, 0
+	sha1msg1	MSG2, MSG3
+	pxor		MSG1, MSG3
+
+	; do rounds 16-19
+		sha1nexte	E0, MSG0
+		movdqa		E1, ABCD
+	sha1msg2	MSG1, MSG0
+		sha1rnds4	ABCD, E0, 0
+	sha1msg1	MSG3, MSG0
+	pxor		MSG2, MSG0
+
+	; do rounds 20-23
+		sha1nexte	E1, MSG1
+		movdqa		E0, ABCD
+	sha1msg2	MSG2, MSG1
+		sha1rnds4	ABCD, E1, 1
+	sha1msg1	MSG0, MSG1
+	pxor		MSG3, MSG1
+
+	; do rounds 24-27
+		sha1nexte	E0, MSG2
+		movdqa		E1, ABCD
+	sha1msg2	MSG3, MSG2
+		sha1rnds4	ABCD, E0, 1
+	sha1msg1	MSG1, MSG2
+	pxor		MSG0, MSG2
+
+	; do rounds 28-31
+		sha1nexte	E1, MSG3
+		movdqa		E0, ABCD
+	sha1msg2	MSG0, MSG3
+		sha1rnds4	ABCD, E1, 1
+	sha1msg1	MSG2, MSG3
+	pxor		MSG1, MSG3
+
+	; do rounds 32-35
+		sha1nexte	E0, MSG0
+		movdqa		E1, ABCD
+	sha1msg2	MSG1, MSG0
+		sha1rnds4	ABCD, E0, 1
+	sha1msg1	MSG3, MSG0
+	pxor		MSG2, MSG0
+
+	; do rounds 36-39
+		sha1nexte	E1, MSG1
+		movdqa		E0, ABCD
+	sha1msg2	MSG2, MSG1
+		sha1rnds4	ABCD, E1, 1
+	sha1msg1	MSG0, MSG1
+	pxor		MSG3, MSG1
+
+	; do rounds 40-43
+		sha1nexte	E0, MSG2
+		movdqa		E1, ABCD
+	sha1msg2	MSG3, MSG2
+		sha1rnds4	ABCD, E0, 2
+	sha1msg1	MSG1, MSG2
+	pxor		MSG0, MSG2
+
+	; do rounds 44-47
+		sha1nexte	E1, MSG3
+		movdqa		E0, ABCD
+	sha1msg2	MSG0, MSG3
+		sha1rnds4	ABCD, E1, 2
+	sha1msg1	MSG2, MSG3
+	pxor		MSG1, MSG3
+
+	; do rounds 48-51
+		sha1nexte	E0, MSG0
+		movdqa		E1, ABCD
+	sha1msg2	MSG1, MSG0
+		sha1rnds4	ABCD, E0, 2
+	sha1msg1	MSG3, MSG0
+	pxor		MSG2, MSG0
+
+	; do rounds 52-55
+		sha1nexte	E1, MSG1
+		movdqa		E0, ABCD
+	sha1msg2	MSG2, MSG1
+		sha1rnds4	ABCD, E1, 2
+	sha1msg1	MSG0, MSG1
+	pxor		MSG3, MSG1
+
+	; do rounds 56-59
+		sha1nexte	E0, MSG2
+		movdqa		E1, ABCD
+	sha1msg2	MSG3, MSG2
+		sha1rnds4	ABCD, E0, 2
+	sha1msg1	MSG1, MSG2
+	pxor		MSG0, MSG2
+
+	; do rounds 60-63
+		sha1nexte	E1, MSG3
+		movdqa		E0, ABCD
+	sha1msg2	MSG0, MSG3
+		sha1rnds4	ABCD, E1, 3
+	sha1msg1	MSG2, MSG3
+	pxor		MSG1, MSG3
+
+	; do rounds 64-67
+		sha1nexte	E0, MSG0
+		movdqa		E1, ABCD
+	sha1msg2	MSG1, MSG0
+		sha1rnds4	ABCD, E0, 3
+	sha1msg1	MSG3, MSG0
+	pxor		MSG2, MSG0
+
+	; do rounds 68-71
+		sha1nexte	E1, MSG1
+		movdqa		E0, ABCD
+	sha1msg2	MSG2, MSG1
+		sha1rnds4	ABCD, E1, 3
+	pxor		MSG3, MSG1
+
+	; do rounds 72-75
+		sha1nexte	E0, MSG2
+		movdqa		E1, ABCD
+	sha1msg2	MSG3, MSG2
+		sha1rnds4	ABCD, E0, 3
+
+	; do rounds 76-79
+		sha1nexte	E1, MSG3
+		movdqa		E0, ABCD
+		sha1rnds4	ABCD, E1, 3
+
+	; Add current hash values with previously saved
+	sha1nexte	E0, [rsp + 0*16]
+	paddd		ABCD, [rsp + 1*16]
+
+	; Increment data pointer and loop if more to process
+	add		DPTR, 64
+	cmp		DPTR, NBLK
+	jne		lloop
+
+	; write out digests
+	lea	TMP, [MGR + 4*IDX]
+	pextrd	[TMP + 0*NLANX4], ABCD, 3
+	pextrd	[TMP + 1*NLANX4], ABCD, 2
+	pextrd	[TMP + 2*NLANX4], ABCD, 1
+	lea	TMP, [TMP + 2*NLANX4]	; MGR + 4*IDX + 2*NLANX4
+	pextrd	[TMP + 1*NLANX4], ABCD, 0
+	pextrd	[TMP + 2*NLANX4], E0, 3
+
+	; update input pointers
+	mov     [MGR + _data_ptr + IDX*8], DPTR
+
+backto_mgr:
+	;;;;;;;;;;;;;;;;
+	;; Postamble
+
+	mov     rsp, RSPSAVE
+
+	ret
+
+
+section .data align=16
+PSHUFFLE_SHANI_MASK:	dq 0x08090a0b0c0d0e0f, 0x0001020304050607
+IDX3_WORD_MASK:		dq 0x0000000000000000, 0xFFFFFFFF00000000
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_sha1_ni_x1
+no_sha1_ni_x1:
+%endif
+%endif ; HAVE_AS_KNOWS_SHANI
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-21 11:54:28 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-21 11:54:28 +0000
commit	e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree	64f88b554b444a49f656b6c656111a145cbbaa28 /src/crypto/isa-l/isa-l_crypto/sha1_mb/sha1_ni_x1.asm
parent	Initial commit. (diff)
download	ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip