summaryrefslogtreecommitdiffstats
path: root/src/crypto/isa-l/isa-l_crypto/sha1_mb/sha1_ni_x1.asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/sha1_mb/sha1_ni_x1.asm')
-rw-r--r--src/crypto/isa-l/isa-l_crypto/sha1_mb/sha1_ni_x1.asm318
1 files changed, 318 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/sha1_ni_x1.asm b/src/crypto/isa-l/isa-l_crypto/sha1_mb/sha1_ni_x1.asm
new file mode 100644
index 000000000..86d09e303
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/sha1_ni_x1.asm
@@ -0,0 +1,318 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "sha1_mb_mgr_datastruct.asm"
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_SHANI
+
+[bits 64]
+default rel
+section .text
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+%endif
+
+;; FRAMESZ plus pushes must be an odd multiple of 8
+%define FRAMESZ 32 ; space for ABCDE
+%define RSPSAVE rax
+
+%define ABCD xmm0
+; two E's b/c for ping-pong
+%define E0 xmm1
+%define E1 xmm2
+%define MSG0 xmm3
+%define MSG1 xmm4
+%define MSG2 xmm5
+%define MSG3 xmm6
+%define SHUF_MASK xmm7
+
+; arg index is start from 0 while mgr_flush/submit is from 1
+%define MGR arg0
+%define NBLK arg1
+%define NLANX4 r10 ; consistent with caller
+%define IDX r8 ; local variable -- consistent with caller
+%define DPTR r11 ; local variable -- input buffer pointer
+%define TMP r9 ; local variable -- assistant to address digest
+;%define TMP2 r8 ; local variable -- assistant to address digest
+align 32
+
+; void sha1_ni_x1(SHA1_MB_ARGS_Xn *args, uint32_t size_in_blocks);
+; arg 0 : MGR : pointer to args (only 4 of the 16 lanes used)
+; arg 1 : NBLK : size (in blocks) ;; assumed to be >= 1
+; invisibile arg 2 : IDX : hash on which lane
+; invisibile arg 3 : NLANX4 : max lanes*4 for this arch (digest is placed by it)
+; (sse/avx is 4, avx2 is 8, avx512 is 16)
+;
+; Clobbers registers: rax, r9~r11, xmm0-xmm7
+;
+mk_global sha1_ni_x1, function, internal
+sha1_ni_x1:
+ endbranch
+ mov RSPSAVE, rsp
+ sub rsp, FRAMESZ
+ and rsp, ~0xF ; Align 16Bytes downward
+
+ shl NBLK, 6 ; transform blk amount into bytes
+ jz backto_mgr
+
+ ; detach idx from nlanx4
+ mov IDX, NLANX4
+ shr NLANX4, 8
+ and IDX, 0xff
+
+ lea TMP, [MGR + 4*IDX]
+ ;; Initialize digest
+ pinsrd ABCD, [TMP + 0*NLANX4], 3
+ pinsrd ABCD, [TMP + 1*NLANX4], 2
+ pinsrd ABCD, [TMP + 2*NLANX4], 1
+ lea TMP, [TMP + 2*NLANX4] ; MGR + 4*IDX + 2*NLANX4
+ pinsrd ABCD, [TMP + 1*NLANX4], 0
+ pinsrd E0, [TMP + 2*NLANX4], 3
+ pand E0, [IDX3_WORD_MASK]
+
+ movdqa SHUF_MASK, [PSHUFFLE_SHANI_MASK]
+
+ ;; Load input pointers
+ mov DPTR, [MGR + _data_ptr + IDX*8]
+ ;; nblk is used to indicate data end
+ add NBLK, DPTR
+
+lloop:
+ ; Save hash values for addition after rounds
+ movdqa [rsp + 0*16], E0
+ movdqa [rsp + 1*16], ABCD
+
+ ; do rounds 0-3
+ movdqu MSG0, [DPTR + 0*16]
+ pshufb MSG0, SHUF_MASK
+ paddd E0, MSG0
+ movdqa E1, ABCD
+ sha1rnds4 ABCD, E0, 0
+
+ ; do rounds 4-7
+ movdqu MSG1, [DPTR + 1*16]
+ pshufb MSG1, SHUF_MASK
+ sha1nexte E1, MSG1
+ movdqa E0, ABCD
+ sha1rnds4 ABCD, E1, 0
+ sha1msg1 MSG0, MSG1
+
+ ; do rounds 8-11
+ movdqu MSG2, [DPTR + 2*16]
+ pshufb MSG2, SHUF_MASK
+ sha1nexte E0, MSG2
+ movdqa E1, ABCD
+ sha1rnds4 ABCD, E0, 0
+ sha1msg1 MSG1, MSG2
+ pxor MSG0, MSG2
+
+ ; do rounds 12-15
+ movdqu MSG3, [DPTR + 3*16]
+ pshufb MSG3, SHUF_MASK
+ sha1nexte E1, MSG3
+ movdqa E0, ABCD
+ sha1msg2 MSG0, MSG3
+ sha1rnds4 ABCD, E1, 0
+ sha1msg1 MSG2, MSG3
+ pxor MSG1, MSG3
+
+ ; do rounds 16-19
+ sha1nexte E0, MSG0
+ movdqa E1, ABCD
+ sha1msg2 MSG1, MSG0
+ sha1rnds4 ABCD, E0, 0
+ sha1msg1 MSG3, MSG0
+ pxor MSG2, MSG0
+
+ ; do rounds 20-23
+ sha1nexte E1, MSG1
+ movdqa E0, ABCD
+ sha1msg2 MSG2, MSG1
+ sha1rnds4 ABCD, E1, 1
+ sha1msg1 MSG0, MSG1
+ pxor MSG3, MSG1
+
+ ; do rounds 24-27
+ sha1nexte E0, MSG2
+ movdqa E1, ABCD
+ sha1msg2 MSG3, MSG2
+ sha1rnds4 ABCD, E0, 1
+ sha1msg1 MSG1, MSG2
+ pxor MSG0, MSG2
+
+ ; do rounds 28-31
+ sha1nexte E1, MSG3
+ movdqa E0, ABCD
+ sha1msg2 MSG0, MSG3
+ sha1rnds4 ABCD, E1, 1
+ sha1msg1 MSG2, MSG3
+ pxor MSG1, MSG3
+
+ ; do rounds 32-35
+ sha1nexte E0, MSG0
+ movdqa E1, ABCD
+ sha1msg2 MSG1, MSG0
+ sha1rnds4 ABCD, E0, 1
+ sha1msg1 MSG3, MSG0
+ pxor MSG2, MSG0
+
+ ; do rounds 36-39
+ sha1nexte E1, MSG1
+ movdqa E0, ABCD
+ sha1msg2 MSG2, MSG1
+ sha1rnds4 ABCD, E1, 1
+ sha1msg1 MSG0, MSG1
+ pxor MSG3, MSG1
+
+ ; do rounds 40-43
+ sha1nexte E0, MSG2
+ movdqa E1, ABCD
+ sha1msg2 MSG3, MSG2
+ sha1rnds4 ABCD, E0, 2
+ sha1msg1 MSG1, MSG2
+ pxor MSG0, MSG2
+
+ ; do rounds 44-47
+ sha1nexte E1, MSG3
+ movdqa E0, ABCD
+ sha1msg2 MSG0, MSG3
+ sha1rnds4 ABCD, E1, 2
+ sha1msg1 MSG2, MSG3
+ pxor MSG1, MSG3
+
+ ; do rounds 48-51
+ sha1nexte E0, MSG0
+ movdqa E1, ABCD
+ sha1msg2 MSG1, MSG0
+ sha1rnds4 ABCD, E0, 2
+ sha1msg1 MSG3, MSG0
+ pxor MSG2, MSG0
+
+ ; do rounds 52-55
+ sha1nexte E1, MSG1
+ movdqa E0, ABCD
+ sha1msg2 MSG2, MSG1
+ sha1rnds4 ABCD, E1, 2
+ sha1msg1 MSG0, MSG1
+ pxor MSG3, MSG1
+
+ ; do rounds 56-59
+ sha1nexte E0, MSG2
+ movdqa E1, ABCD
+ sha1msg2 MSG3, MSG2
+ sha1rnds4 ABCD, E0, 2
+ sha1msg1 MSG1, MSG2
+ pxor MSG0, MSG2
+
+ ; do rounds 60-63
+ sha1nexte E1, MSG3
+ movdqa E0, ABCD
+ sha1msg2 MSG0, MSG3
+ sha1rnds4 ABCD, E1, 3
+ sha1msg1 MSG2, MSG3
+ pxor MSG1, MSG3
+
+ ; do rounds 64-67
+ sha1nexte E0, MSG0
+ movdqa E1, ABCD
+ sha1msg2 MSG1, MSG0
+ sha1rnds4 ABCD, E0, 3
+ sha1msg1 MSG3, MSG0
+ pxor MSG2, MSG0
+
+ ; do rounds 68-71
+ sha1nexte E1, MSG1
+ movdqa E0, ABCD
+ sha1msg2 MSG2, MSG1
+ sha1rnds4 ABCD, E1, 3
+ pxor MSG3, MSG1
+
+ ; do rounds 72-75
+ sha1nexte E0, MSG2
+ movdqa E1, ABCD
+ sha1msg2 MSG3, MSG2
+ sha1rnds4 ABCD, E0, 3
+
+ ; do rounds 76-79
+ sha1nexte E1, MSG3
+ movdqa E0, ABCD
+ sha1rnds4 ABCD, E1, 3
+
+ ; Add current hash values with previously saved
+ sha1nexte E0, [rsp + 0*16]
+ paddd ABCD, [rsp + 1*16]
+
+ ; Increment data pointer and loop if more to process
+ add DPTR, 64
+ cmp DPTR, NBLK
+ jne lloop
+
+ ; write out digests
+ lea TMP, [MGR + 4*IDX]
+ pextrd [TMP + 0*NLANX4], ABCD, 3
+ pextrd [TMP + 1*NLANX4], ABCD, 2
+ pextrd [TMP + 2*NLANX4], ABCD, 1
+ lea TMP, [TMP + 2*NLANX4] ; MGR + 4*IDX + 2*NLANX4
+ pextrd [TMP + 1*NLANX4], ABCD, 0
+ pextrd [TMP + 2*NLANX4], E0, 3
+
+ ; update input pointers
+ mov [MGR + _data_ptr + IDX*8], DPTR
+
+backto_mgr:
+ ;;;;;;;;;;;;;;;;
+ ;; Postamble
+
+ mov rsp, RSPSAVE
+
+ ret
+
+
+section .data align=16
+PSHUFFLE_SHANI_MASK: dq 0x08090a0b0c0d0e0f, 0x0001020304050607
+IDX3_WORD_MASK: dq 0x0000000000000000, 0xFFFFFFFF00000000
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_sha1_ni_x1
+no_sha1_ni_x1:
+%endif
+%endif ; HAVE_AS_KNOWS_SHANI