summary refs log tree commit diff stats
path: root/src/spdk/intel-ipsec-mb/aes128_ecbenc_x3.asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/spdk/intel-ipsec-mb/aes128_ecbenc_x3.asm')
-rw-r--r-- src/spdk/intel-ipsec-mb/aes128_ecbenc_x3.asm 346
1 files changed, 346 insertions, 0 deletions
diff --git a/src/spdk/intel-ipsec-mb/aes128_ecbenc_x3.asm b/src/spdk/intel-ipsec-mb/aes128_ecbenc_x3.asm
new file mode 100644
index 000000000..e13b8467d
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/aes128_ecbenc_x3.asm
@@ -0,0 +1,346 @@
+;;
+;; Copyright (c) 2012-2018, Intel Corporation
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following conditions are met:
+;;
+;; * Redistributions of source code must retain the above copyright notice,
+;; this list of conditions and the following disclaimer.
+;; * Redistributions in binary form must reproduce the above copyright
+;; notice, this list of conditions and the following disclaimer in the
+;; documentation and/or other materials provided with the distribution.
+;; * Neither the name of Intel Corporation nor the names of its contributors
+;; may be used to endorse or promote products derived from this software
+;; without specific prior written permission.
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;
+
+; Routines to do simple AES ECB Enc on one stream with 3 blocks
+
+;void
+; aes128_ecbenc_x3_sse(void *in, void *keys, void *out1, void *out2, void *out3);
+;void
+; aes128_ecbenc_x3_avx(void *in, void *keys, void *out1, void *out2, void *out3);
+
+%include "include/os.asm"
+%define NO_AESNI_RENAME
+%include "include/aesni_emu.inc"
+%include "include/clear_regs.asm"
+
+%ifdef LINUX
+%define IN rdi ; arg 1
+%define KEYS rsi ; arg 2
+%define OUT0 rdx ; arg 3
+%define OUT1 rcx ; arg 4
+%define OUT2 r8 ; arg 5
+%else
+%define IN rcx ; arg 1
+%define KEYS rdx ; arg 2
+%define OUT0 r8 ; arg 3
+%define OUT1 r9 ; arg 4
+%define OUT2 rax ; arg 5: not in a register here; each routine loads it from [rsp + 5*8] on entry
+%endif
+
+;; XMM register roles shared by all three routines in this file
+%define XDATA0 xmm0 ; state of block 0 (plaintext in, ciphertext out)
+%define XDATA1 xmm1 ; state of block 1
+%define XDATA2 xmm2 ; state of block 2
+
+%define XKEYA xmm3 ; round keys are loaded alternately into XKEYA and XKEYB,
+%define XKEYB xmm4 ; so the next key is loaded while the current one is in use
+
+section .text
+
+;; void aes128_ecbenc_x3_sse(void *in, void *keys, void *out1, void *out2, void *out3)
+;; AES-128 ECB encryption of three 16-byte blocks read from IN; block i is stored to OUTi.
+;; KEYS holds round keys 0..10 and must be 16-byte aligned (movdqa); IN/OUTx may be unaligned.
+;; SAFE_PARAM: NULL pointer => return with no stores. SAFE_DATA: clear_scratch_* macros run before ret.
+MKGLOBAL(aes128_ecbenc_x3_sse,function,internal)
+aes128_ecbenc_x3_sse:
+%ifndef LINUX
+        mov     OUT2, [rsp + 5*8]           ; arg 5 is read from the stack on non-LINUX builds
+%endif
+
+%ifdef SAFE_PARAM
+        test    IN, IN                      ; ZF=1 iff pointer is NULL (same result as cmp reg, 0)
+        jz      aes128_ecbenc_x3_sse_return
+        test    KEYS, KEYS
+        jz      aes128_ecbenc_x3_sse_return
+        test    OUT0, OUT0
+        jz      aes128_ecbenc_x3_sse_return
+        test    OUT1, OUT1
+        jz      aes128_ecbenc_x3_sse_return
+        test    OUT2, OUT2
+        jz      aes128_ecbenc_x3_sse_return
+%endif
+
+        movdqu  XDATA0, [IN + 0*16]         ; load first block of plain text
+        movdqu  XDATA1, [IN + 1*16]         ; load second block of plain text
+        movdqu  XDATA2, [IN + 2*16]         ; load third block of plain text
+        movdqa  XKEYA, [KEYS + 16*0]        ; round key 0
+        movdqa  XKEYB, [KEYS + 16*1]        ; round key 1, loaded ahead of its use
+        pxor    XDATA0, XKEYA               ; 0. ARK
+        pxor    XDATA1, XKEYA               ; 0. ARK
+        pxor    XDATA2, XKEYA               ; 0. ARK
+
+        movdqa  XKEYA, [KEYS + 16*2]        ; load round key 2 for the next round
+        aesenc  XDATA0, XKEYB               ; 1. ENC
+        aesenc  XDATA1, XKEYB               ; 1. ENC
+        aesenc  XDATA2, XKEYB               ; 1. ENC
+
+        movdqa  XKEYB, [KEYS + 16*3]        ; load round key 3 for the next round
+        aesenc  XDATA0, XKEYA               ; 2. ENC
+        aesenc  XDATA1, XKEYA               ; 2. ENC
+        aesenc  XDATA2, XKEYA               ; 2. ENC
+
+        movdqa  XKEYA, [KEYS + 16*4]        ; load round key 4 for the next round
+        aesenc  XDATA0, XKEYB               ; 3. ENC
+        aesenc  XDATA1, XKEYB               ; 3. ENC
+        aesenc  XDATA2, XKEYB               ; 3. ENC
+
+        movdqa  XKEYB, [KEYS + 16*5]        ; load round key 5 for the next round
+        aesenc  XDATA0, XKEYA               ; 4. ENC
+        aesenc  XDATA1, XKEYA               ; 4. ENC
+        aesenc  XDATA2, XKEYA               ; 4. ENC
+
+        movdqa  XKEYA, [KEYS + 16*6]        ; load round key 6 for the next round
+        aesenc  XDATA0, XKEYB               ; 5. ENC
+        aesenc  XDATA1, XKEYB               ; 5. ENC
+        aesenc  XDATA2, XKEYB               ; 5. ENC
+
+        movdqa  XKEYB, [KEYS + 16*7]        ; load round key 7 for the next round
+        aesenc  XDATA0, XKEYA               ; 6. ENC
+        aesenc  XDATA1, XKEYA               ; 6. ENC
+        aesenc  XDATA2, XKEYA               ; 6. ENC
+
+        movdqa  XKEYA, [KEYS + 16*8]        ; load round key 8 for the next round
+        aesenc  XDATA0, XKEYB               ; 7. ENC
+        aesenc  XDATA1, XKEYB               ; 7. ENC
+        aesenc  XDATA2, XKEYB               ; 7. ENC
+
+        movdqa  XKEYB, [KEYS + 16*9]        ; load round key 9 for the next round
+        aesenc  XDATA0, XKEYA               ; 8. ENC
+        aesenc  XDATA1, XKEYA               ; 8. ENC
+        aesenc  XDATA2, XKEYA               ; 8. ENC
+
+        movdqa  XKEYA, [KEYS + 16*10]       ; load round key 10 for the last round
+        aesenc  XDATA0, XKEYB               ; 9. ENC
+        aesenc  XDATA1, XKEYB               ; 9. ENC
+        aesenc  XDATA2, XKEYB               ; 9. ENC
+
+        aesenclast XDATA0, XKEYA            ; 10. ENC (last round)
+        aesenclast XDATA1, XKEYA            ; 10. ENC (last round)
+        aesenclast XDATA2, XKEYA            ; 10. ENC (last round)
+
+        movdqu  [OUT0], XDATA0              ; write back ciphertext
+        movdqu  [OUT1], XDATA1              ; write back ciphertext
+        movdqu  [OUT2], XDATA2              ; write back ciphertext
+
+aes128_ecbenc_x3_sse_return:
+%ifdef SAFE_DATA
+        clear_scratch_gps_asm               ; presumably wipes scratch GP registers (macro not shown here)
+        clear_scratch_xmms_sse_asm          ; presumably wipes scratch XMM registers (macro not shown here)
+%endif
+        ret
+
+;; aes128_ecbenc_x3_sse_no_aesni(in, keys, out1, out2, out3): arguments are handled as in aes128_ecbenc_x3_sse.
+;; AES-128 ECB encryption of three 16-byte blocks read from IN; block i is stored to OUTi.
+;; Rounds use the EMULATE_AESENC/EMULATE_AESENCLAST macros instead of the aesenc/aesenclast instructions.
+;; KEYS (round keys 0..10) must be 16-byte aligned (movdqa). SAFE_PARAM: NULL pointer => no stores.
+MKGLOBAL(aes128_ecbenc_x3_sse_no_aesni,function,internal)
+aes128_ecbenc_x3_sse_no_aesni:
+%ifndef LINUX
+        mov     OUT2, [rsp + 5*8]           ; arg 5 is read from the stack on non-LINUX builds
+%endif
+
+%ifdef SAFE_PARAM
+        test    IN, IN                      ; ZF=1 iff pointer is NULL (same result as cmp reg, 0)
+        jz      aes128_ecbenc_x3_sse_no_aesni_return
+        test    KEYS, KEYS
+        jz      aes128_ecbenc_x3_sse_no_aesni_return
+        test    OUT0, OUT0
+        jz      aes128_ecbenc_x3_sse_no_aesni_return
+        test    OUT1, OUT1
+        jz      aes128_ecbenc_x3_sse_no_aesni_return
+        test    OUT2, OUT2
+        jz      aes128_ecbenc_x3_sse_no_aesni_return
+%endif
+
+        movdqu  XDATA0, [IN + 0*16]         ; load first block of plain text
+        movdqu  XDATA1, [IN + 1*16]         ; load second block of plain text
+        movdqu  XDATA2, [IN + 2*16]         ; load third block of plain text
+        movdqa  XKEYA, [KEYS + 16*0]        ; round key 0
+        movdqa  XKEYB, [KEYS + 16*1]        ; round key 1, loaded ahead of its use
+        pxor    XDATA0, XKEYA               ; 0. ARK
+        pxor    XDATA1, XKEYA               ; 0. ARK
+        pxor    XDATA2, XKEYA               ; 0. ARK
+
+        movdqa  XKEYA, [KEYS + 16*2]        ; load round key 2 for the next round
+        EMULATE_AESENC XDATA0, XKEYB        ; 1. ENC
+        EMULATE_AESENC XDATA1, XKEYB        ; 1. ENC
+        EMULATE_AESENC XDATA2, XKEYB        ; 1. ENC
+
+        movdqa  XKEYB, [KEYS + 16*3]        ; load round key 3 for the next round
+        EMULATE_AESENC XDATA0, XKEYA        ; 2. ENC
+        EMULATE_AESENC XDATA1, XKEYA        ; 2. ENC
+        EMULATE_AESENC XDATA2, XKEYA        ; 2. ENC
+
+        movdqa  XKEYA, [KEYS + 16*4]        ; load round key 4 for the next round
+        EMULATE_AESENC XDATA0, XKEYB        ; 3. ENC
+        EMULATE_AESENC XDATA1, XKEYB        ; 3. ENC
+        EMULATE_AESENC XDATA2, XKEYB        ; 3. ENC
+
+        movdqa  XKEYB, [KEYS + 16*5]        ; load round key 5 for the next round
+        EMULATE_AESENC XDATA0, XKEYA        ; 4. ENC
+        EMULATE_AESENC XDATA1, XKEYA        ; 4. ENC
+        EMULATE_AESENC XDATA2, XKEYA        ; 4. ENC
+
+        movdqa  XKEYA, [KEYS + 16*6]        ; load round key 6 for the next round
+        EMULATE_AESENC XDATA0, XKEYB        ; 5. ENC
+        EMULATE_AESENC XDATA1, XKEYB        ; 5. ENC
+        EMULATE_AESENC XDATA2, XKEYB        ; 5. ENC
+
+        movdqa  XKEYB, [KEYS + 16*7]        ; load round key 7 for the next round
+        EMULATE_AESENC XDATA0, XKEYA        ; 6. ENC
+        EMULATE_AESENC XDATA1, XKEYA        ; 6. ENC
+        EMULATE_AESENC XDATA2, XKEYA        ; 6. ENC
+
+        movdqa  XKEYA, [KEYS + 16*8]        ; load round key 8 for the next round
+        EMULATE_AESENC XDATA0, XKEYB        ; 7. ENC
+        EMULATE_AESENC XDATA1, XKEYB        ; 7. ENC
+        EMULATE_AESENC XDATA2, XKEYB        ; 7. ENC
+
+        movdqa  XKEYB, [KEYS + 16*9]        ; load round key 9 for the next round
+        EMULATE_AESENC XDATA0, XKEYA        ; 8. ENC
+        EMULATE_AESENC XDATA1, XKEYA        ; 8. ENC
+        EMULATE_AESENC XDATA2, XKEYA        ; 8. ENC
+
+        movdqa  XKEYA, [KEYS + 16*10]       ; load round key 10 for the last round
+        EMULATE_AESENC XDATA0, XKEYB        ; 9. ENC
+        EMULATE_AESENC XDATA1, XKEYB        ; 9. ENC
+        EMULATE_AESENC XDATA2, XKEYB        ; 9. ENC
+
+        EMULATE_AESENCLAST XDATA0, XKEYA    ; 10. ENC (last round)
+        EMULATE_AESENCLAST XDATA1, XKEYA    ; 10. ENC (last round)
+        EMULATE_AESENCLAST XDATA2, XKEYA    ; 10. ENC (last round)
+
+        movdqu  [OUT0], XDATA0              ; write back ciphertext
+        movdqu  [OUT1], XDATA1              ; write back ciphertext
+        movdqu  [OUT2], XDATA2              ; write back ciphertext
+
+aes128_ecbenc_x3_sse_no_aesni_return:
+%ifdef SAFE_DATA
+        clear_scratch_gps_asm               ; presumably wipes scratch GP registers (macro not shown here)
+        clear_scratch_xmms_sse_asm          ; presumably wipes scratch XMM registers (macro not shown here)
+%endif
+        ret
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; void aes128_ecbenc_x3_avx(void *in, void *keys, void *out1, void *out2, void *out3)
+;; AES-128 ECB encryption of three 16-byte blocks read from IN; block i is stored to OUTi (VEX-encoded).
+;; KEYS holds round keys 0..10 and must be 16-byte aligned (vmovdqa); IN/OUTx may be unaligned.
+;; SAFE_PARAM: NULL pointer => return with no stores. SAFE_DATA: clear_scratch_* macros run before ret.
+MKGLOBAL(aes128_ecbenc_x3_avx,function,internal)
+aes128_ecbenc_x3_avx:
+%ifndef LINUX
+        mov     OUT2, [rsp + 5*8]               ; arg 5 is read from the stack on non-LINUX builds
+%endif
+
+%ifdef SAFE_PARAM
+        test    IN, IN                          ; ZF=1 iff pointer is NULL (same result as cmp reg, 0)
+        jz      aes128_ecbenc_x3_avx_return
+        test    KEYS, KEYS
+        jz      aes128_ecbenc_x3_avx_return
+        test    OUT0, OUT0
+        jz      aes128_ecbenc_x3_avx_return
+        test    OUT1, OUT1
+        jz      aes128_ecbenc_x3_avx_return
+        test    OUT2, OUT2
+        jz      aes128_ecbenc_x3_avx_return
+%endif
+
+        vmovdqu XDATA0, [IN + 0*16]             ; load first block of plain text
+        vmovdqu XDATA1, [IN + 1*16]             ; load second block of plain text
+        vmovdqu XDATA2, [IN + 2*16]             ; load third block of plain text
+        vmovdqa XKEYA, [KEYS + 16*0]            ; round key 0
+        vmovdqa XKEYB, [KEYS + 16*1]            ; round key 1, loaded ahead of its use
+        vpxor   XDATA0, XDATA0, XKEYA           ; 0. ARK
+        vpxor   XDATA1, XDATA1, XKEYA           ; 0. ARK
+        vpxor   XDATA2, XDATA2, XKEYA           ; 0. ARK
+
+        vmovdqa XKEYA, [KEYS + 16*2]            ; load round key 2 for the next round
+        vaesenc XDATA0, XDATA0, XKEYB           ; 1. ENC
+        vaesenc XDATA1, XDATA1, XKEYB           ; 1. ENC
+        vaesenc XDATA2, XDATA2, XKEYB           ; 1. ENC
+
+        vmovdqa XKEYB, [KEYS + 16*3]            ; load round key 3 for the next round
+        vaesenc XDATA0, XDATA0, XKEYA           ; 2. ENC
+        vaesenc XDATA1, XDATA1, XKEYA           ; 2. ENC
+        vaesenc XDATA2, XDATA2, XKEYA           ; 2. ENC
+
+        vmovdqa XKEYA, [KEYS + 16*4]            ; load round key 4 for the next round
+        vaesenc XDATA0, XDATA0, XKEYB           ; 3. ENC
+        vaesenc XDATA1, XDATA1, XKEYB           ; 3. ENC
+        vaesenc XDATA2, XDATA2, XKEYB           ; 3. ENC
+
+        vmovdqa XKEYB, [KEYS + 16*5]            ; load round key 5 for the next round
+        vaesenc XDATA0, XDATA0, XKEYA           ; 4. ENC
+        vaesenc XDATA1, XDATA1, XKEYA           ; 4. ENC
+        vaesenc XDATA2, XDATA2, XKEYA           ; 4. ENC
+
+        vmovdqa XKEYA, [KEYS + 16*6]            ; load round key 6 for the next round
+        vaesenc XDATA0, XDATA0, XKEYB           ; 5. ENC
+        vaesenc XDATA1, XDATA1, XKEYB           ; 5. ENC
+        vaesenc XDATA2, XDATA2, XKEYB           ; 5. ENC
+
+        vmovdqa XKEYB, [KEYS + 16*7]            ; load round key 7 for the next round
+        vaesenc XDATA0, XDATA0, XKEYA           ; 6. ENC
+        vaesenc XDATA1, XDATA1, XKEYA           ; 6. ENC
+        vaesenc XDATA2, XDATA2, XKEYA           ; 6. ENC
+
+        vmovdqa XKEYA, [KEYS + 16*8]            ; load round key 8 for the next round
+        vaesenc XDATA0, XDATA0, XKEYB           ; 7. ENC
+        vaesenc XDATA1, XDATA1, XKEYB           ; 7. ENC
+        vaesenc XDATA2, XDATA2, XKEYB           ; 7. ENC
+
+        vmovdqa XKEYB, [KEYS + 16*9]            ; load round key 9 for the next round
+        vaesenc XDATA0, XDATA0, XKEYA           ; 8. ENC
+        vaesenc XDATA1, XDATA1, XKEYA           ; 8. ENC
+        vaesenc XDATA2, XDATA2, XKEYA           ; 8. ENC
+
+        vmovdqa XKEYA, [KEYS + 16*10]           ; load round key 10 for the last round
+        vaesenc XDATA0, XDATA0, XKEYB           ; 9. ENC
+        vaesenc XDATA1, XDATA1, XKEYB           ; 9. ENC
+        vaesenc XDATA2, XDATA2, XKEYB           ; 9. ENC
+
+        vaesenclast XDATA0, XDATA0, XKEYA       ; 10. ENC (last round)
+        vaesenclast XDATA1, XDATA1, XKEYA       ; 10. ENC (last round)
+        vaesenclast XDATA2, XDATA2, XKEYA       ; 10. ENC (last round)
+
+        vmovdqu [OUT0], XDATA0                  ; write back ciphertext
+        vmovdqu [OUT1], XDATA1                  ; write back ciphertext
+        vmovdqu [OUT2], XDATA2                  ; write back ciphertext
+
+aes128_ecbenc_x3_avx_return:
+%ifdef SAFE_DATA
+        clear_scratch_gps_asm                   ; presumably wipes scratch GP registers (macro not shown here)
+        clear_scratch_xmms_avx_asm              ; presumably wipes scratch XMM registers (macro not shown here)
+%endif
+        ret
+
+%ifdef LINUX
+section .note.GNU-stack noalloc noexec nowrite progbits
+%endif
+