diff options
Diffstat (limited to 'src/spdk/intel-ipsec-mb/aes128_ecbenc_x3.asm')
-rw-r--r-- | src/spdk/intel-ipsec-mb/aes128_ecbenc_x3.asm | 346 |
1 files changed, 346 insertions, 0 deletions
diff --git a/src/spdk/intel-ipsec-mb/aes128_ecbenc_x3.asm b/src/spdk/intel-ipsec-mb/aes128_ecbenc_x3.asm new file mode 100644 index 000000000..e13b8467d --- /dev/null +++ b/src/spdk/intel-ipsec-mb/aes128_ecbenc_x3.asm @@ -0,0 +1,346 @@ +;; +;; Copyright (c) 2012-2018, Intel Corporation +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are met: +;; +;; * Redistributions of source code must retain the above copyright notice, +;; this list of conditions and the following disclaimer. +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; * Neither the name of Intel Corporation nor the names of its contributors +;; may be used to endorse or promote products derived from this software +;; without specific prior written permission. +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;; + +; Routines to do simple AES ECB Enc on one stream with 3 blocks + +;void +; aes128_ecbenc_x3_sse(void *in, void *keys, void *out1, void *out2, void *out3); +;void +; aes128_ecbenc_x3_avx(void *in, void *keys, void *out1, void *out2, void *out3); + +%include "include/os.asm" +%define NO_AESNI_RENAME +%include "include/aesni_emu.inc" +%include "include/clear_regs.asm" + +%ifdef LINUX +%define IN rdi ; arg 1 +%define KEYS rsi ; arg 2 +%define OUT0 rdx ; arg 3 +%define OUT1 rcx ; arg 4 +%define OUT2 r8 ; arg 5 +%else +%define IN rcx ; arg 1 +%define KEYS rdx ; arg 2 +%define OUT0 r8 ; arg 3 +%define OUT1 r9 ; arg 4 +%define OUT2 rax ; +%endif + + +%define XDATA0 xmm0 +%define XDATA1 xmm1 +%define XDATA2 xmm2 + +%define XKEYA xmm3 +%define XKEYB xmm4 + +section .text + +MKGLOBAL(aes128_ecbenc_x3_sse,function,internal) +aes128_ecbenc_x3_sse: + +%ifndef LINUX + mov OUT2, [rsp + 5*8] +%endif + +%ifdef SAFE_PARAM + cmp IN, 0 + jz aes128_ecbenc_x3_sse_return + cmp KEYS, 0 + jz aes128_ecbenc_x3_sse_return + cmp OUT0, 0 + jz aes128_ecbenc_x3_sse_return + cmp OUT1, 0 + jz aes128_ecbenc_x3_sse_return + cmp OUT2, 0 + jz aes128_ecbenc_x3_sse_return +%endif + + movdqu XDATA0, [IN + 0*16] ; load first block of plain text + movdqu XDATA1, [IN + 1*16] ; load second block of plain text + movdqu XDATA2, [IN + 2*16] ; load third block of plain text + + movdqa XKEYA, [KEYS + 16*0] + + movdqa XKEYB, [KEYS + 16*1] + pxor XDATA0, XKEYA ; 0. ARK + pxor XDATA1, XKEYA ; 0. ARK + pxor XDATA2, XKEYA ; 0. ARK + + movdqa XKEYA, [KEYS + 16*2] + aesenc XDATA0, XKEYB ; 1. ENC + aesenc XDATA1, XKEYB ; 1. ENC + aesenc XDATA2, XKEYB ; 1. ENC + + movdqa XKEYB, [KEYS + 16*3] + aesenc XDATA0, XKEYA ; 2. ENC + aesenc XDATA1, XKEYA ; 2. ENC + aesenc XDATA2, XKEYA ; 2. ENC + + movdqa XKEYA, [KEYS + 16*4] + aesenc XDATA0, XKEYB ; 3. ENC + aesenc XDATA1, XKEYB ; 3. ENC + aesenc XDATA2, XKEYB ; 3. ENC + + movdqa XKEYB, [KEYS + 16*5] + aesenc XDATA0, XKEYA ; 4. ENC + aesenc XDATA1, XKEYA ; 4. ENC + aesenc XDATA2, XKEYA ; 4. ENC + + movdqa XKEYA, [KEYS + 16*6] + aesenc XDATA0, XKEYB ; 5. ENC + aesenc XDATA1, XKEYB ; 5. ENC + aesenc XDATA2, XKEYB ; 5. ENC + + movdqa XKEYB, [KEYS + 16*7] + aesenc XDATA0, XKEYA ; 6. ENC + aesenc XDATA1, XKEYA ; 6. ENC + aesenc XDATA2, XKEYA ; 6. ENC + + movdqa XKEYA, [KEYS + 16*8] + aesenc XDATA0, XKEYB ; 7. ENC + aesenc XDATA1, XKEYB ; 7. ENC + aesenc XDATA2, XKEYB ; 7. ENC + + movdqa XKEYB, [KEYS + 16*9] + aesenc XDATA0, XKEYA ; 8. ENC + aesenc XDATA1, XKEYA ; 8. ENC + aesenc XDATA2, XKEYA ; 8. ENC + + movdqa XKEYA, [KEYS + 16*10] + aesenc XDATA0, XKEYB ; 9. ENC + aesenc XDATA1, XKEYB ; 9. ENC + aesenc XDATA2, XKEYB ; 9. ENC + + aesenclast XDATA0, XKEYA ; 10. ENC + aesenclast XDATA1, XKEYA ; 10. ENC + aesenclast XDATA2, XKEYA ; 10. ENC + + movdqu [OUT0], XDATA0 ; write back ciphertext + movdqu [OUT1], XDATA1 ; write back ciphertext + movdqu [OUT2], XDATA2 ; write back ciphertext + +aes128_ecbenc_x3_sse_return: + +%ifdef SAFE_DATA + clear_scratch_gps_asm + clear_scratch_xmms_sse_asm +%endif + ret + +MKGLOBAL(aes128_ecbenc_x3_sse_no_aesni,function,internal) +aes128_ecbenc_x3_sse_no_aesni: + +%ifndef LINUX + mov OUT2, [rsp + 5*8] +%endif + +%ifdef SAFE_PARAM + cmp IN, 0 + jz aes128_ecbenc_x3_sse_no_aesni_return + cmp KEYS, 0 + jz aes128_ecbenc_x3_sse_no_aesni_return + cmp OUT0, 0 + jz aes128_ecbenc_x3_sse_no_aesni_return + cmp OUT1, 0 + jz aes128_ecbenc_x3_sse_no_aesni_return + cmp OUT2, 0 + jz aes128_ecbenc_x3_sse_no_aesni_return +%endif + + movdqu XDATA0, [IN + 0*16] ; load first block of plain text + movdqu XDATA1, [IN + 1*16] ; load second block of plain text + movdqu XDATA2, [IN + 2*16] ; load third block of plain text + + movdqa XKEYA, [KEYS + 16*0] + + movdqa XKEYB, [KEYS + 16*1] + pxor XDATA0, XKEYA ; 0. ARK + pxor XDATA1, XKEYA ; 0. ARK + pxor XDATA2, XKEYA ; 0. ARK + + movdqa XKEYA, [KEYS + 16*2] + EMULATE_AESENC XDATA0, XKEYB ; 1. ENC + EMULATE_AESENC XDATA1, XKEYB ; 1. ENC + EMULATE_AESENC XDATA2, XKEYB ; 1. ENC + + movdqa XKEYB, [KEYS + 16*3] + EMULATE_AESENC XDATA0, XKEYA ; 2. ENC + EMULATE_AESENC XDATA1, XKEYA ; 2. ENC + EMULATE_AESENC XDATA2, XKEYA ; 2. ENC + + movdqa XKEYA, [KEYS + 16*4] + EMULATE_AESENC XDATA0, XKEYB ; 3. ENC + EMULATE_AESENC XDATA1, XKEYB ; 3. ENC + EMULATE_AESENC XDATA2, XKEYB ; 3. ENC + + movdqa XKEYB, [KEYS + 16*5] + EMULATE_AESENC XDATA0, XKEYA ; 4. ENC + EMULATE_AESENC XDATA1, XKEYA ; 4. ENC + EMULATE_AESENC XDATA2, XKEYA ; 4. ENC + + movdqa XKEYA, [KEYS + 16*6] + EMULATE_AESENC XDATA0, XKEYB ; 5. ENC + EMULATE_AESENC XDATA1, XKEYB ; 5. ENC + EMULATE_AESENC XDATA2, XKEYB ; 5. ENC + + movdqa XKEYB, [KEYS + 16*7] + EMULATE_AESENC XDATA0, XKEYA ; 6. ENC + EMULATE_AESENC XDATA1, XKEYA ; 6. ENC + EMULATE_AESENC XDATA2, XKEYA ; 6. ENC + + movdqa XKEYA, [KEYS + 16*8] + EMULATE_AESENC XDATA0, XKEYB ; 7. ENC + EMULATE_AESENC XDATA1, XKEYB ; 7. ENC + EMULATE_AESENC XDATA2, XKEYB ; 7. ENC + + movdqa XKEYB, [KEYS + 16*9] + EMULATE_AESENC XDATA0, XKEYA ; 8. ENC + EMULATE_AESENC XDATA1, XKEYA ; 8. ENC + EMULATE_AESENC XDATA2, XKEYA ; 8. ENC + + movdqa XKEYA, [KEYS + 16*10] + EMULATE_AESENC XDATA0, XKEYB ; 9. ENC + EMULATE_AESENC XDATA1, XKEYB ; 9. ENC + EMULATE_AESENC XDATA2, XKEYB ; 9. ENC + + EMULATE_AESENCLAST XDATA0, XKEYA ; 10. ENC + EMULATE_AESENCLAST XDATA1, XKEYA ; 10. ENC + EMULATE_AESENCLAST XDATA2, XKEYA ; 10. ENC + + movdqu [OUT0], XDATA0 ; write back ciphertext + movdqu [OUT1], XDATA1 ; write back ciphertext + movdqu [OUT2], XDATA2 ; write back ciphertext + +aes128_ecbenc_x3_sse_no_aesni_return: + +%ifdef SAFE_DATA + clear_scratch_gps_asm + clear_scratch_xmms_sse_asm +%endif + ret + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +MKGLOBAL(aes128_ecbenc_x3_avx,function,internal) +aes128_ecbenc_x3_avx: + +%ifndef LINUX + mov OUT2, [rsp + 5*8] +%endif + +%ifdef SAFE_PARAM + cmp IN, 0 + jz aes128_ecbenc_x3_avx_return + cmp KEYS, 0 + jz aes128_ecbenc_x3_avx_return + cmp OUT0, 0 + jz aes128_ecbenc_x3_avx_return + cmp OUT1, 0 + jz aes128_ecbenc_x3_avx_return + cmp OUT2, 0 + jz aes128_ecbenc_x3_avx_return +%endif + + vmovdqu XDATA0, [IN + 0*16] ; load first block of plain text + vmovdqu XDATA1, [IN + 1*16] ; load second block of plain text + vmovdqu XDATA2, [IN + 2*16] ; load third block of plain text + + vmovdqa XKEYA, [KEYS + 16*0] + + vmovdqa XKEYB, [KEYS + 16*1] + vpxor XDATA0, XDATA0, XKEYA ; 0. ARK + vpxor XDATA1, XDATA1, XKEYA ; 0. ARK + vpxor XDATA2, XDATA2, XKEYA ; 0. ARK + + vmovdqa XKEYA, [KEYS + 16*2] + vaesenc XDATA0, XKEYB ; 1. ENC + vaesenc XDATA1, XKEYB ; 1. ENC + vaesenc XDATA2, XKEYB ; 1. ENC + + vmovdqa XKEYB, [KEYS + 16*3] + vaesenc XDATA0, XKEYA ; 2. ENC + vaesenc XDATA1, XKEYA ; 2. ENC + vaesenc XDATA2, XKEYA ; 2. ENC + + vmovdqa XKEYA, [KEYS + 16*4] + vaesenc XDATA0, XKEYB ; 3. ENC + vaesenc XDATA1, XKEYB ; 3. ENC + vaesenc XDATA2, XKEYB ; 3. ENC + + vmovdqa XKEYB, [KEYS + 16*5] + vaesenc XDATA0, XKEYA ; 4. ENC + vaesenc XDATA1, XKEYA ; 4. ENC + vaesenc XDATA2, XKEYA ; 4. ENC + + vmovdqa XKEYA, [KEYS + 16*6] + vaesenc XDATA0, XKEYB ; 5. ENC + vaesenc XDATA1, XKEYB ; 5. ENC + vaesenc XDATA2, XKEYB ; 5. ENC + + vmovdqa XKEYB, [KEYS + 16*7] + vaesenc XDATA0, XKEYA ; 6. ENC + vaesenc XDATA1, XKEYA ; 6. ENC + vaesenc XDATA2, XKEYA ; 6. ENC + + vmovdqa XKEYA, [KEYS + 16*8] + vaesenc XDATA0, XKEYB ; 7. ENC + vaesenc XDATA1, XKEYB ; 7. ENC + vaesenc XDATA2, XKEYB ; 7. ENC + + vmovdqa XKEYB, [KEYS + 16*9] + vaesenc XDATA0, XKEYA ; 8. ENC + vaesenc XDATA1, XKEYA ; 8. ENC + vaesenc XDATA2, XKEYA ; 8. ENC + + vmovdqa XKEYA, [KEYS + 16*10] + vaesenc XDATA0, XKEYB ; 9. ENC + vaesenc XDATA1, XKEYB ; 9. ENC + vaesenc XDATA2, XKEYB ; 9. ENC + + vaesenclast XDATA0, XKEYA ; 10. ENC + vaesenclast XDATA1, XKEYA ; 10. ENC + vaesenclast XDATA2, XKEYA ; 10. ENC + + vmovdqu [OUT0], XDATA0 ; write back ciphertext + vmovdqu [OUT1], XDATA1 ; write back ciphertext + vmovdqu [OUT2], XDATA2 ; write back ciphertext + +aes128_ecbenc_x3_avx_return: + +%ifdef SAFE_DATA + clear_scratch_gps_asm + clear_scratch_xmms_avx_asm +%endif + ret + +%ifdef LINUX +section .note.GNU-stack noalloc noexec nowrite progbits +%endif + |