Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/aes/aes_common.asm')
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/aes/aes_common.asm  377
1 file changed, 377 insertions(+), 0 deletions(-)
diff --git a/src/crypto/isa-l/isa-l_crypto/aes/aes_common.asm b/src/crypto/isa-l/isa-l_crypto/aes/aes_common.asm
new file mode 100644
index 000000000..22f00b395
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/aes/aes_common.asm
@@ -0,0 +1,377 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%ifndef _AES_COMMON_ASM_
+%define _AES_COMMON_ASM_
+
+%include "reg_sizes.asm"
+
+;; =============================================================================
+;; Generic macro to produce code that executes the %%OPCODE instruction
+;; on a selected number of AES blocks (16 bytes each) between 0 and 16.
+;; All three operands of the instruction come from registers.
+;; Note: if 3 blocks are left at the end, the instruction is produced to
+;; operate on all 4 blocks (full width of ZMM)
+
+%macro ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 14
+%define %%NUM_BLOCKS %1 ; [in] numerical value, number of AES blocks (0 to 16)
+%define %%OPCODE %2 ; [in] instruction name
+%define %%DST0 %3 ; [out] destination ZMM register
+%define %%DST1 %4 ; [out] destination ZMM register
+%define %%DST2 %5 ; [out] destination ZMM register
+%define %%DST3 %6 ; [out] destination ZMM register
+%define %%SRC1_0 %7 ; [in] source 1 ZMM register
+%define %%SRC1_1 %8 ; [in] source 1 ZMM register
+%define %%SRC1_2 %9 ; [in] source 1 ZMM register
+%define %%SRC1_3 %10 ; [in] source 1 ZMM register
+%define %%SRC2_0 %11 ; [in] source 2 ZMM register
+%define %%SRC2_1 %12 ; [in] source 2 ZMM register
+%define %%SRC2_2 %13 ; [in] source 2 ZMM register
+%define %%SRC2_3 %14 ; [in] source 2 ZMM register
+
+%assign reg_idx 0
+%assign blocks_left %%NUM_BLOCKS
+
+%rep (%%NUM_BLOCKS / 4)
+%xdefine %%DSTREG %%DST %+ reg_idx
+%xdefine %%SRC1REG %%SRC1_ %+ reg_idx
+%xdefine %%SRC2REG %%SRC2_ %+ reg_idx
+ %%OPCODE %%DSTREG, %%SRC1REG, %%SRC2REG
+%undef %%DSTREG
+%undef %%SRC1REG
+%undef %%SRC2REG
+%assign reg_idx (reg_idx + 1)
+%assign blocks_left (blocks_left - 4)
+%endrep
+
+%xdefine %%DSTREG %%DST %+ reg_idx
+%xdefine %%SRC1REG %%SRC1_ %+ reg_idx
+%xdefine %%SRC2REG %%SRC2_ %+ reg_idx
+
+%if blocks_left == 1
+ %%OPCODE XWORD(%%DSTREG), XWORD(%%SRC1REG), XWORD(%%SRC2REG)
+%elif blocks_left == 2
+ %%OPCODE YWORD(%%DSTREG), YWORD(%%SRC1REG), YWORD(%%SRC2REG)
+%elif blocks_left == 3
+ %%OPCODE %%DSTREG, %%SRC1REG, %%SRC2REG
+%endif
+
+%endmacro
+
+;; =============================================================================
+;; Loads the specified number of AES blocks into ZMM registers.
+;; %%FLAGS is optional and only affects behavior when 3 trailing blocks are left
+;; - if %%FLAGS is not provided then exactly 3 blocks are loaded (move and insert)
+;; - if the "load_4_instead_of_3" option is passed then 4 blocks are loaded
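+;;
+;; Example (illustrative only; pointer and register names are hypothetical):
+;; load 6 blocks from the buffer at rax, offset by rbx, into zmm0-zmm1:
+;;
+;;      ZMM_LOAD_BLOCKS_0_16 6, rax, rbx, zmm0, zmm1, zmm2, zmm3
+;;
+;; expands to one full 64-byte load into zmm0 followed by a 32-byte load
+;; into ymm1 for the 2 remaining blocks.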
+%macro ZMM_LOAD_BLOCKS_0_16 7-8
+%define %%NUM_BLOCKS %1 ; [in] numerical value, number of AES blocks (0 to 16)
+%define %%INP %2 ; [in] input data pointer to read from
+%define %%DATA_OFFSET %3 ; [in] offset to the input pointer (GP or numerical)
+%define %%DST0 %4 ; [out] ZMM register with loaded data
+%define %%DST1 %5 ; [out] ZMM register with loaded data
+%define %%DST2 %6 ; [out] ZMM register with loaded data
+%define %%DST3 %7 ; [out] ZMM register with loaded data
+%define %%FLAGS %8 ; [in] optional "load_4_instead_of_3"
+
+%assign src_offset 0
+%assign dst_idx 0
+
+%rep (%%NUM_BLOCKS / 4)
+%xdefine %%DSTREG %%DST %+ dst_idx
+ vmovdqu8 %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset]
+%undef %%DSTREG
+%assign src_offset (src_offset + 64)
+%assign dst_idx (dst_idx + 1)
+%endrep
+
+%assign blocks_left (%%NUM_BLOCKS % 4)
+%xdefine %%DSTREG %%DST %+ dst_idx
+
+%if blocks_left == 1
+ vmovdqu8 XWORD(%%DSTREG), [%%INP + %%DATA_OFFSET + src_offset]
+%elif blocks_left == 2
+ vmovdqu8 YWORD(%%DSTREG), [%%INP + %%DATA_OFFSET + src_offset]
+%elif blocks_left == 3
+%ifidn %%FLAGS, load_4_instead_of_3
+ vmovdqu8 %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset]
+%else
+ vmovdqu8 YWORD(%%DSTREG), [%%INP + %%DATA_OFFSET + src_offset]
+ vinserti64x2 %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset + 32], 2
+%endif
+%endif
+
+%endmacro
+
+;; =============================================================================
+;; Loads the specified number of AES blocks into ZMM registers, using a mask
+;; register for the last loaded register (xmm, ymm or zmm).
+;; Loads take place at 1-byte granularity.
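+;;
+;; Example (illustrative only; the mask setup below is an assumption, not
+;; part of this file): load 5 blocks where the 5th block is partial, e.g.
+;; only 10 of its 16 bytes are valid:
+;;
+;;      mov     r10, 0x3ff          ; 10 low bits set = 10 valid bytes
+;;      kmovq   k1, r10
+;;      ZMM_LOAD_MASKED_BLOCKS_0_16 5, rax, 0, zmm0, zmm1, zmm2, zmm3, k1
+;;
+;; zmm0 receives a full 64-byte load; xmm1 is loaded with zero-masking so
+;; that bytes beyond the mask are cleared.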
+%macro ZMM_LOAD_MASKED_BLOCKS_0_16 8
+%define %%NUM_BLOCKS %1 ; [in] numerical value, number of AES blocks (0 to 16)
+%define %%INP %2 ; [in] input data pointer to read from
+%define %%DATA_OFFSET %3 ; [in] offset to the input pointer (GP or numerical)
+%define %%DST0 %4 ; [out] ZMM register with loaded data
+%define %%DST1 %5 ; [out] ZMM register with loaded data
+%define %%DST2 %6 ; [out] ZMM register with loaded data
+%define %%DST3 %7 ; [out] ZMM register with loaded data
+%define %%MASK %8 ; [in] mask register
+
+%assign src_offset 0
+%assign dst_idx 0
+%assign blocks_left %%NUM_BLOCKS
+
+%if %%NUM_BLOCKS > 0
+%rep (((%%NUM_BLOCKS + 3) / 4) - 1)
+%xdefine %%DSTREG %%DST %+ dst_idx
+ vmovdqu8 %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset]
+%undef %%DSTREG
+%assign src_offset (src_offset + 64)
+%assign dst_idx (dst_idx + 1)
+%assign blocks_left (blocks_left - 4)
+%endrep
+%endif ; %if %%NUM_BLOCKS > 0
+
+%xdefine %%DSTREG %%DST %+ dst_idx
+
+%if blocks_left == 1
+ vmovdqu8 XWORD(%%DSTREG){%%MASK}{z}, [%%INP + %%DATA_OFFSET + src_offset]
+%elif blocks_left == 2
+ vmovdqu8 YWORD(%%DSTREG){%%MASK}{z}, [%%INP + %%DATA_OFFSET + src_offset]
+%elif (blocks_left == 3 || blocks_left == 4)
+ vmovdqu8 %%DSTREG{%%MASK}{z}, [%%INP + %%DATA_OFFSET + src_offset]
+%endif
+
+%endmacro
+
+;; =============================================================================
+;; Stores the specified number of AES blocks from ZMM registers
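+;;
+;; Example (illustrative only; register choices are hypothetical): store 7
+;; blocks from zmm0-zmm1 to the buffer at rdx:
+;;
+;;      ZMM_STORE_BLOCKS_0_16 7, rdx, 0, zmm0, zmm1, zmm2, zmm3
+;;
+;; Unlike the load macro, 3 trailing blocks are stored exactly: a 32-byte
+;; store of ymm1 plus a vextracti32x4 of the third block, so no bytes past
+;; the 7th block are written.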
+%macro ZMM_STORE_BLOCKS_0_16 7
+%define %%NUM_BLOCKS %1 ; [in] numerical value, number of AES blocks (0 to 16)
+%define %%OUTP %2 ; [in] output data pointer to write to
+%define %%DATA_OFFSET %3 ; [in] offset to the output pointer (GP or numerical)
+%define %%SRC0 %4 ; [in] ZMM register with data to store
+%define %%SRC1 %5 ; [in] ZMM register with data to store
+%define %%SRC2 %6 ; [in] ZMM register with data to store
+%define %%SRC3 %7 ; [in] ZMM register with data to store
+
+%assign dst_offset 0
+%assign src_idx 0
+
+%rep (%%NUM_BLOCKS / 4)
+%xdefine %%SRCREG %%SRC %+ src_idx
+ vmovdqu8 [%%OUTP + %%DATA_OFFSET + dst_offset], %%SRCREG
+%undef %%SRCREG
+%assign dst_offset (dst_offset + 64)
+%assign src_idx (src_idx + 1)
+%endrep
+
+%assign blocks_left (%%NUM_BLOCKS % 4)
+%xdefine %%SRCREG %%SRC %+ src_idx
+
+%if blocks_left == 1
+ vmovdqu8 [%%OUTP + %%DATA_OFFSET + dst_offset], XWORD(%%SRCREG)
+%elif blocks_left == 2
+ vmovdqu8 [%%OUTP + %%DATA_OFFSET + dst_offset], YWORD(%%SRCREG)
+%elif blocks_left == 3
+ vmovdqu8 [%%OUTP + %%DATA_OFFSET + dst_offset], YWORD(%%SRCREG)
+ vextracti32x4 [%%OUTP + %%DATA_OFFSET + dst_offset + 32], %%SRCREG, 2
+%endif
+
+%endmacro
+
+;; =============================================================================
+;; Stores the specified number of AES blocks from ZMM registers, using a mask
+;; register for the last stored register (xmm, ymm or zmm).
+;; Stores take place at 1-byte granularity.
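+;;
+;; Example (illustrative only; the mask setup below is an assumption, not
+;; part of this file): store 3 blocks where only 40 of the 48 bytes are
+;; valid:
+;;
+;;      mov     r10, 0xffffffffff   ; 40 low bits set = 40 valid bytes
+;;      kmovq   k1, r10
+;;      ZMM_STORE_MASKED_BLOCKS_0_16 3, rdx, 0, zmm0, zmm1, zmm2, zmm3, k1
+;;
+;; With 3 (or 4) trailing blocks a full ZMM store is issued and the mask
+;; suppresses writes beyond the valid bytes.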
+%macro ZMM_STORE_MASKED_BLOCKS_0_16 8
+%define %%NUM_BLOCKS %1 ; [in] numerical value, number of AES blocks (0 to 16)
+%define %%OUTP %2 ; [in] output data pointer to write to
+%define %%DATA_OFFSET %3 ; [in] offset to the output pointer (GP or numerical)
+%define %%SRC0 %4 ; [in] ZMM register with data to store
+%define %%SRC1 %5 ; [in] ZMM register with data to store
+%define %%SRC2 %6 ; [in] ZMM register with data to store
+%define %%SRC3 %7 ; [in] ZMM register with data to store
+%define %%MASK %8 ; [in] mask register
+
+%assign dst_offset 0
+%assign src_idx 0
+%assign blocks_left %%NUM_BLOCKS
+
+%if %%NUM_BLOCKS > 0
+%rep (((%%NUM_BLOCKS + 3) / 4) - 1)
+%xdefine %%SRCREG %%SRC %+ src_idx
+ vmovdqu8 [%%OUTP + %%DATA_OFFSET + dst_offset], %%SRCREG
+%undef %%SRCREG
+%assign dst_offset (dst_offset + 64)
+%assign src_idx (src_idx + 1)
+%assign blocks_left (blocks_left - 4)
+%endrep
+%endif ; %if %%NUM_BLOCKS > 0
+
+%xdefine %%SRCREG %%SRC %+ src_idx
+
+%if blocks_left == 1
+ vmovdqu8 [%%OUTP + %%DATA_OFFSET + dst_offset]{%%MASK}, XWORD(%%SRCREG)
+%elif blocks_left == 2
+ vmovdqu8 [%%OUTP + %%DATA_OFFSET + dst_offset]{%%MASK}, YWORD(%%SRCREG)
+%elif (blocks_left == 3 || blocks_left == 4)
+ vmovdqu8 [%%OUTP + %%DATA_OFFSET + dst_offset]{%%MASK}, %%SRCREG
+%endif
+
+%endmacro
+
+;;; ===========================================================================
+;;; Handles AES encryption rounds.
+;;; It handles the special cases of the first and last rounds.
+;;; Optionally, it performs an XOR with data after the last AES round.
+;;; Uses the NROUNDS parameter to check what needs to be done for the
+;;; current round.
+;;; If 3 blocks are trailing then the operation is performed on the whole
+;;; ZMM register (4 blocks).
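+;;;
+;;; Example (illustrative sketch; key_zmm() is a hypothetical helper that
+;;; yields the broadcast round key for a given round): a full AES-128
+;;; encryption of 16 blocks, where NROUNDS = 9:
+;;;
+;;; %assign round 0
+;;; %rep 11
+;;;      ZMM_AESENC_ROUND_BLOCKS_0_16 zmm0, zmm1, zmm2, zmm3, \
+;;;              key_zmm(round), round, no_data, no_data, no_data, \
+;;;              no_data, 16, 9
+;;; %assign round (round + 1)
+;;; %endrep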
+%macro ZMM_AESENC_ROUND_BLOCKS_0_16 12
+%define %%L0B0_3 %1 ; [in/out] zmm; blocks 0 to 3
+%define %%L0B4_7 %2 ; [in/out] zmm; blocks 4 to 7
+%define %%L0B8_11 %3 ; [in/out] zmm; blocks 8 to 11
+%define %%L0B12_15 %4 ; [in/out] zmm; blocks 12 to 15
+%define %%KEY %5 ; [in] zmm containing round key
+%define %%ROUND %6 ; [in] round number
+%define %%D0_3 %7 ; [in] zmm or no_data; plain/cipher text blocks 0-3
+%define %%D4_7 %8 ; [in] zmm or no_data; plain/cipher text blocks 4-7
+%define %%D8_11 %9 ; [in] zmm or no_data; plain/cipher text blocks 8-11
+%define %%D12_15 %10 ; [in] zmm or no_data; plain/cipher text blocks 12-15
+%define %%NUMBL %11 ; [in] number of blocks; numerical value
+%define %%NROUNDS %12 ; [in] number of rounds; numerical value
+
+;;; === first AES round
+%if (%%ROUND < 1)
+ ;; round 0
+ ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%KEY, %%KEY, %%KEY, %%KEY
+%endif ; ROUND 0
+
+;;; === middle AES rounds
+%if (%%ROUND >= 1 && %%ROUND <= %%NROUNDS)
+ ;; rounds 1 to 9/11/13
+ ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesenc, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%KEY, %%KEY, %%KEY, %%KEY
+%endif ; rounds 1 to 9/11/13
+
+;;; === last AES round
+%if (%%ROUND > %%NROUNDS)
+ ;; the last round - mix vaesenclast with text XORs
+ ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesenclast, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%KEY, %%KEY, %%KEY, %%KEY
+
+;;; === XOR with data
+%ifnidn %%D0_3, no_data
+%ifnidn %%D4_7, no_data
+%ifnidn %%D8_11, no_data
+%ifnidn %%D12_15, no_data
+ ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%D0_3, %%D4_7, %%D8_11, %%D12_15
+%endif ; !no_data
+%endif ; !no_data
+%endif ; !no_data
+%endif ; !no_data
+
+%endif ; The last round
+
+%endmacro
+
+;;; ===========================================================================
+;;; Handles AES decryption rounds.
+;;; It handles the special cases of the first and last rounds.
+;;; Optionally, it performs an XOR with data after the last AES round.
+;;; Uses the NROUNDS parameter to check what needs to be done for the
+;;; current round.
+;;; If 3 blocks are trailing then the operation is performed on the whole
+;;; ZMM register (4 blocks).
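+;;;
+;;; Example (illustrative only; register choices are hypothetical): one
+;;; middle AES-256 decryption round (NROUNDS = 13) over 12 blocks, with
+;;; the round key broadcast in zmm20:
+;;;
+;;;      ZMM_AESDEC_ROUND_BLOCKS_0_16 zmm0, zmm1, zmm2, zmm3, \
+;;;              zmm20, 5, no_data, no_data, no_data, no_data, 12, 13
+;;;
+;;; Passing cipher text blocks instead of no_data additionally XORs them
+;;; in after the vaesdeclast round.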
+%macro ZMM_AESDEC_ROUND_BLOCKS_0_16 12
+%define %%L0B0_3 %1 ; [in/out] zmm; blocks 0 to 3
+%define %%L0B4_7 %2 ; [in/out] zmm; blocks 4 to 7
+%define %%L0B8_11 %3 ; [in/out] zmm; blocks 8 to 11
+%define %%L0B12_15 %4 ; [in/out] zmm; blocks 12 to 15
+%define %%KEY %5 ; [in] zmm containing round key
+%define %%ROUND %6 ; [in] round number
+%define %%D0_3 %7 ; [in] zmm or no_data; cipher text blocks 0-3
+%define %%D4_7 %8 ; [in] zmm or no_data; cipher text blocks 4-7
+%define %%D8_11 %9 ; [in] zmm or no_data; cipher text blocks 8-11
+%define %%D12_15 %10 ; [in] zmm or no_data; cipher text blocks 12-15
+%define %%NUMBL %11 ; [in] number of blocks; numerical value
+%define %%NROUNDS %12 ; [in] number of rounds; numerical value
+
+;;; === first AES round
+%if (%%ROUND < 1)
+ ;; round 0
+ ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%KEY, %%KEY, %%KEY, %%KEY
+%endif ; ROUND 0
+
+;;; === middle AES rounds
+%if (%%ROUND >= 1 && %%ROUND <= %%NROUNDS)
+ ;; rounds 1 to 9/11/13
+ ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesdec, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%KEY, %%KEY, %%KEY, %%KEY
+%endif ; rounds 1 to 9/11/13
+
+;;; === last AES round
+%if (%%ROUND > %%NROUNDS)
+ ;; the last round - mix vaesdeclast with text XORs
+ ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesdeclast, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%KEY, %%KEY, %%KEY, %%KEY
+
+;;; === XOR with data
+%ifnidn %%D0_3, no_data
+%ifnidn %%D4_7, no_data
+%ifnidn %%D8_11, no_data
+%ifnidn %%D12_15, no_data
+ ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+ %%D0_3, %%D4_7, %%D8_11, %%D12_15
+%endif ; !no_data
+%endif ; !no_data
+%endif ; !no_data
+%endif ; !no_data
+
+%endif ; The last round
+
+%endmacro
+
+%endif ;; _AES_COMMON_ASM_