From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 21 Apr 2024 13:54:28 +0200
Subject: Adding upstream version 18.2.2.

Signed-off-by: Daniel Baumann
---
 src/crypto/isa-l/isa-l_crypto/aes/aes_common.asm | 377 +++++++++++++++++++++++
 1 file changed, 377 insertions(+)
 create mode 100644 src/crypto/isa-l/isa-l_crypto/aes/aes_common.asm

diff --git a/src/crypto/isa-l/isa-l_crypto/aes/aes_common.asm b/src/crypto/isa-l/isa-l_crypto/aes/aes_common.asm
new file mode 100644
index 000000000..22f00b395
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/aes/aes_common.asm
@@ -0,0 +1,377 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;   * Redistributions of source code must retain the above copyright
+;     notice, this list of conditions and the following disclaimer.
+;   * Redistributions in binary form must reproduce the above copyright
+;     notice, this list of conditions and the following disclaimer in
+;     the documentation and/or other materials provided with the
+;     distribution.
+;   * Neither the name of Intel Corporation nor the names of its
+;     contributors may be used to endorse or promote products derived
+;     from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%ifndef _AES_COMMON_ASM_
+%define _AES_COMMON_ASM_
+
+%include "reg_sizes.asm"
+
+;; =============================================================================
+;; Generic macro to produce code that executes the %%OPCODE instruction
+;; on a selected number of AES blocks (16 bytes long) between 0 and 16.
+;; All three operands of the instruction come from registers.
+;; Note: if 3 blocks are left at the end, an instruction is produced that
+;; operates on all 4 blocks (the full width of a ZMM register).
+
+%macro ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 14
+%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
+%define %%OPCODE        %2      ; [in] instruction name
+%define %%DST0          %3      ; [out] destination ZMM register
+%define %%DST1          %4      ; [out] destination ZMM register
+%define %%DST2          %5      ; [out] destination ZMM register
+%define %%DST3          %6      ; [out] destination ZMM register
+%define %%SRC1_0        %7      ; [in] source 1 ZMM register
+%define %%SRC1_1        %8      ; [in] source 1 ZMM register
+%define %%SRC1_2        %9      ; [in] source 1 ZMM register
+%define %%SRC1_3        %10     ; [in] source 1 ZMM register
+%define %%SRC2_0        %11     ; [in] source 2 ZMM register
+%define %%SRC2_1        %12     ; [in] source 2 ZMM register
+%define %%SRC2_2        %13     ; [in] source 2 ZMM register
+%define %%SRC2_3        %14     ; [in] source 2 ZMM register
+
+%assign reg_idx     0
+%assign blocks_left %%NUM_BLOCKS
+
+%rep (%%NUM_BLOCKS / 4)
+%xdefine %%DSTREG  %%DST %+ reg_idx
+%xdefine %%SRC1REG %%SRC1_ %+ reg_idx
+%xdefine %%SRC2REG %%SRC2_ %+ reg_idx
+        %%OPCODE        %%DSTREG, %%SRC1REG, %%SRC2REG
+%undef %%DSTREG
+%undef %%SRC1REG
+%undef %%SRC2REG
+%assign reg_idx     (reg_idx + 1)
+%assign blocks_left (blocks_left - 4)
+%endrep
+
+%xdefine %%DSTREG  %%DST %+ reg_idx
+%xdefine %%SRC1REG %%SRC1_ %+ reg_idx
+%xdefine %%SRC2REG %%SRC2_ %+ reg_idx
+
+%if blocks_left == 1
+        %%OPCODE        XWORD(%%DSTREG), XWORD(%%SRC1REG), XWORD(%%SRC2REG)
+%elif blocks_left == 2
+        %%OPCODE        YWORD(%%DSTREG), YWORD(%%SRC1REG), YWORD(%%SRC2REG)
+%elif blocks_left == 3
+        %%OPCODE        %%DSTREG, %%SRC1REG, %%SRC2REG
+%endif
+
+%endmacro
+
+;; =============================================================================
+;; Loads specified number of AES blocks into ZMM registers
+;; %%FLAGS are optional and only affect behavior when 3 trailing blocks are left
+;; - if %%FLAGS is not provided then exactly 3 blocks are loaded (move and insert)
+;; - if "load_4_instead_of_3" option is passed then 4 blocks are loaded
+%macro ZMM_LOAD_BLOCKS_0_16 7-8
+%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
+%define %%INP           %2      ; [in] input data pointer to read from
+%define %%DATA_OFFSET   %3      ; [in] offset to the input pointer (GP or numerical)
+%define %%DST0          %4      ; [out] ZMM register with loaded data
+%define %%DST1          %5      ; [out] ZMM register with loaded data
+%define %%DST2          %6      ; [out] ZMM register with loaded data
+%define %%DST3          %7      ; [out] ZMM register with loaded data
+%define %%FLAGS         %8      ; [in] optional "load_4_instead_of_3"
+
+%assign src_offset  0
+%assign dst_idx     0
+
+%rep (%%NUM_BLOCKS / 4)
+%xdefine %%DSTREG %%DST %+ dst_idx
+        vmovdqu8        %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset]
+%undef %%DSTREG
+%assign src_offset  (src_offset + 64)
+%assign dst_idx     (dst_idx + 1)
+%endrep
+
+%assign blocks_left (%%NUM_BLOCKS % 4)
+%xdefine %%DSTREG %%DST %+ dst_idx
+
+%if blocks_left == 1
+        vmovdqu8        XWORD(%%DSTREG), [%%INP + %%DATA_OFFSET + src_offset]
+%elif blocks_left == 2
+        vmovdqu8        YWORD(%%DSTREG), [%%INP + %%DATA_OFFSET + src_offset]
+%elif blocks_left == 3
+%ifidn %%FLAGS, load_4_instead_of_3
+        vmovdqu8        %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset]
+%else
+        vmovdqu8        YWORD(%%DSTREG), [%%INP + %%DATA_OFFSET + src_offset]
+        vinserti64x2    %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset + 32], 2
+%endif
+%endif
+
+%endmacro
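+
+;; Example (illustrative only; the register assignments below are assumptions,
+;; not part of the upstream code): XOR-ing 6 blocks held in zmm0/zmm1 with key
+;; material in zmm4/zmm5:
+;;
+;;      ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 6, vpxorq, \
+;;                      zmm0, zmm1, zmm2, zmm3, \
+;;                      zmm0, zmm1, zmm2, zmm3, \
+;;                      zmm4, zmm5, zmm6, zmm7
+;;
+;; expands to one full-width operation plus one YMM operation covering the two
+;; trailing blocks:
+;;
+;;      vpxorq  zmm0, zmm0, zmm4
+;;      vpxorq  ymm1, ymm1, ymm5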
+
+;; =============================================================================
+;; Loads specified number of AES blocks into ZMM registers using a mask
+;; register for the last loaded register (xmm, ymm or zmm).
+;; Loads take place at 1 byte granularity.
+%macro ZMM_LOAD_MASKED_BLOCKS_0_16 8
+%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
+%define %%INP           %2      ; [in] input data pointer to read from
+%define %%DATA_OFFSET   %3      ; [in] offset to the input pointer (GP or numerical)
+%define %%DST0          %4      ; [out] ZMM register with loaded data
+%define %%DST1          %5      ; [out] ZMM register with loaded data
+%define %%DST2          %6      ; [out] ZMM register with loaded data
+%define %%DST3          %7      ; [out] ZMM register with loaded data
+%define %%MASK          %8      ; [in] mask register
+
+%assign src_offset  0
+%assign dst_idx     0
+%assign blocks_left %%NUM_BLOCKS
+
+%if %%NUM_BLOCKS > 0
+%rep (((%%NUM_BLOCKS + 3) / 4) - 1)
+%xdefine %%DSTREG %%DST %+ dst_idx
+        vmovdqu8        %%DSTREG, [%%INP + %%DATA_OFFSET + src_offset]
+%undef %%DSTREG
+%assign src_offset  (src_offset + 64)
+%assign dst_idx     (dst_idx + 1)
+%assign blocks_left (blocks_left - 4)
+%endrep
+%endif                          ; %if %%NUM_BLOCKS > 0
+
+%xdefine %%DSTREG %%DST %+ dst_idx
+
+%if blocks_left == 1
+        vmovdqu8        XWORD(%%DSTREG){%%MASK}{z}, [%%INP + %%DATA_OFFSET + src_offset]
+%elif blocks_left == 2
+        vmovdqu8        YWORD(%%DSTREG){%%MASK}{z}, [%%INP + %%DATA_OFFSET + src_offset]
+%elif (blocks_left == 3 || blocks_left == 4)
+        vmovdqu8        %%DSTREG{%%MASK}{z}, [%%INP + %%DATA_OFFSET + src_offset]
+%endif
+
+%endmacro
+
+;; =============================================================================
+;; Stores specified number of AES blocks from ZMM registers
+%macro ZMM_STORE_BLOCKS_0_16 7
+%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
+%define %%OUTP          %2      ; [in] output data pointer to write to
+%define %%DATA_OFFSET   %3      ; [in] offset to the output pointer (GP or numerical)
+%define %%SRC0          %4      ; [in] ZMM register with data to store
+%define %%SRC1          %5      ; [in] ZMM register with data to store
+%define %%SRC2          %6      ; [in] ZMM register with data to store
+%define %%SRC3          %7      ; [in] ZMM register with data to store
+
+%assign dst_offset  0
+%assign src_idx     0
+
+%rep (%%NUM_BLOCKS / 4)
+%xdefine %%SRCREG %%SRC %+ src_idx
+        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], %%SRCREG
+%undef %%SRCREG
+%assign dst_offset  (dst_offset + 64)
+%assign src_idx     (src_idx + 1)
+%endrep
+
+%assign blocks_left (%%NUM_BLOCKS % 4)
+%xdefine %%SRCREG %%SRC %+ src_idx
+
+%if blocks_left == 1
+        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], XWORD(%%SRCREG)
+%elif blocks_left == 2
+        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], YWORD(%%SRCREG)
+%elif blocks_left == 3
+        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], YWORD(%%SRCREG)
+        vextracti32x4   [%%OUTP + %%DATA_OFFSET + dst_offset + 32], %%SRCREG, 2
+%endif
+
+%endmacro
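+
+;; Example (illustrative; the register choices and byte count are assumptions):
+;; loading 5 blocks where the 5th block is partial, with only 13 valid bytes.
+;; The caller builds the mask from the number of valid bytes; the same mask can
+;; be reused with the matching ZMM_STORE_MASKED_BLOCKS_0_16 macro further below:
+;;
+;;      mov     r10, (1 << 13) - 1      ; 13 low bits set, one per valid byte
+;;      kmovq   k1, r10
+;;      ZMM_LOAD_MASKED_BLOCKS_0_16 5, rsi, 0, zmm0, zmm1, zmm2, zmm3, k1
+;;
+;; Blocks 0-3 are loaded full-width into zmm0; the 13 masked bytes land in
+;; xmm1 with the remaining bytes zeroed ({z}).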
+
+;; =============================================================================
+;; Stores specified number of AES blocks from ZMM registers using a mask
+;; register for the last stored register (xmm, ymm or zmm).
+;; Stores take place at 1 byte granularity.
+%macro ZMM_STORE_MASKED_BLOCKS_0_16 8
+%define %%NUM_BLOCKS    %1      ; [in] numerical value, number of AES blocks (0 to 16)
+%define %%OUTP          %2      ; [in] output data pointer to write to
+%define %%DATA_OFFSET   %3      ; [in] offset to the output pointer (GP or numerical)
+%define %%SRC0          %4      ; [in] ZMM register with data to store
+%define %%SRC1          %5      ; [in] ZMM register with data to store
+%define %%SRC2          %6      ; [in] ZMM register with data to store
+%define %%SRC3          %7      ; [in] ZMM register with data to store
+%define %%MASK          %8      ; [in] mask register
+
+%assign dst_offset  0
+%assign src_idx     0
+%assign blocks_left %%NUM_BLOCKS
+
+%if %%NUM_BLOCKS > 0
+%rep (((%%NUM_BLOCKS + 3) / 4) - 1)
+%xdefine %%SRCREG %%SRC %+ src_idx
+        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset], %%SRCREG
+%undef %%SRCREG
+%assign dst_offset  (dst_offset + 64)
+%assign src_idx     (src_idx + 1)
+%assign blocks_left (blocks_left - 4)
+%endrep
+%endif                          ; %if %%NUM_BLOCKS > 0
+
+%xdefine %%SRCREG %%SRC %+ src_idx
+
+%if blocks_left == 1
+        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset]{%%MASK}, XWORD(%%SRCREG)
+%elif blocks_left == 2
+        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset]{%%MASK}, YWORD(%%SRCREG)
+%elif (blocks_left == 3 || blocks_left == 4)
+        vmovdqu8        [%%OUTP + %%DATA_OFFSET + dst_offset]{%%MASK}, %%SRCREG
+%endif
+
+%endmacro
+
+;;; ===========================================================================
+;;; Handles AES encryption rounds
+;;; It handles special cases: the first and last rounds
+;;; Optionally, it performs XOR with data after the last AES round.
+;;; Uses the NROUNDS parameter to check what needs to be done for the current round.
+;;; If 3 blocks are trailing then the operation is performed on a whole ZMM (4 blocks).
+%macro ZMM_AESENC_ROUND_BLOCKS_0_16 12
+%define %%L0B0_3   %1   ; [in/out] zmm; blocks 0 to 3
+%define %%L0B4_7   %2   ; [in/out] zmm; blocks 4 to 7
+%define %%L0B8_11  %3   ; [in/out] zmm; blocks 8 to 11
+%define %%L0B12_15 %4   ; [in/out] zmm; blocks 12 to 15
+%define %%KEY      %5   ; [in] zmm containing round key
+%define %%ROUND    %6   ; [in] round number
+%define %%D0_3     %7   ; [in] zmm or no_data; plain/cipher text blocks 0-3
+%define %%D4_7     %8   ; [in] zmm or no_data; plain/cipher text blocks 4-7
+%define %%D8_11    %9   ; [in] zmm or no_data; plain/cipher text blocks 8-11
+%define %%D12_15   %10  ; [in] zmm or no_data; plain/cipher text blocks 12-15
+%define %%NUMBL    %11  ; [in] number of blocks; numerical value
+%define %%NROUNDS  %12  ; [in] number of rounds; numerical value
+
+;;; === first AES round
+%if (%%ROUND < 1)
+        ;; round 0
+        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%KEY, %%KEY, %%KEY, %%KEY
+%endif                  ; ROUND 0
+
+;;; === middle AES rounds
+%if (%%ROUND >= 1 && %%ROUND <= %%NROUNDS)
+        ;; rounds 1 to 9/11/13
+        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesenc, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%KEY, %%KEY, %%KEY, %%KEY
+%endif                  ; rounds 1 to 9/11/13
+
+;;; === last AES round
+%if (%%ROUND > %%NROUNDS)
+        ;; the last round - mix enclast with text xor's
+        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesenclast, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%KEY, %%KEY, %%KEY, %%KEY
+
+;;; === XOR with data
+%ifnidn %%D0_3, no_data
+%ifnidn %%D4_7, no_data
+%ifnidn %%D8_11, no_data
+%ifnidn %%D12_15, no_data
+        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%D0_3, %%D4_7, %%D8_11, %%D12_15
+%endif                          ; !no_data
+%endif                          ; !no_data
+%endif                          ; !no_data
+%endif                          ; !no_data
+
+%endif                          ; The last round
+
+%endmacro
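+
+;; Example (illustrative): a complete AES-128 encryption flow over 16 blocks.
+;; With AES-128, NROUNDS is 9 (rounds 1-9 are the middle rounds), so the loop
+;; covers rounds 0 through 10. The names key_zmm0..key_zmm10 are assumed
+;; aliases for ZMM registers pre-loaded with the expanded key schedule:
+;;
+;; %assign round 0
+;; %rep 11
+;;      ZMM_AESENC_ROUND_BLOCKS_0_16 zmm0, zmm1, zmm2, zmm3, \
+;;                      key_zmm %+ round, round, \
+;;                      no_data, no_data, no_data, no_data, 16, 9
+;; %assign round (round + 1)
+;; %endrep
+;;
+;; Round 0 expands to vpxorq, rounds 1-9 to vaesenc, and round 10 to
+;; vaesenclast; passing no_data skips the final XOR-with-data step.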
+
+;;; ===========================================================================
+;;; Handles AES decryption rounds
+;;; It handles special cases: the first and last rounds
+;;; Optionally, it performs XOR with data after the last AES round.
+;;; Uses the NROUNDS parameter to check what needs to be done for the current round.
+;;; If 3 blocks are trailing then the operation is performed on a whole ZMM (4 blocks).
+%macro ZMM_AESDEC_ROUND_BLOCKS_0_16 12
+%define %%L0B0_3   %1   ; [in/out] zmm; blocks 0 to 3
+%define %%L0B4_7   %2   ; [in/out] zmm; blocks 4 to 7
+%define %%L0B8_11  %3   ; [in/out] zmm; blocks 8 to 11
+%define %%L0B12_15 %4   ; [in/out] zmm; blocks 12 to 15
+%define %%KEY      %5   ; [in] zmm containing round key
+%define %%ROUND    %6   ; [in] round number
+%define %%D0_3     %7   ; [in] zmm or no_data; cipher text blocks 0-3
+%define %%D4_7     %8   ; [in] zmm or no_data; cipher text blocks 4-7
+%define %%D8_11    %9   ; [in] zmm or no_data; cipher text blocks 8-11
+%define %%D12_15   %10  ; [in] zmm or no_data; cipher text blocks 12-15
+%define %%NUMBL    %11  ; [in] number of blocks; numerical value
+%define %%NROUNDS  %12  ; [in] number of rounds; numerical value
+
+;;; === first AES round
+%if (%%ROUND < 1)
+        ;; round 0
+        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%KEY, %%KEY, %%KEY, %%KEY
+%endif                  ; ROUND 0
+
+;;; === middle AES rounds
+%if (%%ROUND >= 1 && %%ROUND <= %%NROUNDS)
+        ;; rounds 1 to 9/11/13
+        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesdec, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%KEY, %%KEY, %%KEY, %%KEY
+%endif                  ; rounds 1 to 9/11/13
+
+;;; === last AES round
+%if (%%ROUND > %%NROUNDS)
+        ;; the last round - mix declast with text xor's
+        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vaesdeclast, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%KEY, %%KEY, %%KEY, %%KEY
+
+;;; === XOR with data
+%ifnidn %%D0_3, no_data
+%ifnidn %%D4_7, no_data
+%ifnidn %%D8_11, no_data
+%ifnidn %%D12_15, no_data
+        ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16 %%NUMBL, vpxorq, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%L0B0_3, %%L0B4_7, %%L0B8_11, %%L0B12_15, \
+                        %%D0_3, %%D4_7, %%D8_11, %%D12_15
+%endif                          ; !no_data
+%endif                          ; !no_data
+%endif                          ; !no_data
+%endif                          ; !no_data
+
+%endif                          ; The last round
+
+%endmacro
+
+%endif                          ;; _AES_COMMON_ASM_