summaryrefslogtreecommitdiffstats
path: root/src/crypto/isa-l/isa-l_crypto/aes/cbc_common.asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/aes/cbc_common.asm')
-rw-r--r--src/crypto/isa-l/isa-l_crypto/aes/cbc_common.asm427
1 files changed, 427 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/aes/cbc_common.asm b/src/crypto/isa-l/isa-l_crypto/aes/cbc_common.asm
new file mode 100644
index 000000000..2fbc3b2af
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/aes/cbc_common.asm
@@ -0,0 +1,427 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;
+; the following defines control the operation of the macros below and
+; need to be defines in the including file
+; KEY_ROUNDS - number of key rounds needed based on key length: 128bit - 11, 192bit - 13 or 256bit - 15
+; EARLY_BLOCKS - number of data block to load before starting computations
+; PARALLEL_BLOCKS - number of blocks of data to process in parallel also the number of xmm regs to reserve for data
+; IV_CNT - number of xmm regs to use for IV data valid values of 0 or 1
+; TMP_CNT - number of tmp xmm register to reserve
+; XMM_USAGE - number of xmm registers to use. must be at least the same as PARALLEL_BLOCKS + 2
+;
+
+%include "reg_sizes.asm"
+
+;
+; the following instructions set specific macros must be defined in the user file
+; to make use of the AES macros below
+; MOVDQ - move from memory to xmm reg
+; PXOR - XOR of two xmm registers pxor
+; AES_DEC - AES block decode for early key rounds
+; AES_DEC_LAST - AES block decode for last key round
+; or
+; AES_ENC - AES block encode for early key rounds
+; AES_ENC_LAST - AES block encode for last key round
+
+; Three usages of xmm regs: key round cache, blocks data and one temp
+; CKEY_CNT are (number of xmm regs) - PARALLEL_BLOCKS - IV holder - 2 TMP mmx reg
+%assign FIRST_XDATA (0)
+%assign IV_IDX (FIRST_XDATA + PARALLEL_BLOCKS)
+%ifndef IV_CNT
+%define IV_CNT (1)
+%endif
+%assign TMP (IV_IDX + IV_CNT)
+%assign TMP_CNT (2)
+%assign FIRST_CKEY (TMP + TMP_CNT)
+%assign CKEY_CNT (XMM_USAGE - (PARALLEL_BLOCKS + IV_CNT + TMP_CNT))
+
+; Abstract xmm register usages that identify the expected contents of the register
+%define reg(i) xmm %+ i
+%define XDATA(i) xmm %+ i
+%define KEY_REG(i) xmm %+ i
+%define IV_REG(i) xmm %+ i
+
+%define IDX rax
+
+
+
+
+;
+;
+; AES CBC ENCODE MACROS
+;
+;
+
+;
+; CBC_DECRYPT_BLOCKS
+; Decrypts a number of blocks using AES_PARALLEL_ENC_BLOCKS macro
+; Finalized the decryption and saves results in the output
+; places last last buffers crypto text in IV for next buffer
+; updates the index and number of bytes left
+;
+%macro CBC_DECRYPT_BLOCKS 17
+%define %%TOT_ROUNDS %1
+%define %%num_blocks %2 ; can be 0..13
+%define %%EARLY_LOADS %3 ; number of data blocks to laod before processing
+%define %%MOVDQ %4
+%define %%PXOR %5
+%define %%AES_DEC %6
+%define %%AES_DEC_LAST %7
+%define %%CACHED_KEYS %8 ; number of key data cached in xmm regs
+%define %%TMP %9
+%define %%TMP_CNT %10
+%define %%FIRST_CKEY %11
+%define %%KEY_DATA %12
+%define %%FIRST_XDATA %13
+%define %%IN %14 ; input data
+%define %%OUT %15 ; output data
+%define %%IDX %16 ; index into input and output data buffers
+%define %%LEN %17
+
+ AES_PARALLEL_ENC_BLOCKS %%TOT_ROUNDS, %%num_blocks, %%EARLY_LOADS, %%MOVDQ, %%PXOR, %%AES_DEC, %%AES_DEC_LAST, %%CACHED_KEYS, %%TMP, %%TMP_CNT, %%FIRST_CKEY, %%KEY_DATA, %%FIRST_XDATA, %%IN, %%OUT, %%IDX
+
+ ;
+ ; XOR the result of each block's decrypt with the previous block's cypher text (C)
+ ;
+ %assign i 0
+ %rep (%%num_blocks)
+ %%PXOR XDATA(i), XDATA(IV_IDX) ; XOR result with previous block's C
+ %%MOVDQ [%%OUT + %%IDX + i*16], XDATA(i) ; save plain text to out
+ %%MOVDQ XDATA(IV_IDX), [%%IN + IDX + i*16] ; load IV with current block C
+ %assign i (i+1)
+ %endrep
+
+ add %%IDX, %%num_blocks*16
+ sub %%LEN, %%num_blocks*16
+%endmacro
+
+
+;
+; CBC_ENC_INIT
+; XOR first data block with the IV data
+%macro CBC_ENC_INIT 7
+%define %%P_FIRST %1
+%define %%IV_IDX %2
+%define %%MOVDQ %3
+%define %%PXOR %4
+%define %%IV %5
+%define %%IN %6 ; input data
+%define %%IDX %7 ; index into input and output data buffers
+
+ %%MOVDQ XDATA(%%P_FIRST), [%%IN + %%IDX + 0*16]
+ %%MOVDQ reg(%%IV_IDX), [%%IV]
+ %%PXOR XDATA(%%P_FIRST), reg(%%IV_IDX)
+%endmacro
+
+;
+; assumptions:
+; LEN is length of data remaining
+; IDX is offset into the data buffer
+;
+; subloops
+; if data > 16 load next block into a next XDATA reg (XDATA(p_next))
+; load first uncached key into TMP0 (if any)
+; AES block encript XDATA(P_FIRST)
+; if data > 16 XOR next2 block (XDATA(p_next)) with current (XDATA(P_FIRST))
+; save current (XDATA(P_FIRST))
+; update indexes for P_FIRST
+; end if data zero
+;
+%macro CBC_ENC_SUBLOOP 17
+%define %%TOT_ROUNDS %1
+%define %%BLOCKS %2 ; can be 1...14
+%define %%START_DATA %3
+%define %%MOVDQ %4
+%define %%PXOR %5
+%define %%AES_DEC %6
+%define %%AES_DEC_LAST %7
+%define %%TMP %8
+%define %%TMP_CNT %9
+%define %%FIRST_CKEY %10
+%define %%CKEY_CNT %11
+%define %%KEYS %12
+%define %%CACHED_KEYS %13
+%define %%IN %14 ; input data
+%define %%OUT %15 ; output data
+%define %%IDX %16 ; index into input and output data buffers
+%define %%LEN %17
+
+ %assign this_blk 0
+ %assign next_blk 1
+ %assign p_first %%START_DATA
+ %assign p_next (p_first+1)
+ ; for number of blocks to be processed in a loop
+ %assign blk 1
+ %rep %%BLOCKS
+ ; if data > 16 load next block into a next XDATA reg (XDATA(p_next))
+ cmp %%LEN, 16
+ %push skip_read
+ je %$skip_read_next
+ %%MOVDQ XDATA(p_next), [%%IN + %%IDX + next_blk*16]
+ %$skip_read_next:
+ %pop
+
+ AES_ENC_BLOCKS %%TOT_ROUNDS, p_first, %%TMP, %%TMP_CNT, %%FIRST_CKEY, %%CKEY_CNT, %%KEYS, %%MOVDQ, %%PXOR, %%AES_DEC, %%AES_DEC_LAST
+
+ ; if data > 16 XOR next2 block (XDATA(p_next)) with current (XDATA(p_first))
+ cmp %%LEN, 16
+ %push skip_next
+ je %$skip_next_blk_start
+ %%PXOR XDATA(p_next), XDATA(p_first)
+ %$skip_next_blk_start:
+ %pop
+
+ ; save current (XDATA(p_first))
+ %%MOVDQ [%%OUT + %%IDX + this_blk*16], XDATA(p_first)
+ ; update indexes for p_first
+ add %%IDX, 16
+ sub %%LEN, 16
+
+ %if (blk < %%BLOCKS) ; only insert jz if NOT last block
+ ; end if data zero
+ jz %%END_CBC_ENC_SUBLOOP
+ %endif ; (p_next < %%BLOCKS)
+
+ %assign p_first (p_next)
+ %assign blk (blk+1)
+ %if (blk == %%BLOCKS) ; the last rep loop's read of the next block needs to be into START_DATA
+ %assign p_next (%%START_DATA)
+ %elif (1 == %%BLOCKS)
+ %%MOVDQ XDATA(%%START_DATA), XDATA(p_next)
+ %else
+ %assign p_next (p_next+1)
+ %endif
+ %endrep ; %%BLOCKS
+
+ %%END_CBC_ENC_SUBLOOP:
+%endm ; CBC_ENC_SUBLOOP
+
+
+;
+;
+; AES BLOCK ENCODE MACROS
+;
+;
+
+;
+; FILL_KEY_CACHE
+; Load key data into the cache key xmm regs
+%macro FILL_KEY_CACHE 4
+%define %%CACHED_KEYS %1
+%define %%CKEY_START %2
+%define %%KEY_DATA %3
+%define %%MOVDQ %4
+
+ %assign rnd 0
+ %rep KEY_ROUNDS
+ %if (rnd < %%CACHED_KEYS) ; find the round's key data
+ %assign c (rnd + %%CKEY_START)
+ %%MOVDQ KEY_REG(c), [%%KEY_DATA + rnd*16] ;load sub key into an available register
+ %endif
+ %assign rnd (rnd+1)
+ %endrep
+%endmacro
+
+;
+; SCHEDULE_DATA_LOAD
+; pre-loades message data into xmm regs
+; updates global 'blocks_loaded' that tracks which data blocks have been loaded
+; 'blocks_loaded' is an in/out global and must be declared in the using macro or function
+%macro SCHEDULE_DATA_LOAD 5
+%define %%PARALLEL_DATA %1
+%define %%EARLY_LOADS %2
+%define %%MOVDQ %3
+%define %%IN %4
+%define %%IDX %5
+
+ %if (blocks_loaded < %%PARALLEL_DATA)
+ ; load cipher text
+ %%MOVDQ XDATA(blocks_loaded), [%%IN + %%IDX + blocks_loaded*16]
+ %assign blocks_loaded (blocks_loaded+1)
+ %endif ; (blocks_loaded < %%PARALLEL_DATA)
+%endmacro ; SCHEDULED_EARLY_DATA_LOADS
+
+;
+; INIT_SELECT_KEY
+; determine which xmm reg holds the key data needed or loades it into the temp register if not cached
+; 'current_tmp' is an in/out global and must be declared in the using macro or function
+%macro INIT_SELECT_KEY 6
+%define %%TOT_ROUNDS %1
+%define %%CACHED_KEYS %2
+%define %%KEY_DATA %3
+%define %%FIRST_TMP %4
+%define %%TMP_CNT %5
+%define %%MOVDQ %6
+
+ %assign current_tmp (%%FIRST_TMP)
+ %if (%%TOT_ROUNDS > %%CACHED_KEYS) ; load the first uncached key into temp reg
+ %%MOVDQ KEY_REG(current_tmp), [%%KEY_DATA + %%CACHED_KEYS*16]
+ %endif ; (KEY_ROUNDS > CKEY_CNT)
+%endmacro ; SELECT_KEY
+
+;
+; SELECT_KEY
+; determine which xmm reg holds the key data needed or loades it into the temp register if not cached
+; 'current_tmp' is an in/out global and must be declared in the using macro or function
+%macro SELECT_KEY 8
+%define %%ROUND %1
+%define %%TOT_ROUNDS %2
+%define %%CACHED_KEYS %3
+%define %%FIRST_KEY %4
+%define %%KEY_DATA %5
+%define %%FIRST_TMP %6
+%define %%TMP_CNT %7
+%define %%MOVDQ %8
+
+ ; find the key data for this round
+ %if (%%ROUND < %%CACHED_KEYS) ; is it cached
+ %assign key (%%ROUND + %%FIRST_KEY)
+ %else
+ ; Load non-cached key %%ROUND data ping-ponging between temp regs if more than one
+ %assign key (current_tmp) ; use the previous loaded key data
+ %if (1 == %%TMP_CNT)
+ %%MOVDQ KEY_REG(current_tmp), [%%KEY_DATA + %%ROUND*16] ; load the next rounds key data
+ %else
+ %assign next_round (%%ROUND+1)
+ %if (next_round < %%TOT_ROUNDS) ; if more rounds to be done
+ %if (current_tmp == %%FIRST_TMP) ; calc the next temp reg to use
+ %assign current_tmp (current_tmp + 1)
+ %else
+ %assign current_tmp (%%FIRST_TMP)
+ %endif ; (current_tmp == %%FIRST_TMP)
+ %%MOVDQ KEY_REG(current_tmp), [%%KEY_DATA + next_round*16] ; load the next rounds key data
+
+ %endif ; (%%ROUND < KEY_ROUNDS)
+ %endif ; (1 < %%TMP_CNT)
+ %endif ; (%%ROUND < %%CACHED_KEYS)
+%endmacro ; SELECT_KEY
+
+
+;
+; AES_PARALLEL_ENC_BLOCKS
+; preloads some data blocks to be worked on
+; starts the aes block encoding while loading the other blocks to be done in parallel
+; aes block encodes each key round on each block
+%macro AES_PARALLEL_ENC_BLOCKS 16
+%define %%KEY_ROUNDS %1
+%define %%PARALLEL_DATA %2
+%define %%EARLY_LOADS %3
+%define %%MOVDQ %4
+%define %%PXOR %5
+%define %%AES_DEC %6
+%define %%AES_DEC_LAST %7
+%define %%CACHED_KEYS %8
+%define %%TMP %9
+%define %%TMP_CNT %10
+%define %%FIRST_CKEY %11
+%define %%KEY_DATA %12
+%define %%FIRST_XDATA %13
+%define %%IN %14 ; input data
+%define %%OUT %15 ; output data
+%define %%IDX %16 ; index into input and output data buffers
+
+ %assign blocks_loaded 0
+
+ %rep %%EARLY_LOADS
+ SCHEDULE_DATA_LOAD %%PARALLEL_DATA, %%EARLY_LOADS, %%MOVDQ, %%IN, %%IDX ; updates blocks_loaded
+ %endrep ; %%EARLY_LOADS
+
+ %assign current_tmp (TMP)
+ INIT_SELECT_KEY %%KEY_ROUNDS, %%CACHED_KEYS, %%KEY_DATA, %%TMP, %%TMP_CNT, %%MOVDQ
+
+ %assign round 0
+ %assign key 0
+ %rep KEY_ROUNDS ; for all key rounds
+ SELECT_KEY round, %%KEY_ROUNDS, %%CACHED_KEYS, %%FIRST_CKEY, %%KEY_DATA, %%TMP, %%TMP_CNT, %%MOVDQ
+
+ %assign i %%FIRST_XDATA
+ %rep %%PARALLEL_DATA ; for each block do the EAS block encode step
+ %if (0 == round)
+ %%PXOR XDATA(i), KEY_REG(key) ; first round's step
+ SCHEDULE_DATA_LOAD %%PARALLEL_DATA, %%EARLY_LOADS, %%MOVDQ, %%IN, %%IDX
+
+ %elif ( (%%KEY_ROUNDS-1) == round )
+ %%AES_DEC_LAST XDATA(i), KEY_REG(key) ; last round's step
+
+ %else
+ %%AES_DEC XDATA(i), KEY_REG(key) ; middle round's (1..last-1) step
+
+ %endif
+ %assign i (i+1)
+ %endrep ;%%PARALLEL_DATA
+ %assign round (round+1)
+ %endrep ;KEY_ROUNDS
+%endmacro ; AES_PARALLEL_ENC_BLOCKS
+
+
+
+;
+; AES_ENC_BLOCKS
+; load first uncached key into TMP0 (if any)
+; AES block encript XDATA(p_first)
+; before using uncached key in TMP0, load next key in TMP1
+; before using uncached key in TMP1, load next key in TMP0
+%macro AES_ENC_BLOCKS 11
+%define %%TOT_ROUNDS %1
+%define %%ENC_BLOCK %2
+%define %%TMP %3
+%define %%TMP_CNT %4
+%define %%FIRST_CKEY %5
+%define %%CACHED_KEYS %6
+%define %%KEY_DATA %7
+%define %%MOVDQ %8
+%define %%PXOR %9
+%define %%AES_ENC %10
+%define %%AES_ENC_LAST %11
+
+ %assign current_tmp (%%TMP)
+ INIT_SELECT_KEY %%TOT_ROUNDS, %%CACHED_KEYS, %%KEY_DATA, %%TMP, %%TMP_CNT, %%MOVDQ
+
+ %assign round 0
+ %assign key (round + %%FIRST_CKEY)
+ %rep %%TOT_ROUNDS ; for all key rounds
+ ; find the key data for this round
+ SELECT_KEY round, %%TOT_ROUNDS, %%CACHED_KEYS, %%FIRST_CKEY, %%KEY_DATA, %%TMP, %%TMP_CNT, %%MOVDQ
+
+ ; encrypt block
+ %if (0 == round)
+ %%PXOR XDATA(%%ENC_BLOCK), KEY_REG(key) ; round zero step
+ %elif ( (%%TOT_ROUNDS-1) == round )
+ %%AES_ENC_LAST XDATA(%%ENC_BLOCK), KEY_REG(key) ; last round's step
+ %else
+ %%AES_ENC XDATA(%%ENC_BLOCK), KEY_REG(key) ; rounds 1..last-1 step
+ %endif ; (0 == round)
+
+ %assign round (round+1)
+ %endrep ; KEY_ROUNDS
+%endmacro ; AES_ENC
+
+