diff options
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64')
6 files changed, 1286 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_ctx_aarch64_asimd.c b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_ctx_aarch64_asimd.c new file mode 100644 index 000000000..e9a708c17 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_ctx_aarch64_asimd.c @@ -0,0 +1,230 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +#include <stdlib.h> +#include "md5_mb.h" +#include "memcpy_inline.h" +void md5_mb_mgr_init_asimd(MD5_MB_JOB_MGR * state); +MD5_JOB *md5_mb_mgr_submit_asimd(MD5_MB_JOB_MGR * state, MD5_JOB * job); +MD5_JOB *md5_mb_mgr_flush_asimd(MD5_MB_JOB_MGR * state); + +static inline void hash_init_digest(MD5_WORD_T * digest); +static inline uint32_t hash_pad(uint8_t padblock[MD5_BLOCK_SIZE * 2], uint64_t total_len); +static MD5_HASH_CTX *md5_ctx_mgr_resubmit(MD5_HASH_CTX_MGR * mgr, MD5_HASH_CTX * ctx); + +void md5_ctx_mgr_init_asimd(MD5_HASH_CTX_MGR * mgr) +{ + md5_mb_mgr_init_asimd(&mgr->mgr); +} + +MD5_HASH_CTX *md5_ctx_mgr_submit_asimd(MD5_HASH_CTX_MGR * mgr, MD5_HASH_CTX * ctx, + const void *buffer, uint32_t len, HASH_CTX_FLAG flags) +{ + if (flags & (~HASH_ENTIRE)) { + ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; + return ctx; + } + + if (ctx->status & HASH_CTX_STS_PROCESSING) { + // Cannot submit to a currently processing job. + ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; + return ctx; + } + + if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + // Cannot update a finished job. + ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; + return ctx; + } + + if (flags & HASH_FIRST) { + // Init digest + hash_init_digest(ctx->job.result_digest); + + // Reset byte counter + ctx->total_length = 0; + + // Clear extra blocks + ctx->partial_block_buffer_length = 0; + } + // If we made it here, there were no errors during this call to submit + ctx->error = HASH_CTX_ERROR_NONE; + + // Store buffer ptr info from user + ctx->incoming_buffer = buffer; + ctx->incoming_buffer_length = len; + + // Store the user's request flags and mark this ctx as currently being processed. + ctx->status = (flags & HASH_LAST) ? 
+ (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : + HASH_CTX_STS_PROCESSING; + + // Advance byte counter + ctx->total_length += len; + + // If there is anything currently buffered in the extra blocks, append to it until it contains a whole block. + // Or if the user's buffer contains less than a whole block, append as much as possible to the extra block. + if ((ctx->partial_block_buffer_length) | (len < MD5_BLOCK_SIZE)) { + // Compute how many bytes to copy from user buffer into extra block + uint32_t copy_len = MD5_BLOCK_SIZE - ctx->partial_block_buffer_length; + if (len < copy_len) + copy_len = len; + + if (copy_len) { + // Copy and update relevant pointers and counters + memcpy_varlen(&ctx->partial_block_buffer + [ctx->partial_block_buffer_length], buffer, copy_len); + + ctx->partial_block_buffer_length += copy_len; + ctx->incoming_buffer = (const void *)((const char *)buffer + copy_len); + ctx->incoming_buffer_length = len - copy_len; + } + // The extra block should never contain more than 1 block here + assert(ctx->partial_block_buffer_length <= MD5_BLOCK_SIZE); + + // If the extra block buffer contains exactly 1 block, it can be hashed. + if (ctx->partial_block_buffer_length >= MD5_BLOCK_SIZE) { + ctx->partial_block_buffer_length = 0; + + ctx->job.buffer = ctx->partial_block_buffer; + ctx->job.len = 1; + ctx = (MD5_HASH_CTX *) md5_mb_mgr_submit_asimd(&mgr->mgr, &ctx->job); + } + } + + return md5_ctx_mgr_resubmit(mgr, ctx); +} + +MD5_HASH_CTX *md5_ctx_mgr_flush_asimd(MD5_HASH_CTX_MGR * mgr) +{ + MD5_HASH_CTX *ctx; + + while (1) { + ctx = (MD5_HASH_CTX *) md5_mb_mgr_flush_asimd(&mgr->mgr); + + // If flush returned 0, there are no more jobs in flight. + if (!ctx) + return NULL; + + // If flush returned a job, verify that it is safe to return to the user. + // If it is not ready, resubmit the job to finish processing. + ctx = md5_ctx_mgr_resubmit(mgr, ctx); + + // If md5_ctx_mgr_resubmit returned a job, it is ready to be returned. 
+ if (ctx) + return ctx; + + // Otherwise, all jobs currently being managed by the HASH_CTX_MGR still need processing. Loop. + } +} + +static MD5_HASH_CTX *md5_ctx_mgr_resubmit(MD5_HASH_CTX_MGR * mgr, MD5_HASH_CTX * ctx) +{ + while (ctx) { + + if (ctx->status & HASH_CTX_STS_COMPLETE) { + ctx->status = HASH_CTX_STS_COMPLETE; // Clear PROCESSING bit + return ctx; + } + // If the extra blocks are empty, begin hashing what remains in the user's buffer. + if (ctx->partial_block_buffer_length == 0 && ctx->incoming_buffer_length) { + const void *buffer = ctx->incoming_buffer; + uint32_t len = ctx->incoming_buffer_length; + + // Only entire blocks can be hashed. Copy remainder to extra blocks buffer. + uint32_t copy_len = len & (MD5_BLOCK_SIZE - 1); + + if (copy_len) { + len -= copy_len; + //memcpy(ctx->partial_block_buffer, ((const char*)buffer + len), copy_len); + memcpy_varlen(ctx->partial_block_buffer, + ((const char *)buffer + len), copy_len); + ctx->partial_block_buffer_length = copy_len; + } + + ctx->incoming_buffer_length = 0; + + // len should be a multiple of the block size now + assert((len % MD5_BLOCK_SIZE) == 0); + + // Set len to the number of blocks to be hashed in the user's buffer + len >>= MD5_LOG2_BLOCK_SIZE; + + if (len) { + ctx->job.buffer = (uint8_t *) buffer; + ctx->job.len = len; + ctx = (MD5_HASH_CTX *) md5_mb_mgr_submit_asimd(&mgr->mgr, + &ctx->job); + continue; + } + } + // If the extra blocks are not empty, then we are either on the last block(s) + // or we need more user input before continuing. 
+ if (ctx->status & HASH_CTX_STS_LAST) { + + uint8_t *buf = ctx->partial_block_buffer; + uint32_t n_extra_blocks = hash_pad(buf, ctx->total_length); + + ctx->status = + (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_COMPLETE); + + ctx->job.buffer = buf; + ctx->job.len = (uint32_t) n_extra_blocks; + ctx = (MD5_HASH_CTX *) md5_mb_mgr_submit_asimd(&mgr->mgr, &ctx->job); + continue; + } + + if (ctx) + ctx->status = HASH_CTX_STS_IDLE; + return ctx; + } + + return NULL; +} + +static inline void hash_init_digest(MD5_WORD_T * digest) +{ + static const MD5_WORD_T hash_initial_digest[MD5_DIGEST_NWORDS] = + { MD5_INITIAL_DIGEST }; + memcpy_fixedlen(digest, hash_initial_digest, sizeof(hash_initial_digest)); +} + +static inline uint32_t hash_pad(uint8_t padblock[MD5_BLOCK_SIZE * 2], uint64_t total_len) +{ + uint32_t i = (uint32_t) (total_len & (MD5_BLOCK_SIZE - 1)); + + memclr_fixedlen(&padblock[i], MD5_BLOCK_SIZE); + padblock[i] = 0x80; + + i += ((MD5_BLOCK_SIZE - 1) & (0 - (total_len + MD5_PADLENGTHFIELD_SIZE + 1))) + 1 + + MD5_PADLENGTHFIELD_SIZE; + + *((uint64_t *) & padblock[i - 8]) = ((uint64_t) total_len << 3); + + return i >> MD5_LOG2_BLOCK_SIZE; // Number of extra blocks to hash +} diff --git a/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_aarch64_dispatcher.c b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_aarch64_dispatcher.c new file mode 100644 index 000000000..14ef3a6e6 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_aarch64_dispatcher.c @@ -0,0 +1,59 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +#include <aarch64_multibinary.h> + +DEFINE_INTERFACE_DISPATCHER(md5_ctx_mgr_submit) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_ASIMD) + return PROVIDER_INFO(md5_ctx_mgr_submit_asimd); + + return PROVIDER_BASIC(md5_ctx_mgr_submit); + +} + +DEFINE_INTERFACE_DISPATCHER(md5_ctx_mgr_init) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_ASIMD) + return PROVIDER_INFO(md5_ctx_mgr_init_asimd); + + return PROVIDER_BASIC(md5_ctx_mgr_init); + +} + +DEFINE_INTERFACE_DISPATCHER(md5_ctx_mgr_flush) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_ASIMD) + return PROVIDER_INFO(md5_ctx_mgr_flush_asimd); + + return PROVIDER_BASIC(md5_ctx_mgr_flush); + +} diff --git a/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x1.S b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x1.S new file mode 100644 index 000000000..27d112494 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x1.S @@ -0,0 +1,248 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/
	.arch armv8-a

/*
Macros
*/

/* Define q_/v_/s_ aliases for one vector register (not used in this file). */
.macro declare_var_vector_reg name:req,reg:req
	q_\name .req q\reg
	v_\name .req v\reg
	s_\name .req s\reg
.endm

/*
 * One MD5 step. All four round macros compute
 *     d_a = d_b + rol(d_a + f(d_b,d_c,d_d) + w + K, r)
 * with K = (kh << 16) | kl built by mov/movk, and the left-rotate by r
 * expressed as a right-rotate by 32 - r. They differ only in f.
 * tmp0, tmp1 and k are clobbered.
 */

/* Steps 0..15: f = d_d ^ (d_b & (d_c ^ d_d)) */
.macro round_0_15 d_a:req,d_b:req,d_c:req,d_d:req,kh:req,kl:req,w:req,r:req
	eor	tmp0,\d_c,\d_d
	mov	k,\kl
	and	tmp0,tmp0,\d_b
	movk	k,\kh,lsl 16
	eor	tmp0,tmp0,\d_d
	add	tmp1,k,\w
	add	tmp0,tmp1,tmp0
	add	tmp0,\d_a,tmp0
	ror	tmp0,tmp0,32 - \r
	add	\d_a,\d_b,tmp0
.endm

/* Steps 16..31: f = d_c ^ (d_d & (d_b ^ d_c)) */
.macro round_16_31 d_a:req,d_b:req,d_c:req,d_d:req,kh:req,kl:req,w:req,r:req
	eor	tmp0,\d_b,\d_c
	mov	k,\kl
	and	tmp0,tmp0,\d_d
	movk	k,\kh,lsl 16
	eor	tmp0,tmp0,\d_c
	add	tmp1,k,\w
	add	tmp0,tmp1,tmp0
	add	tmp0,\d_a,tmp0
	ror	tmp0,tmp0,32 - \r
	add	\d_a,\d_b,tmp0
.endm

/* Steps 32..47: f = d_b ^ d_c ^ d_d */
.macro round_32_47 d_a:req,d_b:req,d_c:req,d_d:req,kh:req,kl:req,w:req,r:req
	eor	tmp0,\d_b,\d_c
	mov	k,\kl
	eor	tmp0,tmp0,\d_d
	movk	k,\kh,lsl 16
	add	tmp1,k,\w
	add	tmp0,tmp1,tmp0
	add	tmp0,\d_a,tmp0
	ror	tmp0,tmp0,32 - \r
	add	\d_a,\d_b,tmp0
.endm

/* Steps 48..63: f = d_c ^ (d_b | ~d_d) */
.macro round_48_63 d_a:req,d_b:req,d_c:req,d_d:req,kh:req,kl:req,w:req,r:req
	orn	tmp0,\d_b,\d_d
	mov	k,\kl
	eor	tmp0,tmp0,\d_c
	movk	k,\kh,lsl 16
	add	tmp1,k,\w
	add	tmp0,tmp1,tmp0
	add	tmp0,\d_a,tmp0
	ror	tmp0,tmp0,32 - \r
	add	\d_a,\d_b,tmp0
.endm
/*
	variables
*/
	job0		.req	x0
	digest_addr	.req	x0	/* x0 is advanced from the job to its digest */
	len		.req	w1
	end		.req	x1	/* x1 is reused as the end-of-input address */

	buf_adr		.req	x2
	d_a		.req	w3
	d_b		.req	w4
	d_c		.req	w5
	d_d		.req	w6
	k		.req	w7
	m0		.req	w8
	m1		.req	w9
	m2		.req	w10
	m3		.req	w11
	m4		.req	w12
	m5		.req	w13
	m6		.req	w14
	m7		.req	w15
	m8		.req	w19
	m9		.req	w20
	m10		.req	w21
	m11		.req	w22
	m12		.req	w23
	m13		.req	w24
	m14		.req	w25
	m15		.req	w26

	tmp0		.req	w27
	tmp1		.req	w28

	/* Previous digest words; these share registers with m0, m1, m7 and m8
	   and are only loaded after those message words were last used. */
	d_a1		.req	w8
	d_b1		.req	w9
	d_c1		.req	w15
	d_d1		.req	w19

/*
	void md5_mb_asimd_x1(MD5_JOB * job0,int len)

	Hashes `len` consecutive 64-byte blocks. The input pointer is read from
	offset 0 of the job and the four digest words are read from and written
	back to offset 64 of the job. Does nothing when len <= 0.
*/
	.global md5_mb_asimd_x1
	.type md5_mb_asimd_x1, %function
md5_mb_asimd_x1:
	cmp	len,0
	stp	x29, x30, [sp,-96]!
	ldr	buf_adr,[job0],64	/* post-increment: x0 now addresses the digest */
	stp	x19, x20, [sp, 16]
	/* `len` is a 32-bit int; AAPCS64 leaves bits 63:32 of x1 unspecified,
	   so widen it before using x1 in 64-bit address arithmetic.
	   sxtw does not touch the flags set by the cmp above. */
	sxtw	end,len
	add	end,buf_adr,end,lsl 6	/* end = buf + len * 64 */
	stp	x21, x22, [sp, 32]
	ldp	d_a,d_b,[digest_addr]
	stp	x23, x24, [sp, 48]
	ldp	d_c,d_d,[digest_addr,8]
	stp	x25, x26, [sp, 64]
	stp	x27, x28, [sp, 80]
	ble	.exit

.loop_start:
	/* Message-word loads are interleaved with the first steps. */
	ldp	m0,m1,[buf_adr],8
	ldp	m2,m3,[buf_adr],8
	round_0_15 d_a,d_b,d_c,d_d,0xd76a,0xa478,m0,7

	ldp	m4,m5,[buf_adr],8
	round_0_15 d_d,d_a,d_b,d_c,0xe8c7,0xb756,m1,12
	ldp	m6,m7,[buf_adr],8
	round_0_15 d_c,d_d,d_a,d_b,0x2420,0x70db,m2,17
	ldp	m8,m9,[buf_adr],8
	round_0_15 d_b,d_c,d_d,d_a,0xc1bd,0xceee,m3,22
	ldp	m10,m11,[buf_adr],8
	round_0_15 d_a,d_b,d_c,d_d,0xf57c,0xfaf,m4,7
	ldp	m12,m13,[buf_adr],8
	round_0_15 d_d,d_a,d_b,d_c,0x4787,0xc62a,m5,12
	ldp	m14,m15,[buf_adr],8
	round_0_15 d_c,d_d,d_a,d_b,0xa830,0x4613,m6,17
	round_0_15 d_b,d_c,d_d,d_a,0xfd46,0x9501,m7,22
	round_0_15 d_a,d_b,d_c,d_d,0x6980,0x98d8,m8,7
	round_0_15 d_d,d_a,d_b,d_c,0x8b44,0xf7af,m9,12
	round_0_15 d_c,d_d,d_a,d_b,0xffff,0x5bb1,m10,17
	round_0_15 d_b,d_c,d_d,d_a,0x895c,0xd7be,m11,22
	round_0_15 d_a,d_b,d_c,d_d,0x6b90,0x1122,m12,7
	round_0_15 d_d,d_a,d_b,d_c,0xfd98,0x7193,m13,12
	round_0_15 d_c,d_d,d_a,d_b,0xa679,0x438e,m14,17
	round_0_15 d_b,d_c,d_d,d_a,0x49b4,0x821,m15,22

	round_16_31 d_a,d_b,d_c,d_d,0xf61e,0x2562,m1,5
	round_16_31 d_d,d_a,d_b,d_c,0xc040,0xb340,m6,9
	round_16_31 d_c,d_d,d_a,d_b,0x265e,0x5a51,m11,14
	round_16_31 d_b,d_c,d_d,d_a,0xe9b6,0xc7aa,m0,20
	round_16_31 d_a,d_b,d_c,d_d,0xd62f,0x105d,m5,5
	round_16_31 d_d,d_a,d_b,d_c,0x244,0x1453,m10,9
	round_16_31 d_c,d_d,d_a,d_b,0xd8a1,0xe681,m15,14
	round_16_31 d_b,d_c,d_d,d_a,0xe7d3,0xfbc8,m4,20
	round_16_31 d_a,d_b,d_c,d_d,0x21e1,0xcde6,m9,5
	round_16_31 d_d,d_a,d_b,d_c,0xc337,0x7d6,m14,9
	round_16_31 d_c,d_d,d_a,d_b,0xf4d5,0xd87,m3,14
	round_16_31 d_b,d_c,d_d,d_a,0x455a,0x14ed,m8,20
	round_16_31 d_a,d_b,d_c,d_d,0xa9e3,0xe905,m13,5
	round_16_31 d_d,d_a,d_b,d_c,0xfcef,0xa3f8,m2,9
	round_16_31 d_c,d_d,d_a,d_b,0x676f,0x2d9,m7,14
	round_16_31 d_b,d_c,d_d,d_a,0x8d2a,0x4c8a,m12,20

	round_32_47 d_a,d_b,d_c,d_d,0xfffa,0x3942,m5,4
	round_32_47 d_d,d_a,d_b,d_c,0x8771,0xf681,m8,11
	round_32_47 d_c,d_d,d_a,d_b,0x6d9d,0x6122,m11,16
	round_32_47 d_b,d_c,d_d,d_a,0xfde5,0x380c,m14,23
	round_32_47 d_a,d_b,d_c,d_d,0xa4be,0xea44,m1,4
	round_32_47 d_d,d_a,d_b,d_c,0x4bde,0xcfa9,m4,11
	round_32_47 d_c,d_d,d_a,d_b,0xf6bb,0x4b60,m7,16
	round_32_47 d_b,d_c,d_d,d_a,0xbebf,0xbc70,m10,23
	round_32_47 d_a,d_b,d_c,d_d,0x289b,0x7ec6,m13,4
	round_32_47 d_d,d_a,d_b,d_c,0xeaa1,0x27fa,m0,11
	round_32_47 d_c,d_d,d_a,d_b,0xd4ef,0x3085,m3,16
	round_32_47 d_b,d_c,d_d,d_a,0x488,0x1d05,m6,23
	round_32_47 d_a,d_b,d_c,d_d,0xd9d4,0xd039,m9,4
	round_32_47 d_d,d_a,d_b,d_c,0xe6db,0x99e5,m12,11
	round_32_47 d_c,d_d,d_a,d_b,0x1fa2,0x7cf8,m15,16
	round_32_47 d_b,d_c,d_d,d_a,0xc4ac,0x5665,m2,23

	round_48_63 d_a,d_b,d_c,d_d,0xf429,0x2244,m0,6
	round_48_63 d_d,d_a,d_b,d_c,0x432a,0xff97,m7,10
	round_48_63 d_c,d_d,d_a,d_b,0xab94,0x23a7,m14,15
	round_48_63 d_b,d_c,d_d,d_a,0xfc93,0xa039,m5,21
	round_48_63 d_a,d_b,d_c,d_d,0x655b,0x59c3,m12,6
	round_48_63 d_d,d_a,d_b,d_c,0x8f0c,0xcc92,m3,10
	round_48_63 d_c,d_d,d_a,d_b,0xffef,0xf47d,m10,15
	round_48_63 d_b,d_c,d_d,d_a,0x8584,0x5dd1,m1,21
	round_48_63 d_a,d_b,d_c,d_d,0x6fa8,0x7e4f,m8,6
	round_48_63 d_d,d_a,d_b,d_c,0xfe2c,0xe6e0,m15,10
	round_48_63 d_c,d_d,d_a,d_b,0xa301,0x4314,m6,15
	round_48_63 d_b,d_c,d_d,d_a,0x4e08,0x11a1,m13,21
	round_48_63 d_a,d_b,d_c,d_d,0xf753,0x7e82,m4,6
	/* m0/m1/m7/m8 are dead from here on: reload the previous digest
	   into the registers they occupied. */
	ldp	d_a1,d_b1,[digest_addr]
	round_48_63 d_d,d_a,d_b,d_c,0xbd3a,0xf235,m11,10
	ldp	d_c1,d_d1,[digest_addr,8]
	round_48_63 d_c,d_d,d_a,d_b,0x2ad7,0xd2bb,m2,15
	round_48_63 d_b,d_c,d_d,d_a,0xeb86,0xd391,m9,21

	/* Add the previous digest and store the result after every block. */
	cmp	buf_adr,end
	add	d_a,d_a1 ,d_a
	str	d_a,[digest_addr]
	add	d_b,d_b1 ,d_b
	str	d_b,[digest_addr,4]
	add	d_c,d_c1 ,d_c
	str	d_c,[digest_addr,8]
	add	d_d,d_d1 ,d_d
	str	d_d,[digest_addr,12]
	bne	.loop_start

.exit:
	ldp	x19, x20, [sp, 16]
	ldp	x21, x22, [sp, 32]
	ldp	x23, x24, [sp, 48]
	ldp	x25, x26, [sp, 64]
	ldp	x27, x28, [sp, 80]
	ldp	x29, x30, [sp], 96
	ret
	.size md5_mb_asimd_x1, .-md5_mb_asimd_x1
diff --git a/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S new file mode 100644 index 000000000..53979131d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_asimd_x4.S @@ -0,0 +1,526 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/
	.arch armv8-a

/*
Macros
*/

/* Define q_/v_/s_ aliases for one vector register. */
.macro declare_var_vector_reg name:req,reg:req
	q_\name .req q\reg
	v_\name .req v\reg
	s_\name .req s\reg
.endm

/*
 * Common tail of every step, four lanes at once:
 *     a = b + rol(a + tmp1 + k + w, r)
 * On entry v_tmp1 must hold the round function value f(b,c,d).
 * The rotate is built from shl / ushr / orr. Clobbers v_tmp0 and v_tmp1.
 */
.macro add_key_rol a:req,b:req,k:req,w:req,r:req
	add	v_tmp0.4s,v_\k\().4s,v_\w\().4s
	add	v_tmp1.4s,v_tmp1.4s,v_\a\().4s
	add	v_tmp1.4s,v_tmp1.4s,v_tmp0.4s
	shl	v_tmp0.4s,v_tmp1.4s,\r
	ushr	v_tmp1.4s,v_tmp1.4s,32-\r
	orr	v_tmp0.16b,v_tmp1.16b,v_tmp0.16b

	add	v_\a\().4s,v_\b\().4s,v_tmp0.4s
.endm

/*
 * Round macros: compute f(b,c,d) into v_tmp1, load the NEXT step's constant
 * vector into \k1 from [key_adr] (post-incremented by 16), then finish the
 * step with the constant already held in \k. Callers therefore alternate
 * the k/k1 arguments from one step to the next.
 */

/* Steps 0..15: bsl with tmp1 = b selects (b & c) | (~b & d) */
.macro round_0_15 a:req,b:req,c:req,d:req,k:req,k1:req,w:req,r:req
	mov	v_tmp1.16b, v_\b\().16b
	bsl	v_tmp1.16b, v_\c\().16b, v_\d\().16b
	ldr	q_\k1,[key_adr],16
	add_key_rol \a,\b,\k,\w,\r
.endm

/* Steps 16..31: bsl with tmp1 = d selects (d & b) | (~d & c) */
.macro round_16_31 a:req,b:req,c:req,d:req,k:req,k1:req,w:req,r:req
	mov	v_tmp1.16b, v_\d\().16b
	bsl	v_tmp1.16b, v_\b\().16b, v_\c\().16b
	ldr	q_\k1,[key_adr],16
	add_key_rol \a,\b,\k,\w,\r
.endm

/* Steps 32..47: f = b ^ c ^ d */
.macro round_32_47 a:req,b:req,c:req,d:req,k:req,k1:req,w:req,r:req
	eor	v_tmp1.16b,v_\b\().16b,v_\c\().16b
	eor	v_tmp1.16b,v_tmp1.16b,v_\d\().16b
	ldr	q_\k1,[key_adr],16
	add_key_rol \a,\b,\k,\w,\r
.endm

/* Steps 48..63: f = c ^ (b | ~d). k1 is optional so that the very last
   step can skip the load of a (non-existent) following constant. */
.macro round_48_63 a:req,b:req,c:req,d:req,k:req,k1,w:req,r:req
	orn	v_tmp1.16b,v_\b\().16b,v_\d\().16b
	eor	v_tmp1.16b,v_tmp1.16b,v_\c\().16b
	.ifnb \k1
	ldr	q_\k1,[key_adr],16
	.endif
	add_key_rol \a,\b,\k,\w,\r
.endm
/*
	variables
*/
	declare_var_vector_reg	tmp0, 0
	declare_var_vector_reg	tmp1, 1
	declare_var_vector_reg	k,    2
	declare_var_vector_reg	k1,   3
	declare_var_vector_reg	a,    4
	declare_var_vector_reg	b,    5
	declare_var_vector_reg	c,    6
	declare_var_vector_reg	d,    7
	/* a1..d1 keep the digest from before the current block. They live in
	   v8-v11, whose d8-d11 halves are saved/restored by the function. */
	declare_var_vector_reg	a1,   8
	declare_var_vector_reg	b1,   9
	declare_var_vector_reg	c1,   10
	declare_var_vector_reg	d1,   11

	declare_var_vector_reg	w0,   16
	declare_var_vector_reg	w1,   17
	declare_var_vector_reg	w2,   18
	declare_var_vector_reg	w3,   19
	declare_var_vector_reg	w4,   20
	declare_var_vector_reg	w5,   21
	declare_var_vector_reg	w6,   22
	declare_var_vector_reg	w7,   23
	declare_var_vector_reg	w8,   24
	declare_var_vector_reg	w9,   25
	declare_var_vector_reg	w10,  26
	declare_var_vector_reg	w11,  27
	declare_var_vector_reg	w12,  28
	declare_var_vector_reg	w13,  29
	declare_var_vector_reg	w14,  30
	declare_var_vector_reg	w15,  31

	len	.req	w4
	len_x	.req	x4
	lane0	.req	x5
	lane1	.req	x6
	lane2	.req	x7
	lane3	.req	x9
	end	.req	x4	/* x4 is reused as lane 0's end-of-input address */
	job0	.req	x0
	job1	.req	x1
	job2	.req	x2
	job3	.req	x3
	key_adr	.req	x10

/*
	void md5_mb_asimd_x4(MD5_JOB * job0, MD5_JOB * job1,
			MD5_JOB * job2, MD5_JOB * job3, int len)

	Hashes `len` 64-byte blocks from each of four jobs in parallel, one job
	per 32-bit vector lane. For each job the input pointer is read from
	offset 0 and the four digest words are read from / written back to
	offset 64. Does nothing when len <= 0.
*/
	.global md5_mb_asimd_x4
	.type md5_mb_asimd_x4, %function
md5_mb_asimd_x4:
	stp	x29,x30,[sp,-48]!
	ldr	lane0,[job0],64		/* post-increment: jobN now addresses its digest */
	stp	d8,d9,[sp,16]
	ldr	lane1,[job1],64
	stp	d10,d11,[sp,32]
	ldr	lane2,[job2],64
	cmp	len,0
	ldr	lane3,[job3],64
	ble	.exit

	//load digests
	ld4	{v_a.s-v_d.s}[0],[job0]
	/* `len` is a 32-bit int; AAPCS64 leaves bits 63:32 of x4 unspecified,
	   so widen it before using x4 in 64-bit address arithmetic. */
	sxtw	len_x,len
	add	end,lane0,len_x,lsl 6	/* end = lane0 + len * 64 */
	ld4	{v_a.s-v_d.s}[1],[job1]
	ld4	{v_a.s-v_d.s}[2],[job2]
	ld4	{v_a.s-v_d.s}[3],[job3]
.loop_start:
	/* Gather message words lane by lane while saving the incoming digest
	   and (re)pointing key_adr at the start of the constant table. */
	ld1	{v_w0.s}[0],[lane0],4
	mov	v_a1.16b,v_a.16b
	ld1	{v_w0.s}[1],[lane1],4
	mov	v_b1.16b,v_b.16b
	ld1	{v_w0.s}[2],[lane2],4
	mov	v_c1.16b,v_c.16b
	ld1	{v_w0.s}[3],[lane3],4
	mov	v_d1.16b,v_d.16b

	ld3	{v_w1.s-v_w3.s}[0],[lane0],12
	adrp	key_adr,.key_consts
	ld3	{v_w1.s-v_w3.s}[1],[lane1],12
	add	key_adr,key_adr,#:lo12:.key_consts
	ld3	{v_w1.s-v_w3.s}[2],[lane2],12
	ldr	q_k,[key_adr],16
	ld3	{v_w1.s-v_w3.s}[3],[lane3],12


	ld4	{v_w4.s-v_w7.s}[0], [lane0],16

	round_0_15 a,b,c,d,k,k1,w0,7

	ld4	{v_w4.s-v_w7.s}[1], [lane1],16
	round_0_15 d,a,b,c,k1,k,w1,12
	ld4	{v_w4.s-v_w7.s}[2], [lane2],16
	round_0_15 c,d,a,b,k,k1,w2,17
	ld4	{v_w4.s-v_w7.s}[3], [lane3],16
	round_0_15 b,c,d,a,k1,k,w3,22
	ld4	{v_w8.s-v_w11.s}[0],[lane0],16
	round_0_15 a,b,c,d,k,k1,w4,7
	ld4	{v_w8.s-v_w11.s}[1],[lane1],16
	round_0_15 d,a,b,c,k1,k,w5,12
	ld4	{v_w8.s-v_w11.s}[2],[lane2],16
	round_0_15 c,d,a,b,k,k1,w6,17
	ld4	{v_w8.s-v_w11.s}[3],[lane3],16
	round_0_15 b,c,d,a,k1,k,w7,22
	ld4	{v_w12.s-v_w15.s}[0],[lane0],16
	round_0_15 a,b,c,d,k,k1,w8,7
	ld4	{v_w12.s-v_w15.s}[1],[lane1],16
	round_0_15 d,a,b,c,k1,k,w9,12
	ld4	{v_w12.s-v_w15.s}[2],[lane2],16
	round_0_15 c,d,a,b,k,k1,w10,17
	ld4	{v_w12.s-v_w15.s}[3],[lane3],16
	round_0_15 b,c,d,a,k1,k,w11,22
	round_0_15 a,b,c,d,k,k1,w12,7
	round_0_15 d,a,b,c,k1,k,w13,12
	round_0_15 c,d,a,b,k,k1,w14,17
	round_0_15 b,c,d,a,k1,k,w15,22

	round_16_31 a,b,c,d,k,k1,w1,5
	round_16_31 d,a,b,c,k1,k,w6,9
	round_16_31 c,d,a,b,k,k1,w11,14
	round_16_31 b,c,d,a,k1,k,w0,20
	round_16_31 a,b,c,d,k,k1,w5,5
	round_16_31 d,a,b,c,k1,k,w10,9
	round_16_31 c,d,a,b,k,k1,w15,14
	round_16_31 b,c,d,a,k1,k,w4,20
	round_16_31 a,b,c,d,k,k1,w9,5
	round_16_31 d,a,b,c,k1,k,w14,9
	round_16_31 c,d,a,b,k,k1,w3,14
	round_16_31 b,c,d,a,k1,k,w8,20
	round_16_31 a,b,c,d,k,k1,w13,5
	round_16_31 d,a,b,c,k1,k,w2,9
	round_16_31 c,d,a,b,k,k1,w7,14
	round_16_31 b,c,d,a,k1,k,w12,20

	round_32_47 a,b,c,d,k,k1,w5,4
	round_32_47 d,a,b,c,k1,k,w8,11
	round_32_47 c,d,a,b,k,k1,w11,16
	round_32_47 b,c,d,a,k1,k,w14,23
	round_32_47 a,b,c,d,k,k1,w1,4
	round_32_47 d,a,b,c,k1,k,w4,11
	round_32_47 c,d,a,b,k,k1,w7,16
	round_32_47 b,c,d,a,k1,k,w10,23
	round_32_47 a,b,c,d,k,k1,w13,4
	round_32_47 d,a,b,c,k1,k,w0,11
	round_32_47 c,d,a,b,k,k1,w3,16
	round_32_47 b,c,d,a,k1,k,w6,23
	round_32_47 a,b,c,d,k,k1,w9,4
	round_32_47 d,a,b,c,k1,k,w12,11
	round_32_47 c,d,a,b,k,k1,w15,16
	round_32_47 b,c,d,a,k1,k,w2,23

	round_48_63 a,b,c,d,k,k1,w0,6
	round_48_63 d,a,b,c,k1,k,w7,10
	round_48_63 c,d,a,b,k,k1,w14,15
	round_48_63 b,c,d,a,k1,k,w5,21
	round_48_63 a,b,c,d,k,k1,w12,6
	round_48_63 d,a,b,c,k1,k,w3,10
	round_48_63 c,d,a,b,k,k1,w10,15
	round_48_63 b,c,d,a,k1,k,w1,21
	round_48_63 a,b,c,d,k,k1,w8,6
	round_48_63 d,a,b,c,k1,k,w15,10
	round_48_63 c,d,a,b,k,k1,w6,15
	round_48_63 b,c,d,a,k1,k,w13,21
	round_48_63 a,b,c,d,k,k1,w4,6
	round_48_63 d,a,b,c,k1,k,w11,10
	round_48_63 c,d,a,b,k,k1,w2,15
	/* last step: empty k1 argument, nothing left to prefetch */
	round_48_63 b,c,d,a,k1, ,w9,21




	/* Add the digest saved at the top of the loop. All four lanes advance
	   in lock-step, so lane 0's pointer alone decides loop termination. */
	cmp	lane0,end
	add	v_a.4s,v_a1.4s,v_a.4s
	add	v_b.4s,v_b1.4s,v_b.4s
	add	v_c.4s,v_c1.4s,v_c.4s
	add	v_d.4s,v_d1.4s,v_d.4s
	bne	.loop_start

	/* Scatter each lane's digest back to its job. */
	st4	{v_a.s-v_d.s}[0],[job0]
	st4	{v_a.s-v_d.s}[1],[job1]
	st4	{v_a.s-v_d.s}[2],[job2]
	st4	{v_a.s-v_d.s}[3],[job3]
.exit:
	ldp	d8,d9,[sp,16]
	ldp	d10,d11,[sp,32]
	ldp	x29,x30,[sp],48
	ret
/*
 * The 64 MD5 step constants in step order, each replicated four times so a
 * single 16-byte load yields the same constant in every lane.
 */
.key_consts:
	/* steps 0..15 */
	.word	0xd76aa478, 0xd76aa478, 0xd76aa478, 0xd76aa478
	.word	0xe8c7b756, 0xe8c7b756, 0xe8c7b756, 0xe8c7b756
	.word	0x242070db, 0x242070db, 0x242070db, 0x242070db
	.word	0xc1bdceee, 0xc1bdceee, 0xc1bdceee, 0xc1bdceee
	.word	0xf57c0faf, 0xf57c0faf, 0xf57c0faf, 0xf57c0faf
	.word	0x4787c62a, 0x4787c62a, 0x4787c62a, 0x4787c62a
	.word	0xa8304613, 0xa8304613, 0xa8304613, 0xa8304613
	.word	0xfd469501, 0xfd469501, 0xfd469501, 0xfd469501
	.word	0x698098d8, 0x698098d8, 0x698098d8, 0x698098d8
	.word	0x8b44f7af, 0x8b44f7af, 0x8b44f7af, 0x8b44f7af
	.word	0xffff5bb1, 0xffff5bb1, 0xffff5bb1, 0xffff5bb1
	.word	0x895cd7be, 0x895cd7be, 0x895cd7be, 0x895cd7be
	.word	0x6b901122, 0x6b901122, 0x6b901122, 0x6b901122
	.word	0xfd987193, 0xfd987193, 0xfd987193, 0xfd987193
	.word	0xa679438e, 0xa679438e, 0xa679438e, 0xa679438e
	.word	0x49b40821, 0x49b40821, 0x49b40821, 0x49b40821
	/* steps 16..31 */
	.word	0xf61e2562, 0xf61e2562, 0xf61e2562, 0xf61e2562
	.word	0xc040b340, 0xc040b340, 0xc040b340, 0xc040b340
	.word	0x265e5a51, 0x265e5a51, 0x265e5a51, 0x265e5a51
	.word	0xe9b6c7aa, 0xe9b6c7aa, 0xe9b6c7aa, 0xe9b6c7aa
	.word	0xd62f105d, 0xd62f105d, 0xd62f105d, 0xd62f105d
	.word	0x02441453, 0x02441453, 0x02441453, 0x02441453
	.word	0xd8a1e681, 0xd8a1e681, 0xd8a1e681, 0xd8a1e681
	.word	0xe7d3fbc8, 0xe7d3fbc8, 0xe7d3fbc8, 0xe7d3fbc8
	.word	0x21e1cde6, 0x21e1cde6, 0x21e1cde6, 0x21e1cde6
	.word	0xc33707d6, 0xc33707d6, 0xc33707d6, 0xc33707d6
	.word	0xf4d50d87, 0xf4d50d87, 0xf4d50d87, 0xf4d50d87
	.word	0x455a14ed, 0x455a14ed, 0x455a14ed, 0x455a14ed
	.word	0xa9e3e905, 0xa9e3e905, 0xa9e3e905, 0xa9e3e905
	.word	0xfcefa3f8, 0xfcefa3f8, 0xfcefa3f8, 0xfcefa3f8
	.word	0x676f02d9, 0x676f02d9, 0x676f02d9, 0x676f02d9
	.word	0x8d2a4c8a, 0x8d2a4c8a, 0x8d2a4c8a, 0x8d2a4c8a
	/* steps 32..47 */
	.word	0xfffa3942, 0xfffa3942, 0xfffa3942, 0xfffa3942
	.word	0x8771f681, 0x8771f681, 0x8771f681, 0x8771f681
	.word	0x6d9d6122, 0x6d9d6122, 0x6d9d6122, 0x6d9d6122
	.word	0xfde5380c, 0xfde5380c, 0xfde5380c, 0xfde5380c
	.word	0xa4beea44, 0xa4beea44, 0xa4beea44, 0xa4beea44
	.word	0x4bdecfa9, 0x4bdecfa9, 0x4bdecfa9, 0x4bdecfa9
	.word	0xf6bb4b60, 0xf6bb4b60, 0xf6bb4b60, 0xf6bb4b60
	.word	0xbebfbc70, 0xbebfbc70, 0xbebfbc70, 0xbebfbc70
	.word	0x289b7ec6, 0x289b7ec6, 0x289b7ec6, 0x289b7ec6
	.word	0xeaa127fa, 0xeaa127fa, 0xeaa127fa, 0xeaa127fa
	.word	0xd4ef3085, 0xd4ef3085, 0xd4ef3085, 0xd4ef3085
	.word	0x04881d05, 0x04881d05, 0x04881d05, 0x04881d05
	.word	0xd9d4d039, 0xd9d4d039, 0xd9d4d039, 0xd9d4d039
	.word	0xe6db99e5, 0xe6db99e5, 0xe6db99e5, 0xe6db99e5
	.word	0x1fa27cf8, 0x1fa27cf8, 0x1fa27cf8, 0x1fa27cf8
	.word	0xc4ac5665, 0xc4ac5665, 0xc4ac5665, 0xc4ac5665
	/* steps 48..63 */
	.word	0xf4292244, 0xf4292244, 0xf4292244, 0xf4292244
	.word	0x432aff97, 0x432aff97, 0x432aff97, 0x432aff97
	.word	0xab9423a7, 0xab9423a7, 0xab9423a7, 0xab9423a7
	.word	0xfc93a039, 0xfc93a039, 0xfc93a039, 0xfc93a039
	.word	0x655b59c3, 0x655b59c3, 0x655b59c3, 0x655b59c3
	.word	0x8f0ccc92, 0x8f0ccc92, 0x8f0ccc92, 0x8f0ccc92
	.word	0xffeff47d, 0xffeff47d, 0xffeff47d, 0xffeff47d
	.word	0x85845dd1, 0x85845dd1, 0x85845dd1, 0x85845dd1
	.word	0x6fa87e4f, 0x6fa87e4f, 0x6fa87e4f, 0x6fa87e4f
	.word	0xfe2ce6e0, 0xfe2ce6e0, 0xfe2ce6e0, 0xfe2ce6e0
	.word	0xa3014314, 0xa3014314, 0xa3014314, 0xa3014314
	.word	0x4e0811a1, 0x4e0811a1, 0x4e0811a1, 0x4e0811a1
	.word	0xf7537e82, 0xf7537e82, 0xf7537e82, 0xf7537e82
	.word	0xbd3af235, 0xbd3af235, 0xbd3af235, 0xbd3af235
	.word	0x2ad7d2bb, 0x2ad7d2bb, 0x2ad7d2bb, 0x2ad7d2bb
	.word	0xeb86d391, 0xeb86d391, 0xeb86d391, 0xeb86d391
	.size md5_mb_asimd_x4, .-md5_mb_asimd_x4
diff --git a/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_mgr_aarch64_asimd.c b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_mgr_aarch64_asimd.c new file mode 100644 index 000000000..5289cd91f --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_mgr_aarch64_asimd.c @@ -0,0 +1,187 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include <stddef.h> +#include <md5_mb.h> +#include <assert.h> + +#ifndef max +#define max(a,b) (((a) > (b)) ? (a) : (b)) +#endif + +#ifndef min +#define min(a,b) (((a) < (b)) ? 
(a) : (b)) +#endif + +#define MD5_MB_CE_MAX_LANES 4 +void md5_mb_asimd_x4(MD5_JOB *, MD5_JOB *, MD5_JOB *, MD5_JOB *, int); +void md5_mb_asimd_x1(MD5_JOB *, int); + +#define LANE_IS_NOT_FINISHED(state,i) \ + (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane!=NULL) +#define LANE_IS_FINISHED(state,i) \ + (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane!=NULL) +#define LANE_IS_FREE(state,i) \ + (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane==NULL) +#define LANE_IS_INVALID(state,i) \ + (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane==NULL) +void md5_mb_mgr_init_asimd(MD5_MB_JOB_MGR * state) +{ + unsigned int i; + + state->unused_lanes[0] = 0xf; + state->num_lanes_inuse = 0; + for (i = 0; i < MD5_MB_CE_MAX_LANES; i++) { + state->unused_lanes[0] <<= 4; + state->unused_lanes[0] |= MD5_MB_CE_MAX_LANES - 1 - i; + state->lens[i] = i; + state->ldata[i].job_in_lane = 0; + } + + //lanes > MD5_MB_CE_MAX_LANES is invalid lane + for (; i < MD5_MAX_LANES; i++) { + state->lens[i] = 0xf; + state->ldata[i].job_in_lane = 0; + } +} + +static int md5_mb_mgr_do_jobs(MD5_MB_JOB_MGR * state) +{ + int lane_idx, len, i; + + if (state->num_lanes_inuse == 0) { + return -1; + } + if (state->num_lanes_inuse == 4) { + len = min(min(state->lens[0], state->lens[1]), + min(state->lens[2], state->lens[3])); + lane_idx = len & 0xf; + len &= ~0xf; + md5_mb_asimd_x4(state->ldata[0].job_in_lane, + state->ldata[1].job_in_lane, + state->ldata[2].job_in_lane, + state->ldata[3].job_in_lane, len >> 4); + //only return the min length job + for (i = 0; i < MD5_MAX_LANES; i++) { + if (LANE_IS_NOT_FINISHED(state, i)) { + state->lens[i] -= len; + state->ldata[i].job_in_lane->len -= len; + state->ldata[i].job_in_lane->buffer += len << 2; + } + } + + return lane_idx; + } else { + for (i = 0; i < MD5_MAX_LANES; i++) { + if (LANE_IS_NOT_FINISHED(state, i)) { + len = state->lens[i] & (~0xf); + md5_mb_asimd_x1(state->ldata[i].job_in_lane, len >> 4); + state->lens[i] -= 
len; + state->ldata[i].job_in_lane->len -= len; + state->ldata[i].job_in_lane->buffer += len << 2; + return i; + } + } + } + return -1; + +} + +static MD5_JOB *md5_mb_mgr_free_lane(MD5_MB_JOB_MGR * state) +{ + int i; + MD5_JOB *ret = NULL; + + for (i = 0; i < MD5_MB_CE_MAX_LANES; i++) { + if (LANE_IS_FINISHED(state, i)) { + + state->unused_lanes[0] <<= 4; + state->unused_lanes[0] |= i; + state->num_lanes_inuse--; + ret = state->ldata[i].job_in_lane; + ret->status = STS_COMPLETED; + state->ldata[i].job_in_lane = NULL; + break; + } + } + return ret; +} + +static void md5_mb_mgr_insert_job(MD5_MB_JOB_MGR * state, MD5_JOB * job) +{ + int lane_idx; + //add job into lanes + lane_idx = state->unused_lanes[0] & 0xf; + //fatal error + assert(lane_idx < MD5_MB_CE_MAX_LANES); + state->lens[lane_idx] = (job->len << 4) | lane_idx; + state->ldata[lane_idx].job_in_lane = job; + state->unused_lanes[0] >>= 4; + state->num_lanes_inuse++; +} + +MD5_JOB *md5_mb_mgr_submit_asimd(MD5_MB_JOB_MGR * state, MD5_JOB * job) +{ +#ifndef NDEBUG + int lane_idx; +#endif + MD5_JOB *ret; + + //add job into lanes + md5_mb_mgr_insert_job(state, job); + + ret = md5_mb_mgr_free_lane(state); + if (ret != NULL) { + return ret; + } + //submit will wait all lane has data + if (state->num_lanes_inuse < MD5_MB_CE_MAX_LANES) + return NULL; +#ifndef NDEBUG + lane_idx = md5_mb_mgr_do_jobs(state); + assert(lane_idx != -1); +#else + md5_mb_mgr_do_jobs(state); +#endif + + ret = md5_mb_mgr_free_lane(state); + return ret; +} + +MD5_JOB *md5_mb_mgr_flush_asimd(MD5_MB_JOB_MGR * state) +{ + MD5_JOB *ret; + ret = md5_mb_mgr_free_lane(state); + if (ret) { + return ret; + } + + md5_mb_mgr_do_jobs(state); + return md5_mb_mgr_free_lane(state); + +} diff --git a/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_multibinary.S b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_multibinary.S new file mode 100644 index 000000000..b66320f5c --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/md5_mb/aarch64/md5_mb_multibinary.S 
@@ -0,0 +1,36 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + + +#include "aarch64_multibinary.h" + + +/* One mbin_interface invocation per MD5 context-manager entry point (submit, init, flush). NOTE(review): mbin_interface is not defined in this file, presumably it comes from aarch64_multibinary.h and emits a runtime-dispatched symbol for each name -- confirm against that header. */ +mbin_interface md5_ctx_mgr_submit +mbin_interface md5_ctx_mgr_init +mbin_interface md5_ctx_mgr_flush |