diff options
Diffstat (limited to '')
-rw-r--r-- | src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c | 217 |
1 files changed, 217 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c new file mode 100644 index 000000000..4b34e7b53 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c @@ -0,0 +1,217 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include <stddef.h> +#include <sha1_mb.h> +#include <assert.h> +#include "endian_helper.h" + +extern void sha1_aarch64_x1(const uint8_t * data, int num_blocks, uint32_t digest[]); +static inline void sha1_job_x1(SHA1_JOB * job, int blocks) +{ + sha1_aarch64_x1(job->buffer, blocks, job->result_digest); +} + +#ifndef min +#define min(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +#define SHA1_MB_ASIMD_MAX_LANES 4 +void sha1_mb_asimd_x4(SHA1_JOB *, SHA1_JOB *, SHA1_JOB *, SHA1_JOB *, int); + +#define LANE_IS_NOT_FINISHED(state,i) \ + (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane!=NULL) +#define LANE_IS_FINISHED(state,i) \ + (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane!=NULL) +#define LANE_IS_FREE(state,i) \ + (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane==NULL) +#define LANE_IS_INVALID(state,i) \ + (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane==NULL) + +void sha1_mb_mgr_init_asimd(SHA1_MB_JOB_MGR * state) +{ + unsigned int i; + + state->unused_lanes = 0xf; + state->num_lanes_inuse = 0; + for (i = 0; i < SHA1_MB_ASIMD_MAX_LANES; i++) { + state->unused_lanes <<= 4; + state->unused_lanes |= SHA1_MB_ASIMD_MAX_LANES - 1 - i; + state->lens[i] = i; + state->ldata[i].job_in_lane = 0; + } + + // lanes > SHA1_MB_ASIMD_MAX_LANES is invalid lane + for (; i < SHA1_MAX_LANES; i++) { + state->lens[i] = 0xf; + state->ldata[i].job_in_lane = 0; + } +} + +static int sha1_mb_mgr_do_jobs(SHA1_MB_JOB_MGR * state) +{ + int lane_idx, len, i, lanes, blocks; + int lane_idx_array[SHA1_MAX_LANES]; + + if (state->num_lanes_inuse == 0) { + return -1; + } + lanes = 0, len = 0; + for (i = 0; i < SHA1_MAX_LANES && lanes < state->num_lanes_inuse; i++) { + if (LANE_IS_NOT_FINISHED(state, i)) { + if (lanes) + len = min(len, state->lens[i]); + else + len = state->lens[i]; + lane_idx_array[lanes] = i; + lanes++; + } + } + + if (lanes == 0) + return -1; + lane_idx = len & 0xf; + len = len & (~0xf); + blocks = len >> 4; + + /* for less-than-3-lane job, ASIMD really does not have much advantage + * compared to scalar due to wasted >= 50% capacity + * therefore we only run ASIMD for 3/4 lanes of data + */ + if (lanes == SHA1_MB_ASIMD_MAX_LANES) { + sha1_mb_asimd_x4(state->ldata[lane_idx_array[0]].job_in_lane, + state->ldata[lane_idx_array[1]].job_in_lane, + state->ldata[lane_idx_array[2]].job_in_lane, + state->ldata[lane_idx_array[3]].job_in_lane, blocks); + } else if (lanes == 3) { + /* in case of 3 lanes, apparently ASIMD will still operate as if + * there were four lanes of data in processing (waste 25% capacity) + * theoretically we can let ASIMD implementation know the number of lanes + * so that it could "at least" save some memory loading time + * but in practice, we can just pass lane 0 as dummy for similar + * cache performance + */ + SHA1_JOB dummy; + dummy.buffer = state->ldata[lane_idx_array[0]].job_in_lane->buffer; + dummy.len = state->ldata[lane_idx_array[0]].job_in_lane->len; + sha1_mb_asimd_x4(state->ldata[lane_idx_array[0]].job_in_lane, + &dummy, + state->ldata[lane_idx_array[1]].job_in_lane, + state->ldata[lane_idx_array[2]].job_in_lane, blocks); + } else { + sha1_job_x1(state->ldata[lane_idx_array[0]].job_in_lane, blocks); + if (lanes >= 2) { + sha1_job_x1(state->ldata[lane_idx_array[1]].job_in_lane, blocks); + } + } + + // only return the min length job + for (i = 0; i < SHA1_MAX_LANES; i++) { + if (LANE_IS_NOT_FINISHED(state, i)) { + state->lens[i] -= len; + state->ldata[i].job_in_lane->len -= len; + state->ldata[i].job_in_lane->buffer += len << 2; + } + } + return lane_idx; + +} + +static SHA1_JOB *sha1_mb_mgr_free_lane(SHA1_MB_JOB_MGR * state) +{ + int i; + SHA1_JOB *ret = NULL; + + for (i = 0; i < SHA1_MB_ASIMD_MAX_LANES; i++) { + if (LANE_IS_FINISHED(state, i)) { + state->unused_lanes <<= 4; + state->unused_lanes |= i; + state->num_lanes_inuse--; + ret = state->ldata[i].job_in_lane; + ret->status = STS_COMPLETED; + state->ldata[i].job_in_lane = NULL; + break; + } + } + return ret; +} + +static void sha1_mb_mgr_insert_job(SHA1_MB_JOB_MGR * state, SHA1_JOB * job) +{ + int lane_idx; + // add job into lanes + lane_idx = state->unused_lanes & 0xf; + // fatal error + assert(lane_idx < SHA1_MB_ASIMD_MAX_LANES); + state->lens[lane_idx] = (job->len << 4) | lane_idx; + state->ldata[lane_idx].job_in_lane = job; + state->unused_lanes >>= 4; + state->num_lanes_inuse++; +} + +SHA1_JOB *sha1_mb_mgr_submit_asimd(SHA1_MB_JOB_MGR * state, SHA1_JOB * job) +{ +#ifndef NDEBUG + int lane_idx; +#endif + SHA1_JOB *ret; + + // add job into lanes + sha1_mb_mgr_insert_job(state, job); + + ret = sha1_mb_mgr_free_lane(state); + if (ret != NULL) { + return ret; + } + // submit will wait all lane has data + if (state->num_lanes_inuse < SHA1_MB_ASIMD_MAX_LANES) + return NULL; +#ifndef NDEBUG + lane_idx = sha1_mb_mgr_do_jobs(state); + assert(lane_idx != -1); +#else + sha1_mb_mgr_do_jobs(state); +#endif + + // ~ i = lane_idx; + ret = sha1_mb_mgr_free_lane(state); + return ret; +} + +SHA1_JOB *sha1_mb_mgr_flush_asimd(SHA1_MB_JOB_MGR * state) +{ + SHA1_JOB *ret; + ret = sha1_mb_mgr_free_lane(state); + if (ret) { + return ret; + } + + sha1_mb_mgr_do_jobs(state); + return sha1_mb_mgr_free_lane(state); + +} |