diff options
Diffstat (limited to 'src/spdk/intel-ipsec-mb/LibPerfApp')
-rw-r--r-- | src/spdk/intel-ipsec-mb/LibPerfApp/Makefile | 87 | ||||
-rw-r--r-- | src/spdk/intel-ipsec-mb/LibPerfApp/README | 82 | ||||
-rwxr-xr-x | src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py | 308 | ||||
-rw-r--r-- | src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c | 2581 | ||||
-rw-r--r-- | src/spdk/intel-ipsec-mb/LibPerfApp/msr.c | 304 | ||||
-rw-r--r-- | src/spdk/intel-ipsec-mb/LibPerfApp/msr.h | 114 | ||||
-rw-r--r-- | src/spdk/intel-ipsec-mb/LibPerfApp/win_x64.mak | 81 |
7 files changed, 3557 insertions, 0 deletions
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/Makefile b/src/spdk/intel-ipsec-mb/LibPerfApp/Makefile new file mode 100644 index 000000000..0880d5e29 --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/Makefile @@ -0,0 +1,87 @@ +# Copyright (c) 2017-2018, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of Intel Corporation nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +EXE=ipsec_perf +INSTPATH ?= /usr/include/intel-ipsec-mb.h + +CFLAGS = -DLINUX -D_GNU_SOURCE $(INCLUDES) \ + -W -Wall -Wextra -Wmissing-declarations -Wpointer-arith \ + -Wcast-qual -Wundef -Wwrite-strings \ + -Wformat -Wformat-security \ + -Wunreachable-code -Wmissing-noreturn -Wsign-compare -Wno-endif-labels \ + -Wstrict-prototypes -Wmissing-prototypes -Wold-style-definition \ + -pthread -fno-strict-overflow -fno-delete-null-pointer-checks -fwrapv + +LDFLAGS = -fPIE -z noexecstack -z relro -z now -pthread +LDLIBS = -lIPSec_MB + +ifeq ("$(shell test -e $(INSTPATH) && echo -n yes)","yes") +# library installed +CFLAGS += +else +# library not installed +CFLAGS += -I../include -I../ +LDFLAGS += -L../ +endif + +ifeq ($(DEBUG),y) +CFLAGS += -g -DDEBUG -O0 +LDFLAGS += -g +else +CFLAGS += -O3 -fPIE -fstack-protector -D_FORTIFY_SOURCE=2 +endif + +ifeq ($(GCM_BIG_DATA),y) +CFLAGS += -DGCM_BIG_DATA +endif + +SOURCES := ipsec_perf.c msr.c +OBJECTS := $(SOURCES:%.c=%.o) + +CHECKPATCH ?= checkpatch.pl +CPPCHECK ?= cppcheck + +.PHONY: all clean style cppcheck + +all: $(EXE) + +$(EXE): $(OBJECTS) + $(CC) $(LDFLAGS) $^ $(LDLIBS) -o $@ + +ipsec_perf.o: $(SOURCES) + +.PHONY: clean +clean: + -rm -f $(OBJECTS) + -rm -f $(EXE) + +SOURCES_STYLE := $(foreach infile,$(SOURCES),-f $(infile)) +CHECKPATCH?=checkpatch.pl +.PHONY: style +style: + $(CHECKPATCH) --no-tree --no-signoff --emacs --no-color \ +--ignore CODE_INDENT,INITIALISED_STATIC,LEADING_SPACE,SPLIT_STRING,\ +UNSPECIFIED_INT,ARRAY_SIZE,BLOCK_COMMENT_STYLE,GLOBAL_INITIALISERS,\ +COMPLEX_MACRO,SPACING,STORAGE_CLASS $(SOURCES_STYLE) diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/README b/src/spdk/intel-ipsec-mb/LibPerfApp/README new file mode 100644 index 000000000..dad423d9a --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/README @@ -0,0 +1,82 @@ +======================================================================== +README for Intel(R) Multi-Buffer Crypto for IPsec Library API +performance measurement tool + +February 
2017 +======================================================================== + + +Contents +======== + +- Overview +- Files +- Compilation +- Usage +- Legal Disclaimer + + +Overview +======== +This test tool performs multiple executions of functions included in +Intel Multi-Buffer Crypto for IPsec Library. + +Files +===== + +ipsec_perf.c - Tool which produces text formatted output representing + average times of ipsec_mb functions execution. +ipsec_diff_tool.py - Another tool which interprets text data given. + +Compilation +=========== + +Required tools: +- GNU make +- gcc (GCC) 4.8.3 (or newer) + +Simply run "make" to compile the tool. +To clean the build please run "make clean". + +You can point to another directory containing the IPSec MB library by setting +LIB_LOC, for example: + LIB_LOC=../ipsec_mb_lib make + +In order to perform static code analysis or style check you can do: + make cppcheck +or + make style + +Be aware that you need to have the cppcheck tool installed and the checkpatch.pl +script copied into one of the directories listed in $PATH. +You can also set the CPPCHECK and/or CHECKPATCH variables if you want to give paths +to these tools when they are placed in different directories, for example: + CPPCHECK=~/tools/cppcheck make cppcheck + CHECKPATCH=~/scripts/checkpatch.pl make style + +Usage +===== + +You can simply check the list of arguments by typing: + ./ipsec_perf -h + +Usage example: + ./ipsec_perf -c --no-avx512 --no-gcm -o 24 + +Later you can pass output to ipsec_diff_tool.py for data +analysis: + ./ipsec_diff_tool.py out1.txt out2.txt 5 + +Run ipsec_diff_tool.py -h to see the help page. + +Legal Disclaimer +================ + +THIS SOFTWARE IS PROVIDED BY INTEL "AS IS". NO LICENSE, EXPRESS OR +IMPLIED, BY ESTOPPEL OR OTHERWISE, TO ANY INTELLECTUAL PROPERTY RIGHTS +ARE GRANTED THROUGH USE. 
EXCEPT AS PROVIDED IN INTEL'S TERMS AND +CONDITIONS OF SALE, INTEL ASSUMES NO LIABILITY WHATSOEVER AND INTEL +DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY, RELATING TO SALE AND/OR +USE OF INTEL PRODUCTS INCLUDING LIABILITY OR WARRANTIES RELATING TO +FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABILITY, OR INFRINGEMENT +OF ANY PATENT, COPYRIGHT OR OTHER INTELLECTUAL PROPERTY RIGHT. diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py new file mode 100755 index 000000000..1e8219f53 --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py @@ -0,0 +1,308 @@ +#!/usr/bin/env python + +""" +********************************************************************** + Copyright(c) 2017-2018, Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +********************************************************************** +""" + +import sys + +# Number of parameters (ARCH, CIPHER_MODE, DIR, HASH_ALG, KEY_SIZE) +PAR_NUM = 5 + +class Variant(object): + """ + Class representing one test including chosen parameters and + results of average execution times + """ + def __init__(self, **args): + self.params = (args['arch'], args['cipher'], args['dir'], args['alg'], + args['keysize']) + + self.avg_times = [] + self.slope = None + self.intercept = None + + def set_times(self, avg_times): + """ + Fills test execution time list + """ + self.avg_times = avg_times + + def lin_reg(self, sizes): + """ + Computes linear regression of set of coordinates (x,y) + """ + + n = len(sizes) + + if n != len(self.avg_times): + print "Error!" 
+ return None + + sumx = sum(sizes) + sumy = sum(self.avg_times) + sumxy = sum([x * y for x, y in zip(sizes, self.avg_times)]) + sumsqrx = sum([pow(x, 2) for x in sizes]) + self.slope = (n * sumxy - sumx * sumy) / float(n * sumsqrx - pow(sumx, 2)) + self.intercept = (sumy - self.slope * sumx) / float(n) + + def get_params_str(self): + """ + Returns all parameters concatenated into one tab-separated string + """ + return "\t".join(i for i in self.params) + + def get_lin_func_str(self): + """ + Returns string holding slope and intercept, each formatted with + 5 decimal places and separated by a tab + """ + slope = "{:.5f}".format(self.slope) + intercept = "{:.5f}".format(self.intercept) + return "{}\t{}".format(slope, intercept) + +class VarList(list): + """ + Class used to store all test variants as a list of objects + """ + + def find_obj(self, params): + """ + Finds first occurrence of object whose params equal the given + parameters. Returns None when no object matches. + """ + ret_val = None + matches = (obj for obj in self if obj.params == params) + try: + ret_val = next(matches) + except StopIteration: + pass + return ret_val + + def compare(self, list_b, tolerance): + """ + Finds variants from two data sets which are matching and compares + their linear regression coefficients. + Compares list_b (data set B) against this list (data set A). 
+ A variant is reported when B exceeds A by more than the tolerance; + returns True if at least one variant was reported. + tolerance is a percentage; None selects the default of 5.0. + """ + + if tolerance is None: + tolerance = 5.0 + if tolerance < 0.0: + print "Bad argument: Tolerance must not be less than 0%" + exit(1) + print "TOLERANCE: {:.2f}%".format(tolerance) + + warning = False + print "NO\tARCH\tCIPHER\tDIR\tHASH\tKEYSZ\tSLOPE A\tINTERCEPT A\tSLOPE B\tINTERCEPT B" + for i, obj_a in enumerate(self): + obj_b = list_b.find_obj(obj_a.params) + if obj_b != None: + # Negative slopes are clamped to zero before comparing + if obj_a.slope < 0.0: + obj_a.slope = 0 + if obj_b.slope < 0.0: + obj_b.slope = 0 + slope_bv = 0.01 * tolerance * obj_a.slope # border value: allowed increase of B over A + intercept_bv = 0.01 * tolerance * obj_a.intercept + diff_slope = obj_b.slope - obj_a.slope + diff_intercept = obj_b.intercept - obj_a.intercept + # Slopes are only compared when both are above 0.001 + if (obj_a.slope > 0.001 and obj_b.slope > 0.001 and + diff_slope > slope_bv) or diff_intercept > intercept_bv: + warning = True + print "{}\t{}\t{}\t{}".format(i + 1, + obj_b.get_params_str(), + obj_a.get_lin_func_str(), + obj_b.get_lin_func_str()) + if not warning: + print "No differences found." 
+ return warning + + def printout(self): + """ + Prints out readable representation of the list: one numbered row + per variant with its parameters and regression coefficients + """ + + print "NO\tARCH\tCIPHER\tDIR\tHASH\tKEYSZ\tSLOPE \tINTERCEPT" + for i, obj in enumerate(self): + print "{}\t{}\t{}".format(i + 1, + obj.get_params_str(), + obj.get_lin_func_str()) + + + +class Parser(object): + """ + Class used to parse a text file containing performance data + """ + + def __init__(self, fname, verbose): + self.fname = fname + self.verbose = verbose + + @staticmethod + def convert2int(in_tuple): + """ + Converts a tuple of strings into a list of integers + """ + + result = list(in_tuple) # Converting to list + result = [int(i) for i in result] # Converting str to int + return result + + def load(self): + """ + Reads a text file by columns, stores data in objects + for further comparison of performance. + Returns a (VarList, list of buffer sizes) pair. + """ + + v_list = VarList() + # Reading by columns, results in list of tuples + # Each tuple is representing a column from a text file + try: + f = open(self.fname, 'r') + except 
IOError: + print "Error reading {} file.".format(self.fname) + exit(1) + else: + with f: + cols = zip(*(line.strip().split('\t') for line in f)) + + # Reading first column with payload sizes, omitting the first PAR_NUM rows + sizes = self.convert2int(cols[0][PAR_NUM:]) + if self.verbose: + print "Available buffer sizes:\n" + print sizes + print "========================================================" + print "\n\nVariants:\n" + + # Reading remaining columns containing performance data + for row in cols[1:]: + # First PAR_NUM rows are run options + arch, c_mode, c_dir, h_alg, key_size = row[:PAR_NUM] + if self.verbose: + print arch, c_mode, c_dir, h_alg, key_size + + # Getting average times + avg_times = self.convert2int(row[PAR_NUM:]) + if self.verbose: + print avg_times + print "------" + + # Putting new object to the result list + v_list.append(Variant(arch=arch, cipher=c_mode, dir=c_dir, + alg=h_alg, keysize=key_size)) + v_list[-1].set_times(avg_times) + # Finding linear function representation of data set + v_list[-1].lin_reg(sizes) + if self.verbose: + print "({}, {})".format(v_list[-1].slope, v_list[-1].intercept) + print "============\n" + return v_list, sizes + +class DiffTool(object): + """ + Main class: holds the parsed command line options and drives + parsing, comparison and printing + """ + + def __init__(self): + self.fname_a = None + self.fname_b = None + self.tolerance = None + self.verbose = False + self.analyze = False + + @staticmethod + def usage(): + """ + Prints usage + """ + print "This tool compares file_b against file_a printing out differences." 
+ print "Usage:" + print "\tipsec_diff_tool.py [-v] [-a] file_a file_b [tol]\n" + print "\t-v - verbose" + print "\t-a - takes only one argument: name of the file to analyze" + print "\tfile_a, file_b - text files containing output from ipsec_perf tool" + print "\ttol - tolerance [%], must be >= 0, default 5\n" + print "Examples:" + print "\tipsec_diff_tool.py file01.txt file02.txt 10" + print "\tipsec_diff_tool.py -a file02.txt" + print "\tipsec_diff_tool.py -v -a file01.txt" + + + def parse_args(self): + """ + Get commandline arguments + """ + if len(sys.argv) < 3 or sys.argv[1] == "-h": + self.usage() + exit(1) + if sys.argv[1] == "-a": + self.analyze = True + self.fname_a = sys.argv[2] + elif sys.argv[2] == "-a": + if sys.argv[1] == "-v": + self.verbose = True + self.analyze = True + self.fname_a = sys.argv[3] + elif sys.argv[1] == "-v": + self.verbose = True + self.fname_a = sys.argv[2] + self.fname_b = sys.argv[3] + if len(sys.argv) >= 5: + self.tolerance = float(sys.argv[4]) + + else: + self.fname_a = sys.argv[1] + self.fname_b = sys.argv[2] + if len(sys.argv) >= 4: + self.tolerance = float(sys.argv[3]) + + def run(self): + """ + Main method + """ + self.parse_args() + + parser_a = Parser(self.fname_a, self.verbose) + list_a, sizes_a = parser_a.load() + + if not self.analyze: + parser_b = Parser(self.fname_b, self.verbose) + list_b, sizes_b = parser_b.load() + if sizes_a != sizes_b: + print "Error. Buffer size lists in two compared " \ + "data sets differ! 
Aborting.\n" + exit(1) + warning = list_a.compare(list_b, self.tolerance) # Compares list_b against list_a + if warning: + exit(2) + else: + list_a.printout() # Takes only one file and prints it out + +if __name__ == '__main__': + DiffTool().run() diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c new file mode 100644 index 000000000..af81cdd72 --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c @@ -0,0 +1,2581 @@ +/********************************************************************** + Copyright(c) 2017-2019, Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> +#include <string.h> +#include <errno.h> +#include <malloc.h> /* memalign() or _aligned_malloc()/aligned_free() */ + +#ifdef _WIN32 +#include <windows.h> +#include <process.h> +#include <intrin.h> +#define strdup _strdup +#define __forceinline static __forceinline +#else +#include <x86intrin.h> +#define __forceinline static inline __attribute__((always_inline)) +#include <unistd.h> +#include <pthread.h> +#include <sched.h> +#endif + +#include <intel-ipsec-mb.h> + +#include "msr.h" + +/* memory size for test buffers */ +#define BUFSIZE (512 * 1024 * 1024) +/* maximum size of a test buffer */ +#define JOB_SIZE_TOP (16 * 1024) +/* min size of a buffer when testing range of buffers */ +#define DEFAULT_JOB_SIZE_MIN 16 +/* max size of a buffer when testing range of buffers */ +#define DEFAULT_JOB_SIZE_MAX (2 * 1024) +/* number of bytes to increase buffer size when testing range of buffers */ +#define DEFAULT_JOB_SIZE_STEP 16 +/* max offset applied to a buffer - this is to avoid collisions in L1 */ +#define MAX_BUFFER_OFFSET 4096 +/* max value of sha_size_incr */ +#define MAX_SHA_SIZE_INCR 128 +/* region size for one buffer rounded up to 4K page size */ +#define REGION_SIZE (((JOB_SIZE_TOP + (MAX_BUFFER_OFFSET + \ + MAX_SHA_SIZE_INCR)) + 4095) & (~4095)) +/* number of test buffers */ +#define NUM_OFFSETS (BUFSIZE / 
REGION_SIZE) +#define NUM_RUNS 16 +/* maximum number of 128-bit expanded keys */ +#define KEYS_PER_JOB 15 + +#define AAD_SIZE_MAX JOB_SIZE_TOP +#define CCM_AAD_SIZE_MAX 46 +#define DEFAULT_GCM_AAD_SIZE 12 +#define DEFAULT_CCM_AAD_SIZE 8 + +#define ITER_SCALE_SMOKE 2048 +#define ITER_SCALE_SHORT 200000 +#define ITER_SCALE_LONG 2000000 + +#define BITS(x) (sizeof(x) * 8) +#define DIM(x) (sizeof(x)/sizeof(x[0])) + +#define MAX_NUM_THREADS 16 /* Maximum number of threads that can be created */ + +#define CIPHER_MODES_AES 7 /* CBC, CNTR, CNTR+8, CNTR_BITLEN, + CNTR_BITLEN-4, ECB, NULL_CIPHER */ +#define CIPHER_MODES_DOCSIS 4 /* AES DOCSIS, AES DOCSIS+8, DES DOCSIS, + DES DOCSIS+8 */ +#define CIPHER_MODES_DES 1 /* DES */ +#define CIPHER_MODES_GCM 1 /* GCM */ +#define CIPHER_MODES_CCM 1 /* CCM */ +#define CIPHER_MODES_3DES 1 /* 3DES */ +#define CIPHER_MODES_PON 2 /* PON, NO_CTR PON */ +#define DIRECTIONS 2 /* ENC, DEC */ +#define HASH_ALGS_AES 10 /* SHA1, SHA256, SHA224, SHA384, SHA512, XCBC, + MD5, NULL_HASH, CMAC, CMAC_BITLEN */ +#define HASH_ALGS_DOCSIS 1 /* NULL_HASH */ +#define HASH_ALGS_GCM 1 /* GCM */ +#define HASH_ALGS_CCM 1 /* CCM */ +#define HASH_ALGS_DES 1 /* NULL_HASH for DES */ +#define HASH_ALGS_3DES 1 /* NULL_HASH for 3DES */ +#define HASH_ALGS_PON 1 /* CRC32/BIP for PON */ +#define KEY_SIZES_AES 3 /* 16, 24, 32 */ +#define KEY_SIZES_DOCSIS 1 /* 16 or 8 */ +#define KEY_SIZES_GCM 3 /* 16, 24, 32 */ +#define KEY_SIZES_CCM 1 /* 16 */ +#define KEY_SIZES_DES 1 /* 8 */ +#define KEY_SIZES_3DES 1 /* 8 x 3 */ +#define KEY_SIZES_PON 1 /* 16 */ + +#define IA32_MSR_FIXED_CTR_CTRL 0x38D +#define IA32_MSR_PERF_GLOBAL_CTR 0x38F +#define IA32_MSR_CPU_UNHALTED_THREAD 0x30A + +/* Those defines tell how many different test cases are to be performed. + * Have to be multiplied by number of chosen architectures. 
+ */ +#define VARIANTS_PER_ARCH_AES (CIPHER_MODES_AES * DIRECTIONS * \ + HASH_ALGS_AES * KEY_SIZES_AES) +#define VARIANTS_PER_ARCH_DOCSIS (CIPHER_MODES_DOCSIS * DIRECTIONS * \ + HASH_ALGS_DOCSIS * KEY_SIZES_DOCSIS) +#define VARIANTS_PER_ARCH_GCM (CIPHER_MODES_GCM * DIRECTIONS * \ + HASH_ALGS_GCM * KEY_SIZES_GCM) +#define VARIANTS_PER_ARCH_CCM (CIPHER_MODES_CCM * DIRECTIONS * \ + HASH_ALGS_CCM * KEY_SIZES_CCM) +#define VARIANTS_PER_ARCH_DES (CIPHER_MODES_DES * DIRECTIONS * \ + HASH_ALGS_DES * KEY_SIZES_DES) +#define VARIANTS_PER_ARCH_3DES (CIPHER_MODES_3DES * DIRECTIONS * \ + HASH_ALGS_3DES * KEY_SIZES_3DES) +#define VARIANTS_PER_ARCH_PON (CIPHER_MODES_PON * DIRECTIONS * \ + HASH_ALGS_PON * KEY_SIZES_PON) + +enum arch_type_e { + ARCH_SSE = 0, + ARCH_AVX, + ARCH_AVX2, + ARCH_AVX512, + NUM_ARCHS +}; + +enum test_type_e { + TTYPE_AES_HMAC, + TTYPE_AES_DOCSIS, + TTYPE_AES_GCM, + TTYPE_AES_CCM, + TTYPE_AES_DES, + TTYPE_AES_3DES, + TTYPE_PON, + TTYPE_CUSTOM, + NUM_TTYPES +}; + +/* This enum will be mostly translated to JOB_CIPHER_MODE + * (make sure to update c_mode_names list in print_times function) */ +enum test_cipher_mode_e { + TEST_CBC = 1, + TEST_CNTR, + TEST_CNTR8, /* CNTR with increased buffer by 8 */ + TEST_CNTR_BITLEN, /* CNTR-BITLEN */ + TEST_CNTR_BITLEN4, /* CNTR-BITLEN with 4 less bits in the last byte */ + TEST_ECB, + TEST_NULL_CIPHER, + TEST_AESDOCSIS, + TEST_AESDOCSIS8, /* AES DOCSIS with increased buffer size by 8 */ + TEST_DESDOCSIS, + TEST_DESDOCSIS4, /* DES DOCSIS with increased buffer size by 4 */ + TEST_GCM, /* Additional field used by GCM, not translated */ + TEST_CCM, + TEST_DES, + TEST_3DES, + TEST_PON_CNTR, + TEST_PON_NO_CNTR, + TEST_NUM_CIPHER_TESTS +}; + +/* This enum will be mostly translated to JOB_HASH_ALG + * (make sure to update h_alg_names list in print_times function) */ +enum test_hash_alg_e { + TEST_SHA1 = 1, + TEST_SHA_224, + TEST_SHA_256, + TEST_SHA_384, + TEST_SHA_512, + TEST_XCBC, + TEST_MD5, + TEST_HASH_CMAC, /* added here to be 
included in AES tests */ + TEST_HASH_CMAC_BITLEN, + TEST_NULL_HASH, + TEST_HASH_GCM, /* Additional field used by GCM, not translated */ + TEST_CUSTOM_HASH, /* unused */ + TEST_HASH_CCM, + TEST_PON_CRC_BIP, + TEST_NUM_HASH_TESTS +}; + +/* Struct storing cipher parameters */ +struct params_s { + JOB_CIPHER_DIRECTION cipher_dir; + enum test_type_e test_type; /* AES, DOCSIS, GCM */ + enum test_cipher_mode_e cipher_mode; + enum test_hash_alg_e hash_alg; + uint32_t aes_key_size; + uint32_t size_aes; + uint64_t aad_size; + uint32_t num_sizes; + uint32_t num_variants; + uint32_t core; +}; + +struct custom_job_params { + enum test_cipher_mode_e cipher_mode; + enum test_hash_alg_e hash_alg; + uint32_t aes_key_size; + JOB_CIPHER_DIRECTION cipher_dir; +}; + +union params { + enum arch_type_e arch_type; + struct custom_job_params job_params; +}; + +struct str_value_mapping { + const char *name; + union params values; +}; + +struct str_value_mapping arch_str_map[] = { + {.name = "SSE", .values.arch_type = ARCH_SSE }, + {.name = "AVX", .values.arch_type = ARCH_AVX }, + {.name = "AVX2", .values.arch_type = ARCH_AVX2 }, + {.name = "AVX512", .values.arch_type = ARCH_AVX512 } +}; + +struct str_value_mapping cipher_algo_str_map[] = { + { + .name = "aes-cbc-128", + .values.job_params = { + .cipher_mode = TEST_CBC, + .aes_key_size = AES_128_BYTES + } + }, + { + .name = "aes-cbc-192", + .values.job_params = { + .cipher_mode = TEST_CBC, + .aes_key_size = AES_192_BYTES + } + }, + { + .name = "aes-cbc-256", + .values.job_params = { + .cipher_mode = TEST_CBC, + .aes_key_size = AES_256_BYTES + } + }, + { + .name = "aes-ctr-128", + .values.job_params = { + .cipher_mode = TEST_CNTR, + .aes_key_size = AES_128_BYTES + } + }, + { + .name = "aes-ctr-192", + .values.job_params = { + .cipher_mode = TEST_CNTR, + .aes_key_size = AES_192_BYTES + } + }, + { + .name = "aes-ctr-256", + .values.job_params = { + .cipher_mode = TEST_CNTR, + .aes_key_size = AES_256_BYTES + } + }, + { + .name = "aes-ctr8-128", 
+ .values.job_params = { + .cipher_mode = TEST_CNTR8, + .aes_key_size = AES_128_BYTES + } + }, + { + .name = "aes-ctr8-192", + .values.job_params = { + .cipher_mode = TEST_CNTR8, + .aes_key_size = AES_192_BYTES + } + }, + { + .name = "aes-ctr8-256", + .values.job_params = { + .cipher_mode = TEST_CNTR8, + .aes_key_size = AES_256_BYTES + } + }, + { + .name = "aes-ctr-bit-128", + .values.job_params = { + .cipher_mode = TEST_CNTR_BITLEN, + .aes_key_size = AES_128_BYTES + } + }, + { + .name = "aes-ctr-bit-192", + .values.job_params = { + .cipher_mode = TEST_CNTR_BITLEN, + .aes_key_size = AES_192_BYTES + } + }, + { + .name = "aes-ctr-bit-256", + .values.job_params = { + .cipher_mode = TEST_CNTR_BITLEN, + .aes_key_size = AES_256_BYTES + } + }, + { + .name = "aes-ctr-bit4-128", + .values.job_params = { + .cipher_mode = TEST_CNTR_BITLEN4, + .aes_key_size = AES_128_BYTES + } + }, + { + .name = "aes-ctr-bit4-192", + .values.job_params = { + .cipher_mode = TEST_CNTR_BITLEN4, + .aes_key_size = AES_192_BYTES + } + }, + { + .name = "aes-ctr-bit4-256", + .values.job_params = { + .cipher_mode = TEST_CNTR_BITLEN4, + .aes_key_size = AES_256_BYTES + } + }, + { + .name = "aes-ecb-128", + .values.job_params = { + .cipher_mode = TEST_ECB, + .aes_key_size = AES_128_BYTES + } + }, + { + .name = "aes-ecb-192", + .values.job_params = { + .cipher_mode = TEST_ECB, + .aes_key_size = AES_192_BYTES + } + }, + { + .name = "aes-ecb-256", + .values.job_params = { + .cipher_mode = TEST_ECB, + .aes_key_size = AES_256_BYTES + } + }, + { + .name = "aes-docsis", + .values.job_params = { + .cipher_mode = TEST_AESDOCSIS, + .aes_key_size = AES_128_BYTES + } + }, + { + .name = "aes-docsis8", + .values.job_params = { + .cipher_mode = TEST_AESDOCSIS8, + .aes_key_size = AES_128_BYTES + } + }, + { + .name = "des-docsis", + .values.job_params = { + .cipher_mode = TEST_DESDOCSIS, + .aes_key_size = 8 + } + }, + { + .name = "des-docsis4", + .values.job_params = { + .cipher_mode = TEST_DESDOCSIS4, + .aes_key_size = 8 
+ } + }, + { + .name = "des-cbc", + .values.job_params = { + .cipher_mode = TEST_DES, + .aes_key_size = 8 + } + }, + { + .name = "3des-cbc", + .values.job_params = { + .cipher_mode = TEST_3DES, + .aes_key_size = 8 + } + }, + { + .name = "null", + .values.job_params = { + .cipher_mode = TEST_NULL_CIPHER, + .aes_key_size = 0 + } + } +}; + +struct str_value_mapping hash_algo_str_map[] = { + { + .name = "sha1-hmac", + .values.job_params = { + .hash_alg = TEST_SHA1 + } + }, + { + .name = "sha224-hmac", + .values.job_params = { + .hash_alg = TEST_SHA_224 + } + }, + { + .name = "sha256-hmac", + .values.job_params = { + .hash_alg = TEST_SHA_256 + } + }, + { + .name = "sha384-hmac", + .values.job_params = { + .hash_alg = TEST_SHA_384 + } + }, + { + .name = "sha512-hmac", + .values.job_params = { + .hash_alg = TEST_SHA_512 + } + }, + { + .name = "aes-xcbc", + .values.job_params = { + .hash_alg = TEST_XCBC + } + }, + { + .name = "md5-hmac", + .values.job_params = { + .hash_alg = TEST_MD5 + } + }, + { + .name = "aes-cmac", + .values.job_params = { + .hash_alg = TEST_HASH_CMAC + } + }, + { + .name = "null", + .values.job_params = { + .hash_alg = TEST_NULL_HASH + } + }, + { + .name = "aes-cmac-bitlen", + .values.job_params = { + .hash_alg = TEST_HASH_CMAC_BITLEN + } + }, +}; + +struct str_value_mapping aead_algo_str_map[] = { + { + .name = "aes-gcm-128", + .values.job_params = { + .cipher_mode = TEST_GCM, + .hash_alg = TEST_HASH_GCM, + .aes_key_size = AES_128_BYTES + } + }, + { + .name = "aes-gcm-192", + .values.job_params = { + .cipher_mode = TEST_GCM, + .hash_alg = TEST_HASH_GCM, + .aes_key_size = AES_192_BYTES + } + }, + { + .name = "aes-gcm-256", + .values.job_params = { + .cipher_mode = TEST_GCM, + .hash_alg = TEST_HASH_GCM, + .aes_key_size = AES_256_BYTES + } + }, + { + .name = "aes-ccm-128", + .values.job_params = { + .cipher_mode = TEST_CCM, + .hash_alg = TEST_HASH_CCM, + .aes_key_size = AES_128_BYTES + } + }, + { + .name = "pon-128", + .values.job_params = { + 
.cipher_mode = TEST_PON_CNTR, + .hash_alg = TEST_PON_CRC_BIP, + .aes_key_size = AES_128_BYTES + } + }, + { + .name = "pon-128-no-ctr", + .values.job_params = { + .cipher_mode = TEST_PON_NO_CNTR, + .hash_alg = TEST_PON_CRC_BIP, + .aes_key_size = 0 + } + }, +}; + +struct str_value_mapping cipher_dir_str_map[] = { + {.name = "encrypt", .values.job_params.cipher_dir = ENCRYPT}, + {.name = "decrypt", .values.job_params.cipher_dir = DECRYPT} +}; + +/* This struct stores all information about performed test case */ +struct variant_s { + uint32_t arch; + struct params_s params; + uint64_t *avg_times; +}; + +/* Struct storing information to be passed to threads */ +struct thread_info { + int print_info; + int core; + MB_MGR *p_mgr; +} t_info[MAX_NUM_THREADS]; + +enum cache_type_e { + WARM = 0, + COLD = 1 +}; + +enum cache_type_e cache_type = WARM; + +const uint32_t auth_tag_length_bytes[19] = { + 12, /* SHA1 */ + 14, /* SHA_224 */ + 16, /* SHA_256 */ + 24, /* SHA_384 */ + 32, /* SHA_512 */ + 12, /* AES_XCBC */ + 12, /* MD5 */ + 0, /* NULL_HASH */ +#ifndef NO_GCM + 16, /* AES_GMAC */ +#endif + 0, /* CUSTOM HASH */ + 0, /* AES_CCM */ + 16, /* AES_CMAC */ + 20, /* PLAIN_SHA1 */ + 28, /* PLAIN_SHA_224 */ + 32, /* PLAIN_SHA_256 */ + 48, /* PLAIN_SHA_384 */ + 64, /* PLAIN_SHA_512 */ + 4, /* AES_CMAC_BITLEN (3GPP) */ + 8, /* PON */ +}; +uint32_t index_limit; +uint32_t key_idxs[NUM_OFFSETS]; +uint32_t offsets[NUM_OFFSETS]; +uint32_t sha_size_incr = 24; + +enum range { + RANGE_MIN = 0, + RANGE_STEP, + RANGE_MAX, + NUM_RANGE +}; + +uint32_t job_sizes[NUM_RANGE] = {DEFAULT_JOB_SIZE_MIN, + DEFAULT_JOB_SIZE_STEP, + DEFAULT_JOB_SIZE_MAX}; +uint32_t job_iter = 0; +uint64_t gcm_aad_size = DEFAULT_GCM_AAD_SIZE; +uint64_t ccm_aad_size = DEFAULT_CCM_AAD_SIZE; + +struct custom_job_params custom_job_params = { + .cipher_mode = TEST_NULL_CIPHER, + .hash_alg = TEST_NULL_HASH, + .aes_key_size = 0, + .cipher_dir = ENCRYPT +}; + +uint8_t archs[NUM_ARCHS] = {1, 1, 1, 1}; /* uses all function sets */ 
+/* AES, DOCSIS, GCM, CCM, DES, 3DES, PON, CUSTOM */ +uint8_t test_types[NUM_TTYPES] = {1, 1, 1, 1, 1, 1, 1, 0}; + +int use_gcm_job_api = 0; +int use_unhalted_cycles = 0; /* read unhalted cycles instead of tsc */ +uint64_t rd_cycles_cost = 0; /* cost of reading unhalted cycles */ +uint64_t core_mask = 0; /* bitmap of selected cores */ + +uint64_t flags = 0; /* flags passed to alloc_mb_mgr() */ + +uint32_t iter_scale = ITER_SCALE_LONG; + +#define PB_INIT_SIZE 50 +#define PB_INIT_IDX 2 /* after \r and [ */ +static uint32_t PB_SIZE = PB_INIT_SIZE; +static uint32_t PB_FINAL_IDX = (PB_INIT_SIZE + (PB_INIT_IDX - 1)); +static char prog_bar[PB_INIT_SIZE + 4]; /* 50 + 4 for \r, [, ], \0 */ +static uint32_t pb_idx = PB_INIT_IDX; +static uint32_t pb_mod = 0; + +static int silent_progress_bar = 0; + +/* Reset the progress bar for total_num steps and print it empty to stderr. + * The bar is shrunk to total_num characters when total_num is smaller than + * the current bar size. Does nothing when the progress bar is silenced. */ +static void prog_bar_init(const uint32_t total_num) +{ + if (silent_progress_bar) + return; + + if (total_num < PB_SIZE) { + PB_SIZE = total_num; + PB_FINAL_IDX = (PB_SIZE + (PB_INIT_IDX - 1)); + } + pb_idx = PB_INIT_IDX; + /* total_num == 0 leaves PB_SIZE at 0 - do not divide by it; + * prog_bar_update() already copes with pb_mod == 0 */ + pb_mod = (PB_SIZE != 0) ? 
(total_num / PB_SIZE) : 0; + + /* 32 dec == ascii ' ' char */ + memset(prog_bar, 32, sizeof(prog_bar)); + prog_bar[0] = '\r'; + prog_bar[1] = '['; + prog_bar[PB_FINAL_IDX + 1] = ']'; + prog_bar[PB_FINAL_IDX + 2] = '\0'; + + fputs(prog_bar, stderr); +} + +/* Mark the last position of the progress bar and print it */ +static void prog_bar_fini(void) +{ + if (silent_progress_bar) + return; + + prog_bar[PB_FINAL_IDX] = 'X'; /* set final X */ + fputs(prog_bar, stderr); +} + +/* Redraw the progress bar for step num: advance by one 'X' when num is + * a multiple of pb_mod, otherwise show a spinner character */ +static void prog_bar_update(const uint32_t num) +{ + if (silent_progress_bar) + return; + + if ((pb_mod == 0) || num % pb_mod == 0) { + /* print X at every ~50th variant */ + prog_bar[pb_idx] = 'X'; + fputs(prog_bar, stderr); + + /* don't overrun final idx */ + if (pb_idx < (PB_SIZE + 1)) + pb_idx++; + } else { + const char pb_inter_chars[] = {'|', '/', '-', '\\'}; + /* print intermediate chars */ + prog_bar[pb_idx] = pb_inter_chars[num % DIM(pb_inter_chars)]; + fputs(prog_bar, stderr); + } +} + +/* Read unhalted cycles */ +__forceinline uint64_t 
read_cycles(const uint32_t core) +{ + uint64_t val = 0; + + if (msr_read(core, IA32_MSR_CPU_UNHALTED_THREAD, + &val) != MACHINE_RETVAL_OK) { + fprintf(stderr, "Error reading cycles " + "counter on core %u!\n", core); + exit(EXIT_FAILURE); + } + + return val; +} + +/* Method used by qsort to compare 2 values */ +static int compare_uint64_t(const void *a, const void *b) +{ + return (int)(int64_t)(*(const uint64_t *)a - *(const uint64_t *)b); +} + +/* Get number of bits set in value */ +static unsigned bitcount(const uint64_t val) +{ + unsigned i, bits = 0; + + for (i = 0; i < BITS(val); i++) + if (val & (1ULL << i)) + bits++; + + return bits; +} + +/* Get the next core in core mask + Set last_core to negative to start from beginnig of core_mask */ +static int next_core(const uint64_t core_mask, + const int last_core) +{ + int core = 0; + + if (last_core >= 0) + core = last_core; + + while (((core_mask >> core) & 1) == 0) { + core++; + + if (core >= (int)BITS(core_mask)) + return -1; + } + + return core; +} + +/* Set CPU affinity for current thread */ +static int set_affinity(const int cpu) +{ + int ret = 0; + int num_cpus = 0; + + /* Get number of cpus in the system */ +#ifdef _WIN32 + GROUP_AFFINITY NewGroupAffinity; + + memset(&NewGroupAffinity, 0, sizeof(GROUP_AFFINITY)); + num_cpus = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); +#else + num_cpus = sysconf(_SC_NPROCESSORS_CONF); +#endif + if (num_cpus == 0) { + fprintf(stderr, "Zero processors in the system!"); + return 1; + } + + /* Check if selected core is valid */ + if (cpu < 0 || cpu >= num_cpus) { + fprintf(stderr, "Invalid CPU selected! 
" + "Max valid CPU is %u\n", num_cpus - 1); + return 1; + } + +#ifdef _WIN32 + NewGroupAffinity.Mask = 1ULL << cpu; + ret = !SetThreadGroupAffinity(GetCurrentThread(), + &NewGroupAffinity, NULL); +#else + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + /* Set affinity of current process to cpu */ + ret = sched_setaffinity(0, sizeof(cpuset), &cpuset); +#endif /* _WIN32 */ + + return ret; +} + +/* Start counting unhalted cycles */ +static int start_cycles_ctr(const uint32_t core) +{ + int ret; + + if (core >= BITS(core_mask)) + return 1; + + /* Disable cycles counter */ + ret = msr_write(core, IA32_MSR_PERF_GLOBAL_CTR, 0); + if (ret != MACHINE_RETVAL_OK) + return ret; + + /* Zero cycles counter */ + ret = msr_write(core, IA32_MSR_CPU_UNHALTED_THREAD, 0); + if (ret != MACHINE_RETVAL_OK) + return ret; + + /* Enable OS and user tracking in FixedCtr1 */ + ret = msr_write(core, IA32_MSR_FIXED_CTR_CTRL, 0x30); + if (ret != MACHINE_RETVAL_OK) + return ret; + + /* Enable cycles counter */ + return msr_write(core, IA32_MSR_PERF_GLOBAL_CTR, (1ULL << 33)); +} + +/* Init MSR module */ +static int init_msr_mod(void) +{ + unsigned max_core_count = 0; +#ifdef _WIN32 + max_core_count = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); +#else + max_core_count = sysconf(_SC_NPROCESSORS_CONF); +#endif + if (max_core_count == 0) { + fprintf(stderr, "Zero processors in the system!"); + return MACHINE_RETVAL_ERROR; + } + + return machine_init(max_core_count); +} + +/* Set the cost of reading unhalted cycles using RDMSR */ +static int set_unhalted_cycle_cost(const int core, uint64_t *value) +{ + uint64_t time1, time2; + + if (value == NULL || core < 0) + return 1; + + time1 = read_cycles(core); + time2 = read_cycles(core); + + /* Calculate delta */ + *value = (time2 - time1); + + return 0; +} + +/* Calculate the general cost of reading unhalted cycles (median) */ +static int set_avg_unhalted_cycle_cost(const int core, uint64_t *value) +{ + unsigned i; + uint64_t 
cycles[10]; + + if (value == NULL || core_mask == 0 || core < 0) + return 1; + + /* Fill cycles table with read cost values */ + for (i = 0; i < DIM(cycles); i++) + if (set_unhalted_cycle_cost(core, &cycles[i]) != 0) + return 1; + + /* sort array */ + qsort(cycles, DIM(cycles), sizeof(uint64_t), compare_uint64_t); + + /* set median cost */ + *value = cycles[DIM(cycles)/2]; + + return 0; +} + +/* Freeing allocated memory */ +static void free_mem(uint8_t **p_buffer, uint128_t **p_keys) +{ + uint128_t *keys = NULL; + uint8_t *buf = NULL; + + if (p_keys != NULL) { + keys = *p_keys; + *p_keys = NULL; + } + + if (p_buffer != NULL) { + buf = *p_buffer; + *p_buffer = NULL; + } + +#ifdef LINUX + if (keys != NULL) + free(keys); + + if (buf != NULL) + free(buf); +#else + if (keys != NULL) + _aligned_free(keys); + + if (buf != NULL) + _aligned_free(buf); +#endif +} + +static const void * +get_key_pointer(const uint32_t index, const uint128_t *p_keys) +{ + return (const void *) &p_keys[key_idxs[index]]; +} + +static uint8_t *get_src_buffer(const uint32_t index, uint8_t *p_buffer) +{ + return &p_buffer[offsets[index]]; +} + +static uint8_t *get_dst_buffer(const uint32_t index, uint8_t *p_buffer) +{ + return &p_buffer[offsets[index] + sha_size_incr]; +} + +static uint32_t get_next_index(uint32_t index) +{ + if (++index >= index_limit) + index = 0; + return index; +} + +static void init_buf(void *pb, const size_t length) +{ + const size_t n = length / sizeof(uint64_t); + size_t i = 0; + + if (pb == NULL) + return; + + for (i = 0; i < n; i++) + ((uint64_t *)pb)[i] = (uint64_t) rand(); +} + +/* + * Packet and key memory allocation and initialization. + * init_offsets() needs to be called prior to that so that + * index_limit is set up accordingly to hot/cold selection. 
+ */ +static void init_mem(uint8_t **p_buffer, uint128_t **p_keys) +{ + const size_t bufs_size = index_limit * REGION_SIZE; + const size_t keys_size = index_limit * KEYS_PER_JOB * sizeof(uint128_t); + const size_t alignment = 64; + uint8_t *buf = NULL; + uint128_t *keys = NULL; + + if (p_keys == NULL || p_buffer == NULL) { + fprintf(stderr, "Internal buffer allocation error!\n"); + exit(EXIT_FAILURE); + } + +#ifdef LINUX + buf = (uint8_t *) memalign(alignment, bufs_size); +#else + buf = (uint8_t *) _aligned_malloc(bufs_size, alignment); +#endif + if (!buf) { + fprintf(stderr, "Could not malloc buf\n"); + exit(EXIT_FAILURE); + } + +#ifdef LINUX + keys = (uint128_t *) memalign(alignment, keys_size); +#else + keys = (uint128_t *) _aligned_malloc(keys_size, alignment); +#endif + if (!keys) { + fprintf(stderr, "Could not allocate memory for keys!\n"); + free_mem(&buf, &keys); + exit(EXIT_FAILURE); + } + + *p_keys = keys; + *p_buffer = buf; + + init_buf(buf, bufs_size); + init_buf(keys, keys_size); +} + +/* + * Initialize packet buffer and keys offsets from + * the start of the respective buffers + */ +static void init_offsets(const enum cache_type_e ctype) +{ + if (ctype == COLD) { + uint32_t i; + + for (i = 0; i < NUM_OFFSETS; i++) { + offsets[i] = (i * REGION_SIZE) + (rand() & 0x3C0); + key_idxs[i] = i * KEYS_PER_JOB; + } + + /* swap the entries at random */ + for (i = 0; i < NUM_OFFSETS; i++) { + const uint32_t swap_idx = (rand() % NUM_OFFSETS); + const uint32_t tmp_offset = offsets[swap_idx]; + const uint32_t tmp_keyidx = key_idxs[swap_idx]; + + offsets[swap_idx] = offsets[i]; + key_idxs[swap_idx] = key_idxs[i]; + offsets[i] = tmp_offset; + key_idxs[i] = tmp_keyidx; + } + + index_limit = NUM_OFFSETS; + } else { /* WARM */ + uint32_t i; + + index_limit = 16; + + for (i = 0; i < index_limit; i++) { + /* + * Each buffer starts at different offset from + * start of the page. + * The most optimum determined difference between + * offsets is 4 cache lines. 
+ */ + const uint32_t offset_step = (4 * 64); + const uint32_t L1_way_size = 4096; + + key_idxs[i] = i * KEYS_PER_JOB; + offsets[i] = i * REGION_SIZE + + ((i * offset_step) & (L1_way_size - 1)); + } + } +} + +/* + * This function translates enum test_ciper_mode_e to be used by ipsec_mb + * library + */ +static JOB_CIPHER_MODE +translate_cipher_mode(const enum test_cipher_mode_e test_mode) +{ + JOB_CIPHER_MODE c_mode = NULL_CIPHER; + + switch (test_mode) { + case TEST_CBC: + c_mode = CBC; + break; + case TEST_CNTR: + case TEST_CNTR8: + c_mode = CNTR; + break; + case TEST_CNTR_BITLEN: + case TEST_CNTR_BITLEN4: + c_mode = CNTR_BITLEN; + break; + case TEST_ECB: + c_mode = ECB; + break; + case TEST_NULL_CIPHER: + c_mode = NULL_CIPHER; + break; + case TEST_AESDOCSIS: + case TEST_AESDOCSIS8: + c_mode = DOCSIS_SEC_BPI; + break; + case TEST_DESDOCSIS: + case TEST_DESDOCSIS4: + c_mode = DOCSIS_DES; + break; + case TEST_GCM: + c_mode = GCM; + break; + case TEST_CCM: + c_mode = CCM; + break; + case TEST_DES: + c_mode = DES; + break; + case TEST_3DES: + c_mode = DES3; + break; + case TEST_PON_CNTR: + case TEST_PON_NO_CNTR: + c_mode = PON_AES_CNTR; + break; + default: + break; + } + return c_mode; +} + +/* Performs test using AES_HMAC or DOCSIS */ +static uint64_t +do_test(MB_MGR *mb_mgr, struct params_s *params, + const uint32_t num_iter, uint8_t *p_buffer, uint128_t *p_keys) +{ + JOB_AES_HMAC *job; + JOB_AES_HMAC job_template; + uint32_t i; + static uint32_t index = 0; + static DECLARE_ALIGNED(uint128_t iv, 16); + static uint32_t ipad[5], opad[5], digest[3]; + static DECLARE_ALIGNED(uint32_t k1_expanded[11 * 4], 16); + static DECLARE_ALIGNED(uint8_t k2[16], 16); + static DECLARE_ALIGNED(uint8_t k3[16], 16); + static DECLARE_ALIGNED(struct gcm_key_data gdata_key, 512); + uint64_t xgem_hdr = 0; + uint32_t size_aes; + uint64_t time = 0; + uint32_t aux; + + if ((params->cipher_mode == TEST_AESDOCSIS8) || + (params->cipher_mode == TEST_CNTR8)) + size_aes = params->size_aes + 8; + 
else if (params->cipher_mode == TEST_DESDOCSIS4) + size_aes = params->size_aes + 4; + else + size_aes = params->size_aes; + + if (params->cipher_mode == TEST_CNTR_BITLEN) + job_template.msg_len_to_cipher_in_bits = size_aes * 8; + else if (params->cipher_mode == TEST_CNTR_BITLEN4) + job_template.msg_len_to_cipher_in_bits = size_aes * 8 - 4; + else + job_template.msg_len_to_cipher_in_bytes = size_aes; + + job_template.msg_len_to_hash_in_bytes = size_aes + sha_size_incr; + job_template.hash_start_src_offset_in_bytes = 0; + job_template.cipher_start_src_offset_in_bytes = sha_size_incr; + job_template.iv = (uint8_t *) &iv; + job_template.iv_len_in_bytes = 16; + + job_template.auth_tag_output = (uint8_t *) digest; + + switch (params->hash_alg) { + case TEST_XCBC: + job_template.u.XCBC._k1_expanded = k1_expanded; + job_template.u.XCBC._k2 = k2; + job_template.u.XCBC._k3 = k3; + job_template.hash_alg = AES_XCBC; + break; + case TEST_HASH_CCM: + job_template.hash_alg = AES_CCM; + break; + case TEST_HASH_GCM: + job_template.hash_alg = AES_GMAC; + break; + case TEST_NULL_HASH: + job_template.hash_alg = NULL_HASH; + break; + case TEST_HASH_CMAC: + job_template.u.CMAC._key_expanded = k1_expanded; + job_template.u.CMAC._skey1 = k2; + job_template.u.CMAC._skey2 = k3; + job_template.hash_alg = AES_CMAC; + break; + case TEST_HASH_CMAC_BITLEN: + job_template.u.CMAC._key_expanded = k1_expanded; + job_template.u.CMAC._skey1 = k2; + job_template.u.CMAC._skey2 = k3; + /* + * CMAC bit level version is done in bits (length is + * converted to bits and it is decreased by 4 bits, + * to force the CMAC bitlen path) + */ + job_template.msg_len_to_hash_in_bits = + (job_template.msg_len_to_hash_in_bytes * 8) - 4; + job_template.hash_alg = AES_CMAC_BITLEN; + break; + case TEST_PON_CRC_BIP: + job_template.hash_alg = PON_CRC_BIP; + job_template.msg_len_to_hash_in_bytes = size_aes + 8; + job_template.cipher_start_src_offset_in_bytes = 8; + if (params->cipher_mode == TEST_PON_NO_CNTR) + 
job_template.msg_len_to_cipher_in_bytes = 0; + break; + default: + /* HMAC hash alg is SHA1 or MD5 */ + job_template.u.HMAC._hashed_auth_key_xor_ipad = + (uint8_t *) ipad; + job_template.u.HMAC._hashed_auth_key_xor_opad = + (uint8_t *) opad; + job_template.hash_alg = (JOB_HASH_ALG) params->hash_alg; + break; + } + job_template.auth_tag_output_len_in_bytes = + (uint64_t) auth_tag_length_bytes[job_template.hash_alg - 1]; + + job_template.cipher_direction = params->cipher_dir; + + if (params->cipher_mode == TEST_NULL_CIPHER) { + job_template.chain_order = HASH_CIPHER; + } else if (params->cipher_mode == TEST_CCM) { + if (job_template.cipher_direction == ENCRYPT) + job_template.chain_order = HASH_CIPHER; + else + job_template.chain_order = CIPHER_HASH; + } else { + if (job_template.cipher_direction == ENCRYPT) + job_template.chain_order = CIPHER_HASH; + else + job_template.chain_order = HASH_CIPHER; + } + + /* Translating enum to the API's one */ + job_template.cipher_mode = translate_cipher_mode(params->cipher_mode); + job_template.aes_key_len_in_bytes = params->aes_key_size; + if (job_template.cipher_mode == GCM) { + uint8_t key[32]; + + switch (params->aes_key_size) { + case AES_128_BYTES: + IMB_AES128_GCM_PRE(mb_mgr, key, &gdata_key); + break; + case AES_192_BYTES: + IMB_AES192_GCM_PRE(mb_mgr, key, &gdata_key); + break; + case AES_256_BYTES: + default: + IMB_AES256_GCM_PRE(mb_mgr, key, &gdata_key); + break; + } + job_template.aes_enc_key_expanded = &gdata_key; + job_template.aes_dec_key_expanded = &gdata_key; + job_template.u.GCM.aad_len_in_bytes = params->aad_size; + job_template.iv_len_in_bytes = 12; + } else if (job_template.cipher_mode == CCM) { + job_template.msg_len_to_cipher_in_bytes = size_aes; + job_template.msg_len_to_hash_in_bytes = size_aes; + job_template.hash_start_src_offset_in_bytes = 0; + job_template.cipher_start_src_offset_in_bytes = 0; + job_template.u.CCM.aad_len_in_bytes = params->aad_size; + job_template.iv_len_in_bytes = 13; + } else if 
(job_template.cipher_mode == DES || + job_template.cipher_mode == DOCSIS_DES) { + job_template.aes_key_len_in_bytes = 8; + job_template.iv_len_in_bytes = 8; + } else if (job_template.cipher_mode == DES3) { + job_template.aes_key_len_in_bytes = 24; + job_template.iv_len_in_bytes = 8; + } + + + if (job_template.hash_alg == PON_CRC_BIP) { + /* create XGEM header template */ + const uint64_t pli = + (job_template.msg_len_to_cipher_in_bytes << 2) & 0xffff; + + xgem_hdr = ((pli >> 8) & 0xff) | ((pli & 0xff) << 8); + } + +#ifndef _WIN32 + if (use_unhalted_cycles) + time = read_cycles(params->core); + else +#endif + time = __rdtscp(&aux); + + for (i = 0; i < num_iter; i++) { + job = IMB_GET_NEXT_JOB(mb_mgr); + *job = job_template; + + if (job->hash_alg == PON_CRC_BIP) { + uint64_t *p_src = + (uint64_t *) get_src_buffer(index, p_buffer); + + job->src = (const uint8_t *)p_src; + p_src[0] = xgem_hdr; + } else { + job->src = get_src_buffer(index, p_buffer); + } + job->dst = get_dst_buffer(index, p_buffer); + if (job->cipher_mode == GCM) { + job->u.GCM.aad = job->src; + } else if (job->cipher_mode == CCM) { + job->u.CCM.aad = job->src; + job->aes_enc_key_expanded = job->aes_dec_key_expanded = + (const uint32_t *) get_key_pointer(index, + p_keys); + } else if (job->cipher_mode == DES3) { + static const void *ks_ptr[3]; + + ks_ptr[0] = ks_ptr[1] = ks_ptr[2] = + get_key_pointer(index, p_keys); + job->aes_enc_key_expanded = + job->aes_dec_key_expanded = ks_ptr; + } else { + job->aes_enc_key_expanded = job->aes_dec_key_expanded = + (const uint32_t *) get_key_pointer(index, + p_keys); + } + + index = get_next_index(index); +#ifdef DEBUG + job = IMB_SUBMIT_JOB(mb_mgr); +#else + job = IMB_SUBMIT_JOB_NOCHECK(mb_mgr); +#endif + while (job) { +#ifdef DEBUG + if (job->status != STS_COMPLETED) + fprintf(stderr, "failed job, status:%d\n", + job->status); +#endif + job = IMB_GET_COMPLETED_JOB(mb_mgr); + } + } + + while ((job = IMB_FLUSH_JOB(mb_mgr))) { +#ifdef DEBUG + if (job->status != 
STS_COMPLETED) + fprintf(stderr, "failed job, status:%d\n", job->status); +#endif + } + +#ifndef _WIN32 + if (use_unhalted_cycles) + time = (read_cycles(params->core) - rd_cycles_cost) - time; + else +#endif + time = __rdtscp(&aux) - time; + + return time / num_iter; +} + +/* Performs test using GCM */ +static uint64_t +do_test_gcm(struct params_s *params, + const uint32_t num_iter, MB_MGR *mb_mgr, + uint8_t *p_buffer, uint128_t *p_keys) +{ + static DECLARE_ALIGNED(struct gcm_key_data gdata_key, 512); + static DECLARE_ALIGNED(struct gcm_context_data gdata_ctx, 64); + uint8_t *key; + static uint32_t index = 0; + uint32_t size_aes = params->size_aes; + uint32_t i; + uint8_t *aad = NULL; + uint8_t auth_tag[12]; + DECLARE_ALIGNED(uint8_t iv[16], 16); + uint64_t time = 0; + uint32_t aux; + + key = (uint8_t *) malloc(sizeof(uint8_t) * params->aes_key_size); + if (!key) { + fprintf(stderr, "Could not malloc key\n"); + free_mem(&p_buffer, &p_keys); + exit(EXIT_FAILURE); + } + + aad = (uint8_t *) malloc(sizeof(uint8_t) * params->aad_size); + if (!aad) { + free(key); + fprintf(stderr, "Could not malloc AAD\n"); + free_mem(&p_buffer, &p_keys); + exit(EXIT_FAILURE); + } + + switch (params->aes_key_size) { + case AES_128_BYTES: + IMB_AES128_GCM_PRE(mb_mgr, key, &gdata_key); + break; + case AES_192_BYTES: + IMB_AES192_GCM_PRE(mb_mgr, key, &gdata_key); + break; + case AES_256_BYTES: + default: + IMB_AES256_GCM_PRE(mb_mgr, key, &gdata_key); + break; + } + + if (params->cipher_dir == ENCRYPT) { +#ifndef _WIN32 + if (use_unhalted_cycles) + time = read_cycles(params->core); + else +#endif + time = __rdtscp(&aux); + + if (params->aes_key_size == AES_128_BYTES) { + for (i = 0; i < num_iter; i++) { + uint8_t *pb = get_dst_buffer(index, p_buffer); + + IMB_AES128_GCM_ENC(mb_mgr, &gdata_key, + &gdata_ctx, + pb, + pb, + size_aes, iv, + aad, params->aad_size, + auth_tag, sizeof(auth_tag)); + index = get_next_index(index); + } + } else if (params->aes_key_size == AES_192_BYTES) { + for (i = 
0; i < num_iter; i++) { + uint8_t *pb = get_dst_buffer(index, p_buffer); + + IMB_AES192_GCM_ENC(mb_mgr, &gdata_key, + &gdata_ctx, + pb, + pb, + size_aes, iv, + aad, params->aad_size, + auth_tag, sizeof(auth_tag)); + index = get_next_index(index); + } + } else { /* 256 */ + for (i = 0; i < num_iter; i++) { + uint8_t *pb = get_dst_buffer(index, p_buffer); + + IMB_AES256_GCM_ENC(mb_mgr, &gdata_key, + &gdata_ctx, + pb, + pb, + size_aes, iv, + aad, params->aad_size, + auth_tag, sizeof(auth_tag)); + index = get_next_index(index); + } + } +#ifndef _WIN32 + if (use_unhalted_cycles) + time = (read_cycles(params->core) - + rd_cycles_cost) - time; + else +#endif + time = __rdtscp(&aux) - time; + } else { /*DECRYPT*/ +#ifndef _WIN32 + if (use_unhalted_cycles) + time = read_cycles(params->core); + else +#endif + time = __rdtscp(&aux); + + if (params->aes_key_size == AES_128_BYTES) { + for (i = 0; i < num_iter; i++) { + uint8_t *pb = get_dst_buffer(index, p_buffer); + + IMB_AES128_GCM_DEC(mb_mgr, &gdata_key, + &gdata_ctx, + pb, + pb, + size_aes, iv, + aad, params->aad_size, + auth_tag, sizeof(auth_tag)); + index = get_next_index(index); + } + } else if (params->aes_key_size == AES_192_BYTES) { + for (i = 0; i < num_iter; i++) { + uint8_t *pb = get_dst_buffer(index, p_buffer); + + IMB_AES192_GCM_DEC(mb_mgr, &gdata_key, + &gdata_ctx, + pb, + pb, + size_aes, iv, + aad, params->aad_size, + auth_tag, sizeof(auth_tag)); + index = get_next_index(index); + } + } else { /* 256 */ + for (i = 0; i < num_iter; i++) { + uint8_t *pb = get_dst_buffer(index, p_buffer); + + IMB_AES256_GCM_DEC(mb_mgr, &gdata_key, + &gdata_ctx, + pb, + pb, + size_aes, iv, + aad, params->aad_size, + auth_tag, sizeof(auth_tag)); + index = get_next_index(index); + } + } +#ifndef _WIN32 + if (use_unhalted_cycles) + time = (read_cycles(params->core) - + rd_cycles_cost) - time; + else +#endif + time = __rdtscp(&aux) - time; + } + + free(key); + free(aad); + return time / num_iter; +} + +/* Computes mean of set of times 
after dropping bottom and top quarters */ +static uint64_t +mean_median(uint64_t *array, uint32_t size, + uint8_t *p_buffer, uint128_t *p_keys) +{ + const uint32_t quarter = size / 4; + uint32_t i; + uint64_t sum; + + /* these are single threaded runs, so we skip + * the hardware thread related skew clipping + * thus skipping "ignore first and last eighth" + */ + + /* ignore lowest and highest quarter */ + qsort(array, size, sizeof(uint64_t), compare_uint64_t); + + /* dropping the bottom and top quarters + * after sorting to remove noise/variations + */ + array += quarter; + size -= quarter * 2; + + + if ((size == 0) || (size & 0x80000000)) { + fprintf(stderr, "Not enough data points!\n"); + free_mem(&p_buffer, &p_keys); + exit(EXIT_FAILURE); + } + sum = 0; + for (i = 0; i < size; i++) + sum += array[i]; + + sum = (sum + size / 2) / size; + return sum; +} + +/* Runs test for each buffer size and stores averaged execution time */ +static void +process_variant(MB_MGR *mgr, const uint32_t arch, struct params_s *params, + struct variant_s *variant_ptr, const uint32_t run, + uint8_t *p_buffer, uint128_t *p_keys) +{ + const uint32_t sizes = params->num_sizes; + uint64_t *times = &variant_ptr->avg_times[run]; + uint32_t sz; + + for (sz = 0; sz < sizes; sz++) { + const uint32_t size_aes = job_sizes[RANGE_MIN] + + (sz * job_sizes[RANGE_STEP]); + uint32_t num_iter; + + params->aad_size = 0; + if (params->cipher_mode == TEST_GCM) + params->aad_size = gcm_aad_size; + + if (params->cipher_mode == TEST_CCM) + params->aad_size = ccm_aad_size; + + /* + * If job size == 0, check AAD size + * (only allowed for GCM/CCM) + */ + if (size_aes == 0 && params->aad_size != 0) + num_iter = (iter_scale >= (uint32_t)params->aad_size) ? + (iter_scale / (uint32_t)params->aad_size) : + 1; + else if (size_aes != 0) + num_iter = (iter_scale >= size_aes) ? 
+ (iter_scale / size_aes) : 1; + else + num_iter = iter_scale; + + params->size_aes = size_aes; + if (params->cipher_mode == TEST_GCM && (!use_gcm_job_api)) { + if (job_iter == 0) + *times = do_test_gcm(params, 2 * num_iter, mgr, + p_buffer, p_keys); + else + *times = do_test_gcm(params, job_iter, mgr, + p_buffer, p_keys); + } else { + if (job_iter == 0) + *times = do_test(mgr, params, num_iter, + p_buffer, p_keys); + else + *times = do_test(mgr, params, job_iter, + p_buffer, p_keys); + } + times += NUM_RUNS; + } + + variant_ptr->params = *params; + variant_ptr->arch = arch; +} + +/* Sets cipher mode, hash algorithm */ +static void +do_variants(MB_MGR *mgr, const uint32_t arch, struct params_s *params, + const uint32_t run, struct variant_s **variant_ptr, + uint32_t *variant, uint8_t *p_buffer, uint128_t *p_keys, + const int print_info) +{ + uint32_t hash_alg; + uint32_t h_start = TEST_SHA1; + uint32_t h_end = TEST_NULL_HASH; + uint32_t c_mode; + uint32_t c_start = TEST_CBC; + uint32_t c_end = TEST_NULL_CIPHER; + + switch (params->test_type) { + case TTYPE_AES_DOCSIS: + h_start = TEST_NULL_HASH; + c_start = TEST_AESDOCSIS; + c_end = TEST_DESDOCSIS4; + break; + case TTYPE_AES_GCM: + h_start = TEST_HASH_GCM; + h_end = TEST_HASH_GCM; + c_start = TEST_GCM; + c_end = TEST_GCM; + break; + case TTYPE_AES_CCM: + h_start = TEST_HASH_CCM; + h_end = TEST_HASH_CCM; + c_start = TEST_CCM; + c_end = TEST_CCM; + break; + case TTYPE_AES_DES: + h_start = TEST_NULL_HASH; + h_end = TEST_NULL_HASH; + c_start = TEST_DES; + c_end = TEST_DES; + break; + case TTYPE_AES_3DES: + h_start = TEST_NULL_HASH; + h_end = TEST_NULL_HASH; + c_start = TEST_3DES; + c_end = TEST_3DES; + break; + case TTYPE_PON: + h_start = TEST_PON_CRC_BIP; + h_end = TEST_PON_CRC_BIP; + c_start = TEST_PON_CNTR; + c_end = TEST_PON_NO_CNTR; + break; + case TTYPE_CUSTOM: + h_start = params->hash_alg; + h_end = params->hash_alg; + c_start = params->cipher_mode; + c_end = params->cipher_mode; + break; + default: + break; + } 
+ + for (c_mode = c_start; c_mode <= c_end; c_mode++) { + params->cipher_mode = (enum test_cipher_mode_e) c_mode; + for (hash_alg = h_start; hash_alg <= h_end; hash_alg++) { + params->hash_alg = (enum test_hash_alg_e) hash_alg; + process_variant(mgr, arch, params, *variant_ptr, run, + p_buffer, p_keys); + /* update and print progress bar */ + if (print_info) + prog_bar_update(*variant); + (*variant)++; + (*variant_ptr)++; + } + } +} + +/* Sets cipher direction and key size */ +static void +run_dir_test(MB_MGR *mgr, const uint32_t arch, struct params_s *params, + const uint32_t run, struct variant_s **variant_ptr, + uint32_t *variant, uint8_t *p_buffer, uint128_t *p_keys, + const int print_info) +{ + uint32_t dir; + uint32_t k; /* Key size */ + uint32_t limit = AES_256_BYTES; /* Key size value limit */ + + if (params->test_type == TTYPE_AES_DOCSIS || + params->test_type == TTYPE_AES_DES || + params->test_type == TTYPE_AES_3DES || + params->test_type == TTYPE_PON || + params->test_type == TTYPE_AES_CCM) + limit = AES_128_BYTES; + + switch (arch) { + case 0: + init_mb_mgr_sse(mgr); + break; + case 1: + init_mb_mgr_avx(mgr); + break; + case 2: + init_mb_mgr_avx2(mgr); + break; + default: + case 3: + init_mb_mgr_avx512(mgr); + break; + } + + if (params->test_type == TTYPE_CUSTOM) { + params->cipher_dir = custom_job_params.cipher_dir; + params->aes_key_size = custom_job_params.aes_key_size; + params->cipher_mode = custom_job_params.cipher_mode; + params->hash_alg = custom_job_params.hash_alg; + do_variants(mgr, arch, params, run, variant_ptr, + variant, p_buffer, p_keys, print_info); + return; + } + + for (dir = ENCRYPT; dir <= DECRYPT; dir++) { + params->cipher_dir = (JOB_CIPHER_DIRECTION) dir; + for (k = AES_128_BYTES; k <= limit; k += 8) { + params->aes_key_size = k; + do_variants(mgr, arch, params, run, variant_ptr, + variant, p_buffer, p_keys, print_info); + } + } +} + +/* Generates output containing averaged times for each test variant */ +static void 
+print_times(struct variant_s *variant_list, struct params_s *params, + const uint32_t total_variants, uint8_t *p_buffer, uint128_t *p_keys) +{ + const uint32_t sizes = params->num_sizes; + uint32_t col; + uint32_t sz; + + /* Temporary variables */ + struct params_s par; + uint8_t c_mode; + uint8_t c_dir; + uint8_t h_alg; + const char *func_names[4] = { + "SSE", "AVX", "AVX2", "AVX512" + }; + const char *c_mode_names[TEST_NUM_CIPHER_TESTS - 1] = { + "CBC", "CNTR", "CNTR+8", "CNTR_BITLEN", "CNTR_BITLEN4", "ECB", + "NULL_CIPHER", "DOCAES", "DOCAES+8", "DOCDES", "DOCDES+4", + "GCM", "CCM", "DES", "3DES", "PON", "PON_NO_CTR" + }; + const char *c_dir_names[2] = { + "ENCRYPT", "DECRYPT" + }; + const char *h_alg_names[TEST_NUM_HASH_TESTS - 1] = { + "SHA1", "SHA_224", "SHA_256", "SHA_384", "SHA_512", "XCBC", + "MD5", "CMAC", "CMAC_BITLEN", "NULL_HASH", "GCM", "CUSTOM", + "CCM", "BIP-CRC32" + }; + printf("ARCH"); + for (col = 0; col < total_variants; col++) + printf("\t%s", func_names[variant_list[col].arch]); + printf("\n"); + printf("CIPHER"); + for (col = 0; col < total_variants; col++) { + par = variant_list[col].params; + c_mode = par.cipher_mode - TEST_CBC; + printf("\t%s", c_mode_names[c_mode]); + } + printf("\n"); + printf("DIR"); + for (col = 0; col < total_variants; col++) { + par = variant_list[col].params; + c_dir = par.cipher_dir - ENCRYPT; + printf("\t%s", c_dir_names[c_dir]); + } + printf("\n"); + printf("HASH_ALG"); + for (col = 0; col < total_variants; col++) { + par = variant_list[col].params; + h_alg = par.hash_alg - TEST_SHA1; + printf("\t%s", h_alg_names[h_alg]); + } + printf("\n"); + printf("KEY_SIZE"); + for (col = 0; col < total_variants; col++) { + par = variant_list[col].params; + printf("\tAES-%u", par.aes_key_size * 8); + } + printf("\n"); + for (sz = 0; sz < sizes; sz++) { + printf("%d", job_sizes[RANGE_MIN] + + (sz * job_sizes[RANGE_STEP])); + for (col = 0; col < total_variants; col++) { + uint64_t *time_ptr = + &variant_list[col].avg_times[sz 
* NUM_RUNS]; + const unsigned long long val = + mean_median(time_ptr, NUM_RUNS, + p_buffer, p_keys); + + printf("\t%llu", val); + } + printf("\n"); + } +} + +/* Prepares data structure for test variants storage, sets test configuration */ +#ifdef _WIN32 +static void +#else +static void * +#endif +run_tests(void *arg) +{ + uint32_t i; + struct thread_info *info = (struct thread_info *)arg; + MB_MGR *p_mgr = NULL; + struct params_s params; + uint32_t num_variants[NUM_TTYPES] = {0}; + uint32_t type, at_size, run, arch; + uint32_t variants_per_arch, max_arch; + uint32_t variant; + uint32_t total_variants = 0; + struct variant_s *variant_ptr = NULL; + struct variant_s *variant_list = NULL; + const uint32_t min_size = job_sizes[RANGE_MIN]; + const uint32_t max_size = job_sizes[RANGE_MAX]; + const uint32_t step_size = job_sizes[RANGE_STEP]; + uint8_t *buf = NULL; + uint128_t *keys = NULL; + + p_mgr = info->p_mgr; + + params.num_sizes = ((max_size - min_size) / step_size) + 1; + + params.core = (uint32_t)info->core; + + /* if cores selected then set affinity */ + if (core_mask) + if (set_affinity(info->core) != 0) { + fprintf(stderr, "Failed to set cpu " + "affinity on core %d\n", info->core); + goto exit_failure; + } + + /* If unhalted cycles selected and this is + the primary thread then start counter */ + if (use_unhalted_cycles && info->print_info) { + int ret; + + ret = start_cycles_ctr(params.core); + if (ret != 0) { + fprintf(stderr, "Failed to start cycles " + "counter on core %u\n", params.core); + goto exit_failure; + } + /* Get average cost of reading counter */ + ret = set_avg_unhalted_cycle_cost(params.core, &rd_cycles_cost); + if (ret != 0 || rd_cycles_cost == 0) { + fprintf(stderr, "Error calculating unhalted " + "cycles read overhead!\n"); + goto exit_failure; + } else + fprintf(stderr, "Started counting unhalted cycles on " + "core %d\nUnhalted cycles read cost = %lu " + "cycles\n", params.core, + (unsigned long)rd_cycles_cost); + } + + init_mem(&buf, 
&keys); + + for (type = TTYPE_AES_HMAC; type < NUM_TTYPES; type++) { + if (test_types[type] == 0) + continue; + + switch (type) { + default: + case TTYPE_AES_HMAC: + variants_per_arch = VARIANTS_PER_ARCH_AES; + max_arch = NUM_ARCHS; + break; + case TTYPE_AES_DOCSIS: + variants_per_arch = VARIANTS_PER_ARCH_DOCSIS; + max_arch = NUM_ARCHS; + break; + case TTYPE_AES_GCM: + variants_per_arch = VARIANTS_PER_ARCH_GCM; + max_arch = NUM_ARCHS; + break; + case TTYPE_AES_CCM: + variants_per_arch = VARIANTS_PER_ARCH_CCM; + max_arch = NUM_ARCHS; + break; + case TTYPE_AES_DES: + variants_per_arch = VARIANTS_PER_ARCH_DES; + max_arch = NUM_ARCHS; + break; + case TTYPE_AES_3DES: + variants_per_arch = VARIANTS_PER_ARCH_3DES; + max_arch = NUM_ARCHS; + break; + case TTYPE_PON: + variants_per_arch = VARIANTS_PER_ARCH_PON; + max_arch = NUM_ARCHS; + break; + case TTYPE_CUSTOM: + variants_per_arch = 1; + max_arch = NUM_ARCHS; + break; + } + + /* Calculating number of all variants */ + for (arch = 0; arch < max_arch; arch++) { + if (archs[arch] == 0) + continue; + num_variants[type] += variants_per_arch; + } + total_variants += num_variants[type]; + } + + if (total_variants == 0) { + fprintf(stderr, "No tests to be run\n"); + goto exit; + } + + if (info->print_info && !silent_progress_bar) + fprintf(stderr, "Total number of combinations (algos, " + "key sizes, cipher directions) to test = %u\n", + total_variants); + + variant_list = (struct variant_s *) + malloc(total_variants * sizeof(struct variant_s)); + if (variant_list == NULL) { + fprintf(stderr, "Cannot allocate memory\n"); + goto exit_failure; + } + + at_size = NUM_RUNS * params.num_sizes * sizeof(uint64_t); + for (variant = 0, variant_ptr = variant_list; + variant < total_variants; + variant++, variant_ptr++) { + variant_ptr->avg_times = (uint64_t *) malloc(at_size); + if (!variant_ptr->avg_times) { + fprintf(stderr, "Cannot allocate memory\n"); + goto exit_failure; + } + } + + for (run = 0; run < NUM_RUNS; run++) { + if 
(info->print_info) + fprintf(stderr, "\nStarting run %d of %d%c", + run+1, NUM_RUNS, + silent_progress_bar ? '\r' : '\n' ); + + variant = 0; + variant_ptr = variant_list; + + if (iter_scale == ITER_SCALE_SMOKE && run != 0) + continue; + + if (info->print_info) + prog_bar_init(total_variants); + + for (type = TTYPE_AES_HMAC; type < NUM_TTYPES; type++) { + if (test_types[type] == 0) + continue; + + max_arch = NUM_ARCHS; + + params.num_variants = num_variants[type]; + params.test_type = type; + /* Performing tests for each selected architecture */ + for (arch = 0; arch < max_arch; arch++) { + if (archs[arch] == 0) + continue; + run_dir_test(p_mgr, arch, ¶ms, run, + &variant_ptr, &variant, buf, + keys, info->print_info); + } + } /* end for type */ + if (info->print_info) + prog_bar_fini(); + + } /* end for run */ + if (info->print_info == 1 && iter_scale != ITER_SCALE_SMOKE) { + fprintf(stderr, "\n"); + print_times(variant_list, ¶ms, total_variants, buf, keys); + } + +exit: + if (variant_list != NULL) { + /* Freeing variants list */ + for (i = 0; i < total_variants; i++) + free(variant_list[i].avg_times); + free(variant_list); + } + free_mem(&buf, &keys); + free_mb_mgr(p_mgr); +#ifndef _WIN32 + return NULL; + +#else + return; +#endif +exit_failure: + if (variant_list != NULL) + free(variant_list); + free_mem(&buf, &keys); + free_mb_mgr(p_mgr); + exit(EXIT_FAILURE); +} + +static void usage(void) +{ + fprintf(stderr, "Usage: ipsec_perf [args], " + "where args are zero or more\n" + "-h: print this message\n" + "-c: Use cold cache, it uses warm as default\n" + "-w: Use warm cache\n" + "--arch: run only tests on specified architecture (SSE/AVX/AVX2/AVX512)\n" + "--cipher-algo: Select cipher algorithm to run on the custom test\n" + "--cipher-dir: Select cipher direction to run on the custom test " + "(encrypt/decrypt) (default = encrypt)\n" + "--hash-algo: Select hash algorithm to run on the custom test\n" + "--aead-algo: Select AEAD algorithm to run on the custom test\n" + 
"--no-avx512: Don't do AVX512\n" + "--no-avx2: Don't do AVX2\n" + "--no-avx: Don't do AVX\n" + "--no-sse: Don't do SSE\n" + "-o val: Use <val> for the SHA size increment, default is 24\n" + "--shani-on: use SHA extensions, default: auto-detect\n" + "--shani-off: don't use SHA extensions\n" + "--no-gcm: do not run GCM perf tests\n" + "--no-aes: do not run standard AES + HMAC perf tests\n" + "--no-docsis: do not run DOCSIS cipher perf tests\n" + "--no-ccm: do not run CCM cipher perf tests\n" + "--no-des: do not run DES cipher perf tests\n" + "--no-3des: do not run 3DES cipher perf tests\n" + "--no-pon: do not run PON cipher perf tests\n" + "--gcm-job-api: use JOB API for GCM perf tests" + " (raw GCM API is default)\n" + "--threads num: <num> for the number of threads to run" + " Max: %d\n" + "--cores mask: <mask> CPU's to run threads\n" + "--unhalted-cycles: measure using unhalted cycles (requires root).\n" + " Note: RDTSC is used by default.\n" + "--quick: reduces number of test iterations by x10\n" + " (less precise but quicker)\n" + "--smoke: very quick, unprecise and without print out\n" + " (for validation only)\n" + "--job-size: size of the cipher & MAC job in bytes. It can be:\n" + " - single value: test single size\n" + " - range: test multiple sizes with following format" + " min:step:max (e.g. 
16:16:256)\n" + " (-o still applies for MAC)\n" + "--aad-size: size of AAD for AEAD algorithms\n" + "--job-iter: number of tests iterations for each job size\n" + "--no-progress-bar: Don't display progress bar\n", + MAX_NUM_THREADS + 1); +} + +static int +get_next_num_arg(const char * const *argv, const int index, const int argc, + void *dst, const size_t dst_size) +{ + char *endptr = NULL; + uint64_t val; + + if (dst == NULL || argv == NULL || index < 0 || argc < 0) { + fprintf(stderr, "%s() internal error!\n", __func__); + exit(EXIT_FAILURE); + } + + if (index >= (argc - 1)) { + fprintf(stderr, "'%s' requires an argument!\n", argv[index]); + exit(EXIT_FAILURE); + } + +#ifdef _WIN32 + val = _strtoui64(argv[index + 1], &endptr, 0); +#else + val = strtoull(argv[index + 1], &endptr, 0); +#endif + if (endptr == argv[index + 1] || (endptr != NULL && *endptr != '\0')) { + fprintf(stderr, "Error converting '%s' as value for '%s'!\n", + argv[index + 1], argv[index]); + exit(EXIT_FAILURE); + } + + switch (dst_size) { + case (sizeof(uint8_t)): + *((uint8_t *)dst) = (uint8_t) val; + break; + case (sizeof(uint16_t)): + *((uint16_t *)dst) = (uint16_t) val; + break; + case (sizeof(uint32_t)): + *((uint32_t *)dst) = (uint32_t) val; + break; + case (sizeof(uint64_t)): + *((uint64_t *)dst) = val; + break; + default: + fprintf(stderr, "%s() invalid dst_size %u!\n", + __func__, (unsigned) dst_size); + exit(EXIT_FAILURE); + break; + } + + return index + 1; +} + +static int +detect_arch(unsigned int arch_support[NUM_ARCHS]) +{ + const uint64_t detect_sse = + IMB_FEATURE_SSE4_2 | IMB_FEATURE_CMOV | IMB_FEATURE_AESNI; + const uint64_t detect_avx = + IMB_FEATURE_AVX | IMB_FEATURE_CMOV | IMB_FEATURE_AESNI; + const uint64_t detect_avx2 = IMB_FEATURE_AVX2 | detect_avx; + const uint64_t detect_avx512 = IMB_FEATURE_AVX512_SKX | detect_avx2; + MB_MGR *p_mgr = NULL; + enum arch_type_e arch_id; + + if (arch_support == NULL) { + fprintf(stderr, "Array not passed correctly\n"); + return -1; + } + 
+ for (arch_id = ARCH_SSE; arch_id < NUM_ARCHS; arch_id++) + arch_support[arch_id] = 1; + + p_mgr = alloc_mb_mgr(0); + if (p_mgr == NULL) { + fprintf(stderr, "Architecture detect error!\n"); + return -1; + } + + if ((p_mgr->features & detect_avx512) != detect_avx512) + arch_support[ARCH_AVX512] = 0; + + if ((p_mgr->features & detect_avx2) != detect_avx2) + arch_support[ARCH_AVX2] = 0; + + if ((p_mgr->features & detect_avx) != detect_avx) + arch_support[ARCH_AVX] = 0; + + if ((p_mgr->features & detect_sse) != detect_sse) + arch_support[ARCH_SSE] = 0; + + free_mb_mgr(p_mgr); + + return 0; +} + +/* + * Check string argument is supported and if it is, return values associated + * with it. + */ +static const union params * +check_string_arg(const char *param, const char *arg, + const struct str_value_mapping *map, + const unsigned int num_avail_opts) +{ + unsigned int i; + + if (arg == NULL) { + fprintf(stderr, "%s requires an argument\n", param); + goto exit; + } + + for (i = 0; i < num_avail_opts; i++) + if (strcmp(arg, map[i].name) == 0) + return &(map[i].values); + + /* Argument is not listed in the available options */ + fprintf(stderr, "Invalid argument for %s\n", param); +exit: + fprintf(stderr, "Accepted arguments: "); + for (i = 0; i < num_avail_opts; i++) + fprintf(stderr, "%s ", map[i].name); + fprintf(stderr, "\n"); + + return NULL; +} + +static int +parse_range(const char * const *argv, const int index, const int argc, + uint32_t range_values[NUM_RANGE]) +{ + char *token; + uint32_t number; + unsigned int i; + + + if (range_values == NULL || argv == NULL || index < 0 || argc < 0) { + fprintf(stderr, "%s() internal error!\n", __func__); + exit(EXIT_FAILURE); + } + + if (index >= (argc - 1)) { + fprintf(stderr, "'%s' requires an argument!\n", argv[index]); + exit(EXIT_FAILURE); + } + + char *copy_arg = strdup(argv[index + 1]); + + if (copy_arg == NULL) { + fprintf(stderr, "%s() internal error!\n", __func__); + exit(EXIT_FAILURE); + } + + errno = 0; + token = 
strtok(copy_arg, ":"); + + /* Try parsing range (minimum, step and maximum values) */ + for (i = 0; i < NUM_RANGE; i++) { + if (token == NULL) + goto no_range; + + number = strtoul(token, NULL, 10); + + if (errno != 0) + goto no_range; + + range_values[i] = number; + token = strtok(NULL, ":"); + } + + if (token != NULL) + goto no_range; + + if (range_values[RANGE_MAX] < range_values[RANGE_MIN]) { + fprintf(stderr, "Maximum value of range cannot be lower " + "than minimum value\n"); + exit(EXIT_FAILURE); + } + + if (range_values[RANGE_STEP] == 0) { + fprintf(stderr, "Step value in range cannot be 0\n"); + exit(EXIT_FAILURE); + } + + goto end_range; +no_range: + /* Try parsing as single value */ + get_next_num_arg(argv, index, argc, &job_sizes[RANGE_MIN], + sizeof(job_sizes[RANGE_MIN])); + + job_sizes[RANGE_MAX] = job_sizes[RANGE_MIN]; + +end_range: + free(copy_arg); + return (index + 1); + +} + +int main(int argc, char *argv[]) +{ + uint32_t num_t = 0; + int i, core = 0; + struct thread_info *thread_info_p = t_info; + unsigned int arch_id; + unsigned int arch_support[NUM_ARCHS]; + const union params *values; + unsigned int cipher_algo_set = 0; + unsigned int hash_algo_set = 0; + unsigned int aead_algo_set = 0; + unsigned int cipher_dir_set = 0; +#ifdef _WIN32 + HANDLE threads[MAX_NUM_THREADS]; +#else + pthread_t tids[MAX_NUM_THREADS]; +#endif + + for (i = 1; i < argc; i++) + if (strcmp(argv[i], "-h") == 0) { + usage(); + return EXIT_SUCCESS; + } else if (strcmp(argv[i], "-c") == 0) { + cache_type = COLD; + fprintf(stderr, "Cold cache, "); + } else if (strcmp(argv[i], "-w") == 0) { + cache_type = WARM; + fprintf(stderr, "Warm cache, "); + } else if (strcmp(argv[i], "--no-avx512") == 0) { + archs[ARCH_AVX512] = 0; + } else if (strcmp(argv[i], "--no-avx2") == 0) { + archs[ARCH_AVX2] = 0; + } else if (strcmp(argv[i], "--no-avx") == 0) { + archs[ARCH_AVX] = 0; + } else if (strcmp(argv[i], "--no-sse") == 0) { + archs[ARCH_SSE] = 0; + } else if (strcmp(argv[i], 
"--shani-on") == 0) { + flags &= (~IMB_FLAG_SHANI_OFF); + } else if (strcmp(argv[i], "--shani-off") == 0) { + flags |= IMB_FLAG_SHANI_OFF; + } else if (strcmp(argv[i], "--no-gcm") == 0) { + test_types[TTYPE_AES_GCM] = 0; + } else if (strcmp(argv[i], "--no-aes") == 0) { + test_types[TTYPE_AES_HMAC] = 0; + } else if (strcmp(argv[i], "--no-docsis") == 0) { + test_types[TTYPE_AES_DOCSIS] = 0; + } else if (strcmp(argv[i], "--no-ccm") == 0) { + test_types[TTYPE_AES_CCM] = 0; + } else if (strcmp(argv[i], "--no-des") == 0) { + test_types[TTYPE_AES_DES] = 0; + } else if (strcmp(argv[i], "--no-3des") == 0) { + test_types[TTYPE_AES_3DES] = 0; + } else if (strcmp(argv[i], "--no-pon") == 0) { + test_types[TTYPE_PON] = 0; + } else if (strcmp(argv[i], "--gcm-job-api") == 0) { + use_gcm_job_api = 1; + } else if (strcmp(argv[i], "--quick") == 0) { + iter_scale = ITER_SCALE_SHORT; + } else if (strcmp(argv[i], "--smoke") == 0) { + iter_scale = ITER_SCALE_SMOKE; + } else if (strcmp(argv[i], "--arch") == 0) { + values = check_string_arg(argv[i], argv[i+1], + arch_str_map, + DIM(arch_str_map)); + if (values == NULL) + return EXIT_FAILURE; + + /* + * Disable all the other architectures + * and enable only the specified + */ + memset(archs, 0, sizeof(archs)); + archs[values->arch_type] = 1; + i++; + } else if (strcmp(argv[i], "--cipher-algo") == 0) { + values = check_string_arg(argv[i], argv[i+1], + cipher_algo_str_map, + DIM(cipher_algo_str_map)); + if (values == NULL) + return EXIT_FAILURE; + + custom_job_params.cipher_mode = + values->job_params.cipher_mode; + custom_job_params.aes_key_size = + values->job_params.aes_key_size; + test_types[TTYPE_CUSTOM] = 1; + cipher_algo_set = 1; + i++; + } else if (strcmp(argv[i], "--cipher-dir") == 0) { + values = check_string_arg(argv[i], argv[i+1], + cipher_dir_str_map, + DIM(cipher_dir_str_map)); + if (values == NULL) + return EXIT_FAILURE; + + custom_job_params.cipher_dir = + values->job_params.cipher_dir; + cipher_dir_set = 1; + i++; + } else 
if (strcmp(argv[i], "--hash-algo") == 0) { + values = check_string_arg(argv[i], argv[i+1], + hash_algo_str_map, + DIM(hash_algo_str_map)); + if (values == NULL) + return EXIT_FAILURE; + + custom_job_params.hash_alg = + values->job_params.hash_alg; + test_types[TTYPE_CUSTOM] = 1; + hash_algo_set = 1; + i++; + } else if (strcmp(argv[i], "--aead-algo") == 0) { + values = check_string_arg(argv[i], argv[i+1], + aead_algo_str_map, + DIM(aead_algo_str_map)); + if (values == NULL) + return EXIT_FAILURE; + + custom_job_params.cipher_mode = + values->job_params.cipher_mode; + custom_job_params.aes_key_size = + values->job_params.aes_key_size; + custom_job_params.hash_alg = + values->job_params.hash_alg; + test_types[TTYPE_CUSTOM] = 1; + aead_algo_set = 1; + i++; + } else if (strcmp(argv[i], "-o") == 0) { + i = get_next_num_arg((const char * const *)argv, i, + argc, &sha_size_incr, + sizeof(sha_size_incr)); + } else if (strcmp(argv[i], "--job-size") == 0) { + /* Try parsing the argument as a range first */ + i = parse_range((const char * const *)argv, i, argc, + job_sizes); + if (job_sizes[RANGE_MAX] > JOB_SIZE_TOP) { + fprintf(stderr, + "Invalid job size %u (max %u)\n", + (unsigned) job_sizes[RANGE_MAX], + JOB_SIZE_TOP); + return EXIT_FAILURE; + } + } else if (strcmp(argv[i], "--aad-size") == 0) { + /* Get AAD size for both GCM and CCM */ + i = get_next_num_arg((const char * const *)argv, i, + argc, &gcm_aad_size, + sizeof(gcm_aad_size)); + if (gcm_aad_size > AAD_SIZE_MAX) { + fprintf(stderr, + "Invalid AAD size %u (max %u)!\n", + (unsigned) gcm_aad_size, + AAD_SIZE_MAX); + return EXIT_FAILURE; + } + ccm_aad_size = gcm_aad_size; + } else if (strcmp(argv[i], "--job-iter") == 0) { + i = get_next_num_arg((const char * const *)argv, i, + argc, &job_iter, sizeof(job_iter)); + } else if (strcmp(argv[i], "--threads") == 0) { + i = get_next_num_arg((const char * const *)argv, i, + argc, &num_t, sizeof(num_t)); + if (num_t > (MAX_NUM_THREADS + 1)) { + fprintf(stderr, "Invalid number 
of threads!\n"); + return EXIT_FAILURE; + } + } else if (strcmp(argv[i], "--cores") == 0) { + i = get_next_num_arg((const char * const *)argv, i, + argc, &core_mask, + sizeof(core_mask)); + } else if (strcmp(argv[i], "--unhalted-cycles") == 0) { + use_unhalted_cycles = 1; + } else if (strcmp(argv[i], "--no-progress-bar") == 0) { + silent_progress_bar = 1; + } else { + usage(); + return EXIT_FAILURE; + } + + if (test_types[TTYPE_CUSTOM]) { + /* Disable all other tests when custom test is selected */ + memset(test_types, 0, sizeof(test_types)); + test_types[TTYPE_CUSTOM] = 1; + if (aead_algo_set && (cipher_algo_set || hash_algo_set)) { + fprintf(stderr, "AEAD algorithm cannot be used " + "combined with another cipher/hash " + "algorithm\n"); + return EXIT_FAILURE; + } + } + + if (cipher_algo_set == 0 && aead_algo_set == 0 && cipher_dir_set) { + fprintf(stderr, "--cipher-dir can only be used with " + "--cipher-algo or --aead-algo\n"); + return EXIT_FAILURE; + } + + if (test_types[TTYPE_AES_CCM] || + custom_job_params.cipher_mode == TEST_CCM) { + if (ccm_aad_size > CCM_AAD_SIZE_MAX) { + fprintf(stderr, "AAD cannot be higher than %u in CCM\n", + CCM_AAD_SIZE_MAX); + return EXIT_FAILURE; + } + } + + if (job_sizes[RANGE_MIN] == 0) { + if (test_types[TTYPE_AES_HMAC] || + test_types[TTYPE_AES_DOCSIS] || + test_types[TTYPE_AES_DES] || + test_types[TTYPE_AES_3DES] || + (test_types[TTYPE_CUSTOM] && + aead_algo_set == 0)) { + fprintf(stderr, "Buffer size cannot be 0 unless only " + "an AEAD algorithm is tested\n"); + return EXIT_FAILURE; + } + } + + /* Check num cores >= number of threads */ + if ((core_mask != 0 && num_t != 0) && (num_t > bitcount(core_mask))) { + fprintf(stderr, "Insufficient number of cores in " + "core mask (0x%lx) to run %d threads!\n", + (unsigned long) core_mask, num_t); + return EXIT_FAILURE; + } + + /* if cycles selected then init MSR module */ + if (use_unhalted_cycles) { + if (core_mask == 0) { + fprintf(stderr, "Must specify core mask " + "when 
reading unhalted cycles!\n"); + return EXIT_FAILURE; + } + + if (init_msr_mod() != 0) { + fprintf(stderr, "Error initializing MSR module!\n"); + return EXIT_FAILURE; + } + } + + if (detect_arch(arch_support) < 0) + return EXIT_FAILURE; + + /* disable tests depending on instruction sets supported */ + for (arch_id = 0; arch_id < NUM_ARCHS; arch_id++) { + if (archs[arch_id] == 1 && arch_support[arch_id] == 0) { + archs[arch_id] = 0; + fprintf(stderr, + "%s not supported. Disabling %s tests\n", + arch_str_map[arch_id].name, + arch_str_map[arch_id].name); + } + } + + fprintf(stderr, "SHA size incr = %d\n", sha_size_incr); + + if (test_types[TTYPE_AES_GCM] || + (custom_job_params.cipher_mode == TEST_GCM)) + fprintf(stderr, "GCM AAD = %"PRIu64"\n", gcm_aad_size); + + if (test_types[TTYPE_AES_CCM] || + (custom_job_params.cipher_mode == TEST_CCM)) + fprintf(stderr, "CCM AAD = %"PRIu64"\n", ccm_aad_size); + + if (archs[ARCH_SSE]) { + MB_MGR *p_mgr = alloc_mb_mgr(flags); + + if (p_mgr == NULL) { + fprintf(stderr, "Error allocating MB_MGR structure!\n"); + return EXIT_FAILURE; + } + init_mb_mgr_sse(p_mgr); + fprintf(stderr, "%s SHA extensions (shani) for SSE arch\n", + (p_mgr->features & IMB_FEATURE_SHANI) ? 
+ "Using" : "Not using"); + free_mb_mgr(p_mgr); + } + + memset(t_info, 0, sizeof(t_info)); + init_offsets(cache_type); + + srand(ITER_SCALE_LONG + ITER_SCALE_SHORT + ITER_SCALE_SMOKE); + + if (num_t > 1) { + uint32_t n; + + for (n = 0; n < (num_t - 1); n++, thread_info_p++) { + /* Set core if selected */ + if (core_mask) { + core = next_core(core_mask, core); + thread_info_p->core = core++; + } + + /* Allocate MB manager for each thread */ + thread_info_p->p_mgr = alloc_mb_mgr(flags); + if (thread_info_p->p_mgr == NULL) { + fprintf(stderr, "Failed to allocate MB_MGR " + "structure for thread %u!\n", + (unsigned)(n + 1)); + exit(EXIT_FAILURE); + } +#ifdef _WIN32 + threads[n] = (HANDLE) + _beginthread(&run_tests, 0, + (void *)thread_info_p); +#else + pthread_attr_t attr; + + pthread_attr_init(&attr); + pthread_create(&tids[n], &attr, run_tests, + (void *)thread_info_p); +#endif + } + } + + thread_info_p->print_info = 1; + thread_info_p->p_mgr = alloc_mb_mgr(flags); + if (thread_info_p->p_mgr == NULL) { + fprintf(stderr, "Failed to allocate MB_MGR " + "structure for main thread!\n"); + exit(EXIT_FAILURE); + } + if (core_mask) { + core = next_core(core_mask, core); + thread_info_p->core = core; + } + + run_tests((void *)thread_info_p); + if (num_t > 1) { + uint32_t n; + +#ifdef _WIN32 + WaitForMultipleObjects(num_t, threads, FALSE, INFINITE); +#endif + for (n = 0; n < (num_t - 1); n++) { + fprintf(stderr, "Waiting on thread %u to finish...\n", + (unsigned)(n + 2)); +#ifdef _WIN32 + CloseHandle(threads[n]); +#else + pthread_join(tids[n], NULL); +#endif + } + } + + if (use_unhalted_cycles) + machine_fini(); + + return EXIT_SUCCESS; +} diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/msr.c b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.c new file mode 100644 index 000000000..1382d031b --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.c @@ -0,0 +1,304 @@ +/********************************************************************** + Copyright(c) 2018 Intel Corporation All 
rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +/** + * @brief Provides access to MSR read & write operations + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifndef _WIN32 +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#else +#include <windows.h> +#ifdef WIN_MSR +#include "OlsDef.h" +#include "OlsApiInitExt.h" +#include "OlsApiInit.h" +#endif /* WIN_MSR */ +#endif + +#include "msr.h" + +static int *m_msr_fd = NULL; /**< MSR driver file descriptors table */ +static unsigned m_maxcores = 0; /**< max number of cores (size of the + table above too) */ +#ifdef WIN_MSR +union msr_data { + uint64_t ui64; + struct { + uint32_t low; + uint32_t high; + } ui32; +}; + +HMODULE hOpenLibSys = NULL; + +/** + * @brief Initialize WinRing0 driver + * + * @return Operation status + * @retval MACHINE_RETVAL_OK on success + */ +static int +initMSRdriver(void) +{ + const BOOL result = InitOpenLibSys(&hOpenLibSys); + + if (result == 0) { + hOpenLibSys = NULL; + fprintf(stderr, "Failed to load WinRing0 driver!\n"); + return MACHINE_RETVAL_ERROR; + } + + return MACHINE_RETVAL_OK; +} + +/** + * @brief Shutdown WinRing0 driver + */ +static void +deInitMSRdriver(void) +{ + const BOOL result = DeinitOpenLibSys(&hOpenLibSys); + + if (result == 0) + fprintf(stderr, "Error shutting down WinRing0 driver!\n"); + + hOpenLibSys = NULL; +} +#endif /* WIN_MSR */ + +int +machine_init(const unsigned max_core_id) +{ + unsigned i; + + if (max_core_id == 0) + return MACHINE_RETVAL_PARAM; +#ifdef _WIN32 +#ifdef WIN_MSR + if (initMSRdriver() != MACHINE_RETVAL_OK) + return MACHINE_RETVAL_ERROR; +#else + fprintf(stderr, "WinRing0 driver not available!\n"); + return MACHINE_RETVAL_ERROR; +#endif /* WIN_MSR */ +#endif /* _WIN32 */ + + m_maxcores = max_core_id + 1; + + /** + * Allocate table to hold MSR driver file descriptors + * Each file descriptor is for a different core. + * Core id is an index to the table. 
+ */ + m_msr_fd = (int *)malloc(m_maxcores * sizeof(m_msr_fd[0])); + if (m_msr_fd == NULL) { + m_maxcores = 0; + return MACHINE_RETVAL_ERROR; + } + + for (i = 0; i < m_maxcores; i++) + m_msr_fd[i] = -1; + + return MACHINE_RETVAL_OK; +} + +int +machine_fini(void) +{ + ASSERT(m_msr_fd != NULL); + if (m_msr_fd == NULL) + return MACHINE_RETVAL_ERROR; +#ifdef _WIN32 +#ifdef WIN_MSR + deInitMSRdriver(); +#endif +#else + unsigned i; + + /** + * Close open file descriptors and free up table memory. + */ + for (i = 0; i < m_maxcores; i++) + if (m_msr_fd[i] != -1) { + close(m_msr_fd[i]); + m_msr_fd[i] = -1; + } +#endif /* WIN_MSR */ + free(m_msr_fd); + m_msr_fd = NULL; + m_maxcores = 0; + + return MACHINE_RETVAL_OK; +} + +#ifndef _WIN32 +/** + * @brief Returns MSR driver file descriptor for given core id + * + * File descriptor could be previously open and comes from + * m_msr_fd table or is open (& cached) during the call. + * + * @param lcore logical core id + * + * @return MSR driver file descriptor corresponding \a lcore + */ +static int +msr_file_open(const unsigned lcore) +{ + ASSERT(lcore < m_maxcores); + ASSERT(m_msr_fd != NULL); + + int fd = m_msr_fd[lcore]; + + if (fd < 0) { + char fname[32]; + + memset(fname, 0, sizeof(fname)); + snprintf(fname, sizeof(fname)-1, + "/dev/cpu/%u/msr", lcore); + fd = open(fname, O_RDWR); + if (fd < 0) + fprintf(stderr, "Error opening file '%s'!\n", fname); + else + m_msr_fd[lcore] = fd; + } + + return fd; +} +#endif /* _WIN32 */ + +int +msr_read(const unsigned lcore, + const uint32_t reg, + uint64_t *value) +{ + int ret = MACHINE_RETVAL_OK; +#ifdef _WIN32 +#ifdef WIN_MSR + union msr_data msr; + BOOL status; +#endif +#endif + ASSERT(value != NULL); + if (value == NULL) + return MACHINE_RETVAL_PARAM; + + ASSERT(lcore < m_maxcores); + if (lcore >= m_maxcores) + return MACHINE_RETVAL_PARAM; + + ASSERT(m_msr_fd != NULL); + if (m_msr_fd == NULL) + return MACHINE_RETVAL_ERROR; +#ifdef _WIN32 +#ifdef WIN_MSR + msr.ui64 = 0; + status = 
RdmsrTx((DWORD)reg, &(msr.ui32.low), + &(msr.ui32.high), (1ULL << lcore)); + if (status) + *value = msr.ui64; + else + ret = MACHINE_RETVAL_ERROR; +#endif /* WIN_MSR */ +#else + int fd = -1; + ssize_t read_ret = 0; + + fd = msr_file_open(lcore); + if (fd < 0) + return MACHINE_RETVAL_ERROR; + + read_ret = pread(fd, value, sizeof(value[0]), (off_t)reg); + + if (read_ret != sizeof(value[0])) + ret = MACHINE_RETVAL_ERROR; +#endif /* _WIN32 */ + if (ret != MACHINE_RETVAL_OK) + fprintf(stderr, "RDMSR failed for reg[0x%x] on lcore %u\n", + (unsigned)reg, lcore); + + return ret; +} + +int +msr_write(const unsigned lcore, + const uint32_t reg, + const uint64_t value) +{ + int ret = MACHINE_RETVAL_OK; +#ifdef _WIN32 +#ifdef WIN_MSR + union msr_data msr; + BOOL status; +#endif +#endif + ASSERT(lcore < m_maxcores); + if (lcore >= m_maxcores) + return MACHINE_RETVAL_PARAM; + + ASSERT(m_msr_fd != NULL); + if (m_msr_fd == NULL) + return MACHINE_RETVAL_ERROR; + +#ifdef _WIN32 +#ifdef WIN_MSR + msr.ui64 = value; + status = WrmsrTx((DWORD)reg, msr.ui32.low, + msr.ui32.high, (1ULL << lcore)); + if (!status) + ret = MACHINE_RETVAL_ERROR; +#endif /* WIN_MSR */ +#else + int fd = -1; + ssize_t write_ret = 0; + + fd = msr_file_open(lcore); + if (fd < 0) + return MACHINE_RETVAL_ERROR; + + write_ret = pwrite(fd, &value, sizeof(value), (off_t)reg); + + if (write_ret != sizeof(value)) + ret = MACHINE_RETVAL_ERROR; +#endif /* _WIN32 */ + if (ret != MACHINE_RETVAL_OK) + fprintf(stderr, "WRMSR failed for reg[0x%x] " + "<- value[0x%llx] on lcore %u\n", + (unsigned)reg, (unsigned long long)value, lcore); + + return ret; +} diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/msr.h b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.h new file mode 100644 index 000000000..afa8795c4 --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.h @@ -0,0 +1,114 @@ +/********************************************************************** + Copyright(c) 2018 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/**
 * @brief Provides access to MSR read & write operations
 */

#ifndef __MSR_H__
#define __MSR_H__

#include <stdint.h>
#include <stdlib.h>
#ifdef DEBUG
#include <assert.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif


/*
 * ASSERT() maps to assert() in DEBUG builds. In other builds it expands to
 * a no-op expression so that "ASSERT(x);" is still a valid statement.
 */
#ifdef DEBUG
#define ASSERT assert
#else
#define ASSERT(x) ((void) 0)
#endif

#define MACHINE_DEFAULT_MAX_COREID 255 /**< max core id */

#define MACHINE_RETVAL_OK          0 /**< everything OK */
#define MACHINE_RETVAL_ERROR       1 /**< generic error */
#define MACHINE_RETVAL_PARAM       2 /**< parameter error */

/**
 * @brief Initializes machine module
 *
 * @param [in] max_core_id maximum logical core id to be handled by machine
 *             module. Must not be zero; pass
 *             \a MACHINE_DEFAULT_MAX_COREID explicitly to get the default.
 *
 * @return Operation status
 * @retval MACHINE_RETVAL_OK on success
 * @retval MACHINE_RETVAL_PARAM if \a max_core_id is zero
 * @retval MACHINE_RETVAL_ERROR on any other failure
 */
int machine_init(const unsigned max_core_id);

/**
 * @brief Shuts down machine module
 *
 * @return Operation status
 * @retval MACHINE_RETVAL_OK on success
 * @retval MACHINE_RETVAL_ERROR if the module was not initialized
 */
int machine_fini(void);

/**
 * @brief Executes RDMSR on \a lcore logical core
 *
 * @param [in] lcore logical core id
 * @param [in] reg MSR to read from
 * @param [out] value place to store MSR value at
 *
 * @return Operation status
 * @retval MACHINE_RETVAL_OK on success
 * @retval MACHINE_RETVAL_PARAM on NULL \a value or out of range \a lcore
 * @retval MACHINE_RETVAL_ERROR on any other failure
 */
int
msr_read(const unsigned lcore,
         const uint32_t reg,
         uint64_t *value);

/**
 * @brief Executes WRMSR on \a lcore logical core
 *
 * @param [in] lcore logical core id
 * @param [in] reg MSR to write to
 * @param [in] value to be written into \a reg
 *
 * @return Operation status
 * @retval MACHINE_RETVAL_OK on success
 * @retval MACHINE_RETVAL_PARAM on out of range \a lcore
 * @retval MACHINE_RETVAL_ERROR on any other failure
 */
int
msr_write(const unsigned lcore,
          const uint32_t reg,
          const uint64_t value);

#ifdef __cplusplus
}
#endif

#endif /* __MSR_H__ */
000000000..c30e46571 --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/win_x64.mak @@ -0,0 +1,81 @@ +# +# Copyright (c) 2017-2018, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of Intel Corporation nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#

#
# NMAKE script building the ipsec_perf tool with the MSVC toolchain.
#
# Macros that may be set on the nmake command line:
#   PREFIX=<dir>        install root searched for the library
#                       (default: C:\Program Files)
#   WINRING0_DIR=<dir>  WinRing0 headers location; also defines WIN_MSR
#   DEBUG=<any>         unoptimized build with debug information
#   GCM_BIG_DATA=y      defines GCM_BIG_DATA for the compiler
#   EXTRA_CFLAGS=...    additional compiler options
#

APP = ipsec_perf
INSTNAME = intel-ipsec-mb

!if !defined(PREFIX)
PREFIX = C:\Program Files
!endif

# Prefer an installed library, fall back to the one in the parent directory
!if exist("$(PREFIX)\$(INSTNAME)\libIPSec_MB.lib")
IPSECLIB = "$(PREFIX)\$(INSTNAME)\libIPSec_MB.lib"
INCDIR = -I"$(PREFIX)\$(INSTNAME)"
!else
IPSECLIB = ..\libIPSec_MB.lib
INCDIR = -I.. -I..\include
!endif

# Quoted so that a WinRing0 location containing spaces still works
!ifdef WINRING0_DIR
EXTRA_CFLAGS = $(EXTRA_CFLAGS) /DWIN_MSR
INCDIR = $(INCDIR) -I"$(WINRING0_DIR)"
!endif

!ifdef DEBUG
DCFLAGS = /Od /DDEBUG /Z7
DLFLAGS = /debug
!else
DCFLAGS = /O2 /Oi
DLFLAGS =
!endif

!if "$(GCM_BIG_DATA)" == "y"
GCM_CFLAGS = /DGCM_BIG_DATA
!else
GCM_CFLAGS =
!endif

CC = cl
# _CRT_SECURE_NO_WARNINGS disables warning C4996 about unsecure strtok() being used
CFLAGS = /nologo /D_CRT_SECURE_NO_WARNINGS $(DCFLAGS) /Y- /W3 /WX- /Gm- /fp:precise /EHsc $(EXTRA_CFLAGS) $(GCM_CFLAGS) $(INCDIR)

LNK = link
LFLAGS = /out:$(APP).exe $(DLFLAGS)

OBJS = ipsec_perf.obj msr.obj

all: $(APP).exe

$(APP).exe: $(OBJS) $(IPSECLIB)
	$(LNK) $(LFLAGS) $(OBJS) $(IPSECLIB)

# msr.h is listed so that a header change rebuilds the objects
# NOTE(review): ipsec_perf.c is assumed to include msr.h - confirm
ipsec_perf.obj: ipsec_perf.c msr.h
	$(CC) /c $(CFLAGS) ipsec_perf.c

msr.obj: msr.c msr.h
	$(CC) /c $(CFLAGS) msr.c

# Leading '-' keeps nmake going whatever exit code del returns
clean:
	-del /q $(OBJS) $(APP).exe $(APP).pdb $(APP).ilk