diff options
Diffstat (limited to '')
-rwxr-xr-x | src/spdk/intel-ipsec-mb/LibPerfApp/Makefile | 83 | ||||
-rw-r--r-- | src/spdk/intel-ipsec-mb/LibPerfApp/README | 82 | ||||
-rwxr-xr-x | src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py | 308 | ||||
-rw-r--r-- | src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c | 1459 | ||||
-rw-r--r-- | src/spdk/intel-ipsec-mb/LibPerfApp/msr.c | 209 | ||||
-rw-r--r-- | src/spdk/intel-ipsec-mb/LibPerfApp/msr.h | 114 | ||||
-rw-r--r-- | src/spdk/intel-ipsec-mb/LibPerfApp/win_x64.mak | 69 |
7 files changed, 2324 insertions, 0 deletions
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/Makefile b/src/spdk/intel-ipsec-mb/LibPerfApp/Makefile new file mode 100755 index 00000000..7039558f --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/Makefile @@ -0,0 +1,83 @@ +# Copyright (c) 2017-2018, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of Intel Corporation nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +EXE=ipsec_perf +INSTPATH ?= /usr/include/intel-ipsec-mb.h + +CFLAGS = -DLINUX -D_GNU_SOURCE $(INCLUDES) \ + -W -Wall -Wextra -Wmissing-declarations -Wpointer-arith \ + -Wcast-qual -Wundef -Wwrite-strings \ + -Wformat -Wformat-security \ + -Wunreachable-code -Wmissing-noreturn -Wsign-compare -Wno-endif-labels \ + -Wstrict-prototypes -Wmissing-prototypes -Wold-style-definition \ + -pthread + +LDFLAGS = -fPIE -z noexecstack -z relro -z now -pthread +LDLIBS = -lIPSec_MB + +ifeq ("$(shell test -e $(INSTPATH) && echo -n yes)","yes") +# library installed +CFLAGS += +else +# library not installed +CFLAGS += -I../include -I../ +LDFLAGS += -L../ +endif + +ifeq ($(DEBUG),y) +CFLAGS += -g -DDEBUG -O0 +LDFLAGS += -g +else +CFLAGS += -O3 -fPIE -fstack-protector -D_FORTIFY_SOURCE=2 +endif + +SOURCES := ipsec_perf.c msr.c +OBJECTS := $(SOURCES:%.c=%.o) + +CHECKPATCH ?= checkpatch.pl +CPPCHECK ?= cppcheck + +.PHONY: all clean style cppcheck + +all: $(EXE) + +$(EXE): $(OBJECTS) + $(CC) $(LDFLAGS) $^ $(LDLIBS) -o $@ + +ipsec_perf.o: $(SOURCES) + +.PHONY: clean +clean: + -rm -f $(OBJECTS) + -rm -f $(EXE) + +SOURCES_STYLE := $(foreach infile,$(SOURCES),-f $(infile)) +CHECKPATCH?=checkpatch.pl +.PHONY: style +style: + $(CHECKPATCH) --no-tree --no-signoff --emacs --no-color \ +--ignore CODE_INDENT,INITIALISED_STATIC,LEADING_SPACE,SPLIT_STRING,\ +UNSPECIFIED_INT,ARRAY_SIZE,BLOCK_COMMENT_STYLE,GLOBAL_INITIALISERS,\ +COMPLEX_MACRO,SPACING,STORAGE_CLASS $(SOURCES_STYLE) diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/README b/src/spdk/intel-ipsec-mb/LibPerfApp/README new file mode 100644 index 00000000..dad423d9 --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/README @@ -0,0 +1,82 @@ +======================================================================== +README for Intel(R) Multi-Buffer Crypto for IPsec Library API +performance measurement tool + +February 2017 +======================================================================== + + +Contents +======== + +- Overview +- Files 
+- Compilation +- Usage +- Legal Disclaimer + + +Overview +======== +This test tool performs multiple executions of functions included in +Intel Multi-Buffer Crypto for IPsec Library. + +Files +===== + +ipsec_perf.c - Tool which produces text formatted output representing + average times of ipsec_mb functions execution. +ipsec_diff_tool.py - Another tool which interprets text data given. + +Compilation +=========== + +Required tools: +- GNU make +- gcc (GCC) 4.8.3 (or newer) + +Simply run "make" to compile the tool. +To clean the build please run "make clean". + +You can point to another directory containing the IPSec MB library by setting +LIB_LOC, for example: + LIB_LOC=../ipsec_mb_lib make + +In order to perform static code analysis or style check you can do: + make cppcheck +or + make style + +Be aware that you need to have the cppcheck tool installed and the checkpatch.pl +script copied into one of the directories listed in $PATH. +You can also set CPPCHECK and/or CHECKPATCH variables if you want to give paths +to these tools when they are placed in different directories, for example: + CPPCHECK=~/tools/cppcheck make cppcheck + CHECKPATCH=~/scripts/checkpatch.pl make style + +Usage +===== + +You can simply check the list of arguments by typing: + ./ipsec_perf -h + +Usage example: + ./ipsec_perf -c --no-avx512 --no-gcm -o 24 + +Later you can pass output to ipsec_diff_tool.py for data +analysis: + ./ipsec_diff_tool.py out1.txt out2.txt 5 + +Run ipsec_diff_tool.py -h to see the help page. + +Legal Disclaimer +================ + +THIS SOFTWARE IS PROVIDED BY INTEL"AS IS". NO LICENSE, EXPRESS OR +IMPLIED, BY ESTOPPEL OR OTHERWISE, TO ANY INTELLECTUAL PROPERTY RIGHTS +ARE GRANTED THROUGH USE. 
EXCEPT AS PROVIDED IN INTEL'S TERMS AND +CONDITIONS OF SALE, INTEL ASSUMES NO LIABILITY WHATSOEVER AND INTEL +DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY, RELATING TO SALE AND/OR +USE OF INTEL PRODUCTS INCLUDING LIABILITY OR WARRANTIES RELATING TO +FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABILITY, OR INFRINGEMENT +OF ANY PATENT, COPYRIGHT OR OTHER INTELLECTUAL PROPERTY RIGHT. diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py new file mode 100755 index 00000000..1e8219f5 --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py @@ -0,0 +1,308 @@ +#!/usr/bin/env python + +""" +********************************************************************** + Copyright(c) 2017-2018, Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +********************************************************************** +""" + +import sys + +# Number of parameters (ARCH, CIPHER_MODE, DIR, HASH_ALG, KEY_SIZE) +PAR_NUM = 5 + +class Variant(object): + """ + Class representing one test including chosen parameters and + results of average execution times + """ + def __init__(self, **args): + self.params = (args['arch'], args['cipher'], args['dir'], args['alg'], + args['keysize']) + + self.avg_times = [] + self.slope = None + self.intercept = None + + def set_times(self, avg_times): + """ + Fills test execution time list + """ + self.avg_times = avg_times + + def lin_reg(self, sizes): + """ + Computes linear regression of set of coordinates (x,y) + """ + + n = len(sizes) + + if n != len(self.avg_times): + print "Error!" 
+ return None + + sumx = sum(sizes) + sumy = sum(self.avg_times) + sumxy = sum([x * y for x, y in zip(sizes, self.avg_times)]) + sumsqrx = sum([pow(x, 2) for x in sizes]) + self.slope = (n * sumxy - sumx * sumy) / float(n * sumsqrx - pow(sumx, 2)) + self.intercept = (sumy - self.slope * sumx) / float(n) + + def get_params_str(self): + """ + Returns all parameters concatenated into one string + """ + return "\t".join(i for i in self.params) + + def get_lin_func_str(self): + """ + Returns string having linear coefficients + """ + slope = "{:.5f}".format(self.slope) + intercept = "{:.5f}".format(self.intercept) + return "{}\t{}".format(slope, intercept) + +class VarList(list): + """ + Class used to store all test variants as a list of objects + """ + + def find_obj(self, params): + """ + Finds first occurrence of object containing given parameters + """ + ret_val = None + matches = (obj for obj in self if obj.params == params) + try: + ret_val = next(matches) + except StopIteration: + pass + return ret_val + + def compare(self, list_b, tolerance): + """ + Finds variants from two data sets which are matching and compares + their linear regression coefficients. + Compares list_b against this list (self). 
+ """ + + if tolerance is None: + tolerance = 5.0 + if tolerance < 0.0: + print "Bad argument: Tolerance must not be less than 0%" + exit(1) + print "TOLERANCE: {:.2f}%".format(tolerance) + + warning = False + print "NO\tARCH\tCIPHER\tDIR\tHASH\tKEYSZ\tSLOPE A\tINTERCEPT A\tSLOPE B\tINTERCEPT B" + for i, obj_a in enumerate(self): + obj_b = list_b.find_obj(obj_a.params) + if obj_b != None: + if obj_a.slope < 0.0: + obj_a.slope = 0 + if obj_b.slope < 0.0: + obj_b.slope = 0 + slope_bv = 0.01 * tolerance * obj_a.slope # border value + intercept_bv = 0.01 * tolerance * obj_a.intercept + diff_slope = obj_b.slope - obj_a.slope + diff_intercept = obj_b.intercept - obj_a.intercept + if (obj_a.slope > 0.001 and obj_b.slope > 0.001 and + diff_slope > slope_bv) or diff_intercept > intercept_bv: + warning = True + print "{}\t{}\t{}\t{}".format(i + 1, + obj_b.get_params_str(), + obj_a.get_lin_func_str(), + obj_b.get_lin_func_str()) + if not warning: + print "No differences found." + return warning + + def printout(self): + """ + Prints out readable representation of the list + """ + + print "NO\tARCH\tCIPHER\tDIR\tHASH\tKEYSZ\tSLOPE \tINTERCEPT" + for i, obj in enumerate(self): + print "{}\t{}\t{}".format(i + 1, + obj.get_params_str(), + obj.get_lin_func_str()) + + + +class Parser(object): + """ + Class used to parse a text file containing performance data + """ + + def __init__(self, fname, verbose): + self.fname = fname + self.verbose = verbose + + @staticmethod + def convert2int(in_tuple): + """ + Converts a tuple of strings into a list of integers + """ + + result = list(in_tuple) # Converting to list + result = [int(i) for i in result] # Converting str to int + return result + + def load(self): + """ + Reads a text file by columns, stores data in objects + for further comparison of performance + """ + + v_list = VarList() + # Reading by columns, results in list of tuples + # Each tuple is representing a column from a text file + try: + f = open(self.fname, 'r') + except 
IOError: + print "Error reading {} file.".format(self.fname) + exit(1) + else: + with f: + cols = zip(*(line.strip().split('\t') for line in f)) + + # Reading first column with payload sizes, omitting the first 5 rows + sizes = self.convert2int(cols[0][PAR_NUM:]) + if self.verbose: + print "Available buffer sizes:\n" + print sizes + print "========================================================" + print "\n\nVariants:\n" + + # Reading remaining columns containing performance data + for row in cols[1:]: + # First rows are run options + arch, c_mode, c_dir, h_alg, key_size = row[:PAR_NUM] + if self.verbose: + print arch, c_mode, c_dir, h_alg, key_size + + # Getting average times + avg_times = self.convert2int(row[PAR_NUM:]) + if self.verbose: + print avg_times + print "------" + + # Putting new object to the result list + v_list.append(Variant(arch=arch, cipher=c_mode, dir=c_dir, + alg=h_alg, keysize=key_size)) + v_list[-1].set_times(avg_times) + # Finding linear function representation of data set + v_list[-1].lin_reg(sizes) + if self.verbose: + print "({}, {})".format(v_list[-1].slope, v_list[-1].intercept) + print "============\n" + return v_list, sizes + +class DiffTool(object): + """ + Main class + """ + + def __init__(self): + self.fname_a = None + self.fname_b = None + self.tolerance = None + self.verbose = False + self.analyze = False + + @staticmethod + def usage(): + """ + Prints usage + """ + print "This tool compares file_b against file_a printing out differences." 
+ print "Usage:" + print "\tipsec_diff_tool.py [-v] [-a] file_a file_b [tol]\n" + print "\t-v - verbose" + print "\t-a - takes only one argument: name of the file to analyze" + print "\tfile_a, file_b - text files containing output from ipsec_perf tool" + print "\ttol - tolerance [%], must be >= 0, default 5\n" + print "Examples:" + print "\tipsec_diff_tool.py file01.txt file02.txt 10" + print "\tipsec_diff_tool.py -a file02.txt" + print "\tipsec_diff_tool.py -v -a file01.txt" + + + def parse_args(self): + """ + Get commandline arguments + """ + if len(sys.argv) < 3 or sys.argv[1] == "-h": + self.usage() + exit(1) + if sys.argv[1] == "-a": + self.analyze = True + self.fname_a = sys.argv[2] + elif sys.argv[2] == "-a": + if sys.argv[1] == "-v": + self.verbose = True + self.analyze = True + self.fname_a = sys.argv[3] + elif sys.argv[1] == "-v": + self.verbose = True + self.fname_a = sys.argv[2] + self.fname_b = sys.argv[3] + if len(sys.argv) >= 5: + self.tolerance = float(sys.argv[4]) + + else: + self.fname_a = sys.argv[1] + self.fname_b = sys.argv[2] + if len(sys.argv) >= 4: + self.tolerance = float(sys.argv[3]) + + def run(self): + """ + Main method + """ + self.parse_args() + + parser_a = Parser(self.fname_a, self.verbose) + list_a, sizes_a = parser_a.load() + + if not self.analyze: + parser_b = Parser(self.fname_b, self.verbose) + list_b, sizes_b = parser_b.load() + if sizes_a != sizes_b: + print "Error. Buffer size lists in two compared " \ + "data sets differ! 
Aborting.\n" + exit(1) + warning = list_a.compare(list_b, self.tolerance) # Compares list_b against list_a + if warning: + exit(2) + else: + list_a.printout() # Takes only one file and prints it out + +if __name__ == '__main__': + DiffTool().run() diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c new file mode 100644 index 00000000..841b689a --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c @@ -0,0 +1,1459 @@ +/********************************************************************** + Copyright(c) 2017-2018, Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#ifdef _WIN32 +#include <windows.h> +#include <process.h> +#include <intrin.h> +#define __forceinline static __forceinline +#else +#include <x86intrin.h> +#define __forceinline static inline __attribute__((always_inline)) +#include <unistd.h> +#include <pthread.h> +#include <sched.h> +#endif + +#include <intel-ipsec-mb.h> + +#include "msr.h" + +#define BUFSIZE (512 * 1024 * 1024) +#define JOB_SIZE (2 * 1024) +#define JOB_SIZE_STEP 16 +#define REGION_SIZE (JOB_SIZE + 3003) +#define NUM_OFFSETS (BUFSIZE / REGION_SIZE) +#define NUM_RUNS 16 +#define KEYS_PER_JOB 15 +#define ITER_SCALE 200000 +#define BITS(x) (sizeof(x) * 8) +#define DIM(x) (sizeof(x)/sizeof(x[0])) + +#define NUM_ARCHS 4 /* SSE, AVX, AVX2, AVX512 */ +#define NUM_TYPES 6 /* AES_HMAC, AES_DOCSIS, AES_GCM, AES_CCM, DES, 3DES */ +#define MAX_NUM_THREADS 16 /* Maximum number of threads that can be created */ + +#define CIPHER_MODES_AES 4 /* CBC, CNTR, CNTR+8, NULL_CIPHER */ +#define CIPHER_MODES_DOCSIS 4 /* AES DOCSIS, AES DOCSIS+8, DES DOCSIS, + DES DOCSIS+4 */ +#define CIPHER_MODES_DES 1 /* DES */ +#define CIPHER_MODES_GCM 1 /* GCM */ +#define CIPHER_MODES_CCM 1 /* CCM */ +#define CIPHER_MODES_3DES 1 /* 3DES */ +#define DIRECTIONS 2 /* ENC, DEC */ +#define HASH_ALGS_AES 9 /* SHA1, SHA256, SHA224, SHA384, SHA512, XCBC, + MD5, 
NULL_HASH, CMAC */ +#define HASH_ALGS_DOCSIS 1 /* NULL_HASH */ +#define HASH_ALGS_GCM 1 /* GCM */ +#define HASH_ALGS_CCM 1 /* CCM */ +#define HASH_ALGS_DES 1 /* NULL_HASH for DES */ +#define HASH_ALGS_3DES 1 /* NULL_HASH for 3DES */ +#define KEY_SIZES_AES 3 /* 16, 24, 32 */ +#define KEY_SIZES_DOCSIS 1 /* 16 or 8 */ +#define KEY_SIZES_GCM 3 /* 16, 24, 32 */ +#define KEY_SIZES_CCM 1 /* 16 */ +#define KEY_SIZES_DES 1 /* 8 */ +#define KEY_SIZES_3DES 1 /* 8 x 3 */ + +#define IA32_MSR_FIXED_CTR_CTRL 0x38D +#define IA32_MSR_PERF_GLOBAL_CTR 0x38F +#define IA32_MSR_CPU_UNHALTED_THREAD 0x30A + +/* Those defines tell how many different test cases are to be performed. + * Have to be multiplied by number of chosen architectures. + */ +#define VARIANTS_PER_ARCH_AES (CIPHER_MODES_AES * DIRECTIONS * \ + HASH_ALGS_AES * KEY_SIZES_AES) +#define VARIANTS_PER_ARCH_DOCSIS (CIPHER_MODES_DOCSIS * DIRECTIONS * \ + HASH_ALGS_DOCSIS * KEY_SIZES_DOCSIS) +#define VARIANTS_PER_ARCH_GCM (CIPHER_MODES_GCM * DIRECTIONS * \ + HASH_ALGS_GCM * KEY_SIZES_GCM) +#define VARIANTS_PER_ARCH_CCM (CIPHER_MODES_CCM * DIRECTIONS * \ + HASH_ALGS_CCM * KEY_SIZES_CCM) +#define VARIANTS_PER_ARCH_DES (CIPHER_MODES_DES * DIRECTIONS * \ + HASH_ALGS_DES * KEY_SIZES_DES) +#define VARIANTS_PER_ARCH_3DES (CIPHER_MODES_3DES * DIRECTIONS * \ + HASH_ALGS_3DES * KEY_SIZES_3DES) + +/* Typedefs used for GCM callbacks */ +typedef void (*aesni_gcm_t)(const struct gcm_key_data *, + struct gcm_context_data *, + uint8_t *, const uint8_t *, uint64_t, + const uint8_t *, const uint8_t *, uint64_t, + uint8_t *, uint64_t); +typedef void (*aesni_gcm_pre_t)(const void *, struct gcm_key_data *); + +/* AES_HMAC, DOCSIS callbacks */ +struct funcs_s { + init_mb_mgr_t init_mb_mgr; + get_next_job_t get_next_job; + submit_job_t submit_job; + get_completed_job_t get_completed_job; + flush_job_t flush_job; +}; + +/* GCM callbacks */ +struct funcs_gcm_s { + aesni_gcm_pre_t aesni_gcm_pre; + aesni_gcm_t aesni_gcm_enc; + aesni_gcm_t aesni_gcm_dec; 
+}; + +enum arch_type_e { + ARCH_SSE = 0, + ARCH_AVX, + ARCH_AVX2, + ARCH_AVX512 +}; + +enum test_type_e { + TTYPE_AES_HMAC, + TTYPE_AES_DOCSIS, + TTYPE_AES_GCM, + TTYPE_AES_CCM, + TTYPE_AES_DES, + TTYPE_AES_3DES +}; + +/* This enum will be mostly translated to JOB_CIPHER_MODE */ +enum test_cipher_mode_e { + TEST_CBC = 1, + TEST_CNTR, + TEST_CNTR8, /* CNTR with increased buffer by 8 */ + TEST_NULL_CIPHER, + TEST_AESDOCSIS, + TEST_AESDOCSIS8, /* AES DOCSIS with increased buffer size by 8 */ + TEST_DESDOCSIS, + TEST_DESDOCSIS4, /* DES DOCSIS with increased buffer size by 4 */ + TEST_GCM, /* Additional field used by GCM, not translated */ + TEST_CCM, + TEST_DES, + TEST_3DES, +}; + +/* This enum will be mostly translated to JOB_HASH_ALG */ +enum test_hash_alg_e { + TEST_SHA1 = 1, + TEST_SHA_224, + TEST_SHA_256, + TEST_SHA_384, + TEST_SHA_512, + TEST_XCBC, + TEST_MD5, + TEST_HASH_CMAC, /* added here to be included in AES tests */ + TEST_NULL_HASH, + TEST_HASH_GCM, /* Additional field used by GCM, not translated */ + TEST_CUSTOM_HASH, /* unused */ + TEST_HASH_CCM +}; + +/* Struct storing cipher parameters */ +struct params_s { + JOB_CIPHER_DIRECTION cipher_dir; + enum test_type_e test_type; /* AES, DOCSIS, GCM */ + enum test_cipher_mode_e cipher_mode; + enum test_hash_alg_e hash_alg; + uint32_t aes_key_size; + uint32_t size_aes; + uint32_t num_sizes; + uint32_t num_variants; + uint32_t core; +}; + +/* This struct stores all information about performed test case */ +struct variant_s { + uint32_t arch; + struct params_s params; + uint64_t *avg_times; +}; + +/* Struct storing information to be passed to threads */ +struct thread_info { + int print_info; + int core; +} t_info[MAX_NUM_THREADS]; + +enum cache_type_e { + WARM = 0, + COLD = 1 +}; + +#ifdef DEBUG +#define FUNCS(A) { \ + init_mb_mgr_##A, \ + get_next_job_##A, \ + submit_job_##A, \ + get_completed_job_##A, \ + flush_job_##A \ + } +#else +#define FUNCS(A) { \ + init_mb_mgr_##A, \ + get_next_job_##A, \ + 
submit_job_nocheck_##A, \ + get_completed_job_##A, \ + flush_job_##A \ + } +#endif + +#define FUNCS_GCM(A) \ + {aes_gcm_pre_128_##A, aes_gcm_enc_128_##A, aes_gcm_dec_128_##A}, \ + {aes_gcm_pre_192_##A, aes_gcm_enc_192_##A, aes_gcm_dec_192_##A}, \ + {aes_gcm_pre_256_##A, aes_gcm_enc_256_##A, aes_gcm_dec_256_##A} + + +/* Function pointers used by TTYPE_AES_HMAC, TTYPE_AES_DOCSIS */ +struct funcs_s func_sets[NUM_ARCHS] = { + FUNCS(sse), + FUNCS(avx), + FUNCS(avx2), + FUNCS(avx512) +}; + +/* Function pointers used by TTYPE_AES_GCM */ +struct funcs_gcm_s func_sets_gcm[NUM_ARCHS - 1][3] = { + {FUNCS_GCM(sse)}, + {FUNCS_GCM(avx_gen2)}, /* AVX */ + {FUNCS_GCM(avx_gen4)} /* AVX2 */ +}; + +enum cache_type_e cache_type = WARM; +/* As enum: SHA1, SHA224, SHA256, SHA384, SHA512, + XCBC, MD5, NULL, GMAC, CUSTOM, CCM, CMAC */ +const uint32_t auth_tag_length_bytes[12] = { + 12, 14, 16, 24, 32, 12, 12, 0, 8, 0, 16, 16 +}; +uint8_t *buf = NULL; +uint32_t index_limit; +uint128_t *keys = NULL; +uint64_t *offset_ptr = NULL; +uint32_t key_idxs[NUM_OFFSETS]; +uint32_t offsets[NUM_OFFSETS]; +int sha_size_incr = 24; + +uint8_t archs[NUM_ARCHS] = {1, 1, 1, 1}; /* uses all function sets */ +/* AES, DOCSIS, GCM, CCM, DES, 3DES */ +uint8_t test_types[NUM_TYPES] = {1, 1, 1, 1, 1, 1}; + +int use_gcm_job_api = 0; +int use_unhalted_cycles = 0; /* read unhalted cycles instead of tsc */ +uint64_t rd_cycles_cost = 0; /* cost of reading unhalted cycles */ +uint64_t core_mask = 0; /* bitmap of selected cores */ + +uint64_t flags = 0; /* flags passed to alloc_mb_mgr() */ + +/* Those inline functions run different types of ipsec_mb library functions. 
+ * They run different functions depending on the chosen architecture + */ +__forceinline void init_mb_mgr(MB_MGR *mgr, uint32_t arch) +{ + func_sets[arch].init_mb_mgr(mgr); +} + +__forceinline JOB_AES_HMAC *get_next_job(MB_MGR *mgr, const uint32_t arch) +{ + return func_sets[arch].get_next_job(mgr); +} + +__forceinline JOB_AES_HMAC *submit_job(MB_MGR *mgr, const uint32_t arch) +{ + return func_sets[arch].submit_job(mgr); +} + +__forceinline JOB_AES_HMAC *get_completed_job(MB_MGR *mgr, const uint32_t arch) +{ + return func_sets[arch].get_completed_job(mgr); +} + +__forceinline JOB_AES_HMAC *flush_job(MB_MGR *mgr, const uint32_t arch) +{ + return func_sets[arch].flush_job(mgr); +} + +/* GCM functions take also key size argument (128, 192, 256bit) */ +__forceinline void aesni_gcm_pre(const uint32_t arch, const uint8_t key_sz, + uint8_t *key, struct gcm_key_data *gdata) +{ + func_sets_gcm[arch][key_sz].aesni_gcm_pre(key, gdata); +} + +__forceinline void aesni_gcm_enc(const uint32_t arch, const uint8_t key_sz, + const struct gcm_key_data *gdata, + struct gcm_context_data *ctx, + uint8_t *out, uint8_t const *in, + uint64_t len, uint8_t *iv, + uint8_t const *aad, uint64_t aad_len, + uint8_t *auth_tag, uint64_t auth_tag_len) +{ + func_sets_gcm[arch][key_sz].aesni_gcm_enc(gdata, ctx, out, in, len, iv, + aad, aad_len, + auth_tag, auth_tag_len); + +} + +__forceinline void aesni_gcm_dec(const uint32_t arch, const uint8_t key_sz, + const struct gcm_key_data *gdata, + struct gcm_context_data *ctx, + uint8_t *out, uint8_t const *in, + uint64_t len, uint8_t *iv, + uint8_t const *aad, uint64_t aad_len, + uint8_t *auth_tag, uint64_t auth_tag_len) +{ + func_sets_gcm[arch][key_sz].aesni_gcm_dec(gdata, ctx, out, in, len, iv, + aad, aad_len, + auth_tag, auth_tag_len); + +} + +/* Read unhalted cycles */ +__forceinline uint64_t read_cycles(uint32_t core) +{ + uint64_t val = 0; + + if (msr_read(core, IA32_MSR_CPU_UNHALTED_THREAD, + &val) != MACHINE_RETVAL_OK) { + fprintf(stderr, "Error 
reading cycles " + "counter on core %u!\n", core); + exit(EXIT_FAILURE); + } + + return val; +} + +/* Compare function used by qsort */ +static int compare(const void *a, const void *b) +{ + uint64_t x = *(const uint64_t *)a - *(const uint64_t *)b; + + if (x == 0) + return 0; + + if (x > *(const uint64_t *)a) + return -1; + + return 1; +} + +/* Get number of bits set in value */ +static int bitcount(const uint64_t val) +{ + unsigned i; + int bits = 0; + + for (i = 0; i < BITS(val); i++) + if (val & (1ULL << i)) + bits++; + + return bits; +} + +/* Get the next core in core mask + Set last_core to negative to start from beginning of core_mask */ +static int next_core(const uint64_t core_mask, + const int last_core) +{ + int core = 0; + + if (last_core >= 0) + core = last_core; + + while (((core_mask >> core) & 1) == 0) { + core++; + + if (core >= (int)BITS(core_mask)) + return -1; + } + + return core; +} + +/* Set CPU affinity for current thread */ +static int set_affinity(const int cpu) +{ + int ret = 0; +#ifndef _WIN32 + cpu_set_t cpuset; + int num_cpus = 0; + + /* Get number of cpus in the system */ + num_cpus = sysconf(_SC_NPROCESSORS_CONF); + if (num_cpus == 0) { + fprintf(stderr, "Zero processors in the system!"); + return 1; + } + + /* Check if selected core is valid */ + if (cpu < 0 || cpu >= num_cpus) { + fprintf(stderr, "Invalid CPU selected! 
" + "Max valid CPU is %u\n", num_cpus - 1); + return 1; + } + + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + /* Set affinity of current process to cpu */ + ret = sched_setaffinity(0, sizeof(cpuset), &cpuset); +#endif /* _WIN32 */ + return ret; +} + +/* Start counting unhalted cycles */ +static int start_cycles_ctr(uint32_t core) +{ + int ret; + + if (core >= BITS(core_mask)) + return 1; + + /* Disable cycles counter */ + ret = msr_write(core, IA32_MSR_PERF_GLOBAL_CTR, 0); + if (ret != MACHINE_RETVAL_OK) + return ret; + + /* Zero cycles counter */ + ret = msr_write(core, IA32_MSR_CPU_UNHALTED_THREAD, 0); + if (ret != MACHINE_RETVAL_OK) + return ret; + + /* Enable OS and user tracking in FixedCtr1 */ + ret = msr_write(core, IA32_MSR_FIXED_CTR_CTRL, 0x30); + if (ret != MACHINE_RETVAL_OK) + return ret; + + /* Enable cycles counter */ + return msr_write(core, IA32_MSR_PERF_GLOBAL_CTR, (1ULL << 33)); +} + +/* Init MSR module */ +static int init_msr_mod(void) +{ + unsigned max_core_count = 0; +#ifndef _WIN32 + max_core_count = sysconf(_SC_NPROCESSORS_CONF); + if (max_core_count == 0) { + fprintf(stderr, "Zero processors in the system!"); + return MACHINE_RETVAL_ERROR; + } +#endif + return machine_init(max_core_count); +} + +/* Set the cost of reading unhalted cycles using RDMSR */ +static int set_unhalted_cycle_cost(const int core, uint64_t *value) +{ + uint64_t time1, time2; + + if (value == NULL || core < 0) + return 1; + + time1 = read_cycles(core); + time2 = read_cycles(core); + + /* Calculate delta */ + *value = (time2 - time1); + + return 0; +} + +/* Calculate the general cost of reading unhalted cycles (median) */ +static int set_avg_unhalted_cycle_cost(const int core, uint64_t *value) +{ + unsigned i; + uint64_t cycles[10]; + + if (value == NULL || core_mask == 0 || core < 0) + return 1; + + /* Fill cycles table with read cost values */ + for (i = 0; i < DIM(cycles); i++) + if (set_unhalted_cycle_cost(core, &cycles[i]) != 0) + return 1; + + /* sort array */ + 
qsort(cycles, DIM(cycles), sizeof(uint64_t), compare); + + /* set median cost */ + *value = cycles[DIM(cycles)/2]; + + return 0; +} + +/* Freeing allocated memory */ +static void free_mem(void) +{ + if (offset_ptr != NULL) + free(offset_ptr); + if (buf != NULL) + free(buf); +} + +/* Input buffer initialization */ +static void init_buf(enum cache_type_e ctype) +{ + uint32_t tmp_off; + uint64_t offset; + int i; + + buf = (uint8_t *) malloc(BUFSIZE + REGION_SIZE); + if (!buf) { + fprintf(stderr, "Could not malloc buf\n"); + exit(EXIT_FAILURE); + } + + offset_ptr = (uint64_t *) + malloc(NUM_OFFSETS * KEYS_PER_JOB * sizeof(uint128_t) + 0x0F); + if (!offset_ptr) { + fprintf(stderr, "Could not malloc keys\n"); + free_mem(); + exit(EXIT_FAILURE); + } + + offset = (uint64_t) offset_ptr; + keys = (uint128_t *) ((offset + 0x0F) & ~0x0F); /* align to 16 bytes */ + + if (ctype == COLD) { + for (i = 0; i < NUM_OFFSETS; i++) { + offsets[i] = i * REGION_SIZE + (rand() & 0x3F0); + key_idxs[i] = i * KEYS_PER_JOB; + } + for (i = NUM_OFFSETS - 1; i >= 0; i--) { + offset = rand(); + offset *= i; + offset /= RAND_MAX; + tmp_off = offsets[offset]; + offsets[offset] = offsets[i]; + offsets[i] = tmp_off; + tmp_off = key_idxs[offset]; + key_idxs[offset] = key_idxs[i]; + key_idxs[i] = tmp_off; + } + index_limit = NUM_OFFSETS; + } else {/* WARM */ + for (i = 0; i < NUM_OFFSETS; i += 2) { + offsets[i] = (2 * i + 0) * REGION_SIZE + + (rand() & 0x3F0); + offsets[i + 1] = (2 * i + 1) * REGION_SIZE + + (rand() & 0x3F0); + key_idxs[i] = (2 * i + 0) * KEYS_PER_JOB; + } + index_limit = 8; + } +} + +/* This function translates enum test_cipher_mode_e to be used by ipsec_mb + * library + */ +static JOB_CIPHER_MODE translate_cipher_mode(enum test_cipher_mode_e test_mode) +{ + JOB_CIPHER_MODE c_mode = NULL_CIPHER; + + switch (test_mode) { + case TEST_CBC: + c_mode = CBC; + break; + case TEST_CNTR: + case TEST_CNTR8: + c_mode = CNTR; + break; + case TEST_NULL_CIPHER: + c_mode = NULL_CIPHER; + break; + case 
TEST_AESDOCSIS: + case TEST_AESDOCSIS8: + c_mode = DOCSIS_SEC_BPI; + break; + case TEST_DESDOCSIS: + case TEST_DESDOCSIS4: + c_mode = DOCSIS_DES; + break; + case TEST_GCM: + c_mode = GCM; + break; + case TEST_CCM: + c_mode = CCM; + break; + case TEST_DES: + c_mode = DES; + break; + case TEST_3DES: + c_mode = DES3; + break; + default: + break; + } + return c_mode; +} + +/* Performs test using AES_HMAC or DOCSIS */ +static uint64_t +do_test(const uint32_t arch, MB_MGR *mb_mgr, struct params_s *params, + const uint32_t num_iter) +{ + JOB_AES_HMAC *job; + JOB_AES_HMAC job_template; + uint32_t i; + static uint32_t index = 0; + static DECLARE_ALIGNED(uint128_t iv, 16); + static uint32_t ipad[5], opad[5], digest[3]; + static DECLARE_ALIGNED(uint32_t k1_expanded[11 * 4], 16); + static DECLARE_ALIGNED(uint8_t k2[16], 16); + static DECLARE_ALIGNED(uint8_t k3[16], 16); + static DECLARE_ALIGNED(struct gcm_key_data gdata_key, 16); + uint32_t size_aes; + uint64_t time = 0; + uint32_t aux; + + if ((params->cipher_mode == TEST_AESDOCSIS8) || + (params->cipher_mode == TEST_CNTR8)) + size_aes = params->size_aes + 8; + else if (params->cipher_mode == TEST_DESDOCSIS4) + size_aes = params->size_aes + 4; + else + size_aes = params->size_aes; + + job_template.msg_len_to_cipher_in_bytes = size_aes; + job_template.msg_len_to_hash_in_bytes = size_aes + sha_size_incr; + job_template.hash_start_src_offset_in_bytes = 0; + job_template.cipher_start_src_offset_in_bytes = sha_size_incr; + job_template.iv = (uint8_t *) &iv; + job_template.iv_len_in_bytes = 16; + + job_template.auth_tag_output = (uint8_t *) digest; + + switch (params->hash_alg) { + case TEST_XCBC: + job_template.u.XCBC._k1_expanded = k1_expanded; + job_template.u.XCBC._k2 = k2; + job_template.u.XCBC._k3 = k3; + job_template.hash_alg = AES_XCBC; + break; + case TEST_HASH_CCM: + job_template.hash_alg = AES_CCM; + break; + case TEST_HASH_GCM: + job_template.hash_alg = AES_GMAC; + break; + case TEST_NULL_HASH: + job_template.hash_alg = 
NULL_HASH; + break; + case TEST_HASH_CMAC: + job_template.u.CMAC._key_expanded = k1_expanded; + job_template.u.CMAC._skey1 = k2; + job_template.u.CMAC._skey2 = k3; + job_template.hash_alg = AES_CMAC; + break; + default: + /* HMAC hash alg is SHA1 or MD5 */ + job_template.u.HMAC._hashed_auth_key_xor_ipad = + (uint8_t *) ipad; + job_template.u.HMAC._hashed_auth_key_xor_opad = + (uint8_t *) opad; + job_template.hash_alg = (JOB_HASH_ALG) params->hash_alg; + break; + } + job_template.auth_tag_output_len_in_bytes = + (uint64_t) auth_tag_length_bytes[job_template.hash_alg - 1]; + + job_template.cipher_direction = params->cipher_dir; + + if (params->cipher_mode == TEST_NULL_CIPHER) { + job_template.chain_order = HASH_CIPHER; + } else { + if (job_template.cipher_direction == ENCRYPT) + job_template.chain_order = CIPHER_HASH; + else + job_template.chain_order = HASH_CIPHER; + } + + /* Translating enum to the API's one */ + job_template.cipher_mode = translate_cipher_mode(params->cipher_mode); + job_template.aes_key_len_in_bytes = params->aes_key_size; + if (job_template.cipher_mode == GCM) { + uint8_t key[32]; + + aesni_gcm_pre(arch, (params->aes_key_size / 8) - 2, + key, &gdata_key); + job_template.aes_enc_key_expanded = &gdata_key; + job_template.aes_dec_key_expanded = &gdata_key; + job_template.u.GCM.aad_len_in_bytes = 12; + job_template.iv_len_in_bytes = 12; + } else if (job_template.cipher_mode == CCM) { + job_template.msg_len_to_cipher_in_bytes = size_aes; + job_template.msg_len_to_hash_in_bytes = size_aes; + job_template.hash_start_src_offset_in_bytes = 0; + job_template.cipher_start_src_offset_in_bytes = 0; + job_template.u.CCM.aad_len_in_bytes = 8; + job_template.iv_len_in_bytes = 13; + } else if (job_template.cipher_mode == DES || + job_template.cipher_mode == DES3 || + job_template.cipher_mode == DOCSIS_DES) { + job_template.aes_key_len_in_bytes = 8; + job_template.iv_len_in_bytes = 8; + } + +#ifndef _WIN32 + if (use_unhalted_cycles) + time = 
read_cycles(params->core); + else +#endif + time = __rdtscp(&aux); + + for (i = 0; i < num_iter; i++) { + job = get_next_job(mb_mgr, arch); + *job = job_template; + + job->src = buf + offsets[index]; + job->dst = buf + offsets[index] + sha_size_incr; + if (job->cipher_mode == GCM) { + job->u.GCM.aad = job->src; + } else if (job->cipher_mode == CCM) { + job->u.CCM.aad = job->src; + job->aes_enc_key_expanded = job->aes_dec_key_expanded = + (uint32_t *) &keys[key_idxs[index]]; + } else if (job->cipher_mode == DES3) { + static const void *ks_ptr[3]; + + ks_ptr[0] = ks_ptr[1] = ks_ptr[2] = + &keys[key_idxs[index]]; + job->aes_enc_key_expanded = + job->aes_dec_key_expanded = ks_ptr; + } else { + job->aes_enc_key_expanded = job->aes_dec_key_expanded = + (uint32_t *) &keys[key_idxs[index]]; + } + + index += 2; + if (index >= index_limit) + index = 0; + + job = submit_job(mb_mgr, arch); + while (job) { +#ifdef DEBUG + if (job->status != STS_COMPLETED) + fprintf(stderr, "failed job, status:%d\n", + job->status); +#endif + job = get_completed_job(mb_mgr, arch); + } + } + + while ((job = flush_job(mb_mgr, arch))) { +#ifdef DEBUG + if (job->status != STS_COMPLETED) + fprintf(stderr, "failed job, status:%d\n", job->status); +#endif + } + +#ifndef _WIN32 + if (use_unhalted_cycles) + time = (read_cycles(params->core) - rd_cycles_cost) - time; + else +#endif + time = __rdtscp(&aux) - time; + + return time / num_iter; +} + +/* Performs test using GCM */ +static uint64_t +do_test_gcm(const uint32_t arch, struct params_s *params, + const uint32_t num_iter) +{ + struct gcm_key_data gdata_key; + struct gcm_context_data gdata_ctx; + uint8_t *key; + static uint32_t index = 0; + uint8_t key_sz = params->aes_key_size / 8 - 2; + uint32_t size_aes = params->size_aes; + uint32_t i; + uint8_t aad[12]; + uint8_t auth_tag[12]; + DECLARE_ALIGNED(uint8_t iv[16], 16); + uint64_t time = 0; + uint32_t aux; + + key = (uint8_t *) malloc(sizeof(uint8_t) * params->aes_key_size); + if (!key) { + 
fprintf(stderr, "Could not malloc key\n"); + free_mem(); + exit(EXIT_FAILURE); + } + + aesni_gcm_pre(arch, key_sz, key, &gdata_key); + if (params->cipher_dir == ENCRYPT) { +#ifndef _WIN32 + if (use_unhalted_cycles) + time = read_cycles(params->core); + else +#endif + time = __rdtscp(&aux); + + for (i = 0; i < num_iter; i++) { + aesni_gcm_enc(arch, key_sz, &gdata_key, &gdata_ctx, + buf + offsets[index] + sha_size_incr, + buf + offsets[index] + sha_size_incr, + size_aes, iv, aad, sizeof(aad), + auth_tag, sizeof(auth_tag)); + index += 2; + if (index >= index_limit) + index = 0; + } +#ifndef _WIN32 + if (use_unhalted_cycles) + time = (read_cycles(params->core) - + rd_cycles_cost) - time; + else +#endif + time = __rdtscp(&aux) - time; + } else { /*DECRYPT*/ +#ifndef _WIN32 + if (use_unhalted_cycles) + time = read_cycles(params->core); + else +#endif + time = __rdtscp(&aux); + + for (i = 0; i < num_iter; i++) { + aesni_gcm_dec(arch, key_sz, &gdata_key, &gdata_ctx, + buf + offsets[index] + sha_size_incr, + buf + offsets[index] + sha_size_incr, + size_aes, iv, aad, sizeof(aad), + auth_tag, sizeof(auth_tag)); + index += 2; + if (index >= index_limit) + index = 0; + } +#ifndef _WIN32 + if (use_unhalted_cycles) + time = (read_cycles(params->core) - + rd_cycles_cost) - time; + else +#endif + time = __rdtscp(&aux) - time; + } + + free(key); + return time / num_iter; +} + + +/* Method used by qsort to compare 2 values */ +static int compare_uint64_t(const void *a, const void *b) +{ + return (int)(int64_t)(*(const uint64_t *)a - *(const uint64_t *)b); +} + +/* Computes mean of set of times after dropping bottom and top quarters */ +static uint64_t mean_median(uint64_t *array, uint32_t size) +{ + uint32_t quarter = size / 4; + uint32_t i; + uint64_t sum; + + /* these are single threaded runs, so we skip + * the hardware thread related skew clipping + * thus skipping "ignore first and last eighth" + */ + + /* ignore lowest and highest quarter */ + qsort(array, size, 
sizeof(uint64_t), compare_uint64_t); + + /* dropping the bottom and top quarters + * after sorting to remove noise/variations + */ + array += quarter; + size -= quarter * 2; + + + if ((size == 0) || (size & 0x80000000)) { + fprintf(stderr, "not enough data points\n"); + free_mem(); + exit(EXIT_FAILURE); + } + sum = 0; + for (i = 0; i < size; i++) + sum += array[i]; + + sum = (sum + size / 2) / size; + return sum; +} + +/* Runs test for each buffer size and stores averaged execution time */ +static void +process_variant(MB_MGR *mgr, const uint32_t arch, struct params_s *params, + struct variant_s *variant_ptr, const uint32_t run) +{ + const uint32_t sizes = params->num_sizes; + uint64_t *times = &variant_ptr->avg_times[run]; + uint32_t sz; + + for (sz = 0; sz < sizes; sz++) { + const uint32_t size_aes = (sz + 1) * JOB_SIZE_STEP; + const uint32_t num_iter = ITER_SCALE / size_aes; + + params->size_aes = size_aes; + if (params->test_type == TTYPE_AES_GCM && (!use_gcm_job_api)) + *times = do_test_gcm(arch, params, 2 * num_iter); + else + *times = do_test(arch, mgr, params, num_iter); + times += NUM_RUNS; + } + + variant_ptr->params = *params; + variant_ptr->arch = arch; +} + +/* Sets cipher mode, hash algorithm */ +static void +do_variants(MB_MGR *mgr, const uint32_t arch, struct params_s *params, + const uint32_t run, struct variant_s **variant_ptr, + uint32_t *variant) +{ + uint32_t hash_alg; + uint32_t h_start = TEST_SHA1; + uint32_t h_end = TEST_NULL_HASH; + uint32_t c_mode; + uint32_t c_start = TEST_CBC; + uint32_t c_end = TEST_NULL_CIPHER; + + switch (params->test_type) { + case TTYPE_AES_DOCSIS: + h_start = TEST_NULL_HASH; + c_start = TEST_AESDOCSIS; + c_end = TEST_DESDOCSIS4; + break; + case TTYPE_AES_GCM: + h_start = TEST_HASH_GCM; + h_end = TEST_HASH_GCM; + c_start = TEST_GCM; + c_end = TEST_GCM; + break; + case TTYPE_AES_CCM: + h_start = TEST_HASH_CCM; + h_end = TEST_HASH_CCM; + c_start = TEST_CCM; + c_end = TEST_CCM; + break; + case TTYPE_AES_DES: + h_start 
= TEST_NULL_HASH; + h_end = TEST_NULL_HASH; + c_start = TEST_DES; + c_end = TEST_DES; + break; + case TTYPE_AES_3DES: + h_start = TEST_NULL_HASH; + h_end = TEST_NULL_HASH; + c_start = TEST_3DES; + c_end = TEST_3DES; + break; + default: + break; + } + + for (c_mode = c_start; c_mode <= c_end; c_mode++) { + params->cipher_mode = (enum test_cipher_mode_e) c_mode; + for (hash_alg = h_start; hash_alg <= h_end; hash_alg++) { + params->hash_alg = (enum test_hash_alg_e) hash_alg; + process_variant(mgr, arch, params, *variant_ptr, run); + (*variant)++; + (*variant_ptr)++; + } + } +} + +/* Sets cipher direction and key size */ +static void +run_dir_test(MB_MGR *mgr, const uint32_t arch, struct params_s *params, + const uint32_t run, struct variant_s **variant_ptr, + uint32_t *variant) +{ + uint32_t dir; + uint32_t k; /* Key size */ + uint32_t limit = AES_256_BYTES; /* Key size value limit */ + + if (params->test_type == TTYPE_AES_DOCSIS || + params->test_type == TTYPE_AES_DES || + params->test_type == TTYPE_AES_3DES || + params->test_type == TTYPE_AES_CCM) + limit = AES_128_BYTES; + + init_mb_mgr(mgr, arch); + + for (dir = ENCRYPT; dir <= DECRYPT; dir++) { + params->cipher_dir = (JOB_CIPHER_DIRECTION) dir; + for (k = AES_128_BYTES; k <= limit; k += 8) { + params->aes_key_size = k; + do_variants(mgr, arch, params, run, variant_ptr, + variant); + } + } +} + +/* Generates output containing averaged times for each test variant */ +static void print_times(struct variant_s *variant_list, struct params_s *params, + const uint32_t total_variants) +{ + const uint32_t sizes = params->num_sizes; + uint32_t col; + uint32_t sz; + + /* Temporary variables */ + struct params_s par; + uint8_t c_mode; + uint8_t c_dir; + uint8_t h_alg; + const char *func_names[4] = { + "SSE", "AVX", "AVX2", "AVX512" + }; + const char *c_mode_names[12] = { + "CBC", "CNTR", "CNTR+8", "NULL_CIPHER", "DOCAES", "DOCAES+8", + "DOCDES", "DOCDES+4", "GCM", "CCM", "DES", "3DES" + }; + const char *c_dir_names[2] = { + 
"ENCRYPT", "DECRYPT" + }; + const char *h_alg_names[12] = { + "SHA1", "SHA_224", "SHA_256", "SHA_384", "SHA_512", "XCBC", + "MD5", "CMAC", "NULL_HASH", "GCM", "CUSTOM", "CCM" + }; + printf("ARCH"); + for (col = 0; col < total_variants; col++) + printf("\t%s", func_names[variant_list[col].arch]); + printf("\n"); + printf("CIPHER"); + for (col = 0; col < total_variants; col++) { + par = variant_list[col].params; + c_mode = par.cipher_mode - CBC; + printf("\t%s", c_mode_names[c_mode]); + } + printf("\n"); + printf("DIR"); + for (col = 0; col < total_variants; col++) { + par = variant_list[col].params; + c_dir = par.cipher_dir - ENCRYPT; + printf("\t%s", c_dir_names[c_dir]); + } + printf("\n"); + printf("HASH_ALG"); + for (col = 0; col < total_variants; col++) { + par = variant_list[col].params; + h_alg = par.hash_alg - SHA1; + printf("\t%s", h_alg_names[h_alg]); + } + printf("\n"); + printf("KEY_SIZE"); + for (col = 0; col < total_variants; col++) { + par = variant_list[col].params; + printf("\tAES-%u", par.aes_key_size * 8); + } + printf("\n"); + for (sz = 0; sz < sizes; sz++) { + printf("%d", (sz + 1) * JOB_SIZE_STEP); + for (col = 0; col < total_variants; col++) { + uint64_t *time_ptr = + &variant_list[col].avg_times[sz * NUM_RUNS]; + const unsigned long long val = + mean_median(time_ptr, NUM_RUNS); + + printf("\t%llu", val); + } + printf("\n"); + } +} + +/* Prepares data structure for test variants storage, sets test configuration */ +#ifdef _WIN32 +static void +#else +static void * +#endif +run_tests(void *arg) +{ + uint32_t i; + struct thread_info *info = (struct thread_info *)arg; + MB_MGR *p_mgr = NULL; + struct params_s params; + uint32_t num_variants[NUM_TYPES] = {0, 0, 0}; + uint32_t type, at_size, run, arch; + uint32_t variants_per_arch, max_arch; + uint32_t variant; + uint32_t total_variants = 0; + struct variant_s *variant_ptr = NULL; + struct variant_s *variant_list = NULL; + + p_mgr = alloc_mb_mgr(flags); + if (p_mgr == NULL) { + fprintf(stderr, 
"Failed to allocate MB_MGR structure!\n"); + free_mem(); + exit(EXIT_FAILURE); + } + + params.num_sizes = JOB_SIZE / JOB_SIZE_STEP; + params.core = (uint32_t)info->core; + + /* if cores selected then set affinity */ + if (core_mask) + if (set_affinity(info->core) != 0) { + fprintf(stderr, "Failed to set cpu " + "affinity on core %d\n", info->core); + goto exit_failure; + } + + /* If unhalted cycles selected and this is + the primary thread then start counter */ + if (use_unhalted_cycles && info->print_info) { + int ret; + + ret = start_cycles_ctr(params.core); + if (ret != 0) { + fprintf(stderr, "Failed to start cycles " + "counter on core %u\n", params.core); + goto exit_failure; + } + /* Get average cost of reading counter */ + ret = set_avg_unhalted_cycle_cost(params.core, &rd_cycles_cost); + if (ret != 0 || rd_cycles_cost == 0) { + fprintf(stderr, "Error calculating unhalted " + "cycles read overhead!\n"); + goto exit_failure; + } else + fprintf(stderr, "Started counting unhalted cycles on " + "core %d\nUnhalted cycles read cost = %lu " + "cycles\n", params.core, + (unsigned long)rd_cycles_cost); + } + + for (type = TTYPE_AES_HMAC; type < NUM_TYPES; type++) { + if (test_types[type] == 0) + continue; + + switch (type) { + default: + case TTYPE_AES_HMAC: + variants_per_arch = VARIANTS_PER_ARCH_AES; + max_arch = NUM_ARCHS; + break; + case TTYPE_AES_DOCSIS: + variants_per_arch = VARIANTS_PER_ARCH_DOCSIS; + max_arch = NUM_ARCHS; + break; + case TTYPE_AES_GCM: + variants_per_arch = VARIANTS_PER_ARCH_GCM; + max_arch = NUM_ARCHS - 1; /* No AVX512 for GCM */ + break; + case TTYPE_AES_CCM: + variants_per_arch = VARIANTS_PER_ARCH_CCM; + max_arch = NUM_ARCHS; + break; + case TTYPE_AES_DES: + variants_per_arch = VARIANTS_PER_ARCH_DES; + max_arch = NUM_ARCHS; + break; + case TTYPE_AES_3DES: + variants_per_arch = VARIANTS_PER_ARCH_3DES; + max_arch = NUM_ARCHS; + break; + } + + /* Calculating number of all variants */ + for (arch = 0; arch < max_arch; arch++) { + if 
(archs[arch] == 0) + continue; + num_variants[type] += variants_per_arch; + } + total_variants += num_variants[type]; + } + + variant_list = (struct variant_s *) + malloc(total_variants * sizeof(struct variant_s)); + if (variant_list == NULL) { + fprintf(stderr, "Cannot allocate memory\n"); + goto exit_failure; + } + + at_size = NUM_RUNS * params.num_sizes * sizeof(uint64_t); + for (variant = 0, variant_ptr = variant_list; + variant < total_variants; + variant++, variant_ptr++) { + variant_ptr->avg_times = (uint64_t *) malloc(at_size); + if (!variant_ptr->avg_times) { + fprintf(stderr, "Cannot allocate memory\n"); + goto exit_failure; + } + } + for (run = 0; run < NUM_RUNS; run++) { + fprintf(stderr, "Starting run %d of %d\n", run+1, NUM_RUNS); + + variant = 0; + variant_ptr = variant_list; + + for (type = TTYPE_AES_HMAC; type < NUM_TYPES; type++) { + if (test_types[type] == 0) + continue; + + if (type == TTYPE_AES_GCM) + /* No AVX512 for GCM */ + max_arch = NUM_ARCHS - 1; + else + max_arch = NUM_ARCHS; + + params.num_variants = num_variants[type]; + params.test_type = type; + /* Performing tests for each selected architecture */ + for (arch = 0; arch < max_arch; arch++) { + if (archs[arch] == 0) + continue; + run_dir_test(p_mgr, arch, ¶ms, run, + &variant_ptr, &variant); + } + } /* end for type */ + } /* end for run */ + if (info->print_info == 1) + print_times(variant_list, ¶ms, total_variants); + + if (variant_list != NULL) { + /* Freeing variants list */ + for (i = 0; i < total_variants; i++) + free(variant_list[i].avg_times); + free(variant_list); + } + free_mb_mgr(p_mgr); +#ifndef _WIN32 + return NULL; + +#else + return; +#endif +exit_failure: + if (variant_list != NULL) + free(variant_list); + free_mem(); + free_mb_mgr(p_mgr); + exit(EXIT_FAILURE); +} + +static void usage(void) +{ + fprintf(stderr, "Usage: ipsec_perf [args], " + "where args are zero or more\n" + "-h: print this message\n" + "-c: Use cold cache, it uses warm as default\n" + "-w: Use warm 
cache\n" + "--no-avx512: Don't do AVX512\n" + "--no-avx2: Don't do AVX2\n" + "--no-avx: Don't do AVX\n" + "--no-sse: Don't do SSE\n" + "-o val: Use <val> for the SHA size increment, default is 24\n" + "--shani-on: use SHA extensions, default: auto-detect\n" + "--shani-off: don't use SHA extensions\n" + "--no-gcm: do not run GCM perf tests\n" + "--no-aes: do not run standard AES + HMAC perf tests\n" + "--no-docsis: do not run DOCSIS cipher perf tests\n" + "--no-ccm: do not run CCM cipher perf tests\n" + "--no-des: do not run DES cipher perf tests\n" + "--no-3des: do not run 3DES cipher perf tests\n" + "--gcm-job-api: use JOB API for GCM perf tests" + " (raw GCM API is default)\n" + "--threads num: <num> for the number of threads to run" + " Max: %d\n" + "--cores mask: <mask> CPU's to run threads\n" + "--unhalted-cycles: measure using unhalted cycles (requires root).\n" + " Note: RDTSC is used by default.\n", + MAX_NUM_THREADS + 1); +} + +int main(int argc, char *argv[]) +{ + int i, num_t = 0, core = 0; + struct thread_info *thread_info_p = t_info; + +#ifdef _WIN32 + HANDLE threads[MAX_NUM_THREADS]; +#else + pthread_t tids[MAX_NUM_THREADS]; +#endif + + for (i = 1; i < argc; i++) + if (strcmp(argv[i], "-h") == 0) { + usage(); + return EXIT_SUCCESS; + } else if (strcmp(argv[i], "-c") == 0) { + cache_type = COLD; + fprintf(stderr, "Cold cache, "); + } else if (strcmp(argv[i], "-w") == 0) { + cache_type = WARM; + fprintf(stderr, "Warm cache, "); + } else if (strcmp(argv[i], "--no-avx512") == 0) { + archs[ARCH_AVX512] = 0; + } else if (strcmp(argv[i], "--no-avx2") == 0) { + archs[ARCH_AVX2] = 0; + } else if (strcmp(argv[i], "--no-avx") == 0) { + archs[ARCH_AVX] = 0; + } else if (strcmp(argv[i], "--no-sse") == 0) { + archs[ARCH_SSE] = 0; + } else if (strcmp(argv[i], "--shani-on") == 0) { + flags &= (~IMB_FLAG_SHANI_OFF); + } else if (strcmp(argv[i], "--shani-off") == 0) { + flags |= IMB_FLAG_SHANI_OFF; + } else if (strcmp(argv[i], "--no-gcm") == 0) { + 
test_types[TTYPE_AES_GCM] = 0; + } else if (strcmp(argv[i], "--no-aes") == 0) { + test_types[TTYPE_AES_HMAC] = 0; + } else if (strcmp(argv[i], "--no-docsis") == 0) { + test_types[TTYPE_AES_DOCSIS] = 0; + } else if (strcmp(argv[i], "--no-ccm") == 0) { + test_types[TTYPE_AES_CCM] = 0; + } else if (strcmp(argv[i], "--no-des") == 0) { + test_types[TTYPE_AES_DES] = 0; + } else if (strcmp(argv[i], "--no-3des") == 0) { + test_types[TTYPE_AES_3DES] = 0; + } else if (strcmp(argv[i], "--gcm-job-api") == 0) { + use_gcm_job_api = 1; + } else if ((strcmp(argv[i], "-o") == 0) && (i < argc - 1)) { + i++; + sha_size_incr = atoi(argv[i]); + } else if (strcmp(argv[i], "--threads") == 0) { + num_t = atoi(argv[++i]); + if (num_t > (MAX_NUM_THREADS + 1)) { + fprintf(stderr, "Invalid number of threads!\n"); + return EXIT_FAILURE; + } + } else if (strcmp(argv[i], "--cores") == 0) { + errno = 0; + core_mask = strtoull(argv[++i], NULL, 0); + if (errno != 0) { + fprintf(stderr, "Error converting cpu mask!\n"); + return EXIT_FAILURE; + } + } else if (strcmp(argv[i], "--unhalted-cycles") == 0) { +#ifdef _WIN32 + fprintf(stderr, "Counting unhalted cycles not " + "currently supported on Windows!\n"); + return EXIT_FAILURE; +#endif + use_unhalted_cycles = 1; + } else { + usage(); + return EXIT_FAILURE; + } + + /* Check num cores >= number of threads */ + if ((core_mask != 0 && num_t != 0) && (num_t > bitcount(core_mask))) { + fprintf(stderr, "Insufficient number of cores in " + "core mask (0x%lx) to run %d threads!\n", + (unsigned long) core_mask, num_t); + return EXIT_FAILURE; + } + + /* if cycles selected then init MSR module */ + if (use_unhalted_cycles) { + if (core_mask == 0) { + fprintf(stderr, "Must specify core mask " + "when reading unhalted cycles!\n"); + return EXIT_FAILURE; + } + + if (init_msr_mod() != 0) { + fprintf(stderr, "Error initializing MSR module!\n"); + return EXIT_FAILURE; + } + } + + fprintf(stderr, "SHA size incr = %d\n", sha_size_incr); + if (archs[ARCH_SSE]) { + 
MB_MGR *p_mgr = alloc_mb_mgr(flags); + + if (p_mgr == NULL) { + fprintf(stderr, "Error allocating MB_MGR structure!\n"); + return EXIT_FAILURE; + } + init_mb_mgr_sse(p_mgr); + fprintf(stderr, "%s SHA extensions (shani) for SSE arch\n", + (p_mgr->features & IMB_FEATURE_SHANI) ? + "Using" : "Not using"); + free_mb_mgr(p_mgr); + } + + memset(t_info, 0, sizeof(t_info)); + init_buf(cache_type); + if (num_t > 1) + for (i = 0; i < num_t - 1; i++, thread_info_p++) { + /* Set core if selected */ + if (core_mask) { + core = next_core(core_mask, core); + thread_info_p->core = core++; + } +#ifdef _WIN32 + threads[i] = (HANDLE) + _beginthread(&run_tests, 0, + (void *)thread_info_p); +#else + pthread_attr_t attr; + + pthread_attr_init(&attr); + pthread_create(&tids[i], &attr, run_tests, + (void *)thread_info_p); +#endif + } + + thread_info_p->print_info = 1; + if (core_mask) { + core = next_core(core_mask, core); + thread_info_p->core = core; + } + + run_tests((void *)thread_info_p); + if (num_t > 1) { +#ifdef _WIN32 + WaitForMultipleObjects(num_t, threads, FALSE, INFINITE); +#endif + for (i = 0; i < num_t - 1; i++) { + fprintf(stderr, "Waiting on thread %d to finish...\n", + i+2); +#ifdef _WIN32 + CloseHandle(threads[i]); +#else + pthread_join(tids[i], NULL); +#endif + } + } + + if (use_unhalted_cycles) + machine_fini(); + + free_mem(); + + return EXIT_SUCCESS; +} diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/msr.c b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.c new file mode 100644 index 00000000..34ee4973 --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.c @@ -0,0 +1,209 @@ +/********************************************************************** + Copyright(c) 2018 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +/** + * @brief Provides access to MSR read & write operations + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifndef _WIN32 +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#endif + +#include "msr.h" + +static int *m_msr_fd = NULL; /**< MSR driver file descriptors table */ +static unsigned m_maxcores = 0; /**< max number of cores (size of the + table above too) */ + +int +machine_init(const unsigned max_core_id) +{ +#ifndef _WIN32 + unsigned i; + + if (max_core_id == 0) + return MACHINE_RETVAL_PARAM; + + m_maxcores = max_core_id + 1; + + /** + * Allocate table to hold MSR driver file descriptors + * Each file descriptor is for a different core. + * Core id is an index to the table. + */ + m_msr_fd = (int *)malloc(m_maxcores * sizeof(m_msr_fd[0])); + if (m_msr_fd == NULL) { + m_maxcores = 0; + return MACHINE_RETVAL_ERROR; + } + + for (i = 0; i < m_maxcores; i++) + m_msr_fd[i] = -1; +#endif /* _WIN32 */ + return MACHINE_RETVAL_OK; +} + +int +machine_fini(void) +{ +#ifndef _WIN32 + unsigned i; + + ASSERT(m_msr_fd != NULL); + if (m_msr_fd == NULL) + return MACHINE_RETVAL_ERROR; + + /** + * Close open file descriptors and free up table memory. + */ + for (i = 0; i < m_maxcores; i++) + if (m_msr_fd[i] != -1) { + close(m_msr_fd[i]); + m_msr_fd[i] = -1; + } + + free(m_msr_fd); + m_msr_fd = NULL; + m_maxcores = 0; +#endif /* _WIN32 */ + return MACHINE_RETVAL_OK; +} + +#ifndef _WIN32 +/** + * @brief Returns MSR driver file descriptor for given core id + * + * File descriptor could be previously open and comes from + * m_msr_fd table or is open (& cached) during the call. 
+ * + * @param lcore logical core id + * + * @return MSR driver file descriptor corresponding \a lcore + */ +static int +msr_file_open(const unsigned lcore) +{ + ASSERT(lcore < m_maxcores); + ASSERT(m_msr_fd != NULL); + + int fd = m_msr_fd[lcore]; + + if (fd < 0) { + char fname[32]; + + memset(fname, 0, sizeof(fname)); + snprintf(fname, sizeof(fname)-1, + "/dev/cpu/%u/msr", lcore); + fd = open(fname, O_RDWR); + if (fd < 0) + fprintf(stderr, "Error opening file '%s'!\n", fname); + else + m_msr_fd[lcore] = fd; + } + + return fd; +} +#endif /* _WIN32 */ + +int +msr_read(const unsigned lcore, + const uint32_t reg, + uint64_t *value) +{ + int ret = MACHINE_RETVAL_OK; +#ifndef _WIN32 + int fd = -1; + ssize_t read_ret = 0; + + ASSERT(value != NULL); + if (value == NULL) + return MACHINE_RETVAL_PARAM; + + ASSERT(lcore < m_maxcores); + if (lcore >= m_maxcores) + return MACHINE_RETVAL_PARAM; + + ASSERT(m_msr_fd != NULL); + if (m_msr_fd == NULL) + return MACHINE_RETVAL_ERROR; + + fd = msr_file_open(lcore); + if (fd < 0) + return MACHINE_RETVAL_ERROR; + + read_ret = pread(fd, value, sizeof(value[0]), (off_t)reg); + + if (read_ret != sizeof(value[0])) { + fprintf(stderr, "RDMSR failed for reg[0x%x] on lcore %u\n", + (unsigned)reg, lcore); + ret = MACHINE_RETVAL_ERROR; + } +#endif /* _WIN32 */ + return ret; +} + +int +msr_write(const unsigned lcore, + const uint32_t reg, + const uint64_t value) +{ + int ret = MACHINE_RETVAL_OK; +#ifndef _WIN32 + int fd = -1; + ssize_t write_ret = 0; + + ASSERT(lcore < m_maxcores); + if (lcore >= m_maxcores) + return MACHINE_RETVAL_PARAM; + + ASSERT(m_msr_fd != NULL); + if (m_msr_fd == NULL) + return MACHINE_RETVAL_ERROR; + + fd = msr_file_open(lcore); + if (fd < 0) + return MACHINE_RETVAL_ERROR; + + write_ret = pwrite(fd, &value, sizeof(value), (off_t)reg); + + if (write_ret != sizeof(value)) { + fprintf(stderr, "WRMSR failed for reg[0x%x] " + "<- value[0x%llx] on lcore %u\n", + (unsigned)reg, (unsigned long long)value, lcore); + ret = 
MACHINE_RETVAL_ERROR; + } +#endif /* _WIN32 */ + return ret; +} diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/msr.h b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.h new file mode 100644 index 00000000..afa8795c --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.h @@ -0,0 +1,114 @@ +/********************************************************************** + Copyright(c) 2018 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +/** + * @brief Provides access to MSR read & write operations + */ + +#ifndef __MSR_H__ +#define __MSR_H__ + +#include <stdint.h> +#include <stdlib.h> +#ifdef DEBUG +#include <assert.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifdef DEBUG +#define ASSERT assert +#else +#define ASSERT(x) +#endif + +#define MACHINE_DEFAULT_MAX_COREID 255 /**< max core id */ + +#define MACHINE_RETVAL_OK 0 /**< everything OK */ +#define MACHINE_RETVAL_ERROR 1 /**< generic error */ +#define MACHINE_RETVAL_PARAM 2 /**< parameter error */ + +/** + * @brief Initializes machine module + * + * @param [in] max_core_id maximum logical core id to be handled by machine + * module. If zero then default value assumed + * \a MACHINE_DEFAULT_MAX_COREID + * + * @return Operation status + * @retval MACHINE_RETVAL_OK on success + */ +int machine_init(const unsigned max_core_id); + +/** + * @brief Shuts down machine module + * + * @return Operation status + * @retval MACHINE_RETVAL_OK on success + */ +int machine_fini(void); + +/** + * @brief Executes RDMSR on \a lcore logical core + * + * @param [in] lcore logical core id + * @param [in] reg MSR to read from + * @param [out] value place to store MSR value at + * + * @return Operation status + * @retval MACHINE_RETVAL_OK on success + */ +int +msr_read(const unsigned lcore, + const uint32_t reg, + uint64_t *value); + +/** + * @brief Executes WRMSR on \a lcore logical core + * + * @param [in] lcore logical core id + * @param [in] reg MSR to write to + * @param [in] value to be written into \a reg + * + * @return Operation status + * @retval MACHINE_RETVAL_OK on success + */ +int +msr_write(const unsigned lcore, + const uint32_t reg, + const uint64_t value); + +#ifdef __cplusplus +} +#endif + +#endif /* __MSR_H__ */ diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/win_x64.mak b/src/spdk/intel-ipsec-mb/LibPerfApp/win_x64.mak new file mode 100644 index 
00000000..a1d8d902 --- /dev/null +++ b/src/spdk/intel-ipsec-mb/LibPerfApp/win_x64.mak @@ -0,0 +1,69 @@ +# +# Copyright (c) 2017-2018, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of Intel Corporation nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#

# NMAKE makefile for the ipsec_perf performance application (Windows x64).
# Define DEBUG on the nmake command line for an unoptimized build with
# debug information; define PREFIX to point at an installed library.

APP = ipsec_perf
INSTNAME = intel-ipsec-mb

!if !defined(PREFIX)
PREFIX = C:\Program Files
!endif

# Use the installed library when it exists, else the one in the parent tree
!if exist("$(PREFIX)\$(INSTNAME)\libIPSec_MB.lib")
IPSECLIB = "$(PREFIX)\$(INSTNAME)\libIPSec_MB.lib"
INCDIR = -I"$(PREFIX)\$(INSTNAME)"
!else
IPSECLIB = ..\libIPSec_MB.lib
INCDIR = -I..\ -I..\include
!endif

!ifdef DEBUG
DCFLAGS = /Od /DDEBUG /Z7
DLFLAGS = /debug
!else
DCFLAGS = /O2 /Oi
DLFLAGS =
!endif

CC = cl
CFLAGS = /nologo $(DCFLAGS) /Y- /W3 /WX- /Gm- /fp:precise /EHsc $(INCDIR)

LNK = link
LFLAGS = /out:$(APP).exe $(DLFLAGS)

all: $(APP).exe

$(APP).exe: ipsec_perf.obj msr.obj $(IPSECLIB)
	$(LNK) $(LFLAGS) ipsec_perf.obj msr.obj $(IPSECLIB)

# msr.h is listed so that editing the header rebuilds its users.
# ipsec_perf.c calls machine_fini(), presumably via msr.h - confirm.
ipsec_perf.obj: ipsec_perf.c msr.h
	$(CC) /c $(CFLAGS) ipsec_perf.c

msr.obj: msr.c msr.h
	$(CC) /c $(CFLAGS) msr.c

clean:
	del /q ipsec_perf.obj msr.obj $(APP).exe $(APP).pdb $(APP).ilk