summaryrefslogtreecommitdiffstats
path: root/src/spdk/intel-ipsec-mb/LibPerfApp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
commit483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
treee5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/spdk/intel-ipsec-mb/LibPerfApp
parentInitial commit. (diff)
downloadceph-upstream.tar.xz
ceph-upstream.zip
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/intel-ipsec-mb/LibPerfApp')
-rwxr-xr-xsrc/spdk/intel-ipsec-mb/LibPerfApp/Makefile83
-rw-r--r--src/spdk/intel-ipsec-mb/LibPerfApp/README82
-rwxr-xr-xsrc/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py308
-rw-r--r--src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c1459
-rw-r--r--src/spdk/intel-ipsec-mb/LibPerfApp/msr.c209
-rw-r--r--src/spdk/intel-ipsec-mb/LibPerfApp/msr.h114
-rw-r--r--src/spdk/intel-ipsec-mb/LibPerfApp/win_x64.mak69
7 files changed, 2324 insertions, 0 deletions
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/Makefile b/src/spdk/intel-ipsec-mb/LibPerfApp/Makefile
new file mode 100755
index 00000000..7039558f
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/Makefile
@@ -0,0 +1,83 @@
+# Copyright (c) 2017-2018, Intel Corporation
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Intel Corporation nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+EXE=ipsec_perf
+# Header whose presence means the library is installed system-wide
+INSTPATH ?= /usr/include/intel-ipsec-mb.h
+
+CFLAGS = -DLINUX -D_GNU_SOURCE $(INCLUDES) \
+	-W -Wall -Wextra -Wmissing-declarations -Wpointer-arith \
+	-Wcast-qual -Wundef -Wwrite-strings \
+	-Wformat -Wformat-security \
+	-Wunreachable-code -Wmissing-noreturn -Wsign-compare -Wno-endif-labels \
+	-Wstrict-prototypes -Wmissing-prototypes -Wold-style-definition \
+	-pthread
+
+LDFLAGS = -fPIE -z noexecstack -z relro -z now -pthread
+LDLIBS = -lIPSec_MB
+
+# $(shell ...) strips the trailing newline itself, so a plain "echo" is
+# enough; "echo -n" is not portable across /bin/sh implementations.
+ifeq ("$(shell test -e $(INSTPATH) && echo yes)","yes")
+# library installed: default include and library search paths are used
+else
+# library not installed: build against the library in the parent directory
+CFLAGS += -I../include -I../
+LDFLAGS += -L../
+endif
+
+ifeq ($(DEBUG),y)
+CFLAGS += -g -DDEBUG -O0
+LDFLAGS += -g
+else
+CFLAGS += -O3 -fPIE -fstack-protector -D_FORTIFY_SOURCE=2
+endif
+
+SOURCES := ipsec_perf.c msr.c
+OBJECTS := $(SOURCES:%.c=%.o)
+
+# Both tools may be overridden from the environment or the command line
+CHECKPATCH ?= checkpatch.pl
+CPPCHECK ?= cppcheck
+
+.PHONY: all clean style cppcheck
+
+all: $(EXE)
+
+$(EXE): $(OBJECTS)
+	$(CC) $(LDFLAGS) $^ $(LDLIBS) -o $@
+
+# ipsec_perf.o is rebuilt whenever any of the listed sources changes
+ipsec_perf.o: $(SOURCES)
+
+clean:
+	-rm -f $(OBJECTS)
+	-rm -f $(EXE)
+
+# Style check. Continuation lines deliberately start in column 0 so that
+# the comma-separated --ignore list is joined without embedded whitespace.
+SOURCES_STYLE := $(foreach infile,$(SOURCES),-f $(infile))
+style:
+	$(CHECKPATCH) --no-tree --no-signoff --emacs --no-color \
+--ignore CODE_INDENT,INITIALISED_STATIC,LEADING_SPACE,SPLIT_STRING,\
+UNSPECIFIED_INT,ARRAY_SIZE,BLOCK_COMMENT_STYLE,GLOBAL_INITIALISERS,\
+COMPLEX_MACRO,SPACING,STORAGE_CLASS $(SOURCES_STYLE)
+
+# Static code analysis; a non-zero exit code is returned on findings
+cppcheck:
+	$(CPPCHECK) --enable=warning,portability,performance \
+--error-exitcode=1 --std=c99 -I../include -I../ $(SOURCES)
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/README b/src/spdk/intel-ipsec-mb/LibPerfApp/README
new file mode 100644
index 00000000..dad423d9
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/README
@@ -0,0 +1,82 @@
+========================================================================
+README for Intel(R) Multi-Buffer Crypto for IPsec Library API
+performance measurement tool
+
+February 2017
+========================================================================
+
+
+Contents
+========
+
+- Overview
+- Files
+- Compilation
+- Usage
+- Legal Disclaimer
+
+
+Overview
+========
+This test tool repeatedly executes the functions included in the
+Intel Multi-Buffer Crypto for IPsec Library and measures their execution time.
+
+Files
+=====
+
+ipsec_perf.c - Tool which produces text formatted output representing
+ average times of ipsec_mb functions execution.
+ipsec_diff_tool.py - Another tool which interprets text data given.
+
+Compilation
+===========
+
+Required tools:
+- GNU make
+- gcc (GCC) 4.8.3 (or newer)
+
+Simply run "make" to compile the tool.
+To clean the build please run "make clean".
+
+You can point to another directory containing the IPSec MB library by
+setting LIB_LOC. For example:
+ LIB_LOC=../ipsec_mb_lib make
+
+In order to perform static code analysis or style check you can do:
+ make cppcheck
+or
+ make style
+
+Be aware that you need to have the cppcheck tool installed and the
+checkpatch.pl script copied into one of the directories listed in $PATH.
+You can also set the CPPCHECK and/or CHECKPATCH variables to give the paths
+to these tools if they are placed in different directories. For example:
+ CPPCHECK=~/tools/cppcheck make cppcheck
+ CHECKPATCH=~/scripts/checkpatch.pl make style
+
+Usage
+=====
+
+You can simply check list of arguments by typing:
+ ./ipsec_perf -h
+
+Usage example:
+ ./ipsec_perf -c --no-avx512 --no-gcm -o 24
+
+Later you can pass output to ipsec_diff_tool.py for data
+analysis:
+ ./ipsec_diff_tool.py out1.txt out2.txt 5
+
+Run ipsec_diff_tool.py -h to see the help page.
+
+Legal Disclaimer
+================
+
+THIS SOFTWARE IS PROVIDED BY INTEL "AS IS". NO LICENSE, EXPRESS OR
+IMPLIED, BY ESTOPPEL OR OTHERWISE, TO ANY INTELLECTUAL PROPERTY RIGHTS
+ARE GRANTED THROUGH USE. EXCEPT AS PROVIDED IN INTEL'S TERMS AND
+CONDITIONS OF SALE, INTEL ASSUMES NO LIABILITY WHATSOEVER AND INTEL
+DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY, RELATING TO SALE AND/OR
+USE OF INTEL PRODUCTS INCLUDING LIABILITY OR WARRANTIES RELATING TO
+FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABILITY, OR INFRINGEMENT
+OF ANY PATENT, COPYRIGHT OR OTHER INTELLECTUAL PROPERTY RIGHT.
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py
new file mode 100755
index 00000000..1e8219f5
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py
@@ -0,0 +1,308 @@
+#!/usr/bin/env python
+
+"""
+**********************************************************************
+ Copyright(c) 2017-2018, Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************
+"""
+
+import sys
+
+# Number of parameters (ARCH, CIPHER_MODE, DIR, HASH_ALG, KEY_SIZE)
+PAR_NUM = 5
+
+class Variant(object):
+ """
+ Class representing one test including chosen parameters and
+ results of average execution times
+ """
+ def __init__(self, **args):
+ self.params = (args['arch'], args['cipher'], args['dir'], args['alg'],
+ args['keysize'])
+
+ self.avg_times = []
+ self.slope = None
+ self.intercept = None
+
+ def set_times(self, avg_times):
+ """
+ Fills test execution time list
+ """
+ self.avg_times = avg_times
+
+ def lin_reg(self, sizes):
+ """
+ Computes linear regression of set of coordinates (x,y)
+ """
+
+ n = len(sizes)
+
+ if n != len(self.avg_times):
+ print "Error!"
+ return None
+
+ sumx = sum(sizes)
+ sumy = sum(self.avg_times)
+ sumxy = sum([x * y for x, y in zip(sizes, self.avg_times)])
+ sumsqrx = sum([pow(x, 2) for x in sizes])
+ self.slope = (n * sumxy - sumx * sumy) / float(n * sumsqrx - pow(sumx, 2))
+ self.intercept = (sumy - self.slope * sumx) / float(n)
+
+ def get_params_str(self):
+ """
+ Returns all parameters concatenated into one string
+ """
+ return "\t".join(i for i in self.params)
+
+ def get_lin_func_str(self):
+ """
+ Returns string having linear coefficients
+ """
+ slope = "{:.5f}".format(self.slope)
+ intercept = "{:.5f}".format(self.intercept)
+ return "{}\t{}".format(slope, intercept)
+
+class VarList(list):
+ """
+ Class used to store all test variants as a list of objects
+ """
+
+ def find_obj(self, params):
+ """
+ Finds first occurence of object containing given parameters
+ """
+ ret_val = None
+ matches = (obj for obj in self if obj.params == params)
+ try:
+ ret_val = next(matches)
+ except StopIteration:
+ pass
+ return ret_val
+
+ def compare(self, list_b, tolerance):
+ """
+ Finds variants from two data sets which are matching and compares
+ its linear regression coefficients.
+ Compares list_b against itself.
+ """
+
+ if tolerance is None:
+ tolerance = 5.0
+ if tolerance < 0.0:
+ print "Bad argument: Tolerance must not be less than 0%"
+ exit(1)
+ print "TOLERANCE: {:.2f}%".format(tolerance)
+
+ warning = False
+ print "NO\tARCH\tCIPHER\tDIR\tHASH\tKEYSZ\tSLOPE A\tINTERCEPT A\tSLOPE B\tINTERCEPT B"
+ for i, obj_a in enumerate(self):
+ obj_b = list_b.find_obj(obj_a.params)
+ if obj_b != None:
+ if obj_a.slope < 0.0:
+ obj_a.slope = 0
+ if obj_b.slope < 0.0:
+ obj_b.slope = 0
+ slope_bv = 0.01 * tolerance * obj_a.slope # border value
+ intercept_bv = 0.01 * tolerance * obj_a.intercept
+ diff_slope = obj_b.slope - obj_a.slope
+ diff_intercept = obj_b.intercept - obj_a.intercept
+ if (obj_a.slope > 0.001 and obj_b.slope > 0.001 and
+ diff_slope > slope_bv) or diff_intercept > intercept_bv:
+ warning = True
+ print "{}\t{}\t{}\t{}".format(i + 1,
+ obj_b.get_params_str(),
+ obj_a.get_lin_func_str(),
+ obj_b.get_lin_func_str())
+ if not warning:
+ print "No differences found."
+ return warning
+
+ def printout(self):
+ """
+ Prints out readable representation of the list
+ """
+
+ print "NO\tARCH\tCIPHER\tDIR\tHASH\tKEYSZ\tSLOPE \tINTERCEPT"
+ for i, obj in enumerate(self):
+ print "{}\t{}\t{}".format(i + 1,
+ obj.get_params_str(),
+ obj.get_lin_func_str())
+
+
+
+class Parser(object):
+ """
+ Class used to parse a text file contaning performance data
+ """
+
+ def __init__(self, fname, verbose):
+ self.fname = fname
+ self.verbose = verbose
+
+ @staticmethod
+ def convert2int(in_tuple):
+ """
+ Converts a tuple of strings into a list of integers
+ """
+
+ result = list(in_tuple) # Converting to list
+ result = [int(i) for i in result] # Converting str to int
+ return result
+
+ def load(self):
+ """
+ Reads a text file by columns, stores data in objects
+ for further comparision of performance
+ """
+
+ v_list = VarList()
+ # Reading by columns, results in list of tuples
+ # Each tuple is representing a column from a text file
+ try:
+ f = open(self.fname, 'r')
+ except IOError:
+ print "Error reading {} file.".format(self.fname)
+ exit(1)
+ else:
+ with f:
+ cols = zip(*(line.strip().split('\t') for line in f))
+
+ # Reading first column with payload sizes, ommiting first 5 rows
+ sizes = self.convert2int(cols[0][PAR_NUM:])
+ if self.verbose:
+ print "Available buffer sizes:\n"
+ print sizes
+ print "========================================================"
+ print "\n\nVariants:\n"
+
+ # Reading remaining columns contaning performance data
+ for row in cols[1:]:
+ # First rows are run options
+ arch, c_mode, c_dir, h_alg, key_size = row[:PAR_NUM]
+ if self.verbose:
+ print arch, c_mode, c_dir, h_alg, key_size
+
+ # Getting average times
+ avg_times = self.convert2int(row[PAR_NUM:])
+ if self.verbose:
+ print avg_times
+ print "------"
+
+ # Putting new object to the result list
+ v_list.append(Variant(arch=arch, cipher=c_mode, dir=c_dir,
+ alg=h_alg, keysize=key_size))
+ v_list[-1].set_times(avg_times)
+ # Finding linear function representation of data set
+ v_list[-1].lin_reg(sizes)
+ if self.verbose:
+ print "({}, {})".format(v_list[-1].slope, v_list[-1].intercept)
+ print "============\n"
+ return v_list, sizes
+
+class DiffTool(object):
+ """
+ Main class
+ """
+
+ def __init__(self):
+ self.fname_a = None
+ self.fname_b = None
+ self.tolerance = None
+ self.verbose = False
+ self.analyze = False
+
+ @staticmethod
+ def usage():
+ """
+ Prints usage
+ """
+ print "This tool compares file_b against file_a printing out differences."
+ print "Usage:"
+ print "\tipsec_diff_tool.py [-v] [-a] file_a file_b [tol]\n"
+ print "\t-v - verbose"
+ print "\t-a - takes only one argument: name of the file to analyze"
+ print "\tfile_a, file_b - text files containing output from ipsec_perf tool"
+ print "\ttol - tolerance [%], must be >= 0, default 5\n"
+ print "Examples:"
+ print "\tipsec_diff_tool.py file01.txt file02.txt 10"
+ print "\tipsec_diff_tool.py -a file02.txt"
+ print "\tipsec_diff_tool.py -v -a file01.txt"
+
+
+ def parse_args(self):
+ """
+ Get commandline arguments
+ """
+ if len(sys.argv) < 3 or sys.argv[1] == "-h":
+ self.usage()
+ exit(1)
+ if sys.argv[1] == "-a":
+ self.analyze = True
+ self.fname_a = sys.argv[2]
+ elif sys.argv[2] == "-a":
+ if sys.argv[1] == "-v":
+ self.verbose = True
+ self.analyze = True
+ self.fname_a = sys.argv[3]
+ elif sys.argv[1] == "-v":
+ self.verbose = True
+ self.fname_a = sys.argv[2]
+ self.fname_b = sys.argv[3]
+ if len(sys.argv) >= 5:
+ self.tolerance = float(sys.argv[4])
+
+ else:
+ self.fname_a = sys.argv[1]
+ self.fname_b = sys.argv[2]
+ if len(sys.argv) >= 4:
+ self.tolerance = float(sys.argv[3])
+
+ def run(self):
+ """
+ Main method
+ """
+ self.parse_args()
+
+ parser_a = Parser(self.fname_a, self.verbose)
+ list_a, sizes_a = parser_a.load()
+
+ if not self.analyze:
+ parser_b = Parser(self.fname_b, self.verbose)
+ list_b, sizes_b = parser_b.load()
+ if sizes_a != sizes_b:
+ print "Error. Buffer size lists in two compared " \
+ "data sets differ! Aborting.\n"
+ exit(1)
+ warning = list_a.compare(list_b, self.tolerance) # Compares list_b against list_a
+ if warning:
+ exit(2)
+ else:
+ list_a.printout() # Takes only one file and prints it out
+
+# Run the tool only when executed as a script, not when imported
+if __name__ == '__main__':
+ DiffTool().run()
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c
new file mode 100644
index 00000000..841b689a
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c
@@ -0,0 +1,1459 @@
+/**********************************************************************
+ Copyright(c) 2017-2018, Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#include <process.h>
+#include <intrin.h>
+#define __forceinline static __forceinline
+#else
+#include <x86intrin.h>
+#define __forceinline static inline __attribute__((always_inline))
+#include <unistd.h>
+#include <pthread.h>
+#include <sched.h>
+#endif
+
+#include <intel-ipsec-mb.h>
+
+#include "msr.h"
+
+/* Size in bytes of the input buffer; init_buf() allocates
+ * BUFSIZE + REGION_SIZE bytes for it */
+#define BUFSIZE (512 * 1024 * 1024)
+#define JOB_SIZE (2 * 1024)
+#define JOB_SIZE_STEP 16
+/* Spacing in bytes between consecutive regions of the input buffer */
+#define REGION_SIZE (JOB_SIZE + 3003)
+/* Number of regions; dimension of the offsets[] and key_idxs[] tables */
+#define NUM_OFFSETS (BUFSIZE / REGION_SIZE)
+#define NUM_RUNS 16
+/* Number of 128-bit key slots reserved per region, see init_buf() */
+#define KEYS_PER_JOB 15
+#define ITER_SCALE 200000
+/* Width of an object or type in bits */
+#define BITS(x) (sizeof(x) * 8)
+/* Number of elements of an array; the argument is parenthesised so that
+ * expressions such as DIM(*ptr_to_array) expand correctly */
+#define DIM(x) (sizeof(x) / sizeof((x)[0]))
+
+/* Dimensions of the test matrix: how many values each parameter takes */
+#define NUM_ARCHS 4 /* SSE, AVX, AVX2, AVX512 */
+#define NUM_TYPES 6 /* AES_HMAC, AES_DOCSIS, AES_GCM, AES_CCM, DES, 3DES */
+#define MAX_NUM_THREADS 16 /* Maximum number of threads that can be created */
+
+#define CIPHER_MODES_AES 4 /* CBC, CNTR, CNTR+8, NULL_CIPHER */
+#define CIPHER_MODES_DOCSIS 4 /* AES DOCSIS, AES DOCSIS+8, DES DOCSIS,
+ DES DOCSIS+4 */
+#define CIPHER_MODES_DES 1 /* DES */
+#define CIPHER_MODES_GCM 1 /* GCM */
+#define CIPHER_MODES_CCM 1 /* CCM */
+#define CIPHER_MODES_3DES 1 /* 3DES */
+#define DIRECTIONS 2 /* ENC, DEC */
+#define HASH_ALGS_AES 9 /* SHA1, SHA256, SHA224, SHA384, SHA512, XCBC,
+ MD5, NULL_HASH, CMAC */
+#define HASH_ALGS_DOCSIS 1 /* NULL_HASH */
+#define HASH_ALGS_GCM 1 /* GCM */
+#define HASH_ALGS_CCM 1 /* CCM */
+#define HASH_ALGS_DES 1 /* NULL_HASH for DES */
+#define HASH_ALGS_3DES 1 /* NULL_HASH for 3DES */
+#define KEY_SIZES_AES 3 /* 16, 24, 32 */
+#define KEY_SIZES_DOCSIS 1 /* 16 or 8 */
+#define KEY_SIZES_GCM 3 /* 16, 24, 32 */
+#define KEY_SIZES_CCM 1 /* 16 */
+#define KEY_SIZES_DES 1 /* 8 */
+#define KEY_SIZES_3DES 1 /* 8 x 3 */
+
+/* MSR addresses used by start_cycles_ctr() and read_cycles() */
+#define IA32_MSR_FIXED_CTR_CTRL 0x38D
+#define IA32_MSR_PERF_GLOBAL_CTR 0x38F
+#define IA32_MSR_CPU_UNHALTED_THREAD 0x30A
+
+/* Those defines tell how many different test cases are to be performed.
+ * Have to be multiplied by number of chosen architectures.
+ */
+#define VARIANTS_PER_ARCH_AES (CIPHER_MODES_AES * DIRECTIONS * \
+ HASH_ALGS_AES * KEY_SIZES_AES)
+#define VARIANTS_PER_ARCH_DOCSIS (CIPHER_MODES_DOCSIS * DIRECTIONS * \
+ HASH_ALGS_DOCSIS * KEY_SIZES_DOCSIS)
+#define VARIANTS_PER_ARCH_GCM (CIPHER_MODES_GCM * DIRECTIONS * \
+ HASH_ALGS_GCM * KEY_SIZES_GCM)
+#define VARIANTS_PER_ARCH_CCM (CIPHER_MODES_CCM * DIRECTIONS * \
+ HASH_ALGS_CCM * KEY_SIZES_CCM)
+#define VARIANTS_PER_ARCH_DES (CIPHER_MODES_DES * DIRECTIONS * \
+ HASH_ALGS_DES * KEY_SIZES_DES)
+#define VARIANTS_PER_ARCH_3DES (CIPHER_MODES_3DES * DIRECTIONS * \
+ HASH_ALGS_3DES * KEY_SIZES_3DES)
+
+/* Typedefs used for GCM callbacks */
+/* Encrypt/decrypt callback. Arguments, in the order the aesni_gcm_enc() and
+ * aesni_gcm_dec() wrappers pass them: key data, context, out, in, len, iv,
+ * aad, aad_len, auth_tag, auth_tag_len */
+typedef void (*aesni_gcm_t)(const struct gcm_key_data *,
+ struct gcm_context_data *,
+ uint8_t *, const uint8_t *, uint64_t,
+ const uint8_t *, const uint8_t *, uint64_t,
+ uint8_t *, uint64_t);
+/* Key pre-processing callback: (key, key data to fill) */
+typedef void (*aesni_gcm_pre_t)(const void *, struct gcm_key_data *);
+
+/* AES_HMAC, DOCSIS callbacks */
+/* One set of multi-buffer manager entry points; func_sets[] holds one such
+ * set per architecture */
+struct funcs_s {
+ init_mb_mgr_t init_mb_mgr;
+ get_next_job_t get_next_job;
+ submit_job_t submit_job;
+ get_completed_job_t get_completed_job;
+ flush_job_t flush_job;
+};
+
+/* GCM callbacks */
+/* One set of direct GCM entry points; func_sets_gcm[] holds one such set
+ * per architecture and key size */
+struct funcs_gcm_s {
+ aesni_gcm_pre_t aesni_gcm_pre;
+ aesni_gcm_t aesni_gcm_enc;
+ aesni_gcm_t aesni_gcm_dec;
+};
+
+/* Architecture selector; the values follow the order of the rows of
+ * func_sets[] (sse, avx, avx2, avx512) */
+enum arch_type_e {
+ ARCH_SSE = 0,
+ ARCH_AVX,
+ ARCH_AVX2,
+ ARCH_AVX512
+};
+
+/* Kind of test to run; the order matches the NUM_TYPES comment
+ * (AES_HMAC, AES_DOCSIS, AES_GCM, AES_CCM, DES, 3DES) */
+enum test_type_e {
+ TTYPE_AES_HMAC,
+ TTYPE_AES_DOCSIS,
+ TTYPE_AES_GCM,
+ TTYPE_AES_CCM,
+ TTYPE_AES_DES,
+ TTYPE_AES_3DES
+};
+
+/* This enum will be mostly translated to JOB_CIPHER_MODE
+ * (see translate_cipher_mode()); numbering starts at 1 */
+enum test_cipher_mode_e {
+ TEST_CBC = 1,
+ TEST_CNTR,
+ TEST_CNTR8, /* CNTR with increased buffer by 8 */
+ TEST_NULL_CIPHER,
+ TEST_AESDOCSIS,
+ TEST_AESDOCSIS8, /* AES DOCSIS with increased buffer size by 8 */
+ TEST_DESDOCSIS,
+ TEST_DESDOCSIS4, /* DES DOCSIS with increased buffer size by 4 */
+ TEST_GCM, /* Additional field used by GCM, not translated */
+ TEST_CCM,
+ TEST_DES,
+ TEST_3DES,
+};
+
+/* This enum will be mostly translated to JOB_HASH_ALG; numbering starts
+ * at 1 */
+enum test_hash_alg_e {
+ TEST_SHA1 = 1,
+ TEST_SHA_224,
+ TEST_SHA_256,
+ TEST_SHA_384,
+ TEST_SHA_512,
+ TEST_XCBC,
+ TEST_MD5,
+ TEST_HASH_CMAC, /* added here to be included in AES tests */
+ TEST_NULL_HASH,
+ TEST_HASH_GCM, /* Additional field used by GCM, not translated */
+ TEST_CUSTOM_HASH, /* unused */
+ TEST_HASH_CCM
+};
+
+/* Struct storing cipher parameters */
+struct params_s {
+ JOB_CIPHER_DIRECTION cipher_dir;
+ enum test_type_e test_type; /* AES, DOCSIS, GCM */
+ enum test_cipher_mode_e cipher_mode;
+ enum test_hash_alg_e hash_alg;
+ uint32_t aes_key_size;
+ /* base message length; do_test() adds 8 or 4 for the "+8"/"+4" modes */
+ uint32_t size_aes;
+ uint32_t num_sizes;
+ uint32_t num_variants;
+ uint32_t core;
+};
+
+/* This struct stores all information about performed test case */
+struct variant_s {
+ uint32_t arch;
+ struct params_s params;
+ /* NOTE(review): presumably one average time per tested size - confirm */
+ uint64_t *avg_times;
+};
+
+/* Struct storing information to be passed to threads;
+ * t_info[] has one slot per possible thread (MAX_NUM_THREADS) */
+struct thread_info {
+ int print_info;
+ int core;
+} t_info[MAX_NUM_THREADS];
+
+/* Cache scenario; selects how init_buf() fills offsets[] and key_idxs[]
+ * and which index_limit it sets */
+enum cache_type_e {
+ WARM = 0,
+ COLD = 1
+};
+
+/* FUNCS(A) expands to an initializer of struct funcs_s for architecture
+ * suffix A. Debug builds use submit_job_<A>, other builds use
+ * submit_job_nocheck_<A>. */
+#ifdef DEBUG
+#define FUNCS(A) { \
+ init_mb_mgr_##A, \
+ get_next_job_##A, \
+ submit_job_##A, \
+ get_completed_job_##A, \
+ flush_job_##A \
+ }
+#else
+#define FUNCS(A) { \
+ init_mb_mgr_##A, \
+ get_next_job_##A, \
+ submit_job_nocheck_##A, \
+ get_completed_job_##A, \
+ flush_job_##A \
+ }
+#endif
+
+/* FUNCS_GCM(A) expands to three struct funcs_gcm_s initializers for
+ * architecture suffix A, ordered by key size: 128, 192, 256 bits */
+#define FUNCS_GCM(A) \
+ {aes_gcm_pre_128_##A, aes_gcm_enc_128_##A, aes_gcm_dec_128_##A}, \
+ {aes_gcm_pre_192_##A, aes_gcm_enc_192_##A, aes_gcm_dec_192_##A}, \
+ {aes_gcm_pre_256_##A, aes_gcm_enc_256_##A, aes_gcm_dec_256_##A}
+
+
+/* Function pointers used by TTYPE_AES_HMAC, TTYPE_AES_DOCSIS */
+/* Indexed by enum arch_type_e */
+struct funcs_s func_sets[NUM_ARCHS] = {
+ FUNCS(sse),
+ FUNCS(avx),
+ FUNCS(avx2),
+ FUNCS(avx512)
+};
+
+/* Function pointers used by TTYPE_AES_GCM */
+/* Indexed by [arch][key size index]. The table has only NUM_ARCHS - 1 rows.
+ * NOTE(review): indexing it with ARCH_AVX512 would read past the end -
+ * confirm callers never do that. */
+struct funcs_gcm_s func_sets_gcm[NUM_ARCHS - 1][3] = {
+ {FUNCS_GCM(sse)},
+ {FUNCS_GCM(avx_gen2)}, /* AVX */
+ {FUNCS_GCM(avx_gen4)} /* AVX2 */
+};
+
+/* Cache scenario in effect; WARM unless changed elsewhere */
+enum cache_type_e cache_type = WARM;
+/* As enum: SHA1, SHA224, SHA256, SHA384, SHA512,
+ XCBC, MD5, NULL, GMAC, CUSTOM, CCM, CMAC */
+const uint32_t auth_tag_length_bytes[12] = {
+ 12, 14, 16, 24, 32, 12, 12, 0, 8, 0, 16, 16
+};
+/* Input buffer allocated by init_buf() */
+uint8_t *buf = NULL;
+/* Set by init_buf(): NUM_OFFSETS for COLD, 8 for WARM */
+uint32_t index_limit;
+/* 16-byte aligned pointer into the allocation held by offset_ptr */
+uint128_t *keys = NULL;
+/* Raw allocation backing keys; this is the pointer free_mem() releases */
+uint64_t *offset_ptr = NULL;
+/* Per-region key index and buffer offset tables filled by init_buf() */
+uint32_t key_idxs[NUM_OFFSETS];
+uint32_t offsets[NUM_OFFSETS];
+/* Added to the cipher length to get the hash length and used as the
+ * cipher start offset in do_test() */
+int sha_size_incr = 24;
+
+uint8_t archs[NUM_ARCHS] = {1, 1, 1, 1}; /* uses all function sets */
+/* AES, DOCSIS, GCM, CCM, DES, 3DES */
+uint8_t test_types[NUM_TYPES] = {1, 1, 1, 1, 1, 1};
+
+int use_gcm_job_api = 0;
+int use_unhalted_cycles = 0; /* read unhalted cycles instead of tsc */
+uint64_t rd_cycles_cost = 0; /* cost of reading unhalted cycles */
+uint64_t core_mask = 0; /* bitmap of selected cores */
+
+uint64_t flags = 0; /* flags passed to alloc_mb_mgr() */
+
+/* Those inline functions run different types of ipsec_mb library functions.
+ * They run different functions depending on the chosen architecture
+ *
+ * 'arch' indexes func_sets[] directly and is not range checked here, so it
+ * must be lower than NUM_ARCHS.
+ */
+__forceinline void init_mb_mgr(MB_MGR *mgr, uint32_t arch)
+{
+ func_sets[arch].init_mb_mgr(mgr);
+}
+
+/* Calls the get_next_job entry point of the given architecture */
+__forceinline JOB_AES_HMAC *get_next_job(MB_MGR *mgr, const uint32_t arch)
+{
+ return func_sets[arch].get_next_job(mgr);
+}
+
+/* Calls the submit_job entry point of the given architecture */
+__forceinline JOB_AES_HMAC *submit_job(MB_MGR *mgr, const uint32_t arch)
+{
+ return func_sets[arch].submit_job(mgr);
+}
+
+/* Calls the get_completed_job entry point of the given architecture */
+__forceinline JOB_AES_HMAC *get_completed_job(MB_MGR *mgr, const uint32_t arch)
+{
+ return func_sets[arch].get_completed_job(mgr);
+}
+
+/* Calls the flush_job entry point of the given architecture */
+__forceinline JOB_AES_HMAC *flush_job(MB_MGR *mgr, const uint32_t arch)
+{
+ return func_sets[arch].flush_job(mgr);
+}
+
+/* GCM functions take also key size argument (128, 192, 256bit) */
+/* 'arch' and 'key_sz' index func_sets_gcm[][] directly without range
+ * checks: arch must be lower than NUM_ARCHS - 1 and key_sz lower than 3
+ * (0, 1, 2 select the 128, 192, 256 bit variants). */
+__forceinline void aesni_gcm_pre(const uint32_t arch, const uint8_t key_sz,
+ uint8_t *key, struct gcm_key_data *gdata)
+{
+ func_sets_gcm[arch][key_sz].aesni_gcm_pre(key, gdata);
+}
+
+/* Forwards all arguments unchanged to the selected GCM encrypt function */
+__forceinline void aesni_gcm_enc(const uint32_t arch, const uint8_t key_sz,
+ const struct gcm_key_data *gdata,
+ struct gcm_context_data *ctx,
+ uint8_t *out, uint8_t const *in,
+ uint64_t len, uint8_t *iv,
+ uint8_t const *aad, uint64_t aad_len,
+ uint8_t *auth_tag, uint64_t auth_tag_len)
+{
+ func_sets_gcm[arch][key_sz].aesni_gcm_enc(gdata, ctx, out, in, len, iv,
+ aad, aad_len,
+ auth_tag, auth_tag_len);
+
+}
+
+/* Forwards all arguments unchanged to the selected GCM decrypt function */
+__forceinline void aesni_gcm_dec(const uint32_t arch, const uint8_t key_sz,
+ const struct gcm_key_data *gdata,
+ struct gcm_context_data *ctx,
+ uint8_t *out, uint8_t const *in,
+ uint64_t len, uint8_t *iv,
+ uint8_t const *aad, uint64_t aad_len,
+ uint8_t *auth_tag, uint64_t auth_tag_len)
+{
+ func_sets_gcm[arch][key_sz].aesni_gcm_dec(gdata, ctx, out, in, len, iv,
+ aad, aad_len,
+ auth_tag, auth_tag_len);
+
+}
+
+/*
+ * Return the unhalted cycles counter of the given core, read through
+ * msr_read(). A failed read is fatal: an error is printed to stderr and
+ * the process exits with EXIT_FAILURE.
+ */
+__forceinline uint64_t read_cycles(uint32_t core)
+{
+	uint64_t cycles = 0;
+
+	if (msr_read(core, IA32_MSR_CPU_UNHALTED_THREAD,
+		     &cycles) == MACHINE_RETVAL_OK)
+		return cycles;
+
+	fprintf(stderr,
+		"Error reading cycles counter on core %u!\n", core);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * qsort() comparator for uint64_t values, ascending order.
+ * Returns -1 when *a < *b, 0 when they are equal and 1 when *a > *b.
+ */
+static int compare(const void *a, const void *b)
+{
+	const uint64_t lhs = *(const uint64_t *)a;
+	const uint64_t rhs = *(const uint64_t *)b;
+
+	if (lhs < rhs)
+		return -1;
+
+	return (lhs > rhs) ? 1 : 0;
+}
+
+/* Count the bits that are set in val */
+static int bitcount(const uint64_t val)
+{
+	uint64_t remaining = val;
+	int count = 0;
+
+	/* each iteration clears the lowest bit that is still set */
+	while (remaining != 0) {
+		remaining &= remaining - 1;
+		count++;
+	}
+
+	return count;
+}
+
+/*
+ * Get the next core in core mask.
+ *
+ * The search starts AT last_core (inclusive) and walks towards higher bit
+ * positions; a negative last_core starts from the beginning of core_mask.
+ * Returns the position of the first set bit found, or -1 when there is
+ * none. A last_core at or beyond the width of the mask also gives -1;
+ * it is never used as a shift count.
+ */
+static int next_core(const uint64_t core_mask,
+		     const int last_core)
+{
+	int core = (last_core >= 0) ? last_core : 0;
+
+	for (; core < (int)BITS(core_mask); core++)
+		if ((core_mask >> core) & 1)
+			return core;
+
+	return -1;
+}
+
+/*
+ * Set CPU affinity for current thread.
+ *
+ * Returns 0 on success, 1 when the number of processors cannot be
+ * obtained or 'cpu' is out of range, otherwise the return value of
+ * sched_setaffinity(). On Windows it does nothing and returns 0.
+ */
+static int set_affinity(const int cpu)
+{
+	int ret = 0;
+#ifndef _WIN32
+	cpu_set_t cpuset;
+	int num_cpus = 0;
+
+	/* Get number of cpus in the system; sysconf() returns -1 on error */
+	num_cpus = (int)sysconf(_SC_NPROCESSORS_CONF);
+	if (num_cpus <= 0) {
+		fprintf(stderr, "Failed to get number of processors "
+			"in the system!\n");
+		return 1;
+	}
+
+	/* Check if selected core is valid */
+	if (cpu < 0 || cpu >= num_cpus) {
+		fprintf(stderr, "Invalid CPU selected! "
+			"Max valid CPU is %d\n", num_cpus - 1);
+		return 1;
+	}
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+
+	/* Set affinity of current process to cpu */
+	ret = sched_setaffinity(0, sizeof(cpuset), &cpuset);
+#endif /* _WIN32 */
+	return ret;
+}
+
+/*
+ * Start counting unhalted cycles on the given core.
+ * Returns 1 when 'core' does not fit in core_mask, otherwise the result
+ * of the first failing msr_write() or of the final enabling write.
+ */
+static int start_cycles_ctr(uint32_t core)
+{
+	/* Preparation writes, performed strictly in this order */
+	static const struct {
+		unsigned reg;
+		uint64_t val;
+	} prep[] = {
+		{IA32_MSR_PERF_GLOBAL_CTR, 0},     /* disable cycles counter */
+		{IA32_MSR_CPU_UNHALTED_THREAD, 0}, /* zero cycles counter */
+		{IA32_MSR_FIXED_CTR_CTRL, 0x30}    /* OS and user tracking
+						      in FixedCtr1 */
+	};
+	unsigned step;
+
+	if (core >= BITS(core_mask))
+		return 1;
+
+	for (step = 0; step < DIM(prep); step++) {
+		const int rc = msr_write(core, prep[step].reg, prep[step].val);
+
+		if (rc != MACHINE_RETVAL_OK)
+			return rc;
+	}
+
+	/* Enable cycles counter */
+	return msr_write(core, IA32_MSR_PERF_GLOBAL_CTR, (1ULL << 33));
+}
+
+/*
+ * Init MSR module.
+ *
+ * Passes the number of configured processors to machine_init() (0 on
+ * Windows) and returns its result. Returns MACHINE_RETVAL_ERROR when the
+ * processor count cannot be obtained.
+ */
+static int init_msr_mod(void)
+{
+	unsigned max_core_count = 0;
+#ifndef _WIN32
+	/* Keep the signed result: sysconf() reports errors as -1, which
+	 * would turn into a huge core count if stored as unsigned */
+	const long num_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+	if (num_cpus <= 0) {
+		fprintf(stderr, "Zero processors in the system!\n");
+		return MACHINE_RETVAL_ERROR;
+	}
+	max_core_count = (unsigned)num_cpus;
+#endif
+	return machine_init(max_core_count);
+}
+
+/*
+ * Measure the cost of one unhalted cycles read: the difference between
+ * two back-to-back read_cycles() calls is stored in *value.
+ * Returns 0 on success, 1 on invalid arguments.
+ */
+static int set_unhalted_cycle_cost(const int core, uint64_t *value)
+{
+	uint64_t first, second;
+
+	if (core < 0 || value == NULL)
+		return 1;
+
+	first = read_cycles(core);
+	second = read_cycles(core);
+
+	*value = second - first;
+
+	return 0;
+}
+
+/*
+ * Estimate the general cost of reading unhalted cycles: 10 samples are
+ * taken, sorted, and the element in the middle of the sorted table
+ * (the median) is stored in *value.
+ * Returns 0 on success, 1 on invalid arguments or a failed sample.
+ */
+static int set_avg_unhalted_cycle_cost(const int core, uint64_t *value)
+{
+	uint64_t samples[10];
+	const unsigned num_samples = DIM(samples);
+	unsigned idx = 0;
+
+	if (core < 0 || core_mask == 0 || value == NULL)
+		return 1;
+
+	/* Collect the read cost samples */
+	while (idx < num_samples) {
+		if (set_unhalted_cycle_cost(core, &samples[idx]) != 0)
+			return 1;
+		idx++;
+	}
+
+	qsort(samples, num_samples, sizeof(samples[0]), compare);
+
+	*value = samples[num_samples / 2];
+
+	return 0;
+}
+
+/*
+ * Free the memory allocated by init_buf().
+ *
+ * The pointers are reset afterwards, so calling this function more than
+ * once is harmless. 'keys' points into the offset_ptr allocation and is
+ * reset together with it.
+ */
+static void free_mem(void)
+{
+	/* free(NULL) is a no-op, so no NULL checks are needed */
+	free(offset_ptr);
+	offset_ptr = NULL;
+	keys = NULL;
+
+	free(buf);
+	buf = NULL;
+}
+
+/* Input buffer initialization */
+/*
+ * Allocates the input buffer and the key storage and fills the offsets[]
+ * and key_idxs[] tables according to the cache scenario 'ctype'.
+ * Exits the process with EXIT_FAILURE when an allocation fails.
+ */
+static void init_buf(enum cache_type_e ctype)
+{
+ uint32_t tmp_off;
+ uint64_t offset;
+ int i;
+
+ /* The buffer contents are not initialized here */
+ buf = (uint8_t *) malloc(BUFSIZE + REGION_SIZE);
+ if (!buf) {
+ fprintf(stderr, "Could not malloc buf\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* 0x0F extra bytes leave room to align 'keys' below */
+ offset_ptr = (uint64_t *)
+ malloc(NUM_OFFSETS * KEYS_PER_JOB * sizeof(uint128_t) + 0x0F);
+ if (!offset_ptr) {
+ fprintf(stderr, "Could not malloc keys\n");
+ free_mem();
+ exit(EXIT_FAILURE);
+ }
+
+ offset = (uint64_t) offset_ptr;
+ keys = (uint128_t *) ((offset + 0x0F) & ~0x0F); /* align to 16 bytes */
+
+ if (ctype == COLD) {
+ /* One entry per region, each with a random 16-byte-multiple
+ * displacement of up to 0x3F0 bytes */
+ for (i = 0; i < NUM_OFFSETS; i++) {
+ offsets[i] = i * REGION_SIZE + (rand() & 0x3F0);
+ key_idxs[i] = i * KEYS_PER_JOB;
+ }
+ /* Shuffle: swap entry i with a random entry in [0, i];
+ * offsets[] and key_idxs[] are permuted together */
+ for (i = NUM_OFFSETS - 1; i >= 0; i--) {
+ offset = rand();
+ offset *= i;
+ offset /= RAND_MAX;
+ tmp_off = offsets[offset];
+ offsets[offset] = offsets[i];
+ offsets[i] = tmp_off;
+ tmp_off = key_idxs[offset];
+ key_idxs[offset] = key_idxs[i];
+ key_idxs[i] = tmp_off;
+ }
+ index_limit = NUM_OFFSETS;
+ } else {/* WARM */
+ /* NOTE(review): key_idxs[i + 1] is not written in this branch and
+ * offsets for large i lie beyond the buffer; presumably harmless
+ * because index_limit is 8 - confirm against the users of these
+ * tables */
+ for (i = 0; i < NUM_OFFSETS; i += 2) {
+ offsets[i] = (2 * i + 0) * REGION_SIZE +
+ (rand() & 0x3F0);
+ offsets[i + 1] = (2 * i + 1) * REGION_SIZE +
+ (rand() & 0x3F0);
+ key_idxs[i] = (2 * i + 0) * KEYS_PER_JOB;
+ }
+ index_limit = 8;
+ }
+}
+
+/*
+ * Map enum test_cipher_mode_e onto the JOB_CIPHER_MODE value used by the
+ * ipsec_mb library. The "+8"/"+4" test modes map to the same library mode
+ * as their base mode; unknown values map to NULL_CIPHER.
+ */
+static JOB_CIPHER_MODE translate_cipher_mode(enum test_cipher_mode_e test_mode)
+{
+	switch (test_mode) {
+	case TEST_CBC:
+		return CBC;
+	case TEST_CNTR:
+	case TEST_CNTR8:
+		return CNTR;
+	case TEST_AESDOCSIS:
+	case TEST_AESDOCSIS8:
+		return DOCSIS_SEC_BPI;
+	case TEST_DESDOCSIS:
+	case TEST_DESDOCSIS4:
+		return DOCSIS_DES;
+	case TEST_GCM:
+		return GCM;
+	case TEST_CCM:
+		return CCM;
+	case TEST_DES:
+		return DES;
+	case TEST_3DES:
+		return DES3;
+	case TEST_NULL_CIPHER:
+	default:
+		return NULL_CIPHER;
+	}
+}
+
+/*
+ * Performs test using AES_HMAC or DOCSIS
+ *
+ * Builds a job template from params, submits num_iter copies of it
+ * through the multi-buffer manager, flushes the outstanding jobs and
+ * times the whole sequence with either the unhalted cycles counter or
+ * RDTSCP.
+ *
+ * arch     - architecture index forwarded to the job API wrappers
+ * mb_mgr   - multi-buffer manager used to get, submit and flush jobs
+ * params   - test parameters (cipher mode and direction, hash algorithm,
+ *            key size, buffer size, core)
+ * num_iter - number of jobs to submit
+ *
+ * Returns the measured time divided by num_iter, or 0 when num_iter is 0.
+ *
+ * NOTE: the buffer index and the IV / key / digest scratch areas are
+ * static, so they are shared by every caller of this function.
+ */
+static uint64_t
+do_test(const uint32_t arch, MB_MGR *mb_mgr, struct params_s *params,
+        const uint32_t num_iter)
+{
+        JOB_AES_HMAC *job;
+        JOB_AES_HMAC job_template;
+        uint32_t i;
+        static uint32_t index = 0;
+        static DECLARE_ALIGNED(uint128_t iv, 16);
+        static uint32_t ipad[5], opad[5], digest[3];
+        static DECLARE_ALIGNED(uint32_t k1_expanded[11 * 4], 16);
+        static DECLARE_ALIGNED(uint8_t k2[16], 16);
+        static DECLARE_ALIGNED(uint8_t k3[16], 16);
+        static DECLARE_ALIGNED(struct gcm_key_data gdata_key, 16);
+        uint32_t size_aes;
+        uint64_t time = 0;
+        uint32_t aux;
+
+        /* nothing to measure and the final division would be by zero */
+        if (num_iter == 0)
+                return 0;
+
+        /*
+         * The template is copied into every job, so start from a known
+         * state instead of leaving the fields not set below indeterminate.
+         */
+        memset(&job_template, 0, sizeof(job_template));
+
+        /* the "+8" / "+4" variants use a slightly longer buffer */
+        if ((params->cipher_mode == TEST_AESDOCSIS8) ||
+            (params->cipher_mode == TEST_CNTR8))
+                size_aes = params->size_aes + 8;
+        else if (params->cipher_mode == TEST_DESDOCSIS4)
+                size_aes = params->size_aes + 4;
+        else
+                size_aes = params->size_aes;
+
+        job_template.msg_len_to_cipher_in_bytes = size_aes;
+        job_template.msg_len_to_hash_in_bytes = size_aes + sha_size_incr;
+        job_template.hash_start_src_offset_in_bytes = 0;
+        job_template.cipher_start_src_offset_in_bytes = sha_size_incr;
+        job_template.iv = (uint8_t *) &iv;
+        job_template.iv_len_in_bytes = 16;
+
+        job_template.auth_tag_output = (uint8_t *) digest;
+
+        switch (params->hash_alg) {
+        case TEST_XCBC:
+                job_template.u.XCBC._k1_expanded = k1_expanded;
+                job_template.u.XCBC._k2 = k2;
+                job_template.u.XCBC._k3 = k3;
+                job_template.hash_alg = AES_XCBC;
+                break;
+        case TEST_HASH_CCM:
+                job_template.hash_alg = AES_CCM;
+                break;
+        case TEST_HASH_GCM:
+                job_template.hash_alg = AES_GMAC;
+                break;
+        case TEST_NULL_HASH:
+                job_template.hash_alg = NULL_HASH;
+                break;
+        case TEST_HASH_CMAC:
+                job_template.u.CMAC._key_expanded = k1_expanded;
+                job_template.u.CMAC._skey1 = k2;
+                job_template.u.CMAC._skey2 = k3;
+                job_template.hash_alg = AES_CMAC;
+                break;
+        default:
+                /* HMAC hash alg is SHA1 or MD5 */
+                job_template.u.HMAC._hashed_auth_key_xor_ipad =
+                        (uint8_t *) ipad;
+                job_template.u.HMAC._hashed_auth_key_xor_opad =
+                        (uint8_t *) opad;
+                job_template.hash_alg = (JOB_HASH_ALG) params->hash_alg;
+                break;
+        }
+        /* the tag length table is indexed by the library hash id minus one */
+        job_template.auth_tag_output_len_in_bytes =
+                (uint64_t) auth_tag_length_bytes[job_template.hash_alg - 1];
+
+        job_template.cipher_direction = params->cipher_dir;
+
+        /* only a real cipher running in the encrypt direction goes first */
+        if (params->cipher_mode == TEST_NULL_CIPHER) {
+                job_template.chain_order = HASH_CIPHER;
+        } else {
+                if (job_template.cipher_direction == ENCRYPT)
+                        job_template.chain_order = CIPHER_HASH;
+                else
+                        job_template.chain_order = HASH_CIPHER;
+        }
+
+        /* Translating enum to the API's one */
+        job_template.cipher_mode = translate_cipher_mode(params->cipher_mode);
+        job_template.aes_key_len_in_bytes = params->aes_key_size;
+        if (job_template.cipher_mode == GCM) {
+                /* zeroed so the key expansion reads defined bytes */
+                uint8_t key[32] = {0};
+
+                aesni_gcm_pre(arch, (params->aes_key_size / 8) - 2,
+                              key, &gdata_key);
+                job_template.aes_enc_key_expanded = &gdata_key;
+                job_template.aes_dec_key_expanded = &gdata_key;
+                job_template.u.GCM.aad_len_in_bytes = 12;
+                job_template.iv_len_in_bytes = 12;
+        } else if (job_template.cipher_mode == CCM) {
+                job_template.msg_len_to_cipher_in_bytes = size_aes;
+                job_template.msg_len_to_hash_in_bytes = size_aes;
+                job_template.hash_start_src_offset_in_bytes = 0;
+                job_template.cipher_start_src_offset_in_bytes = 0;
+                job_template.u.CCM.aad_len_in_bytes = 8;
+                job_template.iv_len_in_bytes = 13;
+        } else if (job_template.cipher_mode == DES ||
+                   job_template.cipher_mode == DES3 ||
+                   job_template.cipher_mode == DOCSIS_DES) {
+                job_template.aes_key_len_in_bytes = 8;
+                job_template.iv_len_in_bytes = 8;
+        }
+
+        /* start of the timed section */
+#ifndef _WIN32
+        if (use_unhalted_cycles)
+                time = read_cycles(params->core);
+        else
+#endif
+                time = __rdtscp(&aux);
+
+        for (i = 0; i < num_iter; i++) {
+                job = get_next_job(mb_mgr, arch);
+                *job = job_template;
+
+                job->src = buf + offsets[index];
+                job->dst = buf + offsets[index] + sha_size_incr;
+                if (job->cipher_mode == GCM) {
+                        job->u.GCM.aad = job->src;
+                } else if (job->cipher_mode == CCM) {
+                        job->u.CCM.aad = job->src;
+                        job->aes_enc_key_expanded = job->aes_dec_key_expanded =
+                                (uint32_t *) &keys[key_idxs[index]];
+                } else if (job->cipher_mode == DES3) {
+                        /* all three key schedule slots share one entry */
+                        static const void *ks_ptr[3];
+
+                        ks_ptr[0] = ks_ptr[1] = ks_ptr[2] =
+                                &keys[key_idxs[index]];
+                        job->aes_enc_key_expanded =
+                                job->aes_dec_key_expanded = ks_ptr;
+                } else {
+                        job->aes_enc_key_expanded = job->aes_dec_key_expanded =
+                                (uint32_t *) &keys[key_idxs[index]];
+                }
+
+                /* step through the offset table two entries at a time */
+                index += 2;
+                if (index >= index_limit)
+                        index = 0;
+
+                /* drain every job that completed with this submission */
+                job = submit_job(mb_mgr, arch);
+                while (job) {
+#ifdef DEBUG
+                        if (job->status != STS_COMPLETED)
+                                fprintf(stderr, "failed job, status:%d\n",
+                                        job->status);
+#endif
+                        job = get_completed_job(mb_mgr, arch);
+                }
+        }
+
+        /* force out the jobs still queued inside the manager */
+        while ((job = flush_job(mb_mgr, arch))) {
+#ifdef DEBUG
+                if (job->status != STS_COMPLETED)
+                        fprintf(stderr, "failed job, status:%d\n", job->status);
+#endif
+        }
+
+        /* end of the timed section; the counter read cost is subtracted */
+#ifndef _WIN32
+        if (use_unhalted_cycles)
+                time = (read_cycles(params->core) - rd_cycles_cost) - time;
+        else
+#endif
+                time = __rdtscp(&aux) - time;
+
+        return time / num_iter;
+}
+
+/*
+ * Performs test using GCM
+ *
+ * Expands a key and then runs num_iter in-place GCM encrypt or decrypt
+ * operations (selected by params->cipher_dir) over the shared buffer,
+ * timing the loop with either the unhalted cycles counter or RDTSCP.
+ *
+ * arch     - architecture index forwarded to the GCM wrappers
+ * params   - test parameters (direction, key size, buffer size, core)
+ * num_iter - number of operations to run
+ *
+ * Returns the measured time divided by num_iter, or 0 when num_iter is 0.
+ * Terminates the process when the key buffer cannot be allocated.
+ */
+static uint64_t
+do_test_gcm(const uint32_t arch, struct params_s *params,
+            const uint32_t num_iter)
+{
+        struct gcm_key_data gdata_key;
+        struct gcm_context_data gdata_ctx;
+        uint8_t *key;
+        static uint32_t index = 0;
+        uint8_t key_sz = params->aes_key_size / 8 - 2;
+        uint32_t size_aes = params->size_aes;
+        uint32_t i;
+        uint8_t aad[12];
+        uint8_t auth_tag[12];
+        DECLARE_ALIGNED(uint8_t iv[16], 16);
+        uint64_t time = 0;
+        uint32_t aux;
+
+        /* nothing to measure and the final division would be by zero */
+        if (num_iter == 0)
+                return 0;
+
+        /* zeroed so that the key expansion reads defined bytes */
+        key = (uint8_t *) calloc(params->aes_key_size, sizeof(uint8_t));
+        if (!key) {
+                fprintf(stderr, "Could not malloc key\n");
+                free_mem();
+                exit(EXIT_FAILURE);
+        }
+
+        /* the IV and AAD are inputs of every operation: give them a value */
+        memset(iv, 0, sizeof(iv));
+        memset(aad, 0, sizeof(aad));
+
+        aesni_gcm_pre(arch, key_sz, key, &gdata_key);
+
+        /* start of the timed section */
+#ifndef _WIN32
+        if (use_unhalted_cycles)
+                time = read_cycles(params->core);
+        else
+#endif
+                time = __rdtscp(&aux);
+
+        /* the direction is chosen once, outside of the measured loops */
+        if (params->cipher_dir == ENCRYPT) {
+                for (i = 0; i < num_iter; i++) {
+                        aesni_gcm_enc(arch, key_sz, &gdata_key, &gdata_ctx,
+                                      buf + offsets[index] + sha_size_incr,
+                                      buf + offsets[index] + sha_size_incr,
+                                      size_aes, iv, aad, sizeof(aad),
+                                      auth_tag, sizeof(auth_tag));
+                        index += 2;
+                        if (index >= index_limit)
+                                index = 0;
+                }
+        } else { /*DECRYPT*/
+                for (i = 0; i < num_iter; i++) {
+                        aesni_gcm_dec(arch, key_sz, &gdata_key, &gdata_ctx,
+                                      buf + offsets[index] + sha_size_incr,
+                                      buf + offsets[index] + sha_size_incr,
+                                      size_aes, iv, aad, sizeof(aad),
+                                      auth_tag, sizeof(auth_tag));
+                        index += 2;
+                        if (index >= index_limit)
+                                index = 0;
+                }
+        }
+
+        /* end of the timed section; the counter read cost is subtracted */
+#ifndef _WIN32
+        if (use_unhalted_cycles)
+                time = (read_cycles(params->core) - rd_cycles_cost) - time;
+        else
+#endif
+                time = __rdtscp(&aux) - time;
+
+        free(key);
+        return time / num_iter;
+}
+
+
+/*
+ * Method used by qsort to compare 2 uint64_t values
+ *
+ * Returns a negative value, zero or a positive value when *a is less
+ * than, equal to or greater than *b. The values are compared directly:
+ * narrowing their 64 bit difference to int could report a wrong sign
+ * (or zero) for values that are far apart.
+ */
+static int compare_uint64_t(const void *a, const void *b)
+{
+        const uint64_t lhs = *(const uint64_t *)a;
+        const uint64_t rhs = *(const uint64_t *)b;
+
+        return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * Computes the rounded mean of the middle half of a set of times.
+ *
+ * The array is sorted in place, the lowest and the highest quarter of
+ * the samples are dropped to remove noise/variations and the remaining
+ * values are averaged. These are single threaded runs, so no additional
+ * clipping for hardware thread related skew is applied.
+ *
+ * Terminates the process when no data points are left to average.
+ */
+static uint64_t mean_median(uint64_t *array, uint32_t size)
+{
+        const uint32_t trim = size / 4;
+        const uint32_t kept = size - 2 * trim;
+        const uint64_t *sample;
+        uint64_t total = 0;
+        uint32_t left;
+
+        qsort(array, size, sizeof(uint64_t), compare_uint64_t);
+
+        /* a set top bit is treated as an invalid count */
+        if (kept == 0 || kept > 0x7FFFFFFF) {
+                fprintf(stderr, "not enough data points\n");
+                free_mem();
+                exit(EXIT_FAILURE);
+        }
+
+        /* sum the samples between the two dropped quarters */
+        sample = array + trim;
+        for (left = kept; left != 0; left--)
+                total += *sample++;
+
+        /* adding half of the divisor rounds to the nearest integer */
+        return (total + kept / 2) / kept;
+}
+
+/*
+ * Runs the test for every buffer size and stores each measured time in
+ * the variant's avg_times table, where the NUM_RUNS results of one size
+ * are kept next to each other. The variant also records the parameters
+ * and the architecture it was measured with.
+ */
+static void
+process_variant(MB_MGR *mgr, const uint32_t arch, struct params_s *params,
+                struct variant_s *variant_ptr, const uint32_t run)
+{
+        const uint32_t num_sizes = params->num_sizes;
+        uint32_t idx;
+
+        for (idx = 0; idx < num_sizes; idx++) {
+                uint64_t *slot = &variant_ptr->avg_times[idx * NUM_RUNS + run];
+                const uint32_t size_aes = (idx + 1) * JOB_SIZE_STEP;
+                const uint32_t num_iter = ITER_SCALE / size_aes;
+
+                params->size_aes = size_aes;
+
+                /* the job API path is used unless raw GCM was selected */
+                if (params->test_type != TTYPE_AES_GCM || use_gcm_job_api)
+                        *slot = do_test(arch, mgr, params, num_iter);
+                else
+                        *slot = do_test_gcm(arch, params, 2 * num_iter);
+        }
+
+        variant_ptr->arch = arch;
+        variant_ptr->params = *params;
+}
+
+/*
+ * Selects the range of cipher modes and hash algorithms that belong to
+ * the current test type and runs one variant for every combination,
+ * advancing the caller's variant counter and variant pointer each time.
+ */
+static void
+do_variants(MB_MGR *mgr, const uint32_t arch, struct params_s *params,
+            const uint32_t run, struct variant_s **variant_ptr,
+            uint32_t *variant)
+{
+        /* defaults: all ciphers up to NULL_CIPHER, all hashes up to NULL_HASH */
+        uint32_t cipher_first = TEST_CBC;
+        uint32_t cipher_last = TEST_NULL_CIPHER;
+        uint32_t hash_first = TEST_SHA1;
+        uint32_t hash_last = TEST_NULL_HASH;
+        uint32_t cipher;
+        uint32_t hash;
+
+        if (params->test_type == TTYPE_AES_DOCSIS) {
+                hash_first = TEST_NULL_HASH;
+                cipher_first = TEST_AESDOCSIS;
+                cipher_last = TEST_DESDOCSIS4;
+        } else if (params->test_type == TTYPE_AES_GCM) {
+                hash_first = hash_last = TEST_HASH_GCM;
+                cipher_first = cipher_last = TEST_GCM;
+        } else if (params->test_type == TTYPE_AES_CCM) {
+                hash_first = hash_last = TEST_HASH_CCM;
+                cipher_first = cipher_last = TEST_CCM;
+        } else if (params->test_type == TTYPE_AES_DES) {
+                hash_first = hash_last = TEST_NULL_HASH;
+                cipher_first = cipher_last = TEST_DES;
+        } else if (params->test_type == TTYPE_AES_3DES) {
+                hash_first = hash_last = TEST_NULL_HASH;
+                cipher_first = cipher_last = TEST_3DES;
+        }
+
+        for (cipher = cipher_first; cipher <= cipher_last; cipher++) {
+                params->cipher_mode = (enum test_cipher_mode_e) cipher;
+
+                for (hash = hash_first; hash <= hash_last; hash++) {
+                        params->hash_alg = (enum test_hash_alg_e) hash;
+                        process_variant(mgr, arch, params, *variant_ptr, run);
+
+                        /* move on to the next result slot */
+                        *variant += 1;
+                        *variant_ptr += 1;
+                }
+        }
+}
+
+/*
+ * Initializes the manager for the given architecture and runs the
+ * variants for both cipher directions and for every key size, from
+ * AES_128_BYTES up to the limit of the test type, in steps of 8 bytes.
+ */
+static void
+run_dir_test(MB_MGR *mgr, const uint32_t arch, struct params_s *params,
+             const uint32_t run, struct variant_s **variant_ptr,
+             uint32_t *variant)
+{
+        uint32_t max_key_bytes = AES_256_BYTES;
+        uint32_t key_bytes;
+        uint32_t direction;
+
+        /* these test types are only run with the smallest key size */
+        switch (params->test_type) {
+        case TTYPE_AES_DOCSIS:
+        case TTYPE_AES_DES:
+        case TTYPE_AES_3DES:
+        case TTYPE_AES_CCM:
+                max_key_bytes = AES_128_BYTES;
+                break;
+        default:
+                break;
+        }
+
+        init_mb_mgr(mgr, arch);
+
+        for (direction = ENCRYPT; direction <= DECRYPT; direction++) {
+                params->cipher_dir = (JOB_CIPHER_DIRECTION) direction;
+
+                key_bytes = AES_128_BYTES;
+                while (key_bytes <= max_key_bytes) {
+                        params->aes_key_size = key_bytes;
+                        do_variants(mgr, arch, params, run, variant_ptr,
+                                    variant);
+                        key_bytes += 8;
+                }
+        }
+}
+
+/*
+ * Generates output containing averaged times for each test variant
+ *
+ * Prints a tab separated table on stdout: one header row each for the
+ * architecture, cipher mode, direction, hash algorithm and key size of
+ * every variant, followed by one row per buffer size holding the
+ * mean_median() of the NUM_RUNS samples of each variant.
+ *
+ * variant_list   - variants to print, one table column per entry
+ * params         - only num_sizes is read (number of buffer size rows)
+ * total_variants - number of entries in variant_list
+ *
+ * NOTE: mean_median() sorts the samples of every variant in place.
+ */
+static void print_times(struct variant_s *variant_list, struct params_s *params,
+                        const uint32_t total_variants)
+{
+        const uint32_t sizes = params->num_sizes;
+        uint32_t col;
+        uint32_t sz;
+
+        /*
+         * Name tables. The cipher and hash tables follow the order of the
+         * test enums, so they are indexed relative to TEST_CBC and
+         * TEST_SHA1, the first values used by do_variants().
+         */
+        static const char * const func_names[4] = {
+                "SSE", "AVX", "AVX2", "AVX512"
+        };
+        static const char * const c_mode_names[12] = {
+                "CBC", "CNTR", "CNTR+8", "NULL_CIPHER", "DOCAES", "DOCAES+8",
+                "DOCDES", "DOCDES+4", "GCM", "CCM", "DES", "3DES"
+        };
+        static const char * const c_dir_names[2] = {
+                "ENCRYPT", "DECRYPT"
+        };
+        static const char * const h_alg_names[12] = {
+                "SHA1", "SHA_224", "SHA_256", "SHA_384", "SHA_512", "XCBC",
+                "MD5", "CMAC", "NULL_HASH", "GCM", "CUSTOM", "CCM"
+        };
+
+        printf("ARCH");
+        for (col = 0; col < total_variants; col++)
+                printf("\t%s", func_names[variant_list[col].arch]);
+        printf("\n");
+
+        printf("CIPHER");
+        for (col = 0; col < total_variants; col++) {
+                const struct params_s *par = &variant_list[col].params;
+                const uint8_t c_mode = par->cipher_mode - TEST_CBC;
+
+                printf("\t%s", c_mode_names[c_mode]);
+        }
+        printf("\n");
+
+        printf("DIR");
+        for (col = 0; col < total_variants; col++) {
+                const struct params_s *par = &variant_list[col].params;
+                const uint8_t c_dir = par->cipher_dir - ENCRYPT;
+
+                printf("\t%s", c_dir_names[c_dir]);
+        }
+        printf("\n");
+
+        printf("HASH_ALG");
+        for (col = 0; col < total_variants; col++) {
+                const struct params_s *par = &variant_list[col].params;
+                const uint8_t h_alg = par->hash_alg - TEST_SHA1;
+
+                printf("\t%s", h_alg_names[h_alg]);
+        }
+        printf("\n");
+
+        printf("KEY_SIZE");
+        for (col = 0; col < total_variants; col++) {
+                const struct params_s *par = &variant_list[col].params;
+
+                printf("\tAES-%u", par->aes_key_size * 8);
+        }
+        printf("\n");
+
+        for (sz = 0; sz < sizes; sz++) {
+                /* buffer size of this row; printed as an unsigned value */
+                printf("%u", (unsigned) ((sz + 1) * JOB_SIZE_STEP));
+                for (col = 0; col < total_variants; col++) {
+                        /* the NUM_RUNS samples of one size are contiguous */
+                        uint64_t *time_ptr =
+                                &variant_list[col].avg_times[sz * NUM_RUNS];
+                        const unsigned long long val =
+                                mean_median(time_ptr, NUM_RUNS);
+
+                        printf("\t%llu", val);
+                }
+                printf("\n");
+        }
+}
+
+/* Prepares data structure for test variants storage, sets test configuration */
+/*
+ * Thread entry point (arg points to a struct thread_info).
+ *
+ * Allocates its own MB_MGR, optionally pins itself to info->core, counts
+ * the variants produced by the enabled test types and architectures,
+ * allocates the result storage, performs NUM_RUNS measurement runs and
+ * prints the results when info->print_info is set.
+ *
+ * Every failure path calls exit(), which terminates the whole process.
+ */
+#ifdef _WIN32
+static void
+#else
+static void *
+#endif
+run_tests(void *arg)
+{
+ uint32_t i;
+ struct thread_info *info = (struct thread_info *)arg;
+ MB_MGR *p_mgr = NULL;
+ struct params_s params;
+ /* elements without an explicit initializer are set to zero as well */
+ uint32_t num_variants[NUM_TYPES] = {0, 0, 0};
+ uint32_t type, at_size, run, arch;
+ uint32_t variants_per_arch, max_arch;
+ uint32_t variant;
+ uint32_t total_variants = 0;
+ struct variant_s *variant_ptr = NULL;
+ struct variant_s *variant_list = NULL;
+
+ p_mgr = alloc_mb_mgr(flags);
+ if (p_mgr == NULL) {
+ fprintf(stderr, "Failed to allocate MB_MGR structure!\n");
+ free_mem();
+ exit(EXIT_FAILURE);
+ }
+
+ params.num_sizes = JOB_SIZE / JOB_SIZE_STEP;
+ params.core = (uint32_t)info->core;
+
+ /* if cores selected then set affinity */
+ if (core_mask)
+ if (set_affinity(info->core) != 0) {
+ fprintf(stderr, "Failed to set cpu "
+ "affinity on core %d\n", info->core);
+ goto exit_failure;
+ }
+
+ /* If unhalted cycles selected and this is
+ the primary thread then start counter */
+ if (use_unhalted_cycles && info->print_info) {
+ int ret;
+
+ ret = start_cycles_ctr(params.core);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to start cycles "
+ "counter on core %u\n", params.core);
+ goto exit_failure;
+ }
+ /* Get average cost of reading counter */
+ ret = set_avg_unhalted_cycle_cost(params.core, &rd_cycles_cost);
+ if (ret != 0 || rd_cycles_cost == 0) {
+ fprintf(stderr, "Error calculating unhalted "
+ "cycles read overhead!\n");
+ goto exit_failure;
+ } else
+ fprintf(stderr, "Started counting unhalted cycles on "
+ "core %d\nUnhalted cycles read cost = %lu "
+ "cycles\n", params.core,
+ (unsigned long)rd_cycles_cost);
+ }
+
+ /* First pass: count the variants of every enabled test type */
+ for (type = TTYPE_AES_HMAC; type < NUM_TYPES; type++) {
+ if (test_types[type] == 0)
+ continue;
+
+ switch (type) {
+ default:
+ case TTYPE_AES_HMAC:
+ variants_per_arch = VARIANTS_PER_ARCH_AES;
+ max_arch = NUM_ARCHS;
+ break;
+ case TTYPE_AES_DOCSIS:
+ variants_per_arch = VARIANTS_PER_ARCH_DOCSIS;
+ max_arch = NUM_ARCHS;
+ break;
+ case TTYPE_AES_GCM:
+ variants_per_arch = VARIANTS_PER_ARCH_GCM;
+ max_arch = NUM_ARCHS - 1; /* No AVX512 for GCM */
+ break;
+ case TTYPE_AES_CCM:
+ variants_per_arch = VARIANTS_PER_ARCH_CCM;
+ max_arch = NUM_ARCHS;
+ break;
+ case TTYPE_AES_DES:
+ variants_per_arch = VARIANTS_PER_ARCH_DES;
+ max_arch = NUM_ARCHS;
+ break;
+ case TTYPE_AES_3DES:
+ variants_per_arch = VARIANTS_PER_ARCH_3DES;
+ max_arch = NUM_ARCHS;
+ break;
+ }
+
+ /* Calculating number of all variants */
+ for (arch = 0; arch < max_arch; arch++) {
+ if (archs[arch] == 0)
+ continue;
+ num_variants[type] += variants_per_arch;
+ }
+ total_variants += num_variants[type];
+ }
+
+ /* One variant_s entry per variant counted above */
+ variant_list = (struct variant_s *)
+ malloc(total_variants * sizeof(struct variant_s));
+ if (variant_list == NULL) {
+ fprintf(stderr, "Cannot allocate memory\n");
+ goto exit_failure;
+ }
+
+ /* Each variant stores NUM_RUNS samples for every buffer size */
+ at_size = NUM_RUNS * params.num_sizes * sizeof(uint64_t);
+ for (variant = 0, variant_ptr = variant_list;
+ variant < total_variants;
+ variant++, variant_ptr++) {
+ variant_ptr->avg_times = (uint64_t *) malloc(at_size);
+ if (!variant_ptr->avg_times) {
+ fprintf(stderr, "Cannot allocate memory\n");
+ goto exit_failure;
+ }
+ }
+ /* Second pass: measure; every run walks the variant list from its start */
+ for (run = 0; run < NUM_RUNS; run++) {
+ fprintf(stderr, "Starting run %d of %d\n", run+1, NUM_RUNS);
+
+ variant = 0;
+ variant_ptr = variant_list;
+
+ for (type = TTYPE_AES_HMAC; type < NUM_TYPES; type++) {
+ if (test_types[type] == 0)
+ continue;
+
+ if (type == TTYPE_AES_GCM)
+ /* No AVX512 for GCM */
+ max_arch = NUM_ARCHS - 1;
+ else
+ max_arch = NUM_ARCHS;
+
+ params.num_variants = num_variants[type];
+ params.test_type = type;
+ /* Performing tests for each selected architecture */
+ for (arch = 0; arch < max_arch; arch++) {
+ if (archs[arch] == 0)
+ continue;
+ run_dir_test(p_mgr, arch, &params, run,
+ &variant_ptr, &variant);
+ }
+ } /* end for type */
+ } /* end for run */
+ /* Only the thread flagged with print_info reports its results */
+ if (info->print_info == 1)
+ print_times(variant_list, &params, total_variants);
+
+ if (variant_list != NULL) {
+ /* Freeing variants list */
+ for (i = 0; i < total_variants; i++)
+ free(variant_list[i].avg_times);
+ free(variant_list);
+ }
+ free_mb_mgr(p_mgr);
+#ifndef _WIN32
+ return NULL;
+
+#else
+ return;
+#endif
+ /*
+ * Error path: avg_times buffers that were already allocated are not
+ * freed here; the process exits right after the cleanup below.
+ */
+exit_failure:
+ if (variant_list != NULL)
+ free(variant_list);
+ free_mem();
+ free_mb_mgr(p_mgr);
+ exit(EXIT_FAILURE);
+}
+
+/* Prints the command line help on stderr */
+static void usage(void)
+{
+        /* options listed before the thread count limit */
+        fputs("Usage: ipsec_perf [args], "
+              "where args are zero or more\n"
+              "-h: print this message\n"
+              "-c: Use cold cache, it uses warm as default\n"
+              "-w: Use warm cache\n"
+              "--no-avx512: Don't do AVX512\n"
+              "--no-avx2: Don't do AVX2\n"
+              "--no-avx: Don't do AVX\n"
+              "--no-sse: Don't do SSE\n"
+              "-o val: Use <val> for the SHA size increment, default is 24\n"
+              "--shani-on: use SHA extensions, default: auto-detect\n"
+              "--shani-off: don't use SHA extensions\n"
+              "--no-gcm: do not run GCM perf tests\n"
+              "--no-aes: do not run standard AES + HMAC perf tests\n"
+              "--no-docsis: do not run DOCSIS cipher perf tests\n"
+              "--no-ccm: do not run CCM cipher perf tests\n"
+              "--no-des: do not run DES cipher perf tests\n"
+              "--no-3des: do not run 3DES cipher perf tests\n"
+              "--gcm-job-api: use JOB API for GCM perf tests"
+              " (raw GCM API is default)\n",
+              stderr);
+
+        /* the only line that needs formatting: the thread count limit */
+        fprintf(stderr,
+                "--threads num: <num> for the number of threads to run"
+                " Max: %d\n",
+                MAX_NUM_THREADS + 1);
+
+        /* remaining options */
+        fputs("--cores mask: <mask> CPU's to run threads\n"
+              "--unhalted-cycles: measure using unhalted cycles (requires root).\n"
+              " Note: RDTSC is used by default.\n",
+              stderr);
+}
+
+/*
+ * Program entry point.
+ *
+ * Parses the command line, validates the thread / core selection,
+ * optionally initializes the MSR module, allocates the test buffers and
+ * runs the tests: num_t - 1 worker threads are started and the calling
+ * thread runs the last, result printing, instance itself.
+ *
+ * Returns EXIT_SUCCESS on success and after "-h", EXIT_FAILURE on
+ * invalid arguments or when an initialization step fails.
+ */
+int main(int argc, char *argv[])
+{
+        int i, num_t = 0, core = 0;
+        struct thread_info *thread_info_p = t_info;
+
+#ifdef _WIN32
+        HANDLE threads[MAX_NUM_THREADS];
+#else
+        pthread_t tids[MAX_NUM_THREADS];
+#endif
+
+        /*
+         * Options taking a value are only matched when the value is
+         * present; otherwise they fall through to the usage message
+         * instead of reading past the end of argv.
+         */
+        for (i = 1; i < argc; i++)
+                if (strcmp(argv[i], "-h") == 0) {
+                        usage();
+                        return EXIT_SUCCESS;
+                } else if (strcmp(argv[i], "-c") == 0) {
+                        cache_type = COLD;
+                        fprintf(stderr, "Cold cache, ");
+                } else if (strcmp(argv[i], "-w") == 0) {
+                        cache_type = WARM;
+                        fprintf(stderr, "Warm cache, ");
+                } else if (strcmp(argv[i], "--no-avx512") == 0) {
+                        archs[ARCH_AVX512] = 0;
+                } else if (strcmp(argv[i], "--no-avx2") == 0) {
+                        archs[ARCH_AVX2] = 0;
+                } else if (strcmp(argv[i], "--no-avx") == 0) {
+                        archs[ARCH_AVX] = 0;
+                } else if (strcmp(argv[i], "--no-sse") == 0) {
+                        archs[ARCH_SSE] = 0;
+                } else if (strcmp(argv[i], "--shani-on") == 0) {
+                        flags &= (~IMB_FLAG_SHANI_OFF);
+                } else if (strcmp(argv[i], "--shani-off") == 0) {
+                        flags |= IMB_FLAG_SHANI_OFF;
+                } else if (strcmp(argv[i], "--no-gcm") == 0) {
+                        test_types[TTYPE_AES_GCM] = 0;
+                } else if (strcmp(argv[i], "--no-aes") == 0) {
+                        test_types[TTYPE_AES_HMAC] = 0;
+                } else if (strcmp(argv[i], "--no-docsis") == 0) {
+                        test_types[TTYPE_AES_DOCSIS] = 0;
+                } else if (strcmp(argv[i], "--no-ccm") == 0) {
+                        test_types[TTYPE_AES_CCM] = 0;
+                } else if (strcmp(argv[i], "--no-des") == 0) {
+                        test_types[TTYPE_AES_DES] = 0;
+                } else if (strcmp(argv[i], "--no-3des") == 0) {
+                        test_types[TTYPE_AES_3DES] = 0;
+                } else if (strcmp(argv[i], "--gcm-job-api") == 0) {
+                        use_gcm_job_api = 1;
+                } else if ((strcmp(argv[i], "-o") == 0) && (i < argc - 1)) {
+                        i++;
+                        sha_size_incr = atoi(argv[i]);
+                } else if ((strcmp(argv[i], "--threads") == 0) &&
+                           (i < argc - 1)) {
+                        num_t = atoi(argv[++i]);
+                        if (num_t < 0 || num_t > (MAX_NUM_THREADS + 1)) {
+                                fprintf(stderr, "Invalid number of threads!\n");
+                                return EXIT_FAILURE;
+                        }
+                } else if ((strcmp(argv[i], "--cores") == 0) &&
+                           (i < argc - 1)) {
+                        errno = 0;
+                        core_mask = strtoull(argv[++i], NULL, 0);
+                        if (errno != 0) {
+                                fprintf(stderr, "Error converting cpu mask!\n");
+                                return EXIT_FAILURE;
+                        }
+                } else if (strcmp(argv[i], "--unhalted-cycles") == 0) {
+#ifdef _WIN32
+                        fprintf(stderr, "Counting unhalted cycles not "
+                                "currently supported on Windows!\n");
+                        return EXIT_FAILURE;
+#else
+                        use_unhalted_cycles = 1;
+#endif
+                } else {
+                        usage();
+                        return EXIT_FAILURE;
+                }
+
+        /* Check num cores >= number of threads */
+        if ((core_mask != 0 && num_t != 0) && (num_t > bitcount(core_mask))) {
+                fprintf(stderr, "Insufficient number of cores in "
+                        "core mask (0x%lx) to run %d threads!\n",
+                        (unsigned long) core_mask, num_t);
+                return EXIT_FAILURE;
+        }
+
+        /* if cycles selected then init MSR module */
+        if (use_unhalted_cycles) {
+                if (core_mask == 0) {
+                        fprintf(stderr, "Must specify core mask "
+                                "when reading unhalted cycles!\n");
+                        return EXIT_FAILURE;
+                }
+
+                if (init_msr_mod() != 0) {
+                        fprintf(stderr, "Error initializing MSR module!\n");
+                        return EXIT_FAILURE;
+                }
+        }
+
+        fprintf(stderr, "SHA size incr = %d\n", sha_size_incr);
+        if (archs[ARCH_SSE]) {
+                /* temporary manager, only used to report the SHANI state */
+                MB_MGR *p_mgr = alloc_mb_mgr(flags);
+
+                if (p_mgr == NULL) {
+                        fprintf(stderr, "Error allocating MB_MGR structure!\n");
+                        return EXIT_FAILURE;
+                }
+                init_mb_mgr_sse(p_mgr);
+                fprintf(stderr, "%s SHA extensions (shani) for SSE arch\n",
+                        (p_mgr->features & IMB_FEATURE_SHANI) ?
+                        "Using" : "Not using");
+                free_mb_mgr(p_mgr);
+        }
+
+        memset(t_info, 0, sizeof(t_info));
+        init_buf(cache_type);
+
+        /* start the num_t - 1 worker threads */
+        if (num_t > 1)
+                for (i = 0; i < num_t - 1; i++, thread_info_p++) {
+                        /* Set core if selected */
+                        if (core_mask) {
+                                core = next_core(core_mask, core);
+                                thread_info_p->core = core++;
+                        }
+#ifdef _WIN32
+                        threads[i] = (HANDLE)
+                                _beginthread(&run_tests, 0,
+                                             (void *)thread_info_p);
+#else
+                        /* default attributes; joining needs a valid tid */
+                        if (pthread_create(&tids[i], NULL, run_tests,
+                                           (void *)thread_info_p) != 0) {
+                                fprintf(stderr,
+                                        "Failed to create thread %d!\n",
+                                        i + 2);
+                                return EXIT_FAILURE;
+                        }
+#endif
+                }
+
+        /* the calling thread runs the last instance and prints results */
+        thread_info_p->print_info = 1;
+        if (core_mask) {
+                core = next_core(core_mask, core);
+                thread_info_p->core = core;
+        }
+
+        run_tests((void *)thread_info_p);
+        if (num_t > 1) {
+#ifdef _WIN32
+                /* wait for all of the num_t - 1 handles stored above */
+                WaitForMultipleObjects(num_t - 1, threads, TRUE, INFINITE);
+#endif
+                for (i = 0; i < num_t - 1; i++) {
+                        fprintf(stderr, "Waiting on thread %d to finish...\n",
+                                i+2);
+#ifdef _WIN32
+                        /*
+                         * NOTE(review): handles returned by _beginthread
+                         * may already be closed when the thread ends -
+                         * confirm that closing them here is valid.
+                         */
+                        CloseHandle(threads[i]);
+#else
+                        pthread_join(tids[i], NULL);
+#endif
+                }
+        }
+
+        if (use_unhalted_cycles)
+                machine_fini();
+
+        free_mem();
+
+        return EXIT_SUCCESS;
+}
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/msr.c b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.c
new file mode 100644
index 00000000..34ee4973
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.c
@@ -0,0 +1,209 @@
+/**********************************************************************
+ Copyright(c) 2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/**
+ * @brief Provides access to MSR read & write operations
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef _WIN32
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#endif
+
+#include "msr.h"
+
+/*
+ * Module state: allocated by machine_init() and released by
+ * machine_fini(). A table entry of -1 means that no descriptor is open
+ * for that core.
+ */
+static int *m_msr_fd = NULL; /**< MSR driver file descriptors table */
+static unsigned m_maxcores = 0; /**< max number of cores (size of the
+ table above too) */
+
+/*
+ * Initializes the machine module: allocates the per core table of MSR
+ * driver file descriptors and marks every entry as not open (-1).
+ *
+ * max_core_id - highest logical core id to be handled; zero selects
+ *               MACHINE_DEFAULT_MAX_COREID, as documented in msr.h
+ *
+ * Returns MACHINE_RETVAL_OK on success and MACHINE_RETVAL_ERROR when the
+ * table cannot be allocated. With _WIN32 defined nothing is allocated
+ * and MACHINE_RETVAL_OK is returned.
+ */
+int
+machine_init(const unsigned max_core_id)
+{
+#ifndef _WIN32
+        const unsigned top_core_id = (max_core_id != 0) ?
+                max_core_id : MACHINE_DEFAULT_MAX_COREID;
+        unsigned i;
+
+        /* core ids start at zero, hence one more entry than the top id */
+        m_maxcores = top_core_id + 1;
+
+        /**
+         * Allocate table to hold MSR driver file descriptors
+         * Each file descriptor is for a different core.
+         * Core id is an index to the table.
+         */
+        m_msr_fd = (int *)malloc(m_maxcores * sizeof(m_msr_fd[0]));
+        if (m_msr_fd == NULL) {
+                m_maxcores = 0;
+                return MACHINE_RETVAL_ERROR;
+        }
+
+        for (i = 0; i < m_maxcores; i++)
+                m_msr_fd[i] = -1;
+#else
+        (void) max_core_id; /* unused in this configuration */
+#endif /* _WIN32 */
+        return MACHINE_RETVAL_OK;
+}
+
+/*
+ * Shuts down the machine module: closes every descriptor that is still
+ * open, frees the descriptor table and resets the module state.
+ *
+ * Returns MACHINE_RETVAL_ERROR when the module was not initialized and
+ * MACHINE_RETVAL_OK otherwise. With _WIN32 defined it only returns
+ * MACHINE_RETVAL_OK.
+ */
+int
+machine_fini(void)
+{
+#ifndef _WIN32
+        unsigned core;
+
+        ASSERT(m_msr_fd != NULL);
+        if (m_msr_fd == NULL)
+                return MACHINE_RETVAL_ERROR;
+
+        /* close whatever msr_file_open() left open */
+        for (core = 0; core < m_maxcores; core++) {
+                if (m_msr_fd[core] == -1)
+                        continue;
+                close(m_msr_fd[core]);
+                m_msr_fd[core] = -1;
+        }
+
+        /* drop the table and return to the uninitialized state */
+        free(m_msr_fd);
+        m_maxcores = 0;
+        m_msr_fd = NULL;
+#endif /* _WIN32 */
+        return MACHINE_RETVAL_OK;
+}
+
+#ifndef _WIN32
+/**
+ * @brief Returns MSR driver file descriptor for given core id
+ *
+ * A descriptor cached in the m_msr_fd table is returned as is. Otherwise
+ * "/dev/cpu/<lcore>/msr" is opened for reading and writing and, when
+ * that succeeds, the new descriptor is cached for later calls.
+ *
+ * @param lcore logical core id
+ *
+ * @return MSR driver file descriptor corresponding \a lcore,
+ *         negative value when the file could not be opened
+ */
+static int
+msr_file_open(const unsigned lcore)
+{
+        char path[32];
+        int fd;
+
+        ASSERT(lcore < m_maxcores);
+        ASSERT(m_msr_fd != NULL);
+
+        /* fast path: the descriptor of this core is already open */
+        fd = m_msr_fd[lcore];
+        if (fd >= 0)
+                return fd;
+
+        memset(path, 0, sizeof(path));
+        snprintf(path, sizeof(path)-1,
+                 "/dev/cpu/%u/msr", lcore);
+
+        fd = open(path, O_RDWR);
+        if (fd < 0) {
+                fprintf(stderr, "Error opening file '%s'!\n", path);
+                return fd;
+        }
+
+        /* remember the descriptor for the next access to this core */
+        m_msr_fd[lcore] = fd;
+        return fd;
+}
+#endif /* _WIN32 */
+
+/*
+ * Reads the MSR \a reg of logical core \a lcore into *value through the
+ * MSR driver file of that core, using the register number as the file
+ * offset.
+ *
+ * Returns MACHINE_RETVAL_PARAM for a NULL value pointer or a core id
+ * outside of the table, MACHINE_RETVAL_ERROR when the module is not
+ * initialized, the driver file cannot be opened or the read is short,
+ * and MACHINE_RETVAL_OK on success. With _WIN32 defined nothing is read
+ * and MACHINE_RETVAL_OK is returned.
+ */
+int
+msr_read(const unsigned lcore,
+         const uint32_t reg,
+         uint64_t *value)
+{
+#ifndef _WIN32
+        ssize_t nread;
+        int fd;
+
+        ASSERT(value != NULL);
+        if (value == NULL)
+                return MACHINE_RETVAL_PARAM;
+
+        ASSERT(lcore < m_maxcores);
+        if (lcore >= m_maxcores)
+                return MACHINE_RETVAL_PARAM;
+
+        ASSERT(m_msr_fd != NULL);
+        if (m_msr_fd == NULL)
+                return MACHINE_RETVAL_ERROR;
+
+        fd = msr_file_open(lcore);
+        if (fd < 0)
+                return MACHINE_RETVAL_ERROR;
+
+        /* anything but a complete 64 bit read counts as a failure */
+        nread = pread(fd, value, sizeof(value[0]), (off_t)reg);
+        if ((size_t) nread != sizeof(value[0])) {
+                fprintf(stderr, "RDMSR failed for reg[0x%x] on lcore %u\n",
+                        (unsigned)reg, lcore);
+                return MACHINE_RETVAL_ERROR;
+        }
+#endif /* _WIN32 */
+        return MACHINE_RETVAL_OK;
+}
+
+/*
+ * Writes \a value into the MSR \a reg of logical core \a lcore through
+ * the MSR driver file of that core, using the register number as the
+ * file offset.
+ *
+ * Returns MACHINE_RETVAL_PARAM for a core id outside of the table,
+ * MACHINE_RETVAL_ERROR when the module is not initialized, the driver
+ * file cannot be opened or the write is short, and MACHINE_RETVAL_OK on
+ * success. With _WIN32 defined nothing is written and MACHINE_RETVAL_OK
+ * is returned.
+ */
+int
+msr_write(const unsigned lcore,
+          const uint32_t reg,
+          const uint64_t value)
+{
+#ifndef _WIN32
+        ssize_t nwritten;
+        int fd;
+
+        ASSERT(lcore < m_maxcores);
+        if (lcore >= m_maxcores)
+                return MACHINE_RETVAL_PARAM;
+
+        ASSERT(m_msr_fd != NULL);
+        if (m_msr_fd == NULL)
+                return MACHINE_RETVAL_ERROR;
+
+        fd = msr_file_open(lcore);
+        if (fd < 0)
+                return MACHINE_RETVAL_ERROR;
+
+        /* anything but a complete 64 bit write counts as a failure */
+        nwritten = pwrite(fd, &value, sizeof(value), (off_t)reg);
+        if ((size_t) nwritten != sizeof(value)) {
+                fprintf(stderr, "WRMSR failed for reg[0x%x] "
+                        "<- value[0x%llx] on lcore %u\n",
+                        (unsigned)reg, (unsigned long long)value, lcore);
+                return MACHINE_RETVAL_ERROR;
+        }
+#endif /* _WIN32 */
+        return MACHINE_RETVAL_OK;
+}
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/msr.h b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.h
new file mode 100644
index 00000000..afa8795c
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.h
@@ -0,0 +1,114 @@
+/**********************************************************************
+ Copyright(c) 2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/**
+ * @brief Provides access to MSR read & write operations
+ */
+
+#ifndef __MSR_H__
+#define __MSR_H__
+
+#include <stdint.h>
+#include <stdlib.h>
+#ifdef DEBUG
+#include <assert.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * ASSERT(x) maps to assert(x) when DEBUG is defined and expands to
+ * nothing otherwise, so it must not be the only check of a condition.
+ */
+#ifdef DEBUG
+#define ASSERT assert
+#else
+#define ASSERT(x)
+#endif
+
+#define MACHINE_DEFAULT_MAX_COREID 255 /**< max core id */
+
+/* Status codes returned by the functions declared below */
+#define MACHINE_RETVAL_OK 0 /**< everything OK */
+#define MACHINE_RETVAL_ERROR 1 /**< generic error */
+#define MACHINE_RETVAL_PARAM 2 /**< parameter error */
+
+/**
+ * @brief Initializes machine module
+ *
+ * @param [in] max_core_id maximum logical core id to be handled by machine
+ * module. If zero then default value assumed
+ * \a MACHINE_DEFAULT_MAX_COREID
+ *
+ * @return Operation status
+ * @retval MACHINE_RETVAL_OK on success
+ * @retval MACHINE_RETVAL_ERROR if the file descriptor table cannot be
+ * allocated
+ */
+int machine_init(const unsigned max_core_id);
+
+/**
+ * @brief Shuts down machine module
+ *
+ * @return Operation status
+ * @retval MACHINE_RETVAL_OK on success
+ * @retval MACHINE_RETVAL_ERROR if the module was not initialized
+ */
+int machine_fini(void);
+
+/**
+ * @brief Executes RDMSR on \a lcore logical core
+ *
+ * @param [in] lcore logical core id
+ * @param [in] reg MSR to read from
+ * @param [out] value place to store MSR value at
+ *
+ * @return Operation status
+ * @retval MACHINE_RETVAL_OK on success
+ * @retval MACHINE_RETVAL_PARAM if \a value is NULL or \a lcore is not
+ * covered by the initialized module
+ * @retval MACHINE_RETVAL_ERROR if the module is not initialized, the MSR
+ * driver file cannot be opened or the read fails
+ */
+int
+msr_read(const unsigned lcore,
+ const uint32_t reg,
+ uint64_t *value);
+
+/**
+ * @brief Executes WRMSR on \a lcore logical core
+ *
+ * @param [in] lcore logical core id
+ * @param [in] reg MSR to write to
+ * @param [in] value to be written into \a reg
+ *
+ * @return Operation status
+ * @retval MACHINE_RETVAL_OK on success
+ * @retval MACHINE_RETVAL_PARAM if \a lcore is not covered by the
+ * initialized module
+ * @retval MACHINE_RETVAL_ERROR if the module is not initialized, the MSR
+ * driver file cannot be opened or the write fails
+ */
+int
+msr_write(const unsigned lcore,
+ const uint32_t reg,
+ const uint64_t value);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MSR_H__ */
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/win_x64.mak b/src/spdk/intel-ipsec-mb/LibPerfApp/win_x64.mak
new file mode 100644
index 00000000..a1d8d902
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/win_x64.mak
@@ -0,0 +1,69 @@
+#
+# Copyright (c) 2017-2018, Intel Corporation
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Intel Corporation nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+INSTNAME = intel-ipsec-mb
+APP = ipsec_perf
+# Installation root; this default applies only when PREFIX is not yet defined.
+!ifndef PREFIX
+PREFIX = C:\Program Files
+!endif
+# Use the installed library if present, else the one in the parent directory.
+!if !exist("$(PREFIX)\$(INSTNAME)\libIPSec_MB.lib")
+INCDIR = -I..\ -I..\include
+IPSECLIB = ..\libIPSec_MB.lib
+!else
+INCDIR = -I"$(PREFIX)\$(INSTNAME)"
+IPSECLIB = "$(PREFIX)\$(INSTNAME)\libIPSec_MB.lib"
+!endif
+# Defining DEBUG selects unoptimized code with debug info (and /DDEBUG).
+!if defined(DEBUG)
+DLFLAGS = /debug
+DCFLAGS = /Od /DDEBUG /Z7
+!else
+DLFLAGS =
+DCFLAGS = /O2 /Oi
+!endif
+# Tools and the complete option strings passed to them.
+LNK = link
+LFLAGS = /out:$(APP).exe $(DLFLAGS)
+
+CC = cl
+CFLAGS = /nologo $(DCFLAGS) /Y- /W3 /WX- /Gm- /fp:precise /EHsc $(INCDIR)
+
+all: $(APP).exe
+# Link the application from its two objects and the IPSec MB library.
+$(APP).exe: ipsec_perf.obj msr.obj $(IPSECLIB)
+	$(LNK) $(LFLAGS) ipsec_perf.obj msr.obj $(IPSECLIB)
+# msr.h is listed as a prerequisite so that a header edit forces a recompile.
+ipsec_perf.obj: ipsec_perf.c msr.h
+	$(CC) /c $(CFLAGS) ipsec_perf.c
+
+msr.obj: msr.c msr.h
+	$(CC) /c $(CFLAGS) msr.c
+# Delete the objects, the executable and the linker side files (.pdb, .ilk).
+clean:
+	del /q ipsec_perf.obj msr.obj $(APP).exe $(APP).pdb $(APP).ilk