Adding upstream version 14.2.21.upstream/14.2.21 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 18:24:20 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 18:24:20 +0000
commit: 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
tree: e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/spdk/intel-ipsec-mb/LibPerfApp
parent: Initial commit. (diff)
download: ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz
ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip
7 files changed, 2324 insertions, 0 deletions
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/Makefile b/src/spdk/intel-ipsec-mb/LibPerfApp/Makefile
new file mode 100755
index 00000000..7039558f
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/Makefile
@@ -0,0 +1,83 @@
+# Copyright (c) 2017-2018, Intel Corporation
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of Intel Corporation nor the names of its contributors
+#       may be used to endorse or promote products derived from this software
+#       without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Name of the performance application binary
+EXE=ipsec_perf
+# Header whose presence indicates a system-wide installation of the library
+INSTPATH ?= /usr/include/intel-ipsec-mb.h
+
+# $(INCLUDES) is not set in this file; it is left as a hook for the caller
+CFLAGS = -DLINUX -D_GNU_SOURCE $(INCLUDES) \
+	-W -Wall -Wextra -Wmissing-declarations -Wpointer-arith \
+	-Wcast-qual -Wundef -Wwrite-strings  \
+	-Wformat -Wformat-security \
+	-Wunreachable-code -Wmissing-noreturn -Wsign-compare -Wno-endif-labels \
+	-Wstrict-prototypes -Wmissing-prototypes -Wold-style-definition \
+	-pthread
+
+LDFLAGS = -fPIE -z noexecstack -z relro -z now -pthread
+LDLIBS = -lIPSec_MB
+
+# $(shell) strips the trailing newline itself, so a plain "echo" is enough
+# ("echo -n" is not portable across /bin/sh implementations)
+ifneq ("$(shell test -e $(INSTPATH) && echo yes)","yes")
+# library not installed: use headers and library from the parent directory
+CFLAGS +=  -I../include -I../
+LDFLAGS += -L../
+endif
+
+# DEBUG=y selects an unoptimized build with debug symbols
+ifeq ($(DEBUG),y)
+CFLAGS += -g -DDEBUG -O0
+LDFLAGS += -g
+else
+CFLAGS += -O3 -fPIE -fstack-protector -D_FORTIFY_SOURCE=2
+endif
+
+SOURCES := ipsec_perf.c msr.c
+OBJECTS := $(SOURCES:%.c=%.o)
+
+# External tools; both can be overridden from the command line/environment
+CHECKPATCH ?= checkpatch.pl
+CPPCHECK ?= cppcheck
+
+.PHONY: all clean style cppcheck
+
+all: $(EXE)
+
+$(EXE): $(OBJECTS)
+	$(CC) $(LDFLAGS) $^ $(LDLIBS) -o $@
+
+# Objects are compiled by the built-in %.o: %.c rule; this line only adds
+# extra prerequisites for ipsec_perf.o
+ipsec_perf.o: $(SOURCES)
+
+clean:
+	$(RM) $(OBJECTS)
+	$(RM) $(EXE)
+
+# Static code analysis of the application sources
+cppcheck:
+	$(CPPCHECK) --force -I../include -I../ -I./ \
+	--enable=warning,portability,performance \
+	--std=c99 --template=gcc $(SOURCES)
+
+# Coding style check; every source file is passed as "-f <file>".
+# The continuation lines below intentionally start in the first column so
+# that the comma separated --ignore list is joined without any whitespace.
+SOURCES_STYLE := $(foreach infile,$(SOURCES),-f $(infile))
+style:
+	$(CHECKPATCH) --no-tree --no-signoff --emacs --no-color \
+--ignore CODE_INDENT,INITIALISED_STATIC,LEADING_SPACE,SPLIT_STRING,\
+UNSPECIFIED_INT,ARRAY_SIZE,BLOCK_COMMENT_STYLE,GLOBAL_INITIALISERS,\
+COMPLEX_MACRO,SPACING,STORAGE_CLASS $(SOURCES_STYLE)
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/README b/src/spdk/intel-ipsec-mb/LibPerfApp/README
new file mode 100644
index 00000000..dad423d9
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/README
@@ -0,0 +1,82 @@
+========================================================================
+README for Intel(R) Multi-Buffer Crypto for IPsec Library API
+performance measurement tool
+
+February 2017
+========================================================================
+
+
+Contents
+========
+
+- Overview
+- Files
+- Compilation
+- Usage
+- Legal Disclaimer
+
+
+Overview
+========
+This test tool performs multiple executions of functions included in the
+Intel Multi-Buffer Crypto for IPsec Library.
+
+Files
+=====
+
+ipsec_perf.c -	Tool which produces text formatted output representing
+		average execution times of ipsec_mb functions.
+ipsec_diff_tool.py - Tool which analyzes and compares the text output
+		produced by ipsec_perf.
+
+Compilation
+===========
+
+Required tools:
+- GNU make
+- gcc (GCC) 4.8.3 (or newer)
+
+Simply run "make" to compile the tool.
+To clean the build please run "make clean".
+
+You can point to another directory containing the IPSec MB library by setting
+LIB_LOC, for example:
+	LIB_LOC=../ipsec_mb_lib make
+
+In order to perform static code analysis or style check you can do:
+	make cppcheck
+or
+	make style
+
+Be aware that you need to have the cppcheck tool installed and the
+checkpatch.pl script copied into one of the directories listed in $PATH.
+You can also set the CPPCHECK and/or CHECKPATCH variables if you want to
+give paths to these tools located in different directories, for example:
+	CPPCHECK=~/tools/cppcheck make cppcheck
+	CHECKPATCH=~/scripts/checkpatch.pl make style
+
+Usage
+=====
+
+You can simply check list of arguments by typing:
+	./ipsec_perf -h
+
+Usage example:
+	./ipsec_perf -c --no-avx512 --no-gcm -o 24
+
+Later you can pass output to ipsec_diff_tool.py for data
+analysis:
+	./ipsec_diff_tool.py out1.txt out2.txt 5
+
+Run ipsec_diff_tool.py -h to see the help page.
+
+Legal Disclaimer
+================
+
+THIS SOFTWARE IS PROVIDED BY INTEL "AS IS". NO LICENSE, EXPRESS OR
+IMPLIED, BY ESTOPPEL OR OTHERWISE, TO ANY INTELLECTUAL PROPERTY RIGHTS
+ARE GRANTED THROUGH USE. EXCEPT AS PROVIDED IN INTEL'S TERMS AND
+CONDITIONS OF SALE, INTEL ASSUMES NO LIABILITY WHATSOEVER AND INTEL
+DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY, RELATING TO SALE AND/OR
+USE OF INTEL PRODUCTS INCLUDING LIABILITY OR WARRANTIES RELATING TO
+FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABILITY, OR INFRINGEMENT
+OF ANY PATENT, COPYRIGHT OR OTHER INTELLECTUAL PROPERTY RIGHT.
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py
new file mode 100755
index 00000000..1e8219f5
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_diff_tool.py
@@ -0,0 +1,308 @@
+#!/usr/bin/env python
+
+"""
+**********************************************************************
+  Copyright(c) 2017-2018, Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************
+"""
+
+import sys
+
+# Number of parameters (ARCH, CIPHER_MODE, DIR, HASH_ALG, KEY_SIZE)
+PAR_NUM = 5
+
+class Variant(object):
+    """
+    Class representing one test including chosen parameters and
+    results of average execution times
+    """
+    def __init__(self, **args):
+        self.params = (args['arch'], args['cipher'], args['dir'], args['alg'],
+                       args['keysize'])
+
+        self.avg_times = []
+        self.slope = None
+        self.intercept = None
+
+    def set_times(self, avg_times):
+        """
+        Fills test execution time list
+        """
+        self.avg_times = avg_times
+
+    def lin_reg(self, sizes):
+        """
+        Computes linear regression of set of coordinates (x,y)
+        """
+
+        n = len(sizes)
+
+        if n != len(self.avg_times):
+            print "Error!"
+            return None
+
+        sumx = sum(sizes)
+        sumy = sum(self.avg_times)
+        sumxy = sum([x * y for x, y in zip(sizes, self.avg_times)])
+        sumsqrx = sum([pow(x, 2) for x in sizes])
+        self.slope = (n * sumxy - sumx * sumy) / float(n * sumsqrx - pow(sumx, 2))
+        self.intercept = (sumy - self.slope * sumx) / float(n)
+
+    def get_params_str(self):
+        """
+        Returns all parameters concatenated into one string
+        """
+        return "\t".join(i for i in self.params)
+
+    def get_lin_func_str(self):
+        """
+        Returns string having linear coefficients
+        """
+        slope = "{:.5f}".format(self.slope)
+        intercept = "{:.5f}".format(self.intercept)
+        return "{}\t{}".format(slope, intercept)
+
+class VarList(list):
+    """
+    Class used to store all test variants as a list of objects
+    """
+
+    def find_obj(self, params):
+        """
+        Finds first occurence of object containing given parameters
+        """
+        ret_val = None
+        matches = (obj for obj in self if obj.params == params)
+        try:
+            ret_val = next(matches)
+        except StopIteration:
+            pass
+        return ret_val
+
+    def compare(self, list_b, tolerance):
+        """
+        Finds variants from two data sets which are matching and compares
+        its linear regression coefficients.
+        Compares list_b against itself.
+        """
+
+        if tolerance is None:
+            tolerance = 5.0
+        if tolerance < 0.0:
+            print "Bad argument: Tolerance must not be less than 0%"
+            exit(1)
+        print "TOLERANCE: {:.2f}%".format(tolerance)
+
+        warning = False
+        print "NO\tARCH\tCIPHER\tDIR\tHASH\tKEYSZ\tSLOPE A\tINTERCEPT A\tSLOPE B\tINTERCEPT B"
+        for i, obj_a in enumerate(self):
+            obj_b = list_b.find_obj(obj_a.params)
+            if obj_b != None:
+                if obj_a.slope < 0.0:
+                    obj_a.slope = 0
+                if obj_b.slope < 0.0:
+                    obj_b.slope = 0
+                slope_bv = 0.01 * tolerance * obj_a.slope # border value
+                intercept_bv = 0.01 * tolerance * obj_a.intercept
+                diff_slope = obj_b.slope - obj_a.slope
+                diff_intercept = obj_b.intercept - obj_a.intercept
+                if (obj_a.slope > 0.001 and obj_b.slope > 0.001 and
+                        diff_slope > slope_bv) or diff_intercept > intercept_bv:
+                    warning = True
+                    print "{}\t{}\t{}\t{}".format(i + 1,
+                                                  obj_b.get_params_str(),
+                                                  obj_a.get_lin_func_str(),
+                                                  obj_b.get_lin_func_str())
+        if not warning:
+            print "No differences found."
+        return warning
+
+    def printout(self):
+        """
+        Prints out readable representation of the list
+        """
+
+        print "NO\tARCH\tCIPHER\tDIR\tHASH\tKEYSZ\tSLOPE \tINTERCEPT"
+        for i, obj in enumerate(self):
+            print "{}\t{}\t{}".format(i + 1,
+                                      obj.get_params_str(),
+                                      obj.get_lin_func_str())
+
+
+
+class Parser(object):
+    """
+    Class used to parse a text file contaning performance data
+    """
+
+    def __init__(self, fname, verbose):
+        self.fname = fname
+        self.verbose = verbose
+
+    @staticmethod
+    def convert2int(in_tuple):
+        """
+        Converts a tuple of strings into a list of integers
+        """
+
+        result = list(in_tuple)             # Converting to list
+        result = [int(i) for i in result]   # Converting str to int
+        return result
+
+    def load(self):
+        """
+        Reads a text file by columns, stores data in objects
+        for further comparision of performance
+        """
+
+        v_list = VarList()
+        # Reading by columns, results in list of tuples
+        # Each tuple is representing a column from a text file
+        try:
+            f = open(self.fname, 'r')
+        except IOError:
+            print "Error reading {} file.".format(self.fname)
+            exit(1)
+        else:
+            with f:
+                cols = zip(*(line.strip().split('\t') for line in f))
+
+        # Reading first column with payload sizes, ommiting first 5 rows
+        sizes = self.convert2int(cols[0][PAR_NUM:])
+        if self.verbose:
+            print "Available buffer sizes:\n"
+            print sizes
+            print "========================================================"
+            print "\n\nVariants:\n"
+
+        # Reading remaining columns contaning performance data
+        for row in cols[1:]:
+            # First rows are run options
+            arch, c_mode, c_dir, h_alg, key_size = row[:PAR_NUM]
+            if self.verbose:
+                print arch, c_mode, c_dir, h_alg, key_size
+
+            # Getting average times
+            avg_times = self.convert2int(row[PAR_NUM:])
+            if self.verbose:
+                print avg_times
+                print "------"
+
+            # Putting new object to the result list
+            v_list.append(Variant(arch=arch, cipher=c_mode, dir=c_dir,
+                                  alg=h_alg, keysize=key_size))
+            v_list[-1].set_times(avg_times)
+            # Finding linear function representation of data set
+            v_list[-1].lin_reg(sizes)
+            if self.verbose:
+                print "({}, {})".format(v_list[-1].slope, v_list[-1].intercept)
+                print "============\n"
+        return v_list, sizes
+
+class DiffTool(object):
+    """
+    Main class
+    """
+
+    def __init__(self):
+        self.fname_a = None
+        self.fname_b = None
+        self.tolerance = None
+        self.verbose = False
+        self.analyze = False
+
+    @staticmethod
+    def usage():
+        """
+        Prints usage
+        """
+        print "This tool compares file_b against file_a printing out differences."
+        print "Usage:"
+        print "\tipsec_diff_tool.py [-v] [-a] file_a file_b [tol]\n"
+        print "\t-v - verbose"
+        print "\t-a - takes only one argument: name of the file to analyze"
+        print "\tfile_a, file_b - text files containing output from ipsec_perf tool"
+        print "\ttol - tolerance [%], must be >= 0, default 5\n"
+        print "Examples:"
+        print "\tipsec_diff_tool.py file01.txt file02.txt 10"
+        print "\tipsec_diff_tool.py -a file02.txt"
+        print "\tipsec_diff_tool.py -v -a file01.txt"
+
+
+    def parse_args(self):
+        """
+        Get commandline arguments
+        """
+        if len(sys.argv) < 3 or sys.argv[1] == "-h":
+            self.usage()
+            exit(1)
+        if sys.argv[1] == "-a":
+            self.analyze = True
+            self.fname_a = sys.argv[2]
+        elif sys.argv[2] == "-a":
+            if sys.argv[1] == "-v":
+                self.verbose = True
+            self.analyze = True
+            self.fname_a = sys.argv[3]
+        elif sys.argv[1] == "-v":
+            self.verbose = True
+            self.fname_a = sys.argv[2]
+            self.fname_b = sys.argv[3]
+            if len(sys.argv) >= 5:
+                self.tolerance = float(sys.argv[4])
+
+        else:
+            self.fname_a = sys.argv[1]
+            self.fname_b = sys.argv[2]
+            if len(sys.argv) >= 4:
+                self.tolerance = float(sys.argv[3])
+
+    def run(self):
+        """
+        Main method
+        """
+        self.parse_args()
+
+        parser_a = Parser(self.fname_a, self.verbose)
+        list_a, sizes_a = parser_a.load()
+
+        if not self.analyze:
+            parser_b = Parser(self.fname_b, self.verbose)
+            list_b, sizes_b = parser_b.load()
+            if sizes_a != sizes_b:
+                print "Error. Buffer size lists in two compared " \
+                        "data sets differ! Aborting.\n"
+                exit(1)
+            warning = list_a.compare(list_b, self.tolerance) # Compares list_b against list_a
+            if warning:
+                exit(2)
+        else:
+            list_a.printout() # Takes only one file and prints it out
+
+# Run the tool only when this file is executed as a script
+if __name__ == '__main__':
+    DiffTool().run()
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c
new file mode 100644
index 00000000..841b689a
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/ipsec_perf.c
@@ -0,0 +1,1459 @@
+/**********************************************************************
+  Copyright(c) 2017-2018, Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#include <process.h>
+#include <intrin.h>
+#define __forceinline static __forceinline
+#else
+#include <x86intrin.h>
+#define __forceinline static inline __attribute__((always_inline))
+#include <unistd.h>
+#include <pthread.h>
+#include <sched.h>
+#endif
+
+#include <intel-ipsec-mb.h>
+
+#include "msr.h"
+
+#define BUFSIZE (512 * 1024 * 1024)
+#define JOB_SIZE (2 * 1024)
+#define JOB_SIZE_STEP 16
+#define REGION_SIZE (JOB_SIZE + 3003)
+#define NUM_OFFSETS (BUFSIZE / REGION_SIZE)
+#define NUM_RUNS 16
+#define KEYS_PER_JOB 15
+#define ITER_SCALE 200000
+#define BITS(x) (sizeof(x) * 8)
+#define DIM(x) (sizeof(x)/sizeof(x[0]))
+
+#define NUM_ARCHS 4 /* SSE, AVX, AVX2, AVX512 */
+#define NUM_TYPES 6 /* AES_HMAC, AES_DOCSIS, AES_GCM, AES_CCM, DES, 3DES */
+#define MAX_NUM_THREADS 16 /* Maximum number of threads that can be created */
+
+#define CIPHER_MODES_AES 4	/* CBC, CNTR, CNTR+8, NULL_CIPHER */
+#define CIPHER_MODES_DOCSIS 4	/* AES DOCSIS, AES DOCSIS+8, DES DOCSIS,
+                                   DES DOCSIS+4 */
+#define CIPHER_MODES_DES 1	/* DES */
+#define CIPHER_MODES_GCM 1	/* GCM */
+#define CIPHER_MODES_CCM 1	/* CCM */
+#define CIPHER_MODES_3DES 1	/* 3DES */
+#define DIRECTIONS 2		/* ENC, DEC */
+#define HASH_ALGS_AES 9		/* SHA1, SHA256, SHA224, SHA384, SHA512, XCBC,
+                                   MD5, NULL_HASH, CMAC */
+#define HASH_ALGS_DOCSIS 1	/* NULL_HASH */
+#define HASH_ALGS_GCM 1		/* GCM */
+#define HASH_ALGS_CCM 1		/* CCM */
+#define HASH_ALGS_DES 1		/* NULL_HASH for DES */
+#define HASH_ALGS_3DES 1	/* NULL_HASH for 3DES */
+#define KEY_SIZES_AES 3		/* 16, 24, 32 */
+#define KEY_SIZES_DOCSIS 1	/* 16 or 8 */
+#define KEY_SIZES_GCM 3		/* 16, 24, 32 */
+#define KEY_SIZES_CCM 1		/* 16 */
+#define KEY_SIZES_DES 1		/* 8 */
+#define KEY_SIZES_3DES 1	/* 8 x 3 */
+
+#define IA32_MSR_FIXED_CTR_CTRL      0x38D
+#define IA32_MSR_PERF_GLOBAL_CTR     0x38F
+#define IA32_MSR_CPU_UNHALTED_THREAD 0x30A
+
+/* These defines give the number of different test cases performed for
+ * a single architecture. They have to be multiplied by the number of
+ * chosen architectures.
+ */
+#define VARIANTS_PER_ARCH_AES (CIPHER_MODES_AES * DIRECTIONS *  \
+                               HASH_ALGS_AES * KEY_SIZES_AES)
+#define VARIANTS_PER_ARCH_DOCSIS (CIPHER_MODES_DOCSIS * DIRECTIONS *  \
+                                  HASH_ALGS_DOCSIS * KEY_SIZES_DOCSIS)
+#define VARIANTS_PER_ARCH_GCM (CIPHER_MODES_GCM * DIRECTIONS *  \
+                               HASH_ALGS_GCM * KEY_SIZES_GCM)
+#define VARIANTS_PER_ARCH_CCM (CIPHER_MODES_CCM * DIRECTIONS *  \
+                               HASH_ALGS_CCM * KEY_SIZES_CCM)
+#define VARIANTS_PER_ARCH_DES (CIPHER_MODES_DES * DIRECTIONS *  \
+                               HASH_ALGS_DES * KEY_SIZES_DES)
+#define VARIANTS_PER_ARCH_3DES (CIPHER_MODES_3DES * DIRECTIONS *  \
+                                HASH_ALGS_3DES * KEY_SIZES_3DES)
+
+/* Typedefs used for GCM callbacks */
+typedef void (*aesni_gcm_t)(const struct gcm_key_data *,
+                            struct gcm_context_data *,
+                            uint8_t *, const uint8_t *, uint64_t,
+                            const uint8_t *, const uint8_t *, uint64_t,
+                            uint8_t *, uint64_t);
+typedef void (*aesni_gcm_pre_t)(const void *, struct gcm_key_data *);
+
+/* AES_HMAC, DOCSIS callbacks */
+struct funcs_s {
+        init_mb_mgr_t       init_mb_mgr;
+        get_next_job_t      get_next_job;
+        submit_job_t        submit_job;
+        get_completed_job_t get_completed_job;
+        flush_job_t         flush_job;
+};
+
+/* GCM callbacks */
+struct funcs_gcm_s {
+        aesni_gcm_pre_t	aesni_gcm_pre;
+        aesni_gcm_t	aesni_gcm_enc;
+        aesni_gcm_t	aesni_gcm_dec;
+};
+
+enum arch_type_e {
+        ARCH_SSE = 0,
+        ARCH_AVX,
+        ARCH_AVX2,
+        ARCH_AVX512
+};
+
+enum test_type_e {
+        TTYPE_AES_HMAC,
+        TTYPE_AES_DOCSIS,
+        TTYPE_AES_GCM,
+        TTYPE_AES_CCM,
+        TTYPE_AES_DES,
+        TTYPE_AES_3DES
+};
+
+/* This enum will be mostly translated to JOB_CIPHER_MODE */
+enum test_cipher_mode_e {
+        TEST_CBC = 1,
+        TEST_CNTR,
+        TEST_CNTR8, /* CNTR with increased buffer by 8 */
+        TEST_NULL_CIPHER,
+        TEST_AESDOCSIS,
+        TEST_AESDOCSIS8, /* AES DOCSIS with increased buffer size by 8 */
+        TEST_DESDOCSIS,
+        TEST_DESDOCSIS4, /* DES DOCSIS with increased buffer size by 4 */
+        TEST_GCM, /* Additional field used by GCM, not translated */
+        TEST_CCM,
+        TEST_DES,
+        TEST_3DES,
+};
+
+/* This enum will be mostly translated to JOB_HASH_ALG */
+enum test_hash_alg_e {
+        TEST_SHA1 = 1,
+        TEST_SHA_224,
+        TEST_SHA_256,
+        TEST_SHA_384,
+        TEST_SHA_512,
+        TEST_XCBC,
+        TEST_MD5,
+        TEST_HASH_CMAC, /* added here to be included in AES tests */
+        TEST_NULL_HASH,
+        TEST_HASH_GCM, /* Additional field used by GCM, not translated */
+        TEST_CUSTOM_HASH, /* unused */
+        TEST_HASH_CCM
+};
+
+/* Struct storing cipher parameters */
+struct params_s {
+        JOB_CIPHER_DIRECTION	cipher_dir;
+        enum test_type_e	test_type; /* AES, DOCSIS, GCM */
+        enum test_cipher_mode_e	cipher_mode;
+        enum test_hash_alg_e	hash_alg;
+        uint32_t		aes_key_size;
+        uint32_t		size_aes;
+        uint32_t		num_sizes;
+        uint32_t		num_variants;
+        uint32_t                core;
+};
+
+/* This struct stores all information about performed test case */
+struct variant_s {
+        uint32_t arch;
+        struct params_s params;
+        uint64_t *avg_times;
+};
+
+/* Struct storing information to be passed to threads */
+struct thread_info {
+        int print_info;
+        int core;
+} t_info[MAX_NUM_THREADS];
+
+enum cache_type_e {
+        WARM = 0,
+        COLD = 1
+};
+
+#ifdef DEBUG
+#define FUNCS(A) {                              \
+                init_mb_mgr_##A,                \
+                        get_next_job_##A,       \
+                        submit_job_##A,         \
+                        get_completed_job_##A,  \
+                        flush_job_##A           \
+                        }
+#else
+#define FUNCS(A) {                              \
+                init_mb_mgr_##A,                \
+                        get_next_job_##A,       \
+                        submit_job_nocheck_##A, \
+                        get_completed_job_##A,  \
+                        flush_job_##A           \
+                        }
+#endif
+
+#define FUNCS_GCM(A)                                                    \
+        {aes_gcm_pre_128_##A, aes_gcm_enc_128_##A, aes_gcm_dec_128_##A}, \
+        {aes_gcm_pre_192_##A, aes_gcm_enc_192_##A, aes_gcm_dec_192_##A}, \
+        {aes_gcm_pre_256_##A, aes_gcm_enc_256_##A, aes_gcm_dec_256_##A}
+
+
+/* Function pointers used by TTYPE_AES_HMAC, TTYPE_AES_DOCSIS */
+struct funcs_s func_sets[NUM_ARCHS] = {
+        FUNCS(sse),
+        FUNCS(avx),
+        FUNCS(avx2),
+        FUNCS(avx512)
+};
+
+/* Function pointers used by TTYPE_AES_GCM */
+struct funcs_gcm_s func_sets_gcm[NUM_ARCHS - 1][3] = {
+        {FUNCS_GCM(sse)},
+        {FUNCS_GCM(avx_gen2)}, /* AVX */
+        {FUNCS_GCM(avx_gen4)} /* AVX2 */
+};
+
+enum cache_type_e cache_type = WARM;
+/* As enum: SHA1, SHA224, SHA256, SHA384, SHA512,
+   XCBC, MD5, NULL, GMAC, CUSTOM, CCM, CMAC */
+const uint32_t auth_tag_length_bytes[12] = {
+        12, 14, 16, 24, 32, 12, 12, 0, 8, 0, 16, 16
+};
+uint8_t *buf = NULL;
+uint32_t index_limit;
+uint128_t *keys = NULL;
+uint64_t *offset_ptr = NULL;
+uint32_t key_idxs[NUM_OFFSETS];
+uint32_t offsets[NUM_OFFSETS];
+int sha_size_incr = 24;
+
+uint8_t archs[NUM_ARCHS] = {1, 1, 1, 1}; /* uses all function sets */
+/* AES, DOCSIS, GCM, CCM, DES, 3DES */
+uint8_t test_types[NUM_TYPES] = {1, 1, 1, 1, 1, 1};
+
+int use_gcm_job_api = 0;
+int use_unhalted_cycles = 0; /* read unhalted cycles instead of tsc */
+uint64_t rd_cycles_cost = 0; /* cost of reading unhalted cycles */
+uint64_t core_mask = 0; /* bitmap of selected cores */
+
+uint64_t flags = 0; /* flags passed to alloc_mb_mgr() */
+
+/* These inline functions dispatch to the ipsec_mb library function set
+ * selected by the 'arch' argument, which is used as an index into the
+ * func_sets[] table (NUM_ARCHS entries). The index is not range checked
+ * here. Note that in non-DEBUG builds the submit_job slot of func_sets[]
+ * is filled with the submit_job_nocheck_* variant.
+ */
+
+/* Initialize the multi-buffer manager for the given architecture */
+__forceinline void init_mb_mgr(MB_MGR *mgr, uint32_t arch)
+{
+        func_sets[arch].init_mb_mgr(mgr);
+}
+
+/* Call get_next_job of the given architecture and return its result */
+__forceinline JOB_AES_HMAC *get_next_job(MB_MGR *mgr, const uint32_t arch)
+{
+        return func_sets[arch].get_next_job(mgr);
+}
+
+/* Call submit_job of the given architecture and return its result */
+__forceinline JOB_AES_HMAC *submit_job(MB_MGR *mgr, const uint32_t arch)
+{
+        return func_sets[arch].submit_job(mgr);
+}
+
+/* Call get_completed_job of the given architecture and return its result */
+__forceinline JOB_AES_HMAC *get_completed_job(MB_MGR *mgr, const uint32_t arch)
+{
+        return func_sets[arch].get_completed_job(mgr);
+}
+
+/* Call flush_job of the given architecture and return its result */
+__forceinline JOB_AES_HMAC *flush_job(MB_MGR *mgr, const uint32_t arch)
+{
+        return func_sets[arch].flush_job(mgr);
+}
+
+/* GCM functions take also key size argument (128, 192, 256bit).
+ * 'key_sz' is an index into the second dimension of func_sets_gcm[]:
+ * 0 selects the 128-bit, 1 the 192-bit and 2 the 256-bit functions.
+ * func_sets_gcm[] has only NUM_ARCHS - 1 rows (SSE, AVX, AVX2), so
+ * 'arch' has to be lower than that. Neither index is range checked here.
+ */
+
+/* Call the GCM pre-processing function for the given arch and key size */
+__forceinline void aesni_gcm_pre(const uint32_t arch, const uint8_t key_sz,
+                                 uint8_t *key, struct gcm_key_data *gdata)
+{
+        func_sets_gcm[arch][key_sz].aesni_gcm_pre(key, gdata);
+}
+
+/* Call the GCM encrypt function for the given arch and key size;
+ * all remaining arguments are passed through unchanged */
+__forceinline void aesni_gcm_enc(const uint32_t arch, const uint8_t key_sz,
+                                 const struct gcm_key_data *gdata,
+                                 struct gcm_context_data *ctx,
+                                 uint8_t *out, uint8_t const *in,
+                                 uint64_t len, uint8_t *iv,
+                                 uint8_t const *aad, uint64_t aad_len,
+                                 uint8_t *auth_tag, uint64_t auth_tag_len)
+{
+        func_sets_gcm[arch][key_sz].aesni_gcm_enc(gdata, ctx, out, in, len, iv,
+                                                  aad, aad_len,
+                                                  auth_tag, auth_tag_len);
+
+}
+
+/* Call the GCM decrypt function for the given arch and key size;
+ * all remaining arguments are passed through unchanged */
+__forceinline void aesni_gcm_dec(const uint32_t arch, const uint8_t key_sz,
+                                 const struct gcm_key_data *gdata,
+                                 struct gcm_context_data *ctx,
+                                 uint8_t *out, uint8_t const *in,
+                                 uint64_t len, uint8_t *iv,
+                                 uint8_t const *aad, uint64_t aad_len,
+                                 uint8_t *auth_tag, uint64_t auth_tag_len)
+{
+        func_sets_gcm[arch][key_sz].aesni_gcm_dec(gdata, ctx, out, in, len, iv,
+                                                  aad, aad_len,
+                                                  auth_tag, auth_tag_len);
+
+}
+
+/* Fetch the unhalted cycles counter of the given core.
+ * Terminates the program if the counter cannot be read. */
+__forceinline uint64_t read_cycles(uint32_t core)
+{
+        uint64_t cycles = 0;
+
+        /* Common case first: the MSR was read successfully */
+        if (msr_read(core, IA32_MSR_CPU_UNHALTED_THREAD,
+                     &cycles) == MACHINE_RETVAL_OK)
+                return cycles;
+
+        /* A failed MSR read is treated as a fatal error */
+        fprintf(stderr, "Error reading cycles "
+                "counter on core %u!\n", core);
+        exit(EXIT_FAILURE);
+}
+
+/* qsort() comparator ordering uint64_t values ascending:
+ * returns -1 if *a < *b, 0 if both are equal and 1 if *a > *b */
+static int compare(const void *a, const void *b)
+{
+        const uint64_t lhs = *(const uint64_t *)a;
+        const uint64_t rhs = *(const uint64_t *)b;
+
+        if (lhs == rhs)
+                return 0;
+
+        return (lhs < rhs) ? -1 : 1;
+}
+
+/* Count the bits that are set in val */
+static int bitcount(const uint64_t val)
+{
+        uint64_t remaining = val;
+        int count = 0;
+
+        /* Each iteration clears the lowest bit that is still set */
+        while (remaining != 0) {
+                remaining &= remaining - 1;
+                count++;
+        }
+
+        return count;
+}
+
+/* Get the next core in core mask.
+ * The search starts at bit 'last_core' (inclusive) and goes towards
+ * the most significant bit. Set last_core to negative to start from
+ * the beginning of core_mask.
+ * Returns index of the first set bit found or -1 if there is none. */
+static int next_core(const uint64_t core_mask,
+                     const int last_core)
+{
+        int core = (last_core >= 0) ? last_core : 0;
+
+        /* The range is checked before every shift, as shifting by
+         * the width of core_mask or more is undefined behavior */
+        for (; core < (int)BITS(core_mask); core++)
+                if ((core_mask >> core) & 1)
+                        return core;
+
+        return -1;
+}
+
+/* Set CPU affinity for the calling thread.
+ * cpu: zero-based CPU number to pin to.
+ * Returns 0 on success (always 0 when built for _WIN32, where nothing is
+ * done), 1 when cpu is out of range or the number of processors cannot be
+ * determined, otherwise the result of sched_setaffinity().
+ */
+static int set_affinity(const int cpu)
+{
+        int ret = 0;
+#ifndef _WIN32
+        cpu_set_t cpuset;
+        long num_cpus = 0;
+
+        /* Get number of cpus in the system.
+         * sysconf() returns a long and -1 on error, so reject anything
+         * that is not a positive count.
+         */
+        num_cpus = sysconf(_SC_NPROCESSORS_CONF);
+        if (num_cpus <= 0) {
+                fprintf(stderr, "Zero processors in the system!\n");
+                return 1;
+        }
+
+        /* Check if selected core is valid */
+        if (cpu < 0 || cpu >= num_cpus) {
+                fprintf(stderr, "Invalid CPU selected! "
+                        "Max valid CPU is %ld\n", num_cpus - 1);
+                return 1;
+        }
+
+        CPU_ZERO(&cpuset);
+        CPU_SET(cpu, &cpuset);
+
+        /* pid 0 selects the calling thread */
+        ret = sched_setaffinity(0, sizeof(cpuset), &cpuset);
+#endif /* _WIN32 */
+        return ret;
+}
+
+/* Start counting unhalted cycles on the given core.
+ * Returns 1 when core is not below BITS(core_mask), otherwise the status of
+ * the first msr_write() that fails, or of the last one when all succeed.
+ */
+static int start_cycles_ctr(uint32_t core)
+{
+        int status;
+
+        if (core >= BITS(core_mask))
+                return 1;
+
+        /* Disable cycles counter */
+        status = msr_write(core, IA32_MSR_PERF_GLOBAL_CTR, 0);
+
+        /* Zero cycles counter */
+        if (status == MACHINE_RETVAL_OK)
+                status = msr_write(core, IA32_MSR_CPU_UNHALTED_THREAD, 0);
+
+        /* Enable OS and user tracking in FixedCtr1 */
+        if (status == MACHINE_RETVAL_OK)
+                status = msr_write(core, IA32_MSR_FIXED_CTR_CTRL, 0x30);
+
+        /* Enable cycles counter */
+        if (status == MACHINE_RETVAL_OK)
+                status = msr_write(core, IA32_MSR_PERF_GLOBAL_CTR,
+                                   (1ULL << 33));
+
+        return status;
+}
+
+/* Init MSR module.
+ * Passes the configured processor count to machine_init() (0 when built for
+ * _WIN32) and returns its result, or MACHINE_RETVAL_ERROR when the
+ * processor count cannot be determined.
+ */
+static int init_msr_mod(void)
+{
+        unsigned max_core_count = 0;
+#ifndef _WIN32
+        const long num_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+        /* sysconf() returns -1 on error; stored straight into an unsigned
+         * that would become a huge non-zero count and slip past the check.
+         */
+        if (num_cpus <= 0) {
+                fprintf(stderr, "Zero processors in the system!\n");
+                return MACHINE_RETVAL_ERROR;
+        }
+        max_core_count = (unsigned)num_cpus;
+#endif
+        return machine_init(max_core_count);
+}
+
+/* Measure the cost of one unhalted cycles read on the given core.
+ * Stores the difference between two back-to-back read_cycles() calls in
+ * *value. Returns 0 on success, 1 when value is NULL or core is negative.
+ */
+static int set_unhalted_cycle_cost(const int core, uint64_t *value)
+{
+        uint64_t first, second;
+
+        if (core < 0 || value == NULL)
+                return 1;
+
+        first = read_cycles(core);
+        second = read_cycles(core);
+
+        *value = second - first;
+        return 0;
+}
+
+/* Calculate the general cost of reading unhalted cycles.
+ * Takes 10 read cost samples, sorts them and stores the element at the
+ * middle index (DIM / 2) of the sorted samples in *value.
+ * Returns 0 on success, 1 on invalid arguments, when no core mask is set
+ * or when a sample cannot be taken.
+ */
+static int set_avg_unhalted_cycle_cost(const int core, uint64_t *value)
+{
+        uint64_t samples[10];
+        unsigned n = 0;
+
+        if (value == NULL || core_mask == 0 || core < 0)
+                return 1;
+
+        /* Collect the read cost samples */
+        while (n < DIM(samples)) {
+                if (set_unhalted_cycle_cost(core, &samples[n]) != 0)
+                        return 1;
+                n++;
+        }
+
+        /* Order the samples so the middle one can be picked */
+        qsort(samples, DIM(samples), sizeof(uint64_t), compare);
+
+        *value = samples[DIM(samples)/2];
+
+        return 0;
+}
+
+/* Release the globally allocated key area (offset_ptr) and data buffer */
+static void free_mem(void)
+{
+        /* free(NULL) is a no-op, so no explicit NULL checks are needed */
+        free(offset_ptr);
+        free(buf);
+}
+
+/* Input buffer initialization.
+ * Allocates the global data buffer and the key area, points keys at a
+ * 16-byte aligned address inside the key area and fills the offsets and
+ * key_idxs tables:
+ *  - COLD: one entry per region, then both tables are shuffled together
+ *          and index_limit is set to NUM_OFFSETS;
+ *  - otherwise (WARM): entries are laid out pairwise and index_limit is 8.
+ * Exits the program when an allocation fails.
+ */
+static void init_buf(enum cache_type_e ctype)
+{
+        uint32_t swap_tmp;
+        uint64_t keys_addr;
+        uint64_t pick;
+        int i;
+
+        buf = (uint8_t *) malloc(BUFSIZE + REGION_SIZE);
+        if (buf == NULL) {
+                fprintf(stderr, "Could not malloc buf\n");
+                exit(EXIT_FAILURE);
+        }
+
+        /* 0x0F spare bytes leave room to round the key pointer up */
+        offset_ptr = (uint64_t *)
+                malloc(NUM_OFFSETS * KEYS_PER_JOB * sizeof(uint128_t) + 0x0F);
+        if (offset_ptr == NULL) {
+                fprintf(stderr, "Could not malloc keys\n");
+                free_mem();
+                exit(EXIT_FAILURE);
+        }
+
+        /* align to 16 bytes */
+        keys_addr = ((uint64_t) offset_ptr + 0x0F) & ~0x0F;
+        keys = (uint128_t *) keys_addr;
+
+        if (ctype != COLD) {
+                /* WARM */
+                for (i = 0; i < NUM_OFFSETS; i += 2) {
+                        offsets[i]   = (2 * i + 0) * REGION_SIZE +
+                                (rand() & 0x3F0);
+                        offsets[i + 1] = (2 * i + 1) * REGION_SIZE +
+                                (rand() & 0x3F0);
+                        key_idxs[i]  = (2 * i + 0) * KEYS_PER_JOB;
+                }
+                index_limit = 8;
+                return;
+        }
+
+        /* COLD: give every region its own offset and key index ... */
+        for (i = 0; i < NUM_OFFSETS; i++) {
+                offsets[i] = i * REGION_SIZE + (rand() & 0x3F0);
+                key_idxs[i] = i * KEYS_PER_JOB;
+        }
+
+        /* ... then swap each entry, last to first, with a randomly picked
+         * entry at or below it; both tables are permuted the same way
+         */
+        for (i = NUM_OFFSETS - 1; i >= 0; i--) {
+                pick = ((uint64_t) rand() * (uint64_t) i) / RAND_MAX;
+
+                swap_tmp = offsets[pick];
+                offsets[pick] = offsets[i];
+                offsets[i] = swap_tmp;
+
+                swap_tmp = key_idxs[pick];
+                key_idxs[pick] = key_idxs[i];
+                key_idxs[i] = swap_tmp;
+        }
+        index_limit = NUM_OFFSETS;
+}
+
+/* Translates enum test_cipher_mode_e into the cipher mode used by the
+ * ipsec_mb library. Values without a mapping yield NULL_CIPHER.
+ */
+static JOB_CIPHER_MODE translate_cipher_mode(enum test_cipher_mode_e test_mode)
+{
+        switch (test_mode) {
+        case TEST_CBC:
+                return CBC;
+        case TEST_CNTR:
+        case TEST_CNTR8:
+                return CNTR;
+        case TEST_AESDOCSIS:
+        case TEST_AESDOCSIS8:
+                return DOCSIS_SEC_BPI;
+        case TEST_DESDOCSIS:
+        case TEST_DESDOCSIS4:
+                return DOCSIS_DES;
+        case TEST_GCM:
+                return GCM;
+        case TEST_CCM:
+                return CCM;
+        case TEST_DES:
+                return DES;
+        case TEST_3DES:
+                return DES3;
+        case TEST_NULL_CIPHER:
+        default:
+                break;
+        }
+
+        return NULL_CIPHER;
+}
+
+/* Performs a test through the multi-buffer job API.
+ * Builds a job template from params, submits num_iter jobs (draining
+ * completed jobs as it goes), flushes the manager and returns the elapsed
+ * counter ticks divided by num_iter. The counter is the unhalted cycles MSR
+ * when use_unhalted_cycles is set (non-Windows builds only), RDTSCP
+ * otherwise.
+ */
+static uint64_t
+do_test(const uint32_t arch, MB_MGR *mb_mgr, struct params_s *params,
+        const uint32_t num_iter)
+{
+        JOB_AES_HMAC *job;
+        JOB_AES_HMAC job_template;
+        uint32_t i;
+        static uint32_t index = 0;
+        static DECLARE_ALIGNED(uint128_t iv, 16);
+        /* NOTE(review): ipad/opad hold 5 words and digest 3 words; confirm
+         * these are large enough for every hash algorithm tested here.
+         */
+        static uint32_t ipad[5], opad[5], digest[3];
+        static DECLARE_ALIGNED(uint32_t k1_expanded[11 * 4], 16);
+        static DECLARE_ALIGNED(uint8_t k2[16], 16);
+        static DECLARE_ALIGNED(uint8_t k3[16], 16);
+        static DECLARE_ALIGNED(struct gcm_key_data gdata_key, 16);
+        uint32_t size_aes;
+        uint64_t time = 0;
+        uint32_t aux;
+
+        /* Start from an all-zero template: the whole structure is copied
+         * into every job below, so fields that are not assigned here must
+         * not carry indeterminate stack contents.
+         */
+        memset(&job_template, 0, sizeof(job_template));
+
+        /* The "+8" / "+4" test modes use a slightly longer message */
+        if ((params->cipher_mode == TEST_AESDOCSIS8) ||
+            (params->cipher_mode == TEST_CNTR8))
+                size_aes = params->size_aes + 8;
+        else if (params->cipher_mode == TEST_DESDOCSIS4)
+                size_aes = params->size_aes + 4;
+        else
+                size_aes = params->size_aes;
+
+        job_template.msg_len_to_cipher_in_bytes = size_aes;
+        job_template.msg_len_to_hash_in_bytes = size_aes + sha_size_incr;
+        job_template.hash_start_src_offset_in_bytes = 0;
+        job_template.cipher_start_src_offset_in_bytes = sha_size_incr;
+        job_template.iv = (uint8_t *) &iv;
+        job_template.iv_len_in_bytes = 16;
+
+        job_template.auth_tag_output = (uint8_t *) digest;
+
+        switch (params->hash_alg) {
+        case TEST_XCBC:
+                job_template.u.XCBC._k1_expanded = k1_expanded;
+                job_template.u.XCBC._k2 = k2;
+                job_template.u.XCBC._k3 = k3;
+                job_template.hash_alg = AES_XCBC;
+                break;
+        case TEST_HASH_CCM:
+                job_template.hash_alg = AES_CCM;
+                break;
+        case TEST_HASH_GCM:
+                job_template.hash_alg = AES_GMAC;
+                break;
+        case TEST_NULL_HASH:
+                job_template.hash_alg = NULL_HASH;
+                break;
+        case TEST_HASH_CMAC:
+                job_template.u.CMAC._key_expanded = k1_expanded;
+                job_template.u.CMAC._skey1 = k2;
+                job_template.u.CMAC._skey2 = k3;
+                job_template.hash_alg = AES_CMAC;
+                break;
+        default:
+                /* HMAC hash alg is SHA1 or MD5 */
+                job_template.u.HMAC._hashed_auth_key_xor_ipad =
+                        (uint8_t *) ipad;
+                job_template.u.HMAC._hashed_auth_key_xor_opad =
+                        (uint8_t *) opad;
+                job_template.hash_alg = (JOB_HASH_ALG) params->hash_alg;
+                break;
+        }
+        job_template.auth_tag_output_len_in_bytes =
+                (uint64_t) auth_tag_length_bytes[job_template.hash_alg - 1];
+
+        job_template.cipher_direction = params->cipher_dir;
+
+        /* Hash first for NULL cipher and for decryption, cipher first
+         * when encrypting
+         */
+        if (params->cipher_mode == TEST_NULL_CIPHER) {
+                job_template.chain_order = HASH_CIPHER;
+        } else {
+                if (job_template.cipher_direction == ENCRYPT)
+                        job_template.chain_order = CIPHER_HASH;
+                else
+                        job_template.chain_order = HASH_CIPHER;
+        }
+
+        /* Translating enum to the API's one */
+        job_template.cipher_mode = translate_cipher_mode(params->cipher_mode);
+        job_template.aes_key_len_in_bytes = params->aes_key_size;
+        if (job_template.cipher_mode == GCM) {
+                /* Dummy all-zero key; it used to be passed uninitialized */
+                uint8_t key[32] = {0};
+
+                aesni_gcm_pre(arch, (params->aes_key_size / 8) - 2,
+                              key, &gdata_key);
+                job_template.aes_enc_key_expanded = &gdata_key;
+                job_template.aes_dec_key_expanded = &gdata_key;
+                job_template.u.GCM.aad_len_in_bytes = 12;
+                job_template.iv_len_in_bytes = 12;
+        } else if (job_template.cipher_mode == CCM) {
+                job_template.msg_len_to_cipher_in_bytes = size_aes;
+                job_template.msg_len_to_hash_in_bytes = size_aes;
+                job_template.hash_start_src_offset_in_bytes = 0;
+                job_template.cipher_start_src_offset_in_bytes = 0;
+                job_template.u.CCM.aad_len_in_bytes = 8;
+                job_template.iv_len_in_bytes = 13;
+        } else if (job_template.cipher_mode == DES ||
+                   job_template.cipher_mode == DES3 ||
+                   job_template.cipher_mode == DOCSIS_DES) {
+                job_template.aes_key_len_in_bytes = 8;
+                job_template.iv_len_in_bytes = 8;
+        }
+
+        /* Take the start timestamp */
+#ifndef _WIN32
+        if (use_unhalted_cycles)
+                time = read_cycles(params->core);
+        else
+#endif
+                time = __rdtscp(&aux);
+
+        for (i = 0; i < num_iter; i++) {
+                job = get_next_job(mb_mgr, arch);
+                *job = job_template;
+
+                job->src = buf + offsets[index];
+                job->dst = buf + offsets[index] + sha_size_incr;
+                if (job->cipher_mode == GCM) {
+                        job->u.GCM.aad = job->src;
+                } else if (job->cipher_mode == CCM) {
+                        job->u.CCM.aad = job->src;
+                        job->aes_enc_key_expanded = job->aes_dec_key_expanded =
+                                (uint32_t *) &keys[key_idxs[index]];
+                } else if (job->cipher_mode == DES3) {
+                        /* the same key schedule is used for all 3 keys */
+                        static const void *ks_ptr[3];
+
+                        ks_ptr[0] = ks_ptr[1] = ks_ptr[2] =
+                                &keys[key_idxs[index]];
+                        job->aes_enc_key_expanded =
+                                job->aes_dec_key_expanded = ks_ptr;
+                } else {
+                        job->aes_enc_key_expanded = job->aes_dec_key_expanded =
+                                (uint32_t *) &keys[key_idxs[index]];
+                }
+
+                /* Step through the offset tables, wrapping at index_limit */
+                index += 2;
+                if (index >= index_limit)
+                        index = 0;
+
+                /* Submit and drain whatever jobs have completed */
+                job = submit_job(mb_mgr, arch);
+                while (job) {
+#ifdef DEBUG
+                        if (job->status != STS_COMPLETED)
+                                fprintf(stderr, "failed job, status:%d\n",
+                                        job->status);
+#endif
+                        job = get_completed_job(mb_mgr, arch);
+                }
+        }
+
+        /* Flush the jobs still held by the manager */
+        while ((job = flush_job(mb_mgr, arch))) {
+#ifdef DEBUG
+                if (job->status != STS_COMPLETED)
+                        fprintf(stderr, "failed job, status:%d\n", job->status);
+#endif
+        }
+
+        /* Elapsed ticks; the MSR variant subtracts the read overhead */
+#ifndef _WIN32
+        if (use_unhalted_cycles)
+                time = (read_cycles(params->core) - rd_cycles_cost) - time;
+        else
+#endif
+                time = __rdtscp(&aux) - time;
+
+        return time / num_iter;
+}
+
+/* Performs test using the direct GCM API.
+ * Runs num_iter in-place GCM encryptions or decryptions (chosen by
+ * params->cipher_dir) over the global buffer and returns the elapsed
+ * counter ticks divided by num_iter. The counter is the unhalted cycles MSR
+ * when use_unhalted_cycles is set (non-Windows builds only), RDTSCP
+ * otherwise. Exits the program if the key buffer cannot be allocated.
+ */
+static uint64_t
+do_test_gcm(const uint32_t arch, struct params_s *params,
+            const uint32_t num_iter)
+{
+        struct gcm_key_data gdata_key;
+        struct gcm_context_data gdata_ctx;
+        uint8_t *key;
+        static uint32_t index = 0;
+        uint8_t key_sz = params->aes_key_size / 8 - 2;
+        uint32_t size_aes = params->size_aes;
+        uint32_t i;
+        uint8_t aad[12] = {0};
+        uint8_t auth_tag[12];
+        DECLARE_ALIGNED(uint8_t iv[16], 16);
+        uint64_t time = 0;
+        uint32_t aux;
+
+        /* Key, AAD and IV are dummy inputs; zero them so that no
+         * uninitialized memory is fed to the GCM functions.
+         */
+        memset(iv, 0, sizeof(iv));
+
+        key = (uint8_t *) calloc(params->aes_key_size, sizeof(uint8_t));
+        if (!key) {
+                fprintf(stderr, "Could not malloc key\n");
+                free_mem();
+                exit(EXIT_FAILURE);
+        }
+
+        aesni_gcm_pre(arch, key_sz, key, &gdata_key);
+        if (params->cipher_dir == ENCRYPT) {
+#ifndef _WIN32
+                if (use_unhalted_cycles)
+                        time = read_cycles(params->core);
+                else
+#endif
+                        time = __rdtscp(&aux);
+
+                for (i = 0; i < num_iter; i++) {
+                        aesni_gcm_enc(arch, key_sz, &gdata_key, &gdata_ctx,
+                                      buf + offsets[index] + sha_size_incr,
+                                      buf + offsets[index] + sha_size_incr,
+                                      size_aes, iv, aad, sizeof(aad),
+                                      auth_tag, sizeof(auth_tag));
+                        /* Step through the offsets, wrapping at the limit */
+                        index += 2;
+                        if (index >= index_limit)
+                                index = 0;
+                }
+#ifndef _WIN32
+                if (use_unhalted_cycles)
+                        time = (read_cycles(params->core) -
+                                rd_cycles_cost) - time;
+                else
+#endif
+                        time = __rdtscp(&aux) - time;
+        } else { /*DECRYPT*/
+#ifndef _WIN32
+                if (use_unhalted_cycles)
+                        time = read_cycles(params->core);
+                else
+#endif
+                        time = __rdtscp(&aux);
+
+                for (i = 0; i < num_iter; i++) {
+                        aesni_gcm_dec(arch, key_sz, &gdata_key, &gdata_ctx,
+                                      buf + offsets[index] + sha_size_incr,
+                                      buf + offsets[index] + sha_size_incr,
+                                      size_aes, iv, aad, sizeof(aad),
+                                      auth_tag, sizeof(auth_tag));
+                        /* Step through the offsets, wrapping at the limit */
+                        index += 2;
+                        if (index >= index_limit)
+                                index = 0;
+                }
+#ifndef _WIN32
+                if (use_unhalted_cycles)
+                        time = (read_cycles(params->core) -
+                                rd_cycles_cost) - time;
+                else
+#endif
+                        time = __rdtscp(&aux) - time;
+        }
+
+        free(key);
+        return time / num_iter;
+}
+
+
+/* Method used by qsort to compare 2 uint64_t values.
+ * Returns a negative value when *a < *b, a positive value when *a > *b and
+ * 0 when they are equal. The values are compared directly: truncating
+ * their 64-bit difference to int loses the sign and magnitude as soon as
+ * the difference does not fit in an int.
+ */
+static int compare_uint64_t(const void *a, const void *b)
+{
+        const uint64_t lhs = *(const uint64_t *)a;
+        const uint64_t rhs = *(const uint64_t *)b;
+
+        return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Computes the rounded mean of a set of times after dropping the bottom
+ * and top quarters. The array is sorted in place. Exits the program when
+ * no data points remain after dropping the quarters.
+ */
+static uint64_t mean_median(uint64_t *array, uint32_t size)
+{
+        const uint32_t quarter = size / 4;
+        const uint32_t kept = size - quarter * 2;
+        const uint64_t *sample;
+        const uint64_t *end;
+        uint64_t total = 0;
+
+        /* These are single threaded runs, so only the lowest and highest
+         * quarters are ignored (no extra hardware thread skew clipping).
+         */
+        qsort(array, size, sizeof(uint64_t), compare_uint64_t);
+
+        if ((kept == 0) || (kept & 0x80000000)) {
+                fprintf(stderr, "not enough data points\n");
+                free_mem();
+                exit(EXIT_FAILURE);
+        }
+
+        /* Sum the middle part of the sorted data to remove noise */
+        end = array + quarter + kept;
+        for (sample = array + quarter; sample != end; sample++)
+                total += *sample;
+
+        /* Adding half the divisor rounds to the nearest integer */
+        return (total + kept / 2) / kept;
+}
+
+/* Runs the test for each buffer size and stores the per-iteration time.
+ * The result for size index sz of this run goes into
+ * variant_ptr->avg_times[sz * NUM_RUNS + run]; params and arch are then
+ * recorded in the variant.
+ */
+static void
+process_variant(MB_MGR *mgr, const uint32_t arch, struct params_s *params,
+                struct variant_s *variant_ptr, const uint32_t run)
+{
+        const uint32_t num_sizes = params->num_sizes;
+        uint64_t *slot;
+        uint32_t idx;
+
+        for (idx = 0, slot = &variant_ptr->avg_times[run];
+             idx < num_sizes;
+             idx++, slot += NUM_RUNS) {
+                const uint32_t size_aes = (idx + 1) * JOB_SIZE_STEP;
+                const uint32_t num_iter = ITER_SCALE / size_aes;
+                uint64_t result;
+
+                params->size_aes = size_aes;
+
+                /* GCM uses the direct API unless the job API was requested */
+                if (params->test_type == TTYPE_AES_GCM && (!use_gcm_job_api))
+                        result = do_test_gcm(arch, params, 2 * num_iter);
+                else
+                        result = do_test(arch, mgr, params, num_iter);
+
+                *slot = result;
+        }
+
+        variant_ptr->params = *params;
+        variant_ptr->arch = arch;
+}
+
+/* Runs every cipher mode / hash algorithm combination of a test type.
+ * The ranges default to TEST_CBC..TEST_NULL_CIPHER and
+ * TEST_SHA1..TEST_NULL_HASH and are narrowed per params->test_type. Each
+ * combination is processed into *variant_ptr, after which both
+ * *variant_ptr and *variant are advanced.
+ */
+static void
+do_variants(MB_MGR *mgr, const uint32_t arch, struct params_s *params,
+            const uint32_t run, struct variant_s **variant_ptr,
+            uint32_t *variant)
+{
+        uint32_t hash_first = TEST_SHA1;
+        uint32_t hash_last = TEST_NULL_HASH;
+        uint32_t cipher_first = TEST_CBC;
+        uint32_t cipher_last = TEST_NULL_CIPHER;
+        uint32_t cipher;
+        uint32_t hash;
+
+        switch (params->test_type) {
+        case TTYPE_AES_DOCSIS:
+                hash_first = TEST_NULL_HASH;
+                cipher_first = TEST_AESDOCSIS;
+                cipher_last = TEST_DESDOCSIS4;
+                break;
+        case TTYPE_AES_GCM:
+                hash_first = hash_last = TEST_HASH_GCM;
+                cipher_first = cipher_last = TEST_GCM;
+                break;
+        case TTYPE_AES_CCM:
+                hash_first = hash_last = TEST_HASH_CCM;
+                cipher_first = cipher_last = TEST_CCM;
+                break;
+        case TTYPE_AES_DES:
+                hash_first = hash_last = TEST_NULL_HASH;
+                cipher_first = cipher_last = TEST_DES;
+                break;
+        case TTYPE_AES_3DES:
+                hash_first = hash_last = TEST_NULL_HASH;
+                cipher_first = cipher_last = TEST_3DES;
+                break;
+        default:
+                /* keep the default ranges */
+                break;
+        }
+
+        for (cipher = cipher_first; cipher <= cipher_last; cipher++) {
+                params->cipher_mode = (enum test_cipher_mode_e) cipher;
+
+                for (hash = hash_first; hash <= hash_last; hash++) {
+                        params->hash_alg = (enum test_hash_alg_e) hash;
+                        process_variant(mgr, arch, params, *variant_ptr, run);
+
+                        /* move on to the next variant slot */
+                        (*variant)++;
+                        (*variant_ptr)++;
+                }
+        }
+}
+
+/* Initializes the manager for arch, then runs the variants for each cipher
+ * direction and key size. Key sizes go from AES_128_BYTES in steps of 8 up
+ * to AES_256_BYTES, or only AES_128_BYTES for the DOCSIS, DES, 3DES and CCM
+ * test types.
+ */
+static void
+run_dir_test(MB_MGR *mgr, const uint32_t arch, struct params_s *params,
+             const uint32_t run, struct variant_s **variant_ptr,
+             uint32_t *variant)
+{
+        uint32_t key_limit = AES_256_BYTES; /* Key size value limit */
+        uint32_t direction;
+        uint32_t key_size;
+
+        switch (params->test_type) {
+        case TTYPE_AES_DOCSIS:
+        case TTYPE_AES_DES:
+        case TTYPE_AES_3DES:
+        case TTYPE_AES_CCM:
+                key_limit = AES_128_BYTES;
+                break;
+        default:
+                break;
+        }
+
+        init_mb_mgr(mgr, arch);
+
+        for (direction = ENCRYPT; direction <= DECRYPT; direction++) {
+                params->cipher_dir = (JOB_CIPHER_DIRECTION) direction;
+
+                for (key_size = AES_128_BYTES; key_size <= key_limit;
+                     key_size += 8) {
+                        params->aes_key_size = key_size;
+                        do_variants(mgr, arch, params, run, variant_ptr,
+                                    variant);
+                }
+        }
+}
+
+/* Generates output containing averaged times for each test variant.
+ * Prints one tab-separated header row each for architecture, cipher mode,
+ * direction, hash algorithm and key size, then one row per buffer size with
+ * the trimmed mean of the NUM_RUNS measurements of every variant.
+ * Note: mean_median() sorts each variant's measurements in place.
+ */
+static void print_times(struct variant_s *variant_list, struct params_s *params,
+                        const uint32_t total_variants)
+{
+        const uint32_t sizes = params->num_sizes;
+        uint32_t col;
+        uint32_t sz;
+
+        /* Temporary variables */
+        struct params_s par;
+        uint8_t c_mode;
+        uint8_t c_dir;
+        uint8_t h_alg;
+        const char *func_names[4] = {
+                "SSE", "AVX", "AVX2", "AVX512"
+        };
+        const char *c_mode_names[12] = {
+                "CBC", "CNTR", "CNTR+8", "NULL_CIPHER", "DOCAES", "DOCAES+8",
+                "DOCDES", "DOCDES+4", "GCM", "CCM", "DES", "3DES"
+        };
+        const char *c_dir_names[2] = {
+                "ENCRYPT", "DECRYPT"
+        };
+        const char *h_alg_names[12] = {
+                "SHA1", "SHA_224", "SHA_256", "SHA_384", "SHA_512", "XCBC",
+                "MD5", "CMAC", "NULL_HASH", "GCM", "CUSTOM", "CCM"
+        };
+        printf("ARCH");
+        for (col = 0; col < total_variants; col++)
+                printf("\t%s", func_names[variant_list[col].arch]);
+        printf("\n");
+        printf("CIPHER");
+        for (col = 0; col < total_variants; col++) {
+                par = variant_list[col].params;
+                /* cipher_mode holds a test enum value, so it is rebased
+                 * with the first test enumerator, not the library's CBC
+                 */
+                c_mode = par.cipher_mode - TEST_CBC;
+                printf("\t%s", c_mode_names[c_mode]);
+        }
+        printf("\n");
+        printf("DIR");
+        for (col = 0; col < total_variants; col++) {
+                par = variant_list[col].params;
+                c_dir = par.cipher_dir - ENCRYPT;
+                printf("\t%s", c_dir_names[c_dir]);
+        }
+        printf("\n");
+        printf("HASH_ALG");
+        for (col = 0; col < total_variants; col++) {
+                par = variant_list[col].params;
+                /* hash_alg holds a test enum value as well */
+                h_alg = par.hash_alg - TEST_SHA1;
+                printf("\t%s", h_alg_names[h_alg]);
+        }
+        printf("\n");
+        printf("KEY_SIZE");
+        for (col = 0; col < total_variants; col++) {
+                par = variant_list[col].params;
+                printf("\tAES-%u", par.aes_key_size * 8);
+        }
+        printf("\n");
+        for (sz = 0; sz < sizes; sz++) {
+                /* buffer size of this row */
+                printf("%u", (unsigned)((sz + 1) * JOB_SIZE_STEP));
+                for (col = 0; col < total_variants; col++) {
+                        uint64_t *time_ptr =
+                                &variant_list[col].avg_times[sz * NUM_RUNS];
+                        const unsigned long long val =
+                                mean_median(time_ptr, NUM_RUNS);
+
+                        printf("\t%llu", val);
+                }
+                printf("\n");
+        }
+}
+
+/* Thread entry point: prepares the data structure for test variants
+ * storage, sets the test configuration, runs NUM_RUNS passes over every
+ * enabled test type and architecture and, when info->print_info is 1,
+ * prints the results.
+ * arg: pointer to this thread's struct thread_info.
+ * Returns NULL (nothing on Windows); any failure exits the whole program.
+ */
+#ifdef _WIN32
+static void
+#else
+static void *
+#endif
+run_tests(void *arg)
+{
+        uint32_t i;
+        struct thread_info *info = (struct thread_info *)arg;
+        MB_MGR *p_mgr = NULL;
+        struct params_s params;
+        uint32_t num_variants[NUM_TYPES] = {0, 0, 0};
+        uint32_t type, at_size, run, arch;
+        uint32_t variants_per_arch, max_arch;
+        uint32_t variant;
+        uint32_t total_variants = 0;
+        struct variant_s *variant_ptr = NULL;
+        struct variant_s *variant_list = NULL;
+
+        p_mgr = alloc_mb_mgr(flags);
+        if (p_mgr == NULL) {
+                fprintf(stderr, "Failed to allocate MB_MGR structure!\n");
+                free_mem();
+                exit(EXIT_FAILURE);
+        }
+
+        params.num_sizes = JOB_SIZE / JOB_SIZE_STEP;
+        params.core = (uint32_t)info->core;
+
+        /* if cores selected then set affinity */
+        if (core_mask)
+                if (set_affinity(info->core) != 0) {
+                        fprintf(stderr, "Failed to set cpu "
+                                "affinity on core %d\n", info->core);
+                        goto exit_failure;
+                }
+
+        /* If unhalted cycles selected and this is
+           the primary thread then start counter */
+        if (use_unhalted_cycles && info->print_info) {
+                int ret;
+
+                ret = start_cycles_ctr(params.core);
+                if (ret != 0) {
+                        fprintf(stderr, "Failed to start cycles "
+                                "counter on core %u\n", params.core);
+                        goto exit_failure;
+                }
+                /* Get average cost of reading counter */
+                ret = set_avg_unhalted_cycle_cost(params.core, &rd_cycles_cost);
+                if (ret != 0 || rd_cycles_cost == 0) {
+                        fprintf(stderr, "Error calculating unhalted "
+                                "cycles read overhead!\n");
+                        goto exit_failure;
+                } else
+                        fprintf(stderr, "Started counting unhalted cycles on "
+                                "core %d\nUnhalted cycles read cost = %lu "
+                                "cycles\n", params.core,
+                                (unsigned long)rd_cycles_cost);
+        }
+
+        /* First pass over the enabled test types: only count how many
+         * variants will be produced so that storage can be sized
+         */
+        for (type = TTYPE_AES_HMAC; type < NUM_TYPES; type++) {
+                if (test_types[type] == 0)
+                        continue;
+
+                switch (type) {
+                default:
+                case TTYPE_AES_HMAC:
+                        variants_per_arch = VARIANTS_PER_ARCH_AES;
+                        max_arch = NUM_ARCHS;
+                        break;
+                case TTYPE_AES_DOCSIS:
+                        variants_per_arch = VARIANTS_PER_ARCH_DOCSIS;
+                        max_arch = NUM_ARCHS;
+                        break;
+                case TTYPE_AES_GCM:
+                        variants_per_arch = VARIANTS_PER_ARCH_GCM;
+                        max_arch = NUM_ARCHS - 1; /* No AVX512 for GCM */
+                        break;
+                case TTYPE_AES_CCM:
+                        variants_per_arch = VARIANTS_PER_ARCH_CCM;
+                        max_arch = NUM_ARCHS;
+                        break;
+                case TTYPE_AES_DES:
+                        variants_per_arch = VARIANTS_PER_ARCH_DES;
+                        max_arch = NUM_ARCHS;
+                        break;
+                case TTYPE_AES_3DES:
+                        variants_per_arch = VARIANTS_PER_ARCH_3DES;
+                        max_arch = NUM_ARCHS;
+                        break;
+                }
+
+                /* Calculating number of all variants */
+                for (arch = 0; arch < max_arch; arch++) {
+                        if (archs[arch] == 0)
+                                continue;
+                        num_variants[type] += variants_per_arch;
+                }
+                total_variants += num_variants[type];
+        }
+
+        /* NOTE(review): the VARIANTS_PER_ARCH_* constants presumably match
+         * the number of variants do_variants() writes per architecture;
+         * nothing here checks that - confirm against their definitions.
+         */
+        variant_list = (struct variant_s *)
+                malloc(total_variants * sizeof(struct variant_s));
+        if (variant_list == NULL) {
+                fprintf(stderr, "Cannot allocate memory\n");
+                goto exit_failure;
+        }
+
+        /* One time slot per run and per buffer size for every variant */
+        at_size = NUM_RUNS * params.num_sizes * sizeof(uint64_t);
+        for (variant = 0, variant_ptr = variant_list;
+             variant < total_variants;
+             variant++, variant_ptr++) {
+                variant_ptr->avg_times = (uint64_t *) malloc(at_size);
+                if (!variant_ptr->avg_times) {
+                        fprintf(stderr, "Cannot allocate memory\n");
+                        goto exit_failure;
+                }
+        }
+        for (run = 0; run < NUM_RUNS; run++) {
+                fprintf(stderr, "Starting run %d of %d\n", run+1, NUM_RUNS);
+
+                /* Every run fills the variant list from the start again */
+                variant = 0;
+                variant_ptr = variant_list;
+
+                for (type = TTYPE_AES_HMAC; type < NUM_TYPES; type++) {
+                        if (test_types[type] == 0)
+                                continue;
+
+                        if (type == TTYPE_AES_GCM)
+                                /* No AVX512 for GCM */
+                                max_arch = NUM_ARCHS - 1;
+                        else
+                                max_arch = NUM_ARCHS;
+
+                        params.num_variants = num_variants[type];
+                        params.test_type = type;
+                        /* Performing tests for each selected architecture */
+                        for (arch = 0; arch < max_arch; arch++) {
+                                if (archs[arch] == 0)
+                                        continue;
+                                run_dir_test(p_mgr, arch, &params, run,
+                                             &variant_ptr, &variant);
+                        }
+                } /* end for type */
+        } /* end for run */
+        /* Only the thread flagged with print_info reports its results */
+        if (info->print_info == 1)
+                print_times(variant_list, &params, total_variants);
+
+        if (variant_list != NULL) {
+                /* Freeing variants list */
+                for (i = 0; i < total_variants; i++)
+                        free(variant_list[i].avg_times);
+                free(variant_list);
+        }
+        free_mb_mgr(p_mgr);
+#ifndef _WIN32
+        return NULL;
+
+#else
+        return;
+#endif
+        /* Failure path: releases the variant list (but not the per-variant
+         * avg_times buffers), the global buffers and the manager, then
+         * terminates the whole program
+         */
+exit_failure:
+        if (variant_list != NULL)
+                free(variant_list);
+        free_mem();
+        free_mb_mgr(p_mgr);
+        exit(EXIT_FAILURE);
+}
+
+/* Prints the command line help to stderr */
+static void usage(void)
+{
+        /* Options listed before the thread count need no formatting */
+        fputs("Usage: ipsec_perf [args], where args are zero or more\n"
+              "-h: print this message\n"
+              "-c: Use cold cache, it uses warm as default\n"
+              "-w: Use warm cache\n"
+              "--no-avx512: Don't do AVX512\n"
+              "--no-avx2: Don't do AVX2\n"
+              "--no-avx: Don't do AVX\n"
+              "--no-sse: Don't do SSE\n"
+              "-o val: Use <val> for the SHA size increment, default is 24\n"
+              "--shani-on: use SHA extensions, default: auto-detect\n"
+              "--shani-off: don't use SHA extensions\n"
+              "--no-gcm: do not run GCM perf tests\n"
+              "--no-aes: do not run standard AES + HMAC perf tests\n"
+              "--no-docsis: do not run DOCSIS cipher perf tests\n"
+              "--no-ccm: do not run CCM cipher perf tests\n"
+              "--no-des: do not run DES cipher perf tests\n"
+              "--no-3des: do not run 3DES cipher perf tests\n"
+              "--gcm-job-api: use JOB API for GCM perf tests"
+              " (raw GCM API is default)\n",
+              stderr);
+
+        /* The only line carrying a value: the maximum thread count */
+        fprintf(stderr,
+                "--threads num: <num> for the number of threads to run"
+                " Max: %d\n",
+                MAX_NUM_THREADS + 1);
+
+        fputs("--cores mask: <mask> CPU's to run threads\n"
+              "--unhalted-cycles: measure using unhalted cycles "
+              "(requires root).\n"
+              "                   Note: RDTSC is used by default.\n",
+              stderr);
+}
+
+/**
+ * Program entry point.
+ *
+ * Parses the command line, optionally initialises the MSR module (when
+ * --unhalted-cycles is given), reports whether SHANI is used for the SSE
+ * architecture, then runs the tests: num_t - 1 worker threads are spawned
+ * and the calling thread runs the last instance itself (the only one with
+ * print_info set).
+ *
+ * @param argc number of command-line arguments
+ * @param argv command-line arguments
+ *
+ * @return EXIT_SUCCESS on success (or after -h), EXIT_FAILURE on an invalid
+ *         option, a missing option value or an initialisation error
+ */
+int main(int argc, char *argv[])
+{
+        int i, num_t = 0, core = 0;
+        struct thread_info *thread_info_p = t_info;
+
+#ifdef _WIN32
+        HANDLE threads[MAX_NUM_THREADS];
+#else
+        pthread_t tids[MAX_NUM_THREADS];
+#endif
+
+        for (i = 1; i < argc; i++)
+                if (strcmp(argv[i], "-h") == 0) {
+                        usage();
+                        return EXIT_SUCCESS;
+                } else if (strcmp(argv[i], "-c") == 0) {
+                        cache_type = COLD;
+                        fprintf(stderr, "Cold cache, ");
+                } else if (strcmp(argv[i], "-w") == 0) {
+                        cache_type = WARM;
+                        fprintf(stderr, "Warm cache, ");
+                } else if (strcmp(argv[i], "--no-avx512") == 0) {
+                        archs[ARCH_AVX512] = 0;
+                } else if (strcmp(argv[i], "--no-avx2") == 0) {
+                        archs[ARCH_AVX2] = 0;
+                } else if (strcmp(argv[i], "--no-avx") == 0) {
+                        archs[ARCH_AVX] = 0;
+                } else if (strcmp(argv[i], "--no-sse") == 0) {
+                        archs[ARCH_SSE] = 0;
+                } else if (strcmp(argv[i], "--shani-on") == 0) {
+                        flags &= (~IMB_FLAG_SHANI_OFF);
+                } else if (strcmp(argv[i], "--shani-off") == 0) {
+                        flags |= IMB_FLAG_SHANI_OFF;
+                } else if (strcmp(argv[i], "--no-gcm") == 0) {
+                        test_types[TTYPE_AES_GCM] = 0;
+                } else if (strcmp(argv[i], "--no-aes") == 0) {
+                        test_types[TTYPE_AES_HMAC] = 0;
+                } else if (strcmp(argv[i], "--no-docsis") == 0) {
+                        test_types[TTYPE_AES_DOCSIS] = 0;
+                } else if (strcmp(argv[i], "--no-ccm") == 0) {
+                        test_types[TTYPE_AES_CCM] = 0;
+                } else if (strcmp(argv[i], "--no-des") == 0) {
+                        test_types[TTYPE_AES_DES] = 0;
+                } else if (strcmp(argv[i], "--no-3des") == 0) {
+                        test_types[TTYPE_AES_3DES] = 0;
+                } else if (strcmp(argv[i], "--gcm-job-api") == 0) {
+                        use_gcm_job_api = 1;
+                } else if ((strcmp(argv[i], "-o") == 0) && (i < argc - 1)) {
+                        i++;
+                        sha_size_incr = atoi(argv[i]);
+                } else if ((strcmp(argv[i], "--threads") == 0) &&
+                           (i < argc - 1)) {
+                        /*
+                         * As for "-o": without a following value the option
+                         * falls through to the usage/error branch instead
+                         * of passing argv[argc] (NULL) to atoi().
+                         */
+                        num_t = atoi(argv[++i]);
+                        if (num_t > (MAX_NUM_THREADS + 1)) {
+                                fprintf(stderr, "Invalid number of threads!\n");
+                                return EXIT_FAILURE;
+                        }
+                } else if ((strcmp(argv[i], "--cores") == 0) &&
+                           (i < argc - 1)) {
+                        /* Value presence checked above, see "--threads" */
+                        errno = 0;
+                        core_mask = strtoull(argv[++i], NULL, 0);
+                        if (errno != 0) {
+                                fprintf(stderr, "Error converting cpu mask!\n");
+                                return EXIT_FAILURE;
+                        }
+                } else if (strcmp(argv[i], "--unhalted-cycles") == 0) {
+#ifdef _WIN32
+                        fprintf(stderr, "Counting unhalted cycles not "
+                                "currently supported on Windows!\n");
+                        return EXIT_FAILURE;
+#endif
+                        use_unhalted_cycles = 1;
+                } else {
+                        usage();
+                        return EXIT_FAILURE;
+                }
+
+        /* Check num cores >= number of threads */
+        if ((core_mask != 0 && num_t != 0) && (num_t > bitcount(core_mask))) {
+                fprintf(stderr, "Insufficient number of cores in "
+                        "core mask (0x%lx) to run %d threads!\n",
+                        (unsigned long) core_mask, num_t);
+                return EXIT_FAILURE;
+        }
+
+        /* if cycles selected then init MSR module */
+        if (use_unhalted_cycles) {
+                if (core_mask == 0) {
+                        fprintf(stderr, "Must specify core mask "
+                                "when reading unhalted cycles!\n");
+                        return EXIT_FAILURE;
+                }
+
+                if (init_msr_mod() != 0) {
+                        fprintf(stderr, "Error initializing MSR module!\n");
+                        return EXIT_FAILURE;
+                }
+        }
+
+        fprintf(stderr, "SHA size incr = %d\n", sha_size_incr);
+        if (archs[ARCH_SSE]) {
+                /* Temporary manager, used only to report SHANI usage */
+                MB_MGR *p_mgr = alloc_mb_mgr(flags);
+
+                if (p_mgr == NULL) {
+                        fprintf(stderr, "Error allocating MB_MGR structure!\n");
+                        return EXIT_FAILURE;
+                }
+                init_mb_mgr_sse(p_mgr);
+                fprintf(stderr, "%s SHA extensions (shani) for SSE arch\n",
+                        (p_mgr->features & IMB_FEATURE_SHANI) ?
+                        "Using" : "Not using");
+                free_mb_mgr(p_mgr);
+        }
+
+        memset(t_info, 0, sizeof(t_info));
+        init_buf(cache_type);
+        if (num_t > 1)
+                for (i = 0; i < num_t - 1; i++, thread_info_p++) {
+                        /* Set core if selected */
+                        if (core_mask) {
+                                core = next_core(core_mask, core);
+                                thread_info_p->core = core++;
+                        }
+#ifdef _WIN32
+                        threads[i] = (HANDLE)
+                                _beginthread(&run_tests, 0,
+                                             (void *)thread_info_p);
+#else
+                        pthread_attr_t attr;
+
+                        pthread_attr_init(&attr);
+                        /*
+                         * A failed create must not be ignored: tids[i]
+                         * would be joined later without being initialised.
+                         */
+                        if (pthread_create(&tids[i], &attr, run_tests,
+                                           (void *)thread_info_p) != 0) {
+                                fprintf(stderr, "Error creating thread!\n");
+                                pthread_attr_destroy(&attr);
+                                return EXIT_FAILURE;
+                        }
+                        /* Attributes are not needed once the thread exists */
+                        pthread_attr_destroy(&attr);
+#endif
+                }
+
+        /* The calling thread runs the last instance and prints results */
+        thread_info_p->print_info = 1;
+        if (core_mask) {
+                core = next_core(core_mask, core);
+                thread_info_p->core = core;
+        }
+
+        run_tests((void *)thread_info_p);
+        if (num_t > 1) {
+#ifdef _WIN32
+                /*
+                 * Only num_t - 1 handles were stored in threads[], and all
+                 * of them have to finish before cleaning up (bWaitAll).
+                 * NOTE(review): handles returned by _beginthread() may
+                 * already be closed by the runtime when the thread ends -
+                 * confirm whether _beginthreadex() should be used instead.
+                 */
+                WaitForMultipleObjects(num_t - 1, threads, TRUE, INFINITE);
+#endif
+                for (i = 0; i < num_t - 1; i++) {
+                        fprintf(stderr, "Waiting on thread %d to finish...\n",
+                                i+2);
+#ifdef _WIN32
+                        CloseHandle(threads[i]);
+#else
+                        pthread_join(tids[i], NULL);
+#endif
+                }
+        }
+
+        if (use_unhalted_cycles)
+                machine_fini();
+
+        free_mem();
+
+        return EXIT_SUCCESS;
+}
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/msr.c b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.c
new file mode 100644
index 00000000..34ee4973
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.c
@@ -0,0 +1,209 @@
+/**********************************************************************
+  Copyright(c) 2018 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/**
+ * @brief Provides access to MSR read & write operations
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef _WIN32
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#endif
+
+#include "msr.h"
+
+static int *m_msr_fd = NULL;           /**< MSR driver file descriptors table */
+static unsigned m_maxcores = 0;        /**< max number of cores (size of the
+                                          table above too) */
+
+/**
+ * Allocate and reset the table of per-core MSR file descriptors.
+ *
+ * The table gets max_core_id + 1 entries, each preset to -1 (not open).
+ * On Windows nothing is done and MACHINE_RETVAL_OK is returned.
+ *
+ * @param max_core_id highest logical core id to handle; zero is rejected
+ *
+ * @retval MACHINE_RETVAL_OK on success
+ * @retval MACHINE_RETVAL_PARAM when max_core_id is zero
+ * @retval MACHINE_RETVAL_ERROR when the table cannot be allocated
+ */
+int
+machine_init(const unsigned max_core_id)
+{
+#ifndef _WIN32
+        unsigned core;
+
+        if (!max_core_id)
+                return MACHINE_RETVAL_PARAM;
+
+        m_maxcores = max_core_id + 1;
+
+        /* One descriptor slot per core; the core id indexes the table */
+        m_msr_fd = (int *)malloc(m_maxcores * sizeof(*m_msr_fd));
+        if (m_msr_fd != NULL) {
+                /* Mark every slot as "not opened yet" */
+                for (core = 0; core < m_maxcores; core++)
+                        m_msr_fd[core] = -1;
+                return MACHINE_RETVAL_OK;
+        }
+
+        /* Allocation failed, leave the module in its uninitialised state */
+        m_maxcores = 0;
+        return MACHINE_RETVAL_ERROR;
+#else
+        return MACHINE_RETVAL_OK;
+#endif /* _WIN32 */
+}
+
+/**
+ * Release everything machine_init() and msr_file_open() set up.
+ *
+ * Closes every descriptor still open in the table, frees the table and
+ * resets the module state. On Windows nothing is done.
+ *
+ * @retval MACHINE_RETVAL_OK on success
+ * @retval MACHINE_RETVAL_ERROR when the descriptor table does not exist
+ */
+int
+machine_fini(void)
+{
+#ifndef _WIN32
+        unsigned core = 0;
+
+        ASSERT(m_msr_fd != NULL);
+        if (!m_msr_fd)
+                return MACHINE_RETVAL_ERROR;
+
+        /* Walk the table and close whatever is still open */
+        while (core < m_maxcores) {
+                int *slot = &m_msr_fd[core++];
+
+                if (*slot == -1)
+                        continue;
+                close(*slot);
+                *slot = -1;
+        }
+
+        free(m_msr_fd);
+        m_maxcores = 0;
+        m_msr_fd = NULL;
+#endif /* _WIN32 */
+        return MACHINE_RETVAL_OK;
+}
+
+#ifndef _WIN32
+/**
+ * @brief Gets the MSR driver file descriptor of a logical core
+ *
+ * A descriptor already stored in the m_msr_fd table is returned as is.
+ * Otherwise "/dev/cpu/<lcore>/msr" is opened read/write and, on success,
+ * the new descriptor is stored in the table before being returned.
+ *
+ * @param lcore logical core id, used as index into m_msr_fd
+ *
+ * @return MSR driver file descriptor for \a lcore,
+ *         negative value if the device file could not be opened
+ */
+static int
+msr_file_open(const unsigned lcore)
+{
+        ASSERT(lcore < m_maxcores);
+        ASSERT(m_msr_fd != NULL);
+
+        char path[32];
+        int fd = m_msr_fd[lcore];
+
+        /* Descriptor opened by an earlier call */
+        if (fd >= 0)
+                return fd;
+
+        memset(path, 0, sizeof(path));
+        snprintf(path, sizeof(path)-1, "/dev/cpu/%u/msr", lcore);
+
+        fd = open(path, O_RDWR);
+        if (fd >= 0) {
+                /* Remember it for subsequent calls */
+                m_msr_fd[lcore] = fd;
+                return fd;
+        }
+
+        fprintf(stderr, "Error opening file '%s'!\n", path);
+        return fd;
+}
+#endif /* _WIN32 */
+
+/**
+ * Read one 64-bit MSR through the MSR driver file of a logical core.
+ *
+ * The register number is used as the file offset of the read.
+ * On Windows nothing is read and MACHINE_RETVAL_OK is returned.
+ *
+ * @param lcore logical core id
+ * @param reg   MSR to read from
+ * @param value destination of the value read
+ *
+ * @retval MACHINE_RETVAL_OK on success
+ * @retval MACHINE_RETVAL_PARAM when value is NULL or lcore is out of range
+ * @retval MACHINE_RETVAL_ERROR when the module is not initialised, the
+ *         device file cannot be opened or the read is incomplete
+ */
+int
+msr_read(const unsigned lcore,
+         const uint32_t reg,
+         uint64_t *value)
+{
+#ifndef _WIN32
+        int fd;
+        ssize_t nbytes;
+
+        ASSERT(value != NULL);
+        ASSERT(lcore < m_maxcores);
+        if (value == NULL || lcore >= m_maxcores)
+                return MACHINE_RETVAL_PARAM;
+
+        ASSERT(m_msr_fd != NULL);
+        if (m_msr_fd == NULL)
+                return MACHINE_RETVAL_ERROR;
+
+        fd = msr_file_open(lcore);
+        if (fd < 0)
+                return MACHINE_RETVAL_ERROR;
+
+        /* The MSR number selects the offset within the device file */
+        nbytes = pread(fd, value, sizeof(*value), (off_t)reg);
+        if (nbytes == sizeof(*value))
+                return MACHINE_RETVAL_OK;
+
+        fprintf(stderr, "RDMSR failed for reg[0x%x] on lcore %u\n",
+                (unsigned)reg, lcore);
+        return MACHINE_RETVAL_ERROR;
+#else
+        return MACHINE_RETVAL_OK;
+#endif /* _WIN32 */
+}
+
+/**
+ * Write one 64-bit MSR through the MSR driver file of a logical core.
+ *
+ * The register number is used as the file offset of the write.
+ * On Windows nothing is written and MACHINE_RETVAL_OK is returned.
+ *
+ * @param lcore logical core id
+ * @param reg   MSR to write to
+ * @param value value to store in the MSR
+ *
+ * @retval MACHINE_RETVAL_OK on success
+ * @retval MACHINE_RETVAL_PARAM when lcore is out of range
+ * @retval MACHINE_RETVAL_ERROR when the module is not initialised, the
+ *         device file cannot be opened or the write is incomplete
+ */
+int
+msr_write(const unsigned lcore,
+          const uint32_t reg,
+          const uint64_t value)
+{
+#ifndef _WIN32
+        int fd;
+        ssize_t nbytes;
+
+        ASSERT(lcore < m_maxcores);
+        if (lcore >= m_maxcores)
+                return MACHINE_RETVAL_PARAM;
+
+        ASSERT(m_msr_fd != NULL);
+        if (m_msr_fd == NULL)
+                return MACHINE_RETVAL_ERROR;
+
+        fd = msr_file_open(lcore);
+        if (fd < 0)
+                return MACHINE_RETVAL_ERROR;
+
+        /* The MSR number selects the offset within the device file */
+        nbytes = pwrite(fd, &value, sizeof(value), (off_t)reg);
+        if (nbytes == sizeof(value))
+                return MACHINE_RETVAL_OK;
+
+        fprintf(stderr, "WRMSR failed for reg[0x%x] "
+                "<- value[0x%llx] on lcore %u\n",
+                (unsigned)reg, (unsigned long long)value, lcore);
+        return MACHINE_RETVAL_ERROR;
+#else
+        return MACHINE_RETVAL_OK;
+#endif /* _WIN32 */
+}
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/msr.h b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.h
new file mode 100644
index 00000000..afa8795c
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/msr.h
@@ -0,0 +1,114 @@
+/**********************************************************************
+  Copyright(c) 2018 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/**
+ * @brief Provides access to MSR read & write operations
+ */
+
+#ifndef __MSR_H__
+#define __MSR_H__
+
+#include <stdint.h>
+#include <stdlib.h>
+#ifdef DEBUG
+#include <assert.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * ASSERT() maps to the standard assert() in DEBUG builds and expands to
+ * nothing otherwise, so callers must not rely on it for error handling.
+ */
+#ifdef DEBUG
+#define ASSERT assert
+#else
+#define ASSERT(x)
+#endif
+
+#define MACHINE_DEFAULT_MAX_COREID  255       /**< max core id */
+
+/* Status codes returned by the machine_*() and msr_*() functions */
+#define MACHINE_RETVAL_OK           0         /**< everything OK */
+#define MACHINE_RETVAL_ERROR        1         /**< generic error */
+#define MACHINE_RETVAL_PARAM        2         /**< parameter error */
+
+/**
+ * @brief Initializes machine module
+ *
+ * On non-Windows builds this allocates the table of MSR driver file
+ * descriptors (max_core_id + 1 entries). On Windows it does nothing.
+ *
+ * @param [in] max_core_id maximum logical core id to be handled by machine
+ *             module. Must be non-zero: the implementation in msr.c
+ *             rejects zero with \a MACHINE_RETVAL_PARAM rather than
+ *             substituting \a MACHINE_DEFAULT_MAX_COREID
+ *
+ * @return Operation status
+ * @retval MACHINE_RETVAL_OK on success
+ * @retval MACHINE_RETVAL_PARAM if \a max_core_id is zero
+ * @retval MACHINE_RETVAL_ERROR if the descriptor table cannot be allocated
+ */
+int machine_init(const unsigned max_core_id);
+
+/**
+ * @brief Shuts down machine module
+ *
+ * On non-Windows builds this closes all open MSR driver file descriptors
+ * and frees the descriptor table. On Windows it does nothing.
+ *
+ * @return Operation status
+ * @retval MACHINE_RETVAL_OK on success
+ * @retval MACHINE_RETVAL_ERROR if the module has not been initialized
+ */
+int machine_fini(void);
+
+/**
+ * @brief Executes RDMSR on \a lcore logical core
+ *
+ * On Windows the implementation performs no read, leaves \a value
+ * untouched and returns \a MACHINE_RETVAL_OK.
+ *
+ * @param [in] lcore logical core id
+ * @param [in] reg MSR to read from
+ * @param [out] value place to store MSR value at
+ *
+ * @return Operation status
+ * @retval MACHINE_RETVAL_OK on success
+ * @retval MACHINE_RETVAL_PARAM if \a value is NULL or \a lcore is beyond
+ *         the range given to machine_init()
+ * @retval MACHINE_RETVAL_ERROR if the module is not initialized, the MSR
+ *         device file cannot be opened or the read fails
+ */
+int
+msr_read(const unsigned lcore,
+         const uint32_t reg,
+         uint64_t *value);
+
+/**
+ * @brief Executes WRMSR on \a lcore logical core
+ *
+ * On Windows the implementation performs no write and returns
+ * \a MACHINE_RETVAL_OK.
+ *
+ * @param [in] lcore logical core id
+ * @param [in] reg MSR to write to
+ * @param [in] value to be written into \a reg
+ *
+ * @return Operation status
+ * @retval MACHINE_RETVAL_OK on success
+ * @retval MACHINE_RETVAL_PARAM if \a lcore is beyond the range given to
+ *         machine_init()
+ * @retval MACHINE_RETVAL_ERROR if the module is not initialized, the MSR
+ *         device file cannot be opened or the write fails
+ */
+int
+msr_write(const unsigned lcore,
+          const uint32_t reg,
+          const uint64_t value);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MSR_H__ */
diff --git a/src/spdk/intel-ipsec-mb/LibPerfApp/win_x64.mak b/src/spdk/intel-ipsec-mb/LibPerfApp/win_x64.mak
new file mode 100644
index 00000000..a1d8d902
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/LibPerfApp/win_x64.mak
@@ -0,0 +1,69 @@
+#
+# Copyright (c) 2017-2018, Intel Corporation
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of Intel Corporation nor the names of its contributors
+#       may be used to endorse or promote products derived from this software
+#       without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+# Name of the executable to build (without the .exe suffix) and of the
+# library installation directory looked up under PREFIX.
+APP = ipsec_perf
+INSTNAME = intel-ipsec-mb
+
+# PREFIX can be given on the command line; this is the fallback value.
+!if !defined(PREFIX)
+PREFIX = C:\Program Files
+!endif
+
+# Use the library found under PREFIX and INSTNAME when it exists there,
+# otherwise link against the copy in the parent directory.
+!if exist("$(PREFIX)\$(INSTNAME)\libIPSec_MB.lib")
+IPSECLIB = "$(PREFIX)\$(INSTNAME)\libIPSec_MB.lib"
+INCDIR = -I"$(PREFIX)\$(INSTNAME)"
+!else
+IPSECLIB = ..\libIPSec_MB.lib
+INCDIR = -I..\ -I..\include
+!endif
+
+# Defining DEBUG selects the /Od /DDEBUG /Z7 compiler flags and /debug at
+# link time; otherwise the /O2 /Oi flags are used.
+!ifdef DEBUG
+DCFLAGS = /Od /DDEBUG /Z7
+DLFLAGS = /debug
+!else
+DCFLAGS = /O2 /Oi
+DLFLAGS = 
+!endif
+
+CC = cl
+CFLAGS = /nologo $(DCFLAGS) /Y- /W3 /WX- /Gm- /fp:precise /EHsc $(INCDIR)
+
+LNK = link
+LFLAGS = /out:$(APP).exe $(DLFLAGS)
+
+# Default target: the performance application.
+all: $(APP).exe
+
+# Link both objects with the IPSec multi-buffer library.
+$(APP).exe: ipsec_perf.obj msr.obj $(IPSECLIB)
+        $(LNK) $(LFLAGS) ipsec_perf.obj msr.obj $(IPSECLIB)
+
+ipsec_perf.obj: ipsec_perf.c
+        $(CC) /c $(CFLAGS) ipsec_perf.c
+
+msr.obj: msr.c
+        $(CC) /c $(CFLAGS) msr.c
+
+# Remove the objects, the executable and the linker side files.
+clean:
+	del /q ipsec_perf.obj msr.obj $(APP).exe $(APP).pdb $(APP).ilk
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 18:24:20 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 18:24:20 +0000
commit	483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
tree	e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/spdk/intel-ipsec-mb/LibPerfApp
parent	Initial commit. (diff)
download	ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip