summaryrefslogtreecommitdiffstats
path: root/src/spdk/dpdk/kernel
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/dpdk/kernel
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/spdk/dpdk/kernel/Makefile9
-rw-r--r--src/spdk/dpdk/kernel/freebsd/BSDmakefile.meson18
-rw-r--r--src/spdk/dpdk/kernel/freebsd/Makefile9
-rw-r--r--src/spdk/dpdk/kernel/freebsd/contigmem/BSDmakefile8
-rw-r--r--src/spdk/dpdk/kernel/freebsd/contigmem/Makefile24
-rw-r--r--src/spdk/dpdk/kernel/freebsd/contigmem/contigmem.c361
-rw-r--r--src/spdk/dpdk/kernel/freebsd/contigmem/meson.build4
-rw-r--r--src/spdk/dpdk/kernel/freebsd/meson.build35
-rw-r--r--src/spdk/dpdk/kernel/freebsd/nic_uio/BSDmakefile8
-rw-r--r--src/spdk/dpdk/kernel/freebsd/nic_uio/Makefile24
-rw-r--r--src/spdk/dpdk/kernel/freebsd/nic_uio/meson.build4
-rw-r--r--src/spdk/dpdk/kernel/freebsd/nic_uio/nic_uio.c351
-rw-r--r--src/spdk/dpdk/kernel/linux/Makefile9
-rw-r--r--src/spdk/dpdk/kernel/linux/igb_uio/Kbuild2
-rw-r--r--src/spdk/dpdk/kernel/linux/igb_uio/Makefile25
-rw-r--r--src/spdk/dpdk/kernel/linux/igb_uio/compat.h154
-rw-r--r--src/spdk/dpdk/kernel/linux/igb_uio/igb_uio.c660
-rw-r--r--src/spdk/dpdk/kernel/linux/igb_uio/meson.build20
-rw-r--r--src/spdk/dpdk/kernel/linux/kni/Kbuild6
-rw-r--r--src/spdk/dpdk/kernel/linux/kni/Makefile34
-rw-r--r--src/spdk/dpdk/kernel/linux/kni/compat.h136
-rw-r--r--src/spdk/dpdk/kernel/linux/kni/kni_dev.h127
-rw-r--r--src/spdk/dpdk/kernel/linux/kni/kni_fifo.h87
-rw-r--r--src/spdk/dpdk/kernel/linux/kni/kni_misc.c661
-rw-r--r--src/spdk/dpdk/kernel/linux/kni/kni_net.c844
-rw-r--r--src/spdk/dpdk/kernel/linux/kni/meson.build28
-rw-r--r--src/spdk/dpdk/kernel/linux/meson.build28
-rw-r--r--src/spdk/dpdk/kernel/meson.build4
28 files changed, 3680 insertions, 0 deletions
diff --git a/src/spdk/dpdk/kernel/Makefile b/src/spdk/dpdk/kernel/Makefile
new file mode 100644
index 000000000..5d51fd94b
--- /dev/null
+++ b/src/spdk/dpdk/kernel/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+# Copyright 2017 NXP
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUX) += linux
+DIRS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += freebsd
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/src/spdk/dpdk/kernel/freebsd/BSDmakefile.meson b/src/spdk/dpdk/kernel/freebsd/BSDmakefile.meson
new file mode 100644
index 000000000..6839ac01c
--- /dev/null
+++ b/src/spdk/dpdk/kernel/freebsd/BSDmakefile.meson
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+# makefile for building kernel modules using meson
+# takes parameters from the environment
+
+# source file is passed via KMOD_SRC as relative path, we only use final
+# (tail) component of it (:T), as VPATH is used to find actual file. The
+# VPATH is similarly extracted from the non-final (head) portion of the
+# path (:H) converted to absolute path (:tA). This use of VPATH is to have
+# the .o files placed in the build, not source directory
+
+VPATH := ${KMOD_SRC:H:tA}
+SRCS := ${KMOD_SRC:T} device_if.h bus_if.h pci_if.h
+CFLAGS += $(KMOD_CFLAGS)
+.OBJDIR: ${KMOD_OBJDIR}
+
+.include <bsd.kmod.mk>
diff --git a/src/spdk/dpdk/kernel/freebsd/Makefile b/src/spdk/dpdk/kernel/freebsd/Makefile
new file mode 100644
index 000000000..522d3f68f
--- /dev/null
+++ b/src/spdk/dpdk/kernel/freebsd/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2017 NXP
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += contigmem
+DIRS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += nic_uio
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/src/spdk/dpdk/kernel/freebsd/contigmem/BSDmakefile b/src/spdk/dpdk/kernel/freebsd/contigmem/BSDmakefile
new file mode 100644
index 000000000..33ce83ee2
--- /dev/null
+++ b/src/spdk/dpdk/kernel/freebsd/contigmem/BSDmakefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+#
+
+KMOD= contigmem
+SRCS= contigmem.c device_if.h bus_if.h
+
+.include <bsd.kmod.mk>
diff --git a/src/spdk/dpdk/kernel/freebsd/contigmem/Makefile b/src/spdk/dpdk/kernel/freebsd/contigmem/Makefile
new file mode 100644
index 000000000..428a7edee
--- /dev/null
+++ b/src/spdk/dpdk/kernel/freebsd/contigmem/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = contigmem
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR)
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+
+#
+# all sources are stored in SRCS-y
+#
+SRCS-y := contigmem.c
+
+include $(RTE_SDK)/mk/rte.bsdmodule.mk
diff --git a/src/spdk/dpdk/kernel/freebsd/contigmem/contigmem.c b/src/spdk/dpdk/kernel/freebsd/contigmem/contigmem.c
new file mode 100644
index 000000000..bd72f4d62
--- /dev/null
+++ b/src/spdk/dpdk/kernel/freebsd/contigmem/contigmem.c
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/vmmeter.h>
+#include <sys/eventhandler.h>
+
+#include <machine/bus.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_param.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+
+struct contigmem_buffer { /* state of one physically contiguous buffer */
+ void *addr; /* kernel virtual address from contigmalloc(); NULL when not allocated */
+ int refcnt; /* number of live pager objects on this buffer; changed only under mtx */
+ struct mtx mtx; /* serialises refcnt updates and the zero-on-first-map memset */
+};
+
+struct contigmem_vm_handle { /* pager handle, one allocated per contigmem_mmap_single() call */
+ int buffer_index; /* index into contigmem_buffers[] of the buffer being mapped */
+};
+
+static int contigmem_load(void);
+static int contigmem_unload(void);
+static int contigmem_physaddr(SYSCTL_HANDLER_ARGS);
+
+static d_mmap_single_t contigmem_mmap_single;
+static d_open_t contigmem_open;
+static d_close_t contigmem_close;
+
+static int contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS; /* buffers allocated at load; tunable hw.contigmem.num_buffers */
+static int64_t contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE; /* size of each buffer; tunable hw.contigmem.buffer_size */
+
+static eventhandler_tag contigmem_eh_tag; /* only tested and deregistered in contigmem_unload(); never assigned in this file */
+static struct contigmem_buffer contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS]; /* per-buffer state, indexed by buffer number */
+static struct cdev *contigmem_cdev = NULL; /* device node created by contigmem_load() */
+static int contigmem_refcnt; /* opens plus live mappings; contigmem_unload() returns EBUSY while > 0 */
+
+TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers);
+TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size);
+
+static SYSCTL_NODE(_hw, OID_AUTO, contigmem, CTLFLAG_RD, 0, "contigmem");
+
+SYSCTL_INT(_hw_contigmem, OID_AUTO, num_buffers, CTLFLAG_RD,
+ &contigmem_num_buffers, 0, "Number of contigmem buffers allocated");
+SYSCTL_QUAD(_hw_contigmem, OID_AUTO, buffer_size, CTLFLAG_RD,
+ &contigmem_buffer_size, 0, "Size of each contiguous buffer");
+SYSCTL_INT(_hw_contigmem, OID_AUTO, num_references, CTLFLAG_RD,
+ &contigmem_refcnt, 0, "Number of references to contigmem");
+
+static SYSCTL_NODE(_hw_contigmem, OID_AUTO, physaddr, CTLFLAG_RD, 0,
+ "physaddr");
+
+MALLOC_DEFINE(M_CONTIGMEM, "contigmem", "contigmem(4) allocations");
+
+/*
+ * Module event handler: run the load or unload routine for the matching
+ * event and hand back its result.  Every other event is accepted and
+ * reported as success.
+ */
+static int contigmem_modevent(module_t mod, int type, void *arg)
+{
+	if (type == MOD_LOAD)
+		return contigmem_load();
+
+	if (type == MOD_UNLOAD)
+		return contigmem_unload();
+
+	return 0;
+}
+
+moduledata_t contigmem_mod = {
+ "contigmem",
+ (modeventhand_t)contigmem_modevent,
+ 0
+};
+
+DECLARE_MODULE(contigmem, contigmem_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);
+MODULE_VERSION(contigmem, 1);
+
+static struct cdevsw contigmem_ops = { /* entry points of the "contigmem" character device */
+ .d_name = "contigmem",
+ .d_version = D_VERSION,
+ .d_flags = D_TRACKCLOSE, /* NOTE(review): presumably so every close reaches d_close and balances contigmem_open() -- confirm */
+ .d_mmap_single = contigmem_mmap_single, /* maps one whole buffer per mmap() call */
+ .d_open = contigmem_open, /* bumps contigmem_refcnt */
+ .d_close = contigmem_close, /* drops contigmem_refcnt */
+};
+
+/*
+ * Module load handler.
+ *
+ * Validates the hw.contigmem.num_buffers and hw.contigmem.buffer_size
+ * tunables, allocates every contiguous buffer, publishes the physical
+ * address of each buffer as hw.contigmem.physaddr.<index> and finally
+ * creates the "contigmem" character device.
+ *
+ * Returns 0 on success, EINVAL when a tunable is out of range, or ENOMEM
+ * when a buffer cannot be allocated (buffers obtained so far are released).
+ */
+static int
+contigmem_load(void)
+{
+	char index_string[8], description[32];
+	int i, error = 0;
+	void *addr;
+
+	/*
+	 * Nothing has been allocated while the tunables are being checked, so
+	 * fail immediately.  Running the cleanup loop below with an
+	 * unvalidated contigmem_num_buffers would index past the end of
+	 * contigmem_buffers[].
+	 */
+	if (contigmem_num_buffers < 0) {
+		printf("%d buffers requested is negative\n",
+		    contigmem_num_buffers);
+		return EINVAL;
+	}
+
+	if (contigmem_num_buffers > RTE_CONTIGMEM_MAX_NUM_BUFS) {
+		printf("%d buffers requested is greater than %d allowed\n",
+		    contigmem_num_buffers, RTE_CONTIGMEM_MAX_NUM_BUFS);
+		return EINVAL;
+	}
+
+	/* x & (x - 1) is zero only for powers of two (and for zero). */
+	if (contigmem_buffer_size < PAGE_SIZE ||
+	    (contigmem_buffer_size & (contigmem_buffer_size - 1)) != 0) {
+		printf("buffer size 0x%jx must be at least PAGE_SIZE and a "
+		    "power of two\n", (uintmax_t)contigmem_buffer_size);
+		return EINVAL;
+	}
+
+	for (i = 0; i < contigmem_num_buffers; i++) {
+		/* Each buffer is aligned to its own size. */
+		addr = contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO,
+		    0, BUS_SPACE_MAXADDR, contigmem_buffer_size, 0);
+		if (addr == NULL) {
+			printf("contigmalloc failed for buffer %d\n", i);
+			error = ENOMEM;
+			goto error;
+		}
+
+		printf("%2u: virt=%p phys=%p\n", i, addr,
+		    (void *)pmap_kextract((vm_offset_t)addr));
+
+		mtx_init(&contigmem_buffers[i].mtx, "contigmem", NULL, MTX_DEF);
+		contigmem_buffers[i].addr = addr;
+		contigmem_buffers[i].refcnt = 0;
+
+		/*
+		 * The buffer index travels to the handler in arg1.
+		 * NOTE(review): the OIDs are added without a sysctl context
+		 * and nothing in this file removes them on a failed load or
+		 * on unload -- confirm whether that is intended.
+		 */
+		snprintf(index_string, sizeof(index_string), "%d", i);
+		snprintf(description, sizeof(description),
+		    "phys addr for buffer %d", i);
+		SYSCTL_ADD_PROC(NULL,
+		    &SYSCTL_NODE_CHILDREN(_hw_contigmem, physaddr), OID_AUTO,
+		    index_string, CTLTYPE_U64 | CTLFLAG_RD,
+		    (void *)(uintptr_t)i, 0, contigmem_physaddr, "LU",
+		    description);
+	}
+
+	contigmem_cdev = make_dev_credf(0, &contigmem_ops, 0, NULL, UID_ROOT,
+	    GID_WHEEL, 0600, "contigmem");
+
+	return 0;
+
+error:
+	/*
+	 * Only reached after the tunables were validated, so the loop bound
+	 * cannot exceed the array.  Slots that were never set up still have a
+	 * NULL addr and an uninitialised mutex and are skipped.
+	 */
+	for (i = 0; i < contigmem_num_buffers; i++) {
+		if (contigmem_buffers[i].addr != NULL) {
+			contigfree(contigmem_buffers[i].addr,
+			    contigmem_buffer_size, M_CONTIGMEM);
+			contigmem_buffers[i].addr = NULL;
+		}
+		if (mtx_initialized(&contigmem_buffers[i].mtx))
+			mtx_destroy(&contigmem_buffers[i].mtx);
+	}
+
+	return error;
+}
+
+/*
+ * Module unload handler.  Refuses with EBUSY while the device is still
+ * open or mapped; otherwise removes the device node, drops the process
+ * exit hook if one was registered, and releases every buffer and mutex.
+ */
+static int
+contigmem_unload()
+{
+	struct contigmem_buffer *buf;
+	int idx;
+
+	if (contigmem_refcnt > 0)
+		return EBUSY;
+
+	if (contigmem_cdev != NULL)
+		destroy_dev(contigmem_cdev);
+
+	if (contigmem_eh_tag != NULL)
+		EVENTHANDLER_DEREGISTER(process_exit, contigmem_eh_tag);
+
+	/* Walk the whole table; unused slots fail both checks. */
+	for (idx = 0; idx < RTE_CONTIGMEM_MAX_NUM_BUFS; idx++) {
+		buf = &contigmem_buffers[idx];
+
+		if (buf->addr != NULL)
+			contigfree(buf->addr, contigmem_buffer_size,
+			    M_CONTIGMEM);
+		if (mtx_initialized(&buf->mtx))
+			mtx_destroy(&buf->mtx);
+	}
+
+	return 0;
+}
+
+/*
+ * Sysctl handler for hw.contigmem.physaddr.<index>: reports the physical
+ * address of the buffer whose index was stored in arg1 at registration.
+ */
+static int
+contigmem_physaddr(SYSCTL_HANDLER_ARGS)
+{
+	int buf_idx = (int)(uintptr_t)arg1;
+	uint64_t pa = (uint64_t)vtophys(contigmem_buffers[buf_idx].addr);
+
+	return sysctl_handle_64(oidp, &pa, 0, req);
+}
+
+/*
+ * d_open handler: count the new opener so that contigmem_unload() refuses
+ * to run while the device is in use.  Always succeeds.
+ */
+static int
+contigmem_open(struct cdev *cdev, int fflags, int devtype, struct thread *td)
+{
+	atomic_add_int(&contigmem_refcnt, 1);
+	return 0;
+}
+
+/*
+ * d_close handler: drop the reference taken by contigmem_open().
+ * Always succeeds.
+ */
+static int
+contigmem_close(struct cdev *cdev, int fflags, int devtype, struct thread *td)
+{
+	atomic_subtract_int(&contigmem_refcnt, 1);
+	return 0;
+}
+
+/*
+ * Pager constructor: takes a module-wide reference and a per-buffer
+ * reference for the new mapping.  The buffer is wiped when it goes from
+ * unmapped to mapped so a new user never sees stale contents.
+ * Always returns 0.
+ */
+static int
+contigmem_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+		vm_ooffset_t foff, struct ucred *cred, u_short *color)
+{
+	struct contigmem_vm_handle *vmh = handle;
+	struct contigmem_buffer *buf = &contigmem_buffers[vmh->buffer_index];
+
+	atomic_add_int(&contigmem_refcnt, 1);
+
+	mtx_lock(&buf->mtx);
+	/* First mapping of this buffer: clear it before counting the user. */
+	if (buf->refcnt++ == 0)
+		memset(buf->addr, 0, contigmem_buffer_size);
+	mtx_unlock(&buf->mtx);
+
+	return 0;
+}
+
+/*
+ * Pager destructor: undoes contigmem_cdev_pager_ctor() by dropping the
+ * per-buffer and module-wide references, and frees the handle that
+ * contigmem_mmap_single() allocated.
+ */
+static void
+contigmem_cdev_pager_dtor(void *handle)
+{
+	struct contigmem_vm_handle *vmh = handle;
+	struct contigmem_buffer *buf = &contigmem_buffers[vmh->buffer_index];
+
+	mtx_lock(&buf->mtx);
+	buf->refcnt -= 1;
+	mtx_unlock(&buf->mtx);
+
+	/* buf was derived from vmh above; vmh is not touched after this. */
+	free(vmh, M_CONTIGMEM);
+
+	atomic_subtract_int(&contigmem_refcnt, 1);
+}
+/*
+ * Pager fault handler: back the faulting page with a fictitious page that
+ * points at the physical address carried in 'offset'.
+ *
+ * object  VM object being faulted on; its write lock is dropped at the
+ *         start of the function and taken again before the object is
+ *         modified, so it is held again on return.
+ * offset  used directly as the physical address to map.
+ *         NOTE(review): presumably the faulting offset within the object,
+ *         which contigmem_mmap_single() rebases to the buffer's physical
+ *         address -- confirm.
+ * prot    unused.
+ * mres    in: the page supplied by the caller; out: the page that now
+ *         backs the mapping.
+ *
+ * Always returns VM_PAGER_OK.
+ */
+static int
+contigmem_cdev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot,
+ vm_page_t *mres)
+{
+ vm_paddr_t paddr;
+ vm_page_t m_paddr, page;
+ vm_memattr_t memattr, memattr1;
+
+ memattr = object->memattr; /* default attribute, read while still locked */
+
+ VM_OBJECT_WUNLOCK(object);
+
+ paddr = offset;
+
+ m_paddr = vm_phys_paddr_to_vm_page(paddr);
+ if (m_paddr != NULL) { /* a real vm_page exists: its attribute wins */
+ memattr1 = pmap_page_get_memattr(m_paddr);
+ if (memattr1 != memattr)
+ memattr = memattr1;
+ }
+
+ if (((*mres)->flags & PG_FICTITIOUS) != 0) {
+ /*
+ * If the passed in result page is a fake page, update it with
+ * the new physical address.
+ */
+ page = *mres;
+ VM_OBJECT_WLOCK(object);
+ vm_page_updatefake(page, paddr, memattr);
+ } else {
+ /*
+ * Replace the passed in reqpage page with our own fake page and
+ * free up the original page.
+ */
+ page = vm_page_getfake(paddr, memattr);
+ VM_OBJECT_WLOCK(object);
+#if __FreeBSD__ >= 13 /* four-argument vm_page_replace(); no explicit free of the old page here */
+ vm_page_replace(page, object, (*mres)->pindex, *mres);
+#else /* older API returns the displaced page, which is freed by hand */
+ vm_page_t mret = vm_page_replace(page, object, (*mres)->pindex);
+ KASSERT(mret == *mres,
+ ("invalid page replacement, old=%p, ret=%p", *mres, mret));
+ vm_page_lock(mret);
+ vm_page_free(mret);
+ vm_page_unlock(mret);
+#endif
+ *mres = page;
+ }
+
+ page->valid = VM_PAGE_BITS_ALL; /* mark the whole page valid */
+
+ return VM_PAGER_OK;
+}
+
+static struct cdev_pager_ops contigmem_cdev_pager_ops = {
+ .cdev_pg_ctor = contigmem_cdev_pager_ctor,
+ .cdev_pg_dtor = contigmem_cdev_pager_dtor,
+ .cdev_pg_fault = contigmem_cdev_pager_fault,
+};
+
+/*
+ * d_mmap_single handler: map one contiguous buffer into the caller.
+ *
+ * offset  in: buffer selector, the buffer index multiplied by PAGE_SIZE;
+ *         out (on success only): physical address of the chosen buffer.
+ * size    length of the mapping; must not exceed the buffer size.
+ * obj     out: device pager object backing the mapping.
+ * nprot   requested protection, passed through to the pager.
+ *
+ * Returns 0 on success, EINVAL for a bad index or size or when the pager
+ * object cannot be created, and ENOMEM when the handle cannot be allocated.
+ */
+static int
+contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
+		struct vm_object **obj, int nprot)
+{
+	struct contigmem_vm_handle *vmh;
+	struct vm_object *pager_obj;
+	vm_ooffset_t phys_offset;
+	uint64_t buffer_index;
+
+	/*
+	 * The buffer index is encoded in the offset. Divide the offset by
+	 * PAGE_SIZE to get the index of the buffer requested by the user
+	 * app.
+	 */
+	buffer_index = *offset / PAGE_SIZE;
+	if (buffer_index >= contigmem_num_buffers)
+		return EINVAL;
+
+	if (size > contigmem_buffer_size)
+		return EINVAL;
+
+	vmh = malloc(sizeof(*vmh), M_CONTIGMEM, M_NOWAIT | M_ZERO);
+	if (vmh == NULL)
+		return ENOMEM;
+	vmh->buffer_index = buffer_index;
+
+	phys_offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index].addr);
+	pager_obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
+	    &contigmem_cdev_pager_ops, size, nprot, phys_offset,
+	    curthread->td_ucred);
+	if (pager_obj == NULL) {
+		/*
+		 * No object was created, so the destructor that normally
+		 * frees the handle will never run: release it here and
+		 * report the failure instead of returning success with no
+		 * object.
+		 */
+		free(vmh, M_CONTIGMEM);
+		return EINVAL;
+	}
+
+	*offset = phys_offset;
+	*obj = pager_obj;
+
+	return 0;
+}
diff --git a/src/spdk/dpdk/kernel/freebsd/contigmem/meson.build b/src/spdk/dpdk/kernel/freebsd/contigmem/meson.build
new file mode 100644
index 000000000..8fb2ab786
--- /dev/null
+++ b/src/spdk/dpdk/kernel/freebsd/contigmem/meson.build
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+sources = files('contigmem.c')
diff --git a/src/spdk/dpdk/kernel/freebsd/meson.build b/src/spdk/dpdk/kernel/freebsd/meson.build
new file mode 100644
index 000000000..dc156a43f
--- /dev/null
+++ b/src/spdk/dpdk/kernel/freebsd/meson.build
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+kmods = ['contigmem', 'nic_uio']
+
+# for building kernel modules, we use kernel build system using make, as
+# with Linux. We have a skeleton BSDmakefile, which pulls many of its
+# values from the environment. Each module only has a single source file
+right now, which allows us to simplify things. We pull in the source
+# files from the individual meson.build files, and then use a custom
+# target to call make, passing in the values as env parameters.
+kmod_cflags = ['-I' + meson.build_root(),
+ '-I' + join_paths(meson.source_root(), 'config'),
+ '-include rte_config.h']
+
+# to avoid warnings due to race conditions with creating the dev_if.h, etc.
+# files, serialize the kernel module builds. Each module will depend on
+# previous ones
+built_kmods = []
+foreach k:kmods
+ subdir(k)
+ built_kmods += custom_target(k,
+ input: [files('BSDmakefile.meson'), sources],
+ output: k + '.ko',
+ command: ['make', '-f', '@INPUT0@',
+ 'KMOD_OBJDIR=@OUTDIR@',
+ 'KMOD_SRC=@INPUT1@',
+ 'KMOD=' + k,
+ 'KMOD_CFLAGS=' + ' '.join(kmod_cflags),
+ 'CC=clang'],
+ depends: built_kmods, # make each module depend on prev
+ build_by_default: get_option('enable_kmods'),
+ install: get_option('enable_kmods'),
+ install_dir: '/boot/modules/')
+endforeach
diff --git a/src/spdk/dpdk/kernel/freebsd/nic_uio/BSDmakefile b/src/spdk/dpdk/kernel/freebsd/nic_uio/BSDmakefile
new file mode 100644
index 000000000..b6f92d55e
--- /dev/null
+++ b/src/spdk/dpdk/kernel/freebsd/nic_uio/BSDmakefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+#
+
+KMOD= nic_uio
+SRCS= nic_uio.c device_if.h bus_if.h pci_if.h
+
+.include <bsd.kmod.mk>
diff --git a/src/spdk/dpdk/kernel/freebsd/nic_uio/Makefile b/src/spdk/dpdk/kernel/freebsd/nic_uio/Makefile
new file mode 100644
index 000000000..376ef3a3a
--- /dev/null
+++ b/src/spdk/dpdk/kernel/freebsd/nic_uio/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = nic_uio
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR)
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+
+#
+# all sources are stored in SRCS-y
+#
+SRCS-y := nic_uio.c
+
+include $(RTE_SDK)/mk/rte.bsdmodule.mk
diff --git a/src/spdk/dpdk/kernel/freebsd/nic_uio/meson.build b/src/spdk/dpdk/kernel/freebsd/nic_uio/meson.build
new file mode 100644
index 000000000..4bdaf9694
--- /dev/null
+++ b/src/spdk/dpdk/kernel/freebsd/nic_uio/meson.build
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+sources = files('nic_uio.c')
diff --git a/src/spdk/dpdk/kernel/freebsd/nic_uio/nic_uio.c b/src/spdk/dpdk/kernel/freebsd/nic_uio/nic_uio.c
new file mode 100644
index 000000000..7a81694c9
--- /dev/null
+++ b/src/spdk/dpdk/kernel/freebsd/nic_uio/nic_uio.c
@@ -0,0 +1,351 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/module.h>
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/conf.h> /* cdevsw struct */
+#include <sys/bus.h> /* structs, prototypes for pci bus stuff and DEVMETHOD */
+#include <sys/rman.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/proc.h>
+
+#include <machine/bus.h>
+#include <dev/pci/pcivar.h> /* For pci_get macros! */
+#include <dev/pci/pcireg.h> /* The softc holds our per-instance data. */
+#include <vm/vm.h>
+#include <vm/uma.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+
+#define MAX_BARS (PCIR_MAX_BAR_0 + 1) /* number of BAR slots tracked per device */
+
+#define MAX_DETACHED_DEVICES 128 /* capacity of detached_devices[] */
+static device_t detached_devices[MAX_DETACHED_DEVICES] = {}; /* devices recorded by nic_uio_load()/nic_uio_probe(); re-probed in nic_uio_unload() */
+static int num_detached = 0; /* number of valid entries in detached_devices[] */
+
+struct nic_uio_softc { /* per-device driver state, filled in by nic_uio_attach() */
+ device_t dev_t; /* the PCI device this instance is attached to */
+ struct cdev *my_cdev; /* "uio@pci:b:d:f" node; its si_drv1 points back at this softc */
+ int bar_id[MAX_BARS]; /* resource id, PCIR_BAR(n), set when BAR n is first mapped */
+ struct resource *bar_res[MAX_BARS]; /* allocated BAR resource, NULL until first mapped */
+ u_long bar_start[MAX_BARS]; /* rman_get_start() of the BAR, refreshed on every mmap */
+ u_long bar_size[MAX_BARS]; /* rman_get_size() of the BAR, refreshed on every mmap */
+};
+
+/* Function prototypes */
+static d_open_t nic_uio_open;
+static d_close_t nic_uio_close;
+static d_mmap_t nic_uio_mmap;
+static d_mmap_single_t nic_uio_mmap_single;
+static int nic_uio_probe(device_t dev);
+static int nic_uio_attach(device_t dev);
+static int nic_uio_detach(device_t dev);
+static int nic_uio_shutdown(void);
+static int nic_uio_modevent(module_t mod, int type, void *arg);
+
+static struct cdevsw uio_cdevsw = { /* entry points of the per-device "uio@pci:b:d:f" node */
+ .d_name = "nic_uio",
+ .d_version = D_VERSION,
+ .d_open = nic_uio_open, /* no-op, always succeeds */
+ .d_close = nic_uio_close, /* no-op, always succeeds */
+ .d_mmap = nic_uio_mmap, /* identity translation: offset is already a physical address */
+ .d_mmap_single = nic_uio_mmap_single, /* selects and allocates the BAR to map */
+};
+
+static device_method_t nic_uio_methods[] = {
+ DEVMETHOD(device_probe, nic_uio_probe),
+ DEVMETHOD(device_attach, nic_uio_attach),
+ DEVMETHOD(device_detach, nic_uio_detach),
+ DEVMETHOD_END
+};
+
+struct device {
+ int vend;
+ int dev;
+};
+
+struct pci_bdf {
+ uint32_t bus;
+ uint32_t devid;
+ uint32_t function;
+};
+
+static devclass_t nic_uio_devclass;
+
+DEFINE_CLASS_0(nic_uio, nic_uio_driver, nic_uio_methods, sizeof(struct nic_uio_softc));
+DRIVER_MODULE(nic_uio, pci, nic_uio_driver, nic_uio_devclass, nic_uio_modevent, 0);
+
+/*
+ * d_mmap handler: the offset handed in is used unchanged as the physical
+ * address of the page to map.  Always succeeds.
+ */
+static int
+nic_uio_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
+		int prot, vm_memattr_t *memattr)
+{
+	/* nic_uio_mmap_single() already rebased the offset to the BAR start. */
+	*paddr = offset;
+
+	return 0;
+}
+
+/*
+ * d_mmap_single handler: map one PCI BAR of the device into the caller.
+ *
+ * The BAR number is encoded in the incoming offset as the offset divided
+ * by PAGE_SIZE.  The BAR resource is allocated the first time it is
+ * mapped and kept until detach.  On success *offset is rewritten to the
+ * BAR's start address and *obj receives a device pager object.
+ *
+ * Returns 0 on success, EINVAL for a BAR number out of range, or ENXIO
+ * when the BAR resource cannot be allocated.
+ */
+static int
+nic_uio_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
+		struct vm_object **obj, int nprot)
+{
+	struct nic_uio_softc *sc = cdev->si_drv1;
+	unsigned bar = *offset/PAGE_SIZE;
+	int res_type;
+
+	if (bar >= MAX_BARS)
+		return EINVAL;
+
+	if (sc->bar_res[bar] == NULL) {
+		sc->bar_id[bar] = PCIR_BAR(bar);
+
+		/* The BAR register itself says whether it is I/O or memory. */
+		if (PCI_BAR_IO(pci_read_config(sc->dev_t, sc->bar_id[bar], 4)))
+			res_type = SYS_RES_IOPORT;
+		else
+			res_type = SYS_RES_MEMORY;
+
+		sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, res_type,
+				&sc->bar_id[bar], RF_ACTIVE);
+	}
+
+	if (sc->bar_res[bar] == NULL)
+		return ENXIO;
+
+	sc->bar_start[bar] = rman_get_start(sc->bar_res[bar]);
+	sc->bar_size[bar] = rman_get_size(sc->bar_res[bar]);
+
+	device_printf(sc->dev_t, "Bar %u @ %lx, size %lx\n", bar,
+			sc->bar_start[bar], sc->bar_size[bar]);
+
+	*offset = sc->bar_start[bar];
+	*obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset,
+			curthread->td_ucred);
+
+	return 0;
+}
+
+
+/* d_open handler: nothing to set up per open, so always report success. */
+int
+nic_uio_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+
+	return 0;
+}
+
+/* d_close handler: nothing to tear down per close, so always report success. */
+int
+nic_uio_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
+{
+
+	return 0;
+}
+
+/*
+ * device_probe method: claim the device when its bus/slot/function either
+ * matches a device already recorded in detached_devices[] or matches an
+ * entry of the hw.nic_uio.bdfs tunable.  In the second case the device is
+ * recorded first, provided the table still has room.
+ *
+ * Returns BUS_PROBE_SPECIFIC when the device is claimed, ENXIO otherwise.
+ */
+static int
+nic_uio_probe(device_t dev)
+{
+	unsigned int dev_bus = pci_get_bus(dev);
+	unsigned int dev_slot = pci_get_slot(dev);
+	unsigned int dev_func = pci_get_function(dev);
+	unsigned int bus, device, function;
+	char bdf_str[256];
+	char *cursor, *field;
+	int idx;
+
+	/* First check if we found this on load */
+	for (idx = 0; idx < num_detached; idx++) {
+		if (dev_bus != pci_get_bus(detached_devices[idx]))
+			continue;
+		if (dev_slot != pci_get_slot(detached_devices[idx]))
+			continue;
+		if (dev_func != pci_get_function(detached_devices[idx]))
+			continue;
+
+		device_set_desc(dev, "DPDK PCI Device");
+		return BUS_PROBE_SPECIFIC;
+	}
+
+	/* otherwise check if it's a new device and if it matches the BDF */
+	memset(bdf_str, 0, sizeof(bdf_str));
+	TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str));
+
+	/* Consume the list three fields at a time: bus, device, function. */
+	cursor = bdf_str;
+	while (cursor != NULL && cursor[0] != '\0') {
+		field = strsep(&cursor, ",:");
+		if (field == NULL)
+			break;
+		bus = strtol(field, NULL, 10);
+
+		field = strsep(&cursor, ",:");
+		if (field == NULL)
+			break;
+		device = strtol(field, NULL, 10);
+
+		field = strsep(&cursor, ",:");
+		if (field == NULL)
+			break;
+		function = strtol(field, NULL, 10);
+
+		if (bus != pci_get_bus(dev) ||
+		    device != pci_get_slot(dev) ||
+		    function != pci_get_function(dev))
+			continue;
+
+		if (num_detached >= MAX_DETACHED_DEVICES) {
+			printf("%s: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n",
+					__func__, MAX_DETACHED_DEVICES,
+					dev);
+			break;
+		}
+
+		printf("%s: probed dev=%p\n",
+				__func__, dev);
+		detached_devices[num_detached++] = dev;
+		device_set_desc(dev, "DPDK PCI Device");
+		return BUS_PROBE_SPECIFIC;
+	}
+
+	return ENXIO;
+}
+
+/*
+ * device_attach method: create the "uio@pci:<bus>:<slot>:<function>"
+ * character device for this PCI device, link it to the softc, start with
+ * no BAR resources allocated and enable bus mastering.
+ *
+ * Returns 0 on success or ENXIO when the device node cannot be created.
+ */
+static int
+nic_uio_attach(device_t dev)
+{
+	struct nic_uio_softc *sc = device_get_softc(dev);
+	int bar;
+
+	sc->dev_t = dev;
+	sc->my_cdev = make_dev(&uio_cdevsw, device_get_unit(dev),
+			UID_ROOT, GID_WHEEL, 0600, "uio@pci:%u:%u:%u",
+			pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
+	if (sc->my_cdev == NULL)
+		return ENXIO;
+
+	/* Lets the cdev entry points find their softc again. */
+	sc->my_cdev->si_drv1 = sc;
+
+	/* BARs are allocated lazily by nic_uio_mmap_single(). */
+	for (bar = 0; bar < MAX_BARS; bar++)
+		sc->bar_res[bar] = NULL;
+
+	pci_enable_busmaster(dev);
+
+	return 0;
+}
+
+/*
+ * device_detach method: release every BAR resource that was allocated by
+ * nic_uio_mmap_single() and destroy the character device.
+ * Always returns 0.
+ */
+static int
+nic_uio_detach(device_t dev)
+{
+	struct nic_uio_softc *sc = device_get_softc(dev);
+	int bar, res_type;
+
+	for (bar = 0; bar < MAX_BARS; bar++) {
+		if (sc->bar_res[bar] == NULL)
+			continue;
+
+		/* Release with the same resource type the BAR was taken as. */
+		if (PCI_BAR_IO(pci_read_config(dev, sc->bar_id[bar], 4)))
+			res_type = SYS_RES_IOPORT;
+		else
+			res_type = SYS_RES_MEMORY;
+
+		bus_release_resource(dev, res_type, sc->bar_id[bar],
+				sc->bar_res[bar]);
+	}
+
+	if (sc->my_cdev != NULL)
+		destroy_dev(sc->my_cdev);
+
+	return 0;
+}
+
+/*
+ * MOD_LOAD worker: detach every PCI device named in the hw.nic_uio.bdfs
+ * tunable from its current driver, remembering it in detached_devices[]
+ * (while there is room) so that nic_uio_unload() can hand it back.
+ *
+ * Users should specify PCI BDFs in the format "b:d:f,b:d:f,b:d:f".  The
+ * parser does not distinguish ':' from ','; it simply consumes three
+ * tokens at a time to build a bus/device/function tuple.
+ *
+ * strtol() results are not checked.  A token that cannot be converted
+ * yields 0, which may produce a different BDF than intended, but as long
+ * as the PCI device/vendor ID does not match it will not matter.
+ */
+static void
+nic_uio_load(void)
+{
+	char bdf_str[256];
+	char *cursor, *field;
+	uint32_t bus, device, function;
+	device_t dev;
+
+	memset(bdf_str, 0, sizeof(bdf_str));
+	TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str));
+	printf("nic_uio: hw.nic_uio.bdfs = '%s'\n", bdf_str);
+
+	cursor = bdf_str;
+	while (cursor != NULL && cursor[0] != '\0') {
+		field = strsep(&cursor, ",:");
+		if (field == NULL)
+			break;
+		bus = strtol(field, NULL, 10);
+
+		field = strsep(&cursor, ",:");
+		if (field == NULL)
+			break;
+		device = strtol(field, NULL, 10);
+
+		field = strsep(&cursor, ",:");
+		if (field == NULL)
+			break;
+		function = strtol(field, NULL, 10);
+
+		dev = pci_find_bsf(bus, device, function);
+		if (dev == NULL)
+			continue;
+
+		if (num_detached >= MAX_DETACHED_DEVICES) {
+			printf("nic_uio_load: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n",
+					MAX_DETACHED_DEVICES, dev);
+		} else {
+			printf("nic_uio_load: detaching and storing dev=%p\n",
+					dev);
+			detached_devices[num_detached++] = dev;
+		}
+
+		/* The device is detached even when it could not be recorded. */
+		device_detach(dev);
+	}
+}
+
+/*
+ * MOD_UNLOAD worker: ask the bus to probe and attach again every device
+ * recorded in detached_devices[], logging each step.
+ */
+static void
+nic_uio_unload(void)
+{
+	device_t *slot = detached_devices;
+	device_t *end = detached_devices + num_detached;
+
+	printf("nic_uio_unload: entered...\n");
+
+	for (; slot < end; slot++) {
+		printf("nic_uio_unload: calling to device_probe_and_attach for dev=%p...\n",
+				*slot);
+		device_probe_and_attach(*slot);
+		printf("nic_uio_unload: done.\n");
+	}
+
+	printf("nic_uio_unload: leaving...\n");
+}
+
+/* MOD_SHUTDOWN worker: nothing needs doing at shutdown; reports success. */
+static int
+nic_uio_shutdown(void)
+{
+
+	return 0;
+}
+
+/*
+ * Module event handler: dispatches load, unload and shutdown events to
+ * their workers.  The workers cannot veto the event, so the result is
+ * always 0; unknown events are accepted silently.
+ */
+static int
+nic_uio_modevent(module_t mod, int type, void *arg)
+{
+	if (type == MOD_LOAD)
+		nic_uio_load();
+	else if (type == MOD_UNLOAD)
+		nic_uio_unload();
+	else if (type == MOD_SHUTDOWN)
+		nic_uio_shutdown();
+
+	return 0;
+}
diff --git a/src/spdk/dpdk/kernel/linux/Makefile b/src/spdk/dpdk/kernel/linux/Makefile
new file mode 100644
index 000000000..c2c45a3e6
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2017 NXP
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio
+DIRS-$(CONFIG_RTE_KNI_KMOD) += kni
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/src/spdk/dpdk/kernel/linux/igb_uio/Kbuild b/src/spdk/dpdk/kernel/linux/igb_uio/Kbuild
new file mode 100644
index 000000000..3ab85c411
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/igb_uio/Kbuild
@@ -0,0 +1,2 @@
+ccflags-y := $(MODULE_CFLAGS)
+obj-m := igb_uio.o
diff --git a/src/spdk/dpdk/kernel/linux/igb_uio/Makefile b/src/spdk/dpdk/kernel/linux/igb_uio/Makefile
new file mode 100644
index 000000000..f83bcc7c6
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/igb_uio/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = igb_uio
+MODULE_PATH = drivers/net/igb_uio
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=100
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+
+#
+# all sources are stored in SRCS-y
+#
+SRCS-y := igb_uio.c
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/src/spdk/dpdk/kernel/linux/igb_uio/compat.h b/src/spdk/dpdk/kernel/linux/igb_uio/compat.h
new file mode 100644
index 000000000..8dbb896ae
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/igb_uio/compat.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Minimal wrappers to allow compiling igb_uio on older kernels.
+ */
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+#define pci_cfg_access_lock pci_block_user_cfg_access
+#define pci_cfg_access_unlock pci_unblock_user_cfg_access
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
+#define HAVE_PTE_MASK_PAGE_IOMAP
+#endif
+
+#ifndef PCI_MSIX_ENTRY_SIZE
+#define PCI_MSIX_ENTRY_SIZE 16
+#define PCI_MSIX_ENTRY_VECTOR_CTRL 12
+#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1
+#endif
+
+/*
+ * for kernels < 2.6.38 and backported patch that moves MSI-X entry definition
+ * to pci_regs.h. Those kernels have PCI_MSIX_ENTRY_SIZE defined but not
+ * PCI_MSIX_ENTRY_CTRL_MASKBIT
+ */
+#ifndef PCI_MSIX_ENTRY_CTRL_MASKBIT
+#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && \
+ (!(defined(RHEL_RELEASE_CODE) && \
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5, 9)))
+
+/*
+ * Fallback pci_num_vf() for kernels that do not provide it: returns the
+ * number of virtual functions recorded for a physical function, or 0 when
+ * the device is not a physical function.
+ */
+static int pci_num_vf(struct pci_dev *dev)
+{
+	/*
+	 * NOTE(review): presumably mirrors the leading fields of the
+	 * kernel-private structure behind dev->sriov so that nr_virtfn can be
+	 * read -- the layout must match the running kernel; confirm.
+	 */
+	struct iov {
+		int pos;
+		int nres;
+		u32 cap;
+		u16 ctrl;
+		u16 total;
+		u16 initial;
+		u16 nr_virtfn;
+	};
+	struct iov *sriov_info;
+
+	if (!dev->is_physfn)
+		return 0;
+
+	sriov_info = (struct iov *)dev->sriov;
+	return sriov_info->nr_virtfn;
+}
+
+#endif /* < 2.6.34 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
+ (!(defined(RHEL_RELEASE_CODE) && \
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
+
+#define kstrtoul strict_strtoul
+
+#endif /* < 2.6.39 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) && \
+ (!(defined(RHEL_RELEASE_CODE) && \
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 3)))
+
+/*
+ * Check whether the INTx disable bit can be used to control the device's
+ * interrupts: toggle PCI_COMMAND_INTX_DISABLE, read the command register
+ * back and see whether the bit followed.  The original command value is
+ * restored when the bit turned out to be writable.
+ *
+ * Returns true when masking through INTX_DISABLE is supported.
+ */
+static bool pci_intx_mask_supported(struct pci_dev *pdev)
+{
+	uint16_t orig, new, changed;
+	bool mask_supported = false;
+
+	pci_block_user_cfg_access(pdev);
+
+	pci_read_config_word(pdev, PCI_COMMAND, &orig);
+	pci_write_config_word(pdev, PCI_COMMAND,
+			      orig ^ PCI_COMMAND_INTX_DISABLE);
+	pci_read_config_word(pdev, PCI_COMMAND, &new);
+
+	changed = new ^ orig;
+	if (changed & ~PCI_COMMAND_INTX_DISABLE) {
+		/* Bits other than the one toggled moved as well. */
+		dev_err(&pdev->dev, "Command register changed from "
+			"0x%x to 0x%x: driver or hardware bug?\n", orig, new);
+	} else if (changed & PCI_COMMAND_INTX_DISABLE) {
+		mask_supported = true;
+		pci_write_config_word(pdev, PCI_COMMAND, orig);
+	}
+
+	pci_unblock_user_cfg_access(pdev);
+
+	return mask_supported;
+}
+
+/*
+ * Fallback pci_check_and_mask_intx() for kernels that do not provide it.
+ *
+ * Reads the command and status registers in a single 32-bit access
+ * (command in the low half, status in the high half).  When the status
+ * register reports a pending INTx interrupt, the interrupt is masked by
+ * setting PCI_COMMAND_INTX_DISABLE.
+ *
+ * Returns true when an interrupt from this device was pending (and is now
+ * masked), false when the interrupt is not ours.
+ */
+static bool pci_check_and_mask_intx(struct pci_dev *pdev)
+{
+	bool pending;
+	uint32_t status;
+
+	pci_block_user_cfg_access(pdev);
+	pci_read_config_dword(pdev, PCI_COMMAND, &status);
+
+	/* interrupt is not ours, goes to out */
+	pending = (((status >> 16) & PCI_STATUS_INTERRUPT) != 0);
+	if (pending) {
+		uint16_t old, new;
+
+		/* Truncation keeps only the command register half. */
+		old = status;
+
+		/*
+		 * Mask the interrupt.  The previous code tested
+		 * "status != 0", which is always true once the pending bit
+		 * is set, and therefore always cleared INTX_DISABLE instead
+		 * of setting it.
+		 */
+		new = old | PCI_COMMAND_INTX_DISABLE;
+
+		if (old != new)
+			pci_write_config_word(pdev, PCI_COMMAND, new);
+	}
+	pci_unblock_user_cfg_access(pdev);
+
+	return pending;
+}
+
+#endif /* < 3.3.0 */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
+#define HAVE_PCI_IS_BRIDGE_API 1
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+#define HAVE_MSI_LIST_IN_GENERIC_DEVICE 1
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
+#define HAVE_PCI_MSI_MASK_IRQ 1
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+#define HAVE_ALLOC_IRQ_VECTORS 1
+#endif
+/*
+ * Report whether the kernel is locked down.  The answer comes from
+ * kernel_is_locked_down() when CONFIG_LOCK_DOWN_KERNEL is set together
+ * with one of the two secure-boot options below, which also select the
+ * call signature to use; in every other configuration it is false.
+ */
+static inline bool igbuio_kernel_is_locked_down(void)
+{
+#ifdef CONFIG_LOCK_DOWN_KERNEL
+#ifdef CONFIG_LOCK_DOWN_IN_EFI_SECURE_BOOT
+ return kernel_is_locked_down(NULL); /* variant that takes an argument */
+#elif defined(CONFIG_EFI_SECURE_BOOT_LOCK_DOWN)
+ return kernel_is_locked_down(); /* variant without arguments */
+#else
+ return false;
+#endif
+#else /* !CONFIG_LOCK_DOWN_KERNEL */
+ return false;
+#endif
+}
diff --git a/src/spdk/dpdk/kernel/linux/igb_uio/igb_uio.c b/src/spdk/dpdk/kernel/linux/igb_uio/igb_uio.c
new file mode 100644
index 000000000..039f5a5f6
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/igb_uio/igb_uio.c
@@ -0,0 +1,660 @@
+// SPDX-License-Identifier: GPL-2.0
+/*-
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/uio_driver.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <linux/version.h>
+#include <linux/slab.h>
+
+#include <rte_pci_dev_features.h>
+
+#include "compat.h"
+
+/**
+ * A structure describing the private information for a uio device.
+ */
+struct rte_uio_pci_dev {
+ struct uio_info info;
+ struct pci_dev *pdev;
+ enum rte_intr_mode mode;
+ atomic_t refcnt;
+};
+
+ /* "wc_activate" module parameter: non-zero skips the kernel ioremap of memory BARs */
+ static int wc_activate;
+ /* "intr_mode" module parameter string; NULL when not given on load */
+ static char *intr_mode;
+ /* first interrupt mode tried when enabling interrupts; set from intr_mode at module init */
+ static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
+ /* sriov sysfs */
+ /*
+  * sysfs "show" handler for the max_vfs attribute: formats the value of
+  * dev_num_vf(dev) as an unsigned decimal followed by a newline. At most
+  * 10 bytes (including the terminating NUL) are written to buf.
+  */
+ static ssize_t
+ show_max_vfs(struct device *dev, struct device_attribute *attr,
+ char *buf)
+ {
+ return snprintf(buf, 10, "%u\n", dev_num_vf(dev));
+ }
+
+ /*
+  * sysfs "store" handler for the max_vfs attribute.
+  *
+  * Writing 0 disables SR-IOV. Writing a non-zero value enables that many VFs,
+  * but only when no VF is currently enabled; changing the number of already
+  * enabled VFs is rejected.
+  *
+  * Returns count on success, -EINVAL on unparsable input or when VFs are
+  * already enabled, or the error returned by pci_enable_sriov().
+  */
+ static ssize_t
+ store_max_vfs(struct device *dev, struct device_attribute *attr,
+ 	      const char *buf, size_t count)
+ {
+ 	struct pci_dev *pdev = to_pci_dev(dev);
+ 	unsigned long max_vfs;
+ 	int err;
+
+ 	if (kstrtoul(buf, 0, &max_vfs) != 0)
+ 		return -EINVAL;
+
+ 	if (max_vfs == 0) {
+ 		pci_disable_sriov(pdev);
+ 		return count;
+ 	}
+
+ 	/* do nothing if change max_vfs number */
+ 	if (pci_num_vf(pdev) != 0)
+ 		return -EINVAL;
+
+ 	err = pci_enable_sriov(pdev, max_vfs);
+ 	if (err)
+ 		return err;
+
+ 	return count;
+ }
+
+ /* max_vfs sysfs attribute: readable by everyone, writable by the owner only */
+ static DEVICE_ATTR(max_vfs, S_IRUGO | S_IWUSR, show_max_vfs, store_max_vfs);
+
+ /* NULL-terminated list of the attributes exported for each bound device */
+ static struct attribute *dev_attrs[] = {
+ &dev_attr_max_vfs.attr,
+ NULL,
+ };
+
+ /* attribute group created in igbuio_pci_probe() and removed in igbuio_pci_remove() */
+ static const struct attribute_group dev_attr_grp = {
+ .attrs = dev_attrs,
+ };
+
+#ifndef HAVE_PCI_MSI_MASK_IRQ
+/*
+ * It masks the msix on/off of generating MSI-X messages.
+ */
+static void
+igbuio_msix_mask_irq(struct msi_desc *desc, s32 state)
+{
+ u32 mask_bits = desc->masked;
+ unsigned int offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+ PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+ if (state != 0)
+ mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
+ else
+ mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+
+ if (mask_bits != desc->masked) {
+ writel(mask_bits, desc->mask_base + offset);
+ readl(desc->mask_base);
+ desc->masked = mask_bits;
+ }
+}
+
+/*
+ * It masks the msi on/off of generating MSI messages.
+ */
+static void
+igbuio_msi_mask_irq(struct pci_dev *pdev, struct msi_desc *desc, int32_t state)
+{
+ u32 mask_bits = desc->masked;
+ u32 offset = desc->irq - pdev->irq;
+ u32 mask = 1 << offset;
+
+ if (!desc->msi_attrib.maskbit)
+ return;
+
+ if (state != 0)
+ mask_bits &= ~mask;
+ else
+ mask_bits |= mask;
+
+ if (mask_bits != desc->masked) {
+ pci_write_config_dword(pdev, desc->mask_pos, mask_bits);
+ desc->masked = mask_bits;
+ }
+}
+
+ /*
+  * Apply irq_state to every MSI or MSI-X descriptor of the device.
+  * Any other interrupt mode is ignored.
+  */
+ static void
+ igbuio_mask_irq(struct pci_dev *pdev, enum rte_intr_mode mode, s32 irq_state)
+ {
+ 	struct list_head *msi_list;
+ 	struct msi_desc *desc;
+
+ 	/* the descriptor list lives in a different structure on newer kernels */
+ #ifdef HAVE_MSI_LIST_IN_GENERIC_DEVICE
+ 	msi_list = &pdev->dev.msi_list;
+ #else
+ 	msi_list = &pdev->msi_list;
+ #endif
+
+ 	if (mode != RTE_INTR_MODE_MSIX && mode != RTE_INTR_MODE_MSI)
+ 		return;
+
+ 	list_for_each_entry(desc, msi_list, list) {
+ 		if (mode == RTE_INTR_MODE_MSIX)
+ 			igbuio_msix_mask_irq(desc, irq_state);
+ 		else
+ 			igbuio_msi_mask_irq(pdev, desc, irq_state);
+ 	}
+ }
+#endif
+
+/**
+ * This is the irqcontrol callback to be registered to uio_info.
+ * It can be used to disable/enable interrupt from user space processes.
+ *
+ * @param info
+ * pointer to uio_info.
+ * @param irq_state
+ * state value. 1 to enable interrupt, 0 to disable interrupt.
+ *
+ * @return
+ * - On success, 0.
+ * - On failure, a negative value.
+ */
+static int
+igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
+{
+ struct rte_uio_pci_dev *udev = info->priv;
+ struct pci_dev *pdev = udev->pdev;
+
+#ifdef HAVE_PCI_MSI_MASK_IRQ
+ struct irq_data *irq = irq_get_irq_data(udev->info.irq);
+#endif
+
+ pci_cfg_access_lock(pdev);
+
+ if (udev->mode == RTE_INTR_MODE_MSIX || udev->mode == RTE_INTR_MODE_MSI) {
+#ifdef HAVE_PCI_MSI_MASK_IRQ
+ if (irq_state == 1)
+ pci_msi_unmask_irq(irq);
+ else
+ pci_msi_mask_irq(irq);
+#else
+ igbuio_mask_irq(pdev, udev->mode, irq_state);
+#endif
+ }
+
+ if (udev->mode == RTE_INTR_MODE_LEGACY)
+ pci_intx(pdev, !!irq_state);
+
+ pci_cfg_access_unlock(pdev);
+
+ return 0;
+}
+
+/**
+ * This is interrupt handler which will check if the interrupt is for the right device.
+ * If yes, disable it here and will be enable later.
+ */
+static irqreturn_t
+igbuio_pci_irqhandler(int irq, void *dev_id)
+{
+ struct rte_uio_pci_dev *udev = (struct rte_uio_pci_dev *)dev_id;
+ struct uio_info *info = &udev->info;
+
+ /* Legacy mode need to mask in hardware */
+ if (udev->mode == RTE_INTR_MODE_LEGACY &&
+ !pci_check_and_mask_intx(udev->pdev))
+ return IRQ_NONE;
+
+ uio_event_notify(info);
+
+ /* Message signal mode, no share IRQ and automasked */
+ return IRQ_HANDLED;
+}
+
+ /*
+  * Select an interrupt mode and install the interrupt handler.
+  *
+  * Starting at igbuio_intr_mode_preferred, the switch falls through
+  * MSI-X -> MSI -> INTx -> none until one mode can be set up. The chosen mode
+  * is recorded in udev->mode and the irq number and flags in udev->info.
+  *
+  * Returns 0 on success, -EINVAL for an unknown preferred mode, or the error
+  * returned by request_irq().
+  *
+  * NOTE(review): when request_irq() fails, the MSI/MSI-X vector enabled
+  * earlier is not released here and udev->mode keeps the selected mode;
+  * confirm whether the caller is expected to clean up.
+  */
+ static int
+ igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev)
+ {
+ int err = 0;
+ #ifndef HAVE_ALLOC_IRQ_VECTORS
+ struct msix_entry msix_entry;
+ #endif
+
+ switch (igbuio_intr_mode_preferred) {
+ case RTE_INTR_MODE_MSIX:
+ /* Only 1 msi-x vector needed */
+ #ifndef HAVE_ALLOC_IRQ_VECTORS
+ msix_entry.entry = 0;
+ if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) {
+ dev_dbg(&udev->pdev->dev, "using MSI-X");
+ udev->info.irq_flags = IRQF_NO_THREAD;
+ udev->info.irq = msix_entry.vector;
+ udev->mode = RTE_INTR_MODE_MSIX;
+ break;
+ }
+ #else
+ if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) {
+ dev_dbg(&udev->pdev->dev, "using MSI-X");
+ udev->info.irq_flags = IRQF_NO_THREAD;
+ udev->info.irq = pci_irq_vector(udev->pdev, 0);
+ udev->mode = RTE_INTR_MODE_MSIX;
+ break;
+ }
+ #endif
+
+ /* falls through - to MSI */
+ case RTE_INTR_MODE_MSI:
+ #ifndef HAVE_ALLOC_IRQ_VECTORS
+ if (pci_enable_msi(udev->pdev) == 0) {
+ dev_dbg(&udev->pdev->dev, "using MSI");
+ udev->info.irq_flags = IRQF_NO_THREAD;
+ udev->info.irq = udev->pdev->irq;
+ udev->mode = RTE_INTR_MODE_MSI;
+ break;
+ }
+ #else
+ if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) {
+ dev_dbg(&udev->pdev->dev, "using MSI");
+ udev->info.irq_flags = IRQF_NO_THREAD;
+ udev->info.irq = pci_irq_vector(udev->pdev, 0);
+ udev->mode = RTE_INTR_MODE_MSI;
+ break;
+ }
+ #endif
+ /* falls through - to INTX */
+ case RTE_INTR_MODE_LEGACY:
+ /* INTx is only usable when the device supports the INTx disable bit */
+ if (pci_intx_mask_supported(udev->pdev)) {
+ dev_dbg(&udev->pdev->dev, "using INTX");
+ udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD;
+ udev->info.irq = udev->pdev->irq;
+ udev->mode = RTE_INTR_MODE_LEGACY;
+ break;
+ }
+ dev_notice(&udev->pdev->dev, "PCI INTX mask not supported\n");
+ /* falls through - to no IRQ */
+ case RTE_INTR_MODE_NONE:
+ udev->mode = RTE_INTR_MODE_NONE;
+ udev->info.irq = UIO_IRQ_NONE;
+ break;
+
+ default:
+ dev_err(&udev->pdev->dev, "invalid IRQ mode %u",
+ igbuio_intr_mode_preferred);
+ udev->info.irq = UIO_IRQ_NONE;
+ err = -EINVAL;
+ }
+
+ /* install the handler only when an interrupt was actually selected */
+ if (udev->info.irq != UIO_IRQ_NONE)
+ err = request_irq(udev->info.irq, igbuio_pci_irqhandler,
+ udev->info.irq_flags, udev->info.name,
+ udev);
+ dev_info(&udev->pdev->dev, "uio device registered with irq %ld\n",
+ udev->info.irq);
+
+ return err;
+ }
+
+ /*
+  * Undo igbuio_pci_enable_interrupts(): free the installed handler, if any,
+  * and release the MSI/MSI-X vector when one of those modes is in use.
+  */
+ static void
+ igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev)
+ {
+ /* a zero irq means no handler was installed */
+ if (udev->info.irq) {
+ free_irq(udev->info.irq, udev);
+ udev->info.irq = 0;
+ }
+
+ #ifndef HAVE_ALLOC_IRQ_VECTORS
+ if (udev->mode == RTE_INTR_MODE_MSIX)
+ pci_disable_msix(udev->pdev);
+ if (udev->mode == RTE_INTR_MODE_MSI)
+ pci_disable_msi(udev->pdev);
+ #else
+ if (udev->mode == RTE_INTR_MODE_MSIX ||
+ udev->mode == RTE_INTR_MODE_MSI)
+ pci_free_irq_vectors(udev->pdev);
+ #endif
+ }
+
+
+/**
+ * This gets called while opening uio device file.
+ */
+static int
+igbuio_pci_open(struct uio_info *info, struct inode *inode)
+{
+ struct rte_uio_pci_dev *udev = info->priv;
+ struct pci_dev *dev = udev->pdev;
+ int err;
+
+ if (atomic_inc_return(&udev->refcnt) != 1)
+ return 0;
+
+ /* set bus master, which was cleared by the reset function */
+ pci_set_master(dev);
+
+ /* enable interrupts */
+ err = igbuio_pci_enable_interrupts(udev);
+ if (err) {
+ atomic_dec(&udev->refcnt);
+ dev_err(&dev->dev, "Enable interrupt fails\n");
+ }
+ return err;
+}
+
+ /*
+  * UIO release callback: drops one reference and, when that was the last
+  * one, tears down interrupts and stops the device from mastering the bus.
+  * Always returns 0.
+  */
+ static int
+ igbuio_pci_release(struct uio_info *info, struct inode *inode)
+ {
+ 	struct rte_uio_pci_dev *udev = info->priv;
+ 	struct pci_dev *dev = udev->pdev;
+
+ 	/* other openers remain: nothing to tear down yet */
+ 	if (!atomic_dec_and_test(&udev->refcnt))
+ 		return 0;
+
+ 	/* disable interrupts */
+ 	igbuio_pci_disable_interrupts(udev);
+
+ 	/* stop the device from further DMA */
+ 	pci_clear_master(dev);
+
+ 	return 0;
+ }
+
+ /*
+  * Remap pci resources described by bar #pci_bar in uio resource n.
+  *
+  * Fills info->mem[n] with the BAR's physical address and length. Unless
+  * wc_activate is set, the BAR is also ioremap()ed and the kernel mapping is
+  * stored in internal_addr; with wc_activate set, internal_addr is NULL.
+  *
+  * Returns 0 on success, -EINVAL when n is out of range or the BAR has no
+  * address or length, -ENOMEM when ioremap() fails.
+  */
+ static int
+ igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
+ 		       int n, int pci_bar, const char *name)
+ {
+ 	unsigned long addr, len;
+ 	void *internal_addr;
+
+ 	if (n >= ARRAY_SIZE(info->mem))
+ 		return -EINVAL;
+
+ 	addr = pci_resource_start(dev, pci_bar);
+ 	len = pci_resource_len(dev, pci_bar);
+ 	/* proper errno values instead of a bare -1, matching the ioport helper */
+ 	if (addr == 0 || len == 0)
+ 		return -EINVAL;
+ 	if (wc_activate == 0) {
+ 		internal_addr = ioremap(addr, len);
+ 		if (internal_addr == NULL)
+ 			return -ENOMEM;
+ 	} else {
+ 		/* no kernel mapping is created when write combining is requested */
+ 		internal_addr = NULL;
+ 	}
+ 	info->mem[n].name = name;
+ 	info->mem[n].addr = addr;
+ 	info->mem[n].internal_addr = internal_addr;
+ 	info->mem[n].size = len;
+ 	info->mem[n].memtype = UIO_MEM_PHYS;
+ 	return 0;
+ }
+
+ /*
+  * Describe the port I/O BAR #pci_bar in uio port resource n.
+  *
+  * Returns 0 on success, -EINVAL when n is out of range or the BAR has no
+  * start address or length.
+  */
+ static int
+ igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info,
+ 			int n, int pci_bar, const char *name)
+ {
+ 	unsigned long start, size;
+ 	struct uio_port *port;
+
+ 	if (n >= ARRAY_SIZE(info->port))
+ 		return -EINVAL;
+
+ 	start = pci_resource_start(dev, pci_bar);
+ 	size = pci_resource_len(dev, pci_bar);
+ 	if (start == 0 || size == 0)
+ 		return -EINVAL;
+
+ 	port = &info->port[n];
+ 	port->name = name;
+ 	port->start = start;
+ 	port->size = size;
+ 	port->porttype = UIO_PORT_X86;
+
+ 	return 0;
+ }
+
+ /*
+  * Unmap every memory resource that was ioremap()ed during setup.
+  * Entries without a kernel mapping (internal_addr == NULL) are skipped.
+  */
+ static void
+ igbuio_pci_release_iomem(struct uio_info *info)
+ {
+ 	struct uio_mem *mem;
+
+ 	for (mem = &info->mem[0]; mem < &info->mem[MAX_UIO_MAPS]; mem++) {
+ 		if (!mem->internal_addr)
+ 			continue;
+ 		iounmap(mem->internal_addr);
+ 	}
+ }
+
+ /*
+  * Walk the standard PCI BARs and register each populated one as either a
+  * UIO memory resource or a UIO port resource, in BAR order.
+  *
+  * Returns 0 when at least one BAR was registered, -ENOENT when none was,
+  * or the first error reported by a setup helper.
+  */
+ static int
+ igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info)
+ {
+ 	static const char *bar_names[PCI_STD_RESOURCE_END + 1] = {
+ 		"BAR0",
+ 		"BAR1",
+ 		"BAR2",
+ 		"BAR3",
+ 		"BAR4",
+ 		"BAR5",
+ 	};
+ 	int mem_idx = 0;
+ 	int port_idx = 0;
+ 	unsigned long flags;
+ 	int bar, ret;
+
+ 	for (bar = 0; bar < ARRAY_SIZE(bar_names); bar++) {
+ 		/* skip BARs that are not populated */
+ 		if (pci_resource_len(dev, bar) == 0 ||
+ 		    pci_resource_start(dev, bar) == 0)
+ 			continue;
+
+ 		flags = pci_resource_flags(dev, bar);
+ 		if (flags & IORESOURCE_MEM) {
+ 			ret = igbuio_pci_setup_iomem(dev, info, mem_idx,
+ 						     bar, bar_names[bar]);
+ 			if (ret != 0)
+ 				return ret;
+ 			mem_idx++;
+ 		} else if (flags & IORESOURCE_IO) {
+ 			ret = igbuio_pci_setup_ioport(dev, info, port_idx,
+ 						      bar, bar_names[bar]);
+ 			if (ret != 0)
+ 				return ret;
+ 			port_idx++;
+ 		}
+ 	}
+
+ 	if (mem_idx == 0 && port_idx == 0)
+ 		return -ENOENT;
+
+ 	/* every helper call that got this far returned 0 */
+ 	return 0;
+ }
+
+ /*
+  * PCI probe callback: binds a device to igb_uio.
+  *
+  * Allocates the per-device state, enables the device, registers its BARs as
+  * UIO resources, sets 64-bit DMA masks, creates the max_vfs sysfs group and
+  * registers the UIO device. Interrupts are not set up here; that happens on
+  * first open (see igbuio_pci_open()).
+  *
+  * Returns 0 on success or a negative error, in which case everything done
+  * so far is undone through the fail_* labels.
+  */
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+ static int __devinit
+ #else
+ static int
+ #endif
+ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ {
+ struct rte_uio_pci_dev *udev;
+ dma_addr_t map_dma_addr;
+ void *map_addr;
+ int err;
+
+ #ifdef HAVE_PCI_IS_BRIDGE_API
+ if (pci_is_bridge(dev)) {
+ dev_warn(&dev->dev, "Ignoring PCI bridge device\n");
+ return -ENODEV;
+ }
+ #endif
+
+ udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
+ if (!udev)
+ return -ENOMEM;
+
+ /*
+ * enable device: ask low-level code to enable I/O and
+ * memory
+ */
+ err = pci_enable_device(dev);
+ if (err != 0) {
+ dev_err(&dev->dev, "Cannot enable PCI device\n");
+ goto fail_free;
+ }
+
+ /* enable bus mastering on the device */
+ pci_set_master(dev);
+
+ /* remap IO memory */
+ err = igbuio_setup_bars(dev, &udev->info);
+ /* BARs mapped before the failing one are unmapped at the label */
+ if (err != 0)
+ goto fail_release_iomem;
+
+ /* set 64-bit DMA mask */
+ err = pci_set_dma_mask(dev, DMA_BIT_MASK(64));
+ if (err != 0) {
+ dev_err(&dev->dev, "Cannot set DMA mask\n");
+ goto fail_release_iomem;
+ }
+
+ err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64));
+ if (err != 0) {
+ dev_err(&dev->dev, "Cannot set consistent DMA mask\n");
+ goto fail_release_iomem;
+ }
+
+ /* fill uio infos */
+ udev->info.name = "igb_uio";
+ udev->info.version = "0.1";
+ udev->info.irqcontrol = igbuio_pci_irqcontrol;
+ udev->info.open = igbuio_pci_open;
+ udev->info.release = igbuio_pci_release;
+ udev->info.priv = udev;
+ udev->pdev = dev;
+ atomic_set(&udev->refcnt, 0);
+
+ err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
+ if (err != 0)
+ goto fail_release_iomem;
+
+ /* register uio driver */
+ err = uio_register_device(&dev->dev, &udev->info);
+ if (err != 0)
+ goto fail_remove_group;
+
+ pci_set_drvdata(dev, udev);
+
+ /*
+ * Doing a harmless dma mapping for attaching the device to
+ * the iommu identity mapping if kernel boots with iommu=pt.
+ * Note this is not a problem if no IOMMU at all.
+ */
+ map_addr = dma_alloc_coherent(&dev->dev, 1024, &map_dma_addr,
+ GFP_KERNEL);
+ if (map_addr)
+ memset(map_addr, 0, 1024);
+
+ /* a failed mapping is only logged; probe still succeeds */
+ if (!map_addr)
+ dev_info(&dev->dev, "dma mapping failed\n");
+ else {
+ dev_info(&dev->dev, "mapping 1K dma=%#llx host=%p\n",
+ (unsigned long long)map_dma_addr, map_addr);
+
+ dma_free_coherent(&dev->dev, 1024, map_addr, map_dma_addr);
+ dev_info(&dev->dev, "unmapping 1K dma=%#llx host=%p\n",
+ (unsigned long long)map_dma_addr, map_addr);
+ }
+
+ return 0;
+
+ /* error unwinding, in reverse order of setup */
+ fail_remove_group:
+ sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
+ fail_release_iomem:
+ igbuio_pci_release_iomem(&udev->info);
+ pci_disable_device(dev);
+ fail_free:
+ kfree(udev);
+
+ return err;
+ }
+
+ /*
+  * PCI remove callback: unbinds the device and frees everything set up by
+  * igbuio_pci_probe().
+  */
+ static void
+ igbuio_pci_remove(struct pci_dev *dev)
+ {
+ struct rte_uio_pci_dev *udev = pci_get_drvdata(dev);
+
+ /*
+  * Drops one reference; interrupts are torn down only if that brings
+  * refcnt to exactly zero (see igbuio_pci_release()).
+  */
+ igbuio_pci_release(&udev->info, NULL);
+
+ sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
+ uio_unregister_device(&udev->info);
+ igbuio_pci_release_iomem(&udev->info);
+ pci_disable_device(dev);
+ pci_set_drvdata(dev, NULL);
+ kfree(udev);
+ }
+
+ /*
+  * Translate the "intr_mode" module parameter string into
+  * igbuio_intr_mode_preferred.
+  *
+  * A NULL string keeps the built-in default. Returns 0 on success and
+  * -EINVAL when the string matches none of the known mode names.
+  */
+ static int
+ igbuio_config_intr_mode(char *intr_str)
+ {
+ 	if (!intr_str) {
+ 		pr_info("Use MSIX interrupt by default\n");
+ 		return 0;
+ 	}
+
+ 	if (strcmp(intr_str, RTE_INTR_MODE_MSIX_NAME) == 0) {
+ 		igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
+ 		pr_info("Use MSIX interrupt\n");
+ 		return 0;
+ 	}
+
+ 	if (strcmp(intr_str, RTE_INTR_MODE_MSI_NAME) == 0) {
+ 		igbuio_intr_mode_preferred = RTE_INTR_MODE_MSI;
+ 		pr_info("Use MSI interrupt\n");
+ 		return 0;
+ 	}
+
+ 	if (strcmp(intr_str, RTE_INTR_MODE_LEGACY_NAME) == 0) {
+ 		igbuio_intr_mode_preferred = RTE_INTR_MODE_LEGACY;
+ 		pr_info("Use legacy interrupt\n");
+ 		return 0;
+ 	}
+
+ 	pr_info("Error: bad parameter - %s\n", intr_str);
+ 	return -EINVAL;
+ }
+
+ /*
+  * PCI driver descriptor. The id table is NULL, so no device is matched
+  * statically by this table.
+  * NOTE(review): devices are presumably bound at run time through sysfs
+  * (new_id/bind) - confirm.
+  */
+ static struct pci_driver igbuio_pci_driver = {
+ .name = "igb_uio",
+ .id_table = NULL,
+ .probe = igbuio_pci_probe,
+ .remove = igbuio_pci_remove,
+ };
+
+ /*
+  * Module entry point: refuses to load on a locked-down kernel, applies the
+  * module parameters and registers the PCI driver.
+  *
+  * Returns 0 on success, -EINVAL on lockdown or a bad intr_mode value, or
+  * the error returned by pci_register_driver().
+  */
+ static int __init
+ igbuio_pci_init_module(void)
+ {
+ 	int rc;
+
+ 	if (igbuio_kernel_is_locked_down()) {
+ 		pr_err("Not able to use module, kernel lock down is enabled\n");
+ 		return -EINVAL;
+ 	}
+
+ 	if (wc_activate != 0)
+ 		pr_info("wc_activate is set\n");
+
+ 	rc = igbuio_config_intr_mode(intr_mode);
+
+ 	return (rc < 0) ? rc : pci_register_driver(&igbuio_pci_driver);
+ }
+
+ /* Module exit point: unregisters the PCI driver registered at init. */
+ static void __exit
+ igbuio_pci_exit_module(void)
+ {
+ pci_unregister_driver(&igbuio_pci_driver);
+ }
+
+ /* module entry and exit hooks */
+ module_init(igbuio_pci_init_module);
+ module_exit(igbuio_pci_exit_module);
+
+ /* intr_mode: string parameter, read-only in sysfs, parsed at module init */
+ module_param(intr_mode, charp, S_IRUGO);
+ MODULE_PARM_DESC(intr_mode,
+ "igb_uio interrupt mode (default=msix):\n"
+ " " RTE_INTR_MODE_MSIX_NAME " Use MSIX interrupt\n"
+ " " RTE_INTR_MODE_MSI_NAME " Use MSI interrupt\n"
+ " " RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n"
+ "\n");
+
+ /* wc_activate: integer parameter with permissions 0 (no sysfs entry) */
+ module_param(wc_activate, int, 0);
+ MODULE_PARM_DESC(wc_activate,
+ "Activate support for write combining (WC) (default=0)\n"
+ " 0 - disable\n"
+ " other - enable\n");
+
+ MODULE_DESCRIPTION("UIO driver for Intel IGB PCI cards");
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Intel Corporation");
diff --git a/src/spdk/dpdk/kernel/linux/igb_uio/meson.build b/src/spdk/dpdk/kernel/linux/igb_uio/meson.build
new file mode 100644
index 000000000..80540aece
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/igb_uio/meson.build
@@ -0,0 +1,20 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ # Copyright(c) 2017 Intel Corporation
+
+ # Creates an empty 'Makefile' in the build directory.
+ # NOTE(review): presumably needed because kbuild expects a Makefile in the
+ # M= directory - confirm.
+ mkfile = custom_target('igb_uio_makefile',
+ output: 'Makefile',
+ command: ['touch', '@OUTPUT@'])
+
+ # Builds igb_uio.ko by invoking the kernel build system in kernel_dir/build,
+ # with M= pointing at this build directory and src= at this source directory.
+ # The EAL include directory is added to the compiler flags. The module is
+ # installed under kernel_dir/extra/dpdk and only built by default when the
+ # 'enable_kmods' option is set.
+ custom_target('igb_uio',
+ input: ['igb_uio.c', 'Kbuild'],
+ output: 'igb_uio.ko',
+ command: ['make', '-C', kernel_dir + '/build',
+ 'M=' + meson.current_build_dir(),
+ 'src=' + meson.current_source_dir(),
+ 'EXTRA_CFLAGS=-I' + meson.current_source_dir() +
+ '/../../../lib/librte_eal/include',
+ 'modules'],
+ depends: mkfile,
+ install: true,
+ install_dir: kernel_dir + '/extra/dpdk',
+ build_by_default: get_option('enable_kmods'))
diff --git a/src/spdk/dpdk/kernel/linux/kni/Kbuild b/src/spdk/dpdk/kernel/linux/kni/Kbuild
new file mode 100644
index 000000000..e5452d6c0
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/Kbuild
@@ -0,0 +1,6 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ # Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+ # Compiler flags are supplied by the caller through MODULE_CFLAGS.
+ ccflags-y := $(MODULE_CFLAGS)
+ # Build a single loadable module named rte_kni.
+ obj-m := rte_kni.o
+ # The module is made of one object per .c file found in $(src).
+ rte_kni-y := $(patsubst $(src)/%.c,%.o,$(wildcard $(src)/*.c))
diff --git a/src/spdk/dpdk/kernel/linux/kni/Makefile b/src/spdk/dpdk/kernel/linux/kni/Makefile
new file mode 100644
index 000000000..595bac261
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/Makefile
@@ -0,0 +1,34 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ # Copyright(c) 2010-2014 Intel Corporation
+
+ include $(RTE_SDK)/mk/rte.vars.mk
+
+ #
+ # module name and path
+ #
+ MODULE = rte_kni
+
+ #
+ # CFLAGS
+ #
+ MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
+ MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+ MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+ MODULE_CFLAGS += -Wall -Werror
+
+ # Optional include (leading '-'): a missing /etc/lsb-release is not an error.
+ -include /etc/lsb-release
+
+ # On Ubuntu, pass the distribution release (dots removed) and a kernel
+ # version code derived from UTS_RELEASE to the compiler as macros.
+ ifeq ($(DISTRIB_ID),Ubuntu)
+ MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(subst .,,$(DISTRIB_RELEASE))
+ UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \
+ | cut -d '"' -f2 | cut -d- -f1,2 | tr .- ,`,1)
+ MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))"
+ endif
+
+ #
+ # all source are stored in SRCS-y
+ #
+ SRCS-y := kni_misc.c
+ SRCS-y += kni_net.c
+
+ include $(RTE_SDK)/mk/rte.module.mk
diff --git a/src/spdk/dpdk/kernel/linux/kni/compat.h b/src/spdk/dpdk/kernel/linux/kni/compat.h
new file mode 100644
index 000000000..9ee45dbf6
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/compat.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Minimal wrappers to allow compiling kni on older kernels.
+ */
+
+#include <linux/version.h>
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
+#endif
+
+/* SuSE version macro is the same as Linux kernel version */
+#ifndef SLE_VERSION
+#define SLE_VERSION(a, b, c) KERNEL_VERSION(a, b, c)
+#endif
+#ifdef CONFIG_SUSE_KERNEL
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57))
+/* SLES12SP3 is at least 4.4.57+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0)
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 28))
+/* SLES12 is at least 3.12.28+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12, 0, 0)
+#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 61)) && \
+ (LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)))
+/* SLES11 SP3 is at least 3.0.61+ based */
+#define SLE_VERSION_CODE SLE_VERSION(11, 3, 0)
+#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32))
+/* SLES11 SP1 is 2.6.32 based */
+#define SLE_VERSION_CODE SLE_VERSION(11, 1, 0)
+#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 27))
+/* SLES11 GA is 2.6.27 based */
+#define SLE_VERSION_CODE SLE_VERSION(11, 0, 0)
+#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */
+#endif /* CONFIG_SUSE_KERNEL */
+#ifndef SLE_VERSION_CODE
+#define SLE_VERSION_CODE 0
+#endif /* SLE_VERSION_CODE */
+
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
+ (!(defined(RHEL_RELEASE_CODE) && \
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
+
+#define kstrtoul strict_strtoul
+
+#endif /* < 2.6.39 */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33)
+#define HAVE_SIMPLIFIED_PERNET_OPERATIONS
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+#define sk_sleep(s) ((s)->sk_sleep)
+#else
+#define HAVE_SOCKET_WQ
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+#define HAVE_STATIC_SOCK_MAP_FD
+#else
+#define kni_sock_map_fd(s) sock_map_fd(s, 0)
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
+#define HAVE_CHANGE_CARRIER_CB
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define ether_addr_copy(dst, src) memcpy(dst, src, ETH_ALEN)
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
+#define HAVE_IOV_ITER_MSGHDR
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)
+#define HAVE_KIOCB_MSG_PARAM
+#define HAVE_REBUILD_HEADER
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+#define HAVE_SK_ALLOC_KERN_PARAM
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) || \
+ (defined(RHEL_RELEASE_CODE) && \
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4)) || \
+ (SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(12, 3, 0))
+#define HAVE_TRANS_START_HELPER
+#endif
+
+/*
+ * KNI uses NET_NAME_UNKNOWN macro to select correct version of alloc_netdev()
+ * For old kernels just backported the commit that enables the macro
+ * (685343fc3ba6) but still uses old API, it is required to undefine macro to
+ * select correct version of API, this is safe since KNI doesn't use the value.
+ * This fix is specific to RedHat/CentOS kernels.
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \
+ (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)))
+#undef NET_NAME_UNKNOWN
+#endif
+
+/*
+ * RHEL has two different version with different kernel version:
+ * 3.10 is for AMD, Intel, IBM POWER7 and POWER8;
+ * 4.14 is for ARM and IBM POWER9
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8, 0)) && \
+ (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)))
+#define ndo_change_mtu ndo_change_mtu_rh74
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+#define HAVE_MAX_MTU_PARAM
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+#define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
+#endif
+
+/*
+ * iova to kva mapping support can be provided since 4.6.0, but required
+ * kernel version increased to >= 4.10.0 because of the updates in
+ * get_user_pages_remote() kernel API
+ */
+#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
+#define HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+#endif
+
+#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE
+#define HAVE_TX_TIMEOUT_TXQUEUE
+#endif
diff --git a/src/spdk/dpdk/kernel/linux/kni/kni_dev.h b/src/spdk/dpdk/kernel/linux/kni/kni_dev.h
new file mode 100644
index 000000000..ca5f92a47
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/kni_dev.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2010-2014 Intel Corporation.
+ */
+
+#ifndef _KNI_DEV_H_
+#define _KNI_DEV_H_
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define KNI_VERSION "1.0"
+
+#include "compat.h"
+
+#include <linux/if.h>
+#include <linux/wait.h>
+#ifdef HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
+#include <linux/sched/signal.h>
+#else
+#include <linux/sched.h>
+#endif
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#include <rte_kni_common.h>
+#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
+
+#define MBUF_BURST_SZ 32
+
+/* Default carrier state for created KNI network interfaces */
+extern uint32_t kni_dflt_carrier;
+
+/**
+ * A structure describing the private information for a kni device.
+ */
+struct kni_dev {
+ /* kni list */
+ struct list_head list;
+
+ uint8_t iova_mode;
+
+ uint32_t core_id; /* Core ID to bind */
+ char name[RTE_KNI_NAMESIZE]; /* Network device name */
+ struct task_struct *pthread;
+
+ /* wait queue for req/resp */
+ wait_queue_head_t wq;
+ struct mutex sync_lock;
+
+ /* kni device */
+ struct net_device *net_dev;
+
+ /* queue for packets to be sent out */
+ struct rte_kni_fifo *tx_q;
+
+ /* queue for the packets received */
+ struct rte_kni_fifo *rx_q;
+
+ /* queue for the allocated mbufs those can be used to save sk buffs */
+ struct rte_kni_fifo *alloc_q;
+
+ /* free queue for the mbufs to be freed */
+ struct rte_kni_fifo *free_q;
+
+ /* request queue */
+ struct rte_kni_fifo *req_q;
+
+ /* response queue */
+ struct rte_kni_fifo *resp_q;
+
+ void *sync_kva;
+ void *sync_va;
+
+ void *mbuf_kva;
+ void *mbuf_va;
+
+ /* mbuf size */
+ uint32_t mbuf_size;
+
+ /* buffers */
+ void *pa[MBUF_BURST_SZ];
+ void *va[MBUF_BURST_SZ];
+ void *alloc_pa[MBUF_BURST_SZ];
+ void *alloc_va[MBUF_BURST_SZ];
+
+ struct task_struct *usr_tsk;
+};
+
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+ /*
+  * Translate an address in the address space of task @tsk into a physical
+  * address by looking up the backing page.
+  *
+  * The page is pinned only for the duration of the lookup; the reference is
+  * dropped before returning.
+  *
+  * Returns the physical address, or 0 when the page could not be obtained.
+  */
+ static inline phys_addr_t iova_to_phys(struct task_struct *tsk,
+ 				       unsigned long iova)
+ {
+ 	phys_addr_t offset, phys_addr;
+ 	struct page *page = NULL;
+ 	long ret;
+
+ 	offset = iova & (PAGE_SIZE - 1);
+
+ 	/* Read one page struct info */
+ 	ret = get_user_pages_remote(tsk, tsk->mm, iova, 1,
+ 				    FOLL_TOUCH, &page, NULL, NULL);
+ 	/*
+ 	 * Anything but one pinned page is a failure: a return of 0 leaves
+ 	 * page NULL, which must not reach page_to_phys()/put_page().
+ 	 */
+ 	if (ret <= 0 || page == NULL)
+ 		return 0;
+
+ 	phys_addr = page_to_phys(page) | offset;
+ 	put_page(page);
+
+ 	return phys_addr;
+ }
+
+ /*
+  * Translate an address in the address space of task @tsk into a kernel
+  * virtual address, via iova_to_phys() and phys_to_virt().
+  *
+  * Note: a failed lookup (iova_to_phys() returning 0) is not detected here;
+  * the result is then phys_to_virt(0).
+  */
+ static inline void *iova_to_kva(struct task_struct *tsk, unsigned long iova)
+ {
+ return phys_to_virt(iova_to_phys(tsk, iova));
+ }
+#endif
+
+void kni_net_release_fifo_phy(struct kni_dev *kni);
+void kni_net_rx(struct kni_dev *kni);
+void kni_net_init(struct net_device *dev);
+void kni_net_config_lo_mode(char *lo_str);
+void kni_net_poll_resp(struct kni_dev *kni);
+
+#endif
diff --git a/src/spdk/dpdk/kernel/linux/kni/kni_fifo.h b/src/spdk/dpdk/kernel/linux/kni/kni_fifo.h
new file mode 100644
index 000000000..5c91b5537
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/kni_fifo.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2010-2014 Intel Corporation.
+ */
+
+#ifndef _KNI_FIFO_H_
+#define _KNI_FIFO_H_
+
+#include <rte_kni_common.h>
+
+/* Skip some memory barriers on Linux < 3.14 */
+#ifndef smp_load_acquire
+#define smp_load_acquire(a) (*(a))
+#endif
+#ifndef smp_store_release
+#define smp_store_release(a, b) *(a) = (b)
+#endif
+
+/**
+ * Adds num elements into the fifo. Return the number actually written
+ */
+static inline uint32_t
+kni_fifo_put(struct rte_kni_fifo *fifo, void **data, uint32_t num)
+{
+ uint32_t i = 0;
+ uint32_t fifo_write = fifo->write;
+ uint32_t fifo_read = smp_load_acquire(&fifo->read);
+ uint32_t new_write = fifo_write;
+
+ for (i = 0; i < num; i++) {
+ new_write = (new_write + 1) & (fifo->len - 1);
+
+ if (new_write == fifo_read)
+ break;
+ fifo->buffer[fifo_write] = data[i];
+ fifo_write = new_write;
+ }
+ smp_store_release(&fifo->write, fifo_write);
+
+ return i;
+}
+
+/**
+ * Get up to num elements from the fifo. Return the number actully read
+ */
+static inline uint32_t
+kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num)
+{
+ uint32_t i = 0;
+ uint32_t new_read = fifo->read;
+ uint32_t fifo_write = smp_load_acquire(&fifo->write);
+
+ for (i = 0; i < num; i++) {
+ if (new_read == fifo_write)
+ break;
+
+ data[i] = fifo->buffer[new_read];
+ new_read = (new_read + 1) & (fifo->len - 1);
+ }
+ smp_store_release(&fifo->read, new_read);
+
+ return i;
+}
+
+/**
+ * Get the num of elements in the fifo
+ */
+static inline uint32_t
+kni_fifo_count(struct rte_kni_fifo *fifo)
+{
+ uint32_t fifo_write = smp_load_acquire(&fifo->write);
+ uint32_t fifo_read = smp_load_acquire(&fifo->read);
+ return (fifo->len + fifo_write - fifo_read) & (fifo->len - 1);
+}
+
+/**
+ * Get the num of available elements in the fifo
+ */
+static inline uint32_t
+kni_fifo_free_count(struct rte_kni_fifo *fifo)
+{
+ uint32_t fifo_write = smp_load_acquire(&fifo->write);
+ uint32_t fifo_read = smp_load_acquire(&fifo->read);
+ return (fifo_read - fifo_write - 1) & (fifo->len - 1);
+}
+
+#endif /* _KNI_FIFO_H_ */
diff --git a/src/spdk/dpdk/kernel/linux/kni/kni_misc.c b/src/spdk/dpdk/kernel/linux/kni/kni_misc.c
new file mode 100644
index 000000000..2b464c438
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/kni_misc.c
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(c) 2010-2014 Intel Corporation.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/pci.h>
+#include <linux/kthread.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include <rte_kni_common.h>
+
+#include "compat.h"
+#include "kni_dev.h"
+
+MODULE_VERSION(KNI_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Kernel Module for managing kni devices");
+
+#define KNI_RX_LOOP_NUM 1000 /* polling passes a kthread runs before re-checking kthread_should_stop() */
+
+#define KNI_MAX_DEVICES 32
+
+/* Loopback mode string; set through the "lo_mode" module parameter */
+static char *lo_mode;
+
+/*
+ * Kernel thread mode string ("kthread_mode" module parameter) and the flag
+ * derived from it: non-zero means one kthread per KNI device.
+ */
+static char *kthread_mode;
+static uint32_t multiple_kthread_on;
+
+/*
+ * Default carrier state for created KNI network interfaces:
+ * the "carrier" module parameter string and its parsed value (0 off, 1 on).
+ */
+static char *carrier;
+uint32_t kni_dflt_carrier;
+
+#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */
+
+static int kni_net_id; /* id handed to net_generic() to find the per-netns struct kni_net */
+
+struct kni_net { /* per network namespace KNI state, looked up with net_generic() */
+ unsigned long device_in_use; /* device in use flag */
+ struct mutex kni_kthread_lock; /* taken around creation and stopping of kni_kthread */
+ struct task_struct *kni_kthread; /* shared polling thread of single kthread mode, NULL when not running */
+ struct rw_semaphore kni_list_lock; /* protects kni_list_head */
+ struct list_head kni_list_head; /* struct kni_dev entries linked through their "list" member */
+};
+
+/*
+ * Per network namespace constructor.
+ *
+ * With HAVE_SIMPLIFIED_PERNET_OPERATIONS the kni_net storage is obtained
+ * from net_generic() and only cleared here; otherwise it is allocated with
+ * kzalloc() and attached to the namespace with net_assign_generic().
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error
+ * reported by net_assign_generic().
+ */
+static int __net_init
+kni_init_net(struct net *net)
+{
+	struct kni_net *knet;
+#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+	int ret;
+#endif
+
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+	knet = net_generic(net, kni_net_id);
+	memset(knet, 0, sizeof(*knet));
+#else
+	knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
+	if (knet == NULL)
+		return -ENOMEM;
+#endif
+
+	/* Nobody has opened the device in this namespace yet */
+	clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
+
+	mutex_init(&knet->kni_kthread_lock);
+
+	init_rwsem(&knet->kni_list_lock);
+	INIT_LIST_HEAD(&knet->kni_list_head);
+
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+	return 0;
+#else
+	ret = net_assign_generic(net, kni_net_id, knet);
+	if (ret < 0)
+		kfree(knet);
+
+	return ret;
+#endif
+}
+
+/*
+ * Per network namespace destructor: destroys the kthread mutex and, when
+ * the storage was allocated by kni_init_net() (no simplified pernet
+ * operations), frees it.
+ */
+static void __net_exit
+kni_exit_net(struct net *net)
+{
+	struct kni_net *knet __maybe_unused = net_generic(net, kni_net_id);
+
+	mutex_destroy(&knet->kni_kthread_lock);
+
+#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+	kfree(knet);
+#endif
+}
+
+static struct pernet_operations kni_net_ops = { /* registered from kni_init(), unregistered from kni_exit() */
+ .init = kni_init_net,
+ .exit = kni_exit_net,
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+ .id = &kni_net_id, /* receives the id later used with net_generic() */
+ .size = sizeof(struct kni_net), /* size of the per-namespace storage */
+#endif
+};
+
+/*
+ * Thread function of single kthread mode.
+ *
+ * Until asked to stop, repeatedly walks the device list of the namespace
+ * (KNI_RX_LOOP_NUM passes per acquisition of the list lock), running the
+ * rx handler and the response check for every device. When
+ * RTE_KNI_PREEMPT_DEFAULT is defined the thread sleeps after each batch.
+ */
+static int
+kni_thread_single(void *data)
+{
+	struct kni_net *knet = data;
+	struct kni_dev *kni;
+	int pass;
+
+	for (;;) {
+		if (kthread_should_stop())
+			break;
+
+		down_read(&knet->kni_list_lock);
+		for (pass = 0; pass < KNI_RX_LOOP_NUM; pass++) {
+			list_for_each_entry(kni, &knet->kni_list_head, list) {
+				kni_net_rx(kni);
+				kni_net_poll_resp(kni);
+			}
+		}
+		up_read(&knet->kni_list_lock);
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+		/* give up the CPU for a while */
+		schedule_timeout_interruptible(
+			usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
+#endif
+	}
+
+	return 0;
+}
+
+/*
+ * Thread function of multiple kthread mode: serves exactly one KNI device
+ * (passed as param) until asked to stop, in batches of KNI_RX_LOOP_NUM
+ * polling passes.
+ */
+static int
+kni_thread_multiple(void *param)
+{
+	struct kni_dev *kni = param;
+	int pass;
+
+	for (;;) {
+		if (kthread_should_stop())
+			break;
+
+		for (pass = 0; pass < KNI_RX_LOOP_NUM; pass++) {
+			kni_net_rx(kni);
+			kni_net_poll_resp(kni);
+		}
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+		schedule_timeout_interruptible(
+			usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
+#endif
+	}
+
+	return 0;
+}
+
+/*
+ * open() handler of the KNI misc device.
+ *
+ * Only one opener per network namespace is allowed; a second open fails
+ * with -EBUSY. On success a reference to the caller's namespace is taken
+ * and stored in file->private_data for kni_release().
+ */
+static int
+kni_open(struct inode *inode, struct file *file)
+{
+	struct net *ns = current->nsproxy->net_ns;
+	struct kni_net *knet = net_generic(ns, kni_net_id);
+	int already_open;
+
+	already_open = test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM,
+					&knet->device_in_use);
+	if (already_open)
+		return -EBUSY;
+
+	file->private_data = get_net(ns);
+	pr_debug("/dev/kni opened\n");
+
+	return 0;
+}
+
+/*
+ * Tear down one KNI device.
+ *
+ * The kni_dev structure is the private area of its net_device
+ * (netdev_priv()), so it is released together with the net device by
+ * free_netdev(). The mbufs still sitting in the FIFOs therefore have to be
+ * handed back BEFORE the net device is freed; doing it afterwards would
+ * access freed memory.
+ *
+ * Returns 0 on success, -ENODEV if dev is NULL.
+ */
+static int
+kni_dev_remove(struct kni_dev *dev)
+{
+	if (!dev)
+		return -ENODEV;
+
+	/* Must run while dev is still valid memory */
+	kni_net_release_fifo_phy(dev);
+
+	if (dev->net_dev) {
+		unregister_netdev(dev->net_dev);
+		free_netdev(dev->net_dev);
+	}
+
+	return 0;
+}
+
+/*
+ * release() handler of the KNI misc device.
+ *
+ * Stops the polling thread(s), removes every KNI device of the namespace
+ * stored in file->private_data, clears the in-use flag and drops the
+ * namespace reference taken in kni_open().
+ */
+static int
+kni_release(struct inode *inode, struct file *file)
+{
+	struct net *net = file->private_data;
+	struct kni_net *knet = net_generic(net, kni_net_id);
+	struct kni_dev *dev, *n;
+
+	/* Stop kernel thread for single mode */
+	if (multiple_kthread_on == 0) {
+		mutex_lock(&knet->kni_kthread_lock);
+		/* Stop kernel thread */
+		if (knet->kni_kthread != NULL) {
+			kthread_stop(knet->kni_kthread);
+			knet->kni_kthread = NULL;
+		}
+		mutex_unlock(&knet->kni_kthread_lock);
+	}
+
+	down_write(&knet->kni_list_lock);
+	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
+		/* Stop kernel thread for multiple mode */
+		if (multiple_kthread_on && dev->pthread != NULL) {
+			kthread_stop(dev->pthread);
+			dev->pthread = NULL;
+		}
+
+		/*
+		 * Unlink first: kni_dev_remove() frees the net device whose
+		 * private area holds dev, so dev must not be touched after it.
+		 */
+		list_del(&dev->list);
+		kni_dev_remove(dev);
+	}
+	up_write(&knet->kni_list_lock);
+
+	/* Clear the bit of device in use */
+	clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
+
+	put_net(net);
+	pr_debug("/dev/kni closed\n");
+
+	return 0;
+}
+
+/*
+ * Check that the name requested in dev is not already used by the existing
+ * device kni.
+ *
+ * Returns 0 when the names differ, -1 when they match (within
+ * RTE_KNI_NAMESIZE characters) or when either argument is NULL.
+ */
+static int
+kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
+{
+	if (kni == NULL || dev == NULL)
+		return -1;
+
+	if (strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE) != 0)
+		return 0;
+
+	pr_err("KNI name %s duplicated\n", dev->name);
+	return -1;
+}
+
+/*
+ * Start the polling thread responsible for the new device kni.
+ *
+ * Multiple mode: a dedicated thread is created for the device.
+ * Single mode: the shared per-namespace thread is created if it is not
+ * running yet.
+ * If force_bind is set the created thread is bound to kni->core_id.
+ *
+ * On thread creation failure the device is removed with kni_dev_remove()
+ * and -ECANCELED is returned; the caller must not use kni afterwards.
+ * Returns 0 on success.
+ */
+static int
+kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
+{
+	if (multiple_kthread_on) {
+		kni->pthread = kthread_create(kni_thread_multiple,
+			(void *)kni, "kni_%s", kni->name);
+		if (IS_ERR(kni->pthread)) {
+			kni_dev_remove(kni);
+			return -ECANCELED;
+		}
+
+		if (force_bind)
+			kthread_bind(kni->pthread, kni->core_id);
+		wake_up_process(kni->pthread);
+	} else {
+		mutex_lock(&knet->kni_kthread_lock);
+
+		if (knet->kni_kthread == NULL) {
+			struct task_struct *thread;
+
+			thread = kthread_create(kni_thread_single,
+				(void *)knet, "kni_single");
+			if (IS_ERR(thread)) {
+				/*
+				 * Never store the error pointer in
+				 * knet->kni_kthread: it would look like a
+				 * running thread to later callers and would
+				 * be passed to kthread_stop() on release.
+				 */
+				mutex_unlock(&knet->kni_kthread_lock);
+				kni_dev_remove(kni);
+				return -ECANCELED;
+			}
+
+			if (force_bind)
+				kthread_bind(thread, kni->core_id);
+			knet->kni_kthread = thread;
+			wake_up_process(thread);
+		}
+
+		mutex_unlock(&knet->kni_kthread_lock);
+	}
+
+	return 0;
+}
+
+/*
+ * Handler of RTE_KNI_IOCTL_CREATE: create a KNI network device described
+ * by the struct rte_kni_device_info at user address ioctl_param.
+ *
+ * Steps: copy and validate the request (name termination, requested core,
+ * duplicate name), allocate the net device, translate the FIFO addresses
+ * supplied by user space into kernel virtual addresses, set MAC and MTU,
+ * register the net device, start the polling thread and finally add the
+ * device to the per-namespace list.
+ *
+ * Returns 0 on success or a negative errno value:
+ *   -EINVAL  bad ioctl size, unterminated or duplicate name, offline core,
+ *            or IOVA mode requested without kernel support
+ *   -EFAULT  the request could not be copied from user space
+ *   -EBUSY   the net device could not be allocated
+ *   -ENODEV  register_netdev() failed
+ *   or the error returned by kni_run_thread().
+ */
+static int
+kni_ioctl_create(struct net *net, uint32_t ioctl_num,
+		unsigned long ioctl_param)
+{
+	struct kni_net *knet = net_generic(net, kni_net_id);
+	int ret;
+	struct rte_kni_device_info dev_info;
+	struct net_device *net_dev = NULL;
+	struct kni_dev *kni, *dev;
+
+	pr_info("Creating kni...\n");
+	/* Check the buffer size, to avoid warning */
+	if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
+		return -EINVAL;
+
+	/* Copy kni info from user space */
+	if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)))
+		return -EFAULT;
+
+	/* Check if name is zero-ended */
+	if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) {
+		pr_err("kni.name not zero-terminated");
+		return -EINVAL;
+	}
+
+	/**
+	 * Check if the cpu core id is valid for binding.
+	 */
+	if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
+		pr_err("cpu %u is not online\n", dev_info.core_id);
+		return -EINVAL;
+	}
+
+	/* Check if it has been created (read-only walk, nothing is removed) */
+	down_read(&knet->kni_list_lock);
+	list_for_each_entry(dev, &knet->kni_list_head, list) {
+		if (kni_check_param(dev, &dev_info) < 0) {
+			up_read(&knet->kni_list_lock);
+			return -EINVAL;
+		}
+	}
+	up_read(&knet->kni_list_lock);
+
+	net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
+#ifdef NET_NAME_USER
+							NET_NAME_USER,
+#endif
+							kni_net_init);
+	if (net_dev == NULL) {
+		pr_err("error allocating device \"%s\"\n", dev_info.name);
+		return -EBUSY;
+	}
+
+	dev_net_set(net_dev, net);
+
+	/* The kni_dev lives in the private area of the net device */
+	kni = netdev_priv(net_dev);
+
+	kni->net_dev = net_dev;
+	kni->core_id = dev_info.core_id;
+	strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
+
+	/* Translate user space info into kernel space info */
+	if (dev_info.iova_mode) {
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+		kni->tx_q = iova_to_kva(current, dev_info.tx_phys);
+		kni->rx_q = iova_to_kva(current, dev_info.rx_phys);
+		kni->alloc_q = iova_to_kva(current, dev_info.alloc_phys);
+		kni->free_q = iova_to_kva(current, dev_info.free_phys);
+
+		kni->req_q = iova_to_kva(current, dev_info.req_phys);
+		kni->resp_q = iova_to_kva(current, dev_info.resp_phys);
+		kni->sync_va = dev_info.sync_va;
+		kni->sync_kva = iova_to_kva(current, dev_info.sync_phys);
+		kni->usr_tsk = current;
+		kni->iova_mode = 1;
+#else
+		pr_err("KNI module does not support IOVA to VA translation\n");
+		/* Do not leak the net device allocated above */
+		free_netdev(net_dev);
+		return -EINVAL;
+#endif
+	} else {
+
+		kni->tx_q = phys_to_virt(dev_info.tx_phys);
+		kni->rx_q = phys_to_virt(dev_info.rx_phys);
+		kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
+		kni->free_q = phys_to_virt(dev_info.free_phys);
+
+		kni->req_q = phys_to_virt(dev_info.req_phys);
+		kni->resp_q = phys_to_virt(dev_info.resp_phys);
+		kni->sync_va = dev_info.sync_va;
+		kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+		kni->iova_mode = 0;
+	}
+
+	kni->mbuf_size = dev_info.mbuf_size;
+
+	pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
+		(unsigned long long) dev_info.tx_phys, kni->tx_q);
+	pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
+		(unsigned long long) dev_info.rx_phys, kni->rx_q);
+	pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
+		(unsigned long long) dev_info.alloc_phys, kni->alloc_q);
+	pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n",
+		(unsigned long long) dev_info.free_phys, kni->free_q);
+	pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n",
+		(unsigned long long) dev_info.req_phys, kni->req_q);
+	pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
+		(unsigned long long) dev_info.resp_phys, kni->resp_q);
+	pr_debug("mbuf_size: %u\n", kni->mbuf_size);
+
+	/* if user has provided a valid mac address */
+	if (is_valid_ether_addr(dev_info.mac_addr))
+		memcpy(net_dev->dev_addr, dev_info.mac_addr, ETH_ALEN);
+	else
+		/*
+		 * Generate random mac address. eth_random_addr() is the
+		 * newer version of generating mac address in kernel.
+		 */
+		random_ether_addr(net_dev->dev_addr);
+
+	if (dev_info.mtu)
+		net_dev->mtu = dev_info.mtu;
+#ifdef HAVE_MAX_MTU_PARAM
+	net_dev->max_mtu = net_dev->mtu;
+
+	if (dev_info.min_mtu)
+		net_dev->min_mtu = dev_info.min_mtu;
+
+	if (dev_info.max_mtu)
+		net_dev->max_mtu = dev_info.max_mtu;
+#endif
+
+	ret = register_netdev(net_dev);
+	if (ret) {
+		pr_err("error %i registering device \"%s\"\n",
+					ret, dev_info.name);
+		/*
+		 * The device was never registered: hide it from
+		 * kni_dev_remove() so that only the FIFOs are drained there,
+		 * then free it here.
+		 */
+		kni->net_dev = NULL;
+		kni_dev_remove(kni);
+		free_netdev(net_dev);
+		return -ENODEV;
+	}
+
+	netif_carrier_off(net_dev);
+
+	/* On failure kni_run_thread() has already removed the device */
+	ret = kni_run_thread(knet, kni, dev_info.force_bind);
+	if (ret != 0)
+		return ret;
+
+	down_write(&knet->kni_list_lock);
+	list_add(&kni->list, &knet->kni_list_head);
+	up_write(&knet->kni_list_lock);
+
+	return 0;
+}
+
+/*
+ * Handler of RTE_KNI_IOCTL_RELEASE: remove the KNI device whose name is
+ * given in the struct rte_kni_device_info at user address ioctl_param.
+ *
+ * Returns 0 when a device with that name was found and removed,
+ * -EFAULT when the request cannot be copied from user space and -EINVAL
+ * for a bad ioctl size, an empty or unterminated name, or an unknown name.
+ */
+static int
+kni_ioctl_release(struct net *net, uint32_t ioctl_num,
+		unsigned long ioctl_param)
+{
+	struct kni_net *knet = net_generic(net, kni_net_id);
+	int ret = -EINVAL;
+	struct kni_dev *dev, *n;
+	struct rte_kni_device_info dev_info;
+	size_t name_len;
+
+	if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
+		return -EINVAL;
+
+	if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)))
+		return -EFAULT;
+
+	/*
+	 * The name comes from user space: reject empty names and names that
+	 * are not NUL-terminated inside the field, so that the string
+	 * operations and the "%s" below never read past it.
+	 */
+	name_len = strnlen(dev_info.name, sizeof(dev_info.name));
+	if (name_len == 0 || name_len == sizeof(dev_info.name))
+		return -EINVAL;
+
+	/* Release the network device according to its name */
+	down_write(&knet->kni_list_lock);
+	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
+		if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
+			continue;
+
+		if (multiple_kthread_on && dev->pthread != NULL) {
+			kthread_stop(dev->pthread);
+			dev->pthread = NULL;
+		}
+
+		/*
+		 * Unlink first: kni_dev_remove() frees the net device whose
+		 * private area holds dev, so dev must not be touched after it.
+		 */
+		list_del(&dev->list);
+		kni_dev_remove(dev);
+		ret = 0;
+		break;
+	}
+	up_write(&knet->kni_list_lock);
+	pr_info("%s release kni named %s\n",
+		(ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
+
+	return ret;
+}
+
+/*
+ * ioctl entry point of the KNI misc device: dispatches on the command
+ * number to the create/release handlers, using the network namespace of
+ * the calling task. The test command and unknown commands return -EINVAL.
+ */
+static int
+kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param)
+{
+	struct net *net = current->nsproxy->net_ns;
+	const unsigned int nr = _IOC_NR(ioctl_num);
+
+	pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
+
+	/* For test only, not used */
+	if (nr == _IOC_NR(RTE_KNI_IOCTL_TEST))
+		return -EINVAL;
+
+	if (nr == _IOC_NR(RTE_KNI_IOCTL_CREATE))
+		return kni_ioctl_create(net, ioctl_num, ioctl_param);
+
+	if (nr == _IOC_NR(RTE_KNI_IOCTL_RELEASE))
+		return kni_ioctl_release(net, ioctl_num, ioctl_param);
+
+	pr_debug("IOCTL default\n");
+	return -EINVAL;
+}
+
+/*
+ * compat ioctl entry point. 32 bits applications on a 64 bits OS are not
+ * supported yet, so every request is rejected with -EINVAL.
+ */
+static int
+kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num,
+		unsigned long ioctl_param)
+{
+	pr_debug("Not implemented.\n");
+
+	return -EINVAL;
+}
+
+static const struct file_operations kni_fops = { /* file operations of the KNI misc device */
+ .owner = THIS_MODULE,
+ .open = kni_open,
+ .release = kni_release,
+ .unlocked_ioctl = (void *)kni_ioctl, /* cast needed: the handler's prototype differs from the member type */
+ .compat_ioctl = (void *)kni_compat_ioctl, /* same cast; this handler always returns -EINVAL */
+};
+
+static struct miscdevice kni_misc = { /* registered with misc_register() in kni_init() */
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = KNI_DEVICE,
+ .fops = &kni_fops,
+};
+
+/*
+ * Interpret the kthread_mode module parameter.
+ *
+ * Unset or "single" keeps single kthread mode, "multiple" enables one
+ * kthread per device. Returns 0 on success, -1 for any other value.
+ */
+static int __init
+kni_parse_kthread_mode(void)
+{
+	if (kthread_mode == NULL || strcmp(kthread_mode, "single") == 0)
+		return 0;
+
+	if (strcmp(kthread_mode, "multiple") != 0)
+		return -1;
+
+	multiple_kthread_on = 1;
+	return 0;
+}
+
+/*
+ * Interpret the carrier module parameter.
+ *
+ * Unset or "off" selects carrier off (0), "on" selects carrier on (1).
+ * Returns 0 on success, -1 for any other value (kni_dflt_carrier is then
+ * left untouched).
+ */
+static int __init
+kni_parse_carrier_state(void)
+{
+	if (carrier == NULL || strcmp(carrier, "off") == 0) {
+		kni_dflt_carrier = 0;
+		return 0;
+	}
+
+	if (strcmp(carrier, "on") == 0) {
+		kni_dflt_carrier = 1;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Module entry point: validates the module parameters, registers the per
+ * network namespace operations and the misc device, then configures the
+ * loopback mode.
+ *
+ * Returns 0 on success, -EINVAL for a bad parameter, -EPERM if the pernet
+ * registration fails, or the error of misc_register() (the pernet
+ * registration is undone in that case).
+ */
+static int __init
+kni_init(void)
+{
+	int err;
+
+	if (kni_parse_kthread_mode() < 0) {
+		pr_err("Invalid parameter for kthread_mode\n");
+		return -EINVAL;
+	}
+
+	if (multiple_kthread_on != 0)
+		pr_debug("Multiple kernel thread mode enabled\n");
+	else
+		pr_debug("Single kernel thread for all KNI devices\n");
+
+	if (kni_parse_carrier_state() < 0) {
+		pr_err("Invalid parameter for carrier\n");
+		return -EINVAL;
+	}
+
+	if (kni_dflt_carrier != 0)
+		pr_debug("Default carrier state set to on.\n");
+	else
+		pr_debug("Default carrier state set to off.\n");
+
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+	err = register_pernet_subsys(&kni_net_ops);
+#else
+	err = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+#endif
+	if (err)
+		return -EPERM;
+
+	err = misc_register(&kni_misc);
+	if (err == 0) {
+		/* Configure the lo mode according to the input parameter */
+		kni_net_config_lo_mode(lo_mode);
+		return 0;
+	}
+
+	pr_err("Misc registration failed\n");
+
+	/* Roll back the pernet registration done above */
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+	unregister_pernet_subsys(&kni_net_ops);
+#else
+	unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
+#endif
+	return err;
+}
+
+/*
+ * Module exit point: undoes kni_init() in reverse order, first the misc
+ * device, then the per network namespace operations.
+ */
+static void __exit
+kni_exit(void)
+{
+	misc_deregister(&kni_misc);
+
+#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+	unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
+#else
+	unregister_pernet_subsys(&kni_net_ops);
+#endif
+}
+
+module_init(kni_init); /* module load entry point */
+module_exit(kni_exit); /* module unload entry point */
+
+module_param(lo_mode, charp, 0644); /* string handed to kni_net_config_lo_mode() in kni_init() */
+MODULE_PARM_DESC(lo_mode,
+"KNI loopback mode (default=lo_mode_none):\n"
+"\t\tlo_mode_none Kernel loopback disabled\n"
+"\t\tlo_mode_fifo Enable kernel loopback with fifo\n"
+"\t\tlo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n"
+"\t\t"
+);
+
+module_param(kthread_mode, charp, 0644); /* parsed by kni_parse_kthread_mode() */
+MODULE_PARM_DESC(kthread_mode,
+"Kernel thread mode (default=single):\n"
+"\t\tsingle Single kernel thread mode enabled.\n"
+"\t\tmultiple Multiple kernel thread mode enabled.\n"
+"\t\t"
+);
+
+module_param(carrier, charp, 0644); /* parsed by kni_parse_carrier_state() */
+MODULE_PARM_DESC(carrier,
+"Default carrier state for KNI interface (default=off):\n"
+"\t\toff Interfaces will be created with carrier state set to off.\n"
+"\t\ton Interfaces will be created with carrier state set to on.\n"
+"\t\t"
+);
diff --git a/src/spdk/dpdk/kernel/linux/kni/kni_net.c b/src/spdk/dpdk/kernel/linux/kni/kni_net.c
new file mode 100644
index 000000000..c82c881a2
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/kni_net.c
@@ -0,0 +1,844 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(c) 2010-2014 Intel Corporation.
+ */
+
+/*
+ * This code is inspired from the book "Linux Device Drivers" by
+ * Alessandro Rubini and Jonathan Corbet, published by O'Reilly & Associates
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h> /* eth_type_trans */
+#include <linux/ethtool.h>
+#include <linux/skbuff.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+
+#include <rte_kni_common.h>
+#include <kni_fifo.h>
+
+#include "compat.h"
+#include "kni_dev.h"
+
+#define WD_TIMEOUT 5 /* jiffies */
+
+#define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */
+
+/* Type of the functions that process the rx queue of one KNI device */
+typedef void (*kni_net_rx_t)(struct kni_dev *kni);
+
+static void kni_net_rx_normal(struct kni_dev *kni);
+
+/* rx handler invoked by kni_net_rx(); defaults to the normal rx path */
+static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
+
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+/*
+ * iova to kernel virtual address: the IOVA is first resolved to a physical
+ * address through the user task saved in kni->usr_tsk, then mapped with
+ * phys_to_virt().
+ */
+static inline void *
+iova2kva(struct kni_dev *kni, void *iova)
+{
+ return phys_to_virt(iova_to_phys(kni->usr_tsk, (unsigned long)iova));
+}
+
+static inline void * /* kernel virtual address of the data of mbuf m (buffer IOVA resolved, then data_off added) */
+iova2data_kva(struct kni_dev *kni, struct rte_kni_mbuf *m)
+{
+ return phys_to_virt(iova_to_phys(kni->usr_tsk, m->buf_physaddr) +
+ m->data_off);
+}
+#endif
+
+/* physical address to kernel virtual address, using phys_to_virt() */
+static void *
+pa2kva(void *pa)
+{
+ return phys_to_virt((unsigned long)pa);
+}
+
+/*
+ * Convert the address pa of an mbuf into the address stored in the FIFOs
+ * read by user space, by applying the offset between the buffer address
+ * (buf_addr) and the buffer physical address (buf_physaddr) recorded in
+ * mbuf m.
+ */
+static void *
+pa2va(void *pa, struct rte_kni_mbuf *m)
+{
+	unsigned long offset;
+
+	offset = (unsigned long)m->buf_addr - (unsigned long)m->buf_physaddr;
+
+	return (void *)((unsigned long)pa + offset);
+}
+
+/*
+ * mbuf data kernel virtual address from mbuf kernel virtual address:
+ * buffer physical address plus data offset, mapped with phys_to_virt()
+ */
+static void *
+kva2data_kva(struct rte_kni_mbuf *m)
+{
+ return phys_to_virt(m->buf_physaddr + m->data_off);
+}
+
+static inline void * /* kernel virtual address of the mbuf at address pa, honouring the device's iova_mode */
+get_kva(struct kni_dev *kni, void *pa)
+{
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+ if (kni->iova_mode == 1)
+ return iova2kva(kni, pa); /* pa is an IOVA in this mode */
+#endif
+ return pa2kva(pa);
+}
+
+static inline void * /* kernel virtual address of the packet data of the mbuf at pkt_kva, honouring the device's iova_mode */
+get_data_kva(struct kni_dev *kni, void *pkt_kva)
+{
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+ if (kni->iova_mode == 1)
+ return iova2data_kva(kni, pkt_kva);
+#endif
+ return kva2data_kva(pkt_kva);
+}
+
+/*
+ * Send a request to the user space application and wait for its answer.
+ *
+ * The request is copied into the shared sync buffer, its address is queued
+ * on req_q and the caller sleeps (up to 3 seconds, interruptible) until
+ * something shows up on resp_q. On success the answer is copied back into
+ * req. The whole exchange is serialised by kni->sync_lock.
+ *
+ * Returns 0 on success, -EINVAL for NULL arguments, -EBUSY if req_q is
+ * full, -ETIME on timeout or signal, -ENODATA on an unexpected response.
+ */
+static int
+kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
+{
+	void *resp_va;
+	uint32_t cnt;
+	int wait_rc;
+	int err;
+
+	if (kni == NULL || req == NULL) {
+		pr_err("No kni instance or request\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&kni->sync_lock);
+
+	/* Publish the request through the shared sync buffer */
+	memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
+	cnt = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
+	if (cnt < 1) {
+		pr_err("Cannot send to req_q\n");
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	wait_rc = wait_event_interruptible_timeout(kni->wq,
+			kni_fifo_count(kni->resp_q), 3 * HZ);
+	if (wait_rc <= 0 || signal_pending(current)) {
+		err = -ETIME;
+		goto unlock;
+	}
+
+	cnt = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
+	if (cnt != 1 || resp_va != kni->sync_va) {
+		/* This should never happen */
+		pr_err("No data in resp_q\n");
+		err = -ENODATA;
+		goto unlock;
+	}
+
+	/* Hand the answer back to the caller */
+	memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
+	err = 0;
+
+unlock:
+	mutex_unlock(&kni->sync_lock);
+	return err;
+}
+
+/*
+ * Open: start the tx queue, apply the default carrier state and ask user
+ * space to bring the interface up.
+ *
+ * Returns the result reported by user space, or the error of the request
+ * exchange itself.
+ */
+static int
+kni_net_open(struct net_device *dev)
+{
+	struct kni_dev *kni = netdev_priv(dev);
+	struct rte_kni_request req;
+	int err;
+
+	netif_start_queue(dev);
+	if (kni_dflt_carrier != 1)
+		netif_carrier_off(dev);
+	else
+		netif_carrier_on(dev);
+
+	memset(&req, 0, sizeof(req));
+	req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
+	req.if_up = 1;	/* non-zero means up */
+
+	err = kni_net_process_request(kni, &req);
+	if (err != 0)
+		return err;
+
+	return req.result;
+}
+
+/*
+ * Close: stop the tx queue, drop the carrier and ask user space to bring
+ * the interface down.
+ *
+ * Returns the result reported by user space, or the error of the request
+ * exchange itself.
+ */
+static int
+kni_net_release(struct net_device *dev)
+{
+	struct kni_dev *kni = netdev_priv(dev);
+	struct rte_kni_request req;
+	int err;
+
+	/* can't transmit any more */
+	netif_stop_queue(dev);
+	netif_carrier_off(dev);
+
+	memset(&req, 0, sizeof(req));
+	req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
+	req.if_up = 0;	/* 0 means down */
+
+	err = kni_net_process_request(kni, &req);
+	if (err != 0)
+		return err;
+
+	return req.result;
+}
+
+/*
+ * Move every mbuf queued on src_pa to dst_va, converting the addresses on
+ * the way (including the "next" links of chained mbufs).
+ *
+ * Works in bursts of at most MBUF_BURST_SZ entries and stops when src_pa is
+ * empty or dst_va has no room left.
+ *
+ * Chained segments are translated with get_kva(), like the head mbuf, so
+ * that the walk is also correct when the device runs in IOVA mode.
+ */
+static void
+kni_fifo_trans_pa2va(struct kni_dev *kni,
+	struct rte_kni_fifo *src_pa, struct rte_kni_fifo *dst_va)
+{
+	uint32_t ret, i, num_dst, num_rx;
+	struct rte_kni_mbuf *kva, *prev_kva;
+	int nb_segs;
+	int kva_nb_segs;
+
+	do {
+		num_dst = kni_fifo_free_count(dst_va);
+		if (num_dst == 0)
+			return;
+
+		num_rx = min_t(uint32_t, num_dst, MBUF_BURST_SZ);
+
+		num_rx = kni_fifo_get(src_pa, kni->pa, num_rx);
+		if (num_rx == 0)
+			return;
+
+		for (i = 0; i < num_rx; i++) {
+			kva = get_kva(kni, kni->pa[i]);
+			kni->va[i] = pa2va(kni->pa[i], kva);
+
+			kva_nb_segs = kva->nb_segs;
+			for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
+				if (!kva->next)
+					break;
+
+				prev_kva = kva;
+				kva = get_kva(kni, kva->next);
+				/* Convert physical address to virtual address */
+				prev_kva->next = pa2va(prev_kva->next, kva);
+			}
+		}
+
+		ret = kni_fifo_put(dst_va, kni->va, num_rx);
+		if (ret != num_rx) {
+			/* Failing should not happen */
+			pr_err("Fail to enqueue entries into dst_va\n");
+			return;
+		}
+	} while (1);
+}
+
+/*
+ * Hand the mbufs still queued for the kernel back to user space through
+ * free_q when a KNI device is released.
+ */
+void kni_net_release_fifo_phy(struct kni_dev *kni)
+{
+	struct rte_kni_fifo *dst = kni->free_q;
+
+	/* rx_q goes first, because it can't be released in userspace */
+	kni_fifo_trans_pa2va(kni, kni->rx_q, dst);
+
+	/* then alloc_q, to speed up the kni release in userspace */
+	kni_fifo_trans_pa2va(kni, kni->alloc_q, dst);
+}
+
+/*
+ * Configuration changes (passed on by ifconfig).
+ *
+ * Refused with -EBUSY while the interface is up; otherwise accepted
+ * without acting on any field of map.
+ */
+static int
+kni_net_config(struct net_device *dev, struct ifmap *map)
+{
+	/* can't act on a running interface */
+	return (dev->flags & IFF_UP) ? -EBUSY : 0;
+}
+
+/*
+ * Transmit a packet (called by the kernel).
+ *
+ * The skb payload is copied into an mbuf taken from alloc_q, padded with
+ * zeros up to ETH_ZLEN if shorter, and the mbuf is queued on tx_q for user
+ * space. The skb is always consumed: on any failure it is dropped and
+ * counted in tx_dropped. Always returns NETDEV_TX_OK.
+ */
+static int
+kni_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	struct kni_dev *kni = netdev_priv(dev);
+	struct rte_kni_mbuf *mbuf_kva = NULL;
+	void *mbuf_pa = NULL;
+	void *mbuf_va = NULL;
+	void *payload;
+	uint32_t cnt;
+	int len = 0;
+
+	/* save the timestamp */
+#ifdef HAVE_TRANS_START_HELPER
+	netif_trans_update(dev);
+#else
+	dev->trans_start = jiffies;
+#endif
+
+	/* The packet has to fit into a single mbuf */
+	if (skb->len > kni->mbuf_size)
+		goto drop;
+
+	/* A free slot in tx_q and an available mbuf in alloc_q are needed */
+	if (kni_fifo_free_count(kni->tx_q) == 0)
+		goto drop;
+	if (kni_fifo_count(kni->alloc_q) == 0)
+		goto drop;
+
+	/* dequeue a mbuf from alloc_q */
+	cnt = kni_fifo_get(kni->alloc_q, &mbuf_pa, 1);
+	if (unlikely(cnt != 1)) {
+		/* Failing should not happen */
+		pr_err("Fail to dequeue mbuf from alloc_q\n");
+		goto drop;
+	}
+
+	mbuf_kva = get_kva(kni, mbuf_pa);
+	payload = get_data_kva(kni, mbuf_kva);
+	mbuf_va = pa2va(mbuf_pa, mbuf_kva);
+
+	len = skb->len;
+	memcpy(payload, skb->data, len);
+	if (unlikely(len < ETH_ZLEN)) {
+		memset(payload + len, 0, ETH_ZLEN - len);
+		len = ETH_ZLEN;
+	}
+	mbuf_kva->pkt_len = len;
+	mbuf_kva->data_len = len;
+
+	/* enqueue mbuf into tx_q */
+	cnt = kni_fifo_put(kni->tx_q, &mbuf_va, 1);
+	if (unlikely(cnt != 1)) {
+		/* Failing should not happen */
+		pr_err("Fail to enqueue mbuf into tx_q\n");
+		goto drop;
+	}
+
+	/* Free skb and update statistics */
+	dev_kfree_skb(skb);
+	dev->stats.tx_bytes += len;
+	dev->stats.tx_packets++;
+
+	return NETDEV_TX_OK;
+
+drop:
+	/* Free skb and update statistics */
+	dev_kfree_skb(skb);
+	dev->stats.tx_dropped++;
+
+	return NETDEV_TX_OK;
+}
+
+/*
+ * RX: normal working mode.
+ *
+ * Dequeues a burst of mbufs from rx_q (bounded by MBUF_BURST_SZ and by the
+ * room left in free_q), copies each packet, segment by segment, into a
+ * freshly allocated skb, passes the skb to the network stack and finally
+ * returns all mbufs to user space through free_q.
+ *
+ * Chained segments are translated with get_kva()/get_data_kva(), like the
+ * head mbuf, so that the walk is also correct in IOVA mode.
+ */
+static void
+kni_net_rx_normal(struct kni_dev *kni)
+{
+	uint32_t ret;
+	uint32_t len;
+	uint32_t i, num_rx, num_fq;
+	struct rte_kni_mbuf *kva, *prev_kva;
+	void *data_kva;
+	struct sk_buff *skb;
+	struct net_device *dev = kni->net_dev;
+
+	/* Get the number of free entries in free_q */
+	num_fq = kni_fifo_free_count(kni->free_q);
+	if (num_fq == 0) {
+		/* No room on the free_q, bail out */
+		return;
+	}
+
+	/* Calculate the number of entries to dequeue from rx_q */
+	num_rx = min_t(uint32_t, num_fq, MBUF_BURST_SZ);
+
+	/* Burst dequeue from rx_q */
+	num_rx = kni_fifo_get(kni->rx_q, kni->pa, num_rx);
+	if (num_rx == 0)
+		return;
+
+	/* Transfer received packets to netif */
+	for (i = 0; i < num_rx; i++) {
+		kva = get_kva(kni, kni->pa[i]);
+		len = kva->pkt_len;
+		data_kva = get_data_kva(kni, kva);
+		/* Recorded before any early exit so the mbuf is always freed */
+		kni->va[i] = pa2va(kni->pa[i], kva);
+
+		skb = netdev_alloc_skb(dev, len);
+		if (!skb) {
+			/* Update statistics */
+			dev->stats.rx_dropped++;
+			continue;
+		}
+
+		if (kva->nb_segs == 1) {
+			memcpy(skb_put(skb, len), data_kva, len);
+		} else {
+			int nb_segs;
+			int kva_nb_segs = kva->nb_segs;
+
+			for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
+				memcpy(skb_put(skb, kva->data_len),
+					data_kva, kva->data_len);
+
+				if (!kva->next)
+					break;
+
+				prev_kva = kva;
+				kva = get_kva(kni, kva->next);
+				data_kva = get_data_kva(kni, kva);
+				/* Convert physical address to virtual address */
+				prev_kva->next = pa2va(prev_kva->next, kva);
+			}
+		}
+
+		skb->protocol = eth_type_trans(skb, dev);
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+		/* Call netif interface */
+		netif_rx_ni(skb);
+
+		/* Update statistics */
+		dev->stats.rx_bytes += len;
+		dev->stats.rx_packets++;
+	}
+
+	/* Burst enqueue mbufs into free_q */
+	ret = kni_fifo_put(kni->free_q, kni->va, num_rx);
+	if (ret != num_rx)
+		/* Failing should not happen */
+		pr_err("Fail to enqueue entries into free_q\n");
+}
+
+/*
+ * RX: loopback with enqueue/dequeue fifos.
+ *
+ * Every packet received on rx_q is copied into an mbuf taken from alloc_q
+ * and sent straight back through tx_q; the received mbufs are returned to
+ * user space through free_q. The burst size is limited by the fill level
+ * of all four queues and by MBUF_BURST_SZ.
+ *
+ * Chained segments are translated with get_kva(), like the head mbuf, so
+ * that the walk is also correct in IOVA mode.
+ */
+static void
+kni_net_rx_lo_fifo(struct kni_dev *kni)
+{
+	uint32_t ret;
+	uint32_t len;
+	uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
+	struct rte_kni_mbuf *kva, *next_kva;
+	void *data_kva;
+	struct rte_kni_mbuf *alloc_kva;
+	void *alloc_data_kva;
+	struct net_device *dev = kni->net_dev;
+
+	/* Get the number of entries in rx_q */
+	num_rq = kni_fifo_count(kni->rx_q);
+
+	/* Get the number of free entries in tx_q */
+	num_tq = kni_fifo_free_count(kni->tx_q);
+
+	/* Get the number of entries in alloc_q */
+	num_aq = kni_fifo_count(kni->alloc_q);
+
+	/* Get the number of free entries in free_q */
+	num_fq = kni_fifo_free_count(kni->free_q);
+
+	/* Calculate the number of entries to be dequeued from rx_q */
+	num = min(num_rq, num_tq);
+	num = min(num, num_aq);
+	num = min(num, num_fq);
+	num = min_t(uint32_t, num, MBUF_BURST_SZ);
+
+	/* Return if no entry to dequeue from rx_q */
+	if (num == 0)
+		return;
+
+	/* Burst dequeue from rx_q */
+	ret = kni_fifo_get(kni->rx_q, kni->pa, num);
+	if (ret == 0)
+		return; /* Failing should not happen */
+
+	/* Dequeue entries from alloc_q */
+	ret = kni_fifo_get(kni->alloc_q, kni->alloc_pa, num);
+	if (ret) {
+		num = ret;
+		/* Copy mbufs */
+		for (i = 0; i < num; i++) {
+			kva = get_kva(kni, kni->pa[i]);
+			len = kva->data_len;
+			data_kva = get_data_kva(kni, kva);
+			kni->va[i] = pa2va(kni->pa[i], kva);
+
+			while (kva->next) {
+				next_kva = get_kva(kni, kva->next);
+				/* Convert physical address to virtual address */
+				kva->next = pa2va(kva->next, next_kva);
+				kva = next_kva;
+			}
+
+			alloc_kva = get_kva(kni, kni->alloc_pa[i]);
+			alloc_data_kva = get_data_kva(kni, alloc_kva);
+			kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
+
+			memcpy(alloc_data_kva, data_kva, len);
+			alloc_kva->pkt_len = len;
+			alloc_kva->data_len = len;
+
+			dev->stats.tx_bytes += len;
+			dev->stats.rx_bytes += len;
+		}
+
+		/* Burst enqueue mbufs into tx_q */
+		ret = kni_fifo_put(kni->tx_q, kni->alloc_va, num);
+		if (ret != num)
+			/* Failing should not happen */
+			pr_err("Fail to enqueue mbufs into tx_q\n");
+	}
+
+	/* Burst enqueue mbufs into free_q */
+	ret = kni_fifo_put(kni->free_q, kni->va, num);
+	if (ret != num)
+		/* Failing should not happen */
+		pr_err("Fail to enqueue mbufs into free_q\n");
+
+	/**
+	 * Update statistic, and enqueue/dequeue failure is impossible,
+	 * as all queues are checked at first.
+	 */
+	dev->stats.tx_packets += num;
+	dev->stats.rx_packets += num;
+}
+
+/*
+ * RX: loopback with enqueue/dequeue fifos and sk buffer copies.
+ *
+ * Each packet dequeued from rx_q is copied into an skb twice (the first
+ * copy is thrown away, to simulate real usage) and the second skb is sent
+ * back through kni_net_tx(). All dequeued mbufs are then returned to user
+ * space through free_q.
+ */
+static void
+kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
+{
+	struct net_device *dev = kni->net_dev;
+	struct rte_kni_mbuf *seg, *prev_seg;
+	struct sk_buff *skb;
+	void *payload;
+	uint32_t pending, room, burst;
+	uint32_t idx, pkt_len, cnt;
+
+	/* Entries waiting in rx_q and free slots in free_q */
+	pending = kni_fifo_count(kni->rx_q);
+	room = kni_fifo_free_count(kni->free_q);
+
+	/* Size of the burst to dequeue from rx_q */
+	burst = min(pending, room);
+	burst = min_t(uint32_t, burst, MBUF_BURST_SZ);
+	if (burst == 0)
+		return;
+
+	/* Burst dequeue mbufs from rx_q */
+	cnt = kni_fifo_get(kni->rx_q, kni->pa, burst);
+	if (cnt == 0)
+		return;
+
+	/* Copy mbufs to sk buffer and then call tx interface */
+	for (idx = 0; idx < burst; idx++) {
+		seg = get_kva(kni, kni->pa[idx]);
+		pkt_len = seg->pkt_len;
+		payload = get_data_kva(kni, seg);
+		kni->va[idx] = pa2va(kni->pa[idx], seg);
+
+		/* First, throw-away copy */
+		skb = netdev_alloc_skb(dev, pkt_len);
+		if (skb != NULL) {
+			memcpy(skb_put(skb, pkt_len), payload, pkt_len);
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			dev_kfree_skb(skb);
+		}
+
+		/* Simulate real usage, allocate/copy skb twice */
+		skb = netdev_alloc_skb(dev, pkt_len);
+		if (!skb) {
+			dev->stats.rx_dropped++;
+			continue;
+		}
+
+		if (seg->nb_segs == 1) {
+			memcpy(skb_put(skb, pkt_len), payload, pkt_len);
+		} else {
+			int left;
+
+			for (left = seg->nb_segs; left > 0; left--) {
+				memcpy(skb_put(skb, seg->data_len),
+					payload, seg->data_len);
+
+				if (!seg->next)
+					break;
+
+				prev_seg = seg;
+				seg = get_kva(kni, seg->next);
+				payload = get_data_kva(kni, seg);
+				/* Convert physical address to virtual address */
+				prev_seg->next = pa2va(prev_seg->next, seg);
+			}
+		}
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+		dev->stats.rx_bytes += pkt_len;
+		dev->stats.rx_packets++;
+
+		/* call tx interface */
+		kni_net_tx(skb, dev);
+	}
+
+	/* enqueue all the mbufs from rx_q into free_q */
+	cnt = kni_fifo_put(kni->free_q, kni->va, burst);
+	if (cnt != burst)
+		/* Failing should not happen */
+		pr_err("Fail to enqueue mbufs into free_q\n");
+}
+
+/*
+ * rx interface: runs the currently selected rx handler on the device.
+ * The handler pointer has a default value, so no NULL check is needed.
+ */
+void
+kni_net_rx(struct kni_dev *kni)
+{
+	kni_net_rx_func(kni);
+}
+
+/*
+ * Transmit timeout handler: counts a tx error and wakes the tx queue up
+ * again. The prototype depends on HAVE_TX_TIMEOUT_TXQUEUE.
+ */
+#ifdef HAVE_TX_TIMEOUT_TXQUEUE
+static void
+kni_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
+#else
+static void
+kni_net_tx_timeout(struct net_device *dev)
+#endif
+{
+	pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies,
+			jiffies - dev_trans_start(dev));
+
+	dev->stats.tx_errors++;
+
+	netif_wake_queue(dev);
+}
+
+/*
+ * Change the MTU: the new value is sent to user space and applied to the
+ * net device only if user space accepts it.
+ *
+ * Returns the result reported by user space, or the error of the request
+ * exchange itself.
+ */
+static int
+kni_net_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct kni_dev *kni = netdev_priv(dev);
+	struct rte_kni_request req;
+	int err;
+
+	pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
+
+	memset(&req, 0, sizeof(req));
+	req.req_id = RTE_KNI_REQ_CHANGE_MTU;
+	req.new_mtu = new_mtu;
+
+	err = kni_net_process_request(kni, &req);
+	if (err != 0)
+		return err;
+
+	if (req.result == 0)
+		dev->mtu = new_mtu;
+
+	return req.result;
+}
+
+/*
+ * Forward a change of the all-multicast or promiscuous flag to user space.
+ *
+ * flags holds the bits that changed; the new state is read from
+ * netdev->flags. When both bits are set in flags the promiscuous request
+ * id is the one that ends up in the request. The outcome of the request is
+ * not checked.
+ */
+static void
+kni_net_change_rx_flags(struct net_device *netdev, int flags)
+{
+	struct kni_dev *kni = netdev_priv(netdev);
+	struct rte_kni_request req;
+
+	memset(&req, 0, sizeof(req));
+
+	if (flags & IFF_ALLMULTI) {
+		req.req_id = RTE_KNI_REQ_CHANGE_ALLMULTI;
+		req.allmulti = (netdev->flags & IFF_ALLMULTI) ? 1 : 0;
+	}
+
+	if (flags & IFF_PROMISC) {
+		req.req_id = RTE_KNI_REQ_CHANGE_PROMISC;
+		req.promiscusity = (netdev->flags & IFF_PROMISC) ? 1 : 0;
+	}
+
+	kni_net_process_request(kni, &req);
+}
+
+/*
+ * Check whether a response message has been provided: if resp_q holds at
+ * least one entry, wake up the waiters on kni->wq.
+ */
+void
+kni_net_poll_resp(struct kni_dev *kni)
+{
+	if (!kni_fifo_count(kni->resp_q))
+		return;
+
+	wake_up_interruptible(&kni->wq);
+}
+
+/*
+ * Fill the eth header.
+ *
+ * Pushes ETH_HLEN bytes onto the skb and writes the source address,
+ * destination address and protocol type into them. A NULL saddr or daddr
+ * is replaced by the device's own address. The len argument is not used.
+ *
+ * Returns dev->hard_header_len.
+ */
+static int
+kni_net_header(struct sk_buff *skb, struct net_device *dev,
+		unsigned short type, const void *daddr,
+		const void *saddr, uint32_t len)
+{
+	struct ethhdr *hdr = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+	const void *src = saddr;
+	const void *dst = daddr;
+
+	if (!src)
+		src = dev->dev_addr;
+	if (!dst)
+		dst = dev->dev_addr;
+
+	memcpy(hdr->h_source, src, dev->addr_len);
+	memcpy(hdr->h_dest, dst, dev->addr_len);
+	hdr->h_proto = htons(type);
+
+	return dev->hard_header_len;
+}
+
+/*
+ * Re-fill the eth header: both the source and the destination address at
+ * skb->data are overwritten with the address of skb->dev.
+ *
+ * Always returns 0.
+ */
+#ifdef HAVE_REBUILD_HEADER
+static int
+kni_net_rebuild_header(struct sk_buff *skb)
+{
+	struct ethhdr *hdr = (struct ethhdr *) skb->data;
+	struct net_device *netdev = skb->dev;
+	const void *own_addr = netdev->dev_addr;
+
+	memcpy(hdr->h_source, own_addr, netdev->addr_len);
+	memcpy(hdr->h_dest, own_addr, netdev->addr_len);
+
+	return 0;
+}
+#endif /* < 4.1.0 */
+
+/**
+ * kni_net_set_mac - Change the Ethernet Address of the KNI NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure (used as a struct sockaddr)
+ *
+ * Returns -EADDRNOTAVAIL if the address is not a valid Ethernet address,
+ * the non-zero value from kni_net_process_request() if the request could
+ * not be processed, otherwise the result field of the request.
+ **/
+static int
+kni_net_set_mac(struct net_device *netdev, void *p)
+{
+	struct kni_dev *kni = netdev_priv(netdev);
+	struct sockaddr *addr = p;
+	struct rte_kni_request req;
+	int ret;
+
+	if (!is_valid_ether_addr((unsigned char *)(addr->sa_data)))
+		return -EADDRNOTAVAIL;
+
+	memset(&req, 0, sizeof(req));
+	req.req_id = RTE_KNI_REQ_CHANGE_MAC_ADDR;
+	memcpy(req.mac_addr, addr->sa_data, netdev->addr_len);
+
+	/*
+	 * NOTE(review): dev_addr is written before the request is processed,
+	 * so it keeps the new address even when the request fails.
+	 */
+	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+
+	ret = kni_net_process_request(kni, &req);
+	if (ret != 0)
+		return ret;
+
+	return req.result;
+}
+
+#ifdef HAVE_CHANGE_CARRIER_CB
+/*
+ * Switch the carrier state of the device on or off according to
+ * new_carrier. Always returns 0.
+ */
+static int
+kni_net_change_carrier(struct net_device *dev, bool new_carrier)
+{
+	if (!new_carrier) {
+		netif_carrier_off(dev);
+		return 0;
+	}
+
+	netif_carrier_on(dev);
+	return 0;
+}
+#endif
+
+/* Link-layer header callbacks installed on the device by kni_net_init(). */
+static const struct header_ops kni_net_header_ops = {
+	.cache = NULL, /* disable caching */
+	.parse = eth_header_parse,
+	.create = kni_net_header,
+#ifdef HAVE_REBUILD_HEADER
+	.rebuild = kni_net_rebuild_header,
+#endif /* < 4.1.0 */
+};
+
+/* Net device callbacks installed on the device by kni_net_init(). */
+static const struct net_device_ops kni_net_netdev_ops = {
+	/* open / stop */
+	.ndo_open = kni_net_open,
+	.ndo_stop = kni_net_release,
+
+	/* transmit path */
+	.ndo_start_xmit = kni_net_tx,
+	.ndo_tx_timeout = kni_net_tx_timeout,
+
+	/* configuration changes */
+	.ndo_set_config = kni_net_config,
+	.ndo_change_mtu = kni_net_change_mtu,
+	.ndo_change_rx_flags = kni_net_change_rx_flags,
+	.ndo_set_mac_address = kni_net_set_mac,
+#ifdef HAVE_CHANGE_CARRIER_CB
+	.ndo_change_carrier = kni_net_change_carrier,
+#endif
+};
+
+/*
+ * ethtool get_drvinfo callback: report "kni" as the driver name and
+ * KNI_VERSION as the version string. The dev argument is not used.
+ */
+static void
+kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, "kni", sizeof(info->driver));
+	strlcpy(info->version, KNI_VERSION, sizeof(info->version));
+}
+
+/* ethtool callbacks installed on the device by kni_net_init(). */
+static const struct ethtool_ops kni_net_ethtool_ops = {
+	.get_link = ethtool_op_get_link,
+	.get_drvinfo = kni_get_drvinfo,
+};
+
+/*
+ * Initialise a KNI net device: set up the wait queue and sync lock in its
+ * private data, then install the KNI callback tables and the watchdog
+ * timeout on the device.
+ */
+void
+kni_net_init(struct net_device *dev)
+{
+	struct kni_dev *kni = netdev_priv(dev);
+
+	mutex_init(&kni->sync_lock);
+	init_waitqueue_head(&kni->wq);
+
+	/*
+	 * ether_setup() assigns some of the fields; the KNI-specific
+	 * assignments are made after it.
+	 */
+	ether_setup(dev);
+	dev->watchdog_timeo = WD_TIMEOUT;
+	dev->ethtool_ops = &kni_net_ethtool_ops;
+	dev->header_ops = &kni_net_header_ops;
+	dev->netdev_ops = &kni_net_netdev_ops;
+}
+
+/*
+ * Select the RX handler from the loopback mode string.
+ *
+ * @lo_str: loopback mode name, may be NULL
+ *
+ * NULL or "lo_mode_none" leaves kni_net_rx_func untouched.
+ * "lo_mode_fifo" installs kni_net_rx_lo_fifo and "lo_mode_fifo_skb"
+ * installs kni_net_rx_lo_fifo_skb. Any other string is reported as
+ * unknown and changes nothing.
+ */
+void
+kni_net_config_lo_mode(char *lo_str)
+{
+	if (!lo_str) {
+		pr_debug("loopback disabled\n");
+		return;
+	}
+
+	/* Each message ends with a newline, like the other pr_debug calls. */
+	if (!strcmp(lo_str, "lo_mode_none")) {
+		pr_debug("loopback disabled\n");
+	} else if (!strcmp(lo_str, "lo_mode_fifo")) {
+		pr_debug("loopback mode=lo_mode_fifo enabled\n");
+		kni_net_rx_func = kni_net_rx_lo_fifo;
+	} else if (!strcmp(lo_str, "lo_mode_fifo_skb")) {
+		pr_debug("loopback mode=lo_mode_fifo_skb enabled\n");
+		kni_net_rx_func = kni_net_rx_lo_fifo_skb;
+	} else {
+		pr_debug("Unknown loopback parameter, disabled\n");
+	}
+}
diff --git a/src/spdk/dpdk/kernel/linux/kni/meson.build b/src/spdk/dpdk/kernel/linux/kni/meson.build
new file mode 100644
index 000000000..d696347f2
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/meson.build
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+kni_mkfile = custom_target('rte_kni_makefile', # target whose only action is to touch an empty 'Makefile' output
+ output: 'Makefile',
+ command: ['touch', '@OUTPUT@'])
+
+kni_sources = files( # inputs of the rte_kni module target below
+ 'kni_misc.c',
+ 'kni_net.c',
+ 'Kbuild')
+
+custom_target('rte_kni', # runs make in kernel_dir/build to produce rte_kni.ko
+ input: kni_sources,
+ output: 'rte_kni.ko',
+ command: ['make', '-j4', '-C', kernel_dir + '/build',
+ 'M=' + meson.current_build_dir(), # M= is set to this build directory
+ 'src=' + meson.current_source_dir(), # src= is set to this source directory
+ 'MODULE_CFLAGS=-include ' + meson.source_root() + '/config/rte_config.h' +
+ ' -I' + meson.source_root() + '/lib/librte_eal/include' +
+ ' -I' + meson.source_root() + '/lib/librte_eal/linux/include' +
+ ' -I' + meson.build_root() +
+ ' -I' + meson.current_source_dir(),
+ 'modules'], # make goal
+ depends: kni_mkfile, # the touched Makefile target is built first
+ install: true,
+ install_dir: kernel_dir + '/extra/dpdk',
+ build_by_default: get_option('enable_kmods')) # built by default only when the enable_kmods option is set
diff --git a/src/spdk/dpdk/kernel/linux/meson.build b/src/spdk/dpdk/kernel/linux/meson.build
new file mode 100644
index 000000000..da79df168
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/meson.build
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+subdirs = ['igb_uio', 'kni'] # kernel module subdirectories entered by the foreach loop at the end
+
+# when cross-compiling, the kernel_dir option must be given explicitly
+if get_option('kernel_dir') == '' and meson.is_cross_build()
+ error('Need "kernel_dir" option for kmod compilation when cross-compiling')
+endif
+
+kernel_dir = get_option('kernel_dir')
+if kernel_dir == ''
+ # native build without kernel_dir: default to /lib/modules/<output of uname -r>
+ kernel_version = run_command('uname', '-r').stdout().strip()
+ kernel_dir = '/lib/modules/' + kernel_version
+endif
+
+# test running make in the kernel build directory, using "make kernelversion"
+make_returncode = run_command('make', '-sC', kernel_dir + '/build',
+ 'kernelversion').returncode()
+if make_returncode != 0 # a non-zero exit from make aborts configuration
+ error('Cannot compile kernel modules as requested - are kernel headers installed?')
+endif
+
+# do the actual module building: enter each subdirectory listed in subdirs
+foreach d:subdirs
+ subdir(d)
+endforeach
diff --git a/src/spdk/dpdk/kernel/meson.build b/src/spdk/dpdk/kernel/meson.build
new file mode 100644
index 000000000..b247e2df4
--- /dev/null
+++ b/src/spdk/dpdk/kernel/meson.build
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+subdir(exec_env) # exec_env is set outside this file; presumably names the OS directory (freebsd or linux) - TODO confirm