Diffstat (limited to 'src/spdk/dpdk/kernel/linux/kni')
-rw-r--r--  src/spdk/dpdk/kernel/linux/kni/Kbuild          6
-rw-r--r--  src/spdk/dpdk/kernel/linux/kni/Makefile       34
-rw-r--r--  src/spdk/dpdk/kernel/linux/kni/compat.h      136
-rw-r--r--  src/spdk/dpdk/kernel/linux/kni/kni_dev.h     127
-rw-r--r--  src/spdk/dpdk/kernel/linux/kni/kni_fifo.h     87
-rw-r--r--  src/spdk/dpdk/kernel/linux/kni/kni_misc.c    661
-rw-r--r--  src/spdk/dpdk/kernel/linux/kni/kni_net.c     844
-rw-r--r--  src/spdk/dpdk/kernel/linux/kni/meson.build    28
8 files changed, 1923 insertions, 0 deletions
diff --git a/src/spdk/dpdk/kernel/linux/kni/Kbuild b/src/spdk/dpdk/kernel/linux/kni/Kbuild
new file mode 100644
index 000000000..e5452d6c0
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/Kbuild
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+ccflags-y := $(MODULE_CFLAGS)
+obj-m := rte_kni.o
+rte_kni-y := $(patsubst $(src)/%.c,%.o,$(wildcard $(src)/*.c))
diff --git a/src/spdk/dpdk/kernel/linux/kni/Makefile b/src/spdk/dpdk/kernel/linux/kni/Makefile
new file mode 100644
index 000000000..595bac261
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/Makefile
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = rte_kni
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+MODULE_CFLAGS += -Wall -Werror
+
+-include /etc/lsb-release
+
+ifeq ($(DISTRIB_ID),Ubuntu)
+MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(subst .,,$(DISTRIB_RELEASE))
+UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \
+ | cut -d '"' -f2 | cut -d- -f1,2 | tr .- ,`,1)
+MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))"
+endif
+
+#
+# all sources are stored in SRCS-y
+#
+SRCS-y := kni_misc.c
+SRCS-y += kni_net.c
+
+include $(RTE_SDK)/mk/rte.module.mk
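
For example, with UTS_RELEASE "4.15.0-54-generic" in utsrelease.h, the cut/tr
pipeline above yields 4,15,0,54, and the appended ,1 gives
UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION(4,15,0,54,1). A minimal sketch of how
such a code could be compared follows; the packing macro is an assumption for
illustration only, since this diff's compat.h does not define it:

    /* Illustration only: one possible packing of the Ubuntu version code
     * computed by the Makefile above.  Not the macro DPDK ships.
     */
    #define UBUNTU_KERNEL_VERSION(a, b, c, abi, upload) \
        (((a) << 24) | ((b) << 16) | ((abi) << 8) | (upload))

    #if defined(UBUNTU_KERNEL_CODE) && \
        (UBUNTU_KERNEL_CODE >= UBUNTU_KERNEL_VERSION(4, 15, 0, 54, 1))
    /* enable a fix backported into Ubuntu kernel 4.15.0-54 or newer */
    #endif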
diff --git a/src/spdk/dpdk/kernel/linux/kni/compat.h b/src/spdk/dpdk/kernel/linux/kni/compat.h
new file mode 100644
index 000000000..9ee45dbf6
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/compat.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Minimal wrappers to allow compiling kni on older kernels.
+ */
+
+#include <linux/version.h>
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
+#endif
+
+/* SuSE version macro is the same as Linux kernel version */
+#ifndef SLE_VERSION
+#define SLE_VERSION(a, b, c) KERNEL_VERSION(a, b, c)
+#endif
+#ifdef CONFIG_SUSE_KERNEL
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57))
+/* SLES12SP3 is at least 4.4.57+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0)
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 28))
+/* SLES12 is at least 3.12.28+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12, 0, 0)
+#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 61)) && \
+ (LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)))
+/* SLES11 SP3 is at least 3.0.61+ based */
+#define SLE_VERSION_CODE SLE_VERSION(11, 3, 0)
+#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32))
+/* SLES11 SP1 is 2.6.32 based */
+#define SLE_VERSION_CODE SLE_VERSION(11, 1, 0)
+#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 27))
+/* SLES11 GA is 2.6.27 based */
+#define SLE_VERSION_CODE SLE_VERSION(11, 0, 0)
+#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */
+#endif /* CONFIG_SUSE_KERNEL */
+#ifndef SLE_VERSION_CODE
+#define SLE_VERSION_CODE 0
+#endif /* SLE_VERSION_CODE */
+
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
+ (!(defined(RHEL_RELEASE_CODE) && \
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
+
+#define kstrtoul strict_strtoul
+
+#endif /* < 2.6.39 */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33)
+#define HAVE_SIMPLIFIED_PERNET_OPERATIONS
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+#define sk_sleep(s) ((s)->sk_sleep)
+#else
+#define HAVE_SOCKET_WQ
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+#define HAVE_STATIC_SOCK_MAP_FD
+#else
+#define kni_sock_map_fd(s) sock_map_fd(s, 0)
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
+#define HAVE_CHANGE_CARRIER_CB
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define ether_addr_copy(dst, src) memcpy(dst, src, ETH_ALEN)
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
+#define HAVE_IOV_ITER_MSGHDR
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)
+#define HAVE_KIOCB_MSG_PARAM
+#define HAVE_REBUILD_HEADER
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+#define HAVE_SK_ALLOC_KERN_PARAM
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) || \
+ (defined(RHEL_RELEASE_CODE) && \
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4)) || \
+ (SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(12, 3, 0))
+#define HAVE_TRANS_START_HELPER
+#endif
+
+/*
+ * KNI uses the NET_NAME_UNKNOWN macro to select the correct version of
+ * alloc_netdev(). Some old RedHat/CentOS kernels backported the commit
+ * that introduces the macro (685343fc3ba6) but kept the old API, so the
+ * macro must be undefined to select the correct alloc_netdev() variant.
+ * This is safe since KNI does not use the macro's value.
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \
+ (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)))
+#undef NET_NAME_UNKNOWN
+#endif
+
+/*
+ * RHEL 7.5+ ships two different kernel versions depending on the platform:
+ * 3.10 for AMD, Intel, IBM POWER7 and POWER8;
+ * 4.14 for ARM and IBM POWER9.
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) && \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8, 0)) && \
+ (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)))
+#define ndo_change_mtu ndo_change_mtu_rh74
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+#define HAVE_MAX_MTU_PARAM
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+#define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
+#endif
+
+/*
+ * IOVA-to-KVA mapping support could be provided from kernel 4.6.0, but the
+ * required kernel version is raised to >= 4.10.0 because of changes in the
+ * get_user_pages_remote() kernel API.
+ */
+#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
+#define HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+#endif
+
+#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE
+#define HAVE_TX_TIMEOUT_TXQUEUE
+#endif
diff --git a/src/spdk/dpdk/kernel/linux/kni/kni_dev.h b/src/spdk/dpdk/kernel/linux/kni/kni_dev.h
new file mode 100644
index 000000000..ca5f92a47
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/kni_dev.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2010-2014 Intel Corporation.
+ */
+
+#ifndef _KNI_DEV_H_
+#define _KNI_DEV_H_
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define KNI_VERSION "1.0"
+
+#include "compat.h"
+
+#include <linux/if.h>
+#include <linux/wait.h>
+#ifdef HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
+#include <linux/sched/signal.h>
+#else
+#include <linux/sched.h>
+#endif
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#include <rte_kni_common.h>
+#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
+
+#define MBUF_BURST_SZ 32
+
+/* Default carrier state for created KNI network interfaces */
+extern uint32_t kni_dflt_carrier;
+
+/**
+ * A structure describing the private information for a kni device.
+ */
+struct kni_dev {
+ /* kni list */
+ struct list_head list;
+
+ uint8_t iova_mode;
+
+ uint32_t core_id; /* Core ID to bind */
+ char name[RTE_KNI_NAMESIZE]; /* Network device name */
+ struct task_struct *pthread;
+
+ /* wait queue for req/resp */
+ wait_queue_head_t wq;
+ struct mutex sync_lock;
+
+ /* kni device */
+ struct net_device *net_dev;
+
+ /* queue for packets to be sent out */
+ struct rte_kni_fifo *tx_q;
+
+ /* queue for the packets received */
+ struct rte_kni_fifo *rx_q;
+
+	/* queue of allocated mbufs that can be used to store sk_buff data */
+ struct rte_kni_fifo *alloc_q;
+
+ /* free queue for the mbufs to be freed */
+ struct rte_kni_fifo *free_q;
+
+ /* request queue */
+ struct rte_kni_fifo *req_q;
+
+ /* response queue */
+ struct rte_kni_fifo *resp_q;
+
+ void *sync_kva;
+ void *sync_va;
+
+ void *mbuf_kva;
+ void *mbuf_va;
+
+ /* mbuf size */
+ uint32_t mbuf_size;
+
+ /* buffers */
+ void *pa[MBUF_BURST_SZ];
+ void *va[MBUF_BURST_SZ];
+ void *alloc_pa[MBUF_BURST_SZ];
+ void *alloc_va[MBUF_BURST_SZ];
+
+ struct task_struct *usr_tsk;
+};
+
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+static inline phys_addr_t iova_to_phys(struct task_struct *tsk,
+ unsigned long iova)
+{
+ phys_addr_t offset, phys_addr;
+ struct page *page = NULL;
+ long ret;
+
+ offset = iova & (PAGE_SIZE - 1);
+
+ /* Read one page struct info */
+ ret = get_user_pages_remote(tsk, tsk->mm, iova, 1,
+ FOLL_TOUCH, &page, NULL, NULL);
+ if (ret < 0)
+ return 0;
+
+ phys_addr = page_to_phys(page) | offset;
+ put_page(page);
+
+ return phys_addr;
+}
+
+static inline void *iova_to_kva(struct task_struct *tsk, unsigned long iova)
+{
+ return phys_to_virt(iova_to_phys(tsk, iova));
+}
+#endif
+
+void kni_net_release_fifo_phy(struct kni_dev *kni);
+void kni_net_rx(struct kni_dev *kni);
+void kni_net_init(struct net_device *dev);
+void kni_net_config_lo_mode(char *lo_str);
+void kni_net_poll_resp(struct kni_dev *kni);
+
+#endif
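
iova_to_phys() pins the user page backing the IOVA with
get_user_pages_remote(), computes the physical address from the page and the
in-page offset, and drops the page reference before returning. A short sketch
of how kni_net.c consumes these helpers (illustration only, not part of this
diff; get_kva() in kni_net.c is the real dispatch point):

    /* Sketch: resolve an address taken from the fifos to a kernel
     * virtual address, dispatching on the device's address mode.
     */
    static inline void *example_to_kva(struct kni_dev *kni, void *addr)
    {
    #ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
        if (kni->iova_mode == 1)
            /* addr is an IOVA in the user task's address space */
            return iova_to_kva(kni->usr_tsk, (unsigned long)addr);
    #endif
        /* pa mode: addr is a physical address in the kernel direct map */
        return phys_to_virt((unsigned long)addr);
    }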
diff --git a/src/spdk/dpdk/kernel/linux/kni/kni_fifo.h b/src/spdk/dpdk/kernel/linux/kni/kni_fifo.h
new file mode 100644
index 000000000..5c91b5537
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/kni_fifo.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2010-2014 Intel Corporation.
+ */
+
+#ifndef _KNI_FIFO_H_
+#define _KNI_FIFO_H_
+
+#include <rte_kni_common.h>
+
+/* Skip some memory barriers on Linux < 3.14 */
+#ifndef smp_load_acquire
+#define smp_load_acquire(a) (*(a))
+#endif
+#ifndef smp_store_release
+#define smp_store_release(a, b) *(a) = (b)
+#endif
+
+/**
+ * Add up to num elements to the fifo. Return the number actually written.
+ */
+static inline uint32_t
+kni_fifo_put(struct rte_kni_fifo *fifo, void **data, uint32_t num)
+{
+ uint32_t i = 0;
+ uint32_t fifo_write = fifo->write;
+ uint32_t fifo_read = smp_load_acquire(&fifo->read);
+ uint32_t new_write = fifo_write;
+
+ for (i = 0; i < num; i++) {
+ new_write = (new_write + 1) & (fifo->len - 1);
+
+ if (new_write == fifo_read)
+ break;
+ fifo->buffer[fifo_write] = data[i];
+ fifo_write = new_write;
+ }
+ smp_store_release(&fifo->write, fifo_write);
+
+ return i;
+}
+
+/**
+ * Get up to num elements from the fifo. Return the number actually read.
+ */
+static inline uint32_t
+kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num)
+{
+ uint32_t i = 0;
+ uint32_t new_read = fifo->read;
+ uint32_t fifo_write = smp_load_acquire(&fifo->write);
+
+ for (i = 0; i < num; i++) {
+ if (new_read == fifo_write)
+ break;
+
+ data[i] = fifo->buffer[new_read];
+ new_read = (new_read + 1) & (fifo->len - 1);
+ }
+ smp_store_release(&fifo->read, new_read);
+
+ return i;
+}
+
+/**
+ * Get the number of elements in the fifo.
+ */
+static inline uint32_t
+kni_fifo_count(struct rte_kni_fifo *fifo)
+{
+ uint32_t fifo_write = smp_load_acquire(&fifo->write);
+ uint32_t fifo_read = smp_load_acquire(&fifo->read);
+ return (fifo->len + fifo_write - fifo_read) & (fifo->len - 1);
+}
+
+/**
+ * Get the number of free entries in the fifo.
+ */
+static inline uint32_t
+kni_fifo_free_count(struct rte_kni_fifo *fifo)
+{
+ uint32_t fifo_write = smp_load_acquire(&fifo->write);
+ uint32_t fifo_read = smp_load_acquire(&fifo->read);
+ return (fifo_read - fifo_write - 1) & (fifo->len - 1);
+}
+
+#endif /* _KNI_FIFO_H_ */
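
These helpers implement a lockless single-producer/single-consumer ring: each
side owns one index, and smp_load_acquire()/smp_store_release() order the
buffer accesses against the index updates. fifo->len must be a power of two,
since both sides wrap with "& (fifo->len - 1)", and one slot is kept empty to
distinguish full from empty, so at most len - 1 elements fit. A usage sketch
(illustration only, not part of this diff; modeled on kni_fifo_trans_pa2va()
in kni_net.c):

    /* Sketch: burst-move up to MBUF_BURST_SZ pointers between two fifos,
     * never dequeuing more than the destination can absorb.  Assumes a
     * single producer and a single consumer per fifo.
     */
    static void example_fifo_move(struct rte_kni_fifo *src,
                                  struct rte_kni_fifo *dst)
    {
        void *burst[MBUF_BURST_SZ];
        uint32_t n;

        n = min_t(uint32_t, kni_fifo_free_count(dst), MBUF_BURST_SZ);
        n = kni_fifo_get(src, burst, n);
        if (n)
            kni_fifo_put(dst, burst, n); /* cannot fail: room was checked */
    }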
diff --git a/src/spdk/dpdk/kernel/linux/kni/kni_misc.c b/src/spdk/dpdk/kernel/linux/kni/kni_misc.c
new file mode 100644
index 000000000..2b464c438
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/kni_misc.c
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(c) 2010-2014 Intel Corporation.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/pci.h>
+#include <linux/kthread.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include <rte_kni_common.h>
+
+#include "compat.h"
+#include "kni_dev.h"
+
+MODULE_VERSION(KNI_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Kernel Module for managing kni devices");
+
+#define KNI_RX_LOOP_NUM 1000
+
+#define KNI_MAX_DEVICES 32
+
+/* loopback mode */
+static char *lo_mode;
+
+/* Kernel thread mode */
+static char *kthread_mode;
+static uint32_t multiple_kthread_on;
+
+/* Default carrier state for created KNI network interfaces */
+static char *carrier;
+uint32_t kni_dflt_carrier;
+
+#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */
+
+static int kni_net_id;
+
+struct kni_net {
+ unsigned long device_in_use; /* device in use flag */
+ struct mutex kni_kthread_lock;
+ struct task_struct *kni_kthread;
+ struct rw_semaphore kni_list_lock;
+ struct list_head kni_list_head;
+};
+
+static int __net_init
+kni_init_net(struct net *net)
+{
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+ struct kni_net *knet = net_generic(net, kni_net_id);
+
+ memset(knet, 0, sizeof(*knet));
+#else
+ struct kni_net *knet;
+ int ret;
+
+ knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
+ if (!knet) {
+ ret = -ENOMEM;
+ return ret;
+ }
+#endif
+
+ /* Clear the bit of device in use */
+ clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
+
+ mutex_init(&knet->kni_kthread_lock);
+
+ init_rwsem(&knet->kni_list_lock);
+ INIT_LIST_HEAD(&knet->kni_list_head);
+
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+ return 0;
+#else
+ ret = net_assign_generic(net, kni_net_id, knet);
+ if (ret < 0)
+ kfree(knet);
+
+ return ret;
+#endif
+}
+
+static void __net_exit
+kni_exit_net(struct net *net)
+{
+ struct kni_net *knet __maybe_unused;
+
+ knet = net_generic(net, kni_net_id);
+ mutex_destroy(&knet->kni_kthread_lock);
+
+#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+ kfree(knet);
+#endif
+}
+
+static struct pernet_operations kni_net_ops = {
+ .init = kni_init_net,
+ .exit = kni_exit_net,
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+ .id = &kni_net_id,
+ .size = sizeof(struct kni_net),
+#endif
+};
+
+static int
+kni_thread_single(void *data)
+{
+ struct kni_net *knet = data;
+ int j;
+ struct kni_dev *dev;
+
+ while (!kthread_should_stop()) {
+ down_read(&knet->kni_list_lock);
+ for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
+ list_for_each_entry(dev, &knet->kni_list_head, list) {
+ kni_net_rx(dev);
+ kni_net_poll_resp(dev);
+ }
+ }
+ up_read(&knet->kni_list_lock);
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+ /* reschedule out for a while */
+ schedule_timeout_interruptible(
+ usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
+#endif
+ }
+
+ return 0;
+}
+
+static int
+kni_thread_multiple(void *param)
+{
+ int j;
+ struct kni_dev *dev = param;
+
+ while (!kthread_should_stop()) {
+ for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
+ kni_net_rx(dev);
+ kni_net_poll_resp(dev);
+ }
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+ schedule_timeout_interruptible(
+ usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
+#endif
+ }
+
+ return 0;
+}
+
+static int
+kni_open(struct inode *inode, struct file *file)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct kni_net *knet = net_generic(net, kni_net_id);
+
+ /* kni device can be opened by one user only per netns */
+ if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
+ return -EBUSY;
+
+ file->private_data = get_net(net);
+ pr_debug("/dev/kni opened\n");
+
+ return 0;
+}
+
+static int
+kni_dev_remove(struct kni_dev *dev)
+{
+ if (!dev)
+ return -ENODEV;
+
+ if (dev->net_dev) {
+ unregister_netdev(dev->net_dev);
+ free_netdev(dev->net_dev);
+ }
+
+ kni_net_release_fifo_phy(dev);
+
+ return 0;
+}
+
+static int
+kni_release(struct inode *inode, struct file *file)
+{
+ struct net *net = file->private_data;
+ struct kni_net *knet = net_generic(net, kni_net_id);
+ struct kni_dev *dev, *n;
+
+ /* Stop kernel thread for single mode */
+ if (multiple_kthread_on == 0) {
+ mutex_lock(&knet->kni_kthread_lock);
+ /* Stop kernel thread */
+ if (knet->kni_kthread != NULL) {
+ kthread_stop(knet->kni_kthread);
+ knet->kni_kthread = NULL;
+ }
+ mutex_unlock(&knet->kni_kthread_lock);
+ }
+
+ down_write(&knet->kni_list_lock);
+ list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
+ /* Stop kernel thread for multiple mode */
+ if (multiple_kthread_on && dev->pthread != NULL) {
+ kthread_stop(dev->pthread);
+ dev->pthread = NULL;
+ }
+
+ kni_dev_remove(dev);
+ list_del(&dev->list);
+ }
+ up_write(&knet->kni_list_lock);
+
+ /* Clear the bit of device in use */
+ clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
+
+ put_net(net);
+ pr_debug("/dev/kni closed\n");
+
+ return 0;
+}
+
+static int
+kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
+{
+ if (!kni || !dev)
+ return -1;
+
+ /* Check if network name has been used */
+ if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
+ pr_err("KNI name %s duplicated\n", dev->name);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
+{
+ /**
+ * Create a new kernel thread for multiple mode, set its core affinity,
+ * and finally wake it up.
+ */
+ if (multiple_kthread_on) {
+ kni->pthread = kthread_create(kni_thread_multiple,
+ (void *)kni, "kni_%s", kni->name);
+ if (IS_ERR(kni->pthread)) {
+ kni_dev_remove(kni);
+ return -ECANCELED;
+ }
+
+ if (force_bind)
+ kthread_bind(kni->pthread, kni->core_id);
+ wake_up_process(kni->pthread);
+ } else {
+ mutex_lock(&knet->kni_kthread_lock);
+
+ if (knet->kni_kthread == NULL) {
+ knet->kni_kthread = kthread_create(kni_thread_single,
+ (void *)knet, "kni_single");
+ if (IS_ERR(knet->kni_kthread)) {
+ mutex_unlock(&knet->kni_kthread_lock);
+ kni_dev_remove(kni);
+ return -ECANCELED;
+ }
+
+ if (force_bind)
+ kthread_bind(knet->kni_kthread, kni->core_id);
+ wake_up_process(knet->kni_kthread);
+ }
+
+ mutex_unlock(&knet->kni_kthread_lock);
+ }
+
+ return 0;
+}
+
+static int
+kni_ioctl_create(struct net *net, uint32_t ioctl_num,
+ unsigned long ioctl_param)
+{
+ struct kni_net *knet = net_generic(net, kni_net_id);
+ int ret;
+ struct rte_kni_device_info dev_info;
+ struct net_device *net_dev = NULL;
+ struct kni_dev *kni, *dev, *n;
+
+ pr_info("Creating kni...\n");
+	/* Check the buffer size to avoid copying past dev_info */
+ if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
+ return -EINVAL;
+
+ /* Copy kni info from user space */
+ if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)))
+ return -EFAULT;
+
+	/* Check if the name is null-terminated */
+ if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) {
+ pr_err("kni.name not zero-terminated");
+ return -EINVAL;
+ }
+
+ /**
+ * Check if the cpu core id is valid for binding.
+ */
+ if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
+ pr_err("cpu %u is not online\n", dev_info.core_id);
+ return -EINVAL;
+ }
+
+ /* Check if it has been created */
+ down_read(&knet->kni_list_lock);
+ list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
+ if (kni_check_param(dev, &dev_info) < 0) {
+ up_read(&knet->kni_list_lock);
+ return -EINVAL;
+ }
+ }
+ up_read(&knet->kni_list_lock);
+
+ net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
+#ifdef NET_NAME_USER
+ NET_NAME_USER,
+#endif
+ kni_net_init);
+ if (net_dev == NULL) {
+ pr_err("error allocating device \"%s\"\n", dev_info.name);
+ return -EBUSY;
+ }
+
+ dev_net_set(net_dev, net);
+
+ kni = netdev_priv(net_dev);
+
+ kni->net_dev = net_dev;
+ kni->core_id = dev_info.core_id;
+ strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
+
+ /* Translate user space info into kernel space info */
+ if (dev_info.iova_mode) {
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+ kni->tx_q = iova_to_kva(current, dev_info.tx_phys);
+ kni->rx_q = iova_to_kva(current, dev_info.rx_phys);
+ kni->alloc_q = iova_to_kva(current, dev_info.alloc_phys);
+ kni->free_q = iova_to_kva(current, dev_info.free_phys);
+
+ kni->req_q = iova_to_kva(current, dev_info.req_phys);
+ kni->resp_q = iova_to_kva(current, dev_info.resp_phys);
+ kni->sync_va = dev_info.sync_va;
+ kni->sync_kva = iova_to_kva(current, dev_info.sync_phys);
+ kni->usr_tsk = current;
+ kni->iova_mode = 1;
+#else
+ pr_err("KNI module does not support IOVA to VA translation\n");
+ return -EINVAL;
+#endif
+ } else {
+
+ kni->tx_q = phys_to_virt(dev_info.tx_phys);
+ kni->rx_q = phys_to_virt(dev_info.rx_phys);
+ kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
+ kni->free_q = phys_to_virt(dev_info.free_phys);
+
+ kni->req_q = phys_to_virt(dev_info.req_phys);
+ kni->resp_q = phys_to_virt(dev_info.resp_phys);
+ kni->sync_va = dev_info.sync_va;
+ kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+ kni->iova_mode = 0;
+ }
+
+ kni->mbuf_size = dev_info.mbuf_size;
+
+ pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
+ (unsigned long long) dev_info.tx_phys, kni->tx_q);
+ pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
+ (unsigned long long) dev_info.rx_phys, kni->rx_q);
+ pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
+ (unsigned long long) dev_info.alloc_phys, kni->alloc_q);
+ pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n",
+ (unsigned long long) dev_info.free_phys, kni->free_q);
+ pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n",
+ (unsigned long long) dev_info.req_phys, kni->req_q);
+ pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
+ (unsigned long long) dev_info.resp_phys, kni->resp_q);
+ pr_debug("mbuf_size: %u\n", kni->mbuf_size);
+
+ /* if user has provided a valid mac address */
+ if (is_valid_ether_addr(dev_info.mac_addr))
+ memcpy(net_dev->dev_addr, dev_info.mac_addr, ETH_ALEN);
+ else
+		/*
+		 * Generate a random MAC address. random_ether_addr() is
+		 * the older name kept for compatibility; newer kernels
+		 * provide eth_random_addr().
+		 */
+ random_ether_addr(net_dev->dev_addr);
+
+ if (dev_info.mtu)
+ net_dev->mtu = dev_info.mtu;
+#ifdef HAVE_MAX_MTU_PARAM
+ net_dev->max_mtu = net_dev->mtu;
+
+ if (dev_info.min_mtu)
+ net_dev->min_mtu = dev_info.min_mtu;
+
+ if (dev_info.max_mtu)
+ net_dev->max_mtu = dev_info.max_mtu;
+#endif
+
+ ret = register_netdev(net_dev);
+ if (ret) {
+ pr_err("error %i registering device \"%s\"\n",
+ ret, dev_info.name);
+ kni->net_dev = NULL;
+ kni_dev_remove(kni);
+ free_netdev(net_dev);
+ return -ENODEV;
+ }
+
+ netif_carrier_off(net_dev);
+
+ ret = kni_run_thread(knet, kni, dev_info.force_bind);
+ if (ret != 0)
+ return ret;
+
+ down_write(&knet->kni_list_lock);
+ list_add(&kni->list, &knet->kni_list_head);
+ up_write(&knet->kni_list_lock);
+
+ return 0;
+}
+
+static int
+kni_ioctl_release(struct net *net, uint32_t ioctl_num,
+ unsigned long ioctl_param)
+{
+ struct kni_net *knet = net_generic(net, kni_net_id);
+ int ret = -EINVAL;
+ struct kni_dev *dev, *n;
+ struct rte_kni_device_info dev_info;
+
+ if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
+ return -EINVAL;
+
+ if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)))
+ return -EFAULT;
+
+ /* Release the network device according to its name */
+ if (strlen(dev_info.name) == 0)
+ return -EINVAL;
+
+ down_write(&knet->kni_list_lock);
+ list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
+ if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
+ continue;
+
+ if (multiple_kthread_on && dev->pthread != NULL) {
+ kthread_stop(dev->pthread);
+ dev->pthread = NULL;
+ }
+
+ kni_dev_remove(dev);
+ list_del(&dev->list);
+ ret = 0;
+ break;
+ }
+ up_write(&knet->kni_list_lock);
+ pr_info("%s release kni named %s\n",
+ (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
+
+ return ret;
+}
+
+static int
+kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param)
+{
+ int ret = -EINVAL;
+ struct net *net = current->nsproxy->net_ns;
+
+ pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
+
+ /*
+ * Switch according to the ioctl called
+ */
+ switch (_IOC_NR(ioctl_num)) {
+ case _IOC_NR(RTE_KNI_IOCTL_TEST):
+ /* For test only, not used */
+ break;
+ case _IOC_NR(RTE_KNI_IOCTL_CREATE):
+ ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
+ break;
+ case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
+ ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
+ break;
+ default:
+ pr_debug("IOCTL default\n");
+ break;
+ }
+
+ return ret;
+}
+
+static int
+kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num,
+ unsigned long ioctl_param)
+{
+	/* 32-bit app on 64-bit OS to be supported later */
+ pr_debug("Not implemented.\n");
+
+ return -EINVAL;
+}
+
+static const struct file_operations kni_fops = {
+ .owner = THIS_MODULE,
+ .open = kni_open,
+ .release = kni_release,
+ .unlocked_ioctl = (void *)kni_ioctl,
+ .compat_ioctl = (void *)kni_compat_ioctl,
+};
+
+static struct miscdevice kni_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = KNI_DEVICE,
+ .fops = &kni_fops,
+};
+
+static int __init
+kni_parse_kthread_mode(void)
+{
+ if (!kthread_mode)
+ return 0;
+
+ if (strcmp(kthread_mode, "single") == 0)
+ return 0;
+ else if (strcmp(kthread_mode, "multiple") == 0)
+ multiple_kthread_on = 1;
+ else
+ return -1;
+
+ return 0;
+}
+
+static int __init
+kni_parse_carrier_state(void)
+{
+ if (!carrier) {
+ kni_dflt_carrier = 0;
+ return 0;
+ }
+
+ if (strcmp(carrier, "off") == 0)
+ kni_dflt_carrier = 0;
+ else if (strcmp(carrier, "on") == 0)
+ kni_dflt_carrier = 1;
+ else
+ return -1;
+
+ return 0;
+}
+
+static int __init
+kni_init(void)
+{
+ int rc;
+
+ if (kni_parse_kthread_mode() < 0) {
+ pr_err("Invalid parameter for kthread_mode\n");
+ return -EINVAL;
+ }
+
+ if (multiple_kthread_on == 0)
+ pr_debug("Single kernel thread for all KNI devices\n");
+ else
+ pr_debug("Multiple kernel thread mode enabled\n");
+
+ if (kni_parse_carrier_state() < 0) {
+ pr_err("Invalid parameter for carrier\n");
+ return -EINVAL;
+ }
+
+ if (kni_dflt_carrier == 0)
+ pr_debug("Default carrier state set to off.\n");
+ else
+ pr_debug("Default carrier state set to on.\n");
+
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+ rc = register_pernet_subsys(&kni_net_ops);
+#else
+ rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+#endif
+ if (rc)
+ return -EPERM;
+
+ rc = misc_register(&kni_misc);
+ if (rc != 0) {
+ pr_err("Misc registration failed\n");
+ goto out;
+ }
+
+ /* Configure the lo mode according to the input parameter */
+ kni_net_config_lo_mode(lo_mode);
+
+ return 0;
+
+out:
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+ unregister_pernet_subsys(&kni_net_ops);
+#else
+ unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
+#endif
+ return rc;
+}
+
+static void __exit
+kni_exit(void)
+{
+ misc_deregister(&kni_misc);
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+ unregister_pernet_subsys(&kni_net_ops);
+#else
+ unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
+#endif
+}
+
+module_init(kni_init);
+module_exit(kni_exit);
+
+module_param(lo_mode, charp, 0644);
+MODULE_PARM_DESC(lo_mode,
+"KNI loopback mode (default=lo_mode_none):\n"
+"\t\tlo_mode_none Kernel loopback disabled\n"
+"\t\tlo_mode_fifo Enable kernel loopback with fifo\n"
+"\t\tlo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n"
+"\t\t"
+);
+
+module_param(kthread_mode, charp, 0644);
+MODULE_PARM_DESC(kthread_mode,
+"Kernel thread mode (default=single):\n"
+"\t\tsingle Single kernel thread mode enabled.\n"
+"\t\tmultiple Multiple kernel thread mode enabled.\n"
+"\t\t"
+);
+
+module_param(carrier, charp, 0644);
+MODULE_PARM_DESC(carrier,
+"Default carrier state for KNI interface (default=off):\n"
+"\t\toff Interfaces will be created with carrier state set to off.\n"
+"\t\ton Interfaces will be created with carrier state set to on.\n"
+"\t\t"
+);
diff --git a/src/spdk/dpdk/kernel/linux/kni/kni_net.c b/src/spdk/dpdk/kernel/linux/kni/kni_net.c
new file mode 100644
index 000000000..c82c881a2
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/kni_net.c
@@ -0,0 +1,844 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(c) 2010-2014 Intel Corporation.
+ */
+
+/*
+ * This code is inspired by the book "Linux Device Drivers" by
+ * Alessandro Rubini and Jonathan Corbet, published by O'Reilly & Associates
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h> /* eth_type_trans */
+#include <linux/ethtool.h>
+#include <linux/skbuff.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+
+#include <rte_kni_common.h>
+#include <kni_fifo.h>
+
+#include "compat.h"
+#include "kni_dev.h"
+
+#define WD_TIMEOUT 5 /* jiffies */
+
+#define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */
+
+/* typedef for rx function */
+typedef void (*kni_net_rx_t)(struct kni_dev *kni);
+
+static void kni_net_rx_normal(struct kni_dev *kni);
+
+/* kni rx function pointer, with default to normal rx */
+static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
+
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+/* iova to kernel virtual address */
+static inline void *
+iova2kva(struct kni_dev *kni, void *iova)
+{
+ return phys_to_virt(iova_to_phys(kni->usr_tsk, (unsigned long)iova));
+}
+
+static inline void *
+iova2data_kva(struct kni_dev *kni, struct rte_kni_mbuf *m)
+{
+ return phys_to_virt(iova_to_phys(kni->usr_tsk, m->buf_physaddr) +
+ m->data_off);
+}
+#endif
+
+/* physical address to kernel virtual address */
+static void *
+pa2kva(void *pa)
+{
+ return phys_to_virt((unsigned long)pa);
+}
+
+/* physical address to virtual address */
+static void *
+pa2va(void *pa, struct rte_kni_mbuf *m)
+{
+ void *va;
+
+ va = (void *)((unsigned long)pa +
+ (unsigned long)m->buf_addr -
+ (unsigned long)m->buf_physaddr);
+ return va;
+}
+
+/* mbuf data kernel virtual address from mbuf kernel virtual address */
+static void *
+kva2data_kva(struct rte_kni_mbuf *m)
+{
+ return phys_to_virt(m->buf_physaddr + m->data_off);
+}
+
+static inline void *
+get_kva(struct kni_dev *kni, void *pa)
+{
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+ if (kni->iova_mode == 1)
+ return iova2kva(kni, pa);
+#endif
+ return pa2kva(pa);
+}
+
+static inline void *
+get_data_kva(struct kni_dev *kni, void *pkt_kva)
+{
+#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
+ if (kni->iova_mode == 1)
+ return iova2data_kva(kni, pkt_kva);
+#endif
+ return kva2data_kva(pkt_kva);
+}
+
+/*
+ * Send a request to user space via req_q and wait for the response on
+ * resp_q.
+ */
+static int
+kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
+{
+ int ret = -1;
+ void *resp_va;
+ uint32_t num;
+ int ret_val;
+
+ if (!kni || !req) {
+ pr_err("No kni instance or request\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&kni->sync_lock);
+
+ /* Construct data */
+ memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
+ num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
+ if (num < 1) {
+ pr_err("Cannot send to req_q\n");
+ ret = -EBUSY;
+ goto fail;
+ }
+
+ ret_val = wait_event_interruptible_timeout(kni->wq,
+ kni_fifo_count(kni->resp_q), 3 * HZ);
+ if (signal_pending(current) || ret_val <= 0) {
+ ret = -ETIME;
+ goto fail;
+ }
+ num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
+ if (num != 1 || resp_va != kni->sync_va) {
+ /* This should never happen */
+ pr_err("No data in resp_q\n");
+ ret = -ENODATA;
+ goto fail;
+ }
+
+ memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
+ ret = 0;
+
+fail:
+ mutex_unlock(&kni->sync_lock);
+ return ret;
+}
+
+/*
+ * Open and close
+ */
+static int
+kni_net_open(struct net_device *dev)
+{
+ int ret;
+ struct rte_kni_request req;
+ struct kni_dev *kni = netdev_priv(dev);
+
+ netif_start_queue(dev);
+ if (kni_dflt_carrier == 1)
+ netif_carrier_on(dev);
+ else
+ netif_carrier_off(dev);
+
+ memset(&req, 0, sizeof(req));
+ req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
+
+ /* Setting if_up to non-zero means up */
+ req.if_up = 1;
+ ret = kni_net_process_request(kni, &req);
+
+ return (ret == 0) ? req.result : ret;
+}
+
+static int
+kni_net_release(struct net_device *dev)
+{
+ int ret;
+ struct rte_kni_request req;
+ struct kni_dev *kni = netdev_priv(dev);
+
+ netif_stop_queue(dev); /* can't transmit any more */
+ netif_carrier_off(dev);
+
+ memset(&req, 0, sizeof(req));
+ req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
+
+ /* Setting if_up to 0 means down */
+ req.if_up = 0;
+ ret = kni_net_process_request(kni, &req);
+
+ return (ret == 0) ? req.result : ret;
+}
+
+static void
+kni_fifo_trans_pa2va(struct kni_dev *kni,
+ struct rte_kni_fifo *src_pa, struct rte_kni_fifo *dst_va)
+{
+ uint32_t ret, i, num_dst, num_rx;
+ struct rte_kni_mbuf *kva, *prev_kva;
+ int nb_segs;
+ int kva_nb_segs;
+
+ do {
+ num_dst = kni_fifo_free_count(dst_va);
+ if (num_dst == 0)
+ return;
+
+ num_rx = min_t(uint32_t, num_dst, MBUF_BURST_SZ);
+
+ num_rx = kni_fifo_get(src_pa, kni->pa, num_rx);
+ if (num_rx == 0)
+ return;
+
+ for (i = 0; i < num_rx; i++) {
+ kva = get_kva(kni, kni->pa[i]);
+ kni->va[i] = pa2va(kni->pa[i], kva);
+
+ kva_nb_segs = kva->nb_segs;
+ for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
+ if (!kva->next)
+ break;
+
+ prev_kva = kva;
+ kva = pa2kva(kva->next);
+ /* Convert physical address to virtual address */
+ prev_kva->next = pa2va(prev_kva->next, kva);
+ }
+ }
+
+ ret = kni_fifo_put(dst_va, kni->va, num_rx);
+ if (ret != num_rx) {
+ /* Failing should not happen */
+ pr_err("Fail to enqueue entries into dst_va\n");
+ return;
+ }
+ } while (1);
+}
+
+/* Try to release mbufs when the kni device is released */
+void kni_net_release_fifo_phy(struct kni_dev *kni)
+{
+	/* release rx_q first, because it can't be released in userspace */
+ kni_fifo_trans_pa2va(kni, kni->rx_q, kni->free_q);
+ /* release alloc_q for speeding up kni release in userspace */
+ kni_fifo_trans_pa2va(kni, kni->alloc_q, kni->free_q);
+}
+
+/*
+ * Configuration changes (passed on by ifconfig)
+ */
+static int
+kni_net_config(struct net_device *dev, struct ifmap *map)
+{
+ if (dev->flags & IFF_UP) /* can't act on a running interface */
+ return -EBUSY;
+
+ /* ignore other fields */
+ return 0;
+}
+
+/*
+ * Transmit a packet (called by the kernel)
+ */
+static int
+kni_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+ int len = 0;
+ uint32_t ret;
+ struct kni_dev *kni = netdev_priv(dev);
+ struct rte_kni_mbuf *pkt_kva = NULL;
+ void *pkt_pa = NULL;
+ void *pkt_va = NULL;
+
+ /* save the timestamp */
+#ifdef HAVE_TRANS_START_HELPER
+ netif_trans_update(dev);
+#else
+ dev->trans_start = jiffies;
+#endif
+
+	/* Drop the packet if its length exceeds the mbuf size */
+ if (skb->len > kni->mbuf_size)
+ goto drop;
+
+ /**
+ * Check if it has at least one free entry in tx_q and
+ * one entry in alloc_q.
+ */
+ if (kni_fifo_free_count(kni->tx_q) == 0 ||
+ kni_fifo_count(kni->alloc_q) == 0) {
+ /**
+ * If no free entry in tx_q or no entry in alloc_q,
+ * drops skb and goes out.
+ */
+ goto drop;
+ }
+
+ /* dequeue a mbuf from alloc_q */
+ ret = kni_fifo_get(kni->alloc_q, &pkt_pa, 1);
+ if (likely(ret == 1)) {
+ void *data_kva;
+
+ pkt_kva = get_kva(kni, pkt_pa);
+ data_kva = get_data_kva(kni, pkt_kva);
+ pkt_va = pa2va(pkt_pa, pkt_kva);
+
+ len = skb->len;
+ memcpy(data_kva, skb->data, len);
+ if (unlikely(len < ETH_ZLEN)) {
+ memset(data_kva + len, 0, ETH_ZLEN - len);
+ len = ETH_ZLEN;
+ }
+ pkt_kva->pkt_len = len;
+ pkt_kva->data_len = len;
+
+ /* enqueue mbuf into tx_q */
+ ret = kni_fifo_put(kni->tx_q, &pkt_va, 1);
+ if (unlikely(ret != 1)) {
+ /* Failing should not happen */
+ pr_err("Fail to enqueue mbuf into tx_q\n");
+ goto drop;
+ }
+ } else {
+ /* Failing should not happen */
+ pr_err("Fail to dequeue mbuf from alloc_q\n");
+ goto drop;
+ }
+
+ /* Free skb and update statistics */
+ dev_kfree_skb(skb);
+ dev->stats.tx_bytes += len;
+ dev->stats.tx_packets++;
+
+ return NETDEV_TX_OK;
+
+drop:
+ /* Free skb and update statistics */
+ dev_kfree_skb(skb);
+ dev->stats.tx_dropped++;
+
+ return NETDEV_TX_OK;
+}
+
+/*
+ * RX: normal working mode
+ */
+static void
+kni_net_rx_normal(struct kni_dev *kni)
+{
+ uint32_t ret;
+ uint32_t len;
+ uint32_t i, num_rx, num_fq;
+ struct rte_kni_mbuf *kva, *prev_kva;
+ void *data_kva;
+ struct sk_buff *skb;
+ struct net_device *dev = kni->net_dev;
+
+ /* Get the number of free entries in free_q */
+ num_fq = kni_fifo_free_count(kni->free_q);
+ if (num_fq == 0) {
+ /* No room on the free_q, bail out */
+ return;
+ }
+
+ /* Calculate the number of entries to dequeue from rx_q */
+ num_rx = min_t(uint32_t, num_fq, MBUF_BURST_SZ);
+
+ /* Burst dequeue from rx_q */
+ num_rx = kni_fifo_get(kni->rx_q, kni->pa, num_rx);
+ if (num_rx == 0)
+ return;
+
+ /* Transfer received packets to netif */
+ for (i = 0; i < num_rx; i++) {
+ kva = get_kva(kni, kni->pa[i]);
+ len = kva->pkt_len;
+ data_kva = get_data_kva(kni, kva);
+ kni->va[i] = pa2va(kni->pa[i], kva);
+
+ skb = netdev_alloc_skb(dev, len);
+ if (!skb) {
+ /* Update statistics */
+ dev->stats.rx_dropped++;
+ continue;
+ }
+
+ if (kva->nb_segs == 1) {
+ memcpy(skb_put(skb, len), data_kva, len);
+ } else {
+ int nb_segs;
+ int kva_nb_segs = kva->nb_segs;
+
+ for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
+ memcpy(skb_put(skb, kva->data_len),
+ data_kva, kva->data_len);
+
+ if (!kva->next)
+ break;
+
+ prev_kva = kva;
+ kva = pa2kva(kva->next);
+ data_kva = kva2data_kva(kva);
+ /* Convert physical address to virtual address */
+ prev_kva->next = pa2va(prev_kva->next, kva);
+ }
+ }
+
+ skb->protocol = eth_type_trans(skb, dev);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ /* Call netif interface */
+ netif_rx_ni(skb);
+
+ /* Update statistics */
+ dev->stats.rx_bytes += len;
+ dev->stats.rx_packets++;
+ }
+
+ /* Burst enqueue mbufs into free_q */
+ ret = kni_fifo_put(kni->free_q, kni->va, num_rx);
+ if (ret != num_rx)
+ /* Failing should not happen */
+ pr_err("Fail to enqueue entries into free_q\n");
+}
+
+/*
+ * RX: loopback with enqueue/dequeue fifos.
+ */
+static void
+kni_net_rx_lo_fifo(struct kni_dev *kni)
+{
+ uint32_t ret;
+ uint32_t len;
+ uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
+ struct rte_kni_mbuf *kva, *next_kva;
+ void *data_kva;
+ struct rte_kni_mbuf *alloc_kva;
+ void *alloc_data_kva;
+ struct net_device *dev = kni->net_dev;
+
+ /* Get the number of entries in rx_q */
+ num_rq = kni_fifo_count(kni->rx_q);
+
+ /* Get the number of free entries in tx_q */
+ num_tq = kni_fifo_free_count(kni->tx_q);
+
+ /* Get the number of entries in alloc_q */
+ num_aq = kni_fifo_count(kni->alloc_q);
+
+ /* Get the number of free entries in free_q */
+ num_fq = kni_fifo_free_count(kni->free_q);
+
+ /* Calculate the number of entries to be dequeued from rx_q */
+ num = min(num_rq, num_tq);
+ num = min(num, num_aq);
+ num = min(num, num_fq);
+ num = min_t(uint32_t, num, MBUF_BURST_SZ);
+
+ /* Return if no entry to dequeue from rx_q */
+ if (num == 0)
+ return;
+
+ /* Burst dequeue from rx_q */
+ ret = kni_fifo_get(kni->rx_q, kni->pa, num);
+ if (ret == 0)
+ return; /* Failing should not happen */
+
+ /* Dequeue entries from alloc_q */
+ ret = kni_fifo_get(kni->alloc_q, kni->alloc_pa, num);
+ if (ret) {
+ num = ret;
+ /* Copy mbufs */
+ for (i = 0; i < num; i++) {
+ kva = get_kva(kni, kni->pa[i]);
+ len = kva->data_len;
+ data_kva = get_data_kva(kni, kva);
+ kni->va[i] = pa2va(kni->pa[i], kva);
+
+ while (kva->next) {
+ next_kva = pa2kva(kva->next);
+ /* Convert physical address to virtual address */
+ kva->next = pa2va(kva->next, next_kva);
+ kva = next_kva;
+ }
+
+ alloc_kva = get_kva(kni, kni->alloc_pa[i]);
+ alloc_data_kva = get_data_kva(kni, alloc_kva);
+ kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
+
+ memcpy(alloc_data_kva, data_kva, len);
+ alloc_kva->pkt_len = len;
+ alloc_kva->data_len = len;
+
+ dev->stats.tx_bytes += len;
+ dev->stats.rx_bytes += len;
+ }
+
+ /* Burst enqueue mbufs into tx_q */
+ ret = kni_fifo_put(kni->tx_q, kni->alloc_va, num);
+ if (ret != num)
+ /* Failing should not happen */
+ pr_err("Fail to enqueue mbufs into tx_q\n");
+ }
+
+ /* Burst enqueue mbufs into free_q */
+ ret = kni_fifo_put(kni->free_q, kni->va, num);
+ if (ret != num)
+ /* Failing should not happen */
+ pr_err("Fail to enqueue mbufs into free_q\n");
+
+	/**
+	 * Update statistics; enqueue/dequeue failures cannot happen here,
+	 * as all queue counts were checked above.
+	 */
+ dev->stats.tx_packets += num;
+ dev->stats.rx_packets += num;
+}
+
+/*
+ * RX: loopback with enqueue/dequeue fifos and sk buffer copies.
+ */
+static void
+kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
+{
+ uint32_t ret;
+ uint32_t len;
+ uint32_t i, num_rq, num_fq, num;
+ struct rte_kni_mbuf *kva, *prev_kva;
+ void *data_kva;
+ struct sk_buff *skb;
+ struct net_device *dev = kni->net_dev;
+
+ /* Get the number of entries in rx_q */
+ num_rq = kni_fifo_count(kni->rx_q);
+
+ /* Get the number of free entries in free_q */
+ num_fq = kni_fifo_free_count(kni->free_q);
+
+ /* Calculate the number of entries to dequeue from rx_q */
+ num = min(num_rq, num_fq);
+ num = min_t(uint32_t, num, MBUF_BURST_SZ);
+
+ /* Return if no entry to dequeue from rx_q */
+ if (num == 0)
+ return;
+
+ /* Burst dequeue mbufs from rx_q */
+ ret = kni_fifo_get(kni->rx_q, kni->pa, num);
+ if (ret == 0)
+ return;
+
+ /* Copy mbufs to sk buffer and then call tx interface */
+ for (i = 0; i < num; i++) {
+ kva = get_kva(kni, kni->pa[i]);
+ len = kva->pkt_len;
+ data_kva = get_data_kva(kni, kva);
+ kni->va[i] = pa2va(kni->pa[i], kva);
+
+ skb = netdev_alloc_skb(dev, len);
+ if (skb) {
+ memcpy(skb_put(skb, len), data_kva, len);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ dev_kfree_skb(skb);
+ }
+
+ /* Simulate real usage, allocate/copy skb twice */
+ skb = netdev_alloc_skb(dev, len);
+ if (skb == NULL) {
+ dev->stats.rx_dropped++;
+ continue;
+ }
+
+ if (kva->nb_segs == 1) {
+ memcpy(skb_put(skb, len), data_kva, len);
+ } else {
+ int nb_segs;
+ int kva_nb_segs = kva->nb_segs;
+
+ for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
+ memcpy(skb_put(skb, kva->data_len),
+ data_kva, kva->data_len);
+
+ if (!kva->next)
+ break;
+
+ prev_kva = kva;
+ kva = get_kva(kni, kva->next);
+ data_kva = get_data_kva(kni, kva);
+ /* Convert physical address to virtual address */
+ prev_kva->next = pa2va(prev_kva->next, kva);
+ }
+ }
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ dev->stats.rx_bytes += len;
+ dev->stats.rx_packets++;
+
+ /* call tx interface */
+ kni_net_tx(skb, dev);
+ }
+
+ /* enqueue all the mbufs from rx_q into free_q */
+ ret = kni_fifo_put(kni->free_q, kni->va, num);
+ if (ret != num)
+ /* Failing should not happen */
+ pr_err("Fail to enqueue mbufs into free_q\n");
+}
+
+/* rx interface */
+void
+kni_net_rx(struct kni_dev *kni)
+{
+	/**
+	 * No NULL-pointer check is needed here, as the rx function
+	 * pointer always has a default value.
+	 */
+ (*kni_net_rx_func)(kni);
+}
+
+/*
+ * Deal with a transmit timeout.
+ */
+#ifdef HAVE_TX_TIMEOUT_TXQUEUE
+static void
+kni_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
+#else
+static void
+kni_net_tx_timeout(struct net_device *dev)
+#endif
+{
+ pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies,
+ jiffies - dev_trans_start(dev));
+
+ dev->stats.tx_errors++;
+ netif_wake_queue(dev);
+}
+
+static int
+kni_net_change_mtu(struct net_device *dev, int new_mtu)
+{
+ int ret;
+ struct rte_kni_request req;
+ struct kni_dev *kni = netdev_priv(dev);
+
+ pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
+
+ memset(&req, 0, sizeof(req));
+ req.req_id = RTE_KNI_REQ_CHANGE_MTU;
+ req.new_mtu = new_mtu;
+ ret = kni_net_process_request(kni, &req);
+ if (ret == 0 && req.result == 0)
+ dev->mtu = new_mtu;
+
+ return (ret == 0) ? req.result : ret;
+}
+
+static void
+kni_net_change_rx_flags(struct net_device *netdev, int flags)
+{
+ struct rte_kni_request req;
+ struct kni_dev *kni = netdev_priv(netdev);
+
+ memset(&req, 0, sizeof(req));
+
+ if (flags & IFF_ALLMULTI) {
+ req.req_id = RTE_KNI_REQ_CHANGE_ALLMULTI;
+
+ if (netdev->flags & IFF_ALLMULTI)
+ req.allmulti = 1;
+ else
+ req.allmulti = 0;
+ }
+
+ if (flags & IFF_PROMISC) {
+ req.req_id = RTE_KNI_REQ_CHANGE_PROMISC;
+
+ if (netdev->flags & IFF_PROMISC)
+ req.promiscusity = 1;
+ else
+ req.promiscusity = 0;
+ }
+
+ kni_net_process_request(kni, &req);
+}
+
+/*
+ * Check whether the user-space application has provided the response
+ * message
+ */
+void
+kni_net_poll_resp(struct kni_dev *kni)
+{
+ if (kni_fifo_count(kni->resp_q))
+ wake_up_interruptible(&kni->wq);
+}
+
+/*
+ * Fill the eth header
+ */
+static int
+kni_net_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type, const void *daddr,
+ const void *saddr, uint32_t len)
+{
+ struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+
+ memcpy(eth->h_source, saddr ? saddr : dev->dev_addr, dev->addr_len);
+ memcpy(eth->h_dest, daddr ? daddr : dev->dev_addr, dev->addr_len);
+ eth->h_proto = htons(type);
+
+ return dev->hard_header_len;
+}
+
+/*
+ * Re-fill the eth header
+ */
+#ifdef HAVE_REBUILD_HEADER
+static int
+kni_net_rebuild_header(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct ethhdr *eth = (struct ethhdr *) skb->data;
+
+ memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
+ memcpy(eth->h_dest, dev->dev_addr, dev->addr_len);
+
+ return 0;
+}
+#endif /* < 4.1.0 */
+
+/**
+ * kni_net_set_mac - Change the Ethernet Address of the KNI NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int
+kni_net_set_mac(struct net_device *netdev, void *p)
+{
+ int ret;
+ struct rte_kni_request req;
+ struct kni_dev *kni;
+ struct sockaddr *addr = p;
+
+ memset(&req, 0, sizeof(req));
+ req.req_id = RTE_KNI_REQ_CHANGE_MAC_ADDR;
+
+ if (!is_valid_ether_addr((unsigned char *)(addr->sa_data)))
+ return -EADDRNOTAVAIL;
+
+ memcpy(req.mac_addr, addr->sa_data, netdev->addr_len);
+ memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+
+ kni = netdev_priv(netdev);
+ ret = kni_net_process_request(kni, &req);
+
+ return (ret == 0 ? req.result : ret);
+}
+
+#ifdef HAVE_CHANGE_CARRIER_CB
+static int
+kni_net_change_carrier(struct net_device *dev, bool new_carrier)
+{
+ if (new_carrier)
+ netif_carrier_on(dev);
+ else
+ netif_carrier_off(dev);
+ return 0;
+}
+#endif
+
+static const struct header_ops kni_net_header_ops = {
+ .create = kni_net_header,
+ .parse = eth_header_parse,
+#ifdef HAVE_REBUILD_HEADER
+ .rebuild = kni_net_rebuild_header,
+#endif /* < 4.1.0 */
+ .cache = NULL, /* disable caching */
+};
+
+static const struct net_device_ops kni_net_netdev_ops = {
+ .ndo_open = kni_net_open,
+ .ndo_stop = kni_net_release,
+ .ndo_set_config = kni_net_config,
+ .ndo_change_rx_flags = kni_net_change_rx_flags,
+ .ndo_start_xmit = kni_net_tx,
+ .ndo_change_mtu = kni_net_change_mtu,
+ .ndo_tx_timeout = kni_net_tx_timeout,
+ .ndo_set_mac_address = kni_net_set_mac,
+#ifdef HAVE_CHANGE_CARRIER_CB
+ .ndo_change_carrier = kni_net_change_carrier,
+#endif
+};
+
+static void kni_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ strlcpy(info->version, KNI_VERSION, sizeof(info->version));
+ strlcpy(info->driver, "kni", sizeof(info->driver));
+}
+
+static const struct ethtool_ops kni_net_ethtool_ops = {
+ .get_drvinfo = kni_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+};
+
+void
+kni_net_init(struct net_device *dev)
+{
+ struct kni_dev *kni = netdev_priv(dev);
+
+ init_waitqueue_head(&kni->wq);
+ mutex_init(&kni->sync_lock);
+
+ ether_setup(dev); /* assign some of the fields */
+ dev->netdev_ops = &kni_net_netdev_ops;
+ dev->header_ops = &kni_net_header_ops;
+ dev->ethtool_ops = &kni_net_ethtool_ops;
+ dev->watchdog_timeo = WD_TIMEOUT;
+}
+
+void
+kni_net_config_lo_mode(char *lo_str)
+{
+ if (!lo_str) {
+ pr_debug("loopback disabled");
+ return;
+ }
+
+ if (!strcmp(lo_str, "lo_mode_none"))
+ pr_debug("loopback disabled");
+ else if (!strcmp(lo_str, "lo_mode_fifo")) {
+ pr_debug("loopback mode=lo_mode_fifo enabled");
+ kni_net_rx_func = kni_net_rx_lo_fifo;
+ } else if (!strcmp(lo_str, "lo_mode_fifo_skb")) {
+ pr_debug("loopback mode=lo_mode_fifo_skb enabled");
+ kni_net_rx_func = kni_net_rx_lo_fifo_skb;
+ } else {
+ pr_debug("Unknown loopback parameter, disabled");
+ }
+}
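
kni_net_process_request() above is half of a simple RPC over the fifos: the
kernel copies the request into the shared sync buffer, enqueues the user-space
address of that buffer (sync_va) on req_q, and sleeps until
kni_net_poll_resp() sees the token come back on resp_q. A sketch of the
matching user-space loop (illustration only, not part of this diff; in DPDK it
is rte_kni_handle_request() in librte_kni, which ships user-space fifo helpers
with the same names):

    /* Sketch: dequeue a request token, act on it, and echo the same
     * token back so the kernel side wakes up.
     */
    static void example_handle_request(struct rte_kni_fifo *req_q,
                                       struct rte_kni_fifo *resp_q)
    {
        struct rte_kni_request *req;
        void *token;

        if (kni_fifo_get(req_q, &token, 1) != 1)
            return; /* no request pending */

        req = (struct rte_kni_request *)token; /* sync buffer is shared */
        switch (req->req_id) {
        case RTE_KNI_REQ_CFG_NETWORK_IF:
            /* start or stop the DPDK port according to req->if_up */
            req->result = 0;
            break;
        case RTE_KNI_REQ_CHANGE_MTU:
            /* apply req->new_mtu to the DPDK port */
            req->result = 0;
            break;
        default:
            req->result = -1;
            break;
        }

        kni_fifo_put(resp_q, &token, 1); /* kni_net_poll_resp() wakes waiter */
    }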
diff --git a/src/spdk/dpdk/kernel/linux/kni/meson.build b/src/spdk/dpdk/kernel/linux/kni/meson.build
new file mode 100644
index 000000000..d696347f2
--- /dev/null
+++ b/src/spdk/dpdk/kernel/linux/kni/meson.build
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+kni_mkfile = custom_target('rte_kni_makefile',
+ output: 'Makefile',
+ command: ['touch', '@OUTPUT@'])
+
+kni_sources = files(
+ 'kni_misc.c',
+ 'kni_net.c',
+ 'Kbuild')
+
+custom_target('rte_kni',
+ input: kni_sources,
+ output: 'rte_kni.ko',
+ command: ['make', '-j4', '-C', kernel_dir + '/build',
+ 'M=' + meson.current_build_dir(),
+ 'src=' + meson.current_source_dir(),
+ 'MODULE_CFLAGS=-include ' + meson.source_root() + '/config/rte_config.h' +
+ ' -I' + meson.source_root() + '/lib/librte_eal/include' +
+ ' -I' + meson.source_root() + '/lib/librte_eal/linux/include' +
+ ' -I' + meson.build_root() +
+ ' -I' + meson.current_source_dir(),
+ 'modules'],
+ depends: kni_mkfile,
+ install: true,
+ install_dir: kernel_dir + '/extra/dpdk',
+ build_by_default: get_option('enable_kmods'))