summaryrefslogtreecommitdiffstats
path: root/src/spdk/include/spdk_internal
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/spdk/include/spdk_internal
parentInitial commit. (diff)
downloadceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz
ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/include/spdk_internal')
-rw-r--r--src/spdk/include/spdk_internal/accel_engine.h130
-rw-r--r--src/spdk/include/spdk_internal/assert.h55
-rw-r--r--src/spdk/include/spdk_internal/event.h197
-rw-r--r--src/spdk/include/spdk_internal/idxd.h74
-rw-r--r--src/spdk/include/spdk_internal/log.h108
-rw-r--r--src/spdk/include/spdk_internal/lvolstore.h128
-rw-r--r--src/spdk/include/spdk_internal/mock.h135
-rw-r--r--src/spdk/include/spdk_internal/nvme_tcp.h633
-rw-r--r--src/spdk/include/spdk_internal/rdma.h117
-rw-r--r--src/spdk/include/spdk_internal/sock.h227
-rw-r--r--src/spdk/include/spdk_internal/thread.h136
-rw-r--r--src/spdk/include/spdk_internal/uring.h51
-rw-r--r--src/spdk/include/spdk_internal/utf.h325
-rw-r--r--src/spdk/include/spdk_internal/vhost_user.h140
-rw-r--r--src/spdk/include/spdk_internal/virtio.h486
15 files changed, 2942 insertions, 0 deletions
diff --git a/src/spdk/include/spdk_internal/accel_engine.h b/src/spdk/include/spdk_internal/accel_engine.h
new file mode 100644
index 000000000..9b78bc967
--- /dev/null
+++ b/src/spdk/include/spdk_internal/accel_engine.h
@@ -0,0 +1,130 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_ACCEL_ENGINE_H
+#define SPDK_INTERNAL_ACCEL_ENGINE_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/accel_engine.h"
+#include "spdk/queue.h"
+
+struct spdk_accel_task {
+ spdk_accel_completion_cb cb;
+ void *cb_arg;
+ uint8_t offload_ctx[0];
+};
+
+struct spdk_accel_engine {
+ uint64_t (*get_capabilities)(void);
+ int (*copy)(struct spdk_io_channel *ch, void *dst, void *src,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*dualcast)(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ uint32_t (*batch_get_max)(void);
+ struct spdk_accel_batch *(*batch_create)(struct spdk_io_channel *ch);
+ int (*batch_prep_copy)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_prep_dualcast)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *dst1, void *dst2, void *src, uint64_t nbytes,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_prep_compare)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_prep_fill)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *dst, uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_prep_crc32c)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_submit)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_cancel)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch);
+ int (*compare)(struct spdk_io_channel *ch, void *src1, void *src2,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*fill)(struct spdk_io_channel *ch, void *dst, uint8_t fill,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*crc32c)(struct spdk_io_channel *ch, uint32_t *dst, void *src,
+ uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ struct spdk_io_channel *(*get_io_channel)(void);
+};
+
+struct spdk_accel_module_if {
+ /** Initialization function for the module. Called by the spdk
+ * application during startup.
+ *
+ * Modules are required to define this function.
+ */
+ int (*module_init)(void);
+
+ /** Finish function for the module. Called by the spdk application
+ * before the spdk application exits to perform any necessary cleanup.
+ *
+ * Modules are not required to define this function.
+ */
+ void (*module_fini)(void *ctx);
+
+ /** Function called to return a text string representing the
+ * module's configuration options for inclusion in an
+ * spdk configuration file.
+ */
+ void (*config_text)(FILE *fp);
+
+ /**
+ * Write Acceleration module configuration into provided JSON context.
+ */
+ void (*write_config_json)(struct spdk_json_write_ctx *w);
+
+ /**
+ * Returns the allocation size required for the modules to use for context.
+ */
+ size_t (*get_ctx_size)(void);
+
+ TAILQ_ENTRY(spdk_accel_module_if) tailq;
+};
+
+void spdk_accel_hw_engine_register(struct spdk_accel_engine *accel_engine);
+void spdk_accel_module_list_add(struct spdk_accel_module_if *accel_module);
+
+#define SPDK_ACCEL_MODULE_REGISTER(init_fn, fini_fn, config_fn, config_json, ctx_size_fn) \
+ static struct spdk_accel_module_if init_fn ## _if = { \
+ .module_init = init_fn, \
+ .module_fini = fini_fn, \
+ .config_text = config_fn, \
+ .write_config_json = config_json, \
+ .get_ctx_size = ctx_size_fn, \
+ }; \
+ __attribute__((constructor)) static void init_fn ## _init(void) \
+ { \
+ spdk_accel_module_list_add(&init_fn ## _if); \
+ }
+
+#endif
diff --git a/src/spdk/include/spdk_internal/assert.h b/src/spdk/include/spdk_internal/assert.h
new file mode 100644
index 000000000..7e4c45070
--- /dev/null
+++ b/src/spdk/include/spdk_internal/assert.h
@@ -0,0 +1,55 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_ASSERT_H
+#define SPDK_INTERNAL_ASSERT_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/assert.h"
+
+#if !defined(DEBUG) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
+#define SPDK_UNREACHABLE() __builtin_unreachable()
+#else
+#define SPDK_UNREACHABLE() abort()
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_INTERNAL_ASSERT_H */
diff --git a/src/spdk/include/spdk_internal/event.h b/src/spdk/include/spdk_internal/event.h
new file mode 100644
index 000000000..2d88d08ba
--- /dev/null
+++ b/src/spdk/include/spdk_internal/event.h
@@ -0,0 +1,197 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_EVENT_H
+#define SPDK_INTERNAL_EVENT_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/event.h"
+#include "spdk/json.h"
+#include "spdk/thread.h"
+#include "spdk/util.h"
+
+struct spdk_event {
+ uint32_t lcore;
+ spdk_event_fn fn;
+ void *arg1;
+ void *arg2;
+};
+
+enum spdk_reactor_state {
+ SPDK_REACTOR_STATE_UNINITIALIZED = 0,
+ SPDK_REACTOR_STATE_INITIALIZED = 1,
+ SPDK_REACTOR_STATE_RUNNING = 2,
+ SPDK_REACTOR_STATE_EXITING = 3,
+ SPDK_REACTOR_STATE_SHUTDOWN = 4,
+};
+
+struct spdk_lw_thread {
+ TAILQ_ENTRY(spdk_lw_thread) link;
+ bool resched;
+ uint64_t tsc_start;
+};
+
+struct spdk_reactor {
+ /* Lightweight threads running on this reactor */
+ TAILQ_HEAD(, spdk_lw_thread) threads;
+ uint32_t thread_count;
+
+ /* Logical core number for this reactor. */
+ uint32_t lcore;
+
+ struct {
+ uint32_t is_valid : 1;
+ uint32_t reserved : 31;
+ } flags;
+
+ uint64_t tsc_last;
+
+ struct spdk_ring *events;
+
+ /* The last known rusage values */
+ struct rusage rusage;
+ uint64_t last_rusage;
+
+ uint64_t busy_tsc;
+ uint64_t idle_tsc;
+} __attribute__((aligned(SPDK_CACHE_LINE_SIZE)));
+
+int spdk_reactors_init(void);
+void spdk_reactors_fini(void);
+
+void spdk_reactors_start(void);
+void spdk_reactors_stop(void *arg1);
+
+struct spdk_reactor *spdk_reactor_get(uint32_t lcore);
+
+/**
+ * Allocate and pass an event to each reactor, serially.
+ *
+ * The allocated event is processed asynchronously - i.e. spdk_for_each_reactor
+ * will return prior to `fn` being called on each reactor.
+ *
+ * \param fn This is the function that will be called on each reactor.
+ * \param arg1 Argument will be passed to fn when called.
+ * \param arg2 Argument will be passed to fn when called.
+ * \param cpl This will be called on the originating reactor after `fn` has been
+ * called on each reactor.
+ */
+void spdk_for_each_reactor(spdk_event_fn fn, void *arg1, void *arg2, spdk_event_fn cpl);
+
+struct spdk_subsystem {
+ const char *name;
+ /* User must call spdk_subsystem_init_next() when they are done with their initialization. */
+ void (*init)(void);
+ void (*fini)(void);
+ void (*config)(FILE *fp);
+
+ /**
+ * Write JSON configuration handler.
+ *
+ * \param w JSON write context
+ */
+ void (*write_config_json)(struct spdk_json_write_ctx *w);
+ TAILQ_ENTRY(spdk_subsystem) tailq;
+};
+
+struct spdk_subsystem *spdk_subsystem_find(const char *name);
+struct spdk_subsystem *spdk_subsystem_get_first(void);
+struct spdk_subsystem *spdk_subsystem_get_next(struct spdk_subsystem *cur_subsystem);
+
+struct spdk_subsystem_depend {
+ const char *name;
+ const char *depends_on;
+ TAILQ_ENTRY(spdk_subsystem_depend) tailq;
+};
+
+struct spdk_subsystem_depend *spdk_subsystem_get_first_depend(void);
+struct spdk_subsystem_depend *spdk_subsystem_get_next_depend(struct spdk_subsystem_depend
+ *cur_depend);
+
+void spdk_add_subsystem(struct spdk_subsystem *subsystem);
+void spdk_add_subsystem_depend(struct spdk_subsystem_depend *depend);
+
+typedef void (*spdk_subsystem_init_fn)(int rc, void *ctx);
+void spdk_subsystem_init(spdk_subsystem_init_fn cb_fn, void *cb_arg);
+void spdk_subsystem_fini(spdk_msg_fn cb_fn, void *cb_arg);
+void spdk_subsystem_init_next(int rc);
+void spdk_subsystem_fini_next(void);
+void spdk_subsystem_config(FILE *fp);
+void spdk_app_json_config_load(const char *json_config_file, const char *rpc_addr,
+ spdk_subsystem_init_fn cb_fn, void *cb_arg,
+ bool stop_on_error);
+
+/**
+ * Save pointed \c subsystem configuration to the JSON write context \c w. In case of
+ * error \c null is written to the JSON context.
+ *
+ * \param w JSON write context
+ * \param subsystem the subsystem to query
+ */
+void spdk_subsystem_config_json(struct spdk_json_write_ctx *w, struct spdk_subsystem *subsystem);
+
+void spdk_rpc_initialize(const char *listen_addr);
+void spdk_rpc_finish(void);
+
+/**
+ * \brief Register a new subsystem
+ */
+#define SPDK_SUBSYSTEM_REGISTER(_name) \
+ __attribute__((constructor)) static void _name ## _register(void) \
+ { \
+ spdk_add_subsystem(&_name); \
+ }
+
+/**
+ * \brief Declare that a subsystem depends on another subsystem.
+ */
+#define SPDK_SUBSYSTEM_DEPEND(_name, _depends_on) \
+ static struct spdk_subsystem_depend __subsystem_ ## _name ## _depend_on ## _depends_on = { \
+ .name = #_name, \
+ .depends_on = #_depends_on, \
+ }; \
+ __attribute__((constructor)) static void _name ## _depend_on ## _depends_on(void) \
+ { \
+ spdk_add_subsystem_depend(&__subsystem_ ## _name ## _depend_on ## _depends_on); \
+ }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_INTERNAL_EVENT_H */
diff --git a/src/spdk/include/spdk_internal/idxd.h b/src/spdk/include/spdk_internal/idxd.h
new file mode 100644
index 000000000..17db2405d
--- /dev/null
+++ b/src/spdk/include/spdk_internal/idxd.h
@@ -0,0 +1,74 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IDXD_INTERNAL_H__
+#define __IDXD_INTERNAL_H__
+
+#include "spdk/stdinc.h"
+
+#include "spdk/idxd.h"
+#include "spdk/queue.h"
+#include "spdk/mmio.h"
+#include "spdk/bit_array.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define IDXD_MAX_CONFIG_NUM 1
+
+enum dsa_opcode {
+ IDXD_OPCODE_NOOP = 0,
+ IDXD_OPCODE_BATCH = 1,
+ IDXD_OPCODE_DRAIN = 2,
+ IDXD_OPCODE_MEMMOVE = 3,
+ IDXD_OPCODE_MEMFILL = 4,
+ IDXD_OPCODE_COMPARE = 5,
+ IDXD_OPCODE_COMPVAL = 6,
+ IDXD_OPCODE_CR_DELTA = 7,
+ IDXD_OPCODE_AP_DELTA = 8,
+ IDXD_OPCODE_DUALCAST = 9,
+ IDXD_OPCODE_CRC32C_GEN = 16,
+ IDXD_OPCODE_COPY_CRC = 17,
+ IDXD_OPCODE_DIF_CHECK = 18,
+ IDXD_OPCODE_DIF_INS = 19,
+ IDXD_OPCODE_DIF_STRP = 20,
+ IDXD_OPCODE_DIF_UPDT = 21,
+ IDXD_OPCODE_CFLUSH = 32,
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __IDXD_INTERNAL_H__ */
diff --git a/src/spdk/include/spdk_internal/log.h b/src/spdk/include/spdk_internal/log.h
new file mode 100644
index 000000000..0993d1016
--- /dev/null
+++ b/src/spdk/include/spdk_internal/log.h
@@ -0,0 +1,108 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * Logging interfaces
+ */
+
+#ifndef SPDK_INTERNAL_LOG_H
+#define SPDK_INTERNAL_LOG_H
+
+#include "spdk/log.h"
+#include "spdk/queue.h"
+
+extern enum spdk_log_level g_spdk_log_level;
+extern enum spdk_log_level g_spdk_log_print_level;
+extern enum spdk_log_level g_spdk_log_backtrace_level;
+
+struct spdk_log_flag {
+ TAILQ_ENTRY(spdk_log_flag) tailq;
+ const char *name;
+ bool enabled;
+};
+
+void spdk_log_register_flag(const char *name, struct spdk_log_flag *flag);
+
+struct spdk_log_flag *spdk_log_get_first_flag(void);
+struct spdk_log_flag *spdk_log_get_next_flag(struct spdk_log_flag *flag);
+
+#define SPDK_LOG_REGISTER_COMPONENT(str, flag) \
+struct spdk_log_flag flag = { \
+ .enabled = false, \
+ .name = str, \
+}; \
+__attribute__((constructor)) static void register_flag_##flag(void) \
+{ \
+ spdk_log_register_flag(str, &flag); \
+}
+
+#define SPDK_INFOLOG(FLAG, ...) \
+ do { \
+ extern struct spdk_log_flag FLAG; \
+ if (FLAG.enabled) { \
+ spdk_log(SPDK_LOG_INFO, __FILE__, __LINE__, __func__, __VA_ARGS__); \
+ } \
+ } while (0)
+
+#ifdef DEBUG
+
+#define SPDK_DEBUGLOG(FLAG, ...) \
+ do { \
+ extern struct spdk_log_flag FLAG; \
+ if (FLAG.enabled) { \
+ spdk_log(SPDK_LOG_DEBUG, __FILE__, __LINE__, __func__, __VA_ARGS__); \
+ } \
+ } while (0)
+
+#define SPDK_LOGDUMP(FLAG, LABEL, BUF, LEN) \
+ do { \
+ extern struct spdk_log_flag FLAG; \
+ if ((FLAG.enabled) && (LEN)) { \
+ spdk_log_dump(stderr, (LABEL), (BUF), (LEN)); \
+ } \
+ } while (0)
+
+#else
+#define SPDK_DEBUGLOG(...) do { } while (0)
+#define SPDK_LOGDUMP(...) do { } while (0)
+#endif
+
+#define SPDK_ERRLOGDUMP(LABEL, BUF, LEN) \
+ do { \
+ if ((LEN)) { \
+ spdk_log_dump(stderr, (LABEL), (BUF), (LEN)); \
+ } \
+ } while (0)
+
+#endif /* SPDK_INTERNAL_LOG_H */
diff --git a/src/spdk/include/spdk_internal/lvolstore.h b/src/spdk/include/spdk_internal/lvolstore.h
new file mode 100644
index 000000000..f82157e53
--- /dev/null
+++ b/src/spdk/include/spdk_internal/lvolstore.h
@@ -0,0 +1,128 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_LVOLSTORE_H
+#define SPDK_INTERNAL_LVOLSTORE_H
+
+#include "spdk/blob.h"
+#include "spdk/lvol.h"
+#include "spdk/uuid.h"
+#include "spdk/bdev_module.h"
+
+/* Default size of blobstore cluster */
+#define SPDK_LVS_OPTS_CLUSTER_SZ (4 * 1024 * 1024)
+
+/* UUID + '_' + blobid (20 characters for uint64_t).
+ * Null terminator is already included in SPDK_UUID_STRING_LEN. */
+#define SPDK_LVOL_UNIQUE_ID_MAX (SPDK_UUID_STRING_LEN + 1 + 20)
+
+struct spdk_lvs_req {
+ spdk_lvs_op_complete cb_fn;
+ void *cb_arg;
+ struct spdk_lvol_store *lvol_store;
+ int lvserrno;
+};
+
+struct spdk_lvol_req {
+ spdk_lvol_op_complete cb_fn;
+ void *cb_arg;
+ struct spdk_lvol *lvol;
+ size_t sz;
+ struct spdk_io_channel *channel;
+ char name[SPDK_LVOL_NAME_MAX];
+};
+
+struct spdk_lvs_with_handle_req {
+ spdk_lvs_op_with_handle_complete cb_fn;
+ void *cb_arg;
+ struct spdk_lvol_store *lvol_store;
+ struct spdk_bs_dev *bs_dev;
+ struct spdk_bdev *base_bdev;
+ int lvserrno;
+};
+
+struct spdk_lvs_destroy_req {
+ spdk_lvs_op_complete cb_fn;
+ void *cb_arg;
+ struct spdk_lvol_store *lvs;
+};
+
+struct spdk_lvol_with_handle_req {
+ spdk_lvol_op_with_handle_complete cb_fn;
+ void *cb_arg;
+ struct spdk_lvol *lvol;
+};
+
+struct spdk_lvol_store {
+ struct spdk_bs_dev *bs_dev;
+ struct spdk_blob_store *blobstore;
+ struct spdk_blob *super_blob;
+ spdk_blob_id super_blob_id;
+ struct spdk_uuid uuid;
+ int lvol_count;
+ int lvols_opened;
+ bool destruct;
+ TAILQ_HEAD(, spdk_lvol) lvols;
+ TAILQ_HEAD(, spdk_lvol) pending_lvols;
+ bool on_list;
+ TAILQ_ENTRY(spdk_lvol_store) link;
+ char name[SPDK_LVS_NAME_MAX];
+ char new_name[SPDK_LVS_NAME_MAX];
+};
+
+struct spdk_lvol {
+ struct spdk_lvol_store *lvol_store;
+ struct spdk_blob *blob;
+ spdk_blob_id blob_id;
+ char unique_id[SPDK_LVOL_UNIQUE_ID_MAX];
+ char name[SPDK_LVOL_NAME_MAX];
+ struct spdk_uuid uuid;
+ char uuid_str[SPDK_UUID_STRING_LEN];
+ bool thin_provision;
+ struct spdk_bdev *bdev;
+ int ref_count;
+ bool action_in_progress;
+ enum blob_clear_method clear_method;
+ TAILQ_ENTRY(spdk_lvol) link;
+};
+
+struct lvol_store_bdev *vbdev_lvol_store_first(void);
+struct lvol_store_bdev *vbdev_lvol_store_next(struct lvol_store_bdev *prev);
+
+void spdk_lvol_resize(struct spdk_lvol *lvol, uint64_t sz, spdk_lvol_op_complete cb_fn,
+ void *cb_arg);
+
+void spdk_lvol_set_read_only(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn,
+ void *cb_arg);
+
+#endif /* SPDK_INTERNAL_LVOLSTORE_H */
diff --git a/src/spdk/include/spdk_internal/mock.h b/src/spdk/include/spdk_internal/mock.h
new file mode 100644
index 000000000..8de44ae55
--- /dev/null
+++ b/src/spdk/include/spdk_internal/mock.h
@@ -0,0 +1,135 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_MOCK_H
+#define SPDK_INTERNAL_MOCK_H
+
+#include "spdk/stdinc.h"
+
+#define MOCK_STRUCT_INIT(...) \
+ { __VA_ARGS__ }
+
+#define DEFINE_RETURN_MOCK(fn, ret) \
+ bool ut_ ## fn ## _mocked = false; \
+ ret ut_ ## fn
+
+/*
+ * For controlling mocked function behavior, setting
+ * and getting values from the stub, the _P macros are
+ * for mocking functions that return pointer values.
+ */
+#define MOCK_SET(fn, val) \
+ ut_ ## fn ## _mocked = true; \
+ ut_ ## fn = val
+
+#define MOCK_GET(fn) \
+ ut_ ## fn
+
+#define MOCK_CLEAR(fn) \
+ ut_ ## fn ## _mocked = false
+
+#define MOCK_CLEAR_P(fn) \
+ ut_ ## fn ## _mocked = false; \
+ ut_ ## fn = NULL
+
+/* for proving to *certain* static analysis tools that we didn't reset the mock function. */
+#define MOCK_CLEARED_ASSERT(fn) \
+ SPDK_CU_ASSERT_FATAL(ut_ ## fn ## _mocked == false)
+
+/* for declaring function protoypes for wrappers */
+#define DECLARE_WRAPPER(fn, ret, args) \
+ extern bool ut_ ## fn ## _mocked; \
+ extern ret ut_ ## fn; \
+ ret __wrap_ ## fn args; ret __real_ ## fn args
+
+/* for defining the implmentation of wrappers for syscalls */
+#define DEFINE_WRAPPER(fn, ret, dargs, pargs) \
+ DEFINE_RETURN_MOCK(fn, ret); \
+ __attribute__((used)) ret __wrap_ ## fn dargs \
+ { \
+ if (!ut_ ## fn ## _mocked) { \
+ return __real_ ## fn pargs; \
+ } else { \
+ return MOCK_GET(fn); \
+ } \
+ }
+
+/* DEFINE_STUB is for defining the implmentation of stubs for SPDK funcs. */
+#define DEFINE_STUB(fn, ret, dargs, val) \
+ bool ut_ ## fn ## _mocked = true; \
+ ret ut_ ## fn = val; \
+ ret fn dargs; \
+ ret fn dargs \
+ { \
+ return MOCK_GET(fn); \
+ }
+
+/* DEFINE_STUB_V macro is for stubs that don't have a return value */
+#define DEFINE_STUB_V(fn, dargs) \
+ void fn dargs; \
+ void fn dargs \
+ { \
+ }
+
+#define HANDLE_RETURN_MOCK(fn) \
+ if (ut_ ## fn ## _mocked) { \
+ return ut_ ## fn; \
+ }
+
+
+/* declare wrapper protos (alphabetically please) here */
+DECLARE_WRAPPER(calloc, void *, (size_t nmemb, size_t size));
+
+DECLARE_WRAPPER(pthread_mutex_init, int,
+ (pthread_mutex_t *mtx, const pthread_mutexattr_t *attr));
+
+DECLARE_WRAPPER(pthread_mutexattr_init, int,
+ (pthread_mutexattr_t *attr));
+
+DECLARE_WRAPPER(recvmsg, ssize_t, (int sockfd, struct msghdr *msg, int flags));
+
+DECLARE_WRAPPER(sendmsg, ssize_t, (int sockfd, const struct msghdr *msg, int flags));
+
+DECLARE_WRAPPER(writev, ssize_t, (int fd, const struct iovec *iov, int iovcnt));
+
+/* unlink is done a bit differently. */
+extern char *g_unlink_path;
+extern void (*g_unlink_callback)(void);
+/* If g_unlink_path is NULL, __wrap_unlink will return ENOENT.
+ * If the __wrap_unlink() parameter does not match g_unlink_path, it will return ENOENT.
+ * If g_unlink_path does match, and g_unlink_callback has been set, g_unlink_callback will
+ * be called before returning 0.
+ */
+int __wrap_unlink(const char *path);
+
+#endif /* SPDK_INTERNAL_MOCK_H */
diff --git a/src/spdk/include/spdk_internal/nvme_tcp.h b/src/spdk/include/spdk_internal/nvme_tcp.h
new file mode 100644
index 000000000..7065bc060
--- /dev/null
+++ b/src/spdk/include/spdk_internal/nvme_tcp.h
@@ -0,0 +1,633 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_NVME_TCP_H
+#define SPDK_INTERNAL_NVME_TCP_H
+
+#include "spdk/likely.h"
+#include "spdk/sock.h"
+#include "spdk/dif.h"
+
+#define SPDK_CRC32C_XOR 0xffffffffUL
+#define SPDK_NVME_TCP_DIGEST_LEN 4
+#define SPDK_NVME_TCP_DIGEST_ALIGNMENT 4
+#define SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT 30
+#define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 8
+
+/*
+ * Maximum number of SGL elements.
+ */
+#define NVME_TCP_MAX_SGL_DESCRIPTORS (16)
+
+#define MAKE_DIGEST_WORD(BUF, CRC32C) \
+ ( ((*((uint8_t *)(BUF)+0)) = (uint8_t)((uint32_t)(CRC32C) >> 0)), \
+ ((*((uint8_t *)(BUF)+1)) = (uint8_t)((uint32_t)(CRC32C) >> 8)), \
+ ((*((uint8_t *)(BUF)+2)) = (uint8_t)((uint32_t)(CRC32C) >> 16)), \
+ ((*((uint8_t *)(BUF)+3)) = (uint8_t)((uint32_t)(CRC32C) >> 24)))
+
+#define MATCH_DIGEST_WORD(BUF, CRC32C) \
+ ( ((((uint32_t) *((uint8_t *)(BUF)+0)) << 0) \
+ | (((uint32_t) *((uint8_t *)(BUF)+1)) << 8) \
+ | (((uint32_t) *((uint8_t *)(BUF)+2)) << 16) \
+ | (((uint32_t) *((uint8_t *)(BUF)+3)) << 24)) \
+ == (CRC32C))
+
+#define DGET32(B) \
+ ((( (uint32_t) *((uint8_t *)(B)+0)) << 0) \
+ | (((uint32_t) *((uint8_t *)(B)+1)) << 8) \
+ | (((uint32_t) *((uint8_t *)(B)+2)) << 16) \
+ | (((uint32_t) *((uint8_t *)(B)+3)) << 24))
+
+#define DSET32(B,D) \
+ (((*((uint8_t *)(B)+0)) = (uint8_t)((uint32_t)(D) >> 0)), \
+ ((*((uint8_t *)(B)+1)) = (uint8_t)((uint32_t)(D) >> 8)), \
+ ((*((uint8_t *)(B)+2)) = (uint8_t)((uint32_t)(D) >> 16)), \
+ ((*((uint8_t *)(B)+3)) = (uint8_t)((uint32_t)(D) >> 24)))
+
+typedef void (*nvme_tcp_qpair_xfer_complete_cb)(void *cb_arg);
+
+/* Cursor over an iovec array, used while building or consuming a PDU
+ * scatter-gather list.
+ */
+struct _nvme_tcp_sgl {
+	struct iovec *iov;	/* next unfilled/unread iovec element */
+	int iovcnt;		/* elements remaining in iov */
+	uint32_t iov_offset;	/* bytes still to skip before appending */
+	uint32_t total_size;	/* total bytes appended so far */
+};
+
+/* In-memory representation of one NVMe/TCP PDU, shared by the initiator and
+ * target code paths for both transmit and receive.
+ */
+struct nvme_tcp_pdu {
+	union {
+		/* to hold error pdu data */
+		uint8_t raw[SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE];
+		struct spdk_nvme_tcp_common_pdu_hdr common;
+		struct spdk_nvme_tcp_ic_req ic_req;
+		struct spdk_nvme_tcp_term_req_hdr term_req;
+		struct spdk_nvme_tcp_cmd capsule_cmd;
+		struct spdk_nvme_tcp_h2c_data_hdr h2c_data;
+		struct spdk_nvme_tcp_ic_resp ic_resp;
+		struct spdk_nvme_tcp_rsp capsule_resp;
+		struct spdk_nvme_tcp_c2h_data_hdr c2h_data;
+		struct spdk_nvme_tcp_r2t_hdr r2t;
+
+	} hdr;
+
+	bool has_hdgst;		/* header digest present after the PSH */
+	bool ddgst_enable;	/* data digest expected/produced for the payload */
+	uint8_t data_digest[SPDK_NVME_TCP_DIGEST_LEN];	/* staged DDGST bytes */
+
+	uint8_t ch_valid_bytes;		/* bytes of common header received so far */
+	uint8_t psh_valid_bytes;	/* bytes of PDU-specific header received so far */
+	uint8_t psh_len;		/* PSH length, excluding the common header */
+
+	nvme_tcp_qpair_xfer_complete_cb cb_fn;	/* invoked when the transfer completes */
+	void *cb_arg;
+
+	/* The sock request ends with a 0 length iovec. Place the actual iovec immediately
+	 * after it. There is a static assert below to check if the compiler inserted
+	 * any unwanted padding */
+	struct spdk_sock_request sock_req;
+	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS * 2];
+
+	struct iovec data_iov[NVME_TCP_MAX_SGL_DESCRIPTORS];	/* payload buffers */
+	uint32_t data_iovcnt;
+	uint32_t data_len;	/* logical payload length (excludes DIF metadata) */
+
+	uint32_t readv_offset;	/* payload bytes already received */
+	TAILQ_ENTRY(nvme_tcp_pdu) tailq;
+	uint32_t remaining;
+	uint32_t padding_len;	/* PAD bytes between PSH/HDGST and payload */
+	struct _nvme_tcp_sgl sgl;	/* scratch cursor used by the iov builders */
+
+	struct spdk_dif_ctx *dif_ctx;	/* non-NULL when DIF insert/strip is active */
+
+	void *req; /* data tied to a tcp request */
+	void *qpair;
+};
+SPDK_STATIC_ASSERT(offsetof(struct nvme_tcp_pdu,
+			    sock_req) + sizeof(struct spdk_sock_request) == offsetof(struct nvme_tcp_pdu, iov),
+		   "Compiler inserted padding between iov and sock_req");
+
+enum nvme_tcp_pdu_recv_state {
+ /* Ready to wait for PDU */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY,
+
+ /* Active tqpair waiting for any PDU common header */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH,
+
+ /* Active tqpair waiting for any PDU specific header */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH,
+
+ /* Active tqpair waiting for a tcp request, only use in target side */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_REQ,
+
+ /* Active tqpair waiting for payload */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD,
+
+ /* Active tqpair does not wait for payload */
+ NVME_TCP_PDU_RECV_STATE_ERROR,
+};
+
+/* Return codes shared by the PDU read helpers below; positive values are
+ * byte counts, 0 means no progress yet, negative values are fatal.
+ */
+enum nvme_tcp_error_codes {
+	NVME_TCP_PDU_IN_PROGRESS = 0,
+	NVME_TCP_CONNECTION_FATAL = -1,
+	NVME_TCP_PDU_FATAL = -2,
+};
+
+enum nvme_tcp_qpair_state {
+ NVME_TCP_QPAIR_STATE_INVALID = 0,
+ NVME_TCP_QPAIR_STATE_INITIALIZING = 1,
+ NVME_TCP_QPAIR_STATE_RUNNING = 2,
+ NVME_TCP_QPAIR_STATE_EXITING = 3,
+ NVME_TCP_QPAIR_STATE_EXITED = 4,
+};
+
+/* Per-PDU-type flag: true when that PDU type carries a header digest if
+ * HDGST was negotiated on the connection.
+ */
+static const bool g_nvme_tcp_hdgst[] = {
+	[SPDK_NVME_TCP_PDU_TYPE_IC_REQ] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_IC_RESP] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_H2C_DATA] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_C2H_DATA] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_R2T] = true
+};
+
+/* Per-PDU-type flag: true when that PDU type carries a data digest if
+ * DDGST was negotiated (only data-bearing PDU types qualify).
+ */
+static const bool g_nvme_tcp_ddgst[] = {
+	[SPDK_NVME_TCP_PDU_TYPE_IC_REQ] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_IC_RESP] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_H2C_DATA] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_C2H_DATA] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_R2T] = false
+};
+
+/* Compute the CRC32C header digest over the first hlen bytes of the PDU
+ * header.  Seeded with ~0 and finalized with an XOR, matching the
+ * NVMe/TCP HDGST definition.
+ */
+static uint32_t
+nvme_tcp_pdu_calc_header_digest(struct nvme_tcp_pdu *pdu)
+{
+	uint32_t crc32c;
+	uint32_t hlen = pdu->hdr.common.hlen;
+
+	crc32c = spdk_crc32c_update(&pdu->hdr.raw, hlen, ~0);
+	crc32c = crc32c ^ SPDK_CRC32C_XOR;
+	return crc32c;
+}
+
+/* Fold every buffer of the iovec array into a running CRC32C value and
+ * return the updated CRC.  Each element must have a non-NULL base and a
+ * non-zero length.
+ */
+static uint32_t
+_update_crc32c_iov(struct iovec *iov, int iovcnt, uint32_t crc32c)
+{
+	int i;
+
+	for (i = 0; i < iovcnt; i++) {
+		assert(iov[i].iov_base != NULL);
+		assert(iov[i].iov_len != 0);
+		crc32c = spdk_crc32c_update(iov[i].iov_base, iov[i].iov_len, crc32c);
+	}
+
+	return crc32c;
+}
+
+/* Compute the CRC32C data digest (DDGST) over the PDU payload.  When a DIF
+ * context is attached, the CRC is computed over the data stream with
+ * metadata handled by the DIF helper.  The payload is conceptually padded
+ * with zero bytes up to a 4-byte boundary before the final XOR.
+ */
+static uint32_t
+nvme_tcp_pdu_calc_data_digest(struct nvme_tcp_pdu *pdu)
+{
+	uint32_t crc32c = SPDK_CRC32C_XOR;
+	uint32_t mod;
+
+	assert(pdu->data_len != 0);
+
+	if (spdk_likely(!pdu->dif_ctx)) {
+		crc32c = _update_crc32c_iov(pdu->data_iov, pdu->data_iovcnt, crc32c);
+	} else {
+		spdk_dif_update_crc32c_stream(pdu->data_iov, pdu->data_iovcnt,
+					      0, pdu->data_len, &crc32c, pdu->dif_ctx);
+	}
+
+	/* Digest the implicit zero padding that aligns the payload to 4 bytes. */
+	mod = pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT;
+	if (mod != 0) {
+		uint32_t pad_length = SPDK_NVME_TCP_DIGEST_ALIGNMENT - mod;
+		uint8_t pad[3] = {0, 0, 0};
+
+		assert(pad_length > 0);
+		assert(pad_length <= sizeof(pad));
+		crc32c = spdk_crc32c_update(pad, pad_length, crc32c);
+	}
+	crc32c = crc32c ^ SPDK_CRC32C_XOR;
+	return crc32c;
+}
+
+/* Reset the SGL cursor to the start of the given iovec array, with an
+ * initial number of bytes (iov_offset) to skip before any data is appended.
+ */
+static inline void
+_nvme_tcp_sgl_init(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt,
+		   uint32_t iov_offset)
+{
+	s->iov = iov;
+	s->iovcnt = iovcnt;
+	s->iov_offset = iov_offset;
+	s->total_size = 0;
+}
+
+/* Advance the cursor by step bytes, stepping over any iovec elements that
+ * become fully consumed so iov_offset always falls within the current
+ * element (or the list is exhausted).
+ */
+static inline void
+_nvme_tcp_sgl_advance(struct _nvme_tcp_sgl *s, uint32_t step)
+{
+	s->iov_offset += step;
+	while (s->iovcnt > 0) {
+		if (s->iov_offset < s->iov->iov_len) {
+			break;
+		}
+
+		s->iov_offset -= s->iov->iov_len;
+		s->iov++;
+		s->iovcnt--;
+	}
+}
+
+/* Return the address and remaining length of the buffer at the cursor's
+ * current position.  Either output pointer may be NULL if unneeded.
+ */
+static inline void
+_nvme_tcp_sgl_get_buf(struct _nvme_tcp_sgl *s, void **_buf, uint32_t *_buf_len)
+{
+	if (_buf != NULL) {
+		*_buf = s->iov->iov_base + s->iov_offset;
+	}
+	if (_buf_len != NULL) {
+		*_buf_len = s->iov->iov_len - s->iov_offset;
+	}
+}
+
+/* Append a data buffer to the SGL as a new iovec element, first honoring
+ * any pending skip (iov_offset).  Returns false when the destination iovec
+ * array is exhausted after this append; true otherwise.
+ */
+static inline bool
+_nvme_tcp_sgl_append(struct _nvme_tcp_sgl *s, uint8_t *data, uint32_t data_len)
+{
+	if (s->iov_offset >= data_len) {
+		/* This buffer is entirely inside the skip region. */
+		s->iov_offset -= data_len;
+	} else {
+		assert(s->iovcnt > 0);
+		s->iov->iov_base = data + s->iov_offset;
+		s->iov->iov_len = data_len - s->iov_offset;
+		s->total_size += data_len - s->iov_offset;
+		s->iov_offset = 0;
+		s->iov++;
+		s->iovcnt--;
+		if (s->iovcnt == 0) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Append every element of an iovec array to the SGL.  Returns false as soon
+ * as the destination runs out of room, true if all elements were appended.
+ */
+static inline bool
+_nvme_tcp_sgl_append_multi(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt)
+{
+	int i;
+
+	for (i = 0; i < iovcnt; i++) {
+		if (!_nvme_tcp_sgl_append(s, iov[i].iov_base, iov[i].iov_len)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Sum the lengths of all elements in an iovec array. */
+static inline uint32_t
+_get_iov_array_size(struct iovec *iov, int iovcnt)
+{
+	int i;
+	uint32_t size = 0;
+
+	for (i = 0; i < iovcnt; i++) {
+		size += iov[i].iov_len;
+	}
+
+	return size;
+}
+
+/* Append an iovec array to the SGL with DIF metadata interleaved by the
+ * DIF helper.  Returns false when the destination iovec array is exhausted
+ * or the DIF setup fails, true otherwise.
+ * NOTE(review): the skip branch compares iov_offset against data_len but
+ * subtracts the md-inclusive array size — presumably offsets here are
+ * metadata-inclusive; verify against the callers.
+ */
+static inline bool
+_nvme_tcp_sgl_append_multi_with_md(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt,
+				   uint32_t data_len, const struct spdk_dif_ctx *dif_ctx)
+{
+	int rc;
+	uint32_t mapped_len = 0;
+
+	if (s->iov_offset >= data_len) {
+		s->iov_offset -= _get_iov_array_size(iov, iovcnt);
+	} else {
+		rc = spdk_dif_set_md_interleave_iovs(s->iov, s->iovcnt, iov, iovcnt,
+						     s->iov_offset, data_len - s->iov_offset,
+						     &mapped_len, dif_ctx);
+		if (rc < 0) {
+			SPDK_ERRLOG("Failed to setup iovs for DIF insert/strip.\n");
+			return false;
+		}
+
+		s->total_size += mapped_len;
+		s->iov_offset = 0;
+		assert(s->iovcnt >= rc);	/* rc is the number of destination iovecs consumed */
+		s->iovcnt -= rc;
+		s->iov += rc;
+
+		if (s->iovcnt == 0) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Build the iovec array describing a full PDU for transmission: header
+ * (plus optional header digest and padding), payload (optionally DIF
+ * interleaved) and optional data digest.  Stops early if iovcnt is too
+ * small.  Returns the number of iovec elements filled; *_mapped_length,
+ * if given, receives the total number of bytes mapped.
+ */
+static int
+nvme_tcp_build_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu,
+		    bool hdgst_enable, bool ddgst_enable, uint32_t *_mapped_length)
+{
+	uint32_t hlen, plen;
+	struct _nvme_tcp_sgl *sgl;
+
+	if (iovcnt == 0) {
+		return 0;
+	}
+
+	sgl = &pdu->sgl;
+	_nvme_tcp_sgl_init(sgl, iov, iovcnt, 0);
+	hlen = pdu->hdr.common.hlen;
+
+	/* Header Digest */
+	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) {
+		hlen += SPDK_NVME_TCP_DIGEST_LEN;
+	}
+
+	plen = hlen;
+	if (!pdu->data_len) {
+		/* PDU header + possible header digest */
+		_nvme_tcp_sgl_append(sgl, (uint8_t *)&pdu->hdr.raw, hlen);
+		goto end;
+	}
+
+	/* Padding */
+	if (pdu->padding_len > 0) {
+		hlen += pdu->padding_len;
+		plen = hlen;
+	}
+
+	if (!_nvme_tcp_sgl_append(sgl, (uint8_t *)&pdu->hdr.raw, hlen)) {
+		goto end;
+	}
+
+	/* Data Segment */
+	plen += pdu->data_len;
+	if (spdk_likely(!pdu->dif_ctx)) {
+		if (!_nvme_tcp_sgl_append_multi(sgl, pdu->data_iov, pdu->data_iovcnt)) {
+			goto end;
+		}
+	} else {
+		if (!_nvme_tcp_sgl_append_multi_with_md(sgl, pdu->data_iov, pdu->data_iovcnt,
+							pdu->data_len, pdu->dif_ctx)) {
+			goto end;
+		}
+	}
+
+	/* Data Digest */
+	if (g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && ddgst_enable) {
+		plen += SPDK_NVME_TCP_DIGEST_LEN;
+		_nvme_tcp_sgl_append(sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN);
+	}
+
+	/* Sanity check: computed length must match the PDU's declared plen. */
+	assert(plen == pdu->hdr.common.plen);
+
+end:
+	if (_mapped_length != NULL) {
+		*_mapped_length = sgl->total_size;
+	}
+
+	return iovcnt - sgl->iovcnt;
+}
+
+/* Build the iovec array covering only the PDU payload (and optional data
+ * digest) for receiving, skipping the readv_offset bytes already received.
+ * Returns the number of iovec elements filled; *_mapped_length, if given,
+ * receives the total number of bytes mapped.
+ */
+static int
+nvme_tcp_build_payload_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu,
+			    bool ddgst_enable, uint32_t *_mapped_length)
+{
+	struct _nvme_tcp_sgl *sgl;
+
+	if (iovcnt == 0) {
+		return 0;
+	}
+
+	sgl = &pdu->sgl;
+	_nvme_tcp_sgl_init(sgl, iov, iovcnt, pdu->readv_offset);
+
+	if (spdk_likely(!pdu->dif_ctx)) {
+		if (!_nvme_tcp_sgl_append_multi(sgl, pdu->data_iov, pdu->data_iovcnt)) {
+			goto end;
+		}
+	} else {
+		if (!_nvme_tcp_sgl_append_multi_with_md(sgl, pdu->data_iov, pdu->data_iovcnt,
+							pdu->data_len, pdu->dif_ctx)) {
+			goto end;
+		}
+	}
+
+	/* Data Digest */
+	if (ddgst_enable) {
+		_nvme_tcp_sgl_append(sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN);
+	}
+
+end:
+	if (_mapped_length != NULL) {
+		*_mapped_length = sgl->total_size;
+	}
+	return iovcnt - sgl->iovcnt;
+}
+
+/* Read up to bytes from the socket into buf.  Returns the number of bytes
+ * read (> 0), 0 when the read would block, or NVME_TCP_CONNECTION_FATAL on
+ * error or orderly close (spdk_sock_recv() returning 0).
+ */
+static int
+nvme_tcp_read_data(struct spdk_sock *sock, int bytes,
+		   void *buf)
+{
+	int ret;
+
+	ret = spdk_sock_recv(sock, buf, bytes);
+
+	if (ret > 0) {
+		return ret;
+	}
+
+	if (ret < 0) {
+		if (errno == EAGAIN || errno == EWOULDBLOCK) {
+			return 0;
+		}
+
+		/* For connect reset issue, do not output error log */
+		if (errno != ECONNRESET) {
+			SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n",
+				    errno, spdk_strerror(errno));
+		}
+	}
+
+	/* connection closed */
+	return NVME_TCP_CONNECTION_FATAL;
+}
+
+/* Vectored variant of nvme_tcp_read_data().  Same return convention:
+ * bytes read, 0 on would-block, NVME_TCP_CONNECTION_FATAL on error/close.
+ * NOTE(review): the single-iovec fast path passes iov_len (size_t) as an
+ * int — fine for the bounded PDU sizes used here, but worth confirming.
+ */
+static int
+nvme_tcp_readv_data(struct spdk_sock *sock, struct iovec *iov, int iovcnt)
+{
+	int ret;
+
+	assert(sock != NULL);
+	if (iov == NULL || iovcnt == 0) {
+		return 0;
+	}
+
+	if (iovcnt == 1) {
+		return nvme_tcp_read_data(sock, iov->iov_len, iov->iov_base);
+	}
+
+	ret = spdk_sock_readv(sock, iov, iovcnt);
+
+	if (ret > 0) {
+		return ret;
+	}
+
+	if (ret < 0) {
+		if (errno == EAGAIN || errno == EWOULDBLOCK) {
+			return 0;
+		}
+
+		/* For connect reset issue, do not output error log */
+		if (errno != ECONNRESET) {
+			SPDK_ERRLOG("spdk_sock_readv() failed, errno %d: %s\n",
+				    errno, spdk_strerror(errno));
+		}
+	}
+
+	/* connection closed */
+	return NVME_TCP_CONNECTION_FATAL;
+}
+
+
+/* Receive the next chunk of the PDU payload (and data digest, if enabled)
+ * directly into the buffers described by the PDU.  The +1 iovec slot leaves
+ * room for the digest.  Returns the nvme_tcp_readv_data() result.
+ */
+static int
+nvme_tcp_read_payload_data(struct spdk_sock *sock, struct nvme_tcp_pdu *pdu)
+{
+	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1];
+	int iovcnt;
+
+	iovcnt = nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu,
+					     pdu->ddgst_enable, NULL);
+	assert(iovcnt >= 0);
+
+	return nvme_tcp_readv_data(sock, iov, iovcnt);
+}
+
+/* Point the PDU's payload at a single contiguous buffer (does not update
+ * pdu->data_len; see nvme_tcp_pdu_set_data()).
+ */
+static void
+_nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len)
+{
+	pdu->data_iov[0].iov_base = data;
+	pdu->data_iov[0].iov_len = data_len;
+	pdu->data_iovcnt = 1;
+}
+
+/* Set a single contiguous buffer as the PDU payload and record its length. */
+static void
+nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len)
+{
+	_nvme_tcp_pdu_set_data(pdu, data, data_len);
+	pdu->data_len = data_len;
+}
+
+/* Point the PDU's payload at the [data_offset, data_offset + data_len)
+ * window of the caller's iovec array.  With a DIF context attached, the
+ * window is first translated into metadata-inclusive buffer coordinates.
+ * For multi-element sources, the window is walked element by element and
+ * copied into pdu->data_iov via the SGL cursor.
+ */
+static void
+nvme_tcp_pdu_set_data_buf(struct nvme_tcp_pdu *pdu,
+			  struct iovec *iov, int iovcnt,
+			  uint32_t data_offset, uint32_t data_len)
+{
+	uint32_t buf_offset, buf_len, remain_len, len;
+	uint8_t *buf;
+	struct _nvme_tcp_sgl *pdu_sgl, buf_sgl;
+
+	pdu->data_len = data_len;
+
+	if (spdk_likely(!pdu->dif_ctx)) {
+		buf_offset = data_offset;
+		buf_len = data_len;
+	} else {
+		/* Translate logical data offsets into md-interleaved buffer offsets. */
+		spdk_dif_ctx_set_data_offset(pdu->dif_ctx, data_offset);
+		spdk_dif_get_range_with_md(data_offset, data_len,
+					   &buf_offset, &buf_len, pdu->dif_ctx);
+	}
+
+	if (iovcnt == 1) {
+		_nvme_tcp_pdu_set_data(pdu, (void *)((uint64_t)iov[0].iov_base + buf_offset), buf_len);
+	} else {
+		pdu_sgl = &pdu->sgl;
+
+		_nvme_tcp_sgl_init(pdu_sgl, pdu->data_iov, NVME_TCP_MAX_SGL_DESCRIPTORS, 0);
+		_nvme_tcp_sgl_init(&buf_sgl, iov, iovcnt, 0);
+
+		_nvme_tcp_sgl_advance(&buf_sgl, buf_offset);
+		remain_len = buf_len;
+
+		while (remain_len > 0) {
+			_nvme_tcp_sgl_get_buf(&buf_sgl, (void *)&buf, &len);
+			len = spdk_min(len, remain_len);
+
+			_nvme_tcp_sgl_advance(&buf_sgl, len);
+			remain_len -= len;
+
+			if (!_nvme_tcp_sgl_append(pdu_sgl, buf, len)) {
+				break;
+			}
+		}
+
+		/* The window must fit entirely within NVME_TCP_MAX_SGL_DESCRIPTORS. */
+		assert(remain_len == 0);
+		assert(pdu_sgl->total_size == buf_len);
+
+		pdu->data_iovcnt = NVME_TCP_MAX_SGL_DESCRIPTORS - pdu_sgl->iovcnt;
+	}
+}
+
+/* Compute the PDU-specific header length (pdu->psh_len): the header bytes
+ * that follow the common header, including the header digest and any PAD
+ * implied by the PDO field when a data segment follows.
+ */
+static void
+nvme_tcp_pdu_calc_psh_len(struct nvme_tcp_pdu *pdu, bool hdgst_enable)
+{
+	uint8_t psh_len, pdo, padding_len;
+
+	psh_len = pdu->hdr.common.hlen;
+
+	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) {
+		pdu->has_hdgst = true;
+		psh_len += SPDK_NVME_TCP_DIGEST_LEN;
+		if (pdu->hdr.common.plen > psh_len) {
+			/* A data segment follows; PDO tells us where it starts. */
+			pdo = pdu->hdr.common.pdo;
+			padding_len = pdo - psh_len;
+			if (padding_len > 0) {
+				psh_len = pdo;
+			}
+		}
+	}
+
+	/* psh_len excludes the common header, which is tracked separately. */
+	psh_len -= sizeof(struct spdk_nvme_tcp_common_pdu_hdr);
+	pdu->psh_len = psh_len;
+}
+
+#endif /* SPDK_INTERNAL_NVME_TCP_H */
diff --git a/src/spdk/include/spdk_internal/rdma.h b/src/spdk/include/spdk_internal/rdma.h
new file mode 100644
index 000000000..4a6d5104b
--- /dev/null
+++ b/src/spdk/include/spdk_internal/rdma.h
@@ -0,0 +1,117 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_RDMA_H
+#define SPDK_RDMA_H
+
+#include <infiniband/verbs.h>
+#include <rdma/rdma_cma.h>
+#include <rdma/rdma_verbs.h>
+
+struct spdk_rdma_qp_init_attr {
+ void *qp_context;
+ struct ibv_cq *send_cq;
+ struct ibv_cq *recv_cq;
+ struct ibv_srq *srq;
+ struct ibv_qp_cap cap;
+ struct ibv_pd *pd;
+};
+
+struct spdk_rdma_send_wr_list {
+ struct ibv_send_wr *first;
+ struct ibv_send_wr *last;
+};
+
+struct spdk_rdma_qp {
+ struct ibv_qp *qp;
+ struct rdma_cm_id *cm_id;
+ struct spdk_rdma_send_wr_list send_wrs;
+};
+
+/**
+ * Create RDMA provider specific qpair
+ * \param cm_id Pointer to RDMACM cm_id
+ * \param qp_attr Pointer to qpair init attributes
+ * \return Pointer to a newly created qpair on success or NULL on failure
+ */
+struct spdk_rdma_qp *spdk_rdma_qp_create(struct rdma_cm_id *cm_id,
+ struct spdk_rdma_qp_init_attr *qp_attr);
+
+/**
+ * Accept a connection request. Called by the passive side (NVMEoF target)
+ * \param spdk_rdma_qp Pointer to a qpair
+ * \param conn_param Optional information needed to establish the connection
+ * \return 0 on success, errno on failure
+ */
+int spdk_rdma_qp_accept(struct spdk_rdma_qp *spdk_rdma_qp, struct rdma_conn_param *conn_param);
+
+/**
+ * Complete the connection process, must be called by the active
+ * side (NVMEoF initiator) upon receipt RDMA_CM_EVENT_CONNECT_RESPONSE
+ * \param spdk_rdma_qp Pointer to a qpair
+ * \return 0 on success, errno on failure
+ */
+int spdk_rdma_qp_complete_connect(struct spdk_rdma_qp *spdk_rdma_qp);
+
+/**
+ * Destroy RDMA provider specific qpair
+ * \param spdk_rdma_qp Pointer to qpair to be destroyed
+ */
+void spdk_rdma_qp_destroy(struct spdk_rdma_qp *spdk_rdma_qp);
+
+/**
+ * Disconnect a connection and transition the associated qpair to the error state.
+ * Generates RDMA_CM_EVENT_DISCONNECTED on both connection sides
+ * \param spdk_rdma_qp Pointer to qpair to be destroyed
+ */
+int spdk_rdma_qp_disconnect(struct spdk_rdma_qp *spdk_rdma_qp);
+
+/**
+ * Append the given send wr structure to the qpair's outstanding sends list.
+ * This function accepts either a single Work Request or the first WR in a linked list.
+ *
+ * \param spdk_rdma_qp Pointer to SPDK RDMA qpair
+ * \param first Pointer to the first Work Request
+ * \return true if there were no outstanding WRs before, false otherwise
+ */
+bool spdk_rdma_qp_queue_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr *first);
+
+/**
+ * Submit all queued Work Requests.
+ * \param spdk_rdma_qp Pointer to SPDK RDMA qpair
+ * \param bad_wr Stores a pointer to the first failed WR if this function returns a nonzero value
+ * \return 0 on success, errno on failure
+ */
+int spdk_rdma_qp_flush_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr **bad_wr);
+
+#endif /* SPDK_RDMA_H */
diff --git a/src/spdk/include/spdk_internal/sock.h b/src/spdk/include/spdk_internal/sock.h
new file mode 100644
index 000000000..d88d6bd03
--- /dev/null
+++ b/src/spdk/include/spdk_internal/sock.h
@@ -0,0 +1,227 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * TCP network implementation abstraction layer
+ */
+
+#ifndef SPDK_INTERNAL_SOCK_H
+#define SPDK_INTERNAL_SOCK_H
+
+#include "spdk/stdinc.h"
+#include "spdk/sock.h"
+#include "spdk/queue.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_EVENTS_PER_POLL 32
+#define DEFAULT_SOCK_PRIORITY 0
+#define MIN_SOCK_PIPE_SIZE 1024
+
+struct spdk_sock {
+ struct spdk_net_impl *net_impl;
+ struct spdk_sock_opts opts;
+ int cb_cnt;
+ spdk_sock_cb cb_fn;
+ void *cb_arg;
+ struct spdk_sock_group_impl *group_impl;
+ TAILQ_ENTRY(spdk_sock) link;
+
+ int max_iovcnt;
+ TAILQ_HEAD(, spdk_sock_request) queued_reqs;
+ TAILQ_HEAD(, spdk_sock_request) pending_reqs;
+ int queued_iovcnt;
+
+ struct {
+ uint8_t closed : 1;
+ uint8_t reserved : 7;
+ } flags;
+};
+
+struct spdk_sock_group {
+ STAILQ_HEAD(, spdk_sock_group_impl) group_impls;
+ void *ctx;
+};
+
+struct spdk_sock_group_impl {
+ struct spdk_net_impl *net_impl;
+ TAILQ_HEAD(, spdk_sock) socks;
+ STAILQ_ENTRY(spdk_sock_group_impl) link;
+ /* List of removed sockets. refreshed each time we poll the sock group. */
+ int num_removed_socks;
+ /* Unfortunately, we can't just keep a tailq of the sockets in case they are freed
+ * or added to another poll group later.
+ */
+ uintptr_t removed_socks[MAX_EVENTS_PER_POLL];
+};
+
+struct spdk_net_impl {
+ const char *name;
+ int priority;
+
+ int (*getaddr)(struct spdk_sock *sock, char *saddr, int slen, uint16_t *sport, char *caddr,
+ int clen, uint16_t *cport);
+ struct spdk_sock *(*connect)(const char *ip, int port, struct spdk_sock_opts *opts);
+ struct spdk_sock *(*listen)(const char *ip, int port, struct spdk_sock_opts *opts);
+ struct spdk_sock *(*accept)(struct spdk_sock *sock);
+ int (*close)(struct spdk_sock *sock);
+ ssize_t (*recv)(struct spdk_sock *sock, void *buf, size_t len);
+ ssize_t (*readv)(struct spdk_sock *sock, struct iovec *iov, int iovcnt);
+ ssize_t (*writev)(struct spdk_sock *sock, struct iovec *iov, int iovcnt);
+
+ void (*writev_async)(struct spdk_sock *sock, struct spdk_sock_request *req);
+ int (*flush)(struct spdk_sock *sock);
+
+ int (*set_recvlowat)(struct spdk_sock *sock, int nbytes);
+ int (*set_recvbuf)(struct spdk_sock *sock, int sz);
+ int (*set_sendbuf)(struct spdk_sock *sock, int sz);
+
+ bool (*is_ipv6)(struct spdk_sock *sock);
+ bool (*is_ipv4)(struct spdk_sock *sock);
+ bool (*is_connected)(struct spdk_sock *sock);
+
+ int (*get_placement_id)(struct spdk_sock *sock, int *placement_id);
+ struct spdk_sock_group_impl *(*group_impl_create)(void);
+ int (*group_impl_add_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock);
+ int (*group_impl_remove_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock);
+ int (*group_impl_poll)(struct spdk_sock_group_impl *group, int max_events,
+ struct spdk_sock **socks);
+ int (*group_impl_close)(struct spdk_sock_group_impl *group);
+
+ int (*get_opts)(struct spdk_sock_impl_opts *opts, size_t *len);
+ int (*set_opts)(const struct spdk_sock_impl_opts *opts, size_t len);
+
+ STAILQ_ENTRY(spdk_net_impl) link;
+};
+
+void spdk_net_impl_register(struct spdk_net_impl *impl, int priority);
+
+/* Register a network implementation at program startup via a constructor
+ * function, so modules self-register simply by being linked in.
+ */
+#define SPDK_NET_IMPL_REGISTER(name, impl, priority) \
+static void __attribute__((constructor)) net_impl_register_##name(void) \
+{ \
+	spdk_net_impl_register(impl, priority); \
+}
+
+/* Append a write request to the socket's queued list and account for its
+ * iovec count (used for batching decisions).
+ */
+static inline void
+spdk_sock_request_queue(struct spdk_sock *sock, struct spdk_sock_request *req)
+{
+	TAILQ_INSERT_TAIL(&sock->queued_reqs, req, internal.link);
+	sock->queued_iovcnt += req->iovcnt;
+}
+
+/* Move a request from the queued list to the pending list once its data
+ * has been handed to the kernel, adjusting the queued iovec count.
+ */
+static inline void
+spdk_sock_request_pend(struct spdk_sock *sock, struct spdk_sock_request *req)
+{
+	TAILQ_REMOVE(&sock->queued_reqs, req, internal.link);
+	assert(sock->queued_iovcnt >= req->iovcnt);
+	sock->queued_iovcnt -= req->iovcnt;
+	TAILQ_INSERT_TAIL(&sock->pending_reqs, req, internal.link);
+}
+
+/* Complete a pending request: remove it from the pending list and invoke
+ * its callback with err.  cb_cnt guards against the callback closing the
+ * socket re-entrantly; if the user closed it inside the callback, finish
+ * the close here and return -1.  Returns 0 otherwise.
+ */
+static inline int
+spdk_sock_request_put(struct spdk_sock *sock, struct spdk_sock_request *req, int err)
+{
+	bool closed;
+	int rc = 0;
+
+	TAILQ_REMOVE(&sock->pending_reqs, req, internal.link);
+
+	req->internal.offset = 0;
+
+	closed = sock->flags.closed;
+	sock->cb_cnt++;
+	req->cb_fn(req->cb_arg, err);
+	assert(sock->cb_cnt > 0);
+	sock->cb_cnt--;
+
+	if (sock->cb_cnt == 0 && !closed && sock->flags.closed) {
+		/* The user closed the socket in response to a callback above. */
+		rc = -1;
+		spdk_sock_close(&sock);
+	}
+
+	return rc;
+}
+
+/* Abort every pending and queued request on the socket, completing each
+ * with -ECANCELED.  Uses the same cb_cnt re-entrancy guard as
+ * spdk_sock_request_put(): if a callback closed the socket, finish the
+ * close here and return -1; returns 0 otherwise.
+ */
+static inline int
+spdk_sock_abort_requests(struct spdk_sock *sock)
+{
+	struct spdk_sock_request *req;
+	bool closed;
+	int rc = 0;
+
+	closed = sock->flags.closed;
+	sock->cb_cnt++;
+
+	req = TAILQ_FIRST(&sock->pending_reqs);
+	while (req) {
+		TAILQ_REMOVE(&sock->pending_reqs, req, internal.link);
+
+		req->cb_fn(req->cb_arg, -ECANCELED);
+
+		req = TAILQ_FIRST(&sock->pending_reqs);
+	}
+
+	req = TAILQ_FIRST(&sock->queued_reqs);
+	while (req) {
+		TAILQ_REMOVE(&sock->queued_reqs, req, internal.link);
+
+		assert(sock->queued_iovcnt >= req->iovcnt);
+		sock->queued_iovcnt -= req->iovcnt;
+
+		req->cb_fn(req->cb_arg, -ECANCELED);
+
+		req = TAILQ_FIRST(&sock->queued_reqs);
+	}
+	assert(sock->cb_cnt > 0);
+	sock->cb_cnt--;
+
+	assert(TAILQ_EMPTY(&sock->queued_reqs));
+	assert(TAILQ_EMPTY(&sock->pending_reqs));
+
+	if (sock->cb_cnt == 0 && !closed && sock->flags.closed) {
+		/* The user closed the socket in response to a callback above. */
+		rc = -1;
+		spdk_sock_close(&sock);
+	}
+
+	return rc;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_INTERNAL_SOCK_H */
diff --git a/src/spdk/include/spdk_internal/thread.h b/src/spdk/include/spdk_internal/thread.h
new file mode 100644
index 000000000..10bc4824c
--- /dev/null
+++ b/src/spdk/include/spdk_internal/thread.h
@@ -0,0 +1,136 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_THREAD_INTERNAL_H_
+#define SPDK_THREAD_INTERNAL_H_
+
+#include "spdk/stdinc.h"
+#include "spdk/thread.h"
+
+#define SPDK_MAX_POLLER_NAME_LEN 256
+#define SPDK_MAX_THREAD_NAME_LEN 256
+
+enum spdk_poller_state {
+ /* The poller is registered with a thread but not currently executing its fn. */
+ SPDK_POLLER_STATE_WAITING,
+
+ /* The poller is currently running its fn. */
+ SPDK_POLLER_STATE_RUNNING,
+
+ /* The poller was unregistered during the execution of its fn. */
+ SPDK_POLLER_STATE_UNREGISTERED,
+
+ /* The poller is in the process of being paused. It will be paused
+ * during the next time it's supposed to be executed.
+ */
+ SPDK_POLLER_STATE_PAUSING,
+
+ /* The poller is registered but currently paused. It's on the
+ * paused_pollers list.
+ */
+ SPDK_POLLER_STATE_PAUSED,
+};
+
+struct spdk_poller {
+ TAILQ_ENTRY(spdk_poller) tailq;
+
+ /* Current state of the poller; should only be accessed from the poller's thread. */
+ enum spdk_poller_state state;
+
+ uint64_t period_ticks;
+ uint64_t next_run_tick;
+ uint64_t run_count;
+ uint64_t busy_count;
+ spdk_poller_fn fn;
+ void *arg;
+ struct spdk_thread *thread;
+
+ char name[SPDK_MAX_POLLER_NAME_LEN + 1];
+};
+
+enum spdk_thread_state {
+	/* The thread is processing pollers and messages via spdk_thread_poll(). */
+ SPDK_THREAD_STATE_RUNNING,
+
+	/* The thread is in the process of termination. It reaps unregistering
+	 * pollers and releases I/O channels.
+ */
+ SPDK_THREAD_STATE_EXITING,
+
+ /* The thread is exited. It is ready to call spdk_thread_destroy(). */
+ SPDK_THREAD_STATE_EXITED,
+};
+
+struct spdk_thread {
+ uint64_t tsc_last;
+ struct spdk_thread_stats stats;
+ /*
+ * Contains pollers actively running on this thread. Pollers
+ * are run round-robin. The thread takes one poller from the head
+ * of the ring, executes it, then puts it back at the tail of
+ * the ring.
+ */
+ TAILQ_HEAD(active_pollers_head, spdk_poller) active_pollers;
+ /**
+ * Contains pollers running on this thread with a periodic timer.
+ */
+ TAILQ_HEAD(timed_pollers_head, spdk_poller) timed_pollers;
+ /*
+ * Contains paused pollers. Pollers on this queue are waiting until
+ * they are resumed (in which case they're put onto the active/timer
+ * queues) or unregistered.
+ */
+ TAILQ_HEAD(paused_pollers_head, spdk_poller) paused_pollers;
+ struct spdk_ring *messages;
+ SLIST_HEAD(, spdk_msg) msg_cache;
+ size_t msg_cache_count;
+ spdk_msg_fn critical_msg;
+ uint64_t id;
+ enum spdk_thread_state state;
+
+ TAILQ_HEAD(, spdk_io_channel) io_channels;
+ TAILQ_ENTRY(spdk_thread) tailq;
+
+ char name[SPDK_MAX_THREAD_NAME_LEN + 1];
+ struct spdk_cpuset cpumask;
+ uint64_t exit_timeout_tsc;
+
+ /* User context allocated at the end */
+ uint8_t ctx[0];
+};
+
+const char *spdk_poller_state_str(enum spdk_poller_state state);
+
+const char *spdk_io_device_get_name(struct io_device *dev);
+
+#endif /* SPDK_THREAD_INTERNAL_H_ */
diff --git a/src/spdk/include/spdk_internal/uring.h b/src/spdk/include/spdk_internal/uring.h
new file mode 100644
index 000000000..ff22f11d4
--- /dev/null
+++ b/src/spdk/include/spdk_internal/uring.h
@@ -0,0 +1,51 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_URING_H
+#define SPDK_INTERNAL_URING_H
+
+#include <liburing.h>
+
+#ifndef __NR_sys_io_uring_enter
+#define __NR_sys_io_uring_enter 426
+#endif
+
+static int
+spdk_io_uring_enter(int ring_fd, unsigned int to_submit,
+ unsigned int min_complete, unsigned int flags)
+{
+ return syscall(__NR_sys_io_uring_enter, ring_fd, to_submit,
+ min_complete, flags, NULL, 0);
+}
+
+#endif /* SPDK_INTERNAL_URING_H */
diff --git a/src/spdk/include/spdk_internal/utf.h b/src/spdk/include/spdk_internal/utf.h
new file mode 100644
index 000000000..b2b1c3c45
--- /dev/null
+++ b/src/spdk/include/spdk_internal/utf.h
@@ -0,0 +1,325 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_UTF_H_
+#define SPDK_UTF_H_
+
+#include "spdk/stdinc.h"
+
+#include "spdk/endian.h"
+#include "spdk/likely.h"
+#include "spdk/string.h"
+
+static inline bool
+utf8_tail(uint8_t c)
+{
+ /* c >= 0x80 && c <= 0xBF, or binary 01xxxxxx */
+ return (c & 0xC0) == 0x80;
+}
+
+/*
+ * Check for a valid UTF-8 encoding of a single codepoint.
+ *
+ * \return Length of valid UTF-8 byte sequence, or negative if invalid.
+ */
+static inline int
+utf8_valid(const uint8_t *start, const uint8_t *end)
+{
+ const uint8_t *p = start;
+ uint8_t b0, b1, b2, b3;
+
+ if (p == end) {
+ return 0;
+ }
+
+ b0 = *p;
+
+ if (b0 <= 0x7F) {
+ return 1;
+ }
+
+ if (b0 <= 0xC1) {
+ /* Invalid start byte */
+ return -1;
+ }
+
+ if (++p == end) {
+ /* Not enough bytes left */
+ return -1;
+ }
+ b1 = *p;
+
+ if (b0 <= 0xDF) {
+ /* C2..DF 80..BF */
+ if (!utf8_tail(b1)) {
+ return -1;
+ }
+ return 2;
+ }
+
+ if (++p == end) {
+ /* Not enough bytes left */
+ return -1;
+ }
+ b2 = *p;
+
+ if (b0 == 0xE0) {
+ /* E0 A0..BF 80..BF */
+ if (b1 < 0xA0 || b1 > 0xBF || !utf8_tail(b2)) {
+ return -1;
+ }
+ return 3;
+ } else if (b0 == 0xED && b1 >= 0xA0) {
+ /*
+ * UTF-16 surrogate pairs use U+D800..U+DFFF, which would be encoded as
+ * ED A0..BF 80..BF in UTF-8; however, surrogate pairs are not allowed in UTF-8.
+ */
+ return -1;
+ } else if (b0 <= 0xEF) {
+ /* E1..EF 80..BF 80..BF */
+ if (!utf8_tail(b1) || !utf8_tail(b2)) {
+ return -1;
+ }
+ return 3;
+ }
+
+ if (++p == end) {
+ /* Not enough bytes left */
+ return -1;
+ }
+ b3 = *p;
+
+ if (b0 == 0xF0) {
+ /* F0 90..BF 80..BF 80..BF */
+ if (b1 < 0x90 || b1 > 0xBF || !utf8_tail(b2) || !utf8_tail(b3)) {
+ return -1;
+ }
+ return 4;
+ } else if (b0 <= 0xF3) {
+ /* F1..F3 80..BF 80..BF 80..BF */
+ if (!utf8_tail(b1) || !utf8_tail(b2) || !utf8_tail(b3)) {
+ return -1;
+ }
+ return 4;
+ } else if (b0 == 0xF4) {
+ /* F4 80..8F 80..BF 80..BF */
+ if (b1 < 0x80 || b1 > 0x8F || !utf8_tail(b2) || !utf8_tail(b3)) {
+ return -1;
+ }
+ return 4;
+ }
+
+ return -1;
+}
+
+static inline uint32_t
+utf8_decode_unsafe_1(const uint8_t *data)
+{
+ return data[0];
+}
+
+static inline uint32_t
+utf8_decode_unsafe_2(const uint8_t *data)
+{
+ uint32_t codepoint;
+
+ codepoint = ((data[0] & 0x1F) << 6);
+ codepoint |= (data[1] & 0x3F);
+
+ return codepoint;
+}
+
+static inline uint32_t
+utf8_decode_unsafe_3(const uint8_t *data)
+{
+ uint32_t codepoint;
+
+ codepoint = ((data[0] & 0x0F) << 12);
+ codepoint |= (data[1] & 0x3F) << 6;
+ codepoint |= (data[2] & 0x3F);
+
+ return codepoint;
+}
+
+static inline uint32_t
+utf8_decode_unsafe_4(const uint8_t *data)
+{
+ uint32_t codepoint;
+
+ codepoint = ((data[0] & 0x07) << 18);
+ codepoint |= (data[1] & 0x3F) << 12;
+ codepoint |= (data[2] & 0x3F) << 6;
+ codepoint |= (data[3] & 0x3F);
+
+ return codepoint;
+}
+
+/*
+ * Encode a single Unicode codepoint as UTF-8.
+ *
+ * buf must have at least 4 bytes of space available (hence unsafe).
+ *
+ * \return Number of bytes appended to buf, or negative if encoding failed.
+ */
+static inline int
+utf8_encode_unsafe(uint8_t *buf, uint32_t c)
+{
+ if (c <= 0x7F) {
+ buf[0] = c;
+ return 1;
+ } else if (c <= 0x7FF) {
+ buf[0] = 0xC0 | (c >> 6);
+ buf[1] = 0x80 | (c & 0x3F);
+ return 2;
+ } else if (c >= 0xD800 && c <= 0xDFFF) {
+ /* UTF-16 surrogate pairs - invalid in UTF-8 */
+ return -1;
+ } else if (c <= 0xFFFF) {
+ buf[0] = 0xE0 | (c >> 12);
+ buf[1] = 0x80 | ((c >> 6) & 0x3F);
+ buf[2] = 0x80 | (c & 0x3F);
+ return 3;
+ } else if (c <= 0x10FFFF) {
+ buf[0] = 0xF0 | (c >> 18);
+ buf[1] = 0x80 | ((c >> 12) & 0x3F);
+ buf[2] = 0x80 | ((c >> 6) & 0x3F);
+ buf[3] = 0x80 | (c & 0x3F);
+ return 4;
+ }
+ return -1;
+}
+
+static inline int
+utf8_codepoint_len(uint32_t c)
+{
+ if (c <= 0x7F) {
+ return 1;
+ } else if (c <= 0x7FF) {
+ return 2;
+ } else if (c >= 0xD800 && c <= 0xDFFF) {
+ /* UTF-16 surrogate pairs - invalid in UTF-8 */
+ return -1;
+ } else if (c <= 0xFFFF) {
+ return 3;
+ } else if (c <= 0x10FFFF) {
+ return 4;
+ }
+ return -1;
+}
+
+static inline bool
+utf16_valid_surrogate_high(uint32_t val)
+{
+ return val >= 0xD800 && val <= 0xDBFF;
+}
+
+static inline bool
+utf16_valid_surrogate_low(uint32_t val)
+{
+ return val >= 0xDC00 && val <= 0xDFFF;
+}
+
+/*
+ * Check for a valid UTF-16LE encoding of a single codepoint.
+ *
+ * \return Length of valid UTF-16LE sequence in 16-bit code units, or negative if invalid.
+ */
+static inline int
+utf16le_valid(const uint16_t *start, const uint16_t *end)
+{
+ const uint16_t *p = start;
+ uint16_t high, low;
+
+ if (p == end) {
+ return 0;
+ }
+
+ high = from_le16(p);
+
+ if (high <= 0xD7FF || high >= 0xE000) {
+ /* Single code unit in BMP */
+ return 1;
+ }
+
+ if (high >= 0xDC00) {
+ /* Low surrogate in first code unit - invalid */
+ return -1;
+ }
+
+ assert(utf16_valid_surrogate_high(high));
+
+ if (++p == end) {
+ /* Not enough code units left */
+ return -1;
+ }
+ low = from_le16(p);
+
+ if (!utf16_valid_surrogate_low(low)) {
+ return -1;
+ }
+
+ /* Valid surrogate pair */
+ return 2;
+}
+
+static inline uint32_t
+utf16_decode_surrogate_pair(uint32_t high, uint32_t low)
+{
+ uint32_t codepoint;
+
+ assert(utf16_valid_surrogate_high(high));
+ assert(utf16_valid_surrogate_low(low));
+
+ codepoint = low;
+ codepoint &= 0x3FF;
+ codepoint |= ((high & 0x3FF) << 10);
+ codepoint += 0x10000;
+
+ return codepoint;
+}
+
+static inline void
+utf16_encode_surrogate_pair(uint32_t codepoint, uint16_t *high, uint16_t *low)
+{
+ assert(codepoint >= 0x10000);
+ assert(codepoint <= 0x10FFFF);
+
+ codepoint -= 0x10000;
+ *high = 0xD800 | (codepoint >> 10);
+ *low = 0xDC00 | (codepoint & 0x3FF);
+
+ assert(utf16_valid_surrogate_high(*high));
+ assert(utf16_valid_surrogate_low(*low));
+}
+
+#endif
diff --git a/src/spdk/include/spdk_internal/vhost_user.h b/src/spdk/include/spdk_internal/vhost_user.h
new file mode 100644
index 000000000..92ed3b65b
--- /dev/null
+++ b/src/spdk/include/spdk_internal/vhost_user.h
@@ -0,0 +1,140 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Structures defined in the vhost-user specification
+ */
+
+#ifndef SPDK_VHOST_USER_H
+#define SPDK_VHOST_USER_H
+
+#include "spdk/stdinc.h"
+
+#include <linux/vhost.h>
+
+#ifndef VHOST_USER_MEMORY_MAX_NREGIONS
+#define VHOST_USER_MEMORY_MAX_NREGIONS 8
+#endif
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE 256
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_MQ
+#define VHOST_USER_PROTOCOL_F_MQ 0
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG 9
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD
+#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
+#endif
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+enum vhost_user_request {
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+ VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+ VHOST_USER_GET_QUEUE_NUM = 17,
+ VHOST_USER_SET_VRING_ENABLE = 18,
+ VHOST_USER_SEND_RARP = 19,
+ VHOST_USER_NET_SET_MTU = 20,
+ VHOST_USER_SET_SLAVE_REQ_FD = 21,
+ VHOST_USER_IOTLB_MSG = 22,
+ VHOST_USER_GET_CONFIG = 24,
+ VHOST_USER_SET_CONFIG = 25,
+ VHOST_USER_CRYPTO_CREATE_SESS = 26,
+ VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+ VHOST_USER_POSTCOPY_ADVISE = 28,
+ VHOST_USER_POSTCOPY_LISTEN = 29,
+ VHOST_USER_POSTCOPY_END = 30,
+ VHOST_USER_MAX
+};
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+ uint32_t offset;
+ uint32_t size;
+ uint32_t flags;
+ uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
+/** Fixed-size vhost_memory struct */
+struct vhost_memory_padded {
+ uint32_t nregions;
+ uint32_t padding;
+ struct vhost_memory_region regions[VHOST_USER_MEMORY_MAX_NREGIONS];
+};
+
+struct vhost_user_msg {
+ enum vhost_user_request request;
+
+#define VHOST_USER_VERSION_MASK 0x3
+#define VHOST_USER_REPLY_MASK (0x1 << 2)
+ uint32_t flags;
+ uint32_t size; /**< the following payload size */
+ union {
+#define VHOST_USER_VRING_IDX_MASK 0xff
+#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
+ uint64_t u64;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ struct vhost_memory_padded memory;
+ struct vhost_user_config cfg;
+ } payload;
+} __attribute((packed));
+
+#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64)
+#define VHOST_USER_PAYLOAD_SIZE \
+ (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE)
+
+#endif /* SPDK_VHOST_USER_H */
diff --git a/src/spdk/include/spdk_internal/virtio.h b/src/spdk/include/spdk_internal/virtio.h
new file mode 100644
index 000000000..c30013efe
--- /dev/null
+++ b/src/spdk/include/spdk_internal/virtio.h
@@ -0,0 +1,486 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_VIRTIO_H
+#define SPDK_VIRTIO_H
+
+#include "spdk/stdinc.h"
+
+#include <linux/virtio_ring.h>
+#include <linux/virtio_pci.h>
+#include <linux/virtio_config.h>
+
+#include "spdk_internal/log.h"
+#include "spdk/likely.h"
+#include "spdk/queue.h"
+#include "spdk/json.h"
+#include "spdk/thread.h"
+#include "spdk/pci_ids.h"
+#include "spdk/env.h"
+
+/**
+ * The maximum virtqueue size is 2^15. Use that value as the end of
+ * descriptor chain terminator since it will never be a valid index
+ * in the descriptor table. This is used to verify we are correctly
+ * handling vq_free_cnt.
+ */
+#define VQ_RING_DESC_CHAIN_END 32768
+
+#define SPDK_VIRTIO_MAX_VIRTQUEUES 0x100
+
+/* Extra status define for readability */
+#define VIRTIO_CONFIG_S_RESET 0
+
+struct virtio_dev_ops;
+
+struct virtio_dev {
+ struct virtqueue **vqs;
+
+ /** Name of this virtio dev set by backend */
+ char *name;
+
+ /** Fixed number of backend-specific non-I/O virtqueues. */
+ uint16_t fixed_queues_num;
+
+ /** Max number of virtqueues the host supports. */
+ uint16_t max_queues;
+
+ /** Common device & guest features. */
+ uint64_t negotiated_features;
+
+ int is_hw;
+
+ /** Modern/legacy virtio device flag. */
+ uint8_t modern;
+
+ /** Mutex for asynchronous virtqueue-changing operations. */
+ pthread_mutex_t mutex;
+
+ /** Backend-specific callbacks. */
+ const struct virtio_dev_ops *backend_ops;
+
+ /** Context for the backend ops */
+ void *ctx;
+};
+
+struct virtio_dev_ops {
+ int (*read_dev_cfg)(struct virtio_dev *hw, size_t offset,
+ void *dst, int len);
+ int (*write_dev_cfg)(struct virtio_dev *hw, size_t offset,
+ const void *src, int len);
+ uint8_t (*get_status)(struct virtio_dev *hw);
+ void (*set_status)(struct virtio_dev *hw, uint8_t status);
+
+ /**
+ * Get device features. The features might be already
+ * negotiated with driver (guest) features.
+ */
+ uint64_t (*get_features)(struct virtio_dev *vdev);
+
+ /**
+ * Negotiate and set device features.
+ * The negotiation can fail with return code -1.
+ * This function should also set vdev->negotiated_features field.
+ */
+ int (*set_features)(struct virtio_dev *vdev, uint64_t features);
+
+ /** Destruct virtio device */
+ void (*destruct_dev)(struct virtio_dev *vdev);
+
+ uint16_t (*get_queue_size)(struct virtio_dev *vdev, uint16_t queue_id);
+ int (*setup_queue)(struct virtio_dev *hw, struct virtqueue *vq);
+ void (*del_queue)(struct virtio_dev *hw, struct virtqueue *vq);
+ void (*notify_queue)(struct virtio_dev *hw, struct virtqueue *vq);
+
+ void (*dump_json_info)(struct virtio_dev *hw, struct spdk_json_write_ctx *w);
+ void (*write_json_config)(struct virtio_dev *hw, struct spdk_json_write_ctx *w);
+};
+
+struct vq_desc_extra {
+ void *cookie;
+ uint16_t ndescs;
+};
+
+struct virtqueue {
+ struct virtio_dev *vdev; /**< owner of this virtqueue */
+ struct vring vq_ring; /**< vring keeping desc, used and avail */
+ /**
+ * Last consumed descriptor in the used table,
+ * trails vq_ring.used->idx.
+ */
+ uint16_t vq_used_cons_idx;
+ uint16_t vq_nentries; /**< vring desc numbers */
+ uint16_t vq_free_cnt; /**< num of desc available */
+ uint16_t vq_avail_idx; /**< sync until needed */
+
+ void *vq_ring_virt_mem; /**< virtual address of vring */
+ unsigned int vq_ring_size;
+
+ uint64_t vq_ring_mem; /**< physical address of vring */
+
+ /**
+ * Head of the free chain in the descriptor table. If
+ * there are no free descriptors, this will be set to
+ * VQ_RING_DESC_CHAIN_END.
+ */
+ uint16_t vq_desc_head_idx;
+
+ /**
+ * Tail of the free chain in desc table. If
+ * there are no free descriptors, this will be set to
+ * VQ_RING_DESC_CHAIN_END.
+ */
+ uint16_t vq_desc_tail_idx;
+ uint16_t vq_queue_index; /**< PCI queue index */
+ uint16_t *notify_addr;
+
+ /** Thread that's polling this queue. */
+ struct spdk_thread *owner_thread;
+
+ uint16_t req_start;
+ uint16_t req_end;
+ uint16_t reqs_finished;
+
+ struct vq_desc_extra vq_descx[0];
+};
+
+enum spdk_virtio_desc_type {
+ SPDK_VIRTIO_DESC_RO = 0, /**< Read only */
+ SPDK_VIRTIO_DESC_WR = VRING_DESC_F_WRITE, /**< Write only */
+ /* TODO VIRTIO_DESC_INDIRECT */
+};
+
+/** Context for creating PCI virtio_devs */
+struct virtio_pci_ctx;
+
+/**
+ * Callback for creating virtio_dev from a PCI device.
+ * \param pci_ctx PCI context to be associated with a virtio_dev
+ * \param ctx context provided by the user
+ * \return 0 on success, -1 on error.
+ */
+typedef int (*virtio_pci_create_cb)(struct virtio_pci_ctx *pci_ctx, void *ctx);
+
+uint16_t virtio_recv_pkts(struct virtqueue *vq, void **io, uint32_t *len, uint16_t io_cnt);
+
+/**
+ * Start a new request on the current vring head position and associate it
+ * with an opaque cookie object. The previous request in given vq will be
+ * made visible to the device in hopes it can be processed early, but there's
+ * no guarantee it will be until the device is notified with \c
+ * virtqueue_req_flush. This behavior is simply an optimization and virtqueues
+ * must always be flushed. Empty requests (with no descriptors added) will be
+ * ignored. The device owning given virtqueue must be started.
+ *
+ * \param vq virtio queue
+ * \param cookie opaque object to associate with this request. Once the request
+ * is sent, processed and a response is received, the same object will be
+ * returned to the user after calling the virtio poll API.
+ * \param iovcnt number of required iovectors for the request. This can be
+ * higher than the actual number of iovectors to be added.
+ * \return 0 on success or negative errno otherwise. If the `iovcnt` is
+ * greater than virtqueue depth, -EINVAL is returned. If simply not enough
+ * iovectors are available, -ENOMEM is returned.
+ */
+int virtqueue_req_start(struct virtqueue *vq, void *cookie, int iovcnt);
+
+/**
+ * Flush a virtqueue. This will notify the device if it's required.
+ * The device owning given virtqueue must be started.
+ *
+ * \param vq virtio queue
+ */
+void virtqueue_req_flush(struct virtqueue *vq);
+
+/**
+ * Abort the very last request in a virtqueue. This will restore virtqueue
+ * state to the point before the last request was created. Note that this
+ * is only effective if a queue hasn't been flushed yet. The device owning
+ * given virtqueue must be started.
+ *
+ * \param vq virtio queue
+ */
+void virtqueue_req_abort(struct virtqueue *vq);
+
+/**
+ * Add iovec chain to the last created request. This call does not provide any
+ * error-checking. The caller has to ensure that he doesn't add more iovs than
+ * what was specified during request creation. The device owning given virtqueue
+ * must be started.
+ *
+ * \param vq virtio queue
+ * \param iovs iovec array
+ * \param iovcnt number of iovs in iovec array
+ * \param desc_type type of all given iovectors
+ */
+void virtqueue_req_add_iovs(struct virtqueue *vq, struct iovec *iovs, uint16_t iovcnt,
+ enum spdk_virtio_desc_type desc_type);
+
+/**
+ * Construct a virtio device. The device will be in stopped state by default.
+ * Before doing any I/O, it has to be manually started via \c virtio_dev_restart.
+ *
+ * \param vdev memory for virtio device, must be zeroed
+ * \param name name for the virtio device
+ * \param ops backend callbacks
+ * \param ops_ctx argument for the backend callbacks
+ * \return zero on success, or negative error code otherwise
+ */
+int virtio_dev_construct(struct virtio_dev *vdev, const char *name,
+ const struct virtio_dev_ops *ops, void *ops_ctx);
+
+/**
+ * Reset the device and prepare it to be `virtio_dev_start`ed. This call
+ * will also renegotiate feature flags.
+ *
+ * \param vdev virtio device
+ * \param req_features features this driver supports. A VIRTIO_F_VERSION_1
+ * flag will be automatically appended, as legacy devices are not supported.
+ */
+int virtio_dev_reset(struct virtio_dev *vdev, uint64_t req_features);
+
+/**
+ * Notify the host to start processing this virtio device. This is
+ * a blocking call that won't return until the host has started.
+ * This will also allocate virtqueues.
+ *
+ * \param vdev virtio device
+ * \param max_queues number of queues to allocate. The max number of
+ * usable I/O queues is also limited by the host device. `vdev` will be
+ * started successfully even if the host supports less queues than requested.
+ * \param fixed_queues_num number of queues preceding the first
+ * request queue. For Virtio-SCSI this is equal to 2, as there are
+ * additional event and control queues.
+ */
+int virtio_dev_start(struct virtio_dev *vdev, uint16_t max_queues,
+ uint16_t fixed_queues_num);
+
+/**
+ * Stop the host from processing the device. This is a blocking call
+ * that won't return until all outstanding I/O has been processed on
+ * the host (virtio device) side. In order to re-start the device, it
+ * has to be `virtio_dev_reset` first.
+ *
+ * \param vdev virtio device
+ */
+void virtio_dev_stop(struct virtio_dev *vdev);
+
+/**
+ * Destruct a virtio device. Note that it must be in the stopped state.
+ * The virtio_dev should be manually freed afterwards.
+ *
+ * \param vdev virtio device
+ */
+void virtio_dev_destruct(struct virtio_dev *vdev);
+
+/**
+ * Bind a virtqueue with given index to the current thread.
+ *
+ * This function is thread-safe.
+ *
+ * \param vdev vhost device
+ * \param index virtqueue index
+ * \return 0 on success, -1 in case a virtqueue with given index either
+ * does not exist or is already acquired.
+ */
+int virtio_dev_acquire_queue(struct virtio_dev *vdev, uint16_t index);
+
+/**
+ * Look for unused queue and bind it to the current thread. This will
+ * scan the queues in range from *start_index* (inclusive) up to
+ * vdev->max_queues (exclusive).
+ *
+ * This function is thread-safe.
+ *
+ * \param vdev vhost device
+ * \param start_index virtqueue index to start looking from
+ * \return index of acquired queue or -1 in case no unused queue in given range
+ * has been found
+ */
+int32_t virtio_dev_find_and_acquire_queue(struct virtio_dev *vdev, uint16_t start_index);
+
+/**
+ * Get thread that acquired given virtqueue.
+ *
+ * This function is thread-safe.
+ *
+ * \param vdev vhost device
+ * \param index index of virtqueue
+ * \return thread that acquired given virtqueue. If the queue is unused
+ * or doesn't exist a NULL is returned.
+ */
+struct spdk_thread *virtio_dev_queue_get_thread(struct virtio_dev *vdev, uint16_t index);
+
+/**
+ * Check if virtqueue with given index is acquired.
+ *
+ * This function is thread-safe.
+ *
+ * \param vdev vhost device
+ * \param index index of virtqueue
+ * \return virtqueue acquire status. in case of invalid index *false* is returned.
+ */
+bool virtio_dev_queue_is_acquired(struct virtio_dev *vdev, uint16_t index);
+
+/**
+ * Release previously acquired queue.
+ *
+ * This function must be called from the thread that acquired the queue.
+ *
+ * \param vdev vhost device
+ * \param index index of virtqueue to release
+ */
+void virtio_dev_release_queue(struct virtio_dev *vdev, uint16_t index);
+
+/**
+ * Get Virtio status flags.
+ *
+ * \param vdev virtio device
+ */
+uint8_t virtio_dev_get_status(struct virtio_dev *vdev);
+
+/**
+ * Set Virtio status flag. The flags have to be set in very specific order
+ * defined in the VIRTIO 1.0 spec section 3.1.1. To unset the flags, stop the
+ * device or set \c VIRTIO_CONFIG_S_RESET status flag. There is no way to
+ * unset only particular flags.
+ *
+ * \param vdev virtio device
+ * \param flag flag to set
+ */
+void virtio_dev_set_status(struct virtio_dev *vdev, uint8_t flag);
+
+/**
+ * Write raw data into the device config at given offset. This call does not
+ * provide any error checking.
+ *
+ * \param vdev virtio device
+ * \param offset offset in bytes
+ * \param src pointer to data to copy from
+ * \param len length of data to copy in bytes
+ * \return 0 on success, negative errno otherwise
+ */
+int virtio_dev_write_dev_config(struct virtio_dev *vdev, size_t offset, const void *src, int len);
+
+/**
+ * Read raw data from the device config at given offset. This call does not
+ * provide any error checking.
+ *
+ * \param vdev virtio device
+ * \param offset offset in bytes
+ * \param dst pointer to buffer to copy data into
+ * \param len length of data to copy in bytes
+ * \return 0 on success, negative errno otherwise
+ */
+int virtio_dev_read_dev_config(struct virtio_dev *vdev, size_t offset, void *dst, int len);
+
+/**
+ * Get backend-specific ops for given device.
+ *
+ * \param vdev virtio device
+ */
+const struct virtio_dev_ops *virtio_dev_backend_ops(struct virtio_dev *vdev);
+
+/**
+ * Check if the device has negotiated given feature bit.
+ *
+ * \param vdev virtio device
+ * \param bit feature bit
+ */
+static inline bool
+virtio_dev_has_feature(struct virtio_dev *vdev, uint64_t bit)
+{
+ return !!(vdev->negotiated_features & (1ULL << bit));
+}
+
+/**
+ * Dump all device specific information into given json stream.
+ *
+ * \param vdev virtio device
+ * \param w json stream
+ */
+void virtio_dev_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w);
+
+/**
+ * Enumerate all PCI Virtio devices of given type on the system.
+ *
+ * \param enum_cb a function to be called for each valid PCI device.
+ * If a virtio_dev has been created, the callback should return 0.
+ * Returning any other value will cause the PCI context to be freed,
+ * making it unusable.
+ * \param enum_ctx additional opaque context to be passed into `enum_cb`
+ * \param pci_device_id PCI Device ID of devices to iterate through
+ */
+int virtio_pci_dev_enumerate(virtio_pci_create_cb enum_cb, void *enum_ctx,
+ uint16_t pci_device_id);
+
+/**
+ * Attach a PCI Virtio device of given type.
+ *
+ * \param create_cb callback to create a virtio_dev.
+ * If a virtio_dev has been created, the callback should return 0.
+ * Returning any other value will cause the PCI context to be freed,
+ * making it unusable.
+ * \param enum_ctx additional opaque context to be passed into `enum_cb`
+ * \param pci_device_id PCI Device ID of devices to iterate through
+ * \param pci_addr PCI address of the device to attach
+ */
+int virtio_pci_dev_attach(virtio_pci_create_cb create_cb, void *enum_ctx,
+ uint16_t pci_device_id, struct spdk_pci_addr *pci_addr);
+
+/**
+ * Connect to a vhost-user device and init corresponding virtio_dev struct.
+ * The virtio_dev will have to be freed with \c virtio_dev_free.
+ *
+ * \param vdev preallocated vhost device struct to operate on
+ * \param name name of this virtio device
+ * \param path path to the Unix domain socket of the vhost-user device
+ * \param queue_size size of each of the queues
+ * \return 0 on success, negative value on error (NOTE: confirm exact error convention)
+ */
+int virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path,
+ uint32_t queue_size);
+
+/**
+ * Initialize virtio_dev for a given PCI device.
+ * The virtio_dev has to be freed with \c virtio_dev_destruct.
+ *
+ * \param vdev preallocated vhost device struct to operate on
+ * \param name name of this virtio device
+ * \param pci_ctx context of the PCI device
+ * \return 0 on success, -1 on error.
+ */
+int virtio_pci_dev_init(struct virtio_dev *vdev, const char *name,
+ struct virtio_pci_ctx *pci_ctx);
+
+#endif /* SPDK_VIRTIO_H */