summaryrefslogtreecommitdiffstats
path: root/src/spdk/include/spdk_internal
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/spdk/include/spdk_internal
parentInitial commit. (diff)
downloadceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz
ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/include/spdk_internal')
-rw-r--r--src/spdk/include/spdk_internal/accel_engine.h130
-rw-r--r--src/spdk/include/spdk_internal/assert.h55
-rw-r--r--src/spdk/include/spdk_internal/event.h197
-rw-r--r--src/spdk/include/spdk_internal/idxd.h74
-rw-r--r--src/spdk/include/spdk_internal/log.h108
-rw-r--r--src/spdk/include/spdk_internal/lvolstore.h128
-rw-r--r--src/spdk/include/spdk_internal/mock.h135
-rw-r--r--src/spdk/include/spdk_internal/nvme_tcp.h633
-rw-r--r--src/spdk/include/spdk_internal/rdma.h117
-rw-r--r--src/spdk/include/spdk_internal/sock.h227
-rw-r--r--src/spdk/include/spdk_internal/thread.h136
-rw-r--r--src/spdk/include/spdk_internal/uring.h51
-rw-r--r--src/spdk/include/spdk_internal/utf.h325
-rw-r--r--src/spdk/include/spdk_internal/vhost_user.h140
-rw-r--r--src/spdk/include/spdk_internal/virtio.h486
15 files changed, 2942 insertions, 0 deletions
diff --git a/src/spdk/include/spdk_internal/accel_engine.h b/src/spdk/include/spdk_internal/accel_engine.h
new file mode 100644
index 000000000..9b78bc967
--- /dev/null
+++ b/src/spdk/include/spdk_internal/accel_engine.h
@@ -0,0 +1,130 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_ACCEL_ENGINE_H
+#define SPDK_INTERNAL_ACCEL_ENGINE_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/accel_engine.h"
+#include "spdk/queue.h"
+
+struct spdk_accel_task {
+ spdk_accel_completion_cb cb;
+ void *cb_arg;
+ uint8_t offload_ctx[0];
+};
+
+struct spdk_accel_engine {
+ uint64_t (*get_capabilities)(void);
+ int (*copy)(struct spdk_io_channel *ch, void *dst, void *src,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*dualcast)(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ uint32_t (*batch_get_max)(void);
+ struct spdk_accel_batch *(*batch_create)(struct spdk_io_channel *ch);
+ int (*batch_prep_copy)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_prep_dualcast)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *dst1, void *dst2, void *src, uint64_t nbytes,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_prep_compare)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_prep_fill)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ void *dst, uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_prep_crc32c)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_submit)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+ spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*batch_cancel)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch);
+ int (*compare)(struct spdk_io_channel *ch, void *src1, void *src2,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*fill)(struct spdk_io_channel *ch, void *dst, uint8_t fill,
+ uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ int (*crc32c)(struct spdk_io_channel *ch, uint32_t *dst, void *src,
+ uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
+ struct spdk_io_channel *(*get_io_channel)(void);
+};
+
+struct spdk_accel_module_if {
+ /** Initialization function for the module. Called by the spdk
+ * application during startup.
+ *
+ * Modules are required to define this function.
+ */
+ int (*module_init)(void);
+
+ /** Finish function for the module. Called by the spdk application
+ * before the spdk application exits to perform any necessary cleanup.
+ *
+ * Modules are not required to define this function.
+ */
+ void (*module_fini)(void *ctx);
+
+ /** Function called to return a text string representing the
+ * module's configuration options for inclusion in an
+ * spdk configuration file.
+ */
+ void (*config_text)(FILE *fp);
+
+ /**
+ * Write Acceleration module configuration into provided JSON context.
+ */
+ void (*write_config_json)(struct spdk_json_write_ctx *w);
+
+ /**
+ * Returns the allocation size required for the modules to use for context.
+ */
+ size_t (*get_ctx_size)(void);
+
+ TAILQ_ENTRY(spdk_accel_module_if) tailq;
+};
+
+void spdk_accel_hw_engine_register(struct spdk_accel_engine *accel_engine);
+void spdk_accel_module_list_add(struct spdk_accel_module_if *accel_module);
+
+#define SPDK_ACCEL_MODULE_REGISTER(init_fn, fini_fn, config_fn, config_json, ctx_size_fn) \
+ static struct spdk_accel_module_if init_fn ## _if = { \
+ .module_init = init_fn, \
+ .module_fini = fini_fn, \
+ .config_text = config_fn, \
+ .write_config_json = config_json, \
+ .get_ctx_size = ctx_size_fn, \
+ }; \
+ __attribute__((constructor)) static void init_fn ## _init(void) \
+ { \
+ spdk_accel_module_list_add(&init_fn ## _if); \
+ }
+
+#endif
diff --git a/src/spdk/include/spdk_internal/assert.h b/src/spdk/include/spdk_internal/assert.h
new file mode 100644
index 000000000..7e4c45070
--- /dev/null
+++ b/src/spdk/include/spdk_internal/assert.h
@@ -0,0 +1,55 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_ASSERT_H
+#define SPDK_INTERNAL_ASSERT_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/assert.h"
+
+#if !defined(DEBUG) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
+#define SPDK_UNREACHABLE() __builtin_unreachable()
+#else
+#define SPDK_UNREACHABLE() abort()
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_INTERNAL_ASSERT_H */
diff --git a/src/spdk/include/spdk_internal/event.h b/src/spdk/include/spdk_internal/event.h
new file mode 100644
index 000000000..2d88d08ba
--- /dev/null
+++ b/src/spdk/include/spdk_internal/event.h
@@ -0,0 +1,197 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_EVENT_H
+#define SPDK_INTERNAL_EVENT_H
+
+#include "spdk/stdinc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "spdk/event.h"
+#include "spdk/json.h"
+#include "spdk/thread.h"
+#include "spdk/util.h"
+
+struct spdk_event {
+ uint32_t lcore;
+ spdk_event_fn fn;
+ void *arg1;
+ void *arg2;
+};
+
+enum spdk_reactor_state {
+ SPDK_REACTOR_STATE_UNINITIALIZED = 0,
+ SPDK_REACTOR_STATE_INITIALIZED = 1,
+ SPDK_REACTOR_STATE_RUNNING = 2,
+ SPDK_REACTOR_STATE_EXITING = 3,
+ SPDK_REACTOR_STATE_SHUTDOWN = 4,
+};
+
+struct spdk_lw_thread {
+ TAILQ_ENTRY(spdk_lw_thread) link;
+ bool resched;
+ uint64_t tsc_start;
+};
+
+struct spdk_reactor {
+ /* Lightweight threads running on this reactor */
+ TAILQ_HEAD(, spdk_lw_thread) threads;
+ uint32_t thread_count;
+
+ /* Logical core number for this reactor. */
+ uint32_t lcore;
+
+ struct {
+ uint32_t is_valid : 1;
+ uint32_t reserved : 31;
+ } flags;
+
+ uint64_t tsc_last;
+
+ struct spdk_ring *events;
+
+ /* The last known rusage values */
+ struct rusage rusage;
+ uint64_t last_rusage;
+
+ uint64_t busy_tsc;
+ uint64_t idle_tsc;
+} __attribute__((aligned(SPDK_CACHE_LINE_SIZE)));
+
+int spdk_reactors_init(void);
+void spdk_reactors_fini(void);
+
+void spdk_reactors_start(void);
+void spdk_reactors_stop(void *arg1);
+
+struct spdk_reactor *spdk_reactor_get(uint32_t lcore);
+
+/**
+ * Allocate and pass an event to each reactor, serially.
+ *
+ * The allocated event is processed asynchronously - i.e. spdk_for_each_reactor
+ * will return prior to `fn` being called on each reactor.
+ *
+ * \param fn This is the function that will be called on each reactor.
+ * \param arg1 Argument will be passed to fn when called.
+ * \param arg2 Argument will be passed to fn when called.
+ * \param cpl This will be called on the originating reactor after `fn` has been
+ * called on each reactor.
+ */
+void spdk_for_each_reactor(spdk_event_fn fn, void *arg1, void *arg2, spdk_event_fn cpl);
+
+struct spdk_subsystem {
+ const char *name;
+ /* User must call spdk_subsystem_init_next() when they are done with their initialization. */
+ void (*init)(void);
+ void (*fini)(void);
+ void (*config)(FILE *fp);
+
+ /**
+ * Write JSON configuration handler.
+ *
+ * \param w JSON write context
+ */
+ void (*write_config_json)(struct spdk_json_write_ctx *w);
+ TAILQ_ENTRY(spdk_subsystem) tailq;
+};
+
+struct spdk_subsystem *spdk_subsystem_find(const char *name);
+struct spdk_subsystem *spdk_subsystem_get_first(void);
+struct spdk_subsystem *spdk_subsystem_get_next(struct spdk_subsystem *cur_subsystem);
+
+struct spdk_subsystem_depend {
+ const char *name;
+ const char *depends_on;
+ TAILQ_ENTRY(spdk_subsystem_depend) tailq;
+};
+
+struct spdk_subsystem_depend *spdk_subsystem_get_first_depend(void);
+struct spdk_subsystem_depend *spdk_subsystem_get_next_depend(struct spdk_subsystem_depend
+ *cur_depend);
+
+void spdk_add_subsystem(struct spdk_subsystem *subsystem);
+void spdk_add_subsystem_depend(struct spdk_subsystem_depend *depend);
+
+typedef void (*spdk_subsystem_init_fn)(int rc, void *ctx);
+void spdk_subsystem_init(spdk_subsystem_init_fn cb_fn, void *cb_arg);
+void spdk_subsystem_fini(spdk_msg_fn cb_fn, void *cb_arg);
+void spdk_subsystem_init_next(int rc);
+void spdk_subsystem_fini_next(void);
+void spdk_subsystem_config(FILE *fp);
+void spdk_app_json_config_load(const char *json_config_file, const char *rpc_addr,
+ spdk_subsystem_init_fn cb_fn, void *cb_arg,
+ bool stop_on_error);
+
+/**
+ * Save pointed \c subsystem configuration to the JSON write context \c w. In case of
+ * error \c null is written to the JSON context.
+ *
+ * \param w JSON write context
+ * \param subsystem the subsystem to query
+ */
+void spdk_subsystem_config_json(struct spdk_json_write_ctx *w, struct spdk_subsystem *subsystem);
+
+void spdk_rpc_initialize(const char *listen_addr);
+void spdk_rpc_finish(void);
+
+/**
+ * \brief Register a new subsystem
+ */
+#define SPDK_SUBSYSTEM_REGISTER(_name) \
+ __attribute__((constructor)) static void _name ## _register(void) \
+ { \
+ spdk_add_subsystem(&_name); \
+ }
+
+/**
+ * \brief Declare that a subsystem depends on another subsystem.
+ */
+#define SPDK_SUBSYSTEM_DEPEND(_name, _depends_on) \
+ static struct spdk_subsystem_depend __subsystem_ ## _name ## _depend_on ## _depends_on = { \
+ .name = #_name, \
+ .depends_on = #_depends_on, \
+ }; \
+ __attribute__((constructor)) static void _name ## _depend_on ## _depends_on(void) \
+ { \
+ spdk_add_subsystem_depend(&__subsystem_ ## _name ## _depend_on ## _depends_on); \
+ }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_INTERNAL_EVENT_H */
diff --git a/src/spdk/include/spdk_internal/idxd.h b/src/spdk/include/spdk_internal/idxd.h
new file mode 100644
index 000000000..17db2405d
--- /dev/null
+++ b/src/spdk/include/spdk_internal/idxd.h
@@ -0,0 +1,74 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IDXD_INTERNAL_H__
+#define __IDXD_INTERNAL_H__
+
+#include "spdk/stdinc.h"
+
+#include "spdk/idxd.h"
+#include "spdk/queue.h"
+#include "spdk/mmio.h"
+#include "spdk/bit_array.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define IDXD_MAX_CONFIG_NUM 1
+
+enum dsa_opcode {
+ IDXD_OPCODE_NOOP = 0,
+ IDXD_OPCODE_BATCH = 1,
+ IDXD_OPCODE_DRAIN = 2,
+ IDXD_OPCODE_MEMMOVE = 3,
+ IDXD_OPCODE_MEMFILL = 4,
+ IDXD_OPCODE_COMPARE = 5,
+ IDXD_OPCODE_COMPVAL = 6,
+ IDXD_OPCODE_CR_DELTA = 7,
+ IDXD_OPCODE_AP_DELTA = 8,
+ IDXD_OPCODE_DUALCAST = 9,
+ IDXD_OPCODE_CRC32C_GEN = 16,
+ IDXD_OPCODE_COPY_CRC = 17,
+ IDXD_OPCODE_DIF_CHECK = 18,
+ IDXD_OPCODE_DIF_INS = 19,
+ IDXD_OPCODE_DIF_STRP = 20,
+ IDXD_OPCODE_DIF_UPDT = 21,
+ IDXD_OPCODE_CFLUSH = 32,
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __IDXD_INTERNAL_H__ */
diff --git a/src/spdk/include/spdk_internal/log.h b/src/spdk/include/spdk_internal/log.h
new file mode 100644
index 000000000..0993d1016
--- /dev/null
+++ b/src/spdk/include/spdk_internal/log.h
@@ -0,0 +1,108 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * Logging interfaces
+ */
+
+#ifndef SPDK_INTERNAL_LOG_H
+#define SPDK_INTERNAL_LOG_H
+
+#include "spdk/log.h"
+#include "spdk/queue.h"
+
+extern enum spdk_log_level g_spdk_log_level;
+extern enum spdk_log_level g_spdk_log_print_level;
+extern enum spdk_log_level g_spdk_log_backtrace_level;
+
+struct spdk_log_flag {
+ TAILQ_ENTRY(spdk_log_flag) tailq;
+ const char *name;
+ bool enabled;
+};
+
+void spdk_log_register_flag(const char *name, struct spdk_log_flag *flag);
+
+struct spdk_log_flag *spdk_log_get_first_flag(void);
+struct spdk_log_flag *spdk_log_get_next_flag(struct spdk_log_flag *flag);
+
+#define SPDK_LOG_REGISTER_COMPONENT(str, flag) \
+struct spdk_log_flag flag = { \
+ .enabled = false, \
+ .name = str, \
+}; \
+__attribute__((constructor)) static void register_flag_##flag(void) \
+{ \
+ spdk_log_register_flag(str, &flag); \
+}
+
+#define SPDK_INFOLOG(FLAG, ...) \
+ do { \
+ extern struct spdk_log_flag FLAG; \
+ if (FLAG.enabled) { \
+ spdk_log(SPDK_LOG_INFO, __FILE__, __LINE__, __func__, __VA_ARGS__); \
+ } \
+ } while (0)
+
+#ifdef DEBUG
+
+#define SPDK_DEBUGLOG(FLAG, ...) \
+ do { \
+ extern struct spdk_log_flag FLAG; \
+ if (FLAG.enabled) { \
+ spdk_log(SPDK_LOG_DEBUG, __FILE__, __LINE__, __func__, __VA_ARGS__); \
+ } \
+ } while (0)
+
+#define SPDK_LOGDUMP(FLAG, LABEL, BUF, LEN) \
+ do { \
+ extern struct spdk_log_flag FLAG; \
+ if ((FLAG.enabled) && (LEN)) { \
+ spdk_log_dump(stderr, (LABEL), (BUF), (LEN)); \
+ } \
+ } while (0)
+
+#else
+#define SPDK_DEBUGLOG(...) do { } while (0)
+#define SPDK_LOGDUMP(...) do { } while (0)
+#endif
+
+#define SPDK_ERRLOGDUMP(LABEL, BUF, LEN) \
+ do { \
+ if ((LEN)) { \
+ spdk_log_dump(stderr, (LABEL), (BUF), (LEN)); \
+ } \
+ } while (0)
+
+#endif /* SPDK_INTERNAL_LOG_H */
diff --git a/src/spdk/include/spdk_internal/lvolstore.h b/src/spdk/include/spdk_internal/lvolstore.h
new file mode 100644
index 000000000..f82157e53
--- /dev/null
+++ b/src/spdk/include/spdk_internal/lvolstore.h
@@ -0,0 +1,128 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_LVOLSTORE_H
+#define SPDK_INTERNAL_LVOLSTORE_H
+
+#include "spdk/blob.h"
+#include "spdk/lvol.h"
+#include "spdk/uuid.h"
+#include "spdk/bdev_module.h"
+
+/* Default size of blobstore cluster */
+#define SPDK_LVS_OPTS_CLUSTER_SZ (4 * 1024 * 1024)
+
+/* UUID + '_' + blobid (20 characters for uint64_t).
+ * Null terminator is already included in SPDK_UUID_STRING_LEN. */
+#define SPDK_LVOL_UNIQUE_ID_MAX (SPDK_UUID_STRING_LEN + 1 + 20)
+
+struct spdk_lvs_req {
+ spdk_lvs_op_complete cb_fn;
+ void *cb_arg;
+ struct spdk_lvol_store *lvol_store;
+ int lvserrno;
+};
+
+struct spdk_lvol_req {
+ spdk_lvol_op_complete cb_fn;
+ void *cb_arg;
+ struct spdk_lvol *lvol;
+ size_t sz;
+ struct spdk_io_channel *channel;
+ char name[SPDK_LVOL_NAME_MAX];
+};
+
+struct spdk_lvs_with_handle_req {
+ spdk_lvs_op_with_handle_complete cb_fn;
+ void *cb_arg;
+ struct spdk_lvol_store *lvol_store;
+ struct spdk_bs_dev *bs_dev;
+ struct spdk_bdev *base_bdev;
+ int lvserrno;
+};
+
+struct spdk_lvs_destroy_req {
+ spdk_lvs_op_complete cb_fn;
+ void *cb_arg;
+ struct spdk_lvol_store *lvs;
+};
+
+struct spdk_lvol_with_handle_req {
+ spdk_lvol_op_with_handle_complete cb_fn;
+ void *cb_arg;
+ struct spdk_lvol *lvol;
+};
+
+struct spdk_lvol_store {
+ struct spdk_bs_dev *bs_dev;
+ struct spdk_blob_store *blobstore;
+ struct spdk_blob *super_blob;
+ spdk_blob_id super_blob_id;
+ struct spdk_uuid uuid;
+ int lvol_count;
+ int lvols_opened;
+ bool destruct;
+ TAILQ_HEAD(, spdk_lvol) lvols;
+ TAILQ_HEAD(, spdk_lvol) pending_lvols;
+ bool on_list;
+ TAILQ_ENTRY(spdk_lvol_store) link;
+ char name[SPDK_LVS_NAME_MAX];
+ char new_name[SPDK_LVS_NAME_MAX];
+};
+
+struct spdk_lvol {
+ struct spdk_lvol_store *lvol_store;
+ struct spdk_blob *blob;
+ spdk_blob_id blob_id;
+ char unique_id[SPDK_LVOL_UNIQUE_ID_MAX];
+ char name[SPDK_LVOL_NAME_MAX];
+ struct spdk_uuid uuid;
+ char uuid_str[SPDK_UUID_STRING_LEN];
+ bool thin_provision;
+ struct spdk_bdev *bdev;
+ int ref_count;
+ bool action_in_progress;
+ enum blob_clear_method clear_method;
+ TAILQ_ENTRY(spdk_lvol) link;
+};
+
+struct lvol_store_bdev *vbdev_lvol_store_first(void);
+struct lvol_store_bdev *vbdev_lvol_store_next(struct lvol_store_bdev *prev);
+
+void spdk_lvol_resize(struct spdk_lvol *lvol, uint64_t sz, spdk_lvol_op_complete cb_fn,
+ void *cb_arg);
+
+void spdk_lvol_set_read_only(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn,
+ void *cb_arg);
+
+#endif /* SPDK_INTERNAL_LVOLSTORE_H */
diff --git a/src/spdk/include/spdk_internal/mock.h b/src/spdk/include/spdk_internal/mock.h
new file mode 100644
index 000000000..8de44ae55
--- /dev/null
+++ b/src/spdk/include/spdk_internal/mock.h
@@ -0,0 +1,135 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_MOCK_H
+#define SPDK_INTERNAL_MOCK_H
+
+#include "spdk/stdinc.h"
+
+#define MOCK_STRUCT_INIT(...) \
+ { __VA_ARGS__ }
+
+#define DEFINE_RETURN_MOCK(fn, ret) \
+ bool ut_ ## fn ## _mocked = false; \
+ ret ut_ ## fn
+
+/*
+ * For controlling mocked function behavior, setting
+ * and getting values from the stub, the _P macros are
+ * for mocking functions that return pointer values.
+ */
+#define MOCK_SET(fn, val) \
+ ut_ ## fn ## _mocked = true; \
+ ut_ ## fn = val
+
+#define MOCK_GET(fn) \
+ ut_ ## fn
+
+#define MOCK_CLEAR(fn) \
+ ut_ ## fn ## _mocked = false
+
+#define MOCK_CLEAR_P(fn) \
+ ut_ ## fn ## _mocked = false; \
+ ut_ ## fn = NULL
+
+/* for proving to *certain* static analysis tools that we didn't reset the mock function. */
+#define MOCK_CLEARED_ASSERT(fn) \
+ SPDK_CU_ASSERT_FATAL(ut_ ## fn ## _mocked == false)
+
+/* for declaring function protoypes for wrappers */
+#define DECLARE_WRAPPER(fn, ret, args) \
+ extern bool ut_ ## fn ## _mocked; \
+ extern ret ut_ ## fn; \
+ ret __wrap_ ## fn args; ret __real_ ## fn args
+
+/* for defining the implmentation of wrappers for syscalls */
+#define DEFINE_WRAPPER(fn, ret, dargs, pargs) \
+ DEFINE_RETURN_MOCK(fn, ret); \
+ __attribute__((used)) ret __wrap_ ## fn dargs \
+ { \
+ if (!ut_ ## fn ## _mocked) { \
+ return __real_ ## fn pargs; \
+ } else { \
+ return MOCK_GET(fn); \
+ } \
+ }
+
+/* DEFINE_STUB is for defining the implmentation of stubs for SPDK funcs. */
+#define DEFINE_STUB(fn, ret, dargs, val) \
+ bool ut_ ## fn ## _mocked = true; \
+ ret ut_ ## fn = val; \
+ ret fn dargs; \
+ ret fn dargs \
+ { \
+ return MOCK_GET(fn); \
+ }
+
+/* DEFINE_STUB_V macro is for stubs that don't have a return value */
+#define DEFINE_STUB_V(fn, dargs) \
+ void fn dargs; \
+ void fn dargs \
+ { \
+ }
+
+#define HANDLE_RETURN_MOCK(fn) \
+ if (ut_ ## fn ## _mocked) { \
+ return ut_ ## fn; \
+ }
+
+
+/* declare wrapper protos (alphabetically please) here */
+DECLARE_WRAPPER(calloc, void *, (size_t nmemb, size_t size));
+
+DECLARE_WRAPPER(pthread_mutex_init, int,
+ (pthread_mutex_t *mtx, const pthread_mutexattr_t *attr));
+
+DECLARE_WRAPPER(pthread_mutexattr_init, int,
+ (pthread_mutexattr_t *attr));
+
+DECLARE_WRAPPER(recvmsg, ssize_t, (int sockfd, struct msghdr *msg, int flags));
+
+DECLARE_WRAPPER(sendmsg, ssize_t, (int sockfd, const struct msghdr *msg, int flags));
+
+DECLARE_WRAPPER(writev, ssize_t, (int fd, const struct iovec *iov, int iovcnt));
+
+/* unlink is done a bit differently. */
+extern char *g_unlink_path;
+extern void (*g_unlink_callback)(void);
+/* If g_unlink_path is NULL, __wrap_unlink will return ENOENT.
+ * If the __wrap_unlink() parameter does not match g_unlink_path, it will return ENOENT.
+ * If g_unlink_path does match, and g_unlink_callback has been set, g_unlink_callback will
+ * be called before returning 0.
+ */
+int __wrap_unlink(const char *path);
+
+#endif /* SPDK_INTERNAL_MOCK_H */
diff --git a/src/spdk/include/spdk_internal/nvme_tcp.h b/src/spdk/include/spdk_internal/nvme_tcp.h
new file mode 100644
index 000000000..7065bc060
--- /dev/null
+++ b/src/spdk/include/spdk_internal/nvme_tcp.h
@@ -0,0 +1,633 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_NVME_TCP_H
+#define SPDK_INTERNAL_NVME_TCP_H
+
+#include "spdk/likely.h"
+#include "spdk/sock.h"
+#include "spdk/dif.h"
+
+#define SPDK_CRC32C_XOR 0xffffffffUL
+#define SPDK_NVME_TCP_DIGEST_LEN 4
+#define SPDK_NVME_TCP_DIGEST_ALIGNMENT 4
+#define SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT 30
+#define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 8
+
+/*
+ * Maximum number of SGL elements.
+ */
+#define NVME_TCP_MAX_SGL_DESCRIPTORS (16)
+
+#define MAKE_DIGEST_WORD(BUF, CRC32C) \
+ ( ((*((uint8_t *)(BUF)+0)) = (uint8_t)((uint32_t)(CRC32C) >> 0)), \
+ ((*((uint8_t *)(BUF)+1)) = (uint8_t)((uint32_t)(CRC32C) >> 8)), \
+ ((*((uint8_t *)(BUF)+2)) = (uint8_t)((uint32_t)(CRC32C) >> 16)), \
+ ((*((uint8_t *)(BUF)+3)) = (uint8_t)((uint32_t)(CRC32C) >> 24)))
+
+#define MATCH_DIGEST_WORD(BUF, CRC32C) \
+ ( ((((uint32_t) *((uint8_t *)(BUF)+0)) << 0) \
+ | (((uint32_t) *((uint8_t *)(BUF)+1)) << 8) \
+ | (((uint32_t) *((uint8_t *)(BUF)+2)) << 16) \
+ | (((uint32_t) *((uint8_t *)(BUF)+3)) << 24)) \
+ == (CRC32C))
+
+#define DGET32(B) \
+ ((( (uint32_t) *((uint8_t *)(B)+0)) << 0) \
+ | (((uint32_t) *((uint8_t *)(B)+1)) << 8) \
+ | (((uint32_t) *((uint8_t *)(B)+2)) << 16) \
+ | (((uint32_t) *((uint8_t *)(B)+3)) << 24))
+
+#define DSET32(B,D) \
+ (((*((uint8_t *)(B)+0)) = (uint8_t)((uint32_t)(D) >> 0)), \
+ ((*((uint8_t *)(B)+1)) = (uint8_t)((uint32_t)(D) >> 8)), \
+ ((*((uint8_t *)(B)+2)) = (uint8_t)((uint32_t)(D) >> 16)), \
+ ((*((uint8_t *)(B)+3)) = (uint8_t)((uint32_t)(D) >> 24)))
+
+typedef void (*nvme_tcp_qpair_xfer_complete_cb)(void *cb_arg);
+
+/* Cursor over an iovec array, used while building or consuming a PDU
+ * scatter-gather list.
+ */
+struct _nvme_tcp_sgl {
+	struct iovec *iov;	/* next unfilled/unread iovec element */
+	int iovcnt;		/* elements remaining in iov */
+	uint32_t iov_offset;	/* bytes still to skip before appending */
+	uint32_t total_size;	/* total bytes appended so far */
+};
+
+/* In-memory representation of one NVMe/TCP PDU, shared by the initiator and
+ * target code paths for both transmit and receive.
+ */
+struct nvme_tcp_pdu {
+	union {
+		/* to hold error pdu data */
+		uint8_t raw[SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE];
+		struct spdk_nvme_tcp_common_pdu_hdr common;
+		struct spdk_nvme_tcp_ic_req ic_req;
+		struct spdk_nvme_tcp_term_req_hdr term_req;
+		struct spdk_nvme_tcp_cmd capsule_cmd;
+		struct spdk_nvme_tcp_h2c_data_hdr h2c_data;
+		struct spdk_nvme_tcp_ic_resp ic_resp;
+		struct spdk_nvme_tcp_rsp capsule_resp;
+		struct spdk_nvme_tcp_c2h_data_hdr c2h_data;
+		struct spdk_nvme_tcp_r2t_hdr r2t;
+
+	} hdr;
+
+	bool has_hdgst;		/* header digest present after the PSH */
+	bool ddgst_enable;	/* data digest expected/produced for the payload */
+	uint8_t data_digest[SPDK_NVME_TCP_DIGEST_LEN];	/* staged DDGST bytes */
+
+	uint8_t ch_valid_bytes;		/* bytes of common header received so far */
+	uint8_t psh_valid_bytes;	/* bytes of PDU-specific header received so far */
+	uint8_t psh_len;		/* PSH length, excluding the common header */
+
+	nvme_tcp_qpair_xfer_complete_cb cb_fn;	/* invoked when the transfer completes */
+	void *cb_arg;
+
+	/* The sock request ends with a 0 length iovec. Place the actual iovec immediately
+	 * after it. There is a static assert below to check if the compiler inserted
+	 * any unwanted padding */
+	struct spdk_sock_request sock_req;
+	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS * 2];
+
+	struct iovec data_iov[NVME_TCP_MAX_SGL_DESCRIPTORS];	/* payload buffers */
+	uint32_t data_iovcnt;
+	uint32_t data_len;	/* logical payload length (excludes DIF metadata) */
+
+	uint32_t readv_offset;	/* payload bytes already received */
+	TAILQ_ENTRY(nvme_tcp_pdu) tailq;
+	uint32_t remaining;
+	uint32_t padding_len;	/* PAD bytes between PSH/HDGST and payload */
+	struct _nvme_tcp_sgl sgl;	/* scratch cursor used by the iov builders */
+
+	struct spdk_dif_ctx *dif_ctx;	/* non-NULL when DIF insert/strip is active */
+
+	void *req; /* data tied to a tcp request */
+	void *qpair;
+};
+SPDK_STATIC_ASSERT(offsetof(struct nvme_tcp_pdu,
+			    sock_req) + sizeof(struct spdk_sock_request) == offsetof(struct nvme_tcp_pdu, iov),
+		   "Compiler inserted padding between iov and sock_req");
+
+enum nvme_tcp_pdu_recv_state {
+ /* Ready to wait for PDU */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY,
+
+ /* Active tqpair waiting for any PDU common header */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH,
+
+ /* Active tqpair waiting for any PDU specific header */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH,
+
+ /* Active tqpair waiting for a tcp request, only use in target side */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_REQ,
+
+ /* Active tqpair waiting for payload */
+ NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD,
+
+ /* Active tqpair does not wait for payload */
+ NVME_TCP_PDU_RECV_STATE_ERROR,
+};
+
+/* Return codes shared by the PDU read helpers below; positive values are
+ * byte counts, 0 means no progress yet, negative values are fatal.
+ */
+enum nvme_tcp_error_codes {
+	NVME_TCP_PDU_IN_PROGRESS = 0,
+	NVME_TCP_CONNECTION_FATAL = -1,
+	NVME_TCP_PDU_FATAL = -2,
+};
+
+enum nvme_tcp_qpair_state {
+ NVME_TCP_QPAIR_STATE_INVALID = 0,
+ NVME_TCP_QPAIR_STATE_INITIALIZING = 1,
+ NVME_TCP_QPAIR_STATE_RUNNING = 2,
+ NVME_TCP_QPAIR_STATE_EXITING = 3,
+ NVME_TCP_QPAIR_STATE_EXITED = 4,
+};
+
+/* Per-PDU-type flag: true when that PDU type carries a header digest if
+ * HDGST was negotiated on the connection.
+ */
+static const bool g_nvme_tcp_hdgst[] = {
+	[SPDK_NVME_TCP_PDU_TYPE_IC_REQ] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_IC_RESP] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_H2C_DATA] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_C2H_DATA] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_R2T] = true
+};
+
+/* Per-PDU-type flag: true when that PDU type carries a data digest if
+ * DDGST was negotiated (only data-bearing PDU types qualify).
+ */
+static const bool g_nvme_tcp_ddgst[] = {
+	[SPDK_NVME_TCP_PDU_TYPE_IC_REQ] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_IC_RESP] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP] = false,
+	[SPDK_NVME_TCP_PDU_TYPE_H2C_DATA] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_C2H_DATA] = true,
+	[SPDK_NVME_TCP_PDU_TYPE_R2T] = false
+};
+
+/* Compute the CRC32C header digest over the first hlen bytes of the PDU
+ * header.  Seeded with ~0 and finalized with an XOR, matching the
+ * NVMe/TCP HDGST definition.
+ */
+static uint32_t
+nvme_tcp_pdu_calc_header_digest(struct nvme_tcp_pdu *pdu)
+{
+	uint32_t crc32c;
+	uint32_t hlen = pdu->hdr.common.hlen;
+
+	crc32c = spdk_crc32c_update(&pdu->hdr.raw, hlen, ~0);
+	crc32c = crc32c ^ SPDK_CRC32C_XOR;
+	return crc32c;
+}
+
+/* Fold every buffer of the iovec array into a running CRC32C value and
+ * return the updated CRC.  Each element must have a non-NULL base and a
+ * non-zero length.
+ */
+static uint32_t
+_update_crc32c_iov(struct iovec *iov, int iovcnt, uint32_t crc32c)
+{
+	int i;
+
+	for (i = 0; i < iovcnt; i++) {
+		assert(iov[i].iov_base != NULL);
+		assert(iov[i].iov_len != 0);
+		crc32c = spdk_crc32c_update(iov[i].iov_base, iov[i].iov_len, crc32c);
+	}
+
+	return crc32c;
+}
+
+/* Compute the CRC32C data digest (DDGST) over the PDU payload.  When a DIF
+ * context is attached, the CRC is computed over the data stream with
+ * metadata handled by the DIF helper.  The payload is conceptually padded
+ * with zero bytes up to a 4-byte boundary before the final XOR.
+ */
+static uint32_t
+nvme_tcp_pdu_calc_data_digest(struct nvme_tcp_pdu *pdu)
+{
+	uint32_t crc32c = SPDK_CRC32C_XOR;
+	uint32_t mod;
+
+	assert(pdu->data_len != 0);
+
+	if (spdk_likely(!pdu->dif_ctx)) {
+		crc32c = _update_crc32c_iov(pdu->data_iov, pdu->data_iovcnt, crc32c);
+	} else {
+		spdk_dif_update_crc32c_stream(pdu->data_iov, pdu->data_iovcnt,
+					      0, pdu->data_len, &crc32c, pdu->dif_ctx);
+	}
+
+	/* Digest the implicit zero padding that aligns the payload to 4 bytes. */
+	mod = pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT;
+	if (mod != 0) {
+		uint32_t pad_length = SPDK_NVME_TCP_DIGEST_ALIGNMENT - mod;
+		uint8_t pad[3] = {0, 0, 0};
+
+		assert(pad_length > 0);
+		assert(pad_length <= sizeof(pad));
+		crc32c = spdk_crc32c_update(pad, pad_length, crc32c);
+	}
+	crc32c = crc32c ^ SPDK_CRC32C_XOR;
+	return crc32c;
+}
+
+/* Reset the SGL cursor to the start of the given iovec array, with an
+ * initial number of bytes (iov_offset) to skip before any data is appended.
+ */
+static inline void
+_nvme_tcp_sgl_init(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt,
+		   uint32_t iov_offset)
+{
+	s->iov = iov;
+	s->iovcnt = iovcnt;
+	s->iov_offset = iov_offset;
+	s->total_size = 0;
+}
+
+/* Advance the cursor by step bytes, stepping over any iovec elements that
+ * become fully consumed so iov_offset always falls within the current
+ * element (or the list is exhausted).
+ */
+static inline void
+_nvme_tcp_sgl_advance(struct _nvme_tcp_sgl *s, uint32_t step)
+{
+	s->iov_offset += step;
+	while (s->iovcnt > 0) {
+		if (s->iov_offset < s->iov->iov_len) {
+			break;
+		}
+
+		s->iov_offset -= s->iov->iov_len;
+		s->iov++;
+		s->iovcnt--;
+	}
+}
+
+/* Return the address and remaining length of the buffer at the cursor's
+ * current position.  Either output pointer may be NULL if unneeded.
+ */
+static inline void
+_nvme_tcp_sgl_get_buf(struct _nvme_tcp_sgl *s, void **_buf, uint32_t *_buf_len)
+{
+	if (_buf != NULL) {
+		*_buf = s->iov->iov_base + s->iov_offset;
+	}
+	if (_buf_len != NULL) {
+		*_buf_len = s->iov->iov_len - s->iov_offset;
+	}
+}
+
+/* Append a data buffer to the SGL as a new iovec element, first honoring
+ * any pending skip (iov_offset).  Returns false when the destination iovec
+ * array is exhausted after this append; true otherwise.
+ */
+static inline bool
+_nvme_tcp_sgl_append(struct _nvme_tcp_sgl *s, uint8_t *data, uint32_t data_len)
+{
+	if (s->iov_offset >= data_len) {
+		/* This buffer is entirely inside the skip region. */
+		s->iov_offset -= data_len;
+	} else {
+		assert(s->iovcnt > 0);
+		s->iov->iov_base = data + s->iov_offset;
+		s->iov->iov_len = data_len - s->iov_offset;
+		s->total_size += data_len - s->iov_offset;
+		s->iov_offset = 0;
+		s->iov++;
+		s->iovcnt--;
+		if (s->iovcnt == 0) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Append every element of an iovec array to the SGL.  Returns false as soon
+ * as the destination runs out of room, true if all elements were appended.
+ */
+static inline bool
+_nvme_tcp_sgl_append_multi(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt)
+{
+	int i;
+
+	for (i = 0; i < iovcnt; i++) {
+		if (!_nvme_tcp_sgl_append(s, iov[i].iov_base, iov[i].iov_len)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Sum the lengths of all elements in an iovec array. */
+static inline uint32_t
+_get_iov_array_size(struct iovec *iov, int iovcnt)
+{
+	int i;
+	uint32_t size = 0;
+
+	for (i = 0; i < iovcnt; i++) {
+		size += iov[i].iov_len;
+	}
+
+	return size;
+}
+
+/* Append an iovec array to the SGL with DIF metadata interleaved by the
+ * DIF helper.  Returns false when the destination iovec array is exhausted
+ * or the DIF setup fails, true otherwise.
+ * NOTE(review): the skip branch compares iov_offset against data_len but
+ * subtracts the md-inclusive array size — presumably offsets here are
+ * metadata-inclusive; verify against the callers.
+ */
+static inline bool
+_nvme_tcp_sgl_append_multi_with_md(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt,
+				   uint32_t data_len, const struct spdk_dif_ctx *dif_ctx)
+{
+	int rc;
+	uint32_t mapped_len = 0;
+
+	if (s->iov_offset >= data_len) {
+		s->iov_offset -= _get_iov_array_size(iov, iovcnt);
+	} else {
+		rc = spdk_dif_set_md_interleave_iovs(s->iov, s->iovcnt, iov, iovcnt,
+						     s->iov_offset, data_len - s->iov_offset,
+						     &mapped_len, dif_ctx);
+		if (rc < 0) {
+			SPDK_ERRLOG("Failed to setup iovs for DIF insert/strip.\n");
+			return false;
+		}
+
+		s->total_size += mapped_len;
+		s->iov_offset = 0;
+		assert(s->iovcnt >= rc);	/* rc is the number of destination iovecs consumed */
+		s->iovcnt -= rc;
+		s->iov += rc;
+
+		if (s->iovcnt == 0) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Build the iovec array describing a full PDU for transmission: header
+ * (plus optional header digest and padding), payload (optionally DIF
+ * interleaved) and optional data digest.  Stops early if iovcnt is too
+ * small.  Returns the number of iovec elements filled; *_mapped_length,
+ * if given, receives the total number of bytes mapped.
+ */
+static int
+nvme_tcp_build_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu,
+		    bool hdgst_enable, bool ddgst_enable, uint32_t *_mapped_length)
+{
+	uint32_t hlen, plen;
+	struct _nvme_tcp_sgl *sgl;
+
+	if (iovcnt == 0) {
+		return 0;
+	}
+
+	sgl = &pdu->sgl;
+	_nvme_tcp_sgl_init(sgl, iov, iovcnt, 0);
+	hlen = pdu->hdr.common.hlen;
+
+	/* Header Digest */
+	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) {
+		hlen += SPDK_NVME_TCP_DIGEST_LEN;
+	}
+
+	plen = hlen;
+	if (!pdu->data_len) {
+		/* PDU header + possible header digest */
+		_nvme_tcp_sgl_append(sgl, (uint8_t *)&pdu->hdr.raw, hlen);
+		goto end;
+	}
+
+	/* Padding */
+	if (pdu->padding_len > 0) {
+		hlen += pdu->padding_len;
+		plen = hlen;
+	}
+
+	if (!_nvme_tcp_sgl_append(sgl, (uint8_t *)&pdu->hdr.raw, hlen)) {
+		goto end;
+	}
+
+	/* Data Segment */
+	plen += pdu->data_len;
+	if (spdk_likely(!pdu->dif_ctx)) {
+		if (!_nvme_tcp_sgl_append_multi(sgl, pdu->data_iov, pdu->data_iovcnt)) {
+			goto end;
+		}
+	} else {
+		if (!_nvme_tcp_sgl_append_multi_with_md(sgl, pdu->data_iov, pdu->data_iovcnt,
+							pdu->data_len, pdu->dif_ctx)) {
+			goto end;
+		}
+	}
+
+	/* Data Digest */
+	if (g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && ddgst_enable) {
+		plen += SPDK_NVME_TCP_DIGEST_LEN;
+		_nvme_tcp_sgl_append(sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN);
+	}
+
+	/* Sanity check: computed length must match the PDU's declared plen. */
+	assert(plen == pdu->hdr.common.plen);
+
+end:
+	if (_mapped_length != NULL) {
+		*_mapped_length = sgl->total_size;
+	}
+
+	return iovcnt - sgl->iovcnt;
+}
+
+/* Build the iovec array covering only the PDU payload (and optional data
+ * digest) for receiving, skipping the readv_offset bytes already received.
+ * Returns the number of iovec elements filled; *_mapped_length, if given,
+ * receives the total number of bytes mapped.
+ */
+static int
+nvme_tcp_build_payload_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu,
+			    bool ddgst_enable, uint32_t *_mapped_length)
+{
+	struct _nvme_tcp_sgl *sgl;
+
+	if (iovcnt == 0) {
+		return 0;
+	}
+
+	sgl = &pdu->sgl;
+	_nvme_tcp_sgl_init(sgl, iov, iovcnt, pdu->readv_offset);
+
+	if (spdk_likely(!pdu->dif_ctx)) {
+		if (!_nvme_tcp_sgl_append_multi(sgl, pdu->data_iov, pdu->data_iovcnt)) {
+			goto end;
+		}
+	} else {
+		if (!_nvme_tcp_sgl_append_multi_with_md(sgl, pdu->data_iov, pdu->data_iovcnt,
+							pdu->data_len, pdu->dif_ctx)) {
+			goto end;
+		}
+	}
+
+	/* Data Digest */
+	if (ddgst_enable) {
+		_nvme_tcp_sgl_append(sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN);
+	}
+
+end:
+	if (_mapped_length != NULL) {
+		*_mapped_length = sgl->total_size;
+	}
+	return iovcnt - sgl->iovcnt;
+}
+
+/* Read up to bytes from the socket into buf.  Returns the number of bytes
+ * read (> 0), 0 when the read would block, or NVME_TCP_CONNECTION_FATAL on
+ * error or orderly close (spdk_sock_recv() returning 0).
+ */
+static int
+nvme_tcp_read_data(struct spdk_sock *sock, int bytes,
+		   void *buf)
+{
+	int ret;
+
+	ret = spdk_sock_recv(sock, buf, bytes);
+
+	if (ret > 0) {
+		return ret;
+	}
+
+	if (ret < 0) {
+		if (errno == EAGAIN || errno == EWOULDBLOCK) {
+			return 0;
+		}
+
+		/* For connect reset issue, do not output error log */
+		if (errno != ECONNRESET) {
+			SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n",
+				    errno, spdk_strerror(errno));
+		}
+	}
+
+	/* connection closed */
+	return NVME_TCP_CONNECTION_FATAL;
+}
+
+/* Vectored variant of nvme_tcp_read_data().  Same return convention:
+ * bytes read, 0 on would-block, NVME_TCP_CONNECTION_FATAL on error/close.
+ * NOTE(review): the single-iovec fast path passes iov_len (size_t) as an
+ * int — fine for the bounded PDU sizes used here, but worth confirming.
+ */
+static int
+nvme_tcp_readv_data(struct spdk_sock *sock, struct iovec *iov, int iovcnt)
+{
+	int ret;
+
+	assert(sock != NULL);
+	if (iov == NULL || iovcnt == 0) {
+		return 0;
+	}
+
+	if (iovcnt == 1) {
+		return nvme_tcp_read_data(sock, iov->iov_len, iov->iov_base);
+	}
+
+	ret = spdk_sock_readv(sock, iov, iovcnt);
+
+	if (ret > 0) {
+		return ret;
+	}
+
+	if (ret < 0) {
+		if (errno == EAGAIN || errno == EWOULDBLOCK) {
+			return 0;
+		}
+
+		/* For connect reset issue, do not output error log */
+		if (errno != ECONNRESET) {
+			SPDK_ERRLOG("spdk_sock_readv() failed, errno %d: %s\n",
+				    errno, spdk_strerror(errno));
+		}
+	}
+
+	/* connection closed */
+	return NVME_TCP_CONNECTION_FATAL;
+}
+
+
+/* Receive the next chunk of the PDU payload (and data digest, if enabled)
+ * directly into the buffers described by the PDU.  The +1 iovec slot leaves
+ * room for the digest.  Returns the nvme_tcp_readv_data() result.
+ */
+static int
+nvme_tcp_read_payload_data(struct spdk_sock *sock, struct nvme_tcp_pdu *pdu)
+{
+	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1];
+	int iovcnt;
+
+	iovcnt = nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu,
+					     pdu->ddgst_enable, NULL);
+	assert(iovcnt >= 0);
+
+	return nvme_tcp_readv_data(sock, iov, iovcnt);
+}
+
+/* Point the PDU's payload at a single contiguous buffer (does not update
+ * pdu->data_len; see nvme_tcp_pdu_set_data()).
+ */
+static void
+_nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len)
+{
+	pdu->data_iov[0].iov_base = data;
+	pdu->data_iov[0].iov_len = data_len;
+	pdu->data_iovcnt = 1;
+}
+
+/* Set a single contiguous buffer as the PDU payload and record its length. */
+static void
+nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len)
+{
+	_nvme_tcp_pdu_set_data(pdu, data, data_len);
+	pdu->data_len = data_len;
+}
+
+/* Point the PDU's payload at the [data_offset, data_offset + data_len)
+ * window of the caller's iovec array.  With a DIF context attached, the
+ * window is first translated into metadata-inclusive buffer coordinates.
+ * For multi-element sources, the window is walked element by element and
+ * copied into pdu->data_iov via the SGL cursor.
+ */
+static void
+nvme_tcp_pdu_set_data_buf(struct nvme_tcp_pdu *pdu,
+			  struct iovec *iov, int iovcnt,
+			  uint32_t data_offset, uint32_t data_len)
+{
+	uint32_t buf_offset, buf_len, remain_len, len;
+	uint8_t *buf;
+	struct _nvme_tcp_sgl *pdu_sgl, buf_sgl;
+
+	pdu->data_len = data_len;
+
+	if (spdk_likely(!pdu->dif_ctx)) {
+		buf_offset = data_offset;
+		buf_len = data_len;
+	} else {
+		/* Translate logical data offsets into md-interleaved buffer offsets. */
+		spdk_dif_ctx_set_data_offset(pdu->dif_ctx, data_offset);
+		spdk_dif_get_range_with_md(data_offset, data_len,
+					   &buf_offset, &buf_len, pdu->dif_ctx);
+	}
+
+	if (iovcnt == 1) {
+		_nvme_tcp_pdu_set_data(pdu, (void *)((uint64_t)iov[0].iov_base + buf_offset), buf_len);
+	} else {
+		pdu_sgl = &pdu->sgl;
+
+		_nvme_tcp_sgl_init(pdu_sgl, pdu->data_iov, NVME_TCP_MAX_SGL_DESCRIPTORS, 0);
+		_nvme_tcp_sgl_init(&buf_sgl, iov, iovcnt, 0);
+
+		_nvme_tcp_sgl_advance(&buf_sgl, buf_offset);
+		remain_len = buf_len;
+
+		while (remain_len > 0) {
+			_nvme_tcp_sgl_get_buf(&buf_sgl, (void *)&buf, &len);
+			len = spdk_min(len, remain_len);
+
+			_nvme_tcp_sgl_advance(&buf_sgl, len);
+			remain_len -= len;
+
+			if (!_nvme_tcp_sgl_append(pdu_sgl, buf, len)) {
+				break;
+			}
+		}
+
+		/* The window must fit entirely within NVME_TCP_MAX_SGL_DESCRIPTORS. */
+		assert(remain_len == 0);
+		assert(pdu_sgl->total_size == buf_len);
+
+		pdu->data_iovcnt = NVME_TCP_MAX_SGL_DESCRIPTORS - pdu_sgl->iovcnt;
+	}
+}
+
+/* Compute the PDU-specific header length (pdu->psh_len): the header bytes
+ * that follow the common header, including the header digest and any PAD
+ * implied by the PDO field when a data segment follows.
+ */
+static void
+nvme_tcp_pdu_calc_psh_len(struct nvme_tcp_pdu *pdu, bool hdgst_enable)
+{
+	uint8_t psh_len, pdo, padding_len;
+
+	psh_len = pdu->hdr.common.hlen;
+
+	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) {
+		pdu->has_hdgst = true;
+		psh_len += SPDK_NVME_TCP_DIGEST_LEN;
+		if (pdu->hdr.common.plen > psh_len) {
+			/* A data segment follows; PDO tells us where it starts. */
+			pdo = pdu->hdr.common.pdo;
+			padding_len = pdo - psh_len;
+			if (padding_len > 0) {
+				psh_len = pdo;
+			}
+		}
+	}
+
+	/* psh_len excludes the common header, which is tracked separately. */
+	psh_len -= sizeof(struct spdk_nvme_tcp_common_pdu_hdr);
+	pdu->psh_len = psh_len;
+}
+
+#endif /* SPDK_INTERNAL_NVME_TCP_H */
diff --git a/src/spdk/include/spdk_internal/rdma.h b/src/spdk/include/spdk_internal/rdma.h
new file mode 100644
index 000000000..4a6d5104b
--- /dev/null
+++ b/src/spdk/include/spdk_internal/rdma.h
@@ -0,0 +1,117 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_RDMA_H
+#define SPDK_RDMA_H
+
+#include <infiniband/verbs.h>
+#include <rdma/rdma_cma.h>
+#include <rdma/rdma_verbs.h>
+
+struct spdk_rdma_qp_init_attr {
+ void *qp_context;
+ struct ibv_cq *send_cq;
+ struct ibv_cq *recv_cq;
+ struct ibv_srq *srq;
+ struct ibv_qp_cap cap;
+ struct ibv_pd *pd;
+};
+
+struct spdk_rdma_send_wr_list {
+ struct ibv_send_wr *first;
+ struct ibv_send_wr *last;
+};
+
+struct spdk_rdma_qp {
+ struct ibv_qp *qp;
+ struct rdma_cm_id *cm_id;
+ struct spdk_rdma_send_wr_list send_wrs;
+};
+
+/**
+ * Create RDMA provider specific qpair
+ * \param cm_id Pointer to RDMACM cm_id
+ * \param qp_attr Pointer to qpair init attributes
+ * \return Pointer to a newly created qpair on success or NULL on failure
+ */
+struct spdk_rdma_qp *spdk_rdma_qp_create(struct rdma_cm_id *cm_id,
+ struct spdk_rdma_qp_init_attr *qp_attr);
+
+/**
+ * Accept a connection request. Called by the passive side (NVMEoF target)
+ * \param spdk_rdma_qp Pointer to a qpair
+ * \param conn_param Optional information needed to establish the connection
+ * \return 0 on success, errno on failure
+ */
+int spdk_rdma_qp_accept(struct spdk_rdma_qp *spdk_rdma_qp, struct rdma_conn_param *conn_param);
+
+/**
+ * Complete the connection process, must be called by the active
+ * side (NVMEoF initiator) upon receipt RDMA_CM_EVENT_CONNECT_RESPONSE
+ * \param spdk_rdma_qp Pointer to a qpair
+ * \return 0 on success, errno on failure
+ */
+int spdk_rdma_qp_complete_connect(struct spdk_rdma_qp *spdk_rdma_qp);
+
+/**
+ * Destroy RDMA provider specific qpair
+ * \param spdk_rdma_qp Pointer to qpair to be destroyed
+ */
+void spdk_rdma_qp_destroy(struct spdk_rdma_qp *spdk_rdma_qp);
+
+/**
+ * Disconnect a connection and transition the associated qpair to the error state.
+ * Generates RDMA_CM_EVENT_DISCONNECTED on both connection sides
+ * \param spdk_rdma_qp Pointer to qpair to be destroyed
+ */
+int spdk_rdma_qp_disconnect(struct spdk_rdma_qp *spdk_rdma_qp);
+
+/**
+ * Append the given send wr structure to the qpair's outstanding sends list.
+ * This function accepts either a single Work Request or the first WR in a linked list.
+ *
+ * \param spdk_rdma_qp Pointer to SPDK RDMA qpair
+ * \param first Pointer to the first Work Request
+ * \return true if there were no outstanding WRs before, false otherwise
+ */
+bool spdk_rdma_qp_queue_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr *first);
+
+/**
+ * Submit all queued Work Requests.
+ * \param spdk_rdma_qp Pointer to SPDK RDMA qpair
+ * \param bad_wr Stores a pointer to the first failed WR if this function returns a nonzero value
+ * \return 0 on success, errno on failure
+ */
+int spdk_rdma_qp_flush_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr **bad_wr);
+
+#endif /* SPDK_RDMA_H */
diff --git a/src/spdk/include/spdk_internal/sock.h b/src/spdk/include/spdk_internal/sock.h
new file mode 100644
index 000000000..d88d6bd03
--- /dev/null
+++ b/src/spdk/include/spdk_internal/sock.h
@@ -0,0 +1,227 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * TCP network implementation abstraction layer
+ */
+
+#ifndef SPDK_INTERNAL_SOCK_H
+#define SPDK_INTERNAL_SOCK_H
+
+#include "spdk/stdinc.h"
+#include "spdk/sock.h"
+#include "spdk/queue.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_EVENTS_PER_POLL 32
+#define DEFAULT_SOCK_PRIORITY 0
+#define MIN_SOCK_PIPE_SIZE 1024
+
+struct spdk_sock {
+ struct spdk_net_impl *net_impl;
+ struct spdk_sock_opts opts;
+ int cb_cnt;
+ spdk_sock_cb cb_fn;
+ void *cb_arg;
+ struct spdk_sock_group_impl *group_impl;
+ TAILQ_ENTRY(spdk_sock) link;
+
+ int max_iovcnt;
+ TAILQ_HEAD(, spdk_sock_request) queued_reqs;
+ TAILQ_HEAD(, spdk_sock_request) pending_reqs;
+ int queued_iovcnt;
+
+ struct {
+ uint8_t closed : 1;
+ uint8_t reserved : 7;
+ } flags;
+};
+
+struct spdk_sock_group {
+ STAILQ_HEAD(, spdk_sock_group_impl) group_impls;
+ void *ctx;
+};
+
+struct spdk_sock_group_impl {
+ struct spdk_net_impl *net_impl;
+ TAILQ_HEAD(, spdk_sock) socks;
+ STAILQ_ENTRY(spdk_sock_group_impl) link;
+ /* List of removed sockets. refreshed each time we poll the sock group. */
+ int num_removed_socks;
+ /* Unfortunately, we can't just keep a tailq of the sockets in case they are freed
+ * or added to another poll group later.
+ */
+ uintptr_t removed_socks[MAX_EVENTS_PER_POLL];
+};
+
+struct spdk_net_impl {
+ const char *name;
+ int priority;
+
+ int (*getaddr)(struct spdk_sock *sock, char *saddr, int slen, uint16_t *sport, char *caddr,
+ int clen, uint16_t *cport);
+ struct spdk_sock *(*connect)(const char *ip, int port, struct spdk_sock_opts *opts);
+ struct spdk_sock *(*listen)(const char *ip, int port, struct spdk_sock_opts *opts);
+ struct spdk_sock *(*accept)(struct spdk_sock *sock);
+ int (*close)(struct spdk_sock *sock);
+ ssize_t (*recv)(struct spdk_sock *sock, void *buf, size_t len);
+ ssize_t (*readv)(struct spdk_sock *sock, struct iovec *iov, int iovcnt);
+ ssize_t (*writev)(struct spdk_sock *sock, struct iovec *iov, int iovcnt);
+
+ void (*writev_async)(struct spdk_sock *sock, struct spdk_sock_request *req);
+ int (*flush)(struct spdk_sock *sock);
+
+ int (*set_recvlowat)(struct spdk_sock *sock, int nbytes);
+ int (*set_recvbuf)(struct spdk_sock *sock, int sz);
+ int (*set_sendbuf)(struct spdk_sock *sock, int sz);
+
+ bool (*is_ipv6)(struct spdk_sock *sock);
+ bool (*is_ipv4)(struct spdk_sock *sock);
+ bool (*is_connected)(struct spdk_sock *sock);
+
+ int (*get_placement_id)(struct spdk_sock *sock, int *placement_id);
+ struct spdk_sock_group_impl *(*group_impl_create)(void);
+ int (*group_impl_add_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock);
+ int (*group_impl_remove_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock);
+ int (*group_impl_poll)(struct spdk_sock_group_impl *group, int max_events,
+ struct spdk_sock **socks);
+ int (*group_impl_close)(struct spdk_sock_group_impl *group);
+
+ int (*get_opts)(struct spdk_sock_impl_opts *opts, size_t *len);
+ int (*set_opts)(const struct spdk_sock_impl_opts *opts, size_t len);
+
+ STAILQ_ENTRY(spdk_net_impl) link;
+};
+
+void spdk_net_impl_register(struct spdk_net_impl *impl, int priority);
+
+/* Register a network implementation at program startup via a constructor
+ * function, so modules self-register simply by being linked in.
+ */
+#define SPDK_NET_IMPL_REGISTER(name, impl, priority) \
+static void __attribute__((constructor)) net_impl_register_##name(void) \
+{ \
+	spdk_net_impl_register(impl, priority); \
+}
+
+/* Append a write request to the socket's queued list and account for its
+ * iovec count (used for batching decisions).
+ */
+static inline void
+spdk_sock_request_queue(struct spdk_sock *sock, struct spdk_sock_request *req)
+{
+	TAILQ_INSERT_TAIL(&sock->queued_reqs, req, internal.link);
+	sock->queued_iovcnt += req->iovcnt;
+}
+
+/* Move a request from the queued list to the pending list once its data
+ * has been handed to the kernel, adjusting the queued iovec count.
+ */
+static inline void
+spdk_sock_request_pend(struct spdk_sock *sock, struct spdk_sock_request *req)
+{
+	TAILQ_REMOVE(&sock->queued_reqs, req, internal.link);
+	assert(sock->queued_iovcnt >= req->iovcnt);
+	sock->queued_iovcnt -= req->iovcnt;
+	TAILQ_INSERT_TAIL(&sock->pending_reqs, req, internal.link);
+}
+
+/* Complete a pending request: remove it from the pending list and invoke
+ * its callback with err.  cb_cnt guards against the callback closing the
+ * socket re-entrantly; if the user closed it inside the callback, finish
+ * the close here and return -1.  Returns 0 otherwise.
+ */
+static inline int
+spdk_sock_request_put(struct spdk_sock *sock, struct spdk_sock_request *req, int err)
+{
+	bool closed;
+	int rc = 0;
+
+	TAILQ_REMOVE(&sock->pending_reqs, req, internal.link);
+
+	req->internal.offset = 0;
+
+	closed = sock->flags.closed;
+	sock->cb_cnt++;
+	req->cb_fn(req->cb_arg, err);
+	assert(sock->cb_cnt > 0);
+	sock->cb_cnt--;
+
+	if (sock->cb_cnt == 0 && !closed && sock->flags.closed) {
+		/* The user closed the socket in response to a callback above. */
+		rc = -1;
+		spdk_sock_close(&sock);
+	}
+
+	return rc;
+}
+
+/* Abort every pending and queued request on the socket, completing each
+ * with -ECANCELED.  Uses the same cb_cnt re-entrancy guard as
+ * spdk_sock_request_put(): if a callback closed the socket, finish the
+ * close here and return -1; returns 0 otherwise.
+ */
+static inline int
+spdk_sock_abort_requests(struct spdk_sock *sock)
+{
+	struct spdk_sock_request *req;
+	bool closed;
+	int rc = 0;
+
+	closed = sock->flags.closed;
+	sock->cb_cnt++;
+
+	req = TAILQ_FIRST(&sock->pending_reqs);
+	while (req) {
+		TAILQ_REMOVE(&sock->pending_reqs, req, internal.link);
+
+		req->cb_fn(req->cb_arg, -ECANCELED);
+
+		req = TAILQ_FIRST(&sock->pending_reqs);
+	}
+
+	req = TAILQ_FIRST(&sock->queued_reqs);
+	while (req) {
+		TAILQ_REMOVE(&sock->queued_reqs, req, internal.link);
+
+		assert(sock->queued_iovcnt >= req->iovcnt);
+		sock->queued_iovcnt -= req->iovcnt;
+
+		req->cb_fn(req->cb_arg, -ECANCELED);
+
+		req = TAILQ_FIRST(&sock->queued_reqs);
+	}
+	assert(sock->cb_cnt > 0);
+	sock->cb_cnt--;
+
+	assert(TAILQ_EMPTY(&sock->queued_reqs));
+	assert(TAILQ_EMPTY(&sock->pending_reqs));
+
+	if (sock->cb_cnt == 0 && !closed && sock->flags.closed) {
+		/* The user closed the socket in response to a callback above. */
+		rc = -1;
+		spdk_sock_close(&sock);
+	}
+
+	return rc;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPDK_INTERNAL_SOCK_H */
diff --git a/src/spdk/include/spdk_internal/thread.h b/src/spdk/include/spdk_internal/thread.h
new file mode 100644
index 000000000..10bc4824c
--- /dev/null
+++ b/src/spdk/include/spdk_internal/thread.h
@@ -0,0 +1,136 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_THREAD_INTERNAL_H_
+#define SPDK_THREAD_INTERNAL_H_
+
+#include "spdk/stdinc.h"
+#include "spdk/thread.h"
+
+#define SPDK_MAX_POLLER_NAME_LEN 256
+#define SPDK_MAX_THREAD_NAME_LEN 256
+
+enum spdk_poller_state {
+ /* The poller is registered with a thread but not currently executing its fn. */
+ SPDK_POLLER_STATE_WAITING,
+
+ /* The poller is currently running its fn. */
+ SPDK_POLLER_STATE_RUNNING,
+
+ /* The poller was unregistered during the execution of its fn. */
+ SPDK_POLLER_STATE_UNREGISTERED,
+
+ /* The poller is in the process of being paused. It will be paused
+ * during the next time it's supposed to be executed.
+ */
+ SPDK_POLLER_STATE_PAUSING,
+
+ /* The poller is registered but currently paused. It's on the
+ * paused_pollers list.
+ */
+ SPDK_POLLER_STATE_PAUSED,
+};
+
+struct spdk_poller {
+ TAILQ_ENTRY(spdk_poller) tailq;
+
+ /* Current state of the poller; should only be accessed from the poller's thread. */
+ enum spdk_poller_state state;
+
+ uint64_t period_ticks;
+ uint64_t next_run_tick;
+ uint64_t run_count;
+ uint64_t busy_count;
+ spdk_poller_fn fn;
+ void *arg;
+ struct spdk_thread *thread;
+
+ char name[SPDK_MAX_POLLER_NAME_LEN + 1];
+};
+
+enum spdk_thread_state {
+	/* The thread is processing pollers and messages via spdk_thread_poll(). */
+ SPDK_THREAD_STATE_RUNNING,
+
+	/* The thread is in the process of termination. It reaps unregistering
+	 * pollers and releases I/O channels.
+ */
+ SPDK_THREAD_STATE_EXITING,
+
+ /* The thread is exited. It is ready to call spdk_thread_destroy(). */
+ SPDK_THREAD_STATE_EXITED,
+};
+
+struct spdk_thread {
+ uint64_t tsc_last;
+ struct spdk_thread_stats stats;
+ /*
+ * Contains pollers actively running on this thread. Pollers
+ * are run round-robin. The thread takes one poller from the head
+ * of the ring, executes it, then puts it back at the tail of
+ * the ring.
+ */
+ TAILQ_HEAD(active_pollers_head, spdk_poller) active_pollers;
+ /**
+ * Contains pollers running on this thread with a periodic timer.
+ */
+ TAILQ_HEAD(timed_pollers_head, spdk_poller) timed_pollers;
+ /*
+ * Contains paused pollers. Pollers on this queue are waiting until
+ * they are resumed (in which case they're put onto the active/timer
+ * queues) or unregistered.
+ */
+ TAILQ_HEAD(paused_pollers_head, spdk_poller) paused_pollers;
+ struct spdk_ring *messages;
+ SLIST_HEAD(, spdk_msg) msg_cache;
+ size_t msg_cache_count;
+ spdk_msg_fn critical_msg;
+ uint64_t id;
+ enum spdk_thread_state state;
+
+ TAILQ_HEAD(, spdk_io_channel) io_channels;
+ TAILQ_ENTRY(spdk_thread) tailq;
+
+ char name[SPDK_MAX_THREAD_NAME_LEN + 1];
+ struct spdk_cpuset cpumask;
+ uint64_t exit_timeout_tsc;
+
+ /* User context allocated at the end */
+ uint8_t ctx[0];
+};
+
+const char *spdk_poller_state_str(enum spdk_poller_state state);
+
+const char *spdk_io_device_get_name(struct io_device *dev);
+
+#endif /* SPDK_THREAD_INTERNAL_H_ */
diff --git a/src/spdk/include/spdk_internal/uring.h b/src/spdk/include/spdk_internal/uring.h
new file mode 100644
index 000000000..ff22f11d4
--- /dev/null
+++ b/src/spdk/include/spdk_internal/uring.h
@@ -0,0 +1,51 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_URING_H
+#define SPDK_INTERNAL_URING_H
+
+#include <liburing.h>
+
+#ifndef __NR_sys_io_uring_enter
+#define __NR_sys_io_uring_enter 426
+#endif
+
+static int
+spdk_io_uring_enter(int ring_fd, unsigned int to_submit,
+ unsigned int min_complete, unsigned int flags)
+{
+ return syscall(__NR_sys_io_uring_enter, ring_fd, to_submit,
+ min_complete, flags, NULL, 0);
+}
+
+#endif /* SPDK_INTERNAL_URING_H */
diff --git a/src/spdk/include/spdk_internal/utf.h b/src/spdk/include/spdk_internal/utf.h
new file mode 100644
index 000000000..b2b1c3c45
--- /dev/null
+++ b/src/spdk/include/spdk_internal/utf.h
@@ -0,0 +1,325 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_UTF_H_
+#define SPDK_UTF_H_
+
+#include "spdk/stdinc.h"
+
+#include "spdk/endian.h"
+#include "spdk/likely.h"
+#include "spdk/string.h"
+
+static inline bool
+utf8_tail(uint8_t c)
+{
+ /* c >= 0x80 && c <= 0xBF, or binary 01xxxxxx */
+ return (c & 0xC0) == 0x80;
+}
+
+/*
+ * Check for a valid UTF-8 encoding of a single codepoint.
+ *
+ * \return Length of valid UTF-8 byte sequence, or negative if invalid.
+ */
+static inline int
+utf8_valid(const uint8_t *start, const uint8_t *end)
+{
+ const uint8_t *p = start;
+ uint8_t b0, b1, b2, b3;
+
+ if (p == end) {
+ return 0;
+ }
+
+ b0 = *p;
+
+ if (b0 <= 0x7F) {
+ return 1;
+ }
+
+ if (b0 <= 0xC1) {
+ /* Invalid start byte */
+ return -1;
+ }
+
+ if (++p == end) {
+ /* Not enough bytes left */
+ return -1;
+ }
+ b1 = *p;
+
+ if (b0 <= 0xDF) {
+ /* C2..DF 80..BF */
+ if (!utf8_tail(b1)) {
+ return -1;
+ }
+ return 2;
+ }
+
+ if (++p == end) {
+ /* Not enough bytes left */
+ return -1;
+ }
+ b2 = *p;
+
+ if (b0 == 0xE0) {
+ /* E0 A0..BF 80..BF */
+ if (b1 < 0xA0 || b1 > 0xBF || !utf8_tail(b2)) {
+ return -1;
+ }
+ return 3;
+ } else if (b0 == 0xED && b1 >= 0xA0) {
+ /*
+ * UTF-16 surrogate pairs use U+D800..U+DFFF, which would be encoded as
+ * ED A0..BF 80..BF in UTF-8; however, surrogate pairs are not allowed in UTF-8.
+ */
+ return -1;
+ } else if (b0 <= 0xEF) {
+ /* E1..EF 80..BF 80..BF */
+ if (!utf8_tail(b1) || !utf8_tail(b2)) {
+ return -1;
+ }
+ return 3;
+ }
+
+ if (++p == end) {
+ /* Not enough bytes left */
+ return -1;
+ }
+ b3 = *p;
+
+ if (b0 == 0xF0) {
+ /* F0 90..BF 80..BF 80..BF */
+ if (b1 < 0x90 || b1 > 0xBF || !utf8_tail(b2) || !utf8_tail(b3)) {
+ return -1;
+ }
+ return 4;
+ } else if (b0 <= 0xF3) {
+ /* F1..F3 80..BF 80..BF 80..BF */
+ if (!utf8_tail(b1) || !utf8_tail(b2) || !utf8_tail(b3)) {
+ return -1;
+ }
+ return 4;
+ } else if (b0 == 0xF4) {
+ /* F4 80..8F 80..BF 80..BF */
+ if (b1 < 0x80 || b1 > 0x8F || !utf8_tail(b2) || !utf8_tail(b3)) {
+ return -1;
+ }
+ return 4;
+ }
+
+ return -1;
+}
+
+static inline uint32_t
+utf8_decode_unsafe_1(const uint8_t *data)
+{
+ return data[0];
+}
+
+static inline uint32_t
+utf8_decode_unsafe_2(const uint8_t *data)
+{
+ uint32_t codepoint;
+
+ codepoint = ((data[0] & 0x1F) << 6);
+ codepoint |= (data[1] & 0x3F);
+
+ return codepoint;
+}
+
+static inline uint32_t
+utf8_decode_unsafe_3(const uint8_t *data)
+{
+ uint32_t codepoint;
+
+ codepoint = ((data[0] & 0x0F) << 12);
+ codepoint |= (data[1] & 0x3F) << 6;
+ codepoint |= (data[2] & 0x3F);
+
+ return codepoint;
+}
+
+static inline uint32_t
+utf8_decode_unsafe_4(const uint8_t *data)
+{
+ uint32_t codepoint;
+
+ codepoint = ((data[0] & 0x07) << 18);
+ codepoint |= (data[1] & 0x3F) << 12;
+ codepoint |= (data[2] & 0x3F) << 6;
+ codepoint |= (data[3] & 0x3F);
+
+ return codepoint;
+}
+
+/*
+ * Encode a single Unicode codepoint as UTF-8.
+ *
+ * buf must have at least 4 bytes of space available (hence unsafe).
+ *
+ * \return Number of bytes appended to buf, or negative if encoding failed.
+ */
+static inline int
+utf8_encode_unsafe(uint8_t *buf, uint32_t c)
+{
+ if (c <= 0x7F) {
+ buf[0] = c;
+ return 1;
+ } else if (c <= 0x7FF) {
+ buf[0] = 0xC0 | (c >> 6);
+ buf[1] = 0x80 | (c & 0x3F);
+ return 2;
+ } else if (c >= 0xD800 && c <= 0xDFFF) {
+ /* UTF-16 surrogate pairs - invalid in UTF-8 */
+ return -1;
+ } else if (c <= 0xFFFF) {
+ buf[0] = 0xE0 | (c >> 12);
+ buf[1] = 0x80 | ((c >> 6) & 0x3F);
+ buf[2] = 0x80 | (c & 0x3F);
+ return 3;
+ } else if (c <= 0x10FFFF) {
+ buf[0] = 0xF0 | (c >> 18);
+ buf[1] = 0x80 | ((c >> 12) & 0x3F);
+ buf[2] = 0x80 | ((c >> 6) & 0x3F);
+ buf[3] = 0x80 | (c & 0x3F);
+ return 4;
+ }
+ return -1;
+}
+
+static inline int
+utf8_codepoint_len(uint32_t c)
+{
+ if (c <= 0x7F) {
+ return 1;
+ } else if (c <= 0x7FF) {
+ return 2;
+ } else if (c >= 0xD800 && c <= 0xDFFF) {
+ /* UTF-16 surrogate pairs - invalid in UTF-8 */
+ return -1;
+ } else if (c <= 0xFFFF) {
+ return 3;
+ } else if (c <= 0x10FFFF) {
+ return 4;
+ }
+ return -1;
+}
+
+static inline bool
+utf16_valid_surrogate_high(uint32_t val)
+{
+ return val >= 0xD800 && val <= 0xDBFF;
+}
+
+static inline bool
+utf16_valid_surrogate_low(uint32_t val)
+{
+ return val >= 0xDC00 && val <= 0xDFFF;
+}
+
+/*
+ * Check for a valid UTF-16LE encoding of a single codepoint.
+ *
+ * \return Length of valid UTF-16LE sequence in 16-bit code units, or negative if invalid.
+ */
+static inline int
+utf16le_valid(const uint16_t *start, const uint16_t *end)
+{
+ const uint16_t *p = start;
+ uint16_t high, low;
+
+ if (p == end) {
+ return 0;
+ }
+
+ high = from_le16(p);
+
+ if (high <= 0xD7FF || high >= 0xE000) {
+ /* Single code unit in BMP */
+ return 1;
+ }
+
+ if (high >= 0xDC00) {
+ /* Low surrogate in first code unit - invalid */
+ return -1;
+ }
+
+ assert(utf16_valid_surrogate_high(high));
+
+ if (++p == end) {
+ /* Not enough code units left */
+ return -1;
+ }
+ low = from_le16(p);
+
+ if (!utf16_valid_surrogate_low(low)) {
+ return -1;
+ }
+
+ /* Valid surrogate pair */
+ return 2;
+}
+
+static inline uint32_t
+utf16_decode_surrogate_pair(uint32_t high, uint32_t low)
+{
+ uint32_t codepoint;
+
+ assert(utf16_valid_surrogate_high(high));
+ assert(utf16_valid_surrogate_low(low));
+
+ codepoint = low;
+ codepoint &= 0x3FF;
+ codepoint |= ((high & 0x3FF) << 10);
+ codepoint += 0x10000;
+
+ return codepoint;
+}
+
+static inline void
+utf16_encode_surrogate_pair(uint32_t codepoint, uint16_t *high, uint16_t *low)
+{
+ assert(codepoint >= 0x10000);
+ assert(codepoint <= 0x10FFFF);
+
+ codepoint -= 0x10000;
+ *high = 0xD800 | (codepoint >> 10);
+ *low = 0xDC00 | (codepoint & 0x3FF);
+
+ assert(utf16_valid_surrogate_high(*high));
+ assert(utf16_valid_surrogate_low(*low));
+}
+
+#endif
diff --git a/src/spdk/include/spdk_internal/vhost_user.h b/src/spdk/include/spdk_internal/vhost_user.h
new file mode 100644
index 000000000..92ed3b65b
--- /dev/null
+++ b/src/spdk/include/spdk_internal/vhost_user.h
@@ -0,0 +1,140 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Structures defined in the vhost-user specification
+ */
+
+#ifndef SPDK_VHOST_USER_H
+#define SPDK_VHOST_USER_H
+
+#include "spdk/stdinc.h"
+
+#include <linux/vhost.h>
+
+#ifndef VHOST_USER_MEMORY_MAX_NREGIONS
+#define VHOST_USER_MEMORY_MAX_NREGIONS 8
+#endif
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE 256
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_MQ
+#define VHOST_USER_PROTOCOL_F_MQ 0
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG 9
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD
+#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
+#endif
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+enum vhost_user_request {
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+ VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+ VHOST_USER_GET_QUEUE_NUM = 17,
+ VHOST_USER_SET_VRING_ENABLE = 18,
+ VHOST_USER_SEND_RARP = 19,
+ VHOST_USER_NET_SET_MTU = 20,
+ VHOST_USER_SET_SLAVE_REQ_FD = 21,
+ VHOST_USER_IOTLB_MSG = 22,
+ VHOST_USER_GET_CONFIG = 24,
+ VHOST_USER_SET_CONFIG = 25,
+ VHOST_USER_CRYPTO_CREATE_SESS = 26,
+ VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+ VHOST_USER_POSTCOPY_ADVISE = 28,
+ VHOST_USER_POSTCOPY_LISTEN = 29,
+ VHOST_USER_POSTCOPY_END = 30,
+ VHOST_USER_MAX
+};
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+ uint32_t offset;
+ uint32_t size;
+ uint32_t flags;
+ uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
+/** Fixed-size vhost_memory struct */
+struct vhost_memory_padded {
+ uint32_t nregions;
+ uint32_t padding;
+ struct vhost_memory_region regions[VHOST_USER_MEMORY_MAX_NREGIONS];
+};
+
+struct vhost_user_msg {
+ enum vhost_user_request request;
+
+#define VHOST_USER_VERSION_MASK 0x3
+#define VHOST_USER_REPLY_MASK (0x1 << 2)
+ uint32_t flags;
+ uint32_t size; /**< the following payload size */
+ union {
+#define VHOST_USER_VRING_IDX_MASK 0xff
+#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
+ uint64_t u64;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ struct vhost_memory_padded memory;
+ struct vhost_user_config cfg;
+ } payload;
+} __attribute((packed));
+
+#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64)
+#define VHOST_USER_PAYLOAD_SIZE \
+ (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE)
+
+#endif /* SPDK_VHOST_USER_H */
diff --git a/src/spdk/include/spdk_internal/virtio.h b/src/spdk/include/spdk_internal/virtio.h
new file mode 100644
index 000000000..c30013efe
--- /dev/null
+++ b/src/spdk/include/spdk_internal/virtio.h
@@ -0,0 +1,486 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_VIRTIO_H
+#define SPDK_VIRTIO_H
+
+#include "spdk/stdinc.h"
+
+#include <linux/virtio_ring.h>
+#include <linux/virtio_pci.h>
+#include <linux/virtio_config.h>
+
+#include "spdk_internal/log.h"
+#include "spdk/likely.h"
+#include "spdk/queue.h"
+#include "spdk/json.h"
+#include "spdk/thread.h"
+#include "spdk/pci_ids.h"
+#include "spdk/env.h"
+
+/**
+ * The maximum virtqueue size is 2^15. Use that value as the end of
+ * descriptor chain terminator since it will never be a valid index
+ * in the descriptor table. This is used to verify we are correctly
+ * handling vq_free_cnt.
+ */
+#define VQ_RING_DESC_CHAIN_END 32768
+
+#define SPDK_VIRTIO_MAX_VIRTQUEUES 0x100
+
+/* Extra status define for readability */
+#define VIRTIO_CONFIG_S_RESET 0
+
+struct virtio_dev_ops;
+
+struct virtio_dev {
+ struct virtqueue **vqs;
+
+ /** Name of this virtio dev set by backend */
+ char *name;
+
+ /** Fixed number of backend-specific non-I/O virtqueues. */
+ uint16_t fixed_queues_num;
+
+ /** Max number of virtqueues the host supports. */
+ uint16_t max_queues;
+
+ /** Common device & guest features. */
+ uint64_t negotiated_features;
+
+ int is_hw;
+
+ /** Modern/legacy virtio device flag. */
+ uint8_t modern;
+
+ /** Mutex for asynchronous virtqueue-changing operations. */
+ pthread_mutex_t mutex;
+
+ /** Backend-specific callbacks. */
+ const struct virtio_dev_ops *backend_ops;
+
+ /** Context for the backend ops */
+ void *ctx;
+};
+
+struct virtio_dev_ops {
+ int (*read_dev_cfg)(struct virtio_dev *hw, size_t offset,
+ void *dst, int len);
+ int (*write_dev_cfg)(struct virtio_dev *hw, size_t offset,
+ const void *src, int len);
+ uint8_t (*get_status)(struct virtio_dev *hw);
+ void (*set_status)(struct virtio_dev *hw, uint8_t status);
+
+ /**
+ * Get device features. The features might be already
+ * negotiated with driver (guest) features.
+ */
+ uint64_t (*get_features)(struct virtio_dev *vdev);
+
+ /**
+ * Negotiate and set device features.
+ * The negotiation can fail with return code -1.
+ * This function should also set vdev->negotiated_features field.
+ */
+ int (*set_features)(struct virtio_dev *vdev, uint64_t features);
+
+ /** Destruct virtio device */
+ void (*destruct_dev)(struct virtio_dev *vdev);
+
+ uint16_t (*get_queue_size)(struct virtio_dev *vdev, uint16_t queue_id);
+ int (*setup_queue)(struct virtio_dev *hw, struct virtqueue *vq);
+ void (*del_queue)(struct virtio_dev *hw, struct virtqueue *vq);
+ void (*notify_queue)(struct virtio_dev *hw, struct virtqueue *vq);
+
+ void (*dump_json_info)(struct virtio_dev *hw, struct spdk_json_write_ctx *w);
+ void (*write_json_config)(struct virtio_dev *hw, struct spdk_json_write_ctx *w);
+};
+
+struct vq_desc_extra {
+ void *cookie;
+ uint16_t ndescs;
+};
+
+struct virtqueue {
+ struct virtio_dev *vdev; /**< owner of this virtqueue */
+ struct vring vq_ring; /**< vring keeping desc, used and avail */
+ /**
+ * Last consumed descriptor in the used table,
+ * trails vq_ring.used->idx.
+ */
+ uint16_t vq_used_cons_idx;
+ uint16_t vq_nentries; /**< vring desc numbers */
+ uint16_t vq_free_cnt; /**< num of desc available */
+ uint16_t vq_avail_idx; /**< sync until needed */
+
+ void *vq_ring_virt_mem; /**< virtual address of vring */
+ unsigned int vq_ring_size;
+
+ uint64_t vq_ring_mem; /**< physical address of vring */
+
+ /**
+ * Head of the free chain in the descriptor table. If
+ * there are no free descriptors, this will be set to
+ * VQ_RING_DESC_CHAIN_END.
+ */
+ uint16_t vq_desc_head_idx;
+
+ /**
+ * Tail of the free chain in desc table. If
+ * there are no free descriptors, this will be set to
+ * VQ_RING_DESC_CHAIN_END.
+ */
+ uint16_t vq_desc_tail_idx;
+ uint16_t vq_queue_index; /**< PCI queue index */
+ uint16_t *notify_addr;
+
+ /** Thread that's polling this queue. */
+ struct spdk_thread *owner_thread;
+
+ uint16_t req_start;
+ uint16_t req_end;
+ uint16_t reqs_finished;
+
+ struct vq_desc_extra vq_descx[0];
+};
+
+enum spdk_virtio_desc_type {
+ SPDK_VIRTIO_DESC_RO = 0, /**< Read only */
+ SPDK_VIRTIO_DESC_WR = VRING_DESC_F_WRITE, /**< Write only */
+ /* TODO VIRTIO_DESC_INDIRECT */
+};
+
+/** Context for creating PCI virtio_devs */
+struct virtio_pci_ctx;
+
+/**
+ * Callback for creating virtio_dev from a PCI device.
+ * \param pci_ctx PCI context to be associated with a virtio_dev
+ * \param ctx context provided by the user
+ * \return 0 on success, -1 on error.
+ */
+typedef int (*virtio_pci_create_cb)(struct virtio_pci_ctx *pci_ctx, void *ctx);
+
+uint16_t virtio_recv_pkts(struct virtqueue *vq, void **io, uint32_t *len, uint16_t io_cnt);
+
+/**
+ * Start a new request on the current vring head position and associate it
+ * with an opaque cookie object. The previous request in given vq will be
+ * made visible to the device in hopes it can be processed early, but there's
+ * no guarantee it will be until the device is notified with \c
+ * virtqueue_req_flush. This behavior is simply an optimization and virtqueues
+ * must always be flushed. Empty requests (with no descriptors added) will be
+ * ignored. The device owning given virtqueue must be started.
+ *
+ * \param vq virtio queue
+ * \param cookie opaque object to associate with this request. Once the request
+ * is sent, processed and a response is received, the same object will be
+ * returned to the user after calling the virtio poll API.
+ * \param iovcnt number of required iovectors for the request. This can be
+ * higher than the actual number of iovectors to be added.
+ * \return 0 on success or negative errno otherwise. If the `iovcnt` is
+ * greater than virtqueue depth, -EINVAL is returned. If simply not enough
+ * iovectors are available, -ENOMEM is returned.
+ */
+int virtqueue_req_start(struct virtqueue *vq, void *cookie, int iovcnt);
+
+/**
+ * Flush a virtqueue. This will notify the device if it's required.
+ * The device owning given virtqueue must be started.
+ *
+ * \param vq virtio queue
+ */
+void virtqueue_req_flush(struct virtqueue *vq);
+
+/**
+ * Abort the very last request in a virtqueue. This will restore virtqueue
+ * state to the point before the last request was created. Note that this
+ * is only effective if a queue hasn't been flushed yet. The device owning
+ * given virtqueue must be started.
+ *
+ * \param vq virtio queue
+ */
+void virtqueue_req_abort(struct virtqueue *vq);
+
+/**
+ * Add iovec chain to the last created request. This call does not provide any
+ * error-checking. The caller has to ensure that he doesn't add more iovs than
+ * what was specified during request creation. The device owning given virtqueue
+ * must be started.
+ *
+ * \param vq virtio queue
+ * \param iovs iovec array
+ * \param iovcnt number of iovs in iovec array
+ * \param desc_type type of all given iovectors
+ */
+void virtqueue_req_add_iovs(struct virtqueue *vq, struct iovec *iovs, uint16_t iovcnt,
+ enum spdk_virtio_desc_type desc_type);
+
+/**
+ * Construct a virtio device. The device will be in stopped state by default.
+ * Before doing any I/O, it has to be manually started via \c virtio_dev_restart.
+ *
+ * \param vdev memory for virtio device, must be zeroed
+ * \param name name for the virtio device
+ * \param ops backend callbacks
+ * \param ops_ctx argument for the backend callbacks
+ * \return zero on success, or negative error code otherwise
+ */
+int virtio_dev_construct(struct virtio_dev *vdev, const char *name,
+ const struct virtio_dev_ops *ops, void *ops_ctx);
+
+/**
+ * Reset the device and prepare it to be `virtio_dev_start`ed. This call
+ * will also renegotiate feature flags.
+ *
+ * \param vdev virtio device
+ * \param req_features features this driver supports. A VIRTIO_F_VERSION_1
+ * flag will be automatically appended, as legacy devices are not supported.
+ */
+int virtio_dev_reset(struct virtio_dev *vdev, uint64_t req_features);
+
+/**
+ * Notify the host to start processing this virtio device. This is
+ * a blocking call that won't return until the host has started.
+ * This will also allocate virtqueues.
+ *
+ * \param vdev virtio device
+ * \param max_queues number of queues to allocate. The max number of
+ * usable I/O queues is also limited by the host device. `vdev` will be
+ * started successfully even if the host supports less queues than requested.
+ * \param fixed_queues_num number of queues preceding the first
+ * request queue. For Virtio-SCSI this is equal to 2, as there are
+ * additional event and control queues.
+ */
+int virtio_dev_start(struct virtio_dev *vdev, uint16_t max_queues,
+ uint16_t fixed_queues_num);
+
+/**
+ * Stop the host from processing the device. This is a blocking call
+ * that won't return until all outstanding I/O has been processed on
+ * the host (virtio device) side. In order to re-start the device, it
+ * has to be `virtio_dev_reset` first.
+ *
+ * \param vdev virtio device
+ */
+void virtio_dev_stop(struct virtio_dev *vdev);
+
+/**
+ * Destruct a virtio device. Note that it must be in the stopped state.
+ * The virtio_dev should be manually freed afterwards.
+ *
+ * \param vdev virtio device
+ */
+void virtio_dev_destruct(struct virtio_dev *vdev);
+
+/**
+ * Bind a virtqueue with given index to the current thread.
+ *
+ * This function is thread-safe.
+ *
+ * \param vdev vhost device
+ * \param index virtqueue index
+ * \return 0 on success, -1 in case a virtqueue with given index either
+ * does not exist or is already acquired.
+ */
+int virtio_dev_acquire_queue(struct virtio_dev *vdev, uint16_t index);
+
+/**
+ * Look for unused queue and bind it to the current thread. This will
+ * scan the queues in range from *start_index* (inclusive) up to
+ * vdev->max_queues (exclusive).
+ *
+ * This function is thread-safe.
+ *
+ * \param vdev vhost device
+ * \param start_index virtqueue index to start looking from
+ * \return index of acquired queue or -1 in case no unused queue in given range
+ * has been found
+ */
+int32_t virtio_dev_find_and_acquire_queue(struct virtio_dev *vdev, uint16_t start_index);
+
+/**
+ * Get thread that acquired given virtqueue.
+ *
+ * This function is thread-safe.
+ *
+ * \param vdev vhost device
+ * \param index index of virtqueue
+ * \return thread that acquired given virtqueue. If the queue is unused
+ * or doesn't exist a NULL is returned.
+ */
+struct spdk_thread *virtio_dev_queue_get_thread(struct virtio_dev *vdev, uint16_t index);
+
+/**
+ * Check if virtqueue with given index is acquired.
+ *
+ * This function is thread-safe.
+ *
+ * \param vdev vhost device
+ * \param index index of virtqueue
+ * \return virtqueue acquire status. in case of invalid index *false* is returned.
+ */
+bool virtio_dev_queue_is_acquired(struct virtio_dev *vdev, uint16_t index);
+
+/**
+ * Release previously acquired queue.
+ *
+ * This function must be called from the thread that acquired the queue.
+ *
+ * \param vdev vhost device
+ * \param index index of virtqueue to release
+ */
+void virtio_dev_release_queue(struct virtio_dev *vdev, uint16_t index);
+
+/**
+ * Get Virtio status flags.
+ *
+ * \param vdev virtio device
+ */
+uint8_t virtio_dev_get_status(struct virtio_dev *vdev);
+
+/**
+ * Set Virtio status flag. The flags have to be set in very specific order
+ * defined in the VIRTIO 1.0 spec section 3.1.1. To unset the flags, stop the
+ * device or set \c VIRTIO_CONFIG_S_RESET status flag. There is no way to
+ * unset only particular flags.
+ *
+ * \param vdev virtio device
+ * \param flag flag to set
+ */
+void virtio_dev_set_status(struct virtio_dev *vdev, uint8_t flag);
+
+/**
+ * Write raw data into the device config at given offset. This call does not
+ * provide any error checking.
+ *
+ * \param vdev virtio device
+ * \param offset offset in bytes
+ * \param src pointer to data to copy from
+ * \param len length of data to copy in bytes
+ * \return 0 on success, negative errno otherwise
+ */
+int virtio_dev_write_dev_config(struct virtio_dev *vdev, size_t offset, const void *src, int len);
+
+/**
+ * Read raw data from the device config at given offset. This call does not
+ * provide any error checking.
+ *
+ * \param vdev virtio device
+ * \param offset offset in bytes
+ * \param dst pointer to buffer to copy data into
+ * \param len length of data to copy in bytes
+ * \return 0 on success, negative errno otherwise
+ */
+int virtio_dev_read_dev_config(struct virtio_dev *vdev, size_t offset, void *dst, int len);
+
+/**
+ * Get backend-specific ops for given device.
+ *
+ * \param vdev virtio device
+ */
+const struct virtio_dev_ops *virtio_dev_backend_ops(struct virtio_dev *vdev);
+
+/**
+ * Check if the device has negotiated given feature bit.
+ *
+ * \param vdev virtio device
+ * \param bit feature bit
+ */
+static inline bool
+virtio_dev_has_feature(struct virtio_dev *vdev, uint64_t bit)
+{
+ return !!(vdev->negotiated_features & (1ULL << bit));
+}
+
+/**
+ * Dump all device specific information into given json stream.
+ *
+ * \param vdev virtio device
+ * \param w json stream
+ */
+void virtio_dev_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w);
+
+/**
+ * Enumerate all PCI Virtio devices of given type on the system.
+ *
+ * \param enum_cb a function to be called for each valid PCI device.
+ * If a virtio_dev has been created, the callback should return 0.
+ * Returning any other value will cause the PCI context to be freed,
+ * making it unusable.
+ * \param enum_ctx additional opaque context to be passed into `enum_cb`
+ * \param pci_device_id PCI Device ID of devices to iterate through
+ */
+int virtio_pci_dev_enumerate(virtio_pci_create_cb enum_cb, void *enum_ctx,
+ uint16_t pci_device_id);
+
+/**
+ * Attach a PCI Virtio device of given type.
+ *
+ * \param create_cb callback to create a virtio_dev.
+ * If a virtio_dev has been created, the callback should return 0.
+ * Returning any other value will cause the PCI context to be freed,
+ * making it unusable.
+ * \param enum_ctx additional opaque context to be passed into `enum_cb`
+ * \param pci_device_id PCI Device ID of devices to iterate through
+ * \param pci_addr PCI address of the device to attach
+ */
+int virtio_pci_dev_attach(virtio_pci_create_cb create_cb, void *enum_ctx,
+ uint16_t pci_device_id, struct spdk_pci_addr *pci_addr);
+
+/**
+ * Connect to a vhost-user device and init corresponding virtio_dev struct.
+ * The virtio_dev will have to be freed with \c virtio_dev_free.
+ *
+ * \param vdev preallocated vhost device struct to operate on
+ * \param name name of this virtio device
+ * \param path path to the Unix domain socket of the vhost-user device
+ * \param queue_size size of each of the queues
+ * \return 0 on success, negative value on error (NOTE: confirm exact error convention)
+ */
+int virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path,
+ uint32_t queue_size);
+
+/**
+ * Initialize virtio_dev for a given PCI device.
+ * The virtio_dev has to be freed with \c virtio_dev_destruct.
+ *
+ * \param vdev preallocated vhost device struct to operate on
+ * \param name name of this virtio device
+ * \param pci_ctx context of the PCI device
+ * \return 0 on success, -1 on error.
+ */
+int virtio_pci_dev_init(struct virtio_dev *vdev, const char *name,
+ struct virtio_pci_ctx *pci_ctx);
+
+#endif /* SPDK_VIRTIO_H */