diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/spdk/include/spdk_internal | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds. (refs: upstream/16.2.11+ds, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/include/spdk_internal')
-rw-r--r-- | src/spdk/include/spdk_internal/accel_engine.h | 130 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/assert.h | 55 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/event.h | 197 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/idxd.h | 74 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/log.h | 108 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/lvolstore.h | 128 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/mock.h | 135 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/nvme_tcp.h | 633 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/rdma.h | 117 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/sock.h | 227 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/thread.h | 136 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/uring.h | 51 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/utf.h | 325 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/vhost_user.h | 140 | ||||
-rw-r--r-- | src/spdk/include/spdk_internal/virtio.h | 486 |
15 files changed, 2942 insertions, 0 deletions
diff --git a/src/spdk/include/spdk_internal/accel_engine.h b/src/spdk/include/spdk_internal/accel_engine.h new file mode 100644 index 000000000..9b78bc967 --- /dev/null +++ b/src/spdk/include/spdk_internal/accel_engine.h @@ -0,0 +1,130 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_INTERNAL_ACCEL_ENGINE_H +#define SPDK_INTERNAL_ACCEL_ENGINE_H + +#include "spdk/stdinc.h" + +#include "spdk/accel_engine.h" +#include "spdk/queue.h" + +struct spdk_accel_task { + spdk_accel_completion_cb cb; + void *cb_arg; + uint8_t offload_ctx[0]; +}; + +struct spdk_accel_engine { + uint64_t (*get_capabilities)(void); + int (*copy)(struct spdk_io_channel *ch, void *dst, void *src, + uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*dualcast)(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src, + uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + uint32_t (*batch_get_max)(void); + struct spdk_accel_batch *(*batch_create)(struct spdk_io_channel *ch); + int (*batch_prep_copy)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*batch_prep_dualcast)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *dst1, void *dst2, void *src, uint64_t nbytes, + spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*batch_prep_compare)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*batch_prep_fill)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *dst, uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*batch_prep_crc32c)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes, + spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*batch_submit)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*batch_cancel)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch); + int (*compare)(struct spdk_io_channel *ch, void *src1, void *src2, + uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + int 
(*fill)(struct spdk_io_channel *ch, void *dst, uint8_t fill, + uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + int (*crc32c)(struct spdk_io_channel *ch, uint32_t *dst, void *src, + uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); + struct spdk_io_channel *(*get_io_channel)(void); +}; + +struct spdk_accel_module_if { + /** Initialization function for the module. Called by the spdk + * application during startup. + * + * Modules are required to define this function. + */ + int (*module_init)(void); + + /** Finish function for the module. Called by the spdk application + * before the spdk application exits to perform any necessary cleanup. + * + * Modules are not required to define this function. + */ + void (*module_fini)(void *ctx); + + /** Function called to return a text string representing the + * module's configuration options for inclusion in an + * spdk configuration file. + */ + void (*config_text)(FILE *fp); + + /** + * Write Acceleration module configuration into provided JSON context. + */ + void (*write_config_json)(struct spdk_json_write_ctx *w); + + /** + * Returns the allocation size required for the modules to use for context. 
+ */ + size_t (*get_ctx_size)(void); + + TAILQ_ENTRY(spdk_accel_module_if) tailq; +}; + +void spdk_accel_hw_engine_register(struct spdk_accel_engine *accel_engine); +void spdk_accel_module_list_add(struct spdk_accel_module_if *accel_module); + +#define SPDK_ACCEL_MODULE_REGISTER(init_fn, fini_fn, config_fn, config_json, ctx_size_fn) \ + static struct spdk_accel_module_if init_fn ## _if = { \ + .module_init = init_fn, \ + .module_fini = fini_fn, \ + .config_text = config_fn, \ + .write_config_json = config_json, \ + .get_ctx_size = ctx_size_fn, \ + }; \ + __attribute__((constructor)) static void init_fn ## _init(void) \ + { \ + spdk_accel_module_list_add(&init_fn ## _if); \ + } + +#endif diff --git a/src/spdk/include/spdk_internal/assert.h b/src/spdk/include/spdk_internal/assert.h new file mode 100644 index 000000000..7e4c45070 --- /dev/null +++ b/src/spdk/include/spdk_internal/assert.h @@ -0,0 +1,55 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_INTERNAL_ASSERT_H +#define SPDK_INTERNAL_ASSERT_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/assert.h" + +#if !defined(DEBUG) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) +#define SPDK_UNREACHABLE() __builtin_unreachable() +#else +#define SPDK_UNREACHABLE() abort() +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_INTERNAL_ASSERT_H */ diff --git a/src/spdk/include/spdk_internal/event.h b/src/spdk/include/spdk_internal/event.h new file mode 100644 index 000000000..2d88d08ba --- /dev/null +++ b/src/spdk/include/spdk_internal/event.h @@ -0,0 +1,197 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_INTERNAL_EVENT_H +#define SPDK_INTERNAL_EVENT_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "spdk/event.h" +#include "spdk/json.h" +#include "spdk/thread.h" +#include "spdk/util.h" + +struct spdk_event { + uint32_t lcore; + spdk_event_fn fn; + void *arg1; + void *arg2; +}; + +enum spdk_reactor_state { + SPDK_REACTOR_STATE_UNINITIALIZED = 0, + SPDK_REACTOR_STATE_INITIALIZED = 1, + SPDK_REACTOR_STATE_RUNNING = 2, + SPDK_REACTOR_STATE_EXITING = 3, + SPDK_REACTOR_STATE_SHUTDOWN = 4, +}; + +struct spdk_lw_thread { + TAILQ_ENTRY(spdk_lw_thread) link; + bool resched; + uint64_t tsc_start; +}; + +struct spdk_reactor { + /* Lightweight threads running on this reactor */ + TAILQ_HEAD(, spdk_lw_thread) threads; + uint32_t thread_count; + + /* Logical core number for this reactor. 
*/ + uint32_t lcore; + + struct { + uint32_t is_valid : 1; + uint32_t reserved : 31; + } flags; + + uint64_t tsc_last; + + struct spdk_ring *events; + + /* The last known rusage values */ + struct rusage rusage; + uint64_t last_rusage; + + uint64_t busy_tsc; + uint64_t idle_tsc; +} __attribute__((aligned(SPDK_CACHE_LINE_SIZE))); + +int spdk_reactors_init(void); +void spdk_reactors_fini(void); + +void spdk_reactors_start(void); +void spdk_reactors_stop(void *arg1); + +struct spdk_reactor *spdk_reactor_get(uint32_t lcore); + +/** + * Allocate and pass an event to each reactor, serially. + * + * The allocated event is processed asynchronously - i.e. spdk_for_each_reactor + * will return prior to `fn` being called on each reactor. + * + * \param fn This is the function that will be called on each reactor. + * \param arg1 Argument will be passed to fn when called. + * \param arg2 Argument will be passed to fn when called. + * \param cpl This will be called on the originating reactor after `fn` has been + * called on each reactor. + */ +void spdk_for_each_reactor(spdk_event_fn fn, void *arg1, void *arg2, spdk_event_fn cpl); + +struct spdk_subsystem { + const char *name; + /* User must call spdk_subsystem_init_next() when they are done with their initialization. */ + void (*init)(void); + void (*fini)(void); + void (*config)(FILE *fp); + + /** + * Write JSON configuration handler. 
+ * + * \param w JSON write context + */ + void (*write_config_json)(struct spdk_json_write_ctx *w); + TAILQ_ENTRY(spdk_subsystem) tailq; +}; + +struct spdk_subsystem *spdk_subsystem_find(const char *name); +struct spdk_subsystem *spdk_subsystem_get_first(void); +struct spdk_subsystem *spdk_subsystem_get_next(struct spdk_subsystem *cur_subsystem); + +struct spdk_subsystem_depend { + const char *name; + const char *depends_on; + TAILQ_ENTRY(spdk_subsystem_depend) tailq; +}; + +struct spdk_subsystem_depend *spdk_subsystem_get_first_depend(void); +struct spdk_subsystem_depend *spdk_subsystem_get_next_depend(struct spdk_subsystem_depend + *cur_depend); + +void spdk_add_subsystem(struct spdk_subsystem *subsystem); +void spdk_add_subsystem_depend(struct spdk_subsystem_depend *depend); + +typedef void (*spdk_subsystem_init_fn)(int rc, void *ctx); +void spdk_subsystem_init(spdk_subsystem_init_fn cb_fn, void *cb_arg); +void spdk_subsystem_fini(spdk_msg_fn cb_fn, void *cb_arg); +void spdk_subsystem_init_next(int rc); +void spdk_subsystem_fini_next(void); +void spdk_subsystem_config(FILE *fp); +void spdk_app_json_config_load(const char *json_config_file, const char *rpc_addr, + spdk_subsystem_init_fn cb_fn, void *cb_arg, + bool stop_on_error); + +/** + * Save pointed \c subsystem configuration to the JSON write context \c w. In case of + * error \c null is written to the JSON context. + * + * \param w JSON write context + * \param subsystem the subsystem to query + */ +void spdk_subsystem_config_json(struct spdk_json_write_ctx *w, struct spdk_subsystem *subsystem); + +void spdk_rpc_initialize(const char *listen_addr); +void spdk_rpc_finish(void); + +/** + * \brief Register a new subsystem + */ +#define SPDK_SUBSYSTEM_REGISTER(_name) \ + __attribute__((constructor)) static void _name ## _register(void) \ + { \ + spdk_add_subsystem(&_name); \ + } + +/** + * \brief Declare that a subsystem depends on another subsystem. 
+ */ +#define SPDK_SUBSYSTEM_DEPEND(_name, _depends_on) \ + static struct spdk_subsystem_depend __subsystem_ ## _name ## _depend_on ## _depends_on = { \ + .name = #_name, \ + .depends_on = #_depends_on, \ + }; \ + __attribute__((constructor)) static void _name ## _depend_on ## _depends_on(void) \ + { \ + spdk_add_subsystem_depend(&__subsystem_ ## _name ## _depend_on ## _depends_on); \ + } + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_INTERNAL_EVENT_H */ diff --git a/src/spdk/include/spdk_internal/idxd.h b/src/spdk/include/spdk_internal/idxd.h new file mode 100644 index 000000000..17db2405d --- /dev/null +++ b/src/spdk/include/spdk_internal/idxd.h @@ -0,0 +1,74 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __IDXD_INTERNAL_H__ +#define __IDXD_INTERNAL_H__ + +#include "spdk/stdinc.h" + +#include "spdk/idxd.h" +#include "spdk/queue.h" +#include "spdk/mmio.h" +#include "spdk/bit_array.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define IDXD_MAX_CONFIG_NUM 1 + +enum dsa_opcode { + IDXD_OPCODE_NOOP = 0, + IDXD_OPCODE_BATCH = 1, + IDXD_OPCODE_DRAIN = 2, + IDXD_OPCODE_MEMMOVE = 3, + IDXD_OPCODE_MEMFILL = 4, + IDXD_OPCODE_COMPARE = 5, + IDXD_OPCODE_COMPVAL = 6, + IDXD_OPCODE_CR_DELTA = 7, + IDXD_OPCODE_AP_DELTA = 8, + IDXD_OPCODE_DUALCAST = 9, + IDXD_OPCODE_CRC32C_GEN = 16, + IDXD_OPCODE_COPY_CRC = 17, + IDXD_OPCODE_DIF_CHECK = 18, + IDXD_OPCODE_DIF_INS = 19, + IDXD_OPCODE_DIF_STRP = 20, + IDXD_OPCODE_DIF_UPDT = 21, + IDXD_OPCODE_CFLUSH = 32, +}; + +#ifdef __cplusplus +} +#endif + +#endif /* __IDXD_INTERNAL_H__ */ diff --git a/src/spdk/include/spdk_internal/log.h b/src/spdk/include/spdk_internal/log.h new file mode 100644 index 000000000..0993d1016 --- /dev/null +++ b/src/spdk/include/spdk_internal/log.h @@ -0,0 +1,108 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * \file + * Logging interfaces + */ + +#ifndef SPDK_INTERNAL_LOG_H +#define SPDK_INTERNAL_LOG_H + +#include "spdk/log.h" +#include "spdk/queue.h" + +extern enum spdk_log_level g_spdk_log_level; +extern enum spdk_log_level g_spdk_log_print_level; +extern enum spdk_log_level g_spdk_log_backtrace_level; + +struct spdk_log_flag { + TAILQ_ENTRY(spdk_log_flag) tailq; + const char *name; + bool enabled; +}; + +void spdk_log_register_flag(const char *name, struct spdk_log_flag *flag); + +struct spdk_log_flag *spdk_log_get_first_flag(void); +struct spdk_log_flag *spdk_log_get_next_flag(struct spdk_log_flag *flag); + +#define SPDK_LOG_REGISTER_COMPONENT(str, flag) \ +struct spdk_log_flag flag = { \ + .enabled = false, \ + .name = str, \ +}; \ +__attribute__((constructor)) static void register_flag_##flag(void) \ +{ \ + spdk_log_register_flag(str, &flag); \ +} + +#define SPDK_INFOLOG(FLAG, ...) \ + do { \ + extern struct spdk_log_flag FLAG; \ + if (FLAG.enabled) { \ + spdk_log(SPDK_LOG_INFO, __FILE__, __LINE__, __func__, __VA_ARGS__); \ + } \ + } while (0) + +#ifdef DEBUG + +#define SPDK_DEBUGLOG(FLAG, ...) \ + do { \ + extern struct spdk_log_flag FLAG; \ + if (FLAG.enabled) { \ + spdk_log(SPDK_LOG_DEBUG, __FILE__, __LINE__, __func__, __VA_ARGS__); \ + } \ + } while (0) + +#define SPDK_LOGDUMP(FLAG, LABEL, BUF, LEN) \ + do { \ + extern struct spdk_log_flag FLAG; \ + if ((FLAG.enabled) && (LEN)) { \ + spdk_log_dump(stderr, (LABEL), (BUF), (LEN)); \ + } \ + } while (0) + +#else +#define SPDK_DEBUGLOG(...) do { } while (0) +#define SPDK_LOGDUMP(...) 
do { } while (0) +#endif + +#define SPDK_ERRLOGDUMP(LABEL, BUF, LEN) \ + do { \ + if ((LEN)) { \ + spdk_log_dump(stderr, (LABEL), (BUF), (LEN)); \ + } \ + } while (0) + +#endif /* SPDK_INTERNAL_LOG_H */ diff --git a/src/spdk/include/spdk_internal/lvolstore.h b/src/spdk/include/spdk_internal/lvolstore.h new file mode 100644 index 000000000..f82157e53 --- /dev/null +++ b/src/spdk/include/spdk_internal/lvolstore.h @@ -0,0 +1,128 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_INTERNAL_LVOLSTORE_H +#define SPDK_INTERNAL_LVOLSTORE_H + +#include "spdk/blob.h" +#include "spdk/lvol.h" +#include "spdk/uuid.h" +#include "spdk/bdev_module.h" + +/* Default size of blobstore cluster */ +#define SPDK_LVS_OPTS_CLUSTER_SZ (4 * 1024 * 1024) + +/* UUID + '_' + blobid (20 characters for uint64_t). + * Null terminator is already included in SPDK_UUID_STRING_LEN. */ +#define SPDK_LVOL_UNIQUE_ID_MAX (SPDK_UUID_STRING_LEN + 1 + 20) + +struct spdk_lvs_req { + spdk_lvs_op_complete cb_fn; + void *cb_arg; + struct spdk_lvol_store *lvol_store; + int lvserrno; +}; + +struct spdk_lvol_req { + spdk_lvol_op_complete cb_fn; + void *cb_arg; + struct spdk_lvol *lvol; + size_t sz; + struct spdk_io_channel *channel; + char name[SPDK_LVOL_NAME_MAX]; +}; + +struct spdk_lvs_with_handle_req { + spdk_lvs_op_with_handle_complete cb_fn; + void *cb_arg; + struct spdk_lvol_store *lvol_store; + struct spdk_bs_dev *bs_dev; + struct spdk_bdev *base_bdev; + int lvserrno; +}; + +struct spdk_lvs_destroy_req { + spdk_lvs_op_complete cb_fn; + void *cb_arg; + struct spdk_lvol_store *lvs; +}; + +struct spdk_lvol_with_handle_req { + spdk_lvol_op_with_handle_complete cb_fn; + void *cb_arg; + struct spdk_lvol *lvol; +}; + +struct spdk_lvol_store { + struct spdk_bs_dev *bs_dev; + struct spdk_blob_store *blobstore; + struct spdk_blob *super_blob; + spdk_blob_id super_blob_id; + struct spdk_uuid uuid; + int lvol_count; + int lvols_opened; + 
bool destruct; + TAILQ_HEAD(, spdk_lvol) lvols; + TAILQ_HEAD(, spdk_lvol) pending_lvols; + bool on_list; + TAILQ_ENTRY(spdk_lvol_store) link; + char name[SPDK_LVS_NAME_MAX]; + char new_name[SPDK_LVS_NAME_MAX]; +}; + +struct spdk_lvol { + struct spdk_lvol_store *lvol_store; + struct spdk_blob *blob; + spdk_blob_id blob_id; + char unique_id[SPDK_LVOL_UNIQUE_ID_MAX]; + char name[SPDK_LVOL_NAME_MAX]; + struct spdk_uuid uuid; + char uuid_str[SPDK_UUID_STRING_LEN]; + bool thin_provision; + struct spdk_bdev *bdev; + int ref_count; + bool action_in_progress; + enum blob_clear_method clear_method; + TAILQ_ENTRY(spdk_lvol) link; +}; + +struct lvol_store_bdev *vbdev_lvol_store_first(void); +struct lvol_store_bdev *vbdev_lvol_store_next(struct lvol_store_bdev *prev); + +void spdk_lvol_resize(struct spdk_lvol *lvol, uint64_t sz, spdk_lvol_op_complete cb_fn, + void *cb_arg); + +void spdk_lvol_set_read_only(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, + void *cb_arg); + +#endif /* SPDK_INTERNAL_LVOLSTORE_H */ diff --git a/src/spdk/include/spdk_internal/mock.h b/src/spdk/include/spdk_internal/mock.h new file mode 100644 index 000000000..8de44ae55 --- /dev/null +++ b/src/spdk/include/spdk_internal/mock.h @@ -0,0 +1,135 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_INTERNAL_MOCK_H +#define SPDK_INTERNAL_MOCK_H + +#include "spdk/stdinc.h" + +#define MOCK_STRUCT_INIT(...) \ + { __VA_ARGS__ } + +#define DEFINE_RETURN_MOCK(fn, ret) \ + bool ut_ ## fn ## _mocked = false; \ + ret ut_ ## fn + +/* + * For controlling mocked function behavior, setting + * and getting values from the stub, the _P macros are + * for mocking functions that return pointer values. + */ +#define MOCK_SET(fn, val) \ + ut_ ## fn ## _mocked = true; \ + ut_ ## fn = val + +#define MOCK_GET(fn) \ + ut_ ## fn + +#define MOCK_CLEAR(fn) \ + ut_ ## fn ## _mocked = false + +#define MOCK_CLEAR_P(fn) \ + ut_ ## fn ## _mocked = false; \ + ut_ ## fn = NULL + +/* for proving to *certain* static analysis tools that we didn't reset the mock function. 
*/ +#define MOCK_CLEARED_ASSERT(fn) \ + SPDK_CU_ASSERT_FATAL(ut_ ## fn ## _mocked == false) + +/* for declaring function prototypes for wrappers */ +#define DECLARE_WRAPPER(fn, ret, args) \ + extern bool ut_ ## fn ## _mocked; \ + extern ret ut_ ## fn; \ + ret __wrap_ ## fn args; ret __real_ ## fn args + +/* for defining the implementation of wrappers for syscalls */ +#define DEFINE_WRAPPER(fn, ret, dargs, pargs) \ + DEFINE_RETURN_MOCK(fn, ret); \ + __attribute__((used)) ret __wrap_ ## fn dargs \ + { \ + if (!ut_ ## fn ## _mocked) { \ + return __real_ ## fn pargs; \ + } else { \ + return MOCK_GET(fn); \ + } \ + } + +/* DEFINE_STUB is for defining the implementation of stubs for SPDK funcs. */ +#define DEFINE_STUB(fn, ret, dargs, val) \ + bool ut_ ## fn ## _mocked = true; \ + ret ut_ ## fn = val; \ + ret fn dargs; \ + ret fn dargs \ + { \ + return MOCK_GET(fn); \ + } + +/* DEFINE_STUB_V macro is for stubs that don't have a return value */ +#define DEFINE_STUB_V(fn, dargs) \ + void fn dargs; \ + void fn dargs \ + { \ + } + +#define HANDLE_RETURN_MOCK(fn) \ + if (ut_ ## fn ## _mocked) { \ + return ut_ ## fn; \ + } + + +/* declare wrapper protos (alphabetically please) here */ +DECLARE_WRAPPER(calloc, void *, (size_t nmemb, size_t size)); + +DECLARE_WRAPPER(pthread_mutex_init, int, + (pthread_mutex_t *mtx, const pthread_mutexattr_t *attr)); + +DECLARE_WRAPPER(pthread_mutexattr_init, int, + (pthread_mutexattr_t *attr)); + +DECLARE_WRAPPER(recvmsg, ssize_t, (int sockfd, struct msghdr *msg, int flags)); + +DECLARE_WRAPPER(sendmsg, ssize_t, (int sockfd, const struct msghdr *msg, int flags)); + +DECLARE_WRAPPER(writev, ssize_t, (int fd, const struct iovec *iov, int iovcnt)); + +/* unlink is done a bit differently. */ +extern char *g_unlink_path; +extern void (*g_unlink_callback)(void); +/* If g_unlink_path is NULL, __wrap_unlink will return ENOENT. + * If the __wrap_unlink() parameter does not match g_unlink_path, it will return ENOENT. 
+ * If g_unlink_path does match, and g_unlink_callback has been set, g_unlink_callback will + * be called before returning 0. + */ +int __wrap_unlink(const char *path); + +#endif /* SPDK_INTERNAL_MOCK_H */ diff --git a/src/spdk/include/spdk_internal/nvme_tcp.h b/src/spdk/include/spdk_internal/nvme_tcp.h new file mode 100644 index 000000000..7065bc060 --- /dev/null +++ b/src/spdk/include/spdk_internal/nvme_tcp.h @@ -0,0 +1,633 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_INTERNAL_NVME_TCP_H +#define SPDK_INTERNAL_NVME_TCP_H + +#include "spdk/likely.h" +#include "spdk/sock.h" +#include "spdk/dif.h" + +#define SPDK_CRC32C_XOR 0xffffffffUL +#define SPDK_NVME_TCP_DIGEST_LEN 4 +#define SPDK_NVME_TCP_DIGEST_ALIGNMENT 4 +#define SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT 30 +#define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 8 + +/* + * Maximum number of SGL elements. + */ +#define NVME_TCP_MAX_SGL_DESCRIPTORS (16) + +#define MAKE_DIGEST_WORD(BUF, CRC32C) \ + ( ((*((uint8_t *)(BUF)+0)) = (uint8_t)((uint32_t)(CRC32C) >> 0)), \ + ((*((uint8_t *)(BUF)+1)) = (uint8_t)((uint32_t)(CRC32C) >> 8)), \ + ((*((uint8_t *)(BUF)+2)) = (uint8_t)((uint32_t)(CRC32C) >> 16)), \ + ((*((uint8_t *)(BUF)+3)) = (uint8_t)((uint32_t)(CRC32C) >> 24))) + +#define MATCH_DIGEST_WORD(BUF, CRC32C) \ + ( ((((uint32_t) *((uint8_t *)(BUF)+0)) << 0) \ + | (((uint32_t) *((uint8_t *)(BUF)+1)) << 8) \ + | (((uint32_t) *((uint8_t *)(BUF)+2)) << 16) \ + | (((uint32_t) *((uint8_t *)(BUF)+3)) << 24)) \ + == (CRC32C)) + +#define DGET32(B) \ + ((( (uint32_t) *((uint8_t *)(B)+0)) << 0) \ + | (((uint32_t) *((uint8_t *)(B)+1)) << 8) \ + | (((uint32_t) *((uint8_t *)(B)+2)) << 16) \ + | (((uint32_t) *((uint8_t *)(B)+3)) << 24)) + +#define DSET32(B,D) \ + (((*((uint8_t *)(B)+0)) = (uint8_t)((uint32_t)(D) >> 0)), \ + ((*((uint8_t *)(B)+1)) = (uint8_t)((uint32_t)(D) >> 8)), \ + ((*((uint8_t *)(B)+2)) = (uint8_t)((uint32_t)(D) >> 16)), \ + ((*((uint8_t 
*)(B)+3)) = (uint8_t)((uint32_t)(D) >> 24))) + +typedef void (*nvme_tcp_qpair_xfer_complete_cb)(void *cb_arg); + +struct _nvme_tcp_sgl { + struct iovec *iov; + int iovcnt; + uint32_t iov_offset; + uint32_t total_size; +}; + +struct nvme_tcp_pdu { + union { + /* to hold error pdu data */ + uint8_t raw[SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE]; + struct spdk_nvme_tcp_common_pdu_hdr common; + struct spdk_nvme_tcp_ic_req ic_req; + struct spdk_nvme_tcp_term_req_hdr term_req; + struct spdk_nvme_tcp_cmd capsule_cmd; + struct spdk_nvme_tcp_h2c_data_hdr h2c_data; + struct spdk_nvme_tcp_ic_resp ic_resp; + struct spdk_nvme_tcp_rsp capsule_resp; + struct spdk_nvme_tcp_c2h_data_hdr c2h_data; + struct spdk_nvme_tcp_r2t_hdr r2t; + + } hdr; + + bool has_hdgst; + bool ddgst_enable; + uint8_t data_digest[SPDK_NVME_TCP_DIGEST_LEN]; + + uint8_t ch_valid_bytes; + uint8_t psh_valid_bytes; + uint8_t psh_len; + + nvme_tcp_qpair_xfer_complete_cb cb_fn; + void *cb_arg; + + /* The sock request ends with a 0 length iovec. Place the actual iovec immediately + * after it. 
There is a static assert below to check if the compiler inserted + * any unwanted padding */ + struct spdk_sock_request sock_req; + struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS * 2]; + + struct iovec data_iov[NVME_TCP_MAX_SGL_DESCRIPTORS]; + uint32_t data_iovcnt; + uint32_t data_len; + + uint32_t readv_offset; + TAILQ_ENTRY(nvme_tcp_pdu) tailq; + uint32_t remaining; + uint32_t padding_len; + struct _nvme_tcp_sgl sgl; + + struct spdk_dif_ctx *dif_ctx; + + void *req; /* data tied to a tcp request */ + void *qpair; +}; +SPDK_STATIC_ASSERT(offsetof(struct nvme_tcp_pdu, + sock_req) + sizeof(struct spdk_sock_request) == offsetof(struct nvme_tcp_pdu, iov), + "Compiler inserted padding between iov and sock_req"); + +enum nvme_tcp_pdu_recv_state { + /* Ready to wait for PDU */ + NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY, + + /* Active tqpair waiting for any PDU common header */ + NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH, + + /* Active tqpair waiting for any PDU specific header */ + NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH, + + /* Active tqpair waiting for a tcp request, only use in target side */ + NVME_TCP_PDU_RECV_STATE_AWAIT_REQ, + + /* Active tqpair waiting for payload */ + NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD, + + /* Active tqpair does not wait for payload */ + NVME_TCP_PDU_RECV_STATE_ERROR, +}; + +enum nvme_tcp_error_codes { + NVME_TCP_PDU_IN_PROGRESS = 0, + NVME_TCP_CONNECTION_FATAL = -1, + NVME_TCP_PDU_FATAL = -2, +}; + +enum nvme_tcp_qpair_state { + NVME_TCP_QPAIR_STATE_INVALID = 0, + NVME_TCP_QPAIR_STATE_INITIALIZING = 1, + NVME_TCP_QPAIR_STATE_RUNNING = 2, + NVME_TCP_QPAIR_STATE_EXITING = 3, + NVME_TCP_QPAIR_STATE_EXITED = 4, +}; + +static const bool g_nvme_tcp_hdgst[] = { + [SPDK_NVME_TCP_PDU_TYPE_IC_REQ] = false, + [SPDK_NVME_TCP_PDU_TYPE_IC_RESP] = false, + [SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ] = false, + [SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ] = false, + [SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD] = true, + [SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP] = true, + 
[SPDK_NVME_TCP_PDU_TYPE_H2C_DATA] = true, + [SPDK_NVME_TCP_PDU_TYPE_C2H_DATA] = true, + [SPDK_NVME_TCP_PDU_TYPE_R2T] = true +}; + +static const bool g_nvme_tcp_ddgst[] = { + [SPDK_NVME_TCP_PDU_TYPE_IC_REQ] = false, + [SPDK_NVME_TCP_PDU_TYPE_IC_RESP] = false, + [SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ] = false, + [SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ] = false, + [SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD] = true, + [SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP] = false, + [SPDK_NVME_TCP_PDU_TYPE_H2C_DATA] = true, + [SPDK_NVME_TCP_PDU_TYPE_C2H_DATA] = true, + [SPDK_NVME_TCP_PDU_TYPE_R2T] = false +}; + +static uint32_t +nvme_tcp_pdu_calc_header_digest(struct nvme_tcp_pdu *pdu) +{ + uint32_t crc32c; + uint32_t hlen = pdu->hdr.common.hlen; + + crc32c = spdk_crc32c_update(&pdu->hdr.raw, hlen, ~0); + crc32c = crc32c ^ SPDK_CRC32C_XOR; + return crc32c; +} + +static uint32_t +_update_crc32c_iov(struct iovec *iov, int iovcnt, uint32_t crc32c) +{ + int i; + + for (i = 0; i < iovcnt; i++) { + assert(iov[i].iov_base != NULL); + assert(iov[i].iov_len != 0); + crc32c = spdk_crc32c_update(iov[i].iov_base, iov[i].iov_len, crc32c); + } + + return crc32c; +} + +static uint32_t +nvme_tcp_pdu_calc_data_digest(struct nvme_tcp_pdu *pdu) +{ + uint32_t crc32c = SPDK_CRC32C_XOR; + uint32_t mod; + + assert(pdu->data_len != 0); + + if (spdk_likely(!pdu->dif_ctx)) { + crc32c = _update_crc32c_iov(pdu->data_iov, pdu->data_iovcnt, crc32c); + } else { + spdk_dif_update_crc32c_stream(pdu->data_iov, pdu->data_iovcnt, + 0, pdu->data_len, &crc32c, pdu->dif_ctx); + } + + mod = pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT; + if (mod != 0) { + uint32_t pad_length = SPDK_NVME_TCP_DIGEST_ALIGNMENT - mod; + uint8_t pad[3] = {0, 0, 0}; + + assert(pad_length > 0); + assert(pad_length <= sizeof(pad)); + crc32c = spdk_crc32c_update(pad, pad_length, crc32c); + } + crc32c = crc32c ^ SPDK_CRC32C_XOR; + return crc32c; +} + +static inline void +_nvme_tcp_sgl_init(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt, + uint32_t 
iov_offset) +{ + s->iov = iov; + s->iovcnt = iovcnt; + s->iov_offset = iov_offset; + s->total_size = 0; +} + +static inline void +_nvme_tcp_sgl_advance(struct _nvme_tcp_sgl *s, uint32_t step) +{ + s->iov_offset += step; + while (s->iovcnt > 0) { + if (s->iov_offset < s->iov->iov_len) { + break; + } + + s->iov_offset -= s->iov->iov_len; + s->iov++; + s->iovcnt--; + } +} + +static inline void +_nvme_tcp_sgl_get_buf(struct _nvme_tcp_sgl *s, void **_buf, uint32_t *_buf_len) +{ + if (_buf != NULL) { + *_buf = s->iov->iov_base + s->iov_offset; + } + if (_buf_len != NULL) { + *_buf_len = s->iov->iov_len - s->iov_offset; + } +} + +static inline bool +_nvme_tcp_sgl_append(struct _nvme_tcp_sgl *s, uint8_t *data, uint32_t data_len) +{ + if (s->iov_offset >= data_len) { + s->iov_offset -= data_len; + } else { + assert(s->iovcnt > 0); + s->iov->iov_base = data + s->iov_offset; + s->iov->iov_len = data_len - s->iov_offset; + s->total_size += data_len - s->iov_offset; + s->iov_offset = 0; + s->iov++; + s->iovcnt--; + if (s->iovcnt == 0) { + return false; + } + } + + return true; +} + +static inline bool +_nvme_tcp_sgl_append_multi(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt) +{ + int i; + + for (i = 0; i < iovcnt; i++) { + if (!_nvme_tcp_sgl_append(s, iov[i].iov_base, iov[i].iov_len)) { + return false; + } + } + + return true; +} + +static inline uint32_t +_get_iov_array_size(struct iovec *iov, int iovcnt) +{ + int i; + uint32_t size = 0; + + for (i = 0; i < iovcnt; i++) { + size += iov[i].iov_len; + } + + return size; +} + +static inline bool +_nvme_tcp_sgl_append_multi_with_md(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt, + uint32_t data_len, const struct spdk_dif_ctx *dif_ctx) +{ + int rc; + uint32_t mapped_len = 0; + + if (s->iov_offset >= data_len) { + s->iov_offset -= _get_iov_array_size(iov, iovcnt); + } else { + rc = spdk_dif_set_md_interleave_iovs(s->iov, s->iovcnt, iov, iovcnt, + s->iov_offset, data_len - s->iov_offset, + &mapped_len, dif_ctx); + if 
(rc < 0) { + SPDK_ERRLOG("Failed to setup iovs for DIF insert/strip.\n"); + return false; + } + + s->total_size += mapped_len; + s->iov_offset = 0; + assert(s->iovcnt >= rc); + s->iovcnt -= rc; + s->iov += rc; + + if (s->iovcnt == 0) { + return false; + } + } + + return true; +} + +static int +nvme_tcp_build_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu, + bool hdgst_enable, bool ddgst_enable, uint32_t *_mapped_length) +{ + uint32_t hlen, plen; + struct _nvme_tcp_sgl *sgl; + + if (iovcnt == 0) { + return 0; + } + + sgl = &pdu->sgl; + _nvme_tcp_sgl_init(sgl, iov, iovcnt, 0); + hlen = pdu->hdr.common.hlen; + + /* Header Digest */ + if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) { + hlen += SPDK_NVME_TCP_DIGEST_LEN; + } + + plen = hlen; + if (!pdu->data_len) { + /* PDU header + possible header digest */ + _nvme_tcp_sgl_append(sgl, (uint8_t *)&pdu->hdr.raw, hlen); + goto end; + } + + /* Padding */ + if (pdu->padding_len > 0) { + hlen += pdu->padding_len; + plen = hlen; + } + + if (!_nvme_tcp_sgl_append(sgl, (uint8_t *)&pdu->hdr.raw, hlen)) { + goto end; + } + + /* Data Segment */ + plen += pdu->data_len; + if (spdk_likely(!pdu->dif_ctx)) { + if (!_nvme_tcp_sgl_append_multi(sgl, pdu->data_iov, pdu->data_iovcnt)) { + goto end; + } + } else { + if (!_nvme_tcp_sgl_append_multi_with_md(sgl, pdu->data_iov, pdu->data_iovcnt, + pdu->data_len, pdu->dif_ctx)) { + goto end; + } + } + + /* Data Digest */ + if (g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && ddgst_enable) { + plen += SPDK_NVME_TCP_DIGEST_LEN; + _nvme_tcp_sgl_append(sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN); + } + + assert(plen == pdu->hdr.common.plen); + +end: + if (_mapped_length != NULL) { + *_mapped_length = sgl->total_size; + } + + return iovcnt - sgl->iovcnt; +} + +static int +nvme_tcp_build_payload_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu, + bool ddgst_enable, uint32_t *_mapped_length) +{ + struct _nvme_tcp_sgl *sgl; + + if (iovcnt == 0) { + return 0; + 
} + + sgl = &pdu->sgl; + _nvme_tcp_sgl_init(sgl, iov, iovcnt, pdu->readv_offset); + + if (spdk_likely(!pdu->dif_ctx)) { + if (!_nvme_tcp_sgl_append_multi(sgl, pdu->data_iov, pdu->data_iovcnt)) { + goto end; + } + } else { + if (!_nvme_tcp_sgl_append_multi_with_md(sgl, pdu->data_iov, pdu->data_iovcnt, + pdu->data_len, pdu->dif_ctx)) { + goto end; + } + } + + /* Data Digest */ + if (ddgst_enable) { + _nvme_tcp_sgl_append(sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN); + } + +end: + if (_mapped_length != NULL) { + *_mapped_length = sgl->total_size; + } + return iovcnt - sgl->iovcnt; +} + +static int +nvme_tcp_read_data(struct spdk_sock *sock, int bytes, + void *buf) +{ + int ret; + + ret = spdk_sock_recv(sock, buf, bytes); + + if (ret > 0) { + return ret; + } + + if (ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return 0; + } + + /* For connect reset issue, do not output error log */ + if (errno != ECONNRESET) { + SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n", + errno, spdk_strerror(errno)); + } + } + + /* connection closed */ + return NVME_TCP_CONNECTION_FATAL; +} + +static int +nvme_tcp_readv_data(struct spdk_sock *sock, struct iovec *iov, int iovcnt) +{ + int ret; + + assert(sock != NULL); + if (iov == NULL || iovcnt == 0) { + return 0; + } + + if (iovcnt == 1) { + return nvme_tcp_read_data(sock, iov->iov_len, iov->iov_base); + } + + ret = spdk_sock_readv(sock, iov, iovcnt); + + if (ret > 0) { + return ret; + } + + if (ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return 0; + } + + /* For connect reset issue, do not output error log */ + if (errno != ECONNRESET) { + SPDK_ERRLOG("spdk_sock_readv() failed, errno %d: %s\n", + errno, spdk_strerror(errno)); + } + } + + /* connection closed */ + return NVME_TCP_CONNECTION_FATAL; +} + + +static int +nvme_tcp_read_payload_data(struct spdk_sock *sock, struct nvme_tcp_pdu *pdu) +{ + struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1]; + int iovcnt; + + iovcnt = 
nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu, + pdu->ddgst_enable, NULL); + assert(iovcnt >= 0); + + return nvme_tcp_readv_data(sock, iov, iovcnt); +} + +static void +_nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len) +{ + pdu->data_iov[0].iov_base = data; + pdu->data_iov[0].iov_len = data_len; + pdu->data_iovcnt = 1; +} + +static void +nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len) +{ + _nvme_tcp_pdu_set_data(pdu, data, data_len); + pdu->data_len = data_len; +} + +static void +nvme_tcp_pdu_set_data_buf(struct nvme_tcp_pdu *pdu, + struct iovec *iov, int iovcnt, + uint32_t data_offset, uint32_t data_len) +{ + uint32_t buf_offset, buf_len, remain_len, len; + uint8_t *buf; + struct _nvme_tcp_sgl *pdu_sgl, buf_sgl; + + pdu->data_len = data_len; + + if (spdk_likely(!pdu->dif_ctx)) { + buf_offset = data_offset; + buf_len = data_len; + } else { + spdk_dif_ctx_set_data_offset(pdu->dif_ctx, data_offset); + spdk_dif_get_range_with_md(data_offset, data_len, + &buf_offset, &buf_len, pdu->dif_ctx); + } + + if (iovcnt == 1) { + _nvme_tcp_pdu_set_data(pdu, (void *)((uint64_t)iov[0].iov_base + buf_offset), buf_len); + } else { + pdu_sgl = &pdu->sgl; + + _nvme_tcp_sgl_init(pdu_sgl, pdu->data_iov, NVME_TCP_MAX_SGL_DESCRIPTORS, 0); + _nvme_tcp_sgl_init(&buf_sgl, iov, iovcnt, 0); + + _nvme_tcp_sgl_advance(&buf_sgl, buf_offset); + remain_len = buf_len; + + while (remain_len > 0) { + _nvme_tcp_sgl_get_buf(&buf_sgl, (void *)&buf, &len); + len = spdk_min(len, remain_len); + + _nvme_tcp_sgl_advance(&buf_sgl, len); + remain_len -= len; + + if (!_nvme_tcp_sgl_append(pdu_sgl, buf, len)) { + break; + } + } + + assert(remain_len == 0); + assert(pdu_sgl->total_size == buf_len); + + pdu->data_iovcnt = NVME_TCP_MAX_SGL_DESCRIPTORS - pdu_sgl->iovcnt; + } +} + +static void +nvme_tcp_pdu_calc_psh_len(struct nvme_tcp_pdu *pdu, bool hdgst_enable) +{ + uint8_t psh_len, pdo, padding_len; + + psh_len = 
pdu->hdr.common.hlen; + + if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) { + pdu->has_hdgst = true; + psh_len += SPDK_NVME_TCP_DIGEST_LEN; + if (pdu->hdr.common.plen > psh_len) { + pdo = pdu->hdr.common.pdo; + padding_len = pdo - psh_len; + if (padding_len > 0) { + psh_len = pdo; + } + } + } + + psh_len -= sizeof(struct spdk_nvme_tcp_common_pdu_hdr); + pdu->psh_len = psh_len; +} + +#endif /* SPDK_INTERNAL_NVME_TCP_H */ diff --git a/src/spdk/include/spdk_internal/rdma.h b/src/spdk/include/spdk_internal/rdma.h new file mode 100644 index 000000000..4a6d5104b --- /dev/null +++ b/src/spdk/include/spdk_internal/rdma.h @@ -0,0 +1,117 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_RDMA_H +#define SPDK_RDMA_H + +#include <infiniband/verbs.h> +#include <rdma/rdma_cma.h> +#include <rdma/rdma_verbs.h> + +struct spdk_rdma_qp_init_attr { + void *qp_context; + struct ibv_cq *send_cq; + struct ibv_cq *recv_cq; + struct ibv_srq *srq; + struct ibv_qp_cap cap; + struct ibv_pd *pd; +}; + +struct spdk_rdma_send_wr_list { + struct ibv_send_wr *first; + struct ibv_send_wr *last; +}; + +struct spdk_rdma_qp { + struct ibv_qp *qp; + struct rdma_cm_id *cm_id; + struct spdk_rdma_send_wr_list send_wrs; +}; + +/** + * Create RDMA provider specific qpair + * \param cm_id Pointer to RDMACM cm_id + * \param qp_attr Pointer to qpair init attributes + * \return Pointer to a newly created qpair on success or NULL on failure + */ +struct spdk_rdma_qp *spdk_rdma_qp_create(struct rdma_cm_id *cm_id, + struct spdk_rdma_qp_init_attr *qp_attr); + +/** + * Accept a connection request. 
Called by the passive side (NVMEoF target) + * \param spdk_rdma_qp Pointer to a qpair + * \param conn_param Optional information needed to establish the connection + * \return 0 on success, errno on failure + */ +int spdk_rdma_qp_accept(struct spdk_rdma_qp *spdk_rdma_qp, struct rdma_conn_param *conn_param); + +/** + * Complete the connection process, must be called by the active + * side (NVMEoF initiator) upon receipt of RDMA_CM_EVENT_CONNECT_RESPONSE + * \param spdk_rdma_qp Pointer to a qpair + * \return 0 on success, errno on failure + */ +int spdk_rdma_qp_complete_connect(struct spdk_rdma_qp *spdk_rdma_qp); + +/** + * Destroy RDMA provider specific qpair + * \param spdk_rdma_qp Pointer to qpair to be destroyed + */ +void spdk_rdma_qp_destroy(struct spdk_rdma_qp *spdk_rdma_qp); + +/** + * Disconnect a connection and transition associated qpair to error state. + * Generates RDMA_CM_EVENT_DISCONNECTED on both connection sides + * \param spdk_rdma_qp Pointer to qpair to be disconnected + */ +int spdk_rdma_qp_disconnect(struct spdk_rdma_qp *spdk_rdma_qp); + +/** + * Append the given send wr structure to the qpair's outstanding sends list. + * This function accepts either a single Work Request or the first WR in a linked list. 
+ * + * \param spdk_rdma_qp Pointer to SPDK RDMA qpair + * \param first Pointer to the first Work Request + * \return true if there were no outstanding WRs before, false otherwise + */ +bool spdk_rdma_qp_queue_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr *first); + +/** + * Submit all queued Work Requests + * \param spdk_rdma_qp Pointer to SPDK RDMA qpair + * \param bad_wr Stores a pointer to the first failed WR if this function returns a nonzero value + * \return 0 on success, errno on failure + */ +int spdk_rdma_qp_flush_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr **bad_wr); + +#endif /* SPDK_RDMA_H */ diff --git a/src/spdk/include/spdk_internal/sock.h b/src/spdk/include/spdk_internal/sock.h new file mode 100644 index 000000000..d88d6bd03 --- /dev/null +++ b/src/spdk/include/spdk_internal/sock.h @@ -0,0 +1,227 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * TCP network implementation abstraction layer + */ + +#ifndef SPDK_INTERNAL_SOCK_H +#define SPDK_INTERNAL_SOCK_H + +#include "spdk/stdinc.h" +#include "spdk/sock.h" +#include "spdk/queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_EVENTS_PER_POLL 32 +#define DEFAULT_SOCK_PRIORITY 0 +#define MIN_SOCK_PIPE_SIZE 1024 + +struct spdk_sock { + struct spdk_net_impl *net_impl; + struct spdk_sock_opts opts; + int cb_cnt; + spdk_sock_cb cb_fn; + void *cb_arg; + struct spdk_sock_group_impl *group_impl; + TAILQ_ENTRY(spdk_sock) link; + + int max_iovcnt; + TAILQ_HEAD(, spdk_sock_request) queued_reqs; + TAILQ_HEAD(, spdk_sock_request) pending_reqs; + int queued_iovcnt; + + struct { + uint8_t closed : 1; + uint8_t reserved : 7; + } flags; +}; + +struct spdk_sock_group { + STAILQ_HEAD(, spdk_sock_group_impl) group_impls; + void *ctx; +}; + +struct spdk_sock_group_impl { + struct spdk_net_impl *net_impl; + TAILQ_HEAD(, spdk_sock) socks; + STAILQ_ENTRY(spdk_sock_group_impl) link; + /* List of removed sockets. refreshed each time we poll the sock group. */ + int num_removed_socks; + /* Unfortunately, we can't just keep a tailq of the sockets in case they are freed + * or added to another poll group later. 
+ */ + uintptr_t removed_socks[MAX_EVENTS_PER_POLL]; +}; + +struct spdk_net_impl { + const char *name; + int priority; + + int (*getaddr)(struct spdk_sock *sock, char *saddr, int slen, uint16_t *sport, char *caddr, + int clen, uint16_t *cport); + struct spdk_sock *(*connect)(const char *ip, int port, struct spdk_sock_opts *opts); + struct spdk_sock *(*listen)(const char *ip, int port, struct spdk_sock_opts *opts); + struct spdk_sock *(*accept)(struct spdk_sock *sock); + int (*close)(struct spdk_sock *sock); + ssize_t (*recv)(struct spdk_sock *sock, void *buf, size_t len); + ssize_t (*readv)(struct spdk_sock *sock, struct iovec *iov, int iovcnt); + ssize_t (*writev)(struct spdk_sock *sock, struct iovec *iov, int iovcnt); + + void (*writev_async)(struct spdk_sock *sock, struct spdk_sock_request *req); + int (*flush)(struct spdk_sock *sock); + + int (*set_recvlowat)(struct spdk_sock *sock, int nbytes); + int (*set_recvbuf)(struct spdk_sock *sock, int sz); + int (*set_sendbuf)(struct spdk_sock *sock, int sz); + + bool (*is_ipv6)(struct spdk_sock *sock); + bool (*is_ipv4)(struct spdk_sock *sock); + bool (*is_connected)(struct spdk_sock *sock); + + int (*get_placement_id)(struct spdk_sock *sock, int *placement_id); + struct spdk_sock_group_impl *(*group_impl_create)(void); + int (*group_impl_add_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock); + int (*group_impl_remove_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock); + int (*group_impl_poll)(struct spdk_sock_group_impl *group, int max_events, + struct spdk_sock **socks); + int (*group_impl_close)(struct spdk_sock_group_impl *group); + + int (*get_opts)(struct spdk_sock_impl_opts *opts, size_t *len); + int (*set_opts)(const struct spdk_sock_impl_opts *opts, size_t len); + + STAILQ_ENTRY(spdk_net_impl) link; +}; + +void spdk_net_impl_register(struct spdk_net_impl *impl, int priority); + +#define SPDK_NET_IMPL_REGISTER(name, impl, priority) \ +static void __attribute__((constructor)) 
net_impl_register_##name(void) \ +{ \ + spdk_net_impl_register(impl, priority); \ +} + +static inline void +spdk_sock_request_queue(struct spdk_sock *sock, struct spdk_sock_request *req) +{ + TAILQ_INSERT_TAIL(&sock->queued_reqs, req, internal.link); + sock->queued_iovcnt += req->iovcnt; +} + +static inline void +spdk_sock_request_pend(struct spdk_sock *sock, struct spdk_sock_request *req) +{ + TAILQ_REMOVE(&sock->queued_reqs, req, internal.link); + assert(sock->queued_iovcnt >= req->iovcnt); + sock->queued_iovcnt -= req->iovcnt; + TAILQ_INSERT_TAIL(&sock->pending_reqs, req, internal.link); +} + +static inline int +spdk_sock_request_put(struct spdk_sock *sock, struct spdk_sock_request *req, int err) +{ + bool closed; + int rc = 0; + + TAILQ_REMOVE(&sock->pending_reqs, req, internal.link); + + req->internal.offset = 0; + + closed = sock->flags.closed; + sock->cb_cnt++; + req->cb_fn(req->cb_arg, err); + assert(sock->cb_cnt > 0); + sock->cb_cnt--; + + if (sock->cb_cnt == 0 && !closed && sock->flags.closed) { + /* The user closed the socket in response to a callback above. 
*/ + rc = -1; + spdk_sock_close(&sock); + } + + return rc; +} + +static inline int +spdk_sock_abort_requests(struct spdk_sock *sock) +{ + struct spdk_sock_request *req; + bool closed; + int rc = 0; + + closed = sock->flags.closed; + sock->cb_cnt++; + + req = TAILQ_FIRST(&sock->pending_reqs); + while (req) { + TAILQ_REMOVE(&sock->pending_reqs, req, internal.link); + + req->cb_fn(req->cb_arg, -ECANCELED); + + req = TAILQ_FIRST(&sock->pending_reqs); + } + + req = TAILQ_FIRST(&sock->queued_reqs); + while (req) { + TAILQ_REMOVE(&sock->queued_reqs, req, internal.link); + + assert(sock->queued_iovcnt >= req->iovcnt); + sock->queued_iovcnt -= req->iovcnt; + + req->cb_fn(req->cb_arg, -ECANCELED); + + req = TAILQ_FIRST(&sock->queued_reqs); + } + assert(sock->cb_cnt > 0); + sock->cb_cnt--; + + assert(TAILQ_EMPTY(&sock->queued_reqs)); + assert(TAILQ_EMPTY(&sock->pending_reqs)); + + if (sock->cb_cnt == 0 && !closed && sock->flags.closed) { + /* The user closed the socket in response to a callback above. */ + rc = -1; + spdk_sock_close(&sock); + } + + return rc; +} + +#ifdef __cplusplus +} +#endif + +#endif /* SPDK_INTERNAL_SOCK_H */ diff --git a/src/spdk/include/spdk_internal/thread.h b/src/spdk/include/spdk_internal/thread.h new file mode 100644 index 000000000..10bc4824c --- /dev/null +++ b/src/spdk/include/spdk_internal/thread.h @@ -0,0 +1,136 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_THREAD_INTERNAL_H_ +#define SPDK_THREAD_INTERNAL_H_ + +#include "spdk/stdinc.h" +#include "spdk/thread.h" + +#define SPDK_MAX_POLLER_NAME_LEN 256 +#define SPDK_MAX_THREAD_NAME_LEN 256 + +enum spdk_poller_state { + /* The poller is registered with a thread but not currently executing its fn. */ + SPDK_POLLER_STATE_WAITING, + + /* The poller is currently running its fn. */ + SPDK_POLLER_STATE_RUNNING, + + /* The poller was unregistered during the execution of its fn. */ + SPDK_POLLER_STATE_UNREGISTERED, + + /* The poller is in the process of being paused. It will be paused + * during the next time it's supposed to be executed. + */ + SPDK_POLLER_STATE_PAUSING, + + /* The poller is registered but currently paused. 
It's on the + * paused_pollers list. + */ + SPDK_POLLER_STATE_PAUSED, +}; + +struct spdk_poller { + TAILQ_ENTRY(spdk_poller) tailq; + + /* Current state of the poller; should only be accessed from the poller's thread. */ + enum spdk_poller_state state; + + uint64_t period_ticks; + uint64_t next_run_tick; + uint64_t run_count; + uint64_t busy_count; + spdk_poller_fn fn; + void *arg; + struct spdk_thread *thread; + + char name[SPDK_MAX_POLLER_NAME_LEN + 1]; +}; + +enum spdk_thread_state { + /* The thread is processing pollers and messages via spdk_thread_poll(). */ + SPDK_THREAD_STATE_RUNNING, + + /* The thread is in the process of termination. It reaps unregistering + * pollers and releases I/O channels. + */ + SPDK_THREAD_STATE_EXITING, + + /* The thread has exited. It is ready to call spdk_thread_destroy(). */ + SPDK_THREAD_STATE_EXITED, +}; + +struct spdk_thread { + uint64_t tsc_last; + struct spdk_thread_stats stats; + /* + * Contains pollers actively running on this thread. Pollers + * are run round-robin. The thread takes one poller from the head + * of the ring, executes it, then puts it back at the tail of + * the ring. + */ + TAILQ_HEAD(active_pollers_head, spdk_poller) active_pollers; + /** + * Contains pollers running on this thread with a periodic timer. + */ + TAILQ_HEAD(timed_pollers_head, spdk_poller) timed_pollers; + /* + * Contains paused pollers. Pollers on this queue are waiting until + * they are resumed (in which case they're put onto the active/timer + * queues) or unregistered. 
+ */ + TAILQ_HEAD(paused_pollers_head, spdk_poller) paused_pollers; + struct spdk_ring *messages; + SLIST_HEAD(, spdk_msg) msg_cache; + size_t msg_cache_count; + spdk_msg_fn critical_msg; + uint64_t id; + enum spdk_thread_state state; + + TAILQ_HEAD(, spdk_io_channel) io_channels; + TAILQ_ENTRY(spdk_thread) tailq; + + char name[SPDK_MAX_THREAD_NAME_LEN + 1]; + struct spdk_cpuset cpumask; + uint64_t exit_timeout_tsc; + + /* User context allocated at the end */ + uint8_t ctx[0]; +}; + +const char *spdk_poller_state_str(enum spdk_poller_state state); + +const char *spdk_io_device_get_name(struct io_device *dev); + +#endif /* SPDK_THREAD_INTERNAL_H_ */ diff --git a/src/spdk/include/spdk_internal/uring.h b/src/spdk/include/spdk_internal/uring.h new file mode 100644 index 000000000..ff22f11d4 --- /dev/null +++ b/src/spdk/include/spdk_internal/uring.h @@ -0,0 +1,51 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_INTERNAL_URING_H +#define SPDK_INTERNAL_URING_H + +#include <liburing.h> + +#ifndef __NR_sys_io_uring_enter +#define __NR_sys_io_uring_enter 426 +#endif +/* Issue the io_uring_enter system call directly through syscall(), passing NULL/0 for the two trailing arguments, and return the value of syscall(). Declared inline so that including this header without calling it does not leave an unused static function behind. */ +static inline int +spdk_io_uring_enter(int ring_fd, unsigned int to_submit, + unsigned int min_complete, unsigned int flags) +{ + return syscall(__NR_sys_io_uring_enter, ring_fd, to_submit, + min_complete, flags, NULL, 0); +} + +#endif /* SPDK_INTERNAL_URING_H */ diff --git a/src/spdk/include/spdk_internal/utf.h b/src/spdk/include/spdk_internal/utf.h new file mode 100644 index 000000000..b2b1c3c45 --- /dev/null +++ b/src/spdk/include/spdk_internal/utf.h @@ -0,0 +1,325 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_UTF_H_ +#define SPDK_UTF_H_ + +#include "spdk/stdinc.h" + +#include "spdk/endian.h" +#include "spdk/likely.h" +#include "spdk/string.h" + +static inline bool +utf8_tail(uint8_t c) +{ + /* True for a UTF-8 continuation byte: c >= 0x80 && c <= 0xBF, or binary 10xxxxxx */ + return (c & 0xC0) == 0x80; +} + +/* + * Check for a valid UTF-8 encoding of a single codepoint. + * + * \return Length of valid UTF-8 byte sequence, or negative if invalid. 
+ */ +static inline int +utf8_valid(const uint8_t *start, const uint8_t *end) +{ + const uint8_t *p = start; + uint8_t b0, b1, b2, b3; + + if (p == end) { + return 0; + } + + b0 = *p; + + if (b0 <= 0x7F) { + return 1; + } + + if (b0 <= 0xC1) { + /* Invalid start byte */ + return -1; + } + + if (++p == end) { + /* Not enough bytes left */ + return -1; + } + b1 = *p; + + if (b0 <= 0xDF) { + /* C2..DF 80..BF */ + if (!utf8_tail(b1)) { + return -1; + } + return 2; + } + + if (++p == end) { + /* Not enough bytes left */ + return -1; + } + b2 = *p; + + if (b0 == 0xE0) { + /* E0 A0..BF 80..BF */ + if (b1 < 0xA0 || b1 > 0xBF || !utf8_tail(b2)) { + return -1; + } + return 3; + } else if (b0 == 0xED && b1 >= 0xA0) { + /* + * UTF-16 surrogate pairs use U+D800..U+DFFF, which would be encoded as + * ED A0..BF 80..BF in UTF-8; however, surrogate pairs are not allowed in UTF-8. + */ + return -1; + } else if (b0 <= 0xEF) { + /* E1..EF 80..BF 80..BF */ + if (!utf8_tail(b1) || !utf8_tail(b2)) { + return -1; + } + return 3; + } + + if (++p == end) { + /* Not enough bytes left */ + return -1; + } + b3 = *p; + + if (b0 == 0xF0) { + /* F0 90..BF 80..BF 80..BF */ + if (b1 < 0x90 || b1 > 0xBF || !utf8_tail(b2) || !utf8_tail(b3)) { + return -1; + } + return 4; + } else if (b0 <= 0xF3) { + /* F1..F3 80..BF 80..BF 80..BF */ + if (!utf8_tail(b1) || !utf8_tail(b2) || !utf8_tail(b3)) { + return -1; + } + return 4; + } else if (b0 == 0xF4) { + /* F4 80..8F 80..BF 80..BF */ + if (b1 < 0x80 || b1 > 0x8F || !utf8_tail(b2) || !utf8_tail(b3)) { + return -1; + } + return 4; + } + + return -1; +} + +static inline uint32_t +utf8_decode_unsafe_1(const uint8_t *data) +{ + return data[0]; +} + +static inline uint32_t +utf8_decode_unsafe_2(const uint8_t *data) +{ + uint32_t codepoint; + + codepoint = ((data[0] & 0x1F) << 6); + codepoint |= (data[1] & 0x3F); + + return codepoint; +} + +static inline uint32_t +utf8_decode_unsafe_3(const uint8_t *data) +{ + uint32_t codepoint; + + codepoint = ((data[0] & 0x0F) 
<< 12); + codepoint |= (data[1] & 0x3F) << 6; + codepoint |= (data[2] & 0x3F); + + return codepoint; +} + +static inline uint32_t +utf8_decode_unsafe_4(const uint8_t *data) +{ + uint32_t codepoint; + + codepoint = ((data[0] & 0x07) << 18); + codepoint |= (data[1] & 0x3F) << 12; + codepoint |= (data[2] & 0x3F) << 6; + codepoint |= (data[3] & 0x3F); + + return codepoint; +} + +/* + * Encode a single Unicode codepoint as UTF-8. + * + * buf must have at least 4 bytes of space available (hence unsafe). + * + * \return Number of bytes appended to buf, or negative if encoding failed. + */ +static inline int +utf8_encode_unsafe(uint8_t *buf, uint32_t c) +{ + if (c <= 0x7F) { + buf[0] = c; + return 1; + } else if (c <= 0x7FF) { + buf[0] = 0xC0 | (c >> 6); + buf[1] = 0x80 | (c & 0x3F); + return 2; + } else if (c >= 0xD800 && c <= 0xDFFF) { + /* UTF-16 surrogate pairs - invalid in UTF-8 */ + return -1; + } else if (c <= 0xFFFF) { + buf[0] = 0xE0 | (c >> 12); + buf[1] = 0x80 | ((c >> 6) & 0x3F); + buf[2] = 0x80 | (c & 0x3F); + return 3; + } else if (c <= 0x10FFFF) { + buf[0] = 0xF0 | (c >> 18); + buf[1] = 0x80 | ((c >> 12) & 0x3F); + buf[2] = 0x80 | ((c >> 6) & 0x3F); + buf[3] = 0x80 | (c & 0x3F); + return 4; + } + return -1; +} + +static inline int +utf8_codepoint_len(uint32_t c) +{ + if (c <= 0x7F) { + return 1; + } else if (c <= 0x7FF) { + return 2; + } else if (c >= 0xD800 && c <= 0xDFFF) { + /* UTF-16 surrogate pairs - invalid in UTF-8 */ + return -1; + } else if (c <= 0xFFFF) { + return 3; + } else if (c <= 0x10FFFF) { + return 4; + } + return -1; +} + +static inline bool +utf16_valid_surrogate_high(uint32_t val) +{ + return val >= 0xD800 && val <= 0xDBFF; +} + +static inline bool +utf16_valid_surrogate_low(uint32_t val) +{ + return val >= 0xDC00 && val <= 0xDFFF; +} + +/* + * Check for a valid UTF-16LE encoding of a single codepoint. + * + * \return Length of valid UTF-16LE sequence in 16-bit code units, or negative if invalid. 
+ */ +static inline int +utf16le_valid(const uint16_t *start, const uint16_t *end) +{ + const uint16_t *p = start; + uint16_t high, low; + + if (p == end) { + return 0; + } + + high = from_le16(p); + + if (high <= 0xD7FF || high >= 0xE000) { + /* Single code unit in BMP */ + return 1; + } + + if (high >= 0xDC00) { + /* Low surrogate in first code unit - invalid */ + return -1; + } + + assert(utf16_valid_surrogate_high(high)); + + if (++p == end) { + /* Not enough code units left */ + return -1; + } + low = from_le16(p); + + if (!utf16_valid_surrogate_low(low)) { + return -1; + } + + /* Valid surrogate pair */ + return 2; +} + +static inline uint32_t +utf16_decode_surrogate_pair(uint32_t high, uint32_t low) +{ + uint32_t codepoint; + + assert(utf16_valid_surrogate_high(high)); + assert(utf16_valid_surrogate_low(low)); + + codepoint = low; + codepoint &= 0x3FF; + codepoint |= ((high & 0x3FF) << 10); + codepoint += 0x10000; + + return codepoint; +} + +static inline void +utf16_encode_surrogate_pair(uint32_t codepoint, uint16_t *high, uint16_t *low) +{ + assert(codepoint >= 0x10000); + assert(codepoint <= 0x10FFFF); + + codepoint -= 0x10000; + *high = 0xD800 | (codepoint >> 10); + *low = 0xDC00 | (codepoint & 0x3FF); + + assert(utf16_valid_surrogate_high(*high)); + assert(utf16_valid_surrogate_low(*low)); +} + +#endif diff --git a/src/spdk/include/spdk_internal/vhost_user.h b/src/spdk/include/spdk_internal/vhost_user.h new file mode 100644 index 000000000..92ed3b65b --- /dev/null +++ b/src/spdk/include/spdk_internal/vhost_user.h @@ -0,0 +1,140 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * Structures defined in the vhost-user specification + */ + +#ifndef SPDK_VHOST_USER_H +#define SPDK_VHOST_USER_H + +#include "spdk/stdinc.h" + +#include <linux/vhost.h> + +#ifndef VHOST_USER_MEMORY_MAX_NREGIONS +#define VHOST_USER_MEMORY_MAX_NREGIONS 8 +#endif + +#ifndef VHOST_USER_MAX_CONFIG_SIZE +#define VHOST_USER_MAX_CONFIG_SIZE 256 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_MQ +#define VHOST_USER_PROTOCOL_F_MQ 0 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_CONFIG +#define VHOST_USER_PROTOCOL_F_CONFIG 9 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 +#endif + +#ifndef VHOST_USER_F_PROTOCOL_FEATURES +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#endif + +enum vhost_user_request { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_NET_SET_MTU = 20, + VHOST_USER_SET_SLAVE_REQ_FD = 21, + VHOST_USER_IOTLB_MSG = 22, + VHOST_USER_GET_CONFIG = 24, + VHOST_USER_SET_CONFIG = 25, + VHOST_USER_CRYPTO_CREATE_SESS = 26, + VHOST_USER_CRYPTO_CLOSE_SESS = 27, + VHOST_USER_POSTCOPY_ADVISE = 28, + VHOST_USER_POSTCOPY_LISTEN = 29, + VHOST_USER_POSTCOPY_END = 30, + VHOST_USER_MAX +}; + +/** Get/set config msg payload */ +struct vhost_user_config { + uint32_t offset; + uint32_t size; + uint32_t flags; + uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; +}; + +/** Fixed-size vhost_memory struct */ +struct 
vhost_memory_padded { + uint32_t nregions; + uint32_t padding; + struct vhost_memory_region regions[VHOST_USER_MEMORY_MAX_NREGIONS]; +}; + +struct vhost_user_msg { + enum vhost_user_request request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + uint32_t flags; + uint32_t size; /**< the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + struct vhost_memory_padded memory; + struct vhost_user_config cfg; + } payload; +} __attribute((packed)); + +#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64) +#define VHOST_USER_PAYLOAD_SIZE \ + (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE) + +#endif /* SPDK_VHOST_USER_H */ diff --git a/src/spdk/include/spdk_internal/virtio.h b/src/spdk/include/spdk_internal/virtio.h new file mode 100644 index 000000000..c30013efe --- /dev/null +++ b/src/spdk/include/spdk_internal/virtio.h @@ -0,0 +1,486 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_VIRTIO_H +#define SPDK_VIRTIO_H + +#include "spdk/stdinc.h" + +#include <linux/virtio_ring.h> +#include <linux/virtio_pci.h> +#include <linux/virtio_config.h> + +#include "spdk_internal/log.h" +#include "spdk/likely.h" +#include "spdk/queue.h" +#include "spdk/json.h" +#include "spdk/thread.h" +#include "spdk/pci_ids.h" +#include "spdk/env.h" + +/** + * The maximum virtqueue size is 2^15. Use that value as the end of + * descriptor chain terminator since it will never be a valid index + * in the descriptor table. This is used to verify we are correctly + * handling vq_free_cnt. + */ +#define VQ_RING_DESC_CHAIN_END 32768 + +#define SPDK_VIRTIO_MAX_VIRTQUEUES 0x100 + +/* Extra status define for readability */ +#define VIRTIO_CONFIG_S_RESET 0 + +struct virtio_dev_ops; + +struct virtio_dev { + struct virtqueue **vqs; + + /** Name of this virtio dev set by backend */ + char *name; + + /** Fixed number of backend-specific non-I/O virtqueues. */ + uint16_t fixed_queues_num; + + /** Max number of virtqueues the host supports. */ + uint16_t max_queues; + + /** Common device & guest features. 
*/ + uint64_t negotiated_features; + + int is_hw; + + /** Modern/legacy virtio device flag. */ + uint8_t modern; + + /** Mutex for asynchronous virtqueue-changing operations. */ + pthread_mutex_t mutex; + + /** Backend-specific callbacks. */ + const struct virtio_dev_ops *backend_ops; + + /** Context for the backend ops */ + void *ctx; +}; + +struct virtio_dev_ops { + int (*read_dev_cfg)(struct virtio_dev *hw, size_t offset, + void *dst, int len); + int (*write_dev_cfg)(struct virtio_dev *hw, size_t offset, + const void *src, int len); + uint8_t (*get_status)(struct virtio_dev *hw); + void (*set_status)(struct virtio_dev *hw, uint8_t status); + + /** + * Get device features. The features might be already + * negotiated with driver (guest) features. + */ + uint64_t (*get_features)(struct virtio_dev *vdev); + + /** + * Negotiate and set device features. + * The negotiation can fail with return code -1. + * This function should also set vdev->negotiated_features field. + */ + int (*set_features)(struct virtio_dev *vdev, uint64_t features); + + /** Destruct virtio device */ + void (*destruct_dev)(struct virtio_dev *vdev); + + uint16_t (*get_queue_size)(struct virtio_dev *vdev, uint16_t queue_id); + int (*setup_queue)(struct virtio_dev *hw, struct virtqueue *vq); + void (*del_queue)(struct virtio_dev *hw, struct virtqueue *vq); + void (*notify_queue)(struct virtio_dev *hw, struct virtqueue *vq); + + void (*dump_json_info)(struct virtio_dev *hw, struct spdk_json_write_ctx *w); + void (*write_json_config)(struct virtio_dev *hw, struct spdk_json_write_ctx *w); +}; + +struct vq_desc_extra { + void *cookie; + uint16_t ndescs; +}; + +struct virtqueue { + struct virtio_dev *vdev; /**< owner of this virtqueue */ + struct vring vq_ring; /**< vring keeping desc, used and avail */ + /** + * Last consumed descriptor in the used table, + * trails vq_ring.used->idx. 
+ */ + uint16_t vq_used_cons_idx; + uint16_t vq_nentries; /**< vring desc numbers */ + uint16_t vq_free_cnt; /**< num of desc available */ + uint16_t vq_avail_idx; /**< sync until needed */ + + void *vq_ring_virt_mem; /**< virtual address of vring */ + unsigned int vq_ring_size; + + uint64_t vq_ring_mem; /**< physical address of vring */ + + /** + * Head of the free chain in the descriptor table. If + * there are no free descriptors, this will be set to + * VQ_RING_DESC_CHAIN_END. + */ + uint16_t vq_desc_head_idx; + + /** + * Tail of the free chain in desc table. If + * there are no free descriptors, this will be set to + * VQ_RING_DESC_CHAIN_END. + */ + uint16_t vq_desc_tail_idx; + uint16_t vq_queue_index; /**< PCI queue index */ + uint16_t *notify_addr; + + /** Thread that's polling this queue. */ + struct spdk_thread *owner_thread; + + uint16_t req_start; + uint16_t req_end; + uint16_t reqs_finished; + + struct vq_desc_extra vq_descx[0]; +}; + +enum spdk_virtio_desc_type { + SPDK_VIRTIO_DESC_RO = 0, /**< Read only */ + SPDK_VIRTIO_DESC_WR = VRING_DESC_F_WRITE, /**< Write only */ + /* TODO VIRTIO_DESC_INDIRECT */ +}; + +/** Context for creating PCI virtio_devs */ +struct virtio_pci_ctx; + +/** + * Callback for creating virtio_dev from a PCI device. + * \param pci_ctx PCI context to be associated with a virtio_dev + * \param ctx context provided by the user + * \return 0 on success, -1 on error. + */ +typedef int (*virtio_pci_create_cb)(struct virtio_pci_ctx *pci_ctx, void *ctx); + +uint16_t virtio_recv_pkts(struct virtqueue *vq, void **io, uint32_t *len, uint16_t io_cnt); + +/** + * Start a new request on the current vring head position and associate it + * with an opaque cookie object. The previous request in given vq will be + * made visible to the device in hopes it can be processed early, but there's + * no guarantee it will be until the device is notified with \c + * virtqueue_req_flush. 
This behavior is simply an optimization and virtqueues + * must always be flushed. Empty requests (with no descriptors added) will be + * ignored. The device owning given virtqueue must be started. + * + * \param vq virtio queue + * \param cookie opaque object to associate with this request. Once the request + * is sent, processed and a response is received, the same object will be + * returned to the user after calling the virtio poll API. + * \param iovcnt number of required iovectors for the request. This can be + * higher than the actual number of iovectors to be added. + * \return 0 on success or negative errno otherwise. If the `iovcnt` is + * greater than virtqueue depth, -EINVAL is returned. If simply not enough + * iovectors are available, -ENOMEM is returned. + */ +int virtqueue_req_start(struct virtqueue *vq, void *cookie, int iovcnt); + +/** + * Flush a virtqueue. This will notify the device if it's required. + * The device owning given virtqueue must be started. + * + * \param vq virtio queue + */ +void virtqueue_req_flush(struct virtqueue *vq); + +/** + * Abort the very last request in a virtqueue. This will restore virtqueue + * state to the point before the last request was created. Note that this + * is only effective if a queue hasn't been flushed yet. The device owning + * given virtqueue must be started. + * + * \param vq virtio queue + */ +void virtqueue_req_abort(struct virtqueue *vq); + +/** + * Add iovec chain to the last created request. This call does not provide any + * error-checking. The caller has to ensure that they don't add more iovs than + * what was specified during request creation. The device owning given virtqueue + * must be started. 
+ * + * \param vq virtio queue + * \param iovs iovec array + * \param iovcnt number of iovs in iovec array + * \param desc_type type of all given iovectors + */ +void virtqueue_req_add_iovs(struct virtqueue *vq, struct iovec *iovs, uint16_t iovcnt, + enum spdk_virtio_desc_type desc_type); + +/** + * Construct a virtio device. The device will be in stopped state by default. + * Before doing any I/O, it has to be manually started via \c virtio_dev_restart. + * + * \param vdev memory for virtio device, must be zeroed + * \param name name for the virtio device + * \param ops backend callbacks + * \param ops_ctx argument for the backend callbacks + * \return zero on success, or negative error code otherwise + */ +int virtio_dev_construct(struct virtio_dev *vdev, const char *name, + const struct virtio_dev_ops *ops, void *ops_ctx); + +/** + * Reset the device and prepare it to be `virtio_dev_start`ed. This call + * will also renegotiate feature flags. + * + * \param vdev virtio device + * \param req_features features this driver supports. A VIRTIO_F_VERSION_1 + * flag will be automatically appended, as legacy devices are not supported. + */ +int virtio_dev_reset(struct virtio_dev *vdev, uint64_t req_features); + +/** + * Notify the host to start processing this virtio device. This is + * a blocking call that won't return until the host has started. + * This will also allocate virtqueues. + * + * \param vdev virtio device + * \param max_queues number of queues to allocate. The max number of + * usable I/O queues is also limited by the host device. `vdev` will be + * started successfully even if the host supports fewer queues than requested. + * \param fixed_queues_num number of queues preceding the first + * request queue. For Virtio-SCSI this is equal to 2, as there are + * additional event and control queues. + */ +int virtio_dev_start(struct virtio_dev *vdev, uint16_t max_queues, + uint16_t fixed_queues_num); + +/** + * Stop the host from processing the device. 
This is a blocking call + * that won't return until all outstanding I/O has been processed on + * the host (virtio device) side. In order to re-start the device, it + * has to be `virtio_dev_reset` first. + * + * \param vdev virtio device + */ +void virtio_dev_stop(struct virtio_dev *vdev); + +/** + * Destruct a virtio device. Note that it must be in the stopped state. + * The virtio_dev should be manually freed afterwards. + * + * \param vdev virtio device + */ +void virtio_dev_destruct(struct virtio_dev *vdev); + +/** + * Bind a virtqueue with given index to the current thread. + * + * This function is thread-safe. + * + * \param vdev vhost device + * \param index virtqueue index + * \return 0 on success, -1 in case a virtqueue with given index either + * does not exist or is already acquired. + */ +int virtio_dev_acquire_queue(struct virtio_dev *vdev, uint16_t index); + +/** + * Look for unused queue and bind it to the current thread. This will + * scan the queues in range from *start_index* (inclusive) up to + * vdev->max_queues (exclusive). + * + * This function is thread-safe. + * + * \param vdev vhost device + * \param start_index virtqueue index to start looking from + * \return index of acquired queue or -1 in case no unused queue in given range + * has been found + */ +int32_t virtio_dev_find_and_acquire_queue(struct virtio_dev *vdev, uint16_t start_index); + +/** + * Get thread that acquired given virtqueue. + * + * This function is thread-safe. + * + * \param vdev vhost device + * \param index index of virtqueue + * \return thread that acquired given virtqueue. If the queue is unused + * or doesn't exist a NULL is returned. + */ +struct spdk_thread *virtio_dev_queue_get_thread(struct virtio_dev *vdev, uint16_t index); + +/** + * Check if virtqueue with given index is acquired. + * + * This function is thread-safe. + * + * \param vdev vhost device + * \param index index of virtqueue + * \return virtqueue acquire status. 
In case of invalid index *false* is returned. + */ +bool virtio_dev_queue_is_acquired(struct virtio_dev *vdev, uint16_t index); + +/** + * Release previously acquired queue. + * + * This function must be called from the thread that acquired the queue. + * + * \param vdev vhost device + * \param index index of virtqueue to release + */ +void virtio_dev_release_queue(struct virtio_dev *vdev, uint16_t index); + +/** + * Get Virtio status flags. + * + * \param vdev virtio device + */ +uint8_t virtio_dev_get_status(struct virtio_dev *vdev); + +/** + * Set Virtio status flag. The flags have to be set in very specific order + * defined in the VIRTIO 1.0 spec section 3.1.1. To unset the flags, stop the + * device or set \c VIRTIO_CONFIG_S_RESET status flag. There is no way to + * unset only particular flags. + * + * \param vdev virtio device + * \param flag flag to set + */ +void virtio_dev_set_status(struct virtio_dev *vdev, uint8_t flag); + +/** + * Write raw data into the device config at given offset. This call does not + * provide any error checking. + * + * \param vdev virtio device + * \param offset offset in bytes + * \param src pointer to data to copy from + * \param len length of data to copy in bytes + * \return 0 on success, negative errno otherwise + */ +int virtio_dev_write_dev_config(struct virtio_dev *vdev, size_t offset, const void *src, int len); + +/** + * Read raw data from the device config at given offset. This call does not + * provide any error checking. + * + * \param vdev virtio device + * \param offset offset in bytes + * \param dst pointer to buffer to copy data into + * \param len length of data to copy in bytes + * \return 0 on success, negative errno otherwise + */ +int virtio_dev_read_dev_config(struct virtio_dev *vdev, size_t offset, void *dst, int len); + +/** + * Get backend-specific ops for given device. 
+ * + * \param vdev virtio device + */ +const struct virtio_dev_ops *virtio_dev_backend_ops(struct virtio_dev *vdev); + +/** + * Check if the device has negotiated given feature bit. + * + * \param vdev virtio device + * \param bit feature bit + */ +static inline bool +virtio_dev_has_feature(struct virtio_dev *vdev, uint64_t bit) +{ + return !!(vdev->negotiated_features & (1ULL << bit)); +} + +/** + * Dump all device specific information into given json stream. + * + * \param vdev virtio device + * \param w json stream + */ +void virtio_dev_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w); + +/** + * Enumerate all PCI Virtio devices of given type on the system. + * + * \param enum_cb a function to be called for each valid PCI device. + * If a virtio_dev has been created, the callback should return 0. + * Returning any other value will cause the PCI context to be freed, + * making it unusable. + * \param enum_ctx additional opaque context to be passed into `enum_cb` + * \param pci_device_id PCI Device ID of devices to iterate through + */ +int virtio_pci_dev_enumerate(virtio_pci_create_cb enum_cb, void *enum_ctx, + uint16_t pci_device_id); + +/** + * Attach a PCI Virtio device of given type. + * + * \param create_cb callback to create a virtio_dev. + * If a virtio_dev has been created, the callback should return 0. + * Returning any other value will cause the PCI context to be freed, + * making it unusable. + * \param enum_ctx additional opaque context to be passed into `create_cb` + * \param pci_device_id PCI Device ID of devices to iterate through + * \param pci_addr PCI address of the device to attach + */ +int virtio_pci_dev_attach(virtio_pci_create_cb create_cb, void *enum_ctx, + uint16_t pci_device_id, struct spdk_pci_addr *pci_addr); + +/** + * Connect to a vhost-user device and init corresponding virtio_dev struct. + * The virtio_dev will have to be freed with \c virtio_dev_free. 
+ * + * \param vdev preallocated vhost device struct to operate on + * \param name name of this virtio device + * \param path path to the Unix domain socket of the vhost-user device + * \param queue_size size of each of the queues + * \return 0 on success, negative error code otherwise + */ +int virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path, + uint32_t queue_size); + +/** + * Initialize virtio_dev for a given PCI device. + * The virtio_dev has to be freed with \c virtio_dev_destruct. + * + * \param vdev preallocated vhost device struct to operate on + * \param name name of this virtio device + * \param pci_ctx context of the PCI device + * \return 0 on success, -1 on error. + */ +int virtio_pci_dev_init(struct virtio_dev *vdev, const char *name, + struct virtio_pci_ctx *pci_ctx); + +#endif /* SPDK_VIRTIO_H */ |