summaryrefslogtreecommitdiffstats
path: root/src/spdk/lib/vhost/rte_vhost_compat.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/lib/vhost/rte_vhost_compat.c
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/lib/vhost/rte_vhost_compat.c')
-rw-r--r--src/spdk/lib/vhost/rte_vhost_compat.c402
1 files changed, 402 insertions, 0 deletions
diff --git a/src/spdk/lib/vhost/rte_vhost_compat.c b/src/spdk/lib/vhost/rte_vhost_compat.c
new file mode 100644
index 000000000..53f31bfd7
--- /dev/null
+++ b/src/spdk/lib/vhost/rte_vhost_compat.c
@@ -0,0 +1,402 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Set of workarounds for rte_vhost to make it work with device types
+ * other than vhost-net.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/env.h"
+#include "spdk/likely.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+#include "spdk/memory.h"
+#include "spdk/barrier.h"
+#include "spdk/vhost.h"
+#include "vhost_internal.h"
+
+#include "spdk_internal/vhost_user.h"
+
+static inline void
+vhost_session_mem_region_calc(uint64_t *previous_start, uint64_t *start, uint64_t *end,
+ uint64_t *len, struct rte_vhost_mem_region *region)
+{
+ *start = FLOOR_2MB(region->mmap_addr);
+ *end = CEIL_2MB(region->mmap_addr + region->mmap_size);
+ if (*start == *previous_start) {
+ *start += (size_t) VALUE_2MB;
+ }
+ *previous_start = *start;
+ *len = *end - *start;
+}
+
+void
+vhost_session_mem_register(struct rte_vhost_memory *mem)
+{
+ uint64_t start, end, len;
+ uint32_t i;
+ uint64_t previous_start = UINT64_MAX;
+
+
+ for (i = 0; i < mem->nregions; i++) {
+ vhost_session_mem_region_calc(&previous_start, &start, &end, &len, &mem->regions[i]);
+ SPDK_INFOLOG(SPDK_LOG_VHOST, "Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n",
+ start, len);
+
+ if (spdk_mem_register((void *)start, len) != 0) {
+ SPDK_WARNLOG("Failed to register memory region %"PRIu32". Future vtophys translation might fail.\n",
+ i);
+ continue;
+ }
+ }
+}
+
+void
+vhost_session_mem_unregister(struct rte_vhost_memory *mem)
+{
+ uint64_t start, end, len;
+ uint32_t i;
+ uint64_t previous_start = UINT64_MAX;
+
+ for (i = 0; i < mem->nregions; i++) {
+ vhost_session_mem_region_calc(&previous_start, &start, &end, &len, &mem->regions[i]);
+ if (spdk_vtophys((void *) start, NULL) == SPDK_VTOPHYS_ERROR) {
+ continue; /* region has not been registered */
+ }
+
+ if (spdk_mem_unregister((void *)start, len) != 0) {
+ assert(false);
+ }
+ }
+}
+
+static int
+new_connection(int vid)
+{
+ char ifname[PATH_MAX];
+
+ if (rte_vhost_get_ifname(vid, ifname, PATH_MAX) < 0) {
+ SPDK_ERRLOG("Couldn't get a valid ifname for device with vid %d\n", vid);
+ return -1;
+ }
+
+ return vhost_new_connection_cb(vid, ifname);
+}
+
+static int
+start_device(int vid)
+{
+ return vhost_start_device_cb(vid);
+}
+
+static void
+stop_device(int vid)
+{
+ vhost_stop_device_cb(vid);
+}
+
+static void
+destroy_connection(int vid)
+{
+ vhost_destroy_connection_cb(vid);
+}
+
+static const struct vhost_device_ops g_spdk_vhost_ops = {
+ .new_device = start_device,
+ .destroy_device = stop_device,
+ .new_connection = new_connection,
+ .destroy_connection = destroy_connection,
+#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
+ .get_config = vhost_get_config_cb,
+ .set_config = vhost_set_config_cb,
+ .vhost_nvme_admin_passthrough = vhost_nvme_admin_passthrough,
+ .vhost_nvme_set_cq_call = vhost_nvme_set_cq_call,
+ .vhost_nvme_get_cap = vhost_nvme_get_cap,
+ .vhost_nvme_set_bar_mr = vhost_nvme_set_bar_mr,
+#endif
+};
+
+#ifndef SPDK_CONFIG_VHOST_INTERNAL_LIB
+
+static enum rte_vhost_msg_result
+extern_vhost_pre_msg_handler(int vid, void *_msg)
+{
+ struct vhost_user_msg *msg = _msg;
+ struct spdk_vhost_session *vsession;
+
+ vsession = vhost_session_find_by_vid(vid);
+ if (vsession == NULL) {
+ SPDK_ERRLOG("Received a message to unitialized session (vid %d).\n", vid);
+ assert(false);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ switch (msg->request) {
+ case VHOST_USER_GET_VRING_BASE:
+ if (vsession->forced_polling && vsession->started) {
+ /* Our queue is stopped for whatever reason, but we may still
+ * need to poll it after it's initialized again.
+ */
+ g_spdk_vhost_ops.destroy_device(vid);
+ }
+ break;
+ case VHOST_USER_SET_VRING_BASE:
+ case VHOST_USER_SET_VRING_ADDR:
+ case VHOST_USER_SET_VRING_NUM:
+ case VHOST_USER_SET_VRING_KICK:
+ if (vsession->forced_polling && vsession->started) {
+ /* Additional queues are being initialized, so we either processed
+ * enough I/Os and are switching from SeaBIOS to the OS now, or
+ * we were never in SeaBIOS in the first place. Either way, we
+ * don't need our workaround anymore.
+ */
+ g_spdk_vhost_ops.destroy_device(vid);
+ vsession->forced_polling = false;
+ }
+ break;
+ case VHOST_USER_SET_VRING_CALL:
+ /* rte_vhost will close the previous callfd and won't notify
+ * us about any change. This will effectively make SPDK fail
+ * to deliver any subsequent interrupts until a session is
+ * restarted. We stop the session here before closing the previous
+ * fd (so that all interrupts must have been delivered by the
+ * time the descriptor is closed) and start right after (which
+ * will make SPDK retrieve the latest, up-to-date callfd from
+ * rte_vhost.
+ */
+ case VHOST_USER_SET_MEM_TABLE:
+ /* rte_vhost will unmap previous memory that SPDK may still
+ * have pending DMA operations on. We can't let that happen,
+ * so stop the device before letting rte_vhost unmap anything.
+ * This will block until all pending I/Os are finished.
+ * We will start the device again from the post-processing
+ * message handler.
+ */
+ if (vsession->started) {
+ g_spdk_vhost_ops.destroy_device(vid);
+ vsession->needs_restart = true;
+ }
+ break;
+ case VHOST_USER_GET_CONFIG: {
+ int rc = 0;
+
+ spdk_vhost_lock();
+ if (vsession->vdev->backend->vhost_get_config) {
+ rc = vsession->vdev->backend->vhost_get_config(vsession->vdev,
+ msg->payload.cfg.region, msg->payload.cfg.size);
+ if (rc != 0) {
+ msg->size = 0;
+ }
+ }
+ spdk_vhost_unlock();
+
+ return RTE_VHOST_MSG_RESULT_REPLY;
+ }
+ case VHOST_USER_SET_CONFIG: {
+ int rc = 0;
+
+ spdk_vhost_lock();
+ if (vsession->vdev->backend->vhost_set_config) {
+ rc = vsession->vdev->backend->vhost_set_config(vsession->vdev,
+ msg->payload.cfg.region, msg->payload.cfg.offset,
+ msg->payload.cfg.size, msg->payload.cfg.flags);
+ }
+ spdk_vhost_unlock();
+
+ return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
+ }
+ default:
+ break;
+ }
+
+ return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
+}
+
+static enum rte_vhost_msg_result
+extern_vhost_post_msg_handler(int vid, void *_msg)
+{
+ struct vhost_user_msg *msg = _msg;
+ struct spdk_vhost_session *vsession;
+
+ vsession = vhost_session_find_by_vid(vid);
+ if (vsession == NULL) {
+ SPDK_ERRLOG("Received a message to unitialized session (vid %d).\n", vid);
+ assert(false);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ if (vsession->needs_restart) {
+ g_spdk_vhost_ops.new_device(vid);
+ vsession->needs_restart = false;
+ return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
+ }
+
+ switch (msg->request) {
+ case VHOST_USER_SET_FEATURES:
+ /* rte_vhost requires all queues to be fully initialized in order
+ * to start I/O processing. This behavior is not compliant with the
+ * vhost-user specification and doesn't work with QEMU 2.12+, which
+ * will only initialize 1 I/O queue for the SeaBIOS boot.
+ * Theoretically, we should start polling each virtqueue individually
+ * after receiving its SET_VRING_KICK message, but rte_vhost is not
+ * designed to poll individual queues. So here we use a workaround
+ * to detect when the vhost session could be potentially at that SeaBIOS
+ * stage and we mark it to start polling as soon as its first virtqueue
+ * gets initialized. This doesn't hurt any non-QEMU vhost slaves
+ * and allows QEMU 2.12+ to boot correctly. SET_FEATURES could be sent
+ * at any time, but QEMU will send it at least once on SeaBIOS
+ * initialization - whenever powered-up or rebooted.
+ */
+ vsession->forced_polling = true;
+ break;
+ case VHOST_USER_SET_VRING_KICK:
+ /* vhost-user spec tells us to start polling a queue after receiving
+ * its SET_VRING_KICK message. Let's do it!
+ */
+ if (vsession->forced_polling && !vsession->started) {
+ g_spdk_vhost_ops.new_device(vid);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
+}
+
+struct rte_vhost_user_extern_ops g_spdk_extern_vhost_ops = {
+ .pre_msg_handle = extern_vhost_pre_msg_handler,
+ .post_msg_handle = extern_vhost_post_msg_handler,
+};
+
+void
+vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession)
+{
+ int rc;
+
+ rc = rte_vhost_extern_callback_register(vsession->vid, &g_spdk_extern_vhost_ops, NULL);
+ if (rc != 0) {
+ SPDK_ERRLOG("rte_vhost_extern_callback_register() failed for vid = %d\n",
+ vsession->vid);
+ return;
+ }
+}
+
+#else /* SPDK_CONFIG_VHOST_INTERNAL_LIB */
+
+void
+vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession)
+{
+ /* nothing to do. all the changes are already incorporated into rte_vhost */
+}
+
+#endif
+
+int
+vhost_register_unix_socket(const char *path, const char *ctrl_name,
+ uint64_t virtio_features, uint64_t disabled_features, uint64_t protocol_features)
+{
+ struct stat file_stat;
+#ifndef SPDK_CONFIG_VHOST_INTERNAL_LIB
+ uint64_t features = 0;
+#endif
+
+ /* Register vhost driver to handle vhost messages. */
+ if (stat(path, &file_stat) != -1) {
+ if (!S_ISSOCK(file_stat.st_mode)) {
+ SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
+ "The file already exists and is not a socket.\n",
+ path);
+ return -EIO;
+ } else if (unlink(path) != 0) {
+ SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
+ "The socket already exists and failed to unlink.\n",
+ path);
+ return -EIO;
+ }
+ }
+
+ if (rte_vhost_driver_register(path, 0) != 0) {
+ SPDK_ERRLOG("Could not register controller %s with vhost library\n", ctrl_name);
+ SPDK_ERRLOG("Check if domain socket %s already exists\n", path);
+ return -EIO;
+ }
+ if (rte_vhost_driver_set_features(path, virtio_features) ||
+ rte_vhost_driver_disable_features(path, disabled_features)) {
+ SPDK_ERRLOG("Couldn't set vhost features for controller %s\n", ctrl_name);
+
+ rte_vhost_driver_unregister(path);
+ return -EIO;
+ }
+
+ if (rte_vhost_driver_callback_register(path, &g_spdk_vhost_ops) != 0) {
+ rte_vhost_driver_unregister(path);
+ SPDK_ERRLOG("Couldn't register callbacks for controller %s\n", ctrl_name);
+ return -EIO;
+ }
+
+#ifndef SPDK_CONFIG_VHOST_INTERNAL_LIB
+ rte_vhost_driver_get_protocol_features(path, &features);
+ features |= protocol_features;
+ rte_vhost_driver_set_protocol_features(path, features);
+#endif
+
+ if (rte_vhost_driver_start(path) != 0) {
+ SPDK_ERRLOG("Failed to start vhost driver for controller %s (%d): %s\n",
+ ctrl_name, errno, spdk_strerror(errno));
+ rte_vhost_driver_unregister(path);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+int
+vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
+{
+ return rte_vhost_get_mem_table(vid, mem);
+}
+
+int
+vhost_driver_unregister(const char *path)
+{
+ return rte_vhost_driver_unregister(path);
+}
+
+int
+vhost_get_negotiated_features(int vid, uint64_t *negotiated_features)
+{
+ return rte_vhost_get_negotiated_features(vid, negotiated_features);
+}