author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000
---|---|---
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/module/bdev/crypto/vbdev_crypto.c |
parent | Initial commit. (diff) |
download | ceph-upstream/18.2.2.tar.xz ceph-upstream/18.2.2.zip |
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/module/bdev/crypto/vbdev_crypto.c')
-rw-r--r-- | src/spdk/module/bdev/crypto/vbdev_crypto.c | 2040 |
1 file changed, 2040 insertions(+), 0 deletions(-)
diff --git a/src/spdk/module/bdev/crypto/vbdev_crypto.c b/src/spdk/module/bdev/crypto/vbdev_crypto.c new file mode 100644 index 000000000..f5dd0f814 --- /dev/null +++ b/src/spdk/module/bdev/crypto/vbdev_crypto.c @@ -0,0 +1,2040 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "vbdev_crypto.h" + +#include "spdk/env.h" +#include "spdk/conf.h" +#include "spdk/endian.h" +#include "spdk/thread.h" +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +#include <rte_config.h> +#include <rte_version.h> +#include <rte_bus_vdev.h> +#include <rte_crypto.h> +#include <rte_cryptodev.h> +#include <rte_cryptodev_pmd.h> + +/* To add support for new device types, follow the examples of the following... + * Note that the string names are defined by the DPDK PMD in question so be + * sure to use the exact names. + */ +#define MAX_NUM_DRV_TYPES 2 + +/* The VF spread is the number of queue pairs between virtual functions, we use this to + * load balance the QAT device. + */ +#define QAT_VF_SPREAD 32 +static uint8_t g_qat_total_qp = 0; +static uint8_t g_next_qat_index; + +const char *g_driver_names[MAX_NUM_DRV_TYPES] = { AESNI_MB, QAT }; + +/* Global list of available crypto devices. */ +struct vbdev_dev { + struct rte_cryptodev_info cdev_info; /* includes device friendly name */ + uint8_t cdev_id; /* identifier for the device */ + TAILQ_ENTRY(vbdev_dev) link; +}; +static TAILQ_HEAD(, vbdev_dev) g_vbdev_devs = TAILQ_HEAD_INITIALIZER(g_vbdev_devs); + +/* Global list and lock for unique device/queue pair combos. We keep 1 list per supported PMD + * so that we can optimize per PMD where it makes sense. For example, with QAT there is an optimal + * pattern for assigning queue pairs, whereas with AESNI there is not.
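+ *
+ * As an illustration of that QAT pattern (hypothetical device counts): with two QAT
+ * virtual functions exposing 32 queue pairs each, g_qat_total_qp is 64 and successive
+ * channels are handed qp indexes 0, 32, 1, 33, ... because _assign_device_qp() below
+ * advances g_next_qat_index by QAT_VF_SPREAD modulo g_qat_total_qp on each assignment,
+ * spreading the load across the virtual functions.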
+ */ +struct device_qp { + struct vbdev_dev *device; /* ptr to crypto device */ + uint8_t qp; /* queue pair for this node */ + bool in_use; /* whether this node is in use or not */ + uint8_t index; /* used by QAT to load balance placement of qpairs */ + TAILQ_ENTRY(device_qp) link; +}; +static TAILQ_HEAD(, device_qp) g_device_qp_qat = TAILQ_HEAD_INITIALIZER(g_device_qp_qat); +static TAILQ_HEAD(, device_qp) g_device_qp_aesni_mb = TAILQ_HEAD_INITIALIZER(g_device_qp_aesni_mb); +static pthread_mutex_t g_device_qp_lock = PTHREAD_MUTEX_INITIALIZER; + + +/* In order to limit the number of resources we need to do one crypto + * operation per LBA (we use LBA as IV), we tell the bdev layer that + * our max IO size is something reasonable. Units here are in bytes. + */ +#define CRYPTO_MAX_IO (64 * 1024) + +/* This controls how many ops will be dequeued from the crypto driver in one run + * of the poller. It is mainly a performance knob as it effectively determines how + * much work the poller has to do. However even that can vary between crypto drivers + * as the AESNI_MB driver for example does all the crypto work on dequeue whereas the + * QAT driver just dequeues what has been completed already. + */ +#define MAX_DEQUEUE_BURST_SIZE 64 + +/* When enqueueing, we need to supply the crypto driver with an array of pointers to + * operation structs. As each of these can be max 512B, we can adjust the CRYPTO_MAX_IO + * value in conjunction with the other defines to make sure we're not using crazy amounts + * of memory. All of these numbers can and probably should be adjusted based on the + * workload. By default we'll use the worst case (smallest) block size for the + * minimum number of array entries. As an example, a CRYPTO_MAX_IO size of 64K with 512B + * blocks would give us an enqueue array size of 128. + */ +#define MAX_ENQUEUE_ARRAY_SIZE (CRYPTO_MAX_IO / 512) + +/* The number of MBUFS we need must be a power of two and to support other small IOs + * in addition to the limits mentioned above, we go to the next power of two. It is a + * big number because it is one mempool for source and destination mbufs. It may + * need to be bigger to support multiple crypto drivers at once. + */ +#define NUM_MBUFS 32768 +#define POOL_CACHE_SIZE 256 +#define MAX_CRYPTO_VOLUMES 128 +#define NUM_SESSIONS (2 * MAX_CRYPTO_VOLUMES) +#define SESS_MEMPOOL_CACHE_SIZE 0 +uint8_t g_number_of_claimed_volumes = 0; + +/* This is the max number of IOs we can supply to any crypto device QP at one time. + * It can vary between drivers. + */ +#define CRYPTO_QP_DESCRIPTORS 2048 + +/* Specific to AES_CBC. */ +#define AES_CBC_IV_LENGTH 16 +#define AES_CBC_KEY_LENGTH 16 +#define AES_XTS_KEY_LENGTH 16 /* XTS uses 2 keys, each of this size. */ +#define AESNI_MB_NUM_QP 64 + +/* Common for supported devices. */ +#define IV_OFFSET (sizeof(struct rte_crypto_op) + \ + sizeof(struct rte_crypto_sym_op)) +#define QUEUED_OP_OFFSET (IV_OFFSET + AES_CBC_IV_LENGTH) + +static void _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); +static void _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); +static void _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); +static void vbdev_crypto_examine(struct spdk_bdev *bdev); +static int vbdev_crypto_claim(struct spdk_bdev *bdev); +static void vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io); + +/* List of crypto_bdev names and their base bdevs via configuration file.
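+ *
+ * For illustration only (the bdev names and key are made up, and crypto_aesni_mb is
+ * assumed to be the AESNI_MB PMD string), one config entry that feeds this list could
+ * look like:
+ *
+ *   [crypto]
+ *   CRY Malloc0 crypto_ram 0123456789123456 crypto_aesni_mb
+ *
+ * i.e. base bdev name, vbdev name, a 16-byte AES_CBC key and the PMD name, with
+ * optional cipher and key2 columns parsed in vbdev_crypto_init() below.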
*/ +struct bdev_names { + char *vbdev_name; /* name of the vbdev to create */ + char *bdev_name; /* base bdev name */ + + /* Note, for dev/test we allow use of key in the config file, for production + * use, you must use an RPC to specify the key for security reasons. + */ + uint8_t *key; /* key per bdev */ + char *drv_name; /* name of the crypto device driver */ + char *cipher; /* AES_CBC or AES_XTS */ + uint8_t *key2; /* key #2 for AES_XTS, per bdev */ + TAILQ_ENTRY(bdev_names) link; +}; +static TAILQ_HEAD(, bdev_names) g_bdev_names = TAILQ_HEAD_INITIALIZER(g_bdev_names); + +/* List of virtual bdevs and associated info for each. We keep the device friendly name here even + * though it's also in the device struct because we use it early on. + */ +struct vbdev_crypto { + struct spdk_bdev *base_bdev; /* the thing we're attaching to */ + struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */ + struct spdk_bdev crypto_bdev; /* the crypto virtual bdev */ + uint8_t *key; /* key per bdev */ + uint8_t *key2; /* for XTS */ + uint8_t *xts_key; /* key + key 2 */ + char *drv_name; /* name of the crypto device driver */ + char *cipher; /* cipher used */ + struct rte_cryptodev_sym_session *session_encrypt; /* encryption session for this bdev */ + struct rte_cryptodev_sym_session *session_decrypt; /* decryption session for this bdev */ + struct rte_crypto_sym_xform cipher_xform; /* crypto control struct for this bdev */ + TAILQ_ENTRY(vbdev_crypto) link; + struct spdk_thread *thread; /* thread where base device is opened */ +}; +static TAILQ_HEAD(, vbdev_crypto) g_vbdev_crypto = TAILQ_HEAD_INITIALIZER(g_vbdev_crypto); + +/* Shared mempools between all devices on this system */ +static struct rte_mempool *g_session_mp = NULL; +static struct rte_mempool *g_session_mp_priv = NULL; +static struct spdk_mempool *g_mbuf_mp = NULL; /* mbuf mempool */ +static struct rte_mempool *g_crypto_op_mp = NULL; /* crypto operations, must be rte* mempool */ + +/* For queueing up crypto operations that we can't submit for some reason */ +struct vbdev_crypto_op { + uint8_t cdev_id; + uint8_t qp; + struct rte_crypto_op *crypto_op; + struct spdk_bdev_io *bdev_io; + TAILQ_ENTRY(vbdev_crypto_op) link; +}; +#define QUEUED_OP_LENGTH (sizeof(struct vbdev_crypto_op)) + +/* The crypto vbdev channel struct. It is allocated and freed on my behalf by the io channel code. + * We store things in here that are needed on a per thread basis like the base_channel for this thread, + * and the poller for this thread. + */ +struct crypto_io_channel { + struct spdk_io_channel *base_ch; /* IO channel of base device */ + struct spdk_poller *poller; /* completion poller */ + struct device_qp *device_qp; /* unique device/qp combination for this channel */ + TAILQ_HEAD(, spdk_bdev_io) pending_cry_ios; /* outstanding operations to the crypto device */ + struct spdk_io_channel_iter *iter; /* used with for_each_channel in reset */ + TAILQ_HEAD(, vbdev_crypto_op) queued_cry_ops; /* queued for re-submission to CryptoDev */ +}; + +/* This is the crypto per IO context that the bdev layer allocates for us opaquely and attaches to + * each IO for us.
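+ *
+ * (The bdev layer sizes this per-IO region via vbdev_crypto_get_ctx_size() below and
+ * hands it to us as bdev_io->driver_ctx on every IO.)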
+ */ +struct crypto_bdev_io { + int cryop_cnt_remaining; /* counter used when completing crypto ops */ + struct crypto_io_channel *crypto_ch; /* need to store for crypto completion handling */ + struct vbdev_crypto *crypto_bdev; /* the crypto node struct associated with this IO */ + struct spdk_bdev_io *orig_io; /* the original IO */ + struct spdk_bdev_io *read_io; /* the read IO we issued */ + int8_t bdev_io_status; /* the status we'll report back on the bdev IO */ + bool on_pending_list; + /* Used for the single contiguous buffer that serves as the crypto destination target for writes */ + uint64_t aux_num_blocks; /* num of blocks for the contiguous buffer */ + uint64_t aux_offset_blocks; /* block offset on media */ + void *aux_buf_raw; /* raw buffer that the bdev layer gave us for write buffer */ + struct iovec aux_buf_iov; /* iov representing aligned contig write buffer */ + + /* for bdev_io_wait */ + struct spdk_bdev_io_wait_entry bdev_io_wait; + struct spdk_io_channel *ch; +}; + +/* Called by vbdev_crypto_init_crypto_drivers() to init each discovered crypto device */ +static int +create_vbdev_dev(uint8_t index, uint16_t num_lcores) +{ + struct vbdev_dev *device; + uint8_t j, cdev_id, cdrv_id; + struct device_qp *dev_qp; + struct device_qp *tmp_qp; + int rc; + TAILQ_HEAD(device_qps, device_qp) *dev_qp_head; + + device = calloc(1, sizeof(struct vbdev_dev)); + if (!device) { + return -ENOMEM; + } + + /* Get details about this device. */ + rte_cryptodev_info_get(index, &device->cdev_info); + cdrv_id = device->cdev_info.driver_id; + cdev_id = device->cdev_id = index; + + /* Before going any further, make sure we have enough resources for this + * device type to function. We need a unique queue pair per core across each + * device type to remain lockless.... + */ + if ((rte_cryptodev_device_count_by_driver(cdrv_id) * + device->cdev_info.max_nb_queue_pairs) < num_lcores) { + SPDK_ERRLOG("Insufficient unique queue pairs available for %s\n", + device->cdev_info.driver_name); + SPDK_ERRLOG("Either add more crypto devices or decrease core count\n"); + rc = -EINVAL; + goto err; + } + + /* Setup queue pairs. */ + struct rte_cryptodev_config conf = { + .nb_queue_pairs = device->cdev_info.max_nb_queue_pairs, + .socket_id = SPDK_ENV_SOCKET_ID_ANY + }; + + rc = rte_cryptodev_configure(cdev_id, &conf); + if (rc < 0) { + SPDK_ERRLOG("Failed to configure cryptodev %u\n", cdev_id); + rc = -EINVAL; + goto err; + } + + struct rte_cryptodev_qp_conf qp_conf = { + .nb_descriptors = CRYPTO_QP_DESCRIPTORS, +#if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0) + .mp_session = g_session_mp, + .mp_session_private = g_session_mp_priv, +#endif + }; + + /* Pre-setup all potential qpairs now and assign them in the channel + * callback. If we were to create them there, we'd have to stop the + * entire device affecting all other threads that might be using it + * even on other queue pairs.
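+ *
+ * (Adding a qpair later would mean calling rte_cryptodev_stop() on the whole device
+ * first, stalling every thread already using it, which is why everything is set up
+ * up front.)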
+ */ + for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) { +#if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0) + rc = rte_cryptodev_queue_pair_setup(cdev_id, j, &qp_conf, SOCKET_ID_ANY); +#else + rc = rte_cryptodev_queue_pair_setup(cdev_id, j, &qp_conf, SOCKET_ID_ANY, + g_session_mp); +#endif + + if (rc < 0) { + SPDK_ERRLOG("Failed to setup queue pair %u on " + "cryptodev %u\n", j, cdev_id); + rc = -EINVAL; + goto err; + } + } + + rc = rte_cryptodev_start(cdev_id); + if (rc < 0) { + SPDK_ERRLOG("Failed to start device %u: error %d\n", + cdev_id, rc); + rc = -EINVAL; + goto err; + } + + /* Select the right device/qp list based on driver name + * or error if it does not exist. + */ + if (strcmp(device->cdev_info.driver_name, QAT) == 0) { + dev_qp_head = (struct device_qps *)&g_device_qp_qat; + } else if (strcmp(device->cdev_info.driver_name, AESNI_MB) == 0) { + dev_qp_head = (struct device_qps *)&g_device_qp_aesni_mb; + } else { + rc = -EINVAL; + goto err; + } + + /* Build up lists of device/qp combinations per PMD */ + for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) { + dev_qp = calloc(1, sizeof(struct device_qp)); + if (!dev_qp) { + rc = -ENOMEM; + goto err_qp_alloc; + } + dev_qp->device = device; + dev_qp->qp = j; + dev_qp->in_use = false; + if (strcmp(device->cdev_info.driver_name, QAT) == 0) { + g_qat_total_qp++; + } + TAILQ_INSERT_TAIL(dev_qp_head, dev_qp, link); + } + + /* Add to our list of available crypto devices. */ + TAILQ_INSERT_TAIL(&g_vbdev_devs, device, link); + + return 0; +err_qp_alloc: + TAILQ_FOREACH_SAFE(dev_qp, dev_qp_head, link, tmp_qp) { + TAILQ_REMOVE(dev_qp_head, dev_qp, link); + free(dev_qp); + } +err: + free(device); + + return rc; +} + +/* This is called from the module's init function. We setup all crypto devices early on as we are unable + * to easily dynamically configure queue pairs after the drivers are up and running. So, here, we + * configure the max capabilities of each device and assign threads to queue pairs as channels are + * requested. + */ +static int +vbdev_crypto_init_crypto_drivers(void) +{ + uint8_t cdev_count; + uint8_t cdev_id; + int i, rc = 0; + struct vbdev_dev *device; + struct vbdev_dev *tmp_dev; + struct device_qp *dev_qp; + unsigned int max_sess_size = 0, sess_size; + uint16_t num_lcores = rte_lcore_count(); + char aesni_args[32]; + + /* Only the first call, via RPC or module init should init the crypto drivers. */ + if (g_session_mp != NULL) { + return 0; + } + + /* We always init AESNI_MB */ + snprintf(aesni_args, sizeof(aesni_args), "max_nb_queue_pairs=%d", AESNI_MB_NUM_QP); + rc = rte_vdev_init(AESNI_MB, aesni_args); + if (rc) { + SPDK_ERRLOG("error creating virtual PMD %s\n", AESNI_MB); + return -EINVAL; + } + + /* If we have no crypto devices, there's no reason to continue. */ + cdev_count = rte_cryptodev_count(); + if (cdev_count == 0) { + return 0; + } + + /* + * Create global mempools, shared by all devices regardless of type. + */ + + /* First determine max session size, most pools are shared by all the devices, + * so we need to find the global max sessions size. 
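+ *
+ * For a sense of the pool sizing (values from the defines above): NUM_SESSIONS is
+ * 2 * MAX_CRYPTO_VOLUMES = 256 since each claimed volume needs one encrypt and one
+ * decrypt session, and the op pool below carves out AES_CBC_IV_LENGTH +
+ * QUEUED_OP_LENGTH bytes of per-op private data so the IV and the vbdev_crypto_op
+ * used for re-queueing travel with each rte_crypto_op.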
+ */ + for (cdev_id = 0; cdev_id < cdev_count; cdev_id++) { + sess_size = rte_cryptodev_sym_get_private_session_size(cdev_id); + if (sess_size > max_sess_size) { + max_sess_size = sess_size; + } + } + +#if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0) + g_session_mp_priv = rte_mempool_create("session_mp_priv", NUM_SESSIONS, max_sess_size, + SESS_MEMPOOL_CACHE_SIZE, 0, NULL, NULL, NULL, + NULL, SOCKET_ID_ANY, 0); + if (g_session_mp_priv == NULL) { + SPDK_ERRLOG("Cannot create private session pool max size 0x%x\n", max_sess_size); + return -ENOMEM; + } + + g_session_mp = rte_cryptodev_sym_session_pool_create( + "session_mp", + NUM_SESSIONS, 0, SESS_MEMPOOL_CACHE_SIZE, 0, + SOCKET_ID_ANY); +#else + g_session_mp = rte_mempool_create("session_mp", NUM_SESSIONS, max_sess_size, + SESS_MEMPOOL_CACHE_SIZE, + 0, NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0); +#endif + if (g_session_mp == NULL) { + SPDK_ERRLOG("Cannot create session pool max size 0x%x\n", max_sess_size); + rc = -ENOMEM; + goto error_create_session_mp; + } + + g_mbuf_mp = spdk_mempool_create("mbuf_mp", NUM_MBUFS, sizeof(struct rte_mbuf), + SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, + SPDK_ENV_SOCKET_ID_ANY); + if (g_mbuf_mp == NULL) { + SPDK_ERRLOG("Cannot create mbuf pool\n"); + rc = -ENOMEM; + goto error_create_mbuf; + } + + /* We use per op private data to store the IV and our own struct + * for queueing ops. + */ + g_crypto_op_mp = rte_crypto_op_pool_create("op_mp", + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + NUM_MBUFS, + POOL_CACHE_SIZE, + AES_CBC_IV_LENGTH + QUEUED_OP_LENGTH, + rte_socket_id()); + + if (g_crypto_op_mp == NULL) { + SPDK_ERRLOG("Cannot create op pool\n"); + rc = -ENOMEM; + goto error_create_op; + } + + /* Init all devices */ + for (i = 0; i < cdev_count; i++) { + rc = create_vbdev_dev(i, num_lcores); + if (rc) { + goto err; + } + } + + /* Assign index values to the QAT device qp nodes so that we can + * assign them for optimal performance. + */ + i = 0; + TAILQ_FOREACH(dev_qp, &g_device_qp_qat, link) { + dev_qp->index = i++; + } + + return 0; + + /* Error cleanup paths. */ +err: + TAILQ_FOREACH_SAFE(device, &g_vbdev_devs, link, tmp_dev) { + TAILQ_REMOVE(&g_vbdev_devs, device, link); + free(device); + } + rte_mempool_free(g_crypto_op_mp); + g_crypto_op_mp = NULL; +error_create_op: + spdk_mempool_free(g_mbuf_mp); + g_mbuf_mp = NULL; +error_create_mbuf: + rte_mempool_free(g_session_mp); + g_session_mp = NULL; +error_create_session_mp: + if (g_session_mp_priv != NULL) { + rte_mempool_free(g_session_mp_priv); + g_session_mp_priv = NULL; + } + return rc; +} + +/* Following an encrypt or decrypt we need to then either write the encrypted data or finish + * the read on decrypted data. Do that here. + */ +static void +_crypto_operation_complete(struct spdk_bdev_io *bdev_io) +{ + struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto, + crypto_bdev); + struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; + struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch; + struct spdk_bdev_io *free_me = io_ctx->read_io; + int rc = 0; + + TAILQ_REMOVE(&crypto_ch->pending_cry_ios, bdev_io, module_link); + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { + + /* Complete the original IO and then free the one that we created + * as a result of issuing an IO via submit_request.
+ */ + if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); + } else { + SPDK_ERRLOG("Issue with decryption on bdev_io %p\n", bdev_io); + rc = -EINVAL; + } + spdk_bdev_free_io(free_me); + + } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { + + if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) { + /* Write the encrypted data. */ + rc = spdk_bdev_writev_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, + &io_ctx->aux_buf_iov, 1, io_ctx->aux_offset_blocks, + io_ctx->aux_num_blocks, _complete_internal_write, + bdev_io); + } else { + SPDK_ERRLOG("Issue with encryption on bdev_io %p\n", bdev_io); + rc = -EINVAL; + } + + } else { + SPDK_ERRLOG("Unknown bdev type %u on crypto operation completion\n", + bdev_io->type); + rc = -EINVAL; + } + + if (rc) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static int _crypto_operation(struct spdk_bdev_io *bdev_io, + enum rte_crypto_cipher_operation crypto_op, + void *aux_buf); + +/* This is the poller for the crypto device. It uses a single API to dequeue whatever is ready at + * the device. Then we need to decide if what we've got so far (including previous poller + * runs) totals up to one or more complete bdev_ios and if so continue with the bdev_io + * accordingly. This means either completing a read or issuing a new write. + */ +static int +crypto_dev_poller(void *args) +{ + struct crypto_io_channel *crypto_ch = args; + uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id; + int i, num_dequeued_ops, num_enqueued_ops; + struct spdk_bdev_io *bdev_io = NULL; + struct crypto_bdev_io *io_ctx = NULL; + struct rte_crypto_op *dequeued_ops[MAX_DEQUEUE_BURST_SIZE]; + struct rte_crypto_op *mbufs_to_free[2 * MAX_DEQUEUE_BURST_SIZE]; + int num_mbufs = 0; + struct vbdev_crypto_op *op_to_resubmit; + + /* Each run of the poller will get just what the device has available + * at the moment we call it, we don't check again after draining the + * first batch. + */ + num_dequeued_ops = rte_cryptodev_dequeue_burst(cdev_id, crypto_ch->device_qp->qp, + dequeued_ops, MAX_DEQUEUE_BURST_SIZE); + + /* Check if operation was processed successfully */ + for (i = 0; i < num_dequeued_ops; i++) { + + /* We don't know the order or association of the crypto ops wrt any + * particular bdev_io so we need to look at each and determine if it's + * the last one for its bdev_io or not. + */ + bdev_io = (struct spdk_bdev_io *)dequeued_ops[i]->sym->m_src->userdata; + assert(bdev_io != NULL); + io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; + + if (dequeued_ops[i]->status != RTE_CRYPTO_OP_STATUS_SUCCESS) { + SPDK_ERRLOG("error with op %d status %u\n", i, + dequeued_ops[i]->status); + /* Update the bdev status to error, we'll still process the + * rest of the crypto ops for this bdev_io though so they + * aren't left hanging. + */ + io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + } + + assert(io_ctx->cryop_cnt_remaining > 0); + + /* Return the associated src and dst mbufs by collecting them into + * an array that we can use the bulk API to free after the loop. + */ + dequeued_ops[i]->sym->m_src->userdata = NULL; + mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_src; + if (dequeued_ops[i]->sym->m_dst) { + mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_dst; + } + + /* done encrypting, complete the bdev_io */ + if (--io_ctx->cryop_cnt_remaining == 0) { + + /* If we're completing this with an outstanding reset we need + * to fail it.
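+ *
+ * (Reset flow for context: _ch_quiesce() sets crypto_ch->iter, this poller fails any
+ * crypto ops completing during the quiesce, and once pending_cry_ios drains it calls
+ * spdk_for_each_channel_continue() so _ch_quiesce_done() can complete the reset.)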
+ */ + if (crypto_ch->iter) { + io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + } + + /* Complete the IO */ + _crypto_operation_complete(bdev_io); + } + } + + /* Now bulk free both mbufs and crypto operations. */ + if (num_dequeued_ops > 0) { + rte_mempool_put_bulk(g_crypto_op_mp, + (void **)dequeued_ops, + num_dequeued_ops); + assert(num_mbufs > 0); + spdk_mempool_put_bulk(g_mbuf_mp, + (void **)mbufs_to_free, + num_mbufs); + } + + /* Check if there are any pending crypto ops to process */ + while (!TAILQ_EMPTY(&crypto_ch->queued_cry_ops)) { + op_to_resubmit = TAILQ_FIRST(&crypto_ch->queued_cry_ops); + io_ctx = (struct crypto_bdev_io *)op_to_resubmit->bdev_io->driver_ctx; + num_enqueued_ops = rte_cryptodev_enqueue_burst(op_to_resubmit->cdev_id, + op_to_resubmit->qp, + &op_to_resubmit->crypto_op, + 1); + if (num_enqueued_ops == 1) { + /* Make sure we don't put this on twice as one bdev_io is made up + * of many crypto ops. + */ + if (io_ctx->on_pending_list == false) { + TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, op_to_resubmit->bdev_io, module_link); + io_ctx->on_pending_list = true; + } + TAILQ_REMOVE(&crypto_ch->queued_cry_ops, op_to_resubmit, link); + } else { + /* if we couldn't get one, just break and try again later. */ + break; + } + } + + /* If the channel iter is not NULL, we need to continue to poll + * until the pending list is empty, then we can move on to the + * next channel. + */ + if (crypto_ch->iter && TAILQ_EMPTY(&crypto_ch->pending_cry_ios)) { + SPDK_NOTICELOG("Channel %p has been quiesced.\n", crypto_ch); + spdk_for_each_channel_continue(crypto_ch->iter, 0); + crypto_ch->iter = NULL; + } + + return num_dequeued_ops; +} + +/* We're either encrypting on the way down or decrypting on the way back. */ +static int +_crypto_operation(struct spdk_bdev_io *bdev_io, enum rte_crypto_cipher_operation crypto_op, + void *aux_buf) +{ + uint16_t num_enqueued_ops = 0; + uint32_t cryop_cnt = bdev_io->u.bdev.num_blocks; + struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; + struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch; + uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id; + uint32_t crypto_len = io_ctx->crypto_bdev->crypto_bdev.blocklen; + uint64_t total_length = bdev_io->u.bdev.num_blocks * crypto_len; + int rc; + uint32_t iov_index = 0; + uint32_t allocated = 0; + uint8_t *current_iov = NULL; + uint64_t total_remaining = 0; + uint64_t updated_length, current_iov_remaining = 0; + uint32_t crypto_index = 0; + uint32_t en_offset = 0; + struct rte_crypto_op *crypto_ops[MAX_ENQUEUE_ARRAY_SIZE]; + struct rte_mbuf *src_mbufs[MAX_ENQUEUE_ARRAY_SIZE]; + struct rte_mbuf *dst_mbufs[MAX_ENQUEUE_ARRAY_SIZE]; + int burst; + struct vbdev_crypto_op *op_to_queue; + uint64_t alignment = spdk_bdev_get_buf_align(&io_ctx->crypto_bdev->crypto_bdev); + + assert((bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen) <= CRYPTO_MAX_IO); + + /* Get the number of source mbufs that we need. These will always be 1:1 because we + * don't support chaining. The reason we don't is because of our decision to use + * LBA as IV, there can be no case where we'd need >1 mbuf per crypto op or the + * op would be > 1 LBA. + */ + rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&src_mbufs[0], cryop_cnt); + if (rc) { + SPDK_ERRLOG("ERROR trying to get src_mbufs!\n"); + return -ENOMEM; + } + + /* Get the same amount but these buffers to describe the encrypted data location (dst). 
*/ + if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { + rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&dst_mbufs[0], cryop_cnt); + if (rc) { + SPDK_ERRLOG("ERROR trying to get dst_mbufs!\n"); + rc = -ENOMEM; + goto error_get_dst; + } + } + +#ifdef __clang_analyzer__ + /* silence scan-build false positive */ + SPDK_CLANG_ANALYZER_PREINIT_PTR_ARRAY(crypto_ops, MAX_ENQUEUE_ARRAY_SIZE, 0x1000); +#endif + /* Allocate crypto operations. */ + allocated = rte_crypto_op_bulk_alloc(g_crypto_op_mp, + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + crypto_ops, cryop_cnt); + if (allocated < cryop_cnt) { + SPDK_ERRLOG("ERROR trying to get crypto ops!\n"); + rc = -ENOMEM; + goto error_get_ops; + } + + /* For encryption, we need to prepare a single contiguous buffer as the encryption + * destination, we'll then pass that along for the write after encryption is done. + * This is done to avoid encrypting the provided write buffer which may be + * undesirable in some use cases. + */ + if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { + io_ctx->aux_buf_iov.iov_len = total_length; + io_ctx->aux_buf_raw = aux_buf; + io_ctx->aux_buf_iov.iov_base = (void *)(((uintptr_t)aux_buf + (alignment - 1)) & ~(alignment - 1)); + io_ctx->aux_offset_blocks = bdev_io->u.bdev.offset_blocks; + io_ctx->aux_num_blocks = bdev_io->u.bdev.num_blocks; + } + + /* This value is used in the completion callback to determine when the bdev_io is + * complete. + */ + io_ctx->cryop_cnt_remaining = cryop_cnt; + + /* As we don't support chaining because of a decision to use LBA as IV, construction + * of crypto operations is straightforward. We build both the op, the mbuf and the + * dst_mbuf in our local arrays by looping through the length of the bdev IO and + * picking off LBA sized blocks of memory from the IOVs as we walk through them. Each + * LBA sized chunk of memory will correspond 1:1 to a crypto operation and a single + * mbuf per crypto operation. + */ + total_remaining = total_length; + current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base; + current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len; + do { + uint8_t *iv_ptr; + uint64_t op_block_offset; + + /* Set the mbuf elements address and length. Null out the next pointer. */ + src_mbufs[crypto_index]->buf_addr = current_iov; + src_mbufs[crypto_index]->data_len = updated_length = crypto_len; + /* TODO: Make this assignment conditional on QAT usage and add an assert. */ + src_mbufs[crypto_index]->buf_iova = spdk_vtophys((void *)current_iov, &updated_length); + src_mbufs[crypto_index]->next = NULL; + /* Store context in every mbuf as we don't know anything about completion order */ + src_mbufs[crypto_index]->userdata = bdev_io; + + /* Set the IV - we use the LBA of the crypto_op */ + iv_ptr = rte_crypto_op_ctod_offset(crypto_ops[crypto_index], uint8_t *, + IV_OFFSET); + memset(iv_ptr, 0, AES_CBC_IV_LENGTH); + op_block_offset = bdev_io->u.bdev.offset_blocks + crypto_index; + rte_memcpy(iv_ptr, &op_block_offset, sizeof(uint64_t)); + + /* Set the data to encrypt/decrypt length */ + crypto_ops[crypto_index]->sym->cipher.data.length = crypto_len; + crypto_ops[crypto_index]->sym->cipher.data.offset = 0; + + /* link the mbuf to the crypto op.
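+ *
+ * To make the IV scheme above concrete (hypothetical request): a 16 KiB write at
+ * offset_blocks 100 on a 512-byte blocklen bdev becomes cryop_cnt = 32 ops, and op N
+ * carries an IV whose first 8 bytes hold the host-endian value 100 + N while the
+ * remaining 8 of the AES_CBC_IV_LENGTH (16) bytes stay zeroed by the memset above.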
*/ + crypto_ops[crypto_index]->sym->m_src = src_mbufs[crypto_index]; + if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { + crypto_ops[crypto_index]->sym->m_dst = src_mbufs[crypto_index]; + } else { + crypto_ops[crypto_index]->sym->m_dst = NULL; + } + + /* For encrypt, point the destination to a buffer we allocate and redirect the bdev_io + * that will be used to process the write on completion to the same buffer. Setting + * up the en_buffer is a little simpler as we know the destination buffer is single IOV. + */ + if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { + + /* Set the relevant destination en_mbuf elements. */ + dst_mbufs[crypto_index]->buf_addr = io_ctx->aux_buf_iov.iov_base + en_offset; + dst_mbufs[crypto_index]->data_len = updated_length = crypto_len; + /* TODO: Make this assignment conditional on QAT usage and add an assert. */ + dst_mbufs[crypto_index]->buf_iova = spdk_vtophys(dst_mbufs[crypto_index]->buf_addr, + &updated_length); + crypto_ops[crypto_index]->sym->m_dst = dst_mbufs[crypto_index]; + en_offset += crypto_len; + dst_mbufs[crypto_index]->next = NULL; + + /* Attach the crypto session to the operation */ + rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index], + io_ctx->crypto_bdev->session_encrypt); + if (rc) { + rc = -EINVAL; + goto error_attach_session; + } + + } else { + /* Attach the crypto session to the operation */ + rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index], + io_ctx->crypto_bdev->session_decrypt); + if (rc) { + rc = -EINVAL; + goto error_attach_session; + } + + + } + + /* Subtract our running totals for the op in progress and the overall bdev io */ + total_remaining -= crypto_len; + current_iov_remaining -= crypto_len; + + /* move our current IOV pointer accordingly. */ + current_iov += crypto_len; + + /* move on to the next crypto operation */ + crypto_index++; + + /* If we're done with this IOV, move to the next one. */ + if (current_iov_remaining == 0 && total_remaining > 0) { + iov_index++; + current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base; + current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len; + } + } while (total_remaining > 0); + + /* Enqueue everything we've got but limit by the max number of descriptors we + * configured the crypto device for. + */ + burst = spdk_min(cryop_cnt, CRYPTO_QP_DESCRIPTORS); + num_enqueued_ops = rte_cryptodev_enqueue_burst(cdev_id, crypto_ch->device_qp->qp, + &crypto_ops[0], + burst); + + /* Add this bdev_io to our outstanding list if any of its crypto ops made it. */ + if (num_enqueued_ops > 0) { + TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, bdev_io, module_link); + io_ctx->on_pending_list = true; + } + /* We were unable to enqueue everything but did get some, so need to decide what + * to do based on the status of the last op. + */ + if (num_enqueued_ops < cryop_cnt) { + switch (crypto_ops[num_enqueued_ops]->status) { + case RTE_CRYPTO_OP_STATUS_NOT_PROCESSED: + /* Queue them up on a linked list to be resubmitted via the poller. 
*/ + for (crypto_index = num_enqueued_ops; crypto_index < cryop_cnt; crypto_index++) { + op_to_queue = (struct vbdev_crypto_op *)rte_crypto_op_ctod_offset(crypto_ops[crypto_index], + uint8_t *, QUEUED_OP_OFFSET); + op_to_queue->cdev_id = cdev_id; + op_to_queue->qp = crypto_ch->device_qp->qp; + op_to_queue->crypto_op = crypto_ops[crypto_index]; + op_to_queue->bdev_io = bdev_io; + TAILQ_INSERT_TAIL(&crypto_ch->queued_cry_ops, + op_to_queue, + link); + } + break; + default: + /* For all other statuses, set the io_ctx bdev_io status so that + * the poller will pick the failure up for the overall bdev status. + */ + io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; + if (num_enqueued_ops == 0) { + /* If nothing was enqueued, but the last one wasn't because of + * busy, fail it now as the poller won't know anything about it. + */ + _crypto_operation_complete(bdev_io); + rc = -EINVAL; + goto error_attach_session; + } + break; + } + } + + return rc; + + /* Error cleanup paths. */ +error_attach_session: +error_get_ops: + if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { + spdk_mempool_put_bulk(g_mbuf_mp, (void **)&dst_mbufs[0], + cryop_cnt); + } + if (allocated > 0) { + rte_mempool_put_bulk(g_crypto_op_mp, (void **)crypto_ops, + allocated); + } +error_get_dst: + spdk_mempool_put_bulk(g_mbuf_mp, (void **)&src_mbufs[0], + cryop_cnt); + return rc; +} + +/* This function is called after all channels have been quiesced following + * a bdev reset. + */ +static void +_ch_quiesce_done(struct spdk_io_channel_iter *i, int status) +{ + struct crypto_bdev_io *io_ctx = spdk_io_channel_iter_get_ctx(i); + + assert(TAILQ_EMPTY(&io_ctx->crypto_ch->pending_cry_ios)); + assert(io_ctx->orig_io != NULL); + + spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_SUCCESS); +} + +/* This function is called per channel to quiesce IOs before completing a + * bdev reset that we received. + */ +static void +_ch_quiesce(struct spdk_io_channel_iter *i) +{ + struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); + struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch); + + crypto_ch->iter = i; + /* When the poller runs, it will see the non-NULL iter and handle + * the quiesce. + */ +} + +/* Completion callback for IO that were issued from this bdev other than read/write. + * They have their own for readability. + */ +static void +_complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *orig_io = cb_arg; + int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) { + struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx; + + assert(orig_io == orig_ctx->orig_io); + + spdk_bdev_free_io(bdev_io); + + spdk_for_each_channel(orig_ctx->crypto_bdev, + _ch_quiesce, + orig_ctx, + _ch_quiesce_done); + return; + } + + spdk_bdev_io_complete(orig_io, status); + spdk_bdev_free_io(bdev_io); +} + +/* Completion callback for writes that were issued from this bdev. */ +static void +_complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *orig_io = cb_arg; + int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; + struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx; + + spdk_bdev_io_put_aux_buf(orig_io, orig_ctx->aux_buf_raw); + + spdk_bdev_io_complete(orig_io, status); + spdk_bdev_free_io(bdev_io); +} + +/* Completion callback for reads that were issued from this bdev. 
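+ *
+ * As a map of the whole data path: reads go spdk_bdev_io_get_buf ->
+ * spdk_bdev_readv_blocks on the base bdev -> _complete_internal_read ->
+ * _crypto_operation(DECRYPT) in place -> poller -> _crypto_operation_complete finishes
+ * the original IO; writes go spdk_bdev_io_get_aux_buf -> _crypto_operation(ENCRYPT)
+ * into the aux buffer -> poller -> _crypto_operation_complete ->
+ * spdk_bdev_writev_blocks -> _complete_internal_write.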
*/ +static void +_complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *orig_io = cb_arg; + struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx; + + if (success) { + + /* Save off this bdev_io so it can be freed after decryption. */ + orig_ctx->read_io = bdev_io; + + if (!_crypto_operation(orig_io, RTE_CRYPTO_CIPHER_OP_DECRYPT, NULL)) { + return; + } else { + SPDK_ERRLOG("ERROR decrypting\n"); + } + } else { + SPDK_ERRLOG("ERROR on read prior to decrypting\n"); + } + + spdk_bdev_io_complete(orig_io, SPDK_BDEV_IO_STATUS_FAILED); + spdk_bdev_free_io(bdev_io); +} + +static void +vbdev_crypto_resubmit_io(void *arg) +{ + struct spdk_bdev_io *bdev_io = (struct spdk_bdev_io *)arg; + struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; + + vbdev_crypto_submit_request(io_ctx->ch, bdev_io); +} + +static void +vbdev_crypto_queue_io(struct spdk_bdev_io *bdev_io) +{ + struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; + int rc; + + io_ctx->bdev_io_wait.bdev = bdev_io->bdev; + io_ctx->bdev_io_wait.cb_fn = vbdev_crypto_resubmit_io; + io_ctx->bdev_io_wait.cb_arg = bdev_io; + + rc = spdk_bdev_queue_io_wait(bdev_io->bdev, io_ctx->crypto_ch->base_ch, &io_ctx->bdev_io_wait); + if (rc != 0) { + SPDK_ERRLOG("Queue io failed in vbdev_crypto_queue_io, rc=%d.\n", rc); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +/* Callback for getting a buf from the bdev pool in the event that the caller passed + * in NULL, we need to own the buffer so it doesn't get freed by another vbdev module + * beneath us before we're done with it. + */ +static void +crypto_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, + bool success) +{ + struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto, + crypto_bdev); + struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch); + struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; + int rc; + + if (!success) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + rc = spdk_bdev_readv_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, _complete_internal_read, + bdev_io); + if (rc != 0) { + if (rc == -ENOMEM) { + SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "No memory, queue the IO.\n"); + io_ctx->ch = ch; + vbdev_crypto_queue_io(bdev_io); + } else { + SPDK_ERRLOG("ERROR on bdev_io submission!\n"); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } + } +} + +/* For encryption we don't want to encrypt the data in place as the host isn't + * expecting us to mangle its data buffers so we need to encrypt into the bdev + * aux buffer, then we can use that as the source for the disk data transfer. 
+ */ +static void +crypto_write_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, + void *aux_buf) +{ + struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; + int rc = 0; + + rc = _crypto_operation(bdev_io, RTE_CRYPTO_CIPHER_OP_ENCRYPT, aux_buf); + if (rc != 0) { + spdk_bdev_io_put_aux_buf(bdev_io, aux_buf); + if (rc == -ENOMEM) { + SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "No memory, queue the IO.\n"); + io_ctx->ch = ch; + vbdev_crypto_queue_io(bdev_io); + } else { + SPDK_ERRLOG("ERROR on bdev_io submission!\n"); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } + } +} + +/* Called when someone submits IO to this crypto vbdev. For IO's not relevant to crypto, + * we're simply passing it on here via SPDK IO calls which in turn allocate another bdev IO + * and call our cpl callback provided below along with the original bdev_io so that we can + * complete it once this IO completes. For crypto operations, we'll either encrypt it first + * (writes) then call back into bdev to submit it or we'll submit a read and then catch it + * on the way back for decryption. + */ +static void +vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto, + crypto_bdev); + struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch); + struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; + int rc = 0; + + memset(io_ctx, 0, sizeof(struct crypto_bdev_io)); + io_ctx->crypto_bdev = crypto_bdev; + io_ctx->crypto_ch = crypto_ch; + io_ctx->orig_io = bdev_io; + io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, crypto_read_get_buf_cb, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + break; + case SPDK_BDEV_IO_TYPE_WRITE: + /* Tell the bdev layer that we need an aux buf in addition to the data + * buf already associated with the bdev. + */ + spdk_bdev_io_get_aux_buf(bdev_io, crypto_write_get_buf_cb); + break; + case SPDK_BDEV_IO_TYPE_UNMAP: + rc = spdk_bdev_unmap_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, + _complete_internal_io, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_FLUSH: + rc = spdk_bdev_flush_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, + _complete_internal_io, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_RESET: + rc = spdk_bdev_reset(crypto_bdev->base_desc, crypto_ch->base_ch, + _complete_internal_io, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + default: + SPDK_ERRLOG("crypto: unknown I/O type %d\n", bdev_io->type); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + if (rc != 0) { + if (rc == -ENOMEM) { + SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "No memory, queue the IO.\n"); + io_ctx->ch = ch; + vbdev_crypto_queue_io(bdev_io); + } else { + SPDK_ERRLOG("ERROR on bdev_io submission!\n"); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } + } +} + +/* We'll just call the base bdev and let it answer except for WZ command which + * we always say we don't support so that the bdev layer will actually send us + * real writes that we can encrypt. 
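+ *
+ * (For example, a write_zeroes request from above is converted by the generic bdev
+ * layer into plain writes of zeroed buffers, which then flow through the encrypt path
+ * like any other write.)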
+ */ +static bool +vbdev_crypto_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx; + + switch (io_type) { + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_UNMAP: + case SPDK_BDEV_IO_TYPE_RESET: + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_FLUSH: + return spdk_bdev_io_type_supported(crypto_bdev->base_bdev, io_type); + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + /* Force the bdev layer to issue actual writes of zeroes so we can + * encrypt them as regular writes. + */ + default: + return false; + } +} + +/* Callback for unregistering the IO device. */ +static void +_device_unregister_cb(void *io_device) +{ + struct vbdev_crypto *crypto_bdev = io_device; + + /* Done with this crypto_bdev. */ + rte_cryptodev_sym_session_free(crypto_bdev->session_decrypt); + rte_cryptodev_sym_session_free(crypto_bdev->session_encrypt); + free(crypto_bdev->drv_name); + if (crypto_bdev->key) { + memset(crypto_bdev->key, 0, strnlen(crypto_bdev->key, (AES_CBC_KEY_LENGTH + 1))); + free(crypto_bdev->key); + } + if (crypto_bdev->key2) { + memset(crypto_bdev->key2, 0, strnlen(crypto_bdev->key2, (AES_XTS_KEY_LENGTH + 1))); + free(crypto_bdev->key2); + } + if (crypto_bdev->xts_key) { + memset(crypto_bdev->xts_key, 0, strnlen(crypto_bdev->xts_key, (AES_XTS_KEY_LENGTH * 2) + 1)); + free(crypto_bdev->xts_key); + } + free(crypto_bdev->crypto_bdev.name); + free(crypto_bdev); +} + +/* Wrapper for the bdev close operation. */ +static void +_vbdev_crypto_destruct(void *ctx) +{ + struct spdk_bdev_desc *desc = ctx; + + spdk_bdev_close(desc); +} + +/* Called after we've unregistered following a hot remove callback. + * Our finish entry point will be called next. + */ +static int +vbdev_crypto_destruct(void *ctx) +{ + struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx; + + /* Remove this device from the internal list */ + TAILQ_REMOVE(&g_vbdev_crypto, crypto_bdev, link); + + /* Unclaim the underlying bdev. */ + spdk_bdev_module_release_bdev(crypto_bdev->base_bdev); + + /* Close the underlying bdev on its same opened thread. */ + if (crypto_bdev->thread && crypto_bdev->thread != spdk_get_thread()) { + spdk_thread_send_msg(crypto_bdev->thread, _vbdev_crypto_destruct, crypto_bdev->base_desc); + } else { + spdk_bdev_close(crypto_bdev->base_desc); + } + + /* Unregister the io_device. */ + spdk_io_device_unregister(crypto_bdev, _device_unregister_cb); + + g_number_of_claimed_volumes--; + + return 0; +} + +/* We supplied this as an entry point for upper layers who want to communicate to this + * bdev. This is how they get a channel. We are passed the same context we provided when + * we created our crypto vbdev in examine() which, for this bdev, is the address of one of + * our context nodes. From here we'll ask the SPDK channel code to fill out our channel + * struct and we'll keep it in our crypto node. + */ +static struct spdk_io_channel * +vbdev_crypto_get_io_channel(void *ctx) +{ + struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx; + + /* The IO channel code will allocate a channel for us which consists of + * the SPDK channel structure plus the size of our crypto_io_channel struct + * that we passed in when we registered our IO device. It will then call + * our channel create callback to populate any elements that we need to + * update. 
+ */ + return spdk_get_io_channel(crypto_bdev); +} + +/* This is the output for bdev_get_bdevs() for this vbdev */ +static int +vbdev_crypto_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx; + + spdk_json_write_name(w, "crypto"); + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev)); + spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev)); + spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name); + spdk_json_write_named_string(w, "key", crypto_bdev->key); + if (strcmp(crypto_bdev->cipher, AES_XTS) == 0) { + spdk_json_write_named_string(w, "key2", crypto_bdev->key2); + } + spdk_json_write_named_string(w, "cipher", crypto_bdev->cipher); + spdk_json_write_object_end(w); + return 0; +} + +static int +vbdev_crypto_config_json(struct spdk_json_write_ctx *w) +{ + struct vbdev_crypto *crypto_bdev; + + TAILQ_FOREACH(crypto_bdev, &g_vbdev_crypto, link) { + spdk_json_write_object_begin(w); + spdk_json_write_named_string(w, "method", "bdev_crypto_create"); + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev)); + spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev)); + spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name); + spdk_json_write_named_string(w, "key", crypto_bdev->key); + if (strcmp(crypto_bdev->cipher, AES_XTS) == 0) { + spdk_json_write_named_string(w, "key2", crypto_bdev->key2); + } + spdk_json_write_named_string(w, "cipher", crypto_bdev->cipher); + spdk_json_write_object_end(w); + spdk_json_write_object_end(w); + } + return 0; +} + +/* Helper function for the channel creation callback. */ +static void +_assign_device_qp(struct vbdev_crypto *crypto_bdev, struct device_qp *device_qp, + struct crypto_io_channel *crypto_ch) +{ + pthread_mutex_lock(&g_device_qp_lock); + if (strcmp(crypto_bdev->drv_name, QAT) == 0) { + /* For some QAT devices, the optimal qp to use is every 32nd as this spreads the + * workload out over the multiple virtual functions in the device. For the devices + * where this isn't the case, it doesn't hurt. + */ + TAILQ_FOREACH(device_qp, &g_device_qp_qat, link) { + if (device_qp->index != g_next_qat_index) { + continue; + } + if (device_qp->in_use == false) { + crypto_ch->device_qp = device_qp; + device_qp->in_use = true; + g_next_qat_index = (g_next_qat_index + QAT_VF_SPREAD) % g_qat_total_qp; + break; + } else { + /* if the preferred index is used, skip to the next one in this set. */ + g_next_qat_index = (g_next_qat_index + 1) % g_qat_total_qp; + } + } + } else if (strcmp(crypto_bdev->drv_name, AESNI_MB) == 0) { + TAILQ_FOREACH(device_qp, &g_device_qp_aesni_mb, link) { + if (device_qp->in_use == false) { + crypto_ch->device_qp = device_qp; + device_qp->in_use = true; + break; + } + } + } + pthread_mutex_unlock(&g_device_qp_lock); +} + +/* We provide this callback for the SPDK channel code to create a channel using + * the channel struct we provided in our module get_io_channel() entry point. Here + * we get and save off an underlying base channel of the device below us so that + * we can communicate with the base bdev on a per channel basis. We also register the + * poller used to complete crypto operations from the device.
+ */ +static int +crypto_bdev_ch_create_cb(void *io_device, void *ctx_buf) +{ + struct crypto_io_channel *crypto_ch = ctx_buf; + struct vbdev_crypto *crypto_bdev = io_device; + struct device_qp *device_qp = NULL; + + crypto_ch->base_ch = spdk_bdev_get_io_channel(crypto_bdev->base_desc); + crypto_ch->poller = SPDK_POLLER_REGISTER(crypto_dev_poller, crypto_ch, 0); + crypto_ch->device_qp = NULL; + + /* Assign a device/qp combination that is unique per channel per PMD. */ + _assign_device_qp(crypto_bdev, device_qp, crypto_ch); + assert(crypto_ch->device_qp); + + /* We use this queue to track outstanding IO in our layer. */ + TAILQ_INIT(&crypto_ch->pending_cry_ios); + + /* We use this to queue up crypto ops when the device is busy. */ + TAILQ_INIT(&crypto_ch->queued_cry_ops); + + return 0; +} + +/* We provide this callback for the SPDK channel code to destroy a channel + * created with our create callback. We just need to undo anything we did + * when we created. + */ +static void +crypto_bdev_ch_destroy_cb(void *io_device, void *ctx_buf) +{ + struct crypto_io_channel *crypto_ch = ctx_buf; + + pthread_mutex_lock(&g_device_qp_lock); + crypto_ch->device_qp->in_use = false; + pthread_mutex_unlock(&g_device_qp_lock); + + spdk_poller_unregister(&crypto_ch->poller); + spdk_put_io_channel(crypto_ch->base_ch); +} + +/* Create the association from the bdev and vbdev name and insert + * on the global list. */ +static int +vbdev_crypto_insert_name(const char *bdev_name, const char *vbdev_name, + const char *crypto_pmd, const char *key, + const char *cipher, const char *key2) +{ + struct bdev_names *name; + int rc, j; + bool found = false; + + TAILQ_FOREACH(name, &g_bdev_names, link) { + if (strcmp(vbdev_name, name->vbdev_name) == 0) { + SPDK_ERRLOG("crypto bdev %s already exists\n", vbdev_name); + return -EEXIST; + } + } + + name = calloc(1, sizeof(struct bdev_names)); + if (!name) { + SPDK_ERRLOG("could not allocate bdev_names\n"); + return -ENOMEM; + } + + name->bdev_name = strdup(bdev_name); + if (!name->bdev_name) { + SPDK_ERRLOG("could not allocate name->bdev_name\n"); + rc = -ENOMEM; + goto error_alloc_bname; + } + + name->vbdev_name = strdup(vbdev_name); + if (!name->vbdev_name) { + SPDK_ERRLOG("could not allocate name->vbdev_name\n"); + rc = -ENOMEM; + goto error_alloc_vname; + } + + name->drv_name = strdup(crypto_pmd); + if (!name->drv_name) { + SPDK_ERRLOG("could not allocate name->drv_name\n"); + rc = -ENOMEM; + goto error_alloc_dname; + } + for (j = 0; j < MAX_NUM_DRV_TYPES ; j++) { + if (strcmp(crypto_pmd, g_driver_names[j]) == 0) { + found = true; + break; + } + } + if (!found) { + SPDK_ERRLOG("invalid crypto PMD type %s\n", crypto_pmd); + rc = -EINVAL; + goto error_invalid_pmd; + } + + name->key = strdup(key); + if (!name->key) { + SPDK_ERRLOG("could not allocate name->key\n"); + rc = -ENOMEM; + goto error_alloc_key; + } + if (strnlen(name->key, (AES_CBC_KEY_LENGTH + 1)) != AES_CBC_KEY_LENGTH) { + SPDK_ERRLOG("invalid AES_CBC key length\n"); + rc = -EINVAL; + goto error_invalid_key; + } + + if (strncmp(cipher, AES_XTS, sizeof(AES_XTS)) == 0) { + /* To please scan-build, input validation makes sure we can't + * have this cipher without providing a key2. 
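+ *
+ * For illustration (hypothetical names and keys, and assuming crypto_qat is the QAT
+ * PMD string), the bdev_crypto_create RPC that reaches this XTS branch would carry
+ * params along the lines of base_bdev_name "Malloc0", name "crypto0", crypto_pmd
+ * "crypto_qat", a 16-byte key, cipher "AES_XTS" and a 16-byte key2; the param names
+ * follow what vbdev_crypto_config_json() above writes out.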
+ */ + name->cipher = AES_XTS; + assert(key2); + if (strnlen(key2, (AES_XTS_KEY_LENGTH + 1)) != AES_XTS_KEY_LENGTH) { + SPDK_ERRLOG("invalid AES_XTS key length\n"); + rc = -EINVAL; + goto error_invalid_key2; + } + + name->key2 = strdup(key2); + if (!name->key2) { + SPDK_ERRLOG("could not allocate name->key2\n"); + rc = -ENOMEM; + goto error_alloc_key2; + } + } else if (strncmp(cipher, AES_CBC, sizeof(AES_CBC)) == 0) { + name->cipher = AES_CBC; + } else { + SPDK_ERRLOG("Invalid cipher: %s\n", cipher); + rc = -EINVAL; + goto error_cipher; + } + + TAILQ_INSERT_TAIL(&g_bdev_names, name, link); + + return 0; + + /* Error cleanup paths. */ +error_cipher: + free(name->key2); +error_alloc_key2: +error_invalid_key2: +error_invalid_key: + free(name->key); +error_alloc_key: +error_invalid_pmd: + free(name->drv_name); +error_alloc_dname: + free(name->vbdev_name); +error_alloc_vname: + free(name->bdev_name); +error_alloc_bname: + free(name); + return rc; +} + +/* RPC entry point for crypto creation. */ +int +create_crypto_disk(const char *bdev_name, const char *vbdev_name, + const char *crypto_pmd, const char *key, + const char *cipher, const char *key2) +{ + struct spdk_bdev *bdev = NULL; + int rc = 0; + + bdev = spdk_bdev_get_by_name(bdev_name); + + rc = vbdev_crypto_insert_name(bdev_name, vbdev_name, crypto_pmd, key, cipher, key2); + if (rc) { + return rc; + } + + if (!bdev) { + SPDK_NOTICELOG("vbdev creation deferred pending base bdev arrival\n"); + return 0; + } + + rc = vbdev_crypto_claim(bdev); + if (rc) { + return rc; + } + + return rc; +} + +/* Called at driver init time, parses config file to prepare for examine calls, + * also fully initializes the crypto drivers. + */ +static int +vbdev_crypto_init(void) +{ + struct spdk_conf_section *sp = NULL; + const char *conf_bdev_name = NULL; + const char *conf_vbdev_name = NULL; + const char *crypto_pmd = NULL; + int i; + int rc = 0; + const char *key = NULL; + const char *cipher = NULL; + const char *key2 = NULL; + + /* Fully configure both SW and HW drivers. */ + rc = vbdev_crypto_init_crypto_drivers(); + if (rc) { + SPDK_ERRLOG("Error setting up crypto devices\n"); + return rc; + } + + sp = spdk_conf_find_section(NULL, "crypto"); + if (sp == NULL) { + return 0; + } + + for (i = 0; ; i++) { + + if (!spdk_conf_section_get_nval(sp, "CRY", i)) { + break; + } + + conf_bdev_name = spdk_conf_section_get_nmval(sp, "CRY", i, 0); + if (!conf_bdev_name) { + SPDK_ERRLOG("crypto configuration missing bdev name\n"); + return -EINVAL; + } + + conf_vbdev_name = spdk_conf_section_get_nmval(sp, "CRY", i, 1); + if (!conf_vbdev_name) { + SPDK_ERRLOG("crypto configuration missing crypto_bdev name\n"); + return -EINVAL; + } + + key = spdk_conf_section_get_nmval(sp, "CRY", i, 2); + if (!key) { + SPDK_ERRLOG("crypto configuration missing crypto_bdev key\n"); + return -EINVAL; + } + SPDK_NOTICELOG("WARNING: You are storing your key in a plain text file!!\n"); + + crypto_pmd = spdk_conf_section_get_nmval(sp, "CRY", i, 3); + if (!crypto_pmd) { + SPDK_ERRLOG("crypto configuration missing driver type\n"); + return -EINVAL; + } + + /* These are optional. 
*/ + cipher = spdk_conf_section_get_nmval(sp, "CRY", i, 4); + if (cipher == NULL) { + cipher = AES_CBC; + } + key2 = spdk_conf_section_get_nmval(sp, "CRY", i, 5); + + /* Note: config file options do not support QAT AES_XTS, use RPC */ + rc = vbdev_crypto_insert_name(conf_bdev_name, conf_vbdev_name, + crypto_pmd, key, cipher, key2); + if (rc != 0) { + return rc; + } + } + + return rc; +} + +/* Called when the entire module is being torn down. */ +static void +vbdev_crypto_finish(void) +{ + struct bdev_names *name; + struct vbdev_dev *device; + struct device_qp *dev_qp; + unsigned i; + int rc; + + while ((name = TAILQ_FIRST(&g_bdev_names))) { + TAILQ_REMOVE(&g_bdev_names, name, link); + free(name->drv_name); + free(name->key); + free(name->bdev_name); + free(name->vbdev_name); + free(name->key2); + free(name); + } + + while ((device = TAILQ_FIRST(&g_vbdev_devs))) { + struct rte_cryptodev *rte_dev; + + TAILQ_REMOVE(&g_vbdev_devs, device, link); + rte_cryptodev_stop(device->cdev_id); + + assert(device->cdev_id < RTE_CRYPTO_MAX_DEVS); + rte_dev = &rte_cryptodevs[device->cdev_id]; + + if (rte_dev->dev_ops->queue_pair_release != NULL) { + for (i = 0; i < device->cdev_info.max_nb_queue_pairs; i++) { + rte_dev->dev_ops->queue_pair_release(rte_dev, i); + } + } + free(device); + } + rc = rte_vdev_uninit(AESNI_MB); + if (rc) { + SPDK_ERRLOG("%d from rte_vdev_uninit\n", rc); + } + + while ((dev_qp = TAILQ_FIRST(&g_device_qp_qat))) { + TAILQ_REMOVE(&g_device_qp_qat, dev_qp, link); + free(dev_qp); + } + + while ((dev_qp = TAILQ_FIRST(&g_device_qp_aesni_mb))) { + TAILQ_REMOVE(&g_device_qp_aesni_mb, dev_qp, link); + free(dev_qp); + } + + rte_mempool_free(g_crypto_op_mp); + spdk_mempool_free(g_mbuf_mp); + rte_mempool_free(g_session_mp); + if (g_session_mp_priv != NULL) { + rte_mempool_free(g_session_mp_priv); + } +} + +/* During init we'll be asked how much memory we'd like passed to us + * in bdev_io structures as context. Here's where we specify how + * much context we want per IO. + */ +static int +vbdev_crypto_get_ctx_size(void) +{ + return sizeof(struct crypto_bdev_io); +} + +/* Called when SPDK wants to save the current config of this vbdev module to + * a file. + */ +static void +vbdev_crypto_get_spdk_running_config(FILE *fp) +{ + struct bdev_names *names = NULL; + fprintf(fp, "\n[crypto]\n"); + TAILQ_FOREACH(names, &g_bdev_names, link) { + fprintf(fp, " crypto %s %s ", names->bdev_name, names->vbdev_name); + fprintf(fp, "\n"); + } + + fprintf(fp, "\n"); +} + +/* Called when the underlying base bdev goes away. */ +static void +vbdev_crypto_examine_hotremove_cb(void *ctx) +{ + struct vbdev_crypto *crypto_bdev, *tmp; + struct spdk_bdev *bdev_find = ctx; + + TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) { + if (bdev_find == crypto_bdev->base_bdev) { + spdk_bdev_unregister(&crypto_bdev->crypto_bdev, NULL, NULL); + } + } +} + +static void +vbdev_crypto_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + /* No config per bdev needed */ +} + +/* When we register our bdev this is how we specify our entry points.
 */
+static const struct spdk_bdev_fn_table vbdev_crypto_fn_table = {
+ .destruct = vbdev_crypto_destruct,
+ .submit_request = vbdev_crypto_submit_request,
+ .io_type_supported = vbdev_crypto_io_type_supported,
+ .get_io_channel = vbdev_crypto_get_io_channel,
+ .dump_info_json = vbdev_crypto_dump_info_json,
+ .write_config_json = vbdev_crypto_write_config_json
+};
+
+static struct spdk_bdev_module crypto_if = {
+ .name = "crypto",
+ .module_init = vbdev_crypto_init,
+ .config_text = vbdev_crypto_get_spdk_running_config,
+ .get_ctx_size = vbdev_crypto_get_ctx_size,
+ .examine_config = vbdev_crypto_examine,
+ .module_fini = vbdev_crypto_finish,
+ .config_json = vbdev_crypto_config_json
+};
+
+SPDK_BDEV_MODULE_REGISTER(crypto, &crypto_if)
+
+static int
+vbdev_crypto_claim(struct spdk_bdev *bdev)
+{
+ struct bdev_names *name;
+ struct vbdev_crypto *vbdev;
+ struct vbdev_dev *device;
+ bool found = false;
+ int rc = 0;
+
+ if (g_number_of_claimed_volumes >= MAX_CRYPTO_VOLUMES) {
+ SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "Reached max number of claimed volumes\n");
+ rc = -EINVAL;
+ goto error_vbdev_alloc;
+ }
+ g_number_of_claimed_volumes++;
+
+ /* Check our list of names from config versus this bdev and if
+ * there's a match, create the crypto_bdev & bdev accordingly.
+ */
+ TAILQ_FOREACH(name, &g_bdev_names, link) {
+ if (strcmp(name->bdev_name, bdev->name) != 0) {
+ continue;
+ }
+ SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "Match on %s\n", bdev->name);
+
+ vbdev = calloc(1, sizeof(struct vbdev_crypto));
+ if (!vbdev) {
+ SPDK_ERRLOG("could not allocate crypto_bdev\n");
+ rc = -ENOMEM;
+ goto error_vbdev_alloc;
+ }
+
+ /* The base bdev that we're attaching to. */
+ vbdev->base_bdev = bdev;
+ vbdev->crypto_bdev.name = strdup(name->vbdev_name);
+ if (!vbdev->crypto_bdev.name) {
+ SPDK_ERRLOG("could not allocate crypto_bdev name\n");
+ rc = -ENOMEM;
+ goto error_bdev_name;
+ }
+
+ vbdev->key = strdup(name->key);
+ if (!vbdev->key) {
+ SPDK_ERRLOG("could not allocate crypto_bdev key\n");
+ rc = -ENOMEM;
+ goto error_alloc_key;
+ }
+
+ if (name->key2) {
+ vbdev->key2 = strdup(name->key2);
+ if (!vbdev->key2) {
+ SPDK_ERRLOG("could not allocate crypto_bdev key2\n");
+ rc = -ENOMEM;
+ goto error_alloc_key2;
+ }
+ }
+
+ vbdev->drv_name = strdup(name->drv_name);
+ if (!vbdev->drv_name) {
+ SPDK_ERRLOG("could not allocate crypto_bdev drv_name\n");
+ rc = -ENOMEM;
+ goto error_drv_name;
+ }
+
+ vbdev->crypto_bdev.product_name = "crypto";
+ vbdev->crypto_bdev.write_cache = bdev->write_cache;
+ vbdev->cipher = AES_CBC;
+ if (strcmp(vbdev->drv_name, QAT) == 0) {
+ vbdev->crypto_bdev.required_alignment =
+ spdk_max(spdk_u32log2(bdev->blocklen), bdev->required_alignment);
+ SPDK_NOTICELOG("QAT in use: Required alignment set to %u\n",
+ vbdev->crypto_bdev.required_alignment);
+ if (strcmp(name->cipher, AES_CBC) == 0) {
+ SPDK_NOTICELOG("QAT using cipher: AES_CBC\n");
+ } else {
+ SPDK_NOTICELOG("QAT using cipher: AES_XTS\n");
+ vbdev->cipher = AES_XTS;
+ /* DPDK expects the keys to be concatenated together. 
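+ * Layout of xts_key: key1 occupies the first AES_XTS_KEY_LENGTH bytes
+ * and key2 (plus its NUL terminator) the second, matching the
+ * (AES_XTS_KEY_LENGTH * 2) + 1 allocation below.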
*/ + vbdev->xts_key = calloc(1, (AES_XTS_KEY_LENGTH * 2) + 1); + if (vbdev->xts_key == NULL) { + SPDK_ERRLOG("could not allocate memory for XTS key\n"); + rc = -ENOMEM; + goto error_xts_key; + } + memcpy(vbdev->xts_key, vbdev->key, AES_XTS_KEY_LENGTH); + assert(name->key2); + memcpy(vbdev->xts_key + AES_XTS_KEY_LENGTH, name->key2, AES_XTS_KEY_LENGTH + 1); + } + } else { + vbdev->crypto_bdev.required_alignment = bdev->required_alignment; + } + /* Note: CRYPTO_MAX_IO is in units of bytes, optimal_io_boundary is + * in units of blocks. + */ + if (bdev->optimal_io_boundary > 0) { + vbdev->crypto_bdev.optimal_io_boundary = + spdk_min((CRYPTO_MAX_IO / bdev->blocklen), bdev->optimal_io_boundary); + } else { + vbdev->crypto_bdev.optimal_io_boundary = (CRYPTO_MAX_IO / bdev->blocklen); + } + vbdev->crypto_bdev.split_on_optimal_io_boundary = true; + vbdev->crypto_bdev.blocklen = bdev->blocklen; + vbdev->crypto_bdev.blockcnt = bdev->blockcnt; + + /* This is the context that is passed to us when the bdev + * layer calls in so we'll save our crypto_bdev node here. + */ + vbdev->crypto_bdev.ctxt = vbdev; + vbdev->crypto_bdev.fn_table = &vbdev_crypto_fn_table; + vbdev->crypto_bdev.module = &crypto_if; + TAILQ_INSERT_TAIL(&g_vbdev_crypto, vbdev, link); + + spdk_io_device_register(vbdev, crypto_bdev_ch_create_cb, crypto_bdev_ch_destroy_cb, + sizeof(struct crypto_io_channel), vbdev->crypto_bdev.name); + + rc = spdk_bdev_open(bdev, true, vbdev_crypto_examine_hotremove_cb, + bdev, &vbdev->base_desc); + if (rc) { + SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev)); + goto error_open; + } + + /* Save the thread where the base device is opened */ + vbdev->thread = spdk_get_thread(); + + rc = spdk_bdev_module_claim_bdev(bdev, vbdev->base_desc, vbdev->crypto_bdev.module); + if (rc) { + SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev)); + goto error_claim; + } + + /* To init the session we have to get the cryptoDev device ID for this vbdev */ + TAILQ_FOREACH(device, &g_vbdev_devs, link) { + if (strcmp(device->cdev_info.driver_name, vbdev->drv_name) == 0) { + found = true; + break; + } + } + if (found == false) { + SPDK_ERRLOG("ERROR can't match crypto device driver to crypto vbdev!\n"); + rc = -EINVAL; + goto error_cant_find_devid; + } + + /* Get sessions. */ + vbdev->session_encrypt = rte_cryptodev_sym_session_create(g_session_mp); + if (NULL == vbdev->session_encrypt) { + SPDK_ERRLOG("ERROR trying to create crypto session!\n"); + rc = -EINVAL; + goto error_session_en_create; + } + + vbdev->session_decrypt = rte_cryptodev_sym_session_create(g_session_mp); + if (NULL == vbdev->session_decrypt) { + SPDK_ERRLOG("ERROR trying to create crypto session!\n"); + rc = -EINVAL; + goto error_session_de_create; + } + + /* Init our per vbdev xform with the desired cipher options. 
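+ * The same xform structure is reused to init both sessions below; only
+ * the cipher.op field is flipped between ENCRYPT and DECRYPT.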
 */
+ vbdev->cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+ vbdev->cipher_xform.cipher.iv.offset = IV_OFFSET;
+ if (strcmp(name->cipher, AES_CBC) == 0) {
+ vbdev->cipher_xform.cipher.key.data = vbdev->key;
+ vbdev->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC;
+ vbdev->cipher_xform.cipher.key.length = AES_CBC_KEY_LENGTH;
+ } else {
+ vbdev->cipher_xform.cipher.key.data = vbdev->xts_key;
+ vbdev->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_XTS;
+ vbdev->cipher_xform.cipher.key.length = AES_XTS_KEY_LENGTH * 2;
+ }
+ vbdev->cipher_xform.cipher.iv.length = AES_CBC_IV_LENGTH;
+
+ vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
+ rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_encrypt,
+ &vbdev->cipher_xform,
+ g_session_mp_priv ? g_session_mp_priv : g_session_mp);
+ if (rc < 0) {
+ SPDK_ERRLOG("ERROR trying to init encrypt session!\n");
+ rc = -EINVAL;
+ goto error_session_init;
+ }
+
+ vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT;
+ rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_decrypt,
+ &vbdev->cipher_xform,
+ g_session_mp_priv ? g_session_mp_priv : g_session_mp);
+ if (rc < 0) {
+ SPDK_ERRLOG("ERROR trying to init decrypt session!\n");
+ rc = -EINVAL;
+ goto error_session_init;
+ }
+
+ rc = spdk_bdev_register(&vbdev->crypto_bdev);
+ if (rc < 0) {
+ SPDK_ERRLOG("ERROR trying to register bdev\n");
+ rc = -EINVAL;
+ goto error_bdev_register;
+ }
+ SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "registered io_device and virtual bdev for: %s\n",
+ name->vbdev_name);
+ break;
+ }
+
+ return rc;
+
+ /* Error cleanup paths. */
+error_bdev_register:
+error_session_init:
+ rte_cryptodev_sym_session_free(vbdev->session_decrypt);
+error_session_de_create:
+ rte_cryptodev_sym_session_free(vbdev->session_encrypt);
+error_session_en_create:
+error_cant_find_devid:
+error_claim:
+ spdk_bdev_close(vbdev->base_desc);
+error_open:
+ TAILQ_REMOVE(&g_vbdev_crypto, vbdev, link);
+ spdk_io_device_unregister(vbdev, NULL);
+ free(vbdev->xts_key);
+error_xts_key:
+ free(vbdev->drv_name);
+error_drv_name:
+ free(vbdev->key2);
+error_alloc_key2:
+ free(vbdev->key);
+error_alloc_key:
+ free(vbdev->crypto_bdev.name);
+error_bdev_name:
+ free(vbdev);
+error_vbdev_alloc:
+ g_number_of_claimed_volumes--;
+ return rc;
+}
+
+/* RPC entry for deleting a crypto vbdev. */
+void
+delete_crypto_disk(struct spdk_bdev *bdev, spdk_delete_crypto_complete cb_fn,
+ void *cb_arg)
+{
+ struct bdev_names *name;
+
+ if (!bdev || bdev->module != &crypto_if) {
+ cb_fn(cb_arg, -ENODEV);
+ return;
+ }
+
+ /* Remove the association (vbdev, bdev) from g_bdev_names. This is required so that the
+ * vbdev does not get re-created if the same bdev is constructed at some other time,
+ * unless the underlying bdev was hot-removed.
+ */
+ TAILQ_FOREACH(name, &g_bdev_names, link) {
+ if (strcmp(name->vbdev_name, bdev->name) == 0) {
+ TAILQ_REMOVE(&g_bdev_names, name, link);
+ free(name->bdev_name);
+ free(name->vbdev_name);
+ free(name->drv_name);
+ free(name->key);
+ free(name->key2);
+ free(name);
+ break;
+ }
+ }
+
+ /* Additional cleanup happens in the destruct callback. */
+ spdk_bdev_unregister(bdev, cb_fn, cb_arg);
+}
+
+/* Because we specified this function as the examine_config callback in our
+ * bdev module struct, we'll get this call anytime a new bdev shows up.
+ * Here we need to decide if we care about it and if so what to do. 
We
+ * parsed the config file at init so we check the new bdev against the list
+ * we built up at that time and if the user configured us to attach to this
+ * bdev, here's where we do it.
+ */
+static void
+vbdev_crypto_examine(struct spdk_bdev *bdev)
+{
+ vbdev_crypto_claim(bdev);
+ spdk_bdev_module_examine_done(&crypto_if);
+}
+
+SPDK_LOG_REGISTER_COMPONENT("vbdev_crypto", SPDK_LOG_CRYPTO)
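+
+/* Example usage (hypothetical invocation; the exact RPC name, PMD string and
+ * key all depend on the SPDK release and build, so treat this as a sketch):
+ *
+ *   scripts/rpc.py bdev_crypto_create Malloc0 crypto0 crypto_aesni_mb \
+ *       0123456789123456
+ *
+ * This reaches create_crypto_disk() above; the name mapping is recorded and
+ * the vbdev is created immediately if the base bdev already exists, or
+ * deferred to the examine path otherwise.
+ */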