diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 17:40:19 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 17:40:19 +0000 |
commit | 9f0fc191371843c4fc000a226b0a26b6c059aacd (patch) | |
tree | 35f8be3ef04506ac891ad001e8c41e535ae8d01d /drivers/nvme | |
parent | Releasing progress-linux version 6.6.15-2~progress7.99u1. (diff) | |
download | linux-9f0fc191371843c4fc000a226b0a26b6c059aacd.tar.xz linux-9f0fc191371843c4fc000a226b0a26b6c059aacd.zip |
Merging upstream version 6.7.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/nvme')
27 files changed, 1187 insertions, 299 deletions
diff --git a/drivers/nvme/Makefile b/drivers/nvme/Makefile index eedca8c720..74f59ceed3 100644 --- a/drivers/nvme/Makefile +++ b/drivers/nvme/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_NVME_COMMON) += common/ +obj-y += common/ obj-y += host/ obj-y += target/ diff --git a/drivers/nvme/common/Kconfig b/drivers/nvme/common/Kconfig index 4514f44362..244432e0b7 100644 --- a/drivers/nvme/common/Kconfig +++ b/drivers/nvme/common/Kconfig @@ -1,4 +1,14 @@ # SPDX-License-Identifier: GPL-2.0-only -config NVME_COMMON +config NVME_KEYRING tristate + select KEYS + +config NVME_AUTH + tristate + select CRYPTO + select CRYPTO_HMAC + select CRYPTO_SHA256 + select CRYPTO_SHA512 + select CRYPTO_DH + select CRYPTO_DH_RFC7919_GROUPS diff --git a/drivers/nvme/common/Makefile b/drivers/nvme/common/Makefile index 720c625b8a..681514cf2e 100644 --- a/drivers/nvme/common/Makefile +++ b/drivers/nvme/common/Makefile @@ -2,6 +2,8 @@ ccflags-y += -I$(src) -obj-$(CONFIG_NVME_COMMON) += nvme-common.o +obj-$(CONFIG_NVME_AUTH) += nvme-auth.o +obj-$(CONFIG_NVME_KEYRING) += nvme-keyring.o -nvme-common-y += auth.o +nvme-auth-y += auth.o +nvme-keyring-y += keyring.o diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c index d90e4f0c08..a23ab5c968 100644 --- a/drivers/nvme/common/auth.c +++ b/drivers/nvme/common/auth.c @@ -150,6 +150,14 @@ size_t nvme_auth_hmac_hash_len(u8 hmac_id) } EXPORT_SYMBOL_GPL(nvme_auth_hmac_hash_len); +u32 nvme_auth_key_struct_size(u32 key_len) +{ + struct nvme_dhchap_key key; + + return struct_size(&key, key, key_len); +} +EXPORT_SYMBOL_GPL(nvme_auth_key_struct_size); + struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret, u8 key_hash) { @@ -163,14 +171,9 @@ struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret, p = strrchr(secret, ':'); if (p) allocated_len = p - secret; - key = kzalloc(sizeof(*key), GFP_KERNEL); + key = nvme_auth_alloc_key(allocated_len, 0); if (!key) return ERR_PTR(-ENOMEM); - 
key->key = kzalloc(allocated_len, GFP_KERNEL); - if (!key->key) { - ret = -ENOMEM; - goto out_free_key; - } key_len = base64_decode(secret, allocated_len, key->key); if (key_len < 0) { @@ -187,14 +190,6 @@ struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret, goto out_free_secret; } - if (key_hash > 0 && - (key_len - 4) != nvme_auth_hmac_hash_len(key_hash)) { - pr_err("Mismatched key len %d for %s\n", key_len, - nvme_auth_hmac_name(key_hash)); - ret = -EINVAL; - goto out_free_secret; - } - /* The last four bytes is the CRC in little-endian format */ key_len -= 4; /* @@ -213,37 +208,51 @@ struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret, key->hash = key_hash; return key; out_free_secret: - kfree_sensitive(key->key); -out_free_key: - kfree(key); + nvme_auth_free_key(key); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(nvme_auth_extract_key); +struct nvme_dhchap_key *nvme_auth_alloc_key(u32 len, u8 hash) +{ + u32 num_bytes = nvme_auth_key_struct_size(len); + struct nvme_dhchap_key *key = kzalloc(num_bytes, GFP_KERNEL); + + if (key) { + key->len = len; + key->hash = hash; + } + return key; +} +EXPORT_SYMBOL_GPL(nvme_auth_alloc_key); + void nvme_auth_free_key(struct nvme_dhchap_key *key) { if (!key) return; - kfree_sensitive(key->key); - kfree(key); + kfree_sensitive(key); } EXPORT_SYMBOL_GPL(nvme_auth_free_key); -u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn) +struct nvme_dhchap_key *nvme_auth_transform_key( + struct nvme_dhchap_key *key, char *nqn) { const char *hmac_name; struct crypto_shash *key_tfm; struct shash_desc *shash; - u8 *transformed_key; - int ret; + struct nvme_dhchap_key *transformed_key; + int ret, key_len; - if (!key || !key->key) { + if (!key) { pr_warn("No key specified\n"); return ERR_PTR(-ENOKEY); } if (key->hash == 0) { - transformed_key = kmemdup(key->key, key->len, GFP_KERNEL); - return transformed_key ? 
transformed_key : ERR_PTR(-ENOMEM); + key_len = nvme_auth_key_struct_size(key->len); + transformed_key = kmemdup(key, key_len, GFP_KERNEL); + if (!transformed_key) + return ERR_PTR(-ENOMEM); + return transformed_key; } hmac_name = nvme_auth_hmac_name(key->hash); if (!hmac_name) { @@ -253,7 +262,7 @@ u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn) key_tfm = crypto_alloc_shash(hmac_name, 0, 0); if (IS_ERR(key_tfm)) - return (u8 *)key_tfm; + return ERR_CAST(key_tfm); shash = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(key_tfm), @@ -263,7 +272,8 @@ u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn) goto out_free_key; } - transformed_key = kzalloc(crypto_shash_digestsize(key_tfm), GFP_KERNEL); + key_len = crypto_shash_digestsize(key_tfm); + transformed_key = nvme_auth_alloc_key(key_len, key->hash); if (!transformed_key) { ret = -ENOMEM; goto out_free_shash; @@ -282,7 +292,7 @@ u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn) ret = crypto_shash_update(shash, "NVMe-over-Fabrics", 17); if (ret < 0) goto out_free_transformed_key; - ret = crypto_shash_final(shash, transformed_key); + ret = crypto_shash_final(shash, transformed_key->key); if (ret < 0) goto out_free_transformed_key; @@ -292,7 +302,7 @@ u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn) return transformed_key; out_free_transformed_key: - kfree_sensitive(transformed_key); + nvme_auth_free_key(transformed_key); out_free_shash: kfree(shash); out_free_key: @@ -331,7 +341,6 @@ int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len, u8 *challenge, u8 *aug, size_t hlen) { struct crypto_shash *tfm; - struct shash_desc *desc; u8 *hashed_key; const char *hmac_name; int ret; @@ -359,29 +368,11 @@ int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len, goto out_free_key; } - desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm), - GFP_KERNEL); - if (!desc) { - ret = -ENOMEM; - goto 
out_free_hash; - } - desc->tfm = tfm; - ret = crypto_shash_setkey(tfm, hashed_key, hlen); if (ret) - goto out_free_desc; - - ret = crypto_shash_init(desc); - if (ret) - goto out_free_desc; - - ret = crypto_shash_update(desc, challenge, hlen); - if (ret) - goto out_free_desc; + goto out_free_hash; - ret = crypto_shash_final(desc, aug); -out_free_desc: - kfree_sensitive(desc); + ret = crypto_shash_tfm_digest(tfm, challenge, hlen, aug); out_free_hash: crypto_free_shash(tfm); out_free_key: diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c new file mode 100644 index 0000000000..ee341b83ee --- /dev/null +++ b/drivers/nvme/common/keyring.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023 Hannes Reinecke, SUSE Labs + */ + +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/key.h> +#include <linux/key-type.h> +#include <keys/user-type.h> +#include <linux/nvme.h> +#include <linux/nvme-tcp.h> +#include <linux/nvme-keyring.h> + +static struct key *nvme_keyring; + +key_serial_t nvme_keyring_id(void) +{ + return nvme_keyring->serial; +} +EXPORT_SYMBOL_GPL(nvme_keyring_id); + +static void nvme_tls_psk_describe(const struct key *key, struct seq_file *m) +{ + seq_puts(m, key->description); + seq_printf(m, ": %u", key->datalen); +} + +static bool nvme_tls_psk_match(const struct key *key, + const struct key_match_data *match_data) +{ + const char *match_id; + size_t match_len; + + if (!key->description) { + pr_debug("%s: no key description\n", __func__); + return false; + } + match_len = strlen(key->description); + pr_debug("%s: id %s len %zd\n", __func__, key->description, match_len); + + if (!match_data->raw_data) { + pr_debug("%s: no match data\n", __func__); + return false; + } + match_id = match_data->raw_data; + pr_debug("%s: match '%s' '%s' len %zd\n", + __func__, match_id, key->description, match_len); + return !memcmp(key->description, match_id, match_len); +} + +static int 
nvme_tls_psk_match_preparse(struct key_match_data *match_data) +{ + match_data->lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE; + match_data->cmp = nvme_tls_psk_match; + return 0; +} + +static struct key_type nvme_tls_psk_key_type = { + .name = "psk", + .flags = KEY_TYPE_NET_DOMAIN, + .preparse = user_preparse, + .free_preparse = user_free_preparse, + .match_preparse = nvme_tls_psk_match_preparse, + .instantiate = generic_key_instantiate, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = nvme_tls_psk_describe, + .read = user_read, +}; + +static struct key *nvme_tls_psk_lookup(struct key *keyring, + const char *hostnqn, const char *subnqn, + int hmac, bool generated) +{ + char *identity; + size_t identity_len = (NVMF_NQN_SIZE) * 2 + 11; + key_ref_t keyref; + key_serial_t keyring_id; + + identity = kzalloc(identity_len, GFP_KERNEL); + if (!identity) + return ERR_PTR(-ENOMEM); + + snprintf(identity, identity_len, "NVMe0%c%02d %s %s", + generated ? 'G' : 'R', hmac, hostnqn, subnqn); + + if (!keyring) + keyring = nvme_keyring; + keyring_id = key_serial(keyring); + pr_debug("keyring %x lookup tls psk '%s'\n", + keyring_id, identity); + keyref = keyring_search(make_key_ref(keyring, true), + &nvme_tls_psk_key_type, + identity, false); + if (IS_ERR(keyref)) { + pr_debug("lookup tls psk '%s' failed, error %ld\n", + identity, PTR_ERR(keyref)); + kfree(identity); + return ERR_PTR(-ENOKEY); + } + kfree(identity); + + return key_ref_to_ptr(keyref); +} + +/* + * NVMe PSK priority list + * + * 'Retained' PSKs (ie 'generated == false') + * should be preferred to 'generated' PSKs, + * and SHA-384 should be preferred to SHA-256. 
+ */ +struct nvme_tls_psk_priority_list { + bool generated; + enum nvme_tcp_tls_cipher cipher; +} nvme_tls_psk_prio[] = { + { .generated = false, + .cipher = NVME_TCP_TLS_CIPHER_SHA384, }, + { .generated = false, + .cipher = NVME_TCP_TLS_CIPHER_SHA256, }, + { .generated = true, + .cipher = NVME_TCP_TLS_CIPHER_SHA384, }, + { .generated = true, + .cipher = NVME_TCP_TLS_CIPHER_SHA256, }, +}; + +/* + * nvme_tls_psk_default - Return the preferred PSK to use for TLS ClientHello + */ +key_serial_t nvme_tls_psk_default(struct key *keyring, + const char *hostnqn, const char *subnqn) +{ + struct key *tls_key; + key_serial_t tls_key_id; + int prio; + + for (prio = 0; prio < ARRAY_SIZE(nvme_tls_psk_prio); prio++) { + bool generated = nvme_tls_psk_prio[prio].generated; + enum nvme_tcp_tls_cipher cipher = nvme_tls_psk_prio[prio].cipher; + + tls_key = nvme_tls_psk_lookup(keyring, hostnqn, subnqn, + cipher, generated); + if (!IS_ERR(tls_key)) { + tls_key_id = tls_key->serial; + key_put(tls_key); + return tls_key_id; + } + } + return 0; +} +EXPORT_SYMBOL_GPL(nvme_tls_psk_default); + +static int __init nvme_keyring_init(void) +{ + int err; + + nvme_keyring = keyring_alloc(".nvme", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + current_cred(), + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + (KEY_USR_ALL & ~KEY_USR_SETATTR), + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); + if (IS_ERR(nvme_keyring)) + return PTR_ERR(nvme_keyring); + + err = register_key_type(&nvme_tls_psk_key_type); + if (err) { + key_put(nvme_keyring); + return err; + } + return 0; +} + +static void __exit nvme_keyring_exit(void) +{ + unregister_key_type(&nvme_tls_psk_key_type); + key_revoke(nvme_keyring); + key_put(nvme_keyring); +} + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>"); +module_init(nvme_keyring_init); +module_exit(nvme_keyring_exit); diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 2f6a7f8c94..b309c8be72 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ 
-92,18 +92,27 @@ config NVME_TCP If unsure, say N. -config NVME_AUTH - bool "NVM Express over Fabrics In-Band Authentication" +config NVME_TCP_TLS + bool "NVMe over Fabrics TCP TLS encryption support" + depends on NVME_TCP + select NVME_KEYRING + select NET_HANDSHAKE + select KEYS + help + Enables TLS encryption for NVMe TCP using the netlink handshake API. + + The TLS handshake daemon is available at + https://github.com/oracle/ktls-utils. + + If unsure, say N. + +config NVME_HOST_AUTH + bool "NVMe over Fabrics In-Band Authentication in host side" depends on NVME_CORE - select NVME_COMMON - select CRYPTO - select CRYPTO_HMAC - select CRYPTO_SHA256 - select CRYPTO_SHA512 - select CRYPTO_DH - select CRYPTO_DH_RFC7919_GROUPS + select NVME_AUTH help - This provides support for NVMe over Fabrics In-Band Authentication. + This provides support for NVMe over Fabrics In-Band Authentication in + host side. If unsure, say N. diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index c7c3cf202d..6414ec968f 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -17,7 +17,7 @@ nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o -nvme-core-$(CONFIG_NVME_AUTH) += auth.o +nvme-core-$(CONFIG_NVME_HOST_AUTH) += auth.o nvme-y += pci.o diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 811541ce20..72c0525c75 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -23,11 +23,13 @@ struct nvme_dhchap_queue_context { struct nvme_ctrl *ctrl; struct crypto_shash *shash_tfm; struct crypto_kpp *dh_tfm; + struct nvme_dhchap_key *transformed_key; void *buf; int qid; int error; u32 s1; u32 s2; + bool bi_directional; u16 transaction; u8 status; u8 dhgroup_id; @@ -36,7 +38,6 @@ struct nvme_dhchap_queue_context { u8 c1[64]; u8 c2[64]; u8 response[64]; - u8 *host_response; u8 
*ctrl_key; u8 *host_key; u8 *sess_key; @@ -312,17 +313,17 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl, data->dhvlen = cpu_to_le16(chap->host_key_len); memcpy(data->rval, chap->response, chap->hash_len); if (ctrl->ctrl_key) { + chap->bi_directional = true; get_random_bytes(chap->c2, chap->hash_len); data->cvalid = 1; - chap->s2 = nvme_auth_get_seqnum(); memcpy(data->rval + chap->hash_len, chap->c2, chap->hash_len); dev_dbg(ctrl->device, "%s: qid %d ctrl challenge %*ph\n", __func__, chap->qid, (int)chap->hash_len, chap->c2); } else { memset(chap->c2, 0, chap->hash_len); - chap->s2 = 0; } + chap->s2 = nvme_auth_get_seqnum(); data->seqnum = cpu_to_le32(chap->s2); if (chap->host_key_len) { dev_dbg(ctrl->device, "%s: qid %d host public key %*ph\n", @@ -339,10 +340,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, struct nvme_dhchap_queue_context *chap) { struct nvmf_auth_dhchap_success1_data *data = chap->buf; - size_t size = sizeof(*data); - - if (chap->s2) - size += chap->hash_len; + size_t size = sizeof(*data) + chap->hash_len; if (size > CHAP_BUF_SIZE) { chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; @@ -428,12 +426,12 @@ static int nvme_auth_dhchap_setup_host_response(struct nvme_ctrl *ctrl, dev_dbg(ctrl->device, "%s: qid %d host response seq %u transaction %d\n", __func__, chap->qid, chap->s1, chap->transaction); - if (!chap->host_response) { - chap->host_response = nvme_auth_transform_key(ctrl->host_key, + if (!chap->transformed_key) { + chap->transformed_key = nvme_auth_transform_key(ctrl->host_key, ctrl->opts->host->nqn); - if (IS_ERR(chap->host_response)) { - ret = PTR_ERR(chap->host_response); - chap->host_response = NULL; + if (IS_ERR(chap->transformed_key)) { + ret = PTR_ERR(chap->transformed_key); + chap->transformed_key = NULL; return ret; } } else { @@ -442,7 +440,7 @@ static int nvme_auth_dhchap_setup_host_response(struct nvme_ctrl *ctrl, } ret = crypto_shash_setkey(chap->shash_tfm, - 
chap->host_response, ctrl->host_key->len); + chap->transformed_key->key, chap->transformed_key->len); if (ret) { dev_warn(ctrl->device, "qid %d: failed to set key, error %d\n", chap->qid, ret); @@ -508,19 +506,19 @@ static int nvme_auth_dhchap_setup_ctrl_response(struct nvme_ctrl *ctrl, struct nvme_dhchap_queue_context *chap) { SHASH_DESC_ON_STACK(shash, chap->shash_tfm); - u8 *ctrl_response; + struct nvme_dhchap_key *transformed_key; u8 buf[4], *challenge = chap->c2; int ret; - ctrl_response = nvme_auth_transform_key(ctrl->ctrl_key, + transformed_key = nvme_auth_transform_key(ctrl->ctrl_key, ctrl->opts->subsysnqn); - if (IS_ERR(ctrl_response)) { - ret = PTR_ERR(ctrl_response); + if (IS_ERR(transformed_key)) { + ret = PTR_ERR(transformed_key); return ret; } ret = crypto_shash_setkey(chap->shash_tfm, - ctrl_response, ctrl->ctrl_key->len); + transformed_key->key, transformed_key->len); if (ret) { dev_warn(ctrl->device, "qid %d: failed to set key, error %d\n", chap->qid, ret); @@ -586,7 +584,7 @@ static int nvme_auth_dhchap_setup_ctrl_response(struct nvme_ctrl *ctrl, out: if (challenge != chap->c2) kfree(challenge); - kfree(ctrl_response); + nvme_auth_free_key(transformed_key); return ret; } @@ -648,8 +646,8 @@ gen_sesskey: static void nvme_auth_reset_dhchap(struct nvme_dhchap_queue_context *chap) { - kfree_sensitive(chap->host_response); - chap->host_response = NULL; + nvme_auth_free_key(chap->transformed_key); + chap->transformed_key = NULL; kfree_sensitive(chap->host_key); chap->host_key = NULL; chap->host_key_len = 0; @@ -663,6 +661,7 @@ static void nvme_auth_reset_dhchap(struct nvme_dhchap_queue_context *chap) chap->error = 0; chap->s1 = 0; chap->s2 = 0; + chap->bi_directional = false; chap->transaction = 0; memset(chap->c1, 0, sizeof(chap->c1)); memset(chap->c2, 0, sizeof(chap->c2)); @@ -758,12 +757,11 @@ static void nvme_queue_auth_work(struct work_struct *work) __func__, chap->qid); mutex_lock(&ctrl->dhchap_auth_mutex); ret = 
nvme_auth_dhchap_setup_host_response(ctrl, chap); + mutex_unlock(&ctrl->dhchap_auth_mutex); if (ret) { - mutex_unlock(&ctrl->dhchap_auth_mutex); chap->error = ret; goto fail2; } - mutex_unlock(&ctrl->dhchap_auth_mutex); /* DH-HMAC-CHAP Step 3: send reply */ dev_dbg(ctrl->device, "%s: qid %d send reply\n", @@ -825,7 +823,7 @@ static void nvme_queue_auth_work(struct work_struct *work) goto fail2; } - if (chap->s2) { + if (chap->bi_directional) { /* DH-HMAC-CHAP Step 5: send success2 */ dev_dbg(ctrl->device, "%s: qid %d send success2\n", __func__, chap->qid); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d4564a2517..86149275cc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -420,7 +420,7 @@ void nvme_complete_rq(struct request *req) nvme_failover_req(req); return; case AUTHENTICATE: -#ifdef CONFIG_NVME_AUTH +#ifdef CONFIG_NVME_HOST_AUTH queue_work(nvme_wq, &ctrl->dhchap_auth_work); nvme_retry_req(req); #else @@ -1194,8 +1194,16 @@ static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl) static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl) { - queue_delayed_work(nvme_wq, &ctrl->ka_work, - nvme_keep_alive_work_period(ctrl)); + unsigned long now = jiffies; + unsigned long delay = nvme_keep_alive_work_period(ctrl); + unsigned long ka_next_check_tm = ctrl->ka_last_check_time + delay; + + if (time_after(now, ka_next_check_tm)) + delay = 0; + else + delay = ka_next_check_tm - now; + + queue_delayed_work(nvme_wq, &ctrl->ka_work, delay); } static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, @@ -3221,6 +3229,8 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended) clear_bit(NVME_CTRL_DIRTY_CAPABILITY, &ctrl->flags); ctrl->identified = true; + nvme_start_keep_alive(ctrl); + return 0; } EXPORT_SYMBOL_GPL(nvme_init_ctrl_finish); @@ -4097,14 +4107,30 @@ static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) { struct 
nvme_fw_slot_info_log *log; + u8 next_fw_slot, cur_fw_slot; log = kmalloc(sizeof(*log), GFP_KERNEL); if (!log) return; if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, NVME_CSI_NVM, - log, sizeof(*log), 0)) + log, sizeof(*log), 0)) { dev_warn(ctrl->device, "Get FW SLOT INFO log error\n"); + goto out_free_log; + } + + cur_fw_slot = log->afi & 0x7; + next_fw_slot = (log->afi & 0x70) >> 4; + if (!cur_fw_slot || (next_fw_slot && (cur_fw_slot != next_fw_slot))) { + dev_info(ctrl->device, + "Firmware is activated after next Controller Level Reset\n"); + goto out_free_log; + } + + memcpy(ctrl->subsys->firmware_rev, &log->frs[cur_fw_slot - 1], + sizeof(ctrl->subsys->firmware_rev)); + +out_free_log: kfree(log); } @@ -4374,8 +4400,6 @@ EXPORT_SYMBOL_GPL(nvme_stop_ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl) { - nvme_start_keep_alive(ctrl); - nvme_enable_aen(ctrl); /* @@ -4430,7 +4454,7 @@ static void nvme_free_ctrl(struct device *dev) if (!subsys || ctrl->instance != subsys->instance) ida_free(&nvme_instance_ida, ctrl->instance); - + key_put(ctrl->tls_key); nvme_free_cels(ctrl); nvme_mpath_uninit(ctrl); nvme_auth_stop(ctrl); @@ -4482,6 +4506,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work); memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd)); ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive; + ctrl->ka_last_check_time = jiffies; BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) > PAGE_SIZE); @@ -4756,7 +4781,6 @@ static int __init nvme_core_init(void) result = PTR_ERR(nvme_ns_chr_class); goto unregister_generic_ns; } - result = nvme_init_auth(); if (result) goto destroy_ns_chr; diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 92ba315cfe..aa88606a44 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -12,6 +12,7 @@ #include <linux/seq_file.h> #include "nvme.h" #include "fabrics.h" +#include <linux/nvme-keyring.h> 
static LIST_HEAD(nvmf_transports); static DECLARE_RWSEM(nvmf_transports_rwsem); @@ -622,6 +623,23 @@ static struct nvmf_transport_ops *nvmf_lookup_transport( return NULL; } +static struct key *nvmf_parse_key(int key_id) +{ + struct key *key; + + if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) { + pr_err("TLS is not supported\n"); + return ERR_PTR(-EINVAL); + } + + key = key_lookup(key_id); + if (IS_ERR(key)) + pr_err("key id %08x not found\n", key_id); + else + pr_debug("Using key id %08x\n", key_id); + return key; +} + static const match_table_t opt_tokens = { { NVMF_OPT_TRANSPORT, "transport=%s" }, { NVMF_OPT_TRADDR, "traddr=%s" }, @@ -643,12 +661,19 @@ static const match_table_t opt_tokens = { { NVMF_OPT_NR_WRITE_QUEUES, "nr_write_queues=%d" }, { NVMF_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" }, { NVMF_OPT_TOS, "tos=%d" }, +#ifdef CONFIG_NVME_TCP_TLS + { NVMF_OPT_KEYRING, "keyring=%d" }, + { NVMF_OPT_TLS_KEY, "tls_key=%d" }, +#endif { NVMF_OPT_FAIL_FAST_TMO, "fast_io_fail_tmo=%d" }, { NVMF_OPT_DISCOVERY, "discovery" }, #ifdef CONFIG_NVME_HOST_AUTH { NVMF_OPT_DHCHAP_SECRET, "dhchap_secret=%s" }, { NVMF_OPT_DHCHAP_CTRL_SECRET, "dhchap_ctrl_secret=%s" }, #endif +#ifdef CONFIG_NVME_TCP_TLS + { NVMF_OPT_TLS, "tls" }, +#endif { NVMF_OPT_ERR, NULL } }; @@ -659,9 +684,10 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, char *options, *o, *p; int token, ret = 0; size_t nqnlen = 0; - int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO; + int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO, key_id; uuid_t hostid; char hostnqn[NVMF_NQN_SIZE]; + struct key *key; /* Set defaults */ opts->queue_size = NVMF_DEF_QUEUE_SIZE; @@ -673,6 +699,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->hdr_digest = false; opts->data_digest = false; opts->tos = -1; /* < 0 == use transport default */ + opts->tls = false; + opts->tls_key = NULL; + opts->keyring = NULL; options = o = kstrdup(buf, GFP_KERNEL); if (!options) @@ -926,6 +955,32 @@ static int nvmf_parse_options(struct 
nvmf_ctrl_options *opts, } opts->tos = token; break; + case NVMF_OPT_KEYRING: + if (match_int(args, &key_id) || key_id <= 0) { + ret = -EINVAL; + goto out; + } + key = nvmf_parse_key(key_id); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto out; + } + key_put(opts->keyring); + opts->keyring = key; + break; + case NVMF_OPT_TLS_KEY: + if (match_int(args, &key_id) || key_id <= 0) { + ret = -EINVAL; + goto out; + } + key = nvmf_parse_key(key_id); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto out; + } + key_put(opts->tls_key); + opts->tls_key = key; + break; case NVMF_OPT_DISCOVERY: opts->discovery_nqn = true; break; @@ -957,6 +1012,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, kfree(opts->dhchap_ctrl_secret); opts->dhchap_ctrl_secret = p; break; + case NVMF_OPT_TLS: + if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) { + pr_err("TLS is not supported\n"); + ret = -EINVAL; + goto out; + } + opts->tls = true; + break; default: pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", p); @@ -1158,6 +1221,8 @@ static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts, void nvmf_free_options(struct nvmf_ctrl_options *opts) { nvmf_host_put(opts->host); + key_put(opts->keyring); + key_put(opts->tls_key); kfree(opts->transport); kfree(opts->traddr); kfree(opts->trsvcid); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 82e7a27ffb..fbaee5a7be 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -70,6 +70,9 @@ enum { NVMF_OPT_DISCOVERY = 1 << 22, NVMF_OPT_DHCHAP_SECRET = 1 << 23, NVMF_OPT_DHCHAP_CTRL_SECRET = 1 << 24, + NVMF_OPT_TLS = 1 << 25, + NVMF_OPT_KEYRING = 1 << 26, + NVMF_OPT_TLS_KEY = 1 << 27, }; /** @@ -102,6 +105,9 @@ enum { * @dhchap_secret: DH-HMAC-CHAP secret * @dhchap_ctrl_secret: DH-HMAC-CHAP controller secret for bi-directional * authentication + * @keyring: Keyring to use for key lookups + * @tls_key: TLS key for encrypted connections (TCP) + * @tls: Start TLS 
encrypted connections (TCP) * @disable_sqflow: disable controller sq flow control * @hdr_digest: generate/verify header digest (TCP) * @data_digest: generate/verify data digest (TCP) @@ -128,6 +134,9 @@ struct nvmf_ctrl_options { struct nvmf_host *host; char *dhchap_secret; char *dhchap_ctrl_secret; + struct key *keyring; + struct key *tls_key; + bool tls; bool disable_sqflow; bool hdr_digest; bool data_digest; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 46cce0ec35..e0f4129c3a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -221,11 +221,6 @@ static LIST_HEAD(nvme_fc_lport_list); static DEFINE_IDA(nvme_fc_local_port_cnt); static DEFINE_IDA(nvme_fc_ctrl_cnt); -static struct workqueue_struct *nvme_fc_wq; - -static bool nvme_fc_waiting_to_unload; -static DECLARE_COMPLETION(nvme_fc_unload_proceed); - /* * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init. @@ -255,8 +250,6 @@ nvme_fc_free_lport(struct kref *ref) /* remove from transport list */ spin_lock_irqsave(&nvme_fc_lock, flags); list_del(&lport->port_list); - if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list)) - complete(&nvme_fc_unload_proceed); spin_unlock_irqrestore(&nvme_fc_lock, flags); ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num); @@ -3125,11 +3118,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) nvme_unquiesce_admin_queue(&ctrl->ctrl); ret = nvme_init_ctrl_finish(&ctrl->ctrl, false); - if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) - ret = -EIO; if (ret) goto out_disconnect_admin_queue; - + if (test_bit(ASSOC_FAILED, &ctrl->flags)) { + ret = -EIO; + goto out_stop_keep_alive; + } /* sanity checks */ /* FC-NVME does not have other data in the capsule */ @@ -3137,7 +3131,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", ctrl->ctrl.icdoff); ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR; - goto 
out_disconnect_admin_queue; + goto out_stop_keep_alive; } /* FC-NVME supports normal SGL Data Block Descriptors */ @@ -3145,7 +3139,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) dev_err(ctrl->ctrl.device, "Mandatory sgls are not supported!\n"); ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR; - goto out_disconnect_admin_queue; + goto out_stop_keep_alive; } if (opts->queue_size > ctrl->ctrl.maxcmd) { @@ -3188,6 +3182,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) out_term_aen_ops: nvme_fc_term_aen_ops(ctrl); +out_stop_keep_alive: + nvme_stop_keep_alive(&ctrl->ctrl); out_disconnect_admin_queue: dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n", @@ -3893,10 +3889,6 @@ static int __init nvme_fc_init_module(void) { int ret; - nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0); - if (!nvme_fc_wq) - return -ENOMEM; - /* * NOTE: * It is expected that in the future the kernel will combine @@ -3914,7 +3906,7 @@ static int __init nvme_fc_init_module(void) ret = class_register(&fc_class); if (ret) { pr_err("couldn't register class fc\n"); - goto out_destroy_wq; + return ret; } /* @@ -3938,8 +3930,6 @@ out_destroy_device: device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: class_unregister(&fc_class); -out_destroy_wq: - destroy_workqueue(nvme_fc_wq); return ret; } @@ -3959,45 +3949,23 @@ nvme_fc_delete_controllers(struct nvme_fc_rport *rport) spin_unlock(&rport->lock); } -static void -nvme_fc_cleanup_for_unload(void) +static void __exit nvme_fc_exit_module(void) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; - - list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { - list_for_each_entry(rport, &lport->endp_list, endp_list) { - nvme_fc_delete_controllers(rport); - } - } -} - -static void __exit nvme_fc_exit_module(void) -{ unsigned long flags; - bool need_cleanup = false; spin_lock_irqsave(&nvme_fc_lock, flags); - nvme_fc_waiting_to_unload = true; - if (!list_empty(&nvme_fc_lport_list)) 
{ - need_cleanup = true; - nvme_fc_cleanup_for_unload(); - } + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) + list_for_each_entry(rport, &lport->endp_list, endp_list) + nvme_fc_delete_controllers(rport); spin_unlock_irqrestore(&nvme_fc_lock, flags); - if (need_cleanup) { - pr_info("%s: waiting for ctlr deletes\n", __func__); - wait_for_completion(&nvme_fc_unload_proceed); - pr_info("%s: ctrl deletes complete\n", __func__); - } + flush_workqueue(nvme_delete_wq); nvmf_unregister_transport(&nvme_fc_transport); - ida_destroy(&nvme_fc_local_port_cnt); - ida_destroy(&nvme_fc_ctrl_cnt); - device_destroy(&fc_class, MKDEV(0, 0)); class_unregister(&fc_class); - destroy_workqueue(nvme_fc_wq); } module_init(nvme_fc_init_module); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ba62d42d2a..e7411dac00 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -355,7 +355,7 @@ struct nvme_ctrl { struct work_struct ana_work; #endif -#ifdef CONFIG_NVME_AUTH +#ifdef CONFIG_NVME_HOST_AUTH struct work_struct dhchap_auth_work; struct mutex dhchap_auth_mutex; struct nvme_dhchap_queue_context *dhchap_ctxs; @@ -363,6 +363,7 @@ struct nvme_ctrl { struct nvme_dhchap_key *ctrl_key; u16 transaction; #endif + struct key *tls_key; /* Power saving configuration */ u64 ps_max_latency_us; @@ -1059,7 +1060,7 @@ static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl) return ctrl->sgls & ((1 << 0) | (1 << 1)); } -#ifdef CONFIG_NVME_AUTH +#ifdef CONFIG_NVME_HOST_AUTH int __init nvme_init_auth(void); void __exit nvme_exit_auth(void); int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f8e92404a6..61af7ff1a9 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -924,7 +924,6 @@ static bool nvme_prep_rq_batch(struct nvme_queue *nvmeq, struct request *req) if (unlikely(!nvme_check_ready(&nvmeq->dev->ctrl, req, true))) return false; - 
req->mq_hctx->tags->rqs[req->tag] = req; return nvme_prep_rq(nvmeq->dev, req) == BLK_STS_OK; } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c04317a966..81e2621169 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1083,6 +1083,7 @@ destroy_io: nvme_rdma_free_io_queues(ctrl); } destroy_admin: + nvme_stop_keep_alive(&ctrl->ctrl); nvme_quiesce_admin_queue(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 212e1b05d2..c6b7fbd4d3 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -409,7 +409,7 @@ static ssize_t dctype_show(struct device *dev, } static DEVICE_ATTR_RO(dctype); -#ifdef CONFIG_NVME_AUTH +#ifdef CONFIG_NVME_HOST_AUTH static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -527,6 +527,19 @@ static DEVICE_ATTR(dhchap_ctrl_secret, S_IRUGO | S_IWUSR, nvme_ctrl_dhchap_ctrl_secret_show, nvme_ctrl_dhchap_ctrl_secret_store); #endif +#ifdef CONFIG_NVME_TCP_TLS +static ssize_t tls_key_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (!ctrl->tls_key) + return 0; + return sysfs_emit(buf, "%08x", key_serial(ctrl->tls_key)); +} +static DEVICE_ATTR_RO(tls_key); +#endif + static struct attribute *nvme_dev_attrs[] = { &dev_attr_reset_controller.attr, &dev_attr_rescan_controller.attr, @@ -550,10 +563,13 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_kato.attr, &dev_attr_cntrltype.attr, &dev_attr_dctype.attr, -#ifdef CONFIG_NVME_AUTH +#ifdef CONFIG_NVME_HOST_AUTH &dev_attr_dhchap_secret.attr, &dev_attr_dhchap_ctrl_secret.attr, #endif +#ifdef CONFIG_NVME_TCP_TLS + &dev_attr_tls_key.attr, +#endif NULL }; @@ -577,12 +593,17 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, return 0; if (a == &dev_attr_fast_io_fail_tmo.attr && !ctrl->opts) 
return 0; -#ifdef CONFIG_NVME_AUTH +#ifdef CONFIG_NVME_HOST_AUTH if (a == &dev_attr_dhchap_secret.attr && !ctrl->opts) return 0; if (a == &dev_attr_dhchap_ctrl_secret.attr && !ctrl->opts) return 0; #endif +#ifdef CONFIG_NVME_TCP_TLS + if (a == &dev_attr_tls_key.attr && + (!ctrl->opts || strcmp(ctrl->opts->transport, "tcp"))) + return 0; +#endif return a->mode; } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index f1d62d7442..08805f0278 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -8,9 +8,14 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/err.h> +#include <linux/key.h> #include <linux/nvme-tcp.h> +#include <linux/nvme-keyring.h> #include <net/sock.h> #include <net/tcp.h> +#include <net/tls.h> +#include <net/tls_prot.h> +#include <net/handshake.h> #include <linux/blk-mq.h> #include <crypto/hash.h> #include <net/busy_poll.h> @@ -31,6 +36,16 @@ static int so_priority; module_param(so_priority, int, 0644); MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority"); +/* + * TLS handshake timeout + */ +static int tls_handshake_timeout = 10; +#ifdef CONFIG_NVME_TCP_TLS +module_param(tls_handshake_timeout, int, 0644); +MODULE_PARM_DESC(tls_handshake_timeout, + "nvme TLS handshake timeout in seconds (default 10)"); +#endif + #ifdef CONFIG_DEBUG_LOCK_ALLOC /* lockdep can detect a circular dependency of the form * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock @@ -146,7 +161,8 @@ struct nvme_tcp_queue { struct ahash_request *snd_hash; __le32 exp_ddgst; __le32 recv_ddgst; - + struct completion tls_complete; + int tls_err; struct page_frag_cache pf_cache; void (*state_change)(struct sock *); @@ -189,6 +205,14 @@ static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue) return queue - queue->ctrl->queues; } +static inline bool nvme_tcp_tls(struct nvme_ctrl *ctrl) +{ + if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) + return 0; + + return ctrl->opts->tls; +} + static inline struct blk_mq_tags 
*nvme_tcp_tagset(struct nvme_tcp_queue *queue) { u32 queue_idx = nvme_tcp_queue_id(queue); @@ -1338,7 +1362,9 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) } noreclaim_flag = memalloc_noreclaim_save(); - sock_release(queue->sock); + /* ->sock will be released by fput() */ + fput(queue->sock->file); + queue->sock = NULL; memalloc_noreclaim_restore(noreclaim_flag); kfree(queue->pdu); @@ -1350,6 +1376,8 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) { struct nvme_tcp_icreq_pdu *icreq; struct nvme_tcp_icresp_pdu *icresp; + char cbuf[CMSG_LEN(sizeof(char))] = {}; + u8 ctype; struct msghdr msg = {}; struct kvec iov; bool ctrl_hdgst, ctrl_ddgst; @@ -1381,17 +1409,36 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) iov.iov_base = icreq; iov.iov_len = sizeof(*icreq); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); - if (ret < 0) + if (ret < 0) { + pr_warn("queue %d: failed to send icreq, error %d\n", + nvme_tcp_queue_id(queue), ret); goto free_icresp; + } memset(&msg, 0, sizeof(msg)); iov.iov_base = icresp; iov.iov_len = sizeof(*icresp); + if (nvme_tcp_tls(&queue->ctrl->ctrl)) { + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + } ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); - if (ret < 0) + if (ret < 0) { + pr_warn("queue %d: failed to receive icresp, error %d\n", + nvme_tcp_queue_id(queue), ret); goto free_icresp; - + } + ret = -ENOTCONN; + if (nvme_tcp_tls(&queue->ctrl->ctrl)) { + ctype = tls_get_record_type(queue->sock->sk, + (struct cmsghdr *)cbuf); + if (ctype != TLS_RECORD_TYPE_DATA) { + pr_err("queue %d: unhandled TLS record %d\n", + nvme_tcp_queue_id(queue), ctype); + goto free_icresp; + } + } ret = -EINVAL; if (icresp->hdr.type != nvme_tcp_icresp) { pr_err("queue %d: bad type returned %d\n", @@ -1507,11 +1554,90 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, 
-1, false); } -static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid) +static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid) +{ + struct nvme_tcp_queue *queue = data; + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + struct key *tls_key; + + dev_dbg(ctrl->ctrl.device, "queue %d: TLS handshake done, key %x, status %d\n", + qid, pskid, status); + + if (status) { + queue->tls_err = -status; + goto out_complete; + } + + tls_key = key_lookup(pskid); + if (IS_ERR(tls_key)) { + dev_warn(ctrl->ctrl.device, "queue %d: Invalid key %x\n", + qid, pskid); + queue->tls_err = -ENOKEY; + } else { + ctrl->ctrl.tls_key = tls_key; + queue->tls_err = 0; + } + +out_complete: + complete(&queue->tls_complete); +} + +static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl, + struct nvme_tcp_queue *queue, + key_serial_t pskid) +{ + int qid = nvme_tcp_queue_id(queue); + int ret; + struct tls_handshake_args args; + unsigned long tmo = tls_handshake_timeout * HZ; + key_serial_t keyring = nvme_keyring_id(); + + dev_dbg(nctrl->device, "queue %d: start TLS with key %x\n", + qid, pskid); + memset(&args, 0, sizeof(args)); + args.ta_sock = queue->sock; + args.ta_done = nvme_tcp_tls_done; + args.ta_data = queue; + args.ta_my_peerids[0] = pskid; + args.ta_num_peerids = 1; + if (nctrl->opts->keyring) + keyring = key_serial(nctrl->opts->keyring); + args.ta_keyring = keyring; + args.ta_timeout_ms = tls_handshake_timeout * 1000; + queue->tls_err = -EOPNOTSUPP; + init_completion(&queue->tls_complete); + ret = tls_client_hello_psk(&args, GFP_KERNEL); + if (ret) { + dev_err(nctrl->device, "queue %d: failed to start TLS: %d\n", + qid, ret); + return ret; + } + ret = wait_for_completion_interruptible_timeout(&queue->tls_complete, tmo); + if (ret <= 0) { + if (ret == 0) + ret = -ETIMEDOUT; + + dev_err(nctrl->device, + "queue %d: TLS handshake failed, error %d\n", + qid, ret); + tls_handshake_cancel(queue->sock->sk); + } else { + 
dev_dbg(nctrl->device, + "queue %d: TLS handshake complete, error %d\n", + qid, queue->tls_err); + ret = queue->tls_err; + } + return ret; +} + +static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, + key_serial_t pskid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; int ret, rcv_pdu_size; + struct file *sock_file; mutex_init(&queue->queue_lock); queue->ctrl = ctrl; @@ -1534,6 +1660,11 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid) goto err_destroy_mutex; } + sock_file = sock_alloc_file(queue->sock, O_CLOEXEC, NULL); + if (IS_ERR(sock_file)) { + ret = PTR_ERR(sock_file); + goto err_destroy_mutex; + } nvme_tcp_reclassify_socket(queue->sock); /* Single syn retry */ @@ -1624,6 +1755,13 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid) goto err_rcv_pdu; } + /* If PSKs are configured try to start TLS */ + if (IS_ENABLED(CONFIG_NVME_TCP_TLS) && pskid) { + ret = nvme_tcp_start_tls(nctrl, queue, pskid); + if (ret) + goto err_init_connect; + } + ret = nvme_tcp_init_connection(queue); if (ret) goto err_init_connect; @@ -1640,7 +1778,8 @@ err_crypto: if (queue->hdr_digest || queue->data_digest) nvme_tcp_free_crypto(queue); err_sock: - sock_release(queue->sock); + /* ->sock will be released by fput() */ + fput(queue->sock->file); queue->sock = NULL; err_destroy_mutex: mutex_destroy(&queue->send_mutex); @@ -1772,10 +1911,25 @@ out_stop_queues: static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) { int ret; + key_serial_t pskid = 0; + + if (nvme_tcp_tls(ctrl)) { + if (ctrl->opts->tls_key) + pskid = key_serial(ctrl->opts->tls_key); + else + pskid = nvme_tls_psk_default(ctrl->opts->keyring, + ctrl->opts->host->nqn, + ctrl->opts->subsysnqn); + if (!pskid) { + dev_err(ctrl->device, "no valid PSK found\n"); + ret = -ENOKEY; + goto out_free_queue; + } + } - ret = nvme_tcp_alloc_queue(ctrl, 0); + ret = nvme_tcp_alloc_queue(ctrl, 0, pskid); if (ret) - return ret; + goto 
out_free_queue; ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl)); if (ret) @@ -1792,8 +1946,13 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) { int i, ret; + if (nvme_tcp_tls(ctrl) && !ctrl->tls_key) { + dev_err(ctrl->device, "no PSK negotiated\n"); + return -ENOKEY; + } for (i = 1; i < ctrl->queue_count; i++) { - ret = nvme_tcp_alloc_queue(ctrl, i); + ret = nvme_tcp_alloc_queue(ctrl, i, + key_serial(ctrl->tls_key)); if (ret) goto out_free_queues; } @@ -2078,11 +2237,8 @@ destroy_io: nvme_tcp_destroy_io_queues(ctrl, new); } destroy_admin: - nvme_quiesce_admin_queue(ctrl); - blk_sync_queue(ctrl->admin_q); - nvme_tcp_stop_queue(ctrl, 0); - nvme_cancel_admin_tagset(ctrl); - nvme_tcp_destroy_admin_queue(ctrl, new); + nvme_stop_keep_alive(ctrl); + nvme_tcp_teardown_admin_queue(ctrl, false); return ret; } @@ -2628,7 +2784,8 @@ static struct nvmf_transport_ops nvme_tcp_transport = { NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO | NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES | - NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE, + NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE | NVMF_OPT_TLS | + NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY, .create_ctrl = nvme_tcp_create_ctrl, }; diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 79fc64035e..872dd1a0ac 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -4,6 +4,8 @@ config NVME_TARGET tristate "NVMe Target support" depends on BLOCK depends on CONFIGFS_FS + select NVME_KEYRING if NVME_TARGET_TCP_TLS + select KEYS if NVME_TARGET_TCP_TLS select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY select SGL_ALLOC help @@ -84,17 +86,24 @@ config NVME_TARGET_TCP If unsure, say N. +config NVME_TARGET_TCP_TLS + bool "NVMe over Fabrics TCP target TLS encryption support" + depends on NVME_TARGET_TCP + select NET_HANDSHAKE + help + Enables TLS encryption for the NVMe TCP target using the netlink handshake API. 
+ + The TLS handshake daemon is available at + https://github.com/oracle/ktls-utils. + + If unsure, say N. + config NVME_TARGET_AUTH - bool "NVMe over Fabrics In-band Authentication support" + bool "NVMe over Fabrics In-band Authentication in target side" depends on NVME_TARGET - select NVME_COMMON - select CRYPTO - select CRYPTO_HMAC - select CRYPTO_SHA256 - select CRYPTO_SHA512 - select CRYPTO_DH - select CRYPTO_DH_RFC7919_GROUPS + select NVME_AUTH help - This enables support for NVMe over Fabrics In-band Authentication + This enables support for NVMe over Fabrics In-band Authentication in + target side. If unsure, say N. diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index 4dcddcf952..3ddbc3880c 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -267,7 +267,8 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, struct shash_desc *shash; struct nvmet_ctrl *ctrl = req->sq->ctrl; const char *hash_name; - u8 *challenge = req->sq->dhchap_c1, *host_response; + u8 *challenge = req->sq->dhchap_c1; + struct nvme_dhchap_key *transformed_key; u8 buf[4]; int ret; @@ -291,14 +292,15 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, goto out_free_tfm; } - host_response = nvme_auth_transform_key(ctrl->host_key, ctrl->hostnqn); - if (IS_ERR(host_response)) { - ret = PTR_ERR(host_response); + transformed_key = nvme_auth_transform_key(ctrl->host_key, + ctrl->hostnqn); + if (IS_ERR(transformed_key)) { + ret = PTR_ERR(transformed_key); goto out_free_tfm; } - ret = crypto_shash_setkey(shash_tfm, host_response, - ctrl->host_key->len); + ret = crypto_shash_setkey(shash_tfm, transformed_key->key, + transformed_key->len); if (ret) goto out_free_response; @@ -365,7 +367,7 @@ out: kfree(challenge); kfree(shash); out_free_response: - kfree_sensitive(host_response); + nvme_auth_free_key(transformed_key); out_free_tfm: crypto_free_shash(shash_tfm); return 0; @@ -378,7 +380,8 @@ int nvmet_auth_ctrl_hash(struct nvmet_req 
*req, u8 *response, struct shash_desc *shash; struct nvmet_ctrl *ctrl = req->sq->ctrl; const char *hash_name; - u8 *challenge = req->sq->dhchap_c2, *ctrl_response; + u8 *challenge = req->sq->dhchap_c2; + struct nvme_dhchap_key *transformed_key; u8 buf[4]; int ret; @@ -402,15 +405,15 @@ int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response, goto out_free_tfm; } - ctrl_response = nvme_auth_transform_key(ctrl->ctrl_key, + transformed_key = nvme_auth_transform_key(ctrl->ctrl_key, ctrl->subsysnqn); - if (IS_ERR(ctrl_response)) { - ret = PTR_ERR(ctrl_response); + if (IS_ERR(transformed_key)) { + ret = PTR_ERR(transformed_key); goto out_free_tfm; } - ret = crypto_shash_setkey(shash_tfm, ctrl_response, - ctrl->ctrl_key->len); + ret = crypto_shash_setkey(shash_tfm, transformed_key->key, + transformed_key->len); if (ret) goto out_free_response; @@ -474,7 +477,7 @@ out: kfree(challenge); kfree(shash); out_free_response: - kfree_sensitive(ctrl_response); + nvme_auth_free_key(transformed_key); out_free_tfm: crypto_free_shash(shash_tfm); return 0; diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 01b2a3d1a5..d937fe0512 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -15,6 +15,7 @@ #ifdef CONFIG_NVME_TARGET_AUTH #include <linux/nvme-auth.h> #endif +#include <linux/nvme-keyring.h> #include <crypto/hash.h> #include <crypto/kpp.h> #include <linux/nospec.h> @@ -160,10 +161,14 @@ static const struct nvmet_type_name_map nvmet_addr_treq[] = { { NVMF_TREQ_NOT_REQUIRED, "not required" }, }; +static inline u8 nvmet_port_disc_addr_treq_mask(struct nvmet_port *port) +{ + return (port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK); +} + static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page) { - u8 treq = to_nvmet_port(item)->disc_addr.treq & - NVME_TREQ_SECURE_CHANNEL_MASK; + u8 treq = nvmet_port_disc_addr_treq_secure_channel(to_nvmet_port(item)); int i; for (i = 0; i < 
ARRAY_SIZE(nvmet_addr_treq); i++) { @@ -179,7 +184,7 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); - u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK; + u8 treq = nvmet_port_disc_addr_treq_mask(port); int i; if (nvmet_is_port_enabled(port, __func__)) @@ -194,6 +199,20 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, return -EINVAL; found: + if (port->disc_addr.trtype == NVMF_TRTYPE_TCP && + port->disc_addr.tsas.tcp.sectype == NVMF_TCP_SECTYPE_TLS13) { + switch (nvmet_addr_treq[i].type) { + case NVMF_TREQ_NOT_SPECIFIED: + pr_debug("treq '%s' not allowed for TLS1.3\n", + nvmet_addr_treq[i].name); + return -EINVAL; + case NVMF_TREQ_NOT_REQUIRED: + pr_warn("Allow non-TLS connections while TLS1.3 is enabled\n"); + break; + default: + break; + } + } treq |= nvmet_addr_treq[i].type; port->disc_addr.treq = treq; return count; @@ -304,6 +323,11 @@ static void nvmet_port_init_tsas_rdma(struct nvmet_port *port) port->disc_addr.tsas.rdma.cms = NVMF_RDMA_CMS_RDMA_CM; } +static void nvmet_port_init_tsas_tcp(struct nvmet_port *port, int sectype) +{ + port->disc_addr.tsas.tcp.sectype = sectype; +} + static ssize_t nvmet_addr_trtype_store(struct config_item *item, const char *page, size_t count) { @@ -326,11 +350,99 @@ found: port->disc_addr.trtype = nvmet_transport[i].type; if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) nvmet_port_init_tsas_rdma(port); + else if (port->disc_addr.trtype == NVMF_TRTYPE_TCP) + nvmet_port_init_tsas_tcp(port, NVMF_TCP_SECTYPE_NONE); return count; } CONFIGFS_ATTR(nvmet_, addr_trtype); +static const struct nvmet_type_name_map nvmet_addr_tsas_tcp[] = { + { NVMF_TCP_SECTYPE_NONE, "none" }, + { NVMF_TCP_SECTYPE_TLS13, "tls1.3" }, +}; + +static const struct nvmet_type_name_map nvmet_addr_tsas_rdma[] = { + { NVMF_RDMA_QPTYPE_CONNECTED, "connected" }, + { NVMF_RDMA_QPTYPE_DATAGRAM, "datagram" }, +}; + +static ssize_t 
nvmet_addr_tsas_show(struct config_item *item, + char *page) +{ + struct nvmet_port *port = to_nvmet_port(item); + int i; + + if (port->disc_addr.trtype == NVMF_TRTYPE_TCP) { + for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) { + if (port->disc_addr.tsas.tcp.sectype == nvmet_addr_tsas_tcp[i].type) + return sprintf(page, "%s\n", nvmet_addr_tsas_tcp[i].name); + } + } else if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) { + for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_rdma); i++) { + if (port->disc_addr.tsas.rdma.qptype == nvmet_addr_tsas_rdma[i].type) + return sprintf(page, "%s\n", nvmet_addr_tsas_rdma[i].name); + } + } + return sprintf(page, "reserved\n"); +} + +static ssize_t nvmet_addr_tsas_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_port *port = to_nvmet_port(item); + u8 treq = nvmet_port_disc_addr_treq_mask(port); + u8 sectype; + int i; + + if (nvmet_is_port_enabled(port, __func__)) + return -EACCES; + + if (port->disc_addr.trtype != NVMF_TRTYPE_TCP) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) { + if (sysfs_streq(page, nvmet_addr_tsas_tcp[i].name)) { + sectype = nvmet_addr_tsas_tcp[i].type; + goto found; + } + } + + pr_err("Invalid value '%s' for tsas\n", page); + return -EINVAL; + +found: + if (sectype == NVMF_TCP_SECTYPE_TLS13) { + if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS)) { + pr_err("TLS is not supported\n"); + return -EINVAL; + } + if (!port->keyring) { + pr_err("TLS keyring not configured\n"); + return -EINVAL; + } + } + + nvmet_port_init_tsas_tcp(port, sectype); + /* + * If TLS is enabled TREQ should be set to 'required' per default + */ + if (sectype == NVMF_TCP_SECTYPE_TLS13) { + u8 sc = nvmet_port_disc_addr_treq_secure_channel(port); + + if (sc == NVMF_TREQ_NOT_SPECIFIED) + treq |= NVMF_TREQ_REQUIRED; + else + treq |= sc; + } else { + treq |= NVMF_TREQ_NOT_SPECIFIED; + } + port->disc_addr.treq = treq; + return count; +} + +CONFIGFS_ATTR(nvmet_, addr_tsas); + /* * Namespace 
structures & file operation functions below */ @@ -1734,6 +1846,7 @@ static void nvmet_port_release(struct config_item *item) flush_workqueue(nvmet_wq); list_del(&port->global_entry); + key_put(port->keyring); kfree(port->ana_state); kfree(port); } @@ -1744,6 +1857,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = { &nvmet_attr_addr_traddr, &nvmet_attr_addr_trsvcid, &nvmet_attr_addr_trtype, + &nvmet_attr_addr_tsas, &nvmet_attr_param_inline_data_size, #ifdef CONFIG_BLK_DEV_INTEGRITY &nvmet_attr_param_pi_enable, @@ -1782,6 +1896,14 @@ static struct config_group *nvmet_ports_make(struct config_group *group, return ERR_PTR(-ENOMEM); } + if (IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS) && nvme_keyring_id()) { + port->keyring = key_lookup(nvme_keyring_id()); + if (IS_ERR(port->keyring)) { + pr_warn("NVMe keyring not available, disabling TLS\n"); + port->keyring = NULL; + } + } + for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) { if (i == NVMET_DEFAULT_ANA_GRPID) port->ana_state[1] = NVME_ANA_OPTIMIZED; diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index 1d9854484e..eb7785be0c 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -163,11 +163,11 @@ static u16 nvmet_auth_reply(struct nvmet_req *req, void *d) pr_debug("%s: ctrl %d qid %d challenge %*ph\n", __func__, ctrl->cntlid, req->sq->qid, data->hl, req->sq->dhchap_c2); - req->sq->dhchap_s2 = le32_to_cpu(data->seqnum); } else { req->sq->authenticated = true; req->sq->dhchap_c2 = NULL; } + req->sq->dhchap_s2 = le32_to_cpu(data->seqnum); return 0; } diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1ab6601fdd..666130878e 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -111,6 +111,8 @@ struct nvmet_fc_tgtport { struct nvmet_fc_port_entry *pe; struct kref ref; u32 max_sg_cnt; + + struct work_struct put_work; }; struct nvmet_fc_port_entry { @@ -145,8 +147,8 @@ struct nvmet_fc_tgt_queue { 
struct list_head avail_defer_list; struct workqueue_struct *work_q; struct kref ref; - struct rcu_head rcu; - struct nvmet_fc_fcp_iod fod[]; /* array of fcp_iods */ + /* array of fcp_iods */ + struct nvmet_fc_fcp_iod fod[] __counted_by(sqsize); } __aligned(sizeof(unsigned long long)); struct nvmet_fc_hostport { @@ -165,10 +167,9 @@ struct nvmet_fc_tgt_assoc { struct nvmet_fc_hostport *hostport; struct nvmet_fc_ls_iod *rcv_disconn; struct list_head a_list; - struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1]; + struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; - struct rcu_head rcu; }; @@ -248,6 +249,13 @@ static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue); static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); +static void nvmet_fc_put_tgtport_work(struct work_struct *work) +{ + struct nvmet_fc_tgtport *tgtport = + container_of(work, struct nvmet_fc_tgtport, put_work); + + nvmet_fc_tgtport_put(tgtport); +} static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod); @@ -359,7 +367,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - return; + goto out_putwork; } list_del(&lsop->lsreq_list); @@ -372,7 +380,8 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); - nvmet_fc_tgtport_put(tgtport); +out_putwork: + queue_work(nvmet_wq, &tgtport->put_work); } static int @@ -801,14 +810,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, if (!queue) return NULL; - if (!nvmet_fc_tgt_a_get(assoc)) - goto out_free_queue; - queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0, 
assoc->tgtport->fc_target_port.port_num, assoc->a_id, qid); if (!queue->work_q) - goto out_a_put; + goto out_free_queue; queue->qid = qid; queue->sqsize = sqsize; @@ -830,15 +836,13 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, goto out_fail_iodlist; WARN_ON(assoc->queues[qid]); - rcu_assign_pointer(assoc->queues[qid], queue); + assoc->queues[qid] = queue; return queue; out_fail_iodlist: nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue); destroy_workqueue(queue->work_q); -out_a_put: - nvmet_fc_tgt_a_put(assoc); out_free_queue: kfree(queue); return NULL; @@ -851,15 +855,11 @@ nvmet_fc_tgt_queue_free(struct kref *ref) struct nvmet_fc_tgt_queue *queue = container_of(ref, struct nvmet_fc_tgt_queue, ref); - rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL); - nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue); - nvmet_fc_tgt_a_put(queue->assoc); - destroy_workqueue(queue->work_q); - kfree_rcu(queue, rcu); + kfree(queue); } static void @@ -968,7 +968,7 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (association_id == assoc->association_id) { - queue = rcu_dereference(assoc->queues[qid]); + queue = assoc->queues[qid]; if (queue && (!atomic_read(&queue->connected) || !nvmet_fc_tgt_q_get(queue))) @@ -1077,8 +1077,6 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) /* new allocation not needed */ kfree(newhost); newhost = match; - /* no new allocation - release reference */ - nvmet_fc_tgtport_put(tgtport); } else { newhost->tgtport = tgtport; newhost->hosthandle = hosthandle; @@ -1093,13 +1091,28 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) } static void -nvmet_fc_delete_assoc(struct work_struct *work) +nvmet_fc_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); +} + +static void 
+nvmet_fc_delete_assoc_work(struct work_struct *work) { struct nvmet_fc_tgt_assoc *assoc = container_of(work, struct nvmet_fc_tgt_assoc, del_work); + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_delete_assoc(assoc); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_tgtport_get(assoc->tgtport); + queue_work(nvmet_wq, &assoc->del_work); } static struct nvmet_fc_tgt_assoc * @@ -1111,6 +1124,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) int idx; bool needrandom = true; + if (!tgtport->pe) + return NULL; + assoc = kzalloc(sizeof(*assoc), GFP_KERNEL); if (!assoc) return NULL; @@ -1130,7 +1146,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); - INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); + INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc_work); atomic_set(&assoc->terminating, 0); while (needrandom) { @@ -1171,13 +1187,18 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; struct nvmet_fc_ls_iod *oldls; unsigned long flags; + int i; + + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + nvmet_fc_delete_target_queue(assoc->queues[i]); + } /* Send Disconnect now that all i/o has completed */ nvmet_fc_xmt_disconnect_assoc(assoc); nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); - list_del_rcu(&assoc->a_list); oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); /* if pending Rcv Disconnect Association LS, send rsp now */ @@ -1187,8 +1208,8 @@ nvmet_fc_target_assoc_free(struct kref *ref) dev_info(tgtport->dev, "{%d:%d} Association freed\n", tgtport->fc_target_port.port_num, assoc->a_id); - kfree_rcu(assoc, rcu); nvmet_fc_tgtport_put(tgtport); 
+ kfree(assoc); } static void @@ -1207,7 +1228,7 @@ static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) { struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - struct nvmet_fc_tgt_queue *queue; + unsigned long flags; int i, terminating; terminating = atomic_xchg(&assoc->terminating, 1); @@ -1216,29 +1237,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) if (terminating) return; + spin_lock_irqsave(&tgtport->lock, flags); + list_del_rcu(&assoc->a_list); + spin_unlock_irqrestore(&tgtport->lock, flags); - for (i = NVMET_NR_QUEUES; i >= 0; i--) { - rcu_read_lock(); - queue = rcu_dereference(assoc->queues[i]); - if (!queue) { - rcu_read_unlock(); - continue; - } + synchronize_rcu(); - if (!nvmet_fc_tgt_q_get(queue)) { - rcu_read_unlock(); - continue; - } - rcu_read_unlock(); - nvmet_fc_delete_target_queue(queue); - nvmet_fc_tgt_q_put(queue); + /* ensure all in-flight I/Os have been processed */ + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + flush_workqueue(assoc->queues[i]->work_q); } dev_info(tgtport->dev, "{%d:%d} Association deleted\n", tgtport->fc_target_port.port_num, assoc->a_id); - - nvmet_fc_tgt_a_put(assoc); } static struct nvmet_fc_tgt_assoc * @@ -1414,6 +1427,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); newrec->max_sg_cnt = template->max_sgl_segments; + INIT_WORK(&newrec->put_work, nvmet_fc_put_tgtport_work); ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { @@ -1491,9 +1505,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); } @@ -1546,9 +1559,8 @@ nvmet_fc_invalidate_host(struct 
nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1580,7 +1592,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { - queue = rcu_dereference(assoc->queues[0]); + queue = assoc->queues[0]; if (queue && queue->nvme_sq.ctrl == ctrl) { if (nvmet_fc_tgt_a_get(assoc)) found_ctrl = true; @@ -1592,9 +1604,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); return; } @@ -1624,6 +1635,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); + flush_workqueue(nvmet_wq); + /* * should terminate LS's as well. 
However, LS's will be generated * at the tail end of association termination, so they likely don't @@ -1869,9 +1882,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, sizeof(struct fcnvme_ls_disconnect_assoc_acc)), FCNVME_LS_DISCONNECT_ASSOC); - /* release get taken in nvmet_fc_find_target_assoc */ - nvmet_fc_tgt_a_put(assoc); - /* * The rules for LS response says the response cannot * go back until ABTS's have been sent for all outstanding @@ -1886,8 +1896,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, assoc->rcv_disconn = iod; spin_unlock_irqrestore(&tgtport->lock, flags); - nvmet_fc_delete_target_assoc(assoc); - if (oldls) { dev_info(tgtport->dev, "{%d:%d} Multiple Disconnect Association LS's " @@ -1903,6 +1911,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); + return false; } @@ -2539,8 +2550,9 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.cmd = &fod->cmdiubuf.sqe; fod->req.cqe = &fod->rspiubuf.cqe; - if (tgtport->pe) - fod->req.port = tgtport->pe->port; + if (!tgtport->pe) + goto transport_error; + fod->req.port = tgtport->pe->port; /* clear any response payload */ memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); @@ -2901,6 +2913,9 @@ nvmet_fc_remove_port(struct nvmet_port *port) nvmet_fc_portentry_unbind(pe); + /* terminate any outstanding associations */ + __nvmet_fc_free_assocs(pe->tgtport); + kfree(pe); } @@ -2932,6 +2947,9 @@ static int __init nvmet_fc_init_module(void) static void __exit nvmet_fc_exit_module(void) { + /* ensure any shutdown operation, e.g. 
delete ctrls have finished */ + flush_workqueue(nvmet_wq); + /* sanity check - all lports should be removed */ if (!list_empty(&nvmet_fc_target_list)) pr_warn("%s: targetport list not empty\n", __func__); diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c65a73433c..e6d4226827 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -358,7 +358,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, if (!rport->targetport) { tls_req->status = -ECONNREFUSED; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); return ret; @@ -391,7 +391,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, if (remoteport) { rport = remoteport->private; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); } @@ -446,7 +446,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, if (!tport->remoteport) { tls_req->status = -ECONNREFUSED; spin_lock(&tport->lock); - list_add_tail(&tport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &tport->ls_list); spin_unlock(&tport->lock); queue_work(nvmet_wq, &tport->ls_work); return ret; diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 468833675c..f11400a908 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -50,9 +50,10 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) void nvmet_bdev_ns_disable(struct nvmet_ns *ns) { - if (ns->bdev) { - blkdev_put(ns->bdev, NULL); + if (ns->bdev_handle) { + bdev_release(ns->bdev_handle); ns->bdev = NULL; + ns->bdev_handle = NULL; } } @@ -84,17 +85,18 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) if 
(ns->buffered_io) return -ENOTBLK; - ns->bdev = blkdev_get_by_path(ns->device_path, - BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, NULL); - if (IS_ERR(ns->bdev)) { - ret = PTR_ERR(ns->bdev); + ns->bdev_handle = bdev_open_by_path(ns->device_path, + BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, NULL); + if (IS_ERR(ns->bdev_handle)) { + ret = PTR_ERR(ns->bdev_handle); if (ret != -ENOTBLK) { - pr_err("failed to open block device %s: (%ld)\n", - ns->device_path, PTR_ERR(ns->bdev)); + pr_err("failed to open block device %s: (%d)\n", + ns->device_path, ret); } - ns->bdev = NULL; + ns->bdev_handle = NULL; return ret; } + ns->bdev = ns->bdev_handle->bdev; ns->size = bdev_nr_bytes(ns->bdev); ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 48d5df054c..9cb434c580 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -466,6 +466,8 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) out_destroy_io: nvme_loop_destroy_io_queues(ctrl); out_destroy_admin: + nvme_quiesce_admin_queue(&ctrl->ctrl); + nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_loop_destroy_admin_queue(ctrl); out_disable: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); @@ -600,6 +602,8 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, return &ctrl->ctrl; out_remove_admin_queue: + nvme_quiesce_admin_queue(&ctrl->ctrl); + nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_loop_destroy_admin_queue(ctrl); out_free_queues: kfree(ctrl->queues); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 8cfd60f3b5..6c8acebe1a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -58,6 +58,7 @@ struct nvmet_ns { struct percpu_ref ref; + struct bdev_handle *bdev_handle; struct block_device *bdev; struct file *file; bool readonly; @@ -158,6 +159,7 @@ struct nvmet_port { struct config_group ana_groups_group; struct nvmet_ana_group 
ana_default_group; enum nvme_ana_state *ana_state; + struct key *keyring; void *priv; bool enabled; int inline_data_size; @@ -178,6 +180,16 @@ static inline struct nvmet_port *ana_groups_to_port( ana_groups_group); } +static inline u8 nvmet_port_disc_addr_treq_secure_channel(struct nvmet_port *port) +{ + return (port->disc_addr.treq & NVME_TREQ_SECURE_CHANNEL_MASK); +} + +static inline bool nvmet_port_secure_channel_required(struct nvmet_port *port) +{ + return nvmet_port_disc_addr_treq_secure_channel(port) == NVMF_TREQ_REQUIRED; +} + struct nvmet_ctrl { struct nvmet_subsys *subsys; struct nvmet_sq **sqs; diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index a4f802790c..bb42ae42b1 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -8,9 +8,14 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/err.h> +#include <linux/key.h> #include <linux/nvme-tcp.h> +#include <linux/nvme-keyring.h> #include <net/sock.h> #include <net/tcp.h> +#include <net/tls.h> +#include <net/tls_prot.h> +#include <net/handshake.h> #include <linux/inet.h> #include <linux/llist.h> #include <crypto/hash.h> @@ -67,6 +72,16 @@ device_param_cb(idle_poll_period_usecs, &set_param_ops, MODULE_PARM_DESC(idle_poll_period_usecs, "nvmet tcp io_work poll till idle time period in usecs: Default 0"); +#ifdef CONFIG_NVME_TARGET_TCP_TLS +/* + * TLS handshake timeout + */ +static int tls_handshake_timeout = 10; +module_param(tls_handshake_timeout, int, 0644); +MODULE_PARM_DESC(tls_handshake_timeout, + "nvme TLS handshake timeout in seconds (default 10)"); +#endif + #define NVMET_TCP_RECV_BUDGET 8 #define NVMET_TCP_SEND_BUDGET 8 #define NVMET_TCP_IO_WORK_BUDGET 64 @@ -105,6 +120,7 @@ struct nvmet_tcp_cmd { u32 pdu_len; u32 pdu_recv; int sg_idx; + char recv_cbuf[CMSG_LEN(sizeof(char))]; struct msghdr recv_msg; struct bio_vec *iov; u32 flags; @@ -123,8 +139,10 @@ struct nvmet_tcp_cmd { enum nvmet_tcp_queue_state { NVMET_TCP_Q_CONNECTING, + 
NVMET_TCP_Q_TLS_HANDSHAKE, NVMET_TCP_Q_LIVE, NVMET_TCP_Q_DISCONNECTING, + NVMET_TCP_Q_FAILED, }; struct nvmet_tcp_queue { @@ -133,6 +151,7 @@ struct nvmet_tcp_queue { struct work_struct io_work; struct nvmet_cq nvme_cq; struct nvmet_sq nvme_sq; + struct kref kref; /* send state */ struct nvmet_tcp_cmd *cmds; @@ -156,6 +175,10 @@ struct nvmet_tcp_queue { struct ahash_request *snd_hash; struct ahash_request *rcv_hash; + /* TLS state */ + key_serial_t tls_pskid; + struct delayed_work tls_handshake_tmo_work; + unsigned long poll_end; spinlock_t state_lock; @@ -911,8 +934,10 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) iov.iov_base = icresp; iov.iov_len = sizeof(*icresp); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); - if (ret < 0) + if (ret < 0) { + queue->state = NVMET_TCP_Q_FAILED; return ret; /* queue removal will cleanup */ + } queue->state = NVMET_TCP_Q_LIVE; nvmet_prepare_receive_pdu(queue); @@ -1110,20 +1135,65 @@ static inline bool nvmet_tcp_pdu_valid(u8 type) return false; } +static int nvmet_tcp_tls_record_ok(struct nvmet_tcp_queue *queue, + struct msghdr *msg, char *cbuf) +{ + struct cmsghdr *cmsg = (struct cmsghdr *)cbuf; + u8 ctype, level, description; + int ret = 0; + + ctype = tls_get_record_type(queue->sock->sk, cmsg); + switch (ctype) { + case 0: + break; + case TLS_RECORD_TYPE_DATA: + break; + case TLS_RECORD_TYPE_ALERT: + tls_alert_recv(queue->sock->sk, msg, &level, &description); + if (level == TLS_ALERT_LEVEL_FATAL) { + pr_err("queue %d: TLS Alert desc %u\n", + queue->idx, description); + ret = -ENOTCONN; + } else { + pr_warn("queue %d: TLS Alert desc %u\n", + queue->idx, description); + ret = -EAGAIN; + } + break; + default: + /* discard this record type */ + pr_err("queue %d: TLS record %d unhandled\n", + queue->idx, ctype); + ret = -EAGAIN; + break; + } + return ret; +} + static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue) { struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr; - int len; + int len, 
ret; struct kvec iov; + char cbuf[CMSG_LEN(sizeof(char))] = {}; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; recv: iov.iov_base = (void *)&queue->pdu + queue->offset; iov.iov_len = queue->left; + if (queue->tls_pskid) { + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + } len = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); if (unlikely(len < 0)) return len; + if (queue->tls_pskid) { + ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf); + if (ret < 0) + return ret; + } queue->offset += len; queue->left -= len; @@ -1176,16 +1246,22 @@ static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd) static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue) { struct nvmet_tcp_cmd *cmd = queue->cmd; - int ret; + int len, ret; while (msg_data_left(&cmd->recv_msg)) { - ret = sock_recvmsg(cmd->queue->sock, &cmd->recv_msg, + len = sock_recvmsg(cmd->queue->sock, &cmd->recv_msg, cmd->recv_msg.msg_flags); - if (ret <= 0) - return ret; + if (len <= 0) + return len; + if (queue->tls_pskid) { + ret = nvmet_tcp_tls_record_ok(cmd->queue, + &cmd->recv_msg, cmd->recv_cbuf); + if (ret < 0) + return ret; + } - cmd->pdu_recv += ret; - cmd->rbytes_done += ret; + cmd->pdu_recv += len; + cmd->rbytes_done += len; } if (queue->data_digest) { @@ -1203,20 +1279,30 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue) static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) { struct nvmet_tcp_cmd *cmd = queue->cmd; - int ret; + int ret, len; + char cbuf[CMSG_LEN(sizeof(char))] = {}; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { .iov_base = (void *)&cmd->recv_ddgst + queue->offset, .iov_len = queue->left }; - ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, + if (queue->tls_pskid) { + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + } + len = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); - if (unlikely(ret < 0)) - return ret; + if (unlikely(len < 0)) + 
return len; + if (queue->tls_pskid) { + ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf); + if (ret < 0) + return ret; + } - queue->offset += ret; - queue->left -= ret; + queue->offset += len; + queue->left -= len; if (queue->left) return -EAGAIN; @@ -1294,14 +1380,27 @@ done: return ret; } +static void nvmet_tcp_release_queue(struct kref *kref) +{ + struct nvmet_tcp_queue *queue = + container_of(kref, struct nvmet_tcp_queue, kref); + + WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING); + queue_work(nvmet_wq, &queue->release_work); +} + static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue) { - spin_lock(&queue->state_lock); + spin_lock_bh(&queue->state_lock); + if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) { + /* Socket closed during handshake */ + tls_handshake_cancel(queue->sock->sk); + } if (queue->state != NVMET_TCP_Q_DISCONNECTING) { queue->state = NVMET_TCP_Q_DISCONNECTING; - queue_work(nvmet_wq, &queue->release_work); + kref_put(&queue->kref, nvmet_tcp_release_queue); } - spin_unlock(&queue->state_lock); + spin_unlock_bh(&queue->state_lock); } static inline void nvmet_tcp_arm_queue_deadline(struct nvmet_tcp_queue *queue) @@ -1383,6 +1482,10 @@ static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue, if (!c->r2t_pdu) goto out_free_data; + if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) { + c->recv_msg.msg_control = c->recv_cbuf; + c->recv_msg.msg_controllen = sizeof(c->recv_cbuf); + } c->recv_msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; list_add_tail(&c->entry, &queue->free_list); @@ -1496,6 +1599,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) mutex_unlock(&nvmet_tcp_queue_mutex); nvmet_tcp_restore_socket_callbacks(queue); + cancel_delayed_work_sync(&queue->tls_handshake_tmo_work); cancel_work_sync(&queue->io_work); /* stop accepting incoming data */ queue->rcv_state = NVMET_TCP_RECV_ERR; @@ -1504,12 +1608,12 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) nvmet_sq_destroy(&queue->nvme_sq); 
cancel_work_sync(&queue->io_work); nvmet_tcp_free_cmd_data_in_buffers(queue); - sock_release(queue->sock); + /* ->sock will be released by fput() */ + fput(queue->sock->file); nvmet_tcp_free_cmds(queue); if (queue->hdr_digest || queue->data_digest) nvmet_tcp_free_crypto(queue); ida_free(&nvmet_tcp_queue_ida, queue->idx); - page = virt_to_head_page(queue->pf_cache.va); __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); kfree(queue); @@ -1523,8 +1627,13 @@ static void nvmet_tcp_data_ready(struct sock *sk) read_lock_bh(&sk->sk_callback_lock); queue = sk->sk_user_data; - if (likely(queue)) - queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); + if (likely(queue)) { + if (queue->data_ready) + queue->data_ready(sk); + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, + &queue->io_work); + } read_unlock_bh(&sk->sk_callback_lock); } @@ -1632,31 +1741,176 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) return ret; } -static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, +#ifdef CONFIG_NVME_TARGET_TCP_TLS +static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue *queue) +{ + struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr; + int len, ret; + struct kvec iov = { + .iov_base = (u8 *)&queue->pdu + queue->offset, + .iov_len = sizeof(struct nvme_tcp_hdr), + }; + char cbuf[CMSG_LEN(sizeof(char))] = {}; + struct msghdr msg = { + .msg_control = cbuf, + .msg_controllen = sizeof(cbuf), + .msg_flags = MSG_PEEK, + }; + + if (nvmet_port_secure_channel_required(queue->port->nport)) + return 0; + + len = kernel_recvmsg(queue->sock, &msg, &iov, 1, + iov.iov_len, msg.msg_flags); + if (unlikely(len < 0)) { + pr_debug("queue %d: peek error %d\n", + queue->idx, len); + return len; + } + + ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf); + if (ret < 0) + return ret; + + if (len < sizeof(struct nvme_tcp_hdr)) { + pr_debug("queue %d: short read, %d bytes missing\n", + queue->idx, (int)iov.iov_len - 
len); + return -EAGAIN; + } + pr_debug("queue %d: hdr type %d hlen %d plen %d size %d\n", + queue->idx, hdr->type, hdr->hlen, hdr->plen, + (int)sizeof(struct nvme_tcp_icreq_pdu)); + if (hdr->type == nvme_tcp_icreq && + hdr->hlen == sizeof(struct nvme_tcp_icreq_pdu) && + hdr->plen == cpu_to_le32(sizeof(struct nvme_tcp_icreq_pdu))) { + pr_debug("queue %d: icreq detected\n", + queue->idx); + return len; + } + return 0; +} + +static void nvmet_tcp_tls_handshake_done(void *data, int status, + key_serial_t peerid) +{ + struct nvmet_tcp_queue *queue = data; + + pr_debug("queue %d: TLS handshake done, key %x, status %d\n", + queue->idx, peerid, status); + spin_lock_bh(&queue->state_lock); + if (WARN_ON(queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)) { + spin_unlock_bh(&queue->state_lock); + return; + } + if (!status) { + queue->tls_pskid = peerid; + queue->state = NVMET_TCP_Q_CONNECTING; + } else + queue->state = NVMET_TCP_Q_FAILED; + spin_unlock_bh(&queue->state_lock); + + cancel_delayed_work_sync(&queue->tls_handshake_tmo_work); + if (status) + nvmet_tcp_schedule_release_queue(queue); + else + nvmet_tcp_set_queue_sock(queue); + kref_put(&queue->kref, nvmet_tcp_release_queue); +} + +static void nvmet_tcp_tls_handshake_timeout(struct work_struct *w) +{ + struct nvmet_tcp_queue *queue = container_of(to_delayed_work(w), + struct nvmet_tcp_queue, tls_handshake_tmo_work); + + pr_warn("queue %d: TLS handshake timeout\n", queue->idx); + /* + * If tls_handshake_cancel() fails we've lost the race with + * nvmet_tcp_tls_handshake_done() */ + if (!tls_handshake_cancel(queue->sock->sk)) + return; + spin_lock_bh(&queue->state_lock); + if (WARN_ON(queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)) { + spin_unlock_bh(&queue->state_lock); + return; + } + queue->state = NVMET_TCP_Q_FAILED; + spin_unlock_bh(&queue->state_lock); + nvmet_tcp_schedule_release_queue(queue); + kref_put(&queue->kref, nvmet_tcp_release_queue); +} + +static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue) +{ + int 
ret = -EOPNOTSUPP; + struct tls_handshake_args args; + + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) { + pr_warn("cannot start TLS in state %d\n", queue->state); + return -EINVAL; + } + + kref_get(&queue->kref); + pr_debug("queue %d: TLS ServerHello\n", queue->idx); + memset(&args, 0, sizeof(args)); + args.ta_sock = queue->sock; + args.ta_done = nvmet_tcp_tls_handshake_done; + args.ta_data = queue; + args.ta_keyring = key_serial(queue->port->nport->keyring); + args.ta_timeout_ms = tls_handshake_timeout * 1000; + + ret = tls_server_hello_psk(&args, GFP_KERNEL); + if (ret) { + kref_put(&queue->kref, nvmet_tcp_release_queue); + pr_err("failed to start TLS, err=%d\n", ret); + } else { + queue_delayed_work(nvmet_wq, &queue->tls_handshake_tmo_work, + tls_handshake_timeout * HZ); + } + return ret; +} +#else +static void nvmet_tcp_tls_handshake_timeout(struct work_struct *w) {} +#endif + +static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, struct socket *newsock) { struct nvmet_tcp_queue *queue; + struct file *sock_file = NULL; int ret; queue = kzalloc(sizeof(*queue), GFP_KERNEL); - if (!queue) - return -ENOMEM; + if (!queue) { + ret = -ENOMEM; + goto out_release; + } INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work); INIT_WORK(&queue->io_work, nvmet_tcp_io_work); + kref_init(&queue->kref); queue->sock = newsock; queue->port = port; queue->nr_cmds = 0; spin_lock_init(&queue->state_lock); - queue->state = NVMET_TCP_Q_CONNECTING; + if (queue->port->nport->disc_addr.tsas.tcp.sectype == + NVMF_TCP_SECTYPE_TLS13) + queue->state = NVMET_TCP_Q_TLS_HANDSHAKE; + else + queue->state = NVMET_TCP_Q_CONNECTING; INIT_LIST_HEAD(&queue->free_list); init_llist_head(&queue->resp_list); INIT_LIST_HEAD(&queue->resp_send_list); + sock_file = sock_alloc_file(queue->sock, O_CLOEXEC, NULL); + if (IS_ERR(sock_file)) { + ret = PTR_ERR(sock_file); + goto out_free_queue; + } + queue->idx = ida_alloc(&nvmet_tcp_queue_ida, GFP_KERNEL); if (queue->idx < 0) { ret = queue->idx; - 
goto out_free_queue; + goto out_sock; } ret = nvmet_tcp_alloc_cmd(queue, &queue->connect); @@ -1673,11 +1927,33 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list); mutex_unlock(&nvmet_tcp_queue_mutex); + INIT_DELAYED_WORK(&queue->tls_handshake_tmo_work, + nvmet_tcp_tls_handshake_timeout); +#ifdef CONFIG_NVME_TARGET_TCP_TLS + if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) { + struct sock *sk = queue->sock->sk; + + /* Restore the default callbacks before starting upcall */ + read_lock_bh(&sk->sk_callback_lock); + sk->sk_user_data = NULL; + sk->sk_data_ready = port->data_ready; + read_unlock_bh(&sk->sk_callback_lock); + if (!nvmet_tcp_try_peek_pdu(queue)) { + if (!nvmet_tcp_tls_handshake(queue)) + return; + /* TLS handshake failed, terminate the connection */ + goto out_destroy_sq; + } + /* Not a TLS connection, continue with normal processing */ + queue->state = NVMET_TCP_Q_CONNECTING; + } +#endif + ret = nvmet_tcp_set_queue_sock(queue); if (ret) goto out_destroy_sq; - return 0; + return; out_destroy_sq: mutex_lock(&nvmet_tcp_queue_mutex); list_del_init(&queue->queue_list); @@ -1687,9 +1963,14 @@ out_free_connect: nvmet_tcp_free_cmd(&queue->connect); out_ida_remove: ida_free(&nvmet_tcp_queue_ida, queue->idx); +out_sock: + fput(queue->sock->file); out_free_queue: kfree(queue); - return ret; +out_release: + pr_err("failed to allocate queue, error %d\n", ret); + if (!sock_file) + sock_release(newsock); } static void nvmet_tcp_accept_work(struct work_struct *w) @@ -1706,11 +1987,7 @@ static void nvmet_tcp_accept_work(struct work_struct *w) pr_warn("failed to accept err=%d\n", ret); return; } - ret = nvmet_tcp_alloc_queue(port, newsock); - if (ret) { - pr_err("failed to allocate queue\n"); - sock_release(newsock); - } + nvmet_tcp_alloc_queue(port, newsock); } } @@ -1927,6 +2204,7 @@ static void __exit nvmet_tcp_exit(void) flush_workqueue(nvmet_wq); destroy_workqueue(nvmet_tcp_wq); + 
ida_destroy(&nvmet_tcp_queue_ida); } module_init(nvmet_tcp_init); |