diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/libnvme-mi.map | 1 | ||||
-rw-r--r-- | src/libnvme.map | 114 | ||||
-rw-r--r-- | src/meson.build | 3 | ||||
-rw-r--r-- | src/nvme/base64.c | 94 | ||||
-rw-r--r-- | src/nvme/base64.h | 8 | ||||
-rw-r--r-- | src/nvme/cleanup.c | 5 | ||||
-rw-r--r-- | src/nvme/cleanup.h | 24 | ||||
-rw-r--r-- | src/nvme/fabrics.c | 395 | ||||
-rw-r--r-- | src/nvme/ioctl.c | 272 | ||||
-rw-r--r-- | src/nvme/ioctl.h | 262 | ||||
-rw-r--r-- | src/nvme/json.c | 28 | ||||
-rw-r--r-- | src/nvme/linux.c | 829 | ||||
-rw-r--r-- | src/nvme/linux.h | 73 | ||||
-rw-r--r-- | src/nvme/log.c | 19 | ||||
-rw-r--r-- | src/nvme/log.h | 13 | ||||
-rw-r--r-- | src/nvme/mi-mctp.c | 180 | ||||
-rw-r--r-- | src/nvme/mi.c | 15 | ||||
-rw-r--r-- | src/nvme/mi.h | 39 | ||||
-rw-r--r-- | src/nvme/nbft.c | 42 | ||||
-rw-r--r-- | src/nvme/private.h | 10 | ||||
-rw-r--r-- | src/nvme/tree.c | 1006 | ||||
-rw-r--r-- | src/nvme/tree.h | 127 | ||||
-rw-r--r-- | src/nvme/types.h | 282 | ||||
-rw-r--r-- | src/nvme/util.c | 230 | ||||
-rw-r--r-- | src/nvme/util.h | 67 |
25 files changed, 3068 insertions, 1070 deletions
diff --git a/src/libnvme-mi.map b/src/libnvme-mi.map index f1ce712..41e8110 100644 --- a/src/libnvme-mi.map +++ b/src/libnvme-mi.map @@ -49,7 +49,6 @@ LIBNVME_MI_1_1 { nvme_mi_admin_security_send; nvme_mi_admin_security_recv; nvme_mi_endpoint_desc; - nvme_mi_root_close; nvme_mi_first_endpoint; nvme_mi_next_endpoint; nvme_mi_first_ctrl; diff --git a/src/libnvme.map b/src/libnvme.map index 82387d4..742f635 100644 --- a/src/libnvme.map +++ b/src/libnvme.map @@ -1,4 +1,37 @@ # SPDX-License-Identifier: LGPL-2.1-or-later +LIBNVME_1_7 { + global: + nvme_init_copy_range_f2; + nvme_init_copy_range_f3; + nvme_insert_tls_key_versioned; + nvme_generate_tls_key_identity; +}; + +LIBNVME_1_6 { + global: + nvme_ctrl_config_match; + nvme_ctrl_find; + nvme_ctrl_get_src_addr; + nvme_ctrl_release_fd; + nvme_get_debug; + nvme_get_features_err_recovery2; + nvme_get_features_host_mem_buf2; + nvme_get_features_iocs_profile; + nvme_get_features_lba_range2; + nvme_get_features_resv_mask2; + nvme_get_features_resv_persist2; + nvme_host_release_fds; + nvme_ns_release_fd; + nvme_root_release_fds; + nvme_set_debug; + nvme_set_features_iocs_profile; + nvme_set_features_resv_mask2; + nvme_set_features_resv_persist2; + nvme_set_features_write_protect2; + nvme_set_root; + nvme_subsystem_get_iopolicy; + nvme_subsystem_release_fds; +}; LIBNVME_1_5 { global: @@ -25,8 +58,6 @@ LIBNVME_1_3 { global: nvme_ctrl_is_unique_discovery_ctrl; nvme_ctrl_set_unique_discovery_ctrl; - nvme_fdp_reclaim_unit_handle_status; - nvme_fdp_reclaim_unit_handle_update; nvme_io_mgmt_recv; nvme_io_mgmt_send; nvme_host_is_pdc_enabled; @@ -55,19 +86,15 @@ LIBNVME_1_0 { global: nvme_admin_passthru64; nvme_admin_passthru; - nvme_attach_ns; nvme_capacity_mgmt; - nvme_compare; nvme_copy; nvme_create_root; nvme_create_ctrl; - nvme_ctrl_disconnect; nvme_ctrl_first_ns; nvme_ctrl_first_path; nvme_ctrl_get_address; nvme_ctrl_get_config; nvme_ctrl_get_dhchap_key; - nvme_ctrl_get_discovery_ctrl; nvme_ctrl_get_fd; nvme_ctrl_get_firmware; 
nvme_ctrl_get_host_iface; @@ -98,22 +125,14 @@ LIBNVME_1_0 { nvme_dev_self_test; nvme_dim_send; nvme_directive_recv; - nvme_directive_recv_identify_parameters; - nvme_directive_recv_stream_allocate; - nvme_directive_recv_stream_parameters; - nvme_directive_recv_stream_status; nvme_directive_send; nvme_directive_send_id_endir; - nvme_directive_send_stream_release_identifier; - nvme_directive_send_stream_release_resource; nvme_disconnect_ctrl; nvme_dsm; - nvme_dsm_range; nvme_dump_config; nvme_errno_to_string; nvme_first_host; nvme_first_subsystem; - nvme_flush; nvme_format_nvm; nvme_free_ctrl; nvme_free_host; @@ -163,31 +182,7 @@ LIBNVME_1_0 { nvme_get_host_telemetry; nvme_get_lba_status; nvme_get_log; - nvme_get_log_ana; - nvme_get_log_ana_groups; - nvme_get_log_boot_partition; - nvme_get_log_changed_ns_list; - nvme_get_log_cmd_effects; - nvme_get_log_create_telemetry_host; - nvme_get_log_device_self_test; - nvme_get_log_discovery; - nvme_get_log_endurance_group; - nvme_get_log_endurance_grp_evt; - nvme_get_log_error; - nvme_get_log_fid_supported_effects; - nvme_get_log_fw_slot; - nvme_get_log_lba_status; nvme_get_log_page; - nvme_get_log_persistent_event; - nvme_get_log_predictable_lat_event; - nvme_get_log_predictable_lat_nvmset; - nvme_get_log_reservation; - nvme_get_log_sanitize; - nvme_get_log_smart; - nvme_get_log_supported_log_pages; - nvme_get_log_telemetry_ctrl; - nvme_get_log_telemetry_host; - nvme_get_log_zns_changed_zones; nvme_get_logical_block_size; nvme_get_new_host_telemetry; nvme_get_ns_attr; @@ -196,6 +191,7 @@ LIBNVME_1_0 { nvme_get_property; nvme_get_subsys_attr; nvme_get_telemetry_log; + nvme_get_telemetry_max; nvme_host_get_dhchap_key; nvme_host_get_hostid; nvme_host_get_hostnqn; @@ -204,25 +200,6 @@ LIBNVME_1_0 { nvme_host_set_dhchap_key; nvme_host_set_hostsymname; nvme_identify; - nvme_identify_active_ns_list; - nvme_identify_allocated_ns; - nvme_identify_allocated_ns_list; - nvme_identify_ctrl; - nvme_identify_ctrl_list; - 
nvme_identify_domain_list; - nvme_identify_endurance_group_list; - nvme_identify_independent_identify_ns; - nvme_identify_ns_csi_user_data_format; - nvme_identify_iocs_ns_csi_user_data_format; - nvme_identify_iocs; - nvme_identify_ns; - nvme_identify_ns_descs; - nvme_identify_ns_granularity; - nvme_identify_nsid_ctrl_list; - nvme_identify_nvmset_list; - nvme_identify_primary_ctrl; - nvme_identify_secondary_ctrl_list; - nvme_identify_uuid; nvme_init_copy_range; nvme_init_ctrl; nvme_init_ctrl_list; @@ -243,10 +220,7 @@ LIBNVME_1_0 { nvme_next_host; nvme_next_subsystem; nvme_ns_attach; - nvme_ns_attach_ctrls; nvme_ns_compare; - nvme_ns_detach_ctrls; - nvme_ns_dettach_ctrls; nvme_ns_flush; nvme_ns_get_csi; nvme_ns_get_ctrl; @@ -259,7 +233,6 @@ LIBNVME_1_0 { nvme_ns_get_lba_util; nvme_ns_get_meta_size; nvme_ns_get_model; - nvme_ns_get_model; nvme_ns_get_name; nvme_ns_get_nguid; nvme_ns_get_nsid; @@ -269,16 +242,12 @@ LIBNVME_1_0 { nvme_ns_get_uuid; nvme_ns_identify; nvme_ns_mgmt; - nvme_ns_mgmt_create; - nvme_ns_mgmt_delete; - nvme_ns_open; nvme_ns_read; nvme_ns_rescan; nvme_ns_verify; nvme_ns_write; nvme_ns_write_uncorrectable; nvme_ns_write_zeros; - nvme_nvm_identify_ctrl; nvme_open; nvme_path_get_ana_state; nvme_path_get_ctrl; @@ -286,7 +255,6 @@ LIBNVME_1_0 { nvme_path_get_ns; nvme_path_get_sysfs_dir; nvme_paths_filter; - nvme_read; nvme_read_config; nvme_refresh_topology; nvme_rescan_ctrl; @@ -294,7 +262,6 @@ LIBNVME_1_0 { nvme_resv_register; nvme_resv_release; nvme_resv_report; - nvme_sanitize; nvme_sanitize_nvm; nvme_scan; nvme_scan_ctrl; @@ -306,9 +273,7 @@ LIBNVME_1_0 { nvme_scan_subsystem_namespaces; nvme_scan_subsystems; nvme_security_receive; - nvme_security_receive; nvme_security_send; - nvme_set_feature; nvme_set_features; nvme_set_features_arbitration; nvme_set_features_async_event; @@ -337,11 +302,8 @@ LIBNVME_1_0 { nvme_set_features_write_atomic; nvme_set_features_write_protect; nvme_set_property; - nvme_setup_ctrl_list; - nvme_setup_id_ns; 
nvme_status_to_errno; nvme_status_to_string; - nvme_status_type; nvme_submit_admin_passthru64; nvme_submit_admin_passthru; nvme_submit_io_passthru64; @@ -360,18 +322,10 @@ LIBNVME_1_0 { nvme_subsystem_reset; nvme_unlink_ctrl; nvme_update_config; - nvme_verify; nvme_virtual_mgmt; - nvme_write; - nvme_write_uncorrectable; - nvme_write_zeros; nvme_zns_append; - nvme_zns_identify_ctrl; - nvme_zns_identify_ns; - nvme_zns_identify_ns; nvme_zns_mgmt_recv; nvme_zns_mgmt_send; - nvme_zns_report_zones; nvmf_add_ctrl; nvmf_adrfam_str; nvmf_cms_str; diff --git a/src/meson.build b/src/meson.build index e8b667c..811f0f8 100644 --- a/src/meson.build +++ b/src/meson.build @@ -6,7 +6,6 @@ # Authors: Martin Belanger <Martin.Belanger@dell.com> # sources = [ - 'nvme/cleanup.c', 'nvme/nbft.c', 'nvme/fabrics.c', 'nvme/filters.c', @@ -15,10 +14,10 @@ sources = [ 'nvme/log.c', 'nvme/tree.c', 'nvme/util.c', + 'nvme/base64.c' ] mi_sources = [ - 'nvme/cleanup.c', 'nvme/log.c', 'nvme/mi.c', 'nvme/mi-mctp.c', diff --git a/src/nvme/base64.c b/src/nvme/base64.c new file mode 100644 index 0000000..5fae829 --- /dev/null +++ b/src/nvme/base64.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * base64.c - RFC4648-compliant base64 encoding + * + * Copyright (c) 2020 SUSE LLC + * + * Author: Hannes Reinecke <hare@suse.de> + */ + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <sys/types.h> + +static const char base64_table[65] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +/** + * base64_encode() - base64-encode some bytes + * @src: the bytes to encode + * @srclen: number of bytes to encode + * @dst: (output) the base64-encoded string. Not NUL-terminated. + * + * Encodes the input string using characters from the set [A-Za-z0-9+,]. + * The encoded string is roughly 4/3 times the size of the input string. 
+ * + * Return: length of the encoded string + */ +int base64_encode(const unsigned char *src, int srclen, char *dst) +{ + int i, bits = 0; + u_int32_t ac = 0; + char *cp = dst; + + for (i = 0; i < srclen; i++) { + ac = (ac << 8) | src[i]; + bits += 8; + do { + bits -= 6; + *cp++ = base64_table[(ac >> bits) & 0x3f]; + } while (bits >= 6); + } + if (bits) { + *cp++ = base64_table[(ac << (6 - bits)) & 0x3f]; + bits -= 6; + } + while (bits < 0) { + *cp++ = '='; + bits += 2; + } + + return cp - dst; +} + +/** + * base64_decode() - base64-decode some bytes + * @src: the base64-encoded string to decode + * @len: number of bytes to decode + * @dst: (output) the decoded bytes. + * + * Decodes the base64-encoded bytes @src according to RFC 4648. + * + * Return: number of decoded bytes + */ +int base64_decode(const char *src, int srclen, unsigned char *dst) +{ + u_int32_t ac = 0; + int i, bits = 0; + unsigned char *bp = dst; + + for (i = 0; i < srclen; i++) { + const char *p = strchr(base64_table, src[i]); + + if (src[i] == '=') { + ac = (ac << 6); + bits += 6; + if (bits >= 8) + bits -= 8; + continue; + } + if (!p || !src[i]) + return -EINVAL; + ac = (ac << 6) | (p - base64_table); + bits += 6; + if (bits >= 8) { + bits -= 8; + *bp++ = (unsigned char)(ac >> bits); + } + } + if (ac && ((1 << bits) - 1)) + return -EAGAIN; + + return bp - dst; +} diff --git a/src/nvme/base64.h b/src/nvme/base64.h new file mode 100644 index 0000000..c0f62e2 --- /dev/null +++ b/src/nvme/base64.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _BASE64_H +#define _BASE64_H + +int base64_encode(const unsigned char *src, int len, char *dst); +int base64_decode(const char *src, int len, unsigned char *dst); + +#endif /* _BASE64_H */ diff --git a/src/nvme/cleanup.c b/src/nvme/cleanup.c deleted file mode 100644 index e652e33..0000000 --- a/src/nvme/cleanup.c +++ /dev/null @@ -1,5 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1-or-later -#include <stdlib.h> -#include "cleanup.h" 
- -DEFINE_CLEANUP_FUNC(cleanup_charp, char *, free); diff --git a/src/nvme/cleanup.h b/src/nvme/cleanup.h index b7e1533..4327600 100644 --- a/src/nvme/cleanup.h +++ b/src/nvme/cleanup.h @@ -2,6 +2,11 @@ #ifndef __CLEANUP_H #define __CLEANUP_H +#include <dirent.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + #define __cleanup__(fn) __attribute__((cleanup(fn))) #define DECLARE_CLEANUP_FUNC(name, type) \ @@ -14,6 +19,23 @@ DECLARE_CLEANUP_FUNC(name, type) \ free_fn(*__p); \ } -DECLARE_CLEANUP_FUNC(cleanup_charp, char *); +static inline void freep(void *p) +{ + free(*(void **)p); +} +#define _cleanup_free_ __cleanup__(freep) + +static inline DEFINE_CLEANUP_FUNC(cleanup_file, FILE *, fclose) +#define _cleanup_file_ __cleanup__(cleanup_file) + +static inline DEFINE_CLEANUP_FUNC(cleanup_dir, DIR *, closedir) +#define _cleanup_dir_ __cleanup__(cleanup_dir) + +static inline void cleanup_fd(int *fd) +{ + if (*fd >= 0) + close(*fd); +} +#define _cleanup_fd_ __cleanup__(cleanup_fd) #endif diff --git a/src/nvme/fabrics.c b/src/nvme/fabrics.c index f0a06e8..4e042d8 100644 --- a/src/nvme/fabrics.c +++ b/src/nvme/fabrics.c @@ -32,6 +32,7 @@ #include <ccan/array_size/array_size.h> #include <ccan/str/str.h> +#include "cleanup.h" #include "fabrics.h" #include "linux.h" #include "ioctl.h" @@ -47,7 +48,7 @@ const char *nvmf_dev = "/dev/nvme-fabrics"; /** - * strchomp() - Strip trailing white space + * strchomp() - Strip trailing spaces * @str: String to strip * @max: Maximum length of string */ @@ -55,11 +56,8 @@ static void strchomp(char *str, int max) { int i; - for (i = max - 1; i >= 0; i--) { - if (str[i] != '\0' && str[i] != ' ') - return; - else - str[i] = '\0'; + for (i = max - 1; i >= 0 && str[i] == ' '; i--) { + str[i] = '\0'; } } @@ -357,10 +355,16 @@ static int __add_argument(char **argstr, const char *tok, const char *arg) return 0; } +static int __nvmf_supported_options(nvme_root_t r); +#define nvmf_check_option(r, tok) \ +({ \ + 
!__nvmf_supported_options(r) && (r)->options->tok; \ +}) + #define add_bool_argument(o, argstr, tok, arg) \ ({ \ int ret; \ - if (r->options->tok) { \ + if (nvmf_check_option(r, tok)) { \ ret = __add_bool_argument(argstr, \ stringify(tok), \ arg); \ @@ -376,7 +380,7 @@ static int __add_argument(char **argstr, const char *tok, const char *arg) #define add_int_argument(o, argstr, tok, arg, allow_zero) \ ({ \ int ret; \ - if (r->options->tok) { \ + if (nvmf_check_option(r, tok)) { \ ret = __add_int_argument(argstr, \ stringify(tok), \ arg, \ @@ -393,7 +397,7 @@ static int __add_argument(char **argstr, const char *tok, const char *arg) #define add_int_or_minus_one_argument(o, argstr, tok, arg) \ ({ \ int ret; \ - if (r->options->tok) { \ + if (nvmf_check_option(r, tok)) { \ ret = __add_int_or_minus_one_argument(argstr, \ stringify(tok), \ arg); \ @@ -409,7 +413,7 @@ static int __add_argument(char **argstr, const char *tok, const char *arg) #define add_argument(r, argstr, tok, arg) \ ({ \ int ret; \ - if (r->options->tok) { \ + if (nvmf_check_option(r, tok)) { \ ret = __add_argument(argstr, \ stringify(tok), \ arg); \ @@ -442,7 +446,6 @@ static int inet4_pton(const char *src, uint16_t port, static int inet6_pton(nvme_root_t r, const char *src, uint16_t port, struct sockaddr_storage *addr) { - int ret = -EINVAL; struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr; const char *scope = NULL; char *p; @@ -450,7 +453,7 @@ static int inet6_pton(nvme_root_t r, const char *src, uint16_t port, if (strlen(src) > INET6_ADDRSTRLEN) return -EINVAL; - char *tmp = strdup(src); + _cleanup_free_ char *tmp = strdup(src); if (!tmp) { nvme_msg(r, LOG_ERR, "cannot copy: %s\n", src); return -ENOMEM; @@ -463,24 +466,20 @@ static int inet6_pton(nvme_root_t r, const char *src, uint16_t port, } if (inet_pton(AF_INET6, tmp, &addr6->sin6_addr) != 1) - goto free_tmp; + return -EINVAL; if (IN6_IS_ADDR_LINKLOCAL(&addr6->sin6_addr) && scope) { addr6->sin6_scope_id = if_nametoindex(scope); if 
(addr6->sin6_scope_id == 0) { nvme_msg(r, LOG_ERR, "can't find iface index for: %s (%m)\n", scope); - goto free_tmp; + return -EINVAL; } } addr6->sin6_family = AF_INET6; addr6->sin6_port = htons(port); - ret = 0; - -free_tmp: - free(tmp); - return ret; + return 0; } /** @@ -655,7 +654,7 @@ static int build_options(nvme_host_t h, nvme_ctrl_t c, char **argstr) static int __nvmf_supported_options(nvme_root_t r) { char buf[0x1000], *options, *p, *v; - int fd, ret; + _cleanup_fd_ int fd = -1; ssize_t len; if (r->options) @@ -684,14 +683,12 @@ static int __nvmf_supported_options(nvme_root_t r) "Cannot read %s, using default options\n", nvmf_dev); *r->options = default_supported_options; - ret = 0; - goto out_close; + return 0; } nvme_msg(r, LOG_ERR, "Failed to read from %s: %s\n", nvmf_dev, strerror(errno)); - ret = -ENVME_CONNECT_READ; - goto out_close; + return -ENVME_CONNECT_READ; } buf[len] = '\0'; @@ -738,16 +735,13 @@ static int __nvmf_supported_options(nvme_root_t r) parse_option(r, v, trsvcid); } nvme_msg(r, LOG_DEBUG, "\n"); - ret = 0; - -out_close: - close(fd); - return ret; + return 0; } static int __nvmf_add_ctrl(nvme_root_t r, const char *argstr) { - int ret, fd, len = strlen(argstr); + _cleanup_fd_ int fd; + int ret, len = strlen(argstr); char buf[0x1000], *options, *p; fd = open(nvmf_dev, O_RDWR); @@ -765,31 +759,22 @@ static int __nvmf_add_ctrl(nvme_root_t r, const char *argstr) nvmf_dev, strerror(errno)); switch (errno) { case EALREADY: - ret = -ENVME_CONNECT_ALREADY; - break; + return -ENVME_CONNECT_ALREADY; case EINVAL: - ret = -ENVME_CONNECT_INVAL; - break; + return -ENVME_CONNECT_INVAL; case EADDRINUSE: - ret = -ENVME_CONNECT_ADDRINUSE; - break; + return -ENVME_CONNECT_ADDRINUSE; case ENODEV: - ret = -ENVME_CONNECT_NODEV; - break; + return -ENVME_CONNECT_NODEV; case EOPNOTSUPP: - ret = -ENVME_CONNECT_OPNOTSUPP; - break; + return -ENVME_CONNECT_OPNOTSUPP; case ECONNREFUSED: - ret = -ENVME_CONNECT_CONNREFUSED; - break; + return 
-ENVME_CONNECT_CONNREFUSED; case EADDRNOTAVAIL: - ret = -ENVME_CONNECT_ADDRNOTAVAIL; - break; + return -ENVME_CONNECT_ADDRNOTAVAIL; default: - ret = -ENVME_CONNECT_WRITE; - break; + return -ENVME_CONNECT_WRITE; } - goto out_close; } memset(buf, 0x0, sizeof(buf)); @@ -797,8 +782,7 @@ static int __nvmf_add_ctrl(nvme_root_t r, const char *argstr) if (len < 0) { nvme_msg(r, LOG_ERR, "Failed to read from %s: %s\n", nvmf_dev, strerror(errno)); - ret = -ENVME_CONNECT_READ; - goto out_close; + return -ENVME_CONNECT_READ; } nvme_msg(r, LOG_DEBUG, "connect ctrl, response '%.*s'\n", (int)strcspn(buf, "\n"), buf); @@ -808,14 +792,33 @@ static int __nvmf_add_ctrl(nvme_root_t r, const char *argstr) if (!*p) continue; if (sscanf(p, "instance=%d", &ret) == 1) - goto out_close; + return ret; } nvme_msg(r, LOG_ERR, "Failed to parse ctrl info for \"%s\"\n", argstr); - ret = -ENVME_CONNECT_PARSE; -out_close: - close(fd); - return ret; + return -ENVME_CONNECT_PARSE; +} + +static const char *lookup_context(nvme_root_t r, nvme_ctrl_t c) +{ + + nvme_host_t h; + nvme_subsystem_t s; + + nvme_for_each_host(r, h) { + nvme_for_each_subsystem(h, s) { + if (__nvme_lookup_ctrl(s, nvme_ctrl_get_transport(c), + nvme_ctrl_get_traddr(c), + NULL, + NULL, + nvme_ctrl_get_trsvcid(c), + NULL, + NULL)) + return nvme_subsystem_get_application(s); + } + } + + return NULL; } int nvmf_add_ctrl(nvme_host_t h, nvme_ctrl_t c, @@ -823,7 +826,7 @@ int nvmf_add_ctrl(nvme_host_t h, nvme_ctrl_t c, { nvme_subsystem_t s; const char *root_app, *app; - char *argstr; + _cleanup_free_ char *argstr = NULL; int ret; /* highest prio have configs from command line */ @@ -839,6 +842,7 @@ int nvmf_add_ctrl(nvme_host_t h, nvme_ctrl_t c, nvme_ctrl_get_host_traddr(c), nvme_ctrl_get_host_iface(c), nvme_ctrl_get_trsvcid(c), + NULL, NULL); if (fc) { const char *key; @@ -862,24 +866,9 @@ int nvmf_add_ctrl(nvme_host_t h, nvme_ctrl_t c, root_app = nvme_root_get_application(h->r); if (root_app) { app = nvme_subsystem_get_application(s); - 
if (!app && nvme_ctrl_is_discovery_ctrl(c)) { - nvme_subsystem_t s; - nvme_ctrl_t fc; - - nvme_for_each_subsystem(h, s) { - fc = __nvme_lookup_ctrl(s, nvme_ctrl_get_transport(c), - nvme_ctrl_get_traddr(c), - NULL, - NULL, - nvme_ctrl_get_trsvcid(c), - NULL); - - if (fc) { - app = nvme_subsystem_get_application(s); - break; - } - } - } + if (!app && nvme_ctrl_is_discovery_ctrl(c)) + app = lookup_context(h->r, c); + /* * configuration is managed by an application, * refuse to act on subsystems which either have @@ -907,15 +896,11 @@ int nvmf_add_ctrl(nvme_host_t h, nvme_ctrl_t c, free(traddr); } - ret = __nvmf_supported_options(h->r); - if (ret) - return ret; ret = build_options(h, c, &argstr); if (ret) return ret; ret = __nvmf_add_ctrl(h->r, argstr); - free(argstr); if (ret < 0) { errno = -ret; return -1; @@ -1020,12 +1005,12 @@ nvme_ctrl_t nvmf_connect_disc_entry(nvme_host_t h, return NULL; } - if (e->treq & NVMF_TREQ_DISABLE_SQFLOW) + if (e->treq & NVMF_TREQ_DISABLE_SQFLOW && + nvmf_check_option(h->r, disable_sqflow)) c->cfg.disable_sqflow = true; if (e->trtype == NVMF_TRTYPE_TCP && - (e->treq & NVMF_TREQ_REQUIRED || - e->treq & NVMF_TREQ_NOT_REQUIRED)) + e->tsas.tcp.sectype != NVMF_TCP_SECTYPE_NONE) c->cfg.tls = true; ret = nvmf_add_ctrl(h, c, cfg); @@ -1046,45 +1031,55 @@ nvme_ctrl_t nvmf_connect_disc_entry(nvme_host_t h, return NULL; } -static struct nvmf_discovery_log *nvme_discovery_log(nvme_ctrl_t c, - struct nvme_get_log_args *args, - int max_retries) +/* + * Most of nvmf_discovery_log is reserved, so only fetch the initial bytes. + * 8 bytes for GENCTR, 8 for NUMREC, and 2 for RECFMT. + * Since only multiples of 4 bytes are allowed, round 18 up to 20. + */ +#define DISCOVERY_HEADER_LEN 20 + +static struct nvmf_discovery_log *nvme_discovery_log( + const struct nvme_get_discovery_args *args) { - nvme_root_t r = c->s && c->s->h ? 
c->s->h->r : NULL; - struct nvmf_discovery_log *log = NULL; - int ret, retries = 0; - const char *name = nvme_ctrl_get_name(c); + nvme_root_t r = root_from_ctrl(args->c); + struct nvmf_discovery_log *log; + int retries = 0; + const char *name = nvme_ctrl_get_name(args->c); uint64_t genctr, numrec; - unsigned int size; - int fd = nvme_ctrl_get_fd(c); - - args->fd = fd; + int fd = nvme_ctrl_get_fd(args->c); + struct nvme_get_log_args log_args = { + .result = args->result, + .args_size = sizeof(log_args), + .timeout = args->timeout, + .lid = NVME_LOG_LID_DISCOVER, + .nsid = NVME_NSID_NONE, + .csi = NVME_CSI_NVM, + .lsi = NVME_LOG_LSI_NONE, + .lsp = args->lsp, + .uuidx = NVME_UUID_NONE, + }; - do { - size = sizeof(struct nvmf_discovery_log); + log = __nvme_alloc(sizeof(*log)); + if (!log) { + nvme_msg(r, LOG_ERR, + "could not allocate memory for discovery log header\n"); + errno = ENOMEM; + return NULL; + } - free(log); - log = calloc(1, size); - if (!log) { - nvme_msg(r, LOG_ERR, - "could not allocate memory for discovery log header\n"); - errno = ENOMEM; - return NULL; - } + nvme_msg(r, LOG_DEBUG, "%s: get header (try %d/%d)\n", + name, retries, args->max_retries); + log_args.log = log; + log_args.len = DISCOVERY_HEADER_LEN; + if (nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, &log_args)) { + nvme_msg(r, LOG_INFO, + "%s: discover try %d/%d failed, error %d\n", + name, retries, args->max_retries, errno); + goto out_free_log; + } - nvme_msg(r, LOG_DEBUG, "%s: get header (try %d/%d)\n", - name, retries, max_retries); - args->rae = true; - args->lpo = 0; - args->len = size; - args->log = log; - ret = nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, args); - if (ret) { - nvme_msg(r, LOG_INFO, - "%s: discover try %d/%d failed, error %d\n", - name, retries, max_retries, errno); - goto out_free_log; - } + do { + size_t entries_size; numrec = le64_to_cpu(log->numrec); genctr = le64_to_cpu(log->genctr); @@ -1092,11 +1087,9 @@ static struct nvmf_discovery_log 
*nvme_discovery_log(nvme_ctrl_t c, if (numrec == 0) break; - size = sizeof(struct nvmf_discovery_log) + - sizeof(struct nvmf_disc_log_entry) * numrec; - free(log); - log = calloc(1, size); + entries_size = sizeof(*log->entries) * numrec; + log = __nvme_alloc(sizeof(*log) + entries_size); if (!log) { nvme_msg(r, LOG_ERR, "could not alloc memory for discovery log page\n"); @@ -1105,19 +1098,16 @@ static struct nvmf_discovery_log *nvme_discovery_log(nvme_ctrl_t c, } nvme_msg(r, LOG_DEBUG, - "%s: get %" PRIu64 - " records (length %d genctr %" PRIu64 ")\n", - name, numrec, size, genctr); - - args->rae = true; - args->lpo = sizeof(struct nvmf_discovery_log); - args->len = size - sizeof(struct nvmf_discovery_log); - args->log = log->entries; - ret = nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, args); - if (ret) { + "%s: get %" PRIu64 " records (genctr %" PRIu64 ")\n", + name, numrec, genctr); + + log_args.lpo = sizeof(*log); + log_args.log = log->entries; + log_args.len = entries_size; + if (nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, &log_args)) { nvme_msg(r, LOG_INFO, "%s: discover try %d/%d failed, error %d\n", - name, retries, max_retries, errno); + name, retries, args->max_retries, errno); goto out_free_log; } @@ -1127,19 +1117,17 @@ static struct nvmf_discovery_log *nvme_discovery_log(nvme_ctrl_t c, */ nvme_msg(r, LOG_DEBUG, "%s: get header again\n", name); - args->rae = false; - args->lpo = 0; - args->len = sizeof(struct nvmf_discovery_log); - args->log = log; - ret = nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, args); - if (ret) { + log_args.lpo = 0; + log_args.log = log; + log_args.len = DISCOVERY_HEADER_LEN; + if (nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, &log_args)) { nvme_msg(r, LOG_INFO, "%s: discover try %d/%d failed, error %d\n", - name, retries, max_retries, errno); + name, retries, args->max_retries, errno); goto out_free_log; } } while (genctr != le64_to_cpu(log->genctr) && - ++retries < max_retries); + ++retries < args->max_retries); if (genctr 
!= le64_to_cpu(log->genctr)) { nvme_msg(r, LOG_INFO, "%s: discover genctr mismatch\n", name); @@ -1159,87 +1147,31 @@ out_free_log: return NULL; } -static void sanitize_discovery_log_entry(struct nvmf_disc_log_entry *e) +static void sanitize_discovery_log_entry(struct nvmf_disc_log_entry *e) { - switch (e->trtype) { - case NVMF_TRTYPE_RDMA: - case NVMF_TRTYPE_TCP: - switch (e->adrfam) { - case NVMF_ADDR_FAMILY_IP4: - case NVMF_ADDR_FAMILY_IP6: - strchomp(e->traddr, NVMF_TRADDR_SIZE); - strchomp(e->trsvcid, NVMF_TRSVCID_SIZE); - break; - } - break; - case NVMF_TRTYPE_FC: - switch (e->adrfam) { - case NVMF_ADDR_FAMILY_FC: - strchomp(e->traddr, NVMF_TRADDR_SIZE); - break; - } - break; - case NVMF_TRTYPE_LOOP: - strchomp(e->traddr, NVMF_TRADDR_SIZE); - break; - } + strchomp(e->trsvcid, sizeof(e->trsvcid)); + strchomp(e->traddr, sizeof(e->traddr)); } int nvmf_get_discovery_log(nvme_ctrl_t c, struct nvmf_discovery_log **logp, int max_retries) { - struct nvmf_discovery_log *log; - - struct nvme_get_log_args args = { - .args_size = sizeof(args), - .fd = nvme_ctrl_get_fd(c), - .nsid = NVME_NSID_NONE, - .lsp = NVMF_LOG_DISC_LSP_NONE, - .lsi = NVME_LOG_LSI_NONE, - .uuidx = NVME_UUID_NONE, + struct nvme_get_discovery_args args = { + .c = c, + .max_retries = max_retries, .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, - .result = NULL, - .lid = NVME_LOG_LID_DISCOVER, - .log = NULL, - .len = 0, - .csi = NVME_CSI_NVM, - .rae = false, - .ot = false, + .lsp = NVMF_LOG_DISC_LSP_NONE, }; - log = nvme_discovery_log(c, &args, max_retries); - if (!log) - return -1; - - for (int i = 0; i < le64_to_cpu(log->numrec); i++) - sanitize_discovery_log_entry(&log->entries[i]); - - *logp = log; - return 0; + *logp = nvmf_get_discovery_wargs(&args); + return *logp ? 
0 : -1; } struct nvmf_discovery_log *nvmf_get_discovery_wargs(struct nvme_get_discovery_args *args) { struct nvmf_discovery_log *log; - struct nvme_get_log_args _args = { - .args_size = sizeof(_args), - .fd = nvme_ctrl_get_fd(args->c), - .nsid = NVME_NSID_NONE, - .lsp = args->lsp, - .lsi = NVME_LOG_LSI_NONE, - .uuidx = NVME_UUID_NONE, - .timeout = args->timeout, - .result = args->result, - .lid = NVME_LOG_LID_DISCOVER, - .log = NULL, - .len = 0, - .csi = NVME_CSI_NVM, - .rae = false, - .ot = false, - }; - - log = nvme_discovery_log(args->c, &_args, args->max_retries); + log = nvme_discovery_log(args); if (!log) return NULL; @@ -1254,7 +1186,7 @@ struct nvmf_discovery_log *nvmf_get_discovery_wargs(struct nvme_get_discovery_ar static int uuid_from_device_tree(char *system_uuid) { ssize_t len; - int f; + _cleanup_fd_ int f; f = open(PATH_UUID_IBM, O_RDONLY); if (f < 0) @@ -1262,7 +1194,6 @@ static int uuid_from_device_tree(char *system_uuid) memset(system_uuid, 0, NVME_UUID_LEN_STRING); len = read(f, system_uuid, NVME_UUID_LEN_STRING - 1); - close(f); if (len < 0) return -ENXIO; @@ -1299,7 +1230,7 @@ static bool is_dmi_uuid_valid(const char *buf, size_t len) static int uuid_from_dmi_entries(char *system_uuid) { int f; - DIR *d; + _cleanup_dir_ DIR *d; struct dirent *de; char buf[512] = {0}; @@ -1350,7 +1281,6 @@ static int uuid_from_dmi_entries(char *system_uuid) (uint8_t)buf[8 + 14], (uint8_t)buf[8 + 15]); break; } - closedir(d); return strlen(system_uuid) ? 
0 : -ENXIO; } @@ -1364,10 +1294,9 @@ static int uuid_from_dmi_entries(char *system_uuid) */ static int uuid_from_product_uuid(char *system_uuid) { - FILE *stream; + _cleanup_file_ FILE *stream; ssize_t nread; - int ret; - char *line = NULL; + _cleanup_free_ char *line = NULL; size_t len = 0; stream = fopen(PATH_DMI_PROD_UUID, "re"); @@ -1376,10 +1305,8 @@ static int uuid_from_product_uuid(char *system_uuid) system_uuid[0] = '\0'; nread = getline(&line, &len, stream); - if (nread != NVME_UUID_LEN_STRING) { - ret = -ENXIO; - goto out; - } + if (nread != NVME_UUID_LEN_STRING) + return -ENXIO; /* The kernel is handling the byte swapping according DMTF * SMBIOS 3.0 Section 7.2.1 System UUID */ @@ -1387,13 +1314,7 @@ static int uuid_from_product_uuid(char *system_uuid) memcpy(system_uuid, line, NVME_UUID_LEN_STRING - 1); system_uuid[NVME_UUID_LEN_STRING - 1] = '\0'; - ret = 0; - -out: - free(line); - fclose(stream); - - return ret; + return 0; } /** @@ -1443,7 +1364,8 @@ char *nvmf_hostnqn_generate() static char *nvmf_read_file(const char *f, int len) { char buf[len]; - int ret, fd; + _cleanup_fd_ int fd; + int ret; fd = open(f, O_RDONLY); if (fd < 0) @@ -1451,7 +1373,6 @@ static char *nvmf_read_file(const char *f, int len) memset(buf, 0, len); ret = read(fd, buf, len - 1); - close (fd); if (ret < 0 || !strlen(buf)) return NULL; @@ -1575,7 +1496,7 @@ static int nvmf_dim(nvme_ctrl_t c, enum nvmf_dim_tas tas, __u8 trtype, __u32 *result) { nvme_root_t r = c->s && c->s->h ? 
c->s->h->r : NULL; - struct nvmf_dim_data *dim; + _cleanup_free_ struct nvmf_dim_data *dim = NULL; struct nvmf_ext_die *die; __u32 tdl; __u32 tel; @@ -1662,11 +1583,7 @@ static int nvmf_dim(nvme_ctrl_t c, enum nvmf_dim_tas tas, __u8 trtype, args.data_len = tdl; args.data = dim; - ret = nvme_dim_send(&args); - - free(dim); - - return ret; + return nvme_dim_send(&args); } /** @@ -1720,25 +1637,31 @@ static const char *dctype_str[] = { */ static int nvme_fetch_cntrltype_dctype_from_id(nvme_ctrl_t c) { - struct nvme_id_ctrl id = { 0 }; + _cleanup_free_ struct nvme_id_ctrl *id; int ret; - ret = nvme_ctrl_identify(c, &id); + id = __nvme_alloc(sizeof(*id)); + if (!id) { + errno = ENOMEM; + return -1; + } + + ret = nvme_ctrl_identify(c, id); if (ret) return ret; if (!c->cntrltype) { - if (id.cntrltype > NVME_CTRL_CNTRLTYPE_ADMIN || !cntrltype_str[id.cntrltype]) + if (id->cntrltype > NVME_CTRL_CNTRLTYPE_ADMIN || !cntrltype_str[id->cntrltype]) c->cntrltype = strdup("reserved"); else - c->cntrltype = strdup(cntrltype_str[id.cntrltype]); + c->cntrltype = strdup(cntrltype_str[id->cntrltype]); } - if (!c->dctype) { - if (id.dctype > NVME_CTRL_DCTYPE_CDC || !dctype_str[id.dctype]) + if (!c->dctype) { + if (id->dctype > NVME_CTRL_DCTYPE_CDC || !dctype_str[id->dctype]) c->dctype = strdup("reserved"); else - c->dctype = strdup(dctype_str[id.dctype]); + c->dctype = strdup(dctype_str[id->dctype]); } return 0; } diff --git a/src/nvme/ioctl.c b/src/nvme/ioctl.c index b9710b3..9090b7e 100644 --- a/src/nvme/ioctl.c +++ b/src/nvme/ioctl.c @@ -13,9 +13,11 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <inttypes.h> #include <sys/ioctl.h> #include <sys/stat.h> +#include <sys/time.h> #include <ccan/build_assert/build_assert.h> #include <ccan/endian/endian.h> @@ -23,6 +25,8 @@ #include "ioctl.h" #include "util.h" +static bool nvme_debug; + static int nvme_verify_chr(int fd) { static struct stat nvme_stat; @@ -86,13 +90,62 @@ static int nvme_submit_passthru64(int fd, 
unsigned long ioctl_cmd, return err; } +static void nvme_show_command(struct nvme_passthru_cmd *cmd, int err, struct timeval start, + struct timeval end) +{ + printf("opcode : %02x\n", cmd->opcode); + printf("flags : %02x\n", cmd->flags); + printf("rsvd1 : %04x\n", cmd->rsvd1); + printf("nsid : %08x\n", cmd->nsid); + printf("cdw2 : %08x\n", cmd->cdw2); + printf("cdw3 : %08x\n", cmd->cdw3); + printf("data_len : %08x\n", cmd->data_len); + printf("metadata_len : %08x\n", cmd->metadata_len); + printf("addr : %"PRIx64"\n", (uint64_t)(uintptr_t)cmd->addr); + printf("metadata : %"PRIx64"\n", (uint64_t)(uintptr_t)cmd->metadata); + printf("cdw10 : %08x\n", cmd->cdw10); + printf("cdw11 : %08x\n", cmd->cdw11); + printf("cdw12 : %08x\n", cmd->cdw12); + printf("cdw13 : %08x\n", cmd->cdw13); + printf("cdw14 : %08x\n", cmd->cdw14); + printf("cdw15 : %08x\n", cmd->cdw15); + printf("timeout_ms : %08x\n", cmd->timeout_ms); + printf("result : %08x\n", cmd->result); + printf("err : %d\n", err); + printf("latency : %lu us\n", + (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec)); +} + +void nvme_set_debug(bool debug) +{ + nvme_debug = debug; +} + +bool nvme_get_debug(void) +{ + return nvme_debug; +} + static int nvme_submit_passthru(int fd, unsigned long ioctl_cmd, struct nvme_passthru_cmd *cmd, __u32 *result) { - int err = ioctl(fd, ioctl_cmd, cmd); + struct timeval start; + struct timeval end; + int err; + + if (nvme_get_debug()) + gettimeofday(&start, NULL); + + err = ioctl(fd, ioctl_cmd, cmd); + + if (nvme_get_debug()) { + gettimeofday(&end, NULL); + nvme_show_command(cmd, err, start, end); + } if (err >= 0 && result) *result = cmd->result; + return err; } @@ -532,16 +585,18 @@ int nvme_set_features_power_mgmt(int fd, __u8 ps, __u8 wh, bool save, __u32 *result) { __u32 value = NVME_SET(ps, FEAT_PWRMGMT_PS) | - NVME_SET(wh, FEAT_PWRMGMT_PS); + NVME_SET(wh, FEAT_PWRMGMT_WH); return __nvme_set_features(fd, NVME_FEAT_FID_POWER_MGMT, value, save, result); } -int 
nvme_set_features_lba_range(int fd, __u32 nsid, __u32 nr_ranges, bool save, +int nvme_set_features_lba_range(int fd, __u32 nsid, __u8 nr_ranges, bool save, struct nvme_lba_range_type *data, __u32 *result) { - return -1; + return nvme_set_features_data( + fd, NVME_FEAT_FID_LBA_RANGE, nsid, nr_ranges - 1, save, + sizeof(*data), data, result); } int nvme_set_features_temp_thresh(int fd, __u16 tmpth, __u8 tmpsel, @@ -562,8 +617,8 @@ int nvme_set_features_err_recovery(int fd, __u32 nsid, __u16 tler, bool dulbe, __u32 value = NVME_SET(tler, FEAT_ERROR_RECOVERY_TLER) | NVME_SET(!!dulbe, FEAT_ERROR_RECOVERY_DULBE); - return __nvme_set_features(fd, NVME_FEAT_FID_ERR_RECOVERY, value, save, - result); + return nvme_set_features_simple( + fd, NVME_FEAT_FID_ERR_RECOVERY, nsid, value, save, result); } int nvme_set_features_volatile_wc(int fd, bool wce, bool save, __u32 *result) @@ -577,8 +632,8 @@ int nvme_set_features_volatile_wc(int fd, bool wce, bool save, __u32 *result) int nvme_set_features_irq_coalesce(int fd, __u8 thr, __u8 time, bool save, __u32 *result) { - __u32 value = NVME_SET(thr, FEAT_IRQC_TIME) | - NVME_SET(time, FEAT_IRQC_THR); + __u32 value = NVME_SET(thr, FEAT_IRQC_THR) | + NVME_SET(time, FEAT_IRQC_TIME); return __nvme_set_features(fd, NVME_FEAT_FID_IRQ_COALESCE, value, save, result); @@ -612,19 +667,31 @@ int nvme_set_features_async_event(int fd, __u32 events, int nvme_set_features_auto_pst(int fd, bool apste, bool save, struct nvme_feat_auto_pst *apst, __u32 *result) { - __u32 value = NVME_SET(!!apste, FEAT_APST_APSTE); + struct nvme_set_features_args args = { + .args_size = sizeof(args), + .fd = fd, + .fid = NVME_FEAT_FID_AUTO_PST, + .nsid = NVME_NSID_NONE, + .cdw11 = NVME_SET(!!apste, FEAT_APST_APSTE), + .save = save, + .uuidx = NVME_UUID_NONE, + .data = apst, + .data_len = sizeof(*apst), + .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, + .result = result, + }; - return __nvme_set_features(fd, NVME_FEAT_FID_AUTO_PST, value, save, - result); + return 
nvme_set_features(&args); } int nvme_set_features_timestamp(int fd, bool save, __u64 timestamp) { __le64 t = cpu_to_le64(timestamp); - struct nvme_timestamp ts; + struct nvme_timestamp ts = {}; struct nvme_set_features_args args = { .args_size = sizeof(args), .fd = fd, + .fid = NVME_FEAT_FID_TIMESTAMP, .nsid = NVME_NSID_NONE, .cdw11 = 0, .cdw12 = 0, @@ -694,8 +761,8 @@ int nvme_set_features_plm_config(int fd, bool plm, __u16 nvmsetid, bool save, .save = save, .uuidx = NVME_UUID_NONE, .cdw15 = 0, - .data_len = 0, - .data = NULL, + .data_len = sizeof(*data), + .data = data, .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, .result = result, }; @@ -746,7 +813,7 @@ int nvme_set_features_host_behavior(int fd, bool save, .nsid = NVME_NSID_NONE, .cdw11 = 0, .cdw12 = 0, - .save = save, + .save = false, .uuidx = NVME_UUID_NONE, .cdw15 = 0, .data_len = sizeof(*data), @@ -780,7 +847,7 @@ int nvme_set_features_sw_progress(int fd, __u8 pbslc, bool save, result); } -int nvme_set_features_host_id(int fd, bool save, bool exhid, __u8 *hostid) +int nvme_set_features_host_id(int fd, bool exhid, bool save, __u8 *hostid) { __u32 len = exhid ? 
16 : 8; __u32 value = !!exhid; @@ -809,20 +876,42 @@ int nvme_set_features_resv_mask(int fd, __u32 mask, bool save, __u32 *result) result); } +int nvme_set_features_resv_mask2(int fd, __u32 nsid, __u32 mask, bool save, + __u32 *result) +{ + return nvme_set_features_simple( + fd, NVME_FEAT_FID_RESV_MASK, nsid, mask, save, result); +} + int nvme_set_features_resv_persist(int fd, bool ptpl, bool save, __u32 *result) { return __nvme_set_features(fd, NVME_FEAT_FID_RESV_PERSIST, !!ptpl, save, result); } +int nvme_set_features_resv_persist2(int fd, __u32 nsid, bool ptpl, bool save, + __u32 *result) +{ + return nvme_set_features_simple( + fd, NVME_FEAT_FID_RESV_PERSIST, nsid, !!ptpl, save, result); +} + int nvme_set_features_write_protect(int fd, enum nvme_feat_nswpcfg_state state, bool save, __u32 *result) { return __nvme_set_features(fd, NVME_FEAT_FID_WRITE_PROTECT, state, - save, result); + false, result); +} + +int nvme_set_features_write_protect2(int fd, __u32 nsid, + enum nvme_feat_nswpcfg_state state, + bool save, __u32 *result) +{ + return nvme_set_features_simple( + fd, NVME_FEAT_FID_WRITE_PROTECT, nsid, state, false, result); } -int nvme_set_features_iocs_profile(int fd, __u8 iocsi, bool save) +int nvme_set_features_iocs_profile(int fd, __u16 iocsi, bool save) { __u32 value = NVME_SET(iocsi, FEAT_IOCSP_IOCSCI); @@ -898,8 +987,28 @@ int nvme_get_features_lba_range(int fd, enum nvme_get_features_sel sel, .sel = sel, .cdw11 = 0, .uuidx = NVME_UUID_NONE, - .data_len = 0, - .data = NULL, + .data_len = sizeof(*data), + .data = data, + .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, + .result = result, + }; + + return nvme_get_features(&args); +} + +int nvme_get_features_lba_range2(int fd, enum nvme_get_features_sel sel, + __u32 nsid, struct nvme_lba_range_type *data, + __u32 *result) +{ + struct nvme_get_features_args args = { + .args_size = sizeof(args), + .fd = fd, + .fid = NVME_FEAT_FID_LBA_RANGE, + .nsid = nsid, + .sel = sel, + .uuidx = NVME_UUID_NONE, + .data = data, + 
.data_len = sizeof(*data), .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, .result = result, }; @@ -920,6 +1029,24 @@ int nvme_get_features_err_recovery(int fd, enum nvme_get_features_sel sel, result); } +int nvme_get_features_err_recovery2(int fd, enum nvme_get_features_sel sel, + __u32 nsid, __u32 *result) +{ + + struct nvme_get_features_args args = { + .args_size = sizeof(args), + .fd = fd, + .fid = NVME_FEAT_FID_ERR_RECOVERY, + .nsid = nsid, + .sel = sel, + .uuidx = NVME_UUID_NONE, + .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, + .result = result, + }; + + return nvme_get_features(&args); +} + int nvme_get_features_volatile_wc(int fd, enum nvme_get_features_sel sel, __u32 *result) { @@ -945,7 +1072,7 @@ int nvme_get_features_irq_config(int fd, enum nvme_get_features_sel sel, struct nvme_get_features_args args = { .args_size = sizeof(args), .fd = fd, - .fid = NVME_FEAT_FID_LBA_RANGE, + .fid = NVME_FEAT_FID_IRQ_CONFIG, .nsid = NVME_NSID_NONE, .sel = sel, .cdw11 = iv, @@ -978,13 +1105,13 @@ int nvme_get_features_auto_pst(int fd, enum nvme_get_features_sel sel, struct nvme_get_features_args args = { .args_size = sizeof(args), .fd = fd, - .fid = NVME_FEAT_FID_LBA_RANGE, + .fid = NVME_FEAT_FID_AUTO_PST, .nsid = NVME_NSID_NONE, .sel = sel, .cdw11 = 0, .uuidx = NVME_UUID_NONE, - .data_len = 0, - .data = NULL, + .data_len = sizeof(*apst), + .data = apst, .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, .result = result, }; @@ -998,6 +1125,26 @@ int nvme_get_features_host_mem_buf(int fd, enum nvme_get_features_sel sel, return __nvme_get_features(fd, NVME_FEAT_FID_HOST_MEM_BUF, sel, result); } +int nvme_get_features_host_mem_buf2(int fd, enum nvme_get_features_sel sel, + struct nvme_host_mem_buf_attrs *attrs, + __u32 *result) +{ + struct nvme_get_features_args args = { + .args_size = sizeof(args), + .fd = fd, + .fid = NVME_FEAT_FID_HOST_MEM_BUF, + .nsid = NVME_NSID_NONE, + .sel = sel, + .uuidx = NVME_UUID_NONE, + .data = attrs, + .data_len = sizeof(*attrs), + .timeout = 
NVME_DEFAULT_IOCTL_TIMEOUT, + .result = result, + }; + + return nvme_get_features(&args); +} + int nvme_get_features_timestamp(int fd, enum nvme_get_features_sel sel, struct nvme_timestamp *ts) { @@ -1050,8 +1197,8 @@ int nvme_get_features_plm_config(int fd, enum nvme_get_features_sel sel, .sel = sel, .cdw11 = nvmsetid, .uuidx = NVME_UUID_NONE, - .data_len = 0, - .data = NULL, + .data_len = sizeof(*data), + .data = data, .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, .result = result, }; @@ -1098,8 +1245,8 @@ int nvme_get_features_host_behavior(int fd, enum nvme_get_features_sel sel, .sel = sel, .cdw11 = 0, .uuidx = NVME_UUID_NONE, - .data_len = 0, - .data = NULL, + .data_len = sizeof(*data), + .data = data, .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, .result = result, }; @@ -1122,7 +1269,7 @@ int nvme_get_features_endurance_event_cfg(int fd, enum nvme_get_features_sel sel .fid = NVME_FEAT_FID_ENDURANCE_EVT_CFG, .nsid = NVME_NSID_NONE, .sel = sel, - .cdw11 = 0, + .cdw11 = endgid, .uuidx = NVME_UUID_NONE, .data_len = 0, .data = NULL, @@ -1165,12 +1312,46 @@ int nvme_get_features_resv_mask(int fd, enum nvme_get_features_sel sel, return __nvme_get_features(fd, NVME_FEAT_FID_RESV_MASK, sel, result); } +int nvme_get_features_resv_mask2(int fd, enum nvme_get_features_sel sel, + __u32 nsid, __u32 *result) +{ + struct nvme_get_features_args args = { + .args_size = sizeof(args), + .fd = fd, + .fid = NVME_FEAT_FID_RESV_MASK, + .nsid = nsid, + .sel = sel, + .uuidx = NVME_UUID_NONE, + .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, + .result = result, + }; + + return nvme_get_features(&args); +} + int nvme_get_features_resv_persist(int fd, enum nvme_get_features_sel sel, __u32 *result) { return __nvme_get_features(fd, NVME_FEAT_FID_RESV_PERSIST, sel, result); } +int nvme_get_features_resv_persist2(int fd, enum nvme_get_features_sel sel, + __u32 nsid, __u32 *result) +{ + struct nvme_get_features_args args = { + .args_size = sizeof(args), + .fd = fd, + .fid = NVME_FEAT_FID_RESV_PERSIST, + .nsid = 
nsid, + .sel = sel, + .uuidx = NVME_UUID_NONE, + .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, + .result = result, + }; + + return nvme_get_features(&args); +} + int nvme_get_features_write_protect(int fd, __u32 nsid, enum nvme_get_features_sel sel, __u32 *result) @@ -1387,6 +1568,7 @@ int nvme_get_lba_status(struct nvme_get_lba_status_args *args) .opcode = nvme_admin_get_lba_status, .nsid = args->nsid, .addr = (__u64)(uintptr_t)args->lbas, + .data_len = (args->mndw + 1) << 2, .cdw10 = cdw10, .cdw11 = cdw11, .cdw12 = cdw12, @@ -1655,33 +1837,35 @@ static int nvme_set_var_size_tags(__u32 *cmd_dw2, __u32 *cmd_dw3, __u32 *cmd_dw1 __u8 pif, __u8 sts, __u64 reftag, __u64 storage_tag) { __u32 cdw2 = 0, cdw3 = 0, cdw14; + beint64_t be_reftag = cpu_to_be64(reftag); + beint64_t be_storage_tag = cpu_to_be64(storage_tag); switch (pif) { /* 16b Protection Information */ case 0: - cdw14 = reftag & 0xffffffff; - cdw14 |= ((storage_tag << (32 - sts)) & 0xffffffff); + cdw14 = be_reftag & 0xffffffff; + cdw14 |= ((be_storage_tag << (32 - sts)) & 0xffffffff); break; /* 32b Protection Information */ case 1: - cdw14 = reftag & 0xffffffff; - cdw3 = reftag >> 32; - cdw14 |= ((storage_tag << (80 - sts)) & 0xffff0000); + cdw14 = be_reftag & 0xffffffff; + cdw3 = be_reftag >> 32; + cdw14 |= ((be_storage_tag << (80 - sts)) & 0xffff0000); if (sts >= 48) - cdw3 |= ((storage_tag >> (sts - 48)) & 0xffffffff); + cdw3 |= ((be_storage_tag >> (sts - 48)) & 0xffffffff); else - cdw3 |= ((storage_tag << (48 - sts)) & 0xffffffff); - cdw2 = (storage_tag >> (sts - 16)) & 0xffff; + cdw3 |= ((be_storage_tag << (48 - sts)) & 0xffffffff); + cdw2 = (be_storage_tag >> (sts - 16)) & 0xffff; break; /* 64b Protection Information */ case 2: - cdw14 = reftag & 0xffffffff; - cdw3 = (reftag >> 32) & 0xffff; - cdw14 |= ((storage_tag << (48 - sts)) & 0xffffffff); + cdw14 = be_reftag & 0xffffffff; + cdw3 = (be_reftag >> 32) & 0xffff; + cdw14 |= ((be_storage_tag << (48 - sts)) & 0xffffffff); if (sts >= 16) - cdw3 |= ((storage_tag 
>> (sts - 16)) & 0xffff); + cdw3 |= ((be_storage_tag >> (sts - 16)) & 0xffff); else - cdw3 |= ((storage_tag << (16 - sts)) & 0xffff); + cdw3 |= ((be_storage_tag << (16 - sts)) & 0xffff); break; default: perror("Unsupported Protection Information Format"); @@ -1793,6 +1977,10 @@ int nvme_copy(struct nvme_copy_args *args) if (args->format == 1) data_len = args->nr * sizeof(struct nvme_copy_range_f1); + else if (args->format == 2) + data_len = args->nr * sizeof(struct nvme_copy_range_f2); + else if (args->format == 3) + data_len = args->nr * sizeof(struct nvme_copy_range_f3); else data_len = args->nr * sizeof(struct nvme_copy_range); diff --git a/src/nvme/ioctl.h b/src/nvme/ioctl.h index 4d843bc..4a0698f 100644 --- a/src/nvme/ioctl.h +++ b/src/nvme/ioctl.h @@ -748,7 +748,6 @@ static inline int nvme_identify_primary_ctrl(int fd, __u16 cntid, /** * nvme_identify_secondary_ctrl_list() - Retrieves secondary controller list * @fd: File descriptor of nvme device - * @nsid: Namespace identifier * @cntid: Return controllers starting at this identifier * @sc_list: User space destination address to transfer the data * @@ -763,7 +762,7 @@ static inline int nvme_identify_primary_ctrl(int fd, __u16 cntid, * Return: The nvme command status if a response was received (see * &enum nvme_status_field) or -1 with errno set otherwise. 
*/ -static inline int nvme_identify_secondary_ctrl_list(int fd, __u32 nsid, +static inline int nvme_identify_secondary_ctrl_list(int fd, __u16 cntid, struct nvme_secondary_ctrl_list *sc_list) { struct nvme_identify_args args = { @@ -774,7 +773,7 @@ static inline int nvme_identify_secondary_ctrl_list(int fd, __u32 nsid, .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, .cns = NVME_IDENTIFY_CNS_SECONDARY_CTRL_LIST, .csi = NVME_CSI_NVM, - .nsid = nsid, + .nsid = NVME_NSID_NONE, .cntid = cntid, .cns_specific_id = NVME_CNSSPECID_NONE, .uuidx = NVME_UUID_NONE, @@ -981,21 +980,8 @@ static inline int nvme_identify_allocated_ns_list_csi(int fd, __u32 nsid, static inline int nvme_identify_independent_identify_ns(int fd, __u32 nsid, struct nvme_id_independent_id_ns *ns) { - struct nvme_identify_args args = { - .result = NULL, - .data = ns, - .args_size = sizeof(args), - .fd = fd, - .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, - .cns = NVME_IDENTIFY_CNS_CSI_INDEPENDENT_ID_NS, - .csi = NVME_CSI_NVM, - .nsid = nsid, - .cntid = NVME_CNTLID_NONE, - .cns_specific_id = NVME_CNSSPECID_NONE, - .uuidx = NVME_UUID_NONE, - }; - - return nvme_identify(&args); + return nvme_identify_cns_nsid( + fd, NVME_IDENTIFY_CNS_CSI_INDEPENDENT_ID_NS, nsid, ns); } /** @@ -1194,20 +1180,8 @@ static inline int nvme_identify_iocs(int fd, __u16 cntlid, static inline int nvme_zns_identify_ns(int fd, __u32 nsid, struct nvme_zns_id_ns *data) { - struct nvme_identify_args args = { - .result = NULL, - .data = data, - .args_size = sizeof(args), - .fd = fd, - .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, - .cns = NVME_IDENTIFY_CNS_CSI_NS, - .csi = NVME_CSI_ZNS, - .nsid = nsid, - .cntid = NVME_CNTLID_NONE, - .cns_specific_id = NVME_CNSSPECID_NONE, - }; - - return nvme_identify(&args); + return nvme_identify_ns_csi( + fd, nsid, NVME_UUID_NONE, NVME_CSI_ZNS, data); } /** @@ -1946,7 +1920,7 @@ static inline int nvme_get_log_boot_partition(int fd, bool rae, .nsid = NVME_NSID_NONE, .csi = NVME_CSI_NVM, .lsi = NVME_LOG_LSI_NONE, - .lsp = 
NVME_LOG_LSP_NONE, + .lsp = lsp, .uuidx = NVME_UUID_NONE, .rae = rae, .ot = false, @@ -1955,6 +1929,41 @@ static inline int nvme_get_log_boot_partition(int fd, bool rae, } /** + * nvme_get_log_phy_rx_eom() - Retrieve Physical Interface Receiver Eye Opening Measurement Log + * @fd: File descriptor of nvme device + * @lsp: Log specific, controls action and measurement quality + * @controller: Target controller ID + * @len: The allocated size, minimum + * struct nvme_phy_rx_eom_log + * @log: User address to store the log page + * + * Return: The nvme command status if a response was received (see + * &enum nvme_status_field) or -1 with errno set otherwise + */ +static inline int nvme_get_log_phy_rx_eom(int fd, __u8 lsp, __u16 controller, + __u32 len, struct nvme_phy_rx_eom_log *log) +{ + struct nvme_get_log_args args = { + .lpo = 0, + .result = NULL, + .log = log, + .args_size = sizeof(args), + .fd = fd, + .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, + .lid = NVME_LOG_LID_PHY_RX_EOM, + .len = len, + .nsid = NVME_NSID_NONE, + .csi = NVME_CSI_NVM, + .lsi = controller, + .lsp = lsp, + .uuidx = NVME_UUID_NONE, + .rae = false, + .ot = false, + }; + return nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, &args); +} + +/** * nvme_get_log_discovery() - Retrieve Discovery log page * @fd: File descriptor of nvme device * @rae: Retain asynchronous events @@ -2266,7 +2275,7 @@ int nvme_set_features_power_mgmt(int fd, __u8 ps, __u8 wh, bool save, * Return: The nvme command status if a response was received (see * &enum nvme_status_field) or -1 with errno set otherwise. 
*/ -int nvme_set_features_lba_range(int fd, __u32 nsid, __u32 nr_ranges, bool save, +int nvme_set_features_lba_range(int fd, __u32 nsid, __u8 nr_ranges, bool save, struct nvme_lba_range_type *data, __u32 *result); /** @@ -2542,7 +2551,25 @@ int nvme_set_features_host_id(int fd, bool exhid, bool save, __u8 *hostid); /** * nvme_set_features_resv_mask() - Set reservation notification mask feature + * + * Deprecated: doesn't support specifying a NSID. + * Use nvme_set_features_resv_mask2() instead. + * + * @fd: File descriptor of nvme device + * @mask: Reservation Notification Mask Field + * @save: Save value across power states + * @result: The command completion result from CQE dword0 + * + * Return: The nvme command status if a response was received (see + * &enum nvme_status_field) or -1 with errno set otherwise. + */ +int nvme_set_features_resv_mask(int fd, __u32 mask, bool save, __u32 *result) + __attribute__((deprecated)); + +/** + * nvme_set_features_resv_mask2() - Set reservation notification mask feature * @fd: File descriptor of nvme device + * @nsid: Namespace ID * @mask: Reservation Notification Mask Field * @save: Save value across power states * @result: The command completion result from CQE dword0 @@ -2550,11 +2577,30 @@ int nvme_set_features_host_id(int fd, bool exhid, bool save, __u8 *hostid); * Return: The nvme command status if a response was received (see * &enum nvme_status_field) or -1 with errno set otherwise. */ -int nvme_set_features_resv_mask(int fd, __u32 mask, bool save, __u32 *result); +int nvme_set_features_resv_mask2(int fd, __u32 nsid, __u32 mask, bool save, + __u32 *result); /** * nvme_set_features_resv_persist() - Set persist through power loss feature + * + * Deprecated: doesn't support specifying a NSID. + * Use nvme_set_features_resv_persist2() instead. 
+ * + * @fd: File descriptor of nvme device + * @ptpl: Persist Through Power Loss + * @save: Save value across power states + * @result: The command completion result from CQE dword0 + * + * Return: The nvme command status if a response was received (see + * &enum nvme_status_field) or -1 with errno set otherwise. + */ +int nvme_set_features_resv_persist(int fd, bool ptpl, bool save, __u32 *result) + __attribute__((deprecated)); + +/** + * nvme_set_features_resv_persist2() - Set persist through power loss feature * @fd: File descriptor of nvme device + * @nsid: Namespace ID * @ptpl: Persist Through Power Loss * @save: Save value across power states * @result: The command completion result from CQE dword0 @@ -2562,10 +2608,15 @@ int nvme_set_features_resv_mask(int fd, __u32 mask, bool save, __u32 *result); * Return: The nvme command status if a response was received (see * &enum nvme_status_field) or -1 with errno set otherwise. */ -int nvme_set_features_resv_persist(int fd, bool ptpl, bool save, __u32 *result); +int nvme_set_features_resv_persist2(int fd, __u32 nsid, bool ptpl, bool save, + __u32 *result); /** * nvme_set_features_write_protect() - Set write protect feature + * + * Deprecated: doesn't support specifying a NSID. + * Use nvme_set_features_write_protect2() instead. + * * @fd: File descriptor of nvme device * @state: Write Protection State * @save: Save value across power states @@ -2575,7 +2626,34 @@ int nvme_set_features_resv_persist(int fd, bool ptpl, bool save, __u32 *result); * &enum nvme_status_field) or -1 with errno set otherwise. 
*/ int nvme_set_features_write_protect(int fd, enum nvme_feat_nswpcfg_state state, - bool save, __u32 *result); + bool save, __u32 *result) + __attribute__((deprecated)); + +/** + * nvme_set_features_write_protect2() - Set write protect feature + * @fd: File descriptor of nvme device + * @nsid: Namespace ID + * @state: Write Protection State + * @save: Save value across power states + * @result: The command completion result from CQE dword0 + * + * Return: The nvme command status if a response was received (see + * &enum nvme_status_field) or -1 with errno set otherwise. + */ +int nvme_set_features_write_protect2(int fd, __u32 nsid, + enum nvme_feat_nswpcfg_state state, + bool save, __u32 *result); + +/** + * nvme_set_features_iocs_profile() - Set I/O command set profile feature + * @fd: File descriptor of nvme device + * @iocsi: I/O Command Set Combination Index + * @save: Save value across power states + * + * Return: The nvme command status if a response was received (see + * &enum nvme_status_field) or -1 with errno set otherwise. + */ +int nvme_set_features_iocs_profile(int fd, __u16 iocsi, bool save); /** * nvme_get_features() - Retrieve a feature attribute @@ -2660,6 +2738,10 @@ int nvme_get_features_power_mgmt(int fd, enum nvme_get_features_sel sel, /** * nvme_get_features_lba_range() - Get LBA range feature + * + * Deprecated: doesn't support specifying a NSID. + * Use nvme_get_features_lba_range2() instead. 
+ * * @fd: File descriptor of nvme device * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel * @data: User address of feature data, if applicable @@ -2670,7 +2752,22 @@ int nvme_get_features_power_mgmt(int fd, enum nvme_get_features_sel sel, */ int nvme_get_features_lba_range(int fd, enum nvme_get_features_sel sel, struct nvme_lba_range_type *data, - __u32 *result); + __u32 *result) __attribute__((deprecated)); + +/** + * nvme_get_features_lba_range2() - Get LBA range feature + * @fd: File descriptor of nvme device + * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel + * @nsid: Namespace ID + * @data: Buffer to receive LBA Range Type data structure + * @result: The command completion result from CQE dword0 + * + * Return: The nvme command status if a response was received (see + * &enum nvme_status_field) or -1 with errno set otherwise. + */ +int nvme_get_features_lba_range2(int fd, enum nvme_get_features_sel sel, + __u32 nsid, struct nvme_lba_range_type *data, + __u32 *result); /** * nvme_get_features_temp_thresh() - Get temperature threshold feature @@ -2686,6 +2783,10 @@ int nvme_get_features_temp_thresh(int fd, enum nvme_get_features_sel sel, /** * nvme_get_features_err_recovery() - Get error recovery feature + * + * Deprecated: doesn't support specifying a NSID. + * Use nvme_get_features_err_recovery2() instead. + * * @fd: File descriptor of nvme device * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel * @result: The command completion result from CQE dword0 @@ -2694,7 +2795,20 @@ int nvme_get_features_temp_thresh(int fd, enum nvme_get_features_sel sel, * &enum nvme_status_field) or -1 with errno set otherwise. 
*/ int nvme_get_features_err_recovery(int fd, enum nvme_get_features_sel sel, - __u32 *result); + __u32 *result) __attribute__((deprecated)); + +/** + * nvme_get_features_err_recovery2() - Get error recovery feature + * @fd: File descriptor of nvme device + * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel + * @nsid: Namespace ID + * @result: The command completion result from CQE dword0 + * + * Return: The nvme command status if a response was received (see + * &enum nvme_status_field) or -1 with errno set otherwise. + */ +int nvme_get_features_err_recovery2(int fd, enum nvme_get_features_sel sel, + __u32 nsid, __u32 *result); /** * nvme_get_features_volatile_wc() - Get volatile write cache feature @@ -2784,6 +2898,10 @@ int nvme_get_features_auto_pst(int fd, enum nvme_get_features_sel sel, /** * nvme_get_features_host_mem_buf() - Get host memory buffer feature + * + * Deprecated: doesn't fetch the Host Memory Buffer Attributes data structure. + * Use nvme_get_features_host_mem_buf2() instead. + * * @fd: File descriptor of nvme device * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel * @result: The command completion result from CQE dword0 @@ -2792,7 +2910,21 @@ int nvme_get_features_auto_pst(int fd, enum nvme_get_features_sel sel, * &enum nvme_status_field) or -1 with errno set otherwise. */ int nvme_get_features_host_mem_buf(int fd, enum nvme_get_features_sel sel, - __u32 *result); + __u32 *result) __attribute__((deprecated)); + +/** + * nvme_get_features_host_mem_buf2() - Get host memory buffer feature + * @fd: File descriptor of nvme device + * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel + * @attrs: Buffer for returned Host Memory Buffer Attributes + * @result: The command completion result from CQE dword0 + * + * Return: The nvme command status if a response was received (see + * &enum nvme_status_field) or -1 with errno set otherwise. 
+ */ +int nvme_get_features_host_mem_buf2(int fd, enum nvme_get_features_sel sel, + struct nvme_host_mem_buf_attrs *attrs, + __u32 *result); /** * nvme_get_features_timestamp() - Get timestamp feature @@ -2957,6 +3089,10 @@ int nvme_get_features_host_id(int fd, enum nvme_get_features_sel sel, /** * nvme_get_features_resv_mask() - Get reservation mask feature + * + * Deprecated: doesn't support specifying a NSID. + * Use nvme_get_features_resv_mask2() instead. + * * @fd: File descriptor of nvme device * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel * @result: The command completion result from CQE dword0 @@ -2965,10 +3101,27 @@ int nvme_get_features_host_id(int fd, enum nvme_get_features_sel sel, * &enum nvme_status_field) or -1 with errno set otherwise. */ int nvme_get_features_resv_mask(int fd, enum nvme_get_features_sel sel, - __u32 *result); + __u32 *result) __attribute__((deprecated)); + +/** + * nvme_get_features_resv_mask2() - Get reservation mask feature + * @fd: File descriptor of nvme device + * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel + * @nsid: Namespace ID + * @result: The command completion result from CQE dword0 + * + * Return: The nvme command status if a response was received (see + * &enum nvme_status_field) or -1 with errno set otherwise. + */ +int nvme_get_features_resv_mask2(int fd, enum nvme_get_features_sel sel, + __u32 nsid, __u32 *result); /** * nvme_get_features_resv_persist() - Get reservation persist feature + * + * Deprecated: doesn't support specifying a NSID. + * Use nvme_get_features_resv_persist2() instead. + * * @fd: File descriptor of nvme device * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel * @result: The command completion result from CQE dword0 @@ -2977,7 +3130,20 @@ int nvme_get_features_resv_mask(int fd, enum nvme_get_features_sel sel, * &enum nvme_status_field) or -1 with errno set otherwise. 
*/ int nvme_get_features_resv_persist(int fd, enum nvme_get_features_sel sel, - __u32 *result); + __u32 *result) __attribute__((deprecated)); + +/** + * nvme_get_features_resv_persist2() - Get reservation persist feature + * @fd: File descriptor of nvme device + * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel + * @nsid: Namespace ID + * @result: The command completion result from CQE dword0 + * + * Return: The nvme command status if a response was received (see + * &enum nvme_status_field) or -1 with errno set otherwise. + */ +int nvme_get_features_resv_persist2(int fd, enum nvme_get_features_sel sel, + __u32 nsid, __u32 *result); /** * nvme_get_features_write_protect() - Get write protect feature @@ -3881,4 +4047,16 @@ int nvme_zns_append(struct nvme_zns_append_args *args); */ int nvme_dim_send(struct nvme_dim_args *args); +/** + * nvme_set_debug - Set NVMe command debugging output + * @debug: true to enable or false to disable + */ +void nvme_set_debug(bool debug); + +/** + * nvme_get_debug - Get NVMe command debugging output + * + * Return: false if disabled or true if enabled. 
+ */ +bool nvme_get_debug(void); #endif /* _LIBNVME_IOCTL_H */ diff --git a/src/nvme/json.c b/src/nvme/json.c index 7a5a69e..4d0f987 100644 --- a/src/nvme/json.c +++ b/src/nvme/json.c @@ -14,6 +14,7 @@ #include <json.h> +#include "cleanup.h" #include "fabrics.h" #include "log.h" #include "private.h" @@ -189,31 +190,34 @@ static void json_parse_host(nvme_root_t r, struct json_object *host_obj) } } +static DEFINE_CLEANUP_FUNC(cleanup_tokener, json_tokener *, json_tokener_free) +#define _cleanup_tokener_ __cleanup__(cleanup_tokener) + static struct json_object *parse_json(nvme_root_t r, int fd) { char buf[JSON_FILE_BUF_SIZE]; - struct json_object *obj = NULL; + struct json_object *obj; char *str = NULL; - json_tokener *tok = NULL; + _cleanup_tokener_ json_tokener *tok = NULL; int ret; - void *ptr = NULL; + _cleanup_free_ void *ptr = NULL; int len = 0; while ((ret = read(fd, buf, JSON_FILE_BUF_SIZE)) > 0) { str = realloc(ptr, len + ret); if (!str) - goto out; + return NULL; memcpy(&str[len], buf, ret); len += ret; ptr = str; } if (ret < 0 || !len) - goto out; + return NULL; tok = json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH); if (!tok) - goto out; + return NULL; /* Enforce correctly formatted JSON */ tok->flags = JSON_TOKENER_STRICT; @@ -222,10 +226,6 @@ static struct json_object *parse_json(nvme_root_t r, int fd) if (!obj) nvme_msg(r, LOG_DEBUG, "JSON parsing failed: %s\n", json_util_get_last_err()); -out: - if (tok) - json_tokener_free(tok); - free(ptr); return obj; } @@ -335,21 +335,21 @@ static void json_update_port(struct json_object *ctrl_array, nvme_ctrl_t c) * Store the keyring description in the JSON config file. 
*/ if (cfg->keyring) { - char *desc = nvme_describe_key_serial(cfg->keyring); + _cleanup_free_ char *desc = + nvme_describe_key_serial(cfg->keyring); if (desc) { json_object_object_add(port_obj, "keyring", json_object_new_string(desc)); - free(desc); } } if (cfg->tls_key) { - char *desc = nvme_describe_key_serial(cfg->tls_key); + _cleanup_free_ char *desc = + nvme_describe_key_serial(cfg->tls_key); if (desc) { json_object_object_add(port_obj, "tls_key", json_object_new_string(desc)); - free(desc); } } diff --git a/src/nvme/linux.c b/src/nvme/linux.c index c6eedc2..163086e 100644 --- a/src/nvme/linux.c +++ b/src/nvme/linux.c @@ -35,15 +35,17 @@ #include <ccan/endian/endian.h> +#include "cleanup.h" #include "linux.h" #include "tree.h" #include "log.h" #include "private.h" +#include "base64.h" static int __nvme_open(const char *name) { - char *path; - int fd, ret; + _cleanup_free_ char *path = NULL; + int ret; ret = asprintf(&path, "%s/%s", "/dev", name); if (ret < 0) { @@ -51,9 +53,7 @@ static int __nvme_open(const char *name) return -1; } - fd = open(path, O_RDONLY); - free(path); - return fd; + return open(path, O_RDONLY); } int nvme_open(const char *name) @@ -122,17 +122,51 @@ int nvme_fw_download_seq(int fd, __u32 size, __u32 xfer, __u32 offset, return err; } -static int nvme_get_telemetry_log(int fd, bool create, bool ctrl, bool rae, - struct nvme_telemetry_log **buf, enum nvme_telemetry_da da, - size_t *size) +int nvme_get_telemetry_max(int fd, enum nvme_telemetry_da *da, size_t *data_tx) +{ + _cleanup_free_ struct nvme_id_ctrl *id_ctrl; + int err; + + id_ctrl = __nvme_alloc(sizeof(*id_ctrl)); + if (!id_ctrl) { + errno = ENOMEM; + return -1; + } + err = nvme_identify_ctrl(fd, id_ctrl); + if (err) + return err; + + if (data_tx) { + *data_tx = id_ctrl->mdts; + if (id_ctrl->mdts) { + /* assuming CAP.MPSMIN is zero minimum Memory Page Size is at least + * 4096 bytes + */ + *data_tx = (1 << id_ctrl->mdts) * 4096; + } + } + if (da) { + if (id_ctrl->lpa & 0x8) + *da = 
NVME_TELEMETRY_DA_3; + if (id_ctrl->lpa & 0x40) + *da = NVME_TELEMETRY_DA_4; + + } + return err; +} + +int nvme_get_telemetry_log(int fd, bool create, bool ctrl, bool rae, size_t max_data_tx, + enum nvme_telemetry_da da, struct nvme_telemetry_log **buf, + size_t *size) { static const __u32 xfer = NVME_LOG_TELEM_BLOCK_SIZE; struct nvme_telemetry_log *telem; enum nvme_cmd_get_log_lid lid; - struct nvme_id_ctrl id_ctrl; - void *log, *tmp; + _cleanup_free_ void *log; + void *tmp; int err; + size_t dalb; struct nvme_get_log_args args = { .args_size = sizeof(args), .fd = fd, @@ -167,89 +201,101 @@ static int nvme_get_telemetry_log(int fd, bool create, bool ctrl, bool rae, } if (err) - goto free; + return err; telem = log; if (ctrl && !telem->ctrlavail) { *buf = log; + log = NULL; *size = xfer; return 0; } switch (da) { case NVME_TELEMETRY_DA_1: + dalb = le16_to_cpu(telem->dalb1); + break; case NVME_TELEMETRY_DA_2: + dalb = le16_to_cpu(telem->dalb2); + break; case NVME_TELEMETRY_DA_3: /* dalb3 >= dalb2 >= dalb1 */ - *size = (le16_to_cpu(telem->dalb3) + 1) * xfer; + dalb = le16_to_cpu(telem->dalb3); break; case NVME_TELEMETRY_DA_4: - err = nvme_identify_ctrl(fd, &id_ctrl); - if (err) { - perror("identify-ctrl"); - errno = EINVAL; - goto free; - } - - if (id_ctrl.lpa & 0x40) { - *size = (le32_to_cpu(telem->dalb4) + 1) * xfer; - } else { - fprintf(stderr, "Data area 4 unsupported, bit 6 of Log Page Attributes not set\n"); - errno = EINVAL; - err = -1; - goto free; - } + dalb = le32_to_cpu(telem->dalb4); break; default: - fprintf(stderr, "Invalid data area parameter - %d\n", da); errno = EINVAL; - err = -1; - goto free; + return -1; } + if (dalb == 0) { + errno = ENOENT; + return -1; + } + + *size = (dalb + 1) * xfer; tmp = realloc(log, *size); if (!tmp) { errno = ENOMEM; - err = -1; - goto free; + return -1; } log = tmp; args.lid = lid; args.log = log; args.len = *size; - err = nvme_get_log_page(fd, 4096, &args); - if (!err) { - *buf = log; - return 0; + err = 
nvme_get_log_page(fd, max_data_tx, &args); + if (err) + return err; + + *buf = log; + log = NULL; + return 0; +} + + +static int nvme_check_get_telemetry_log(int fd, bool create, bool ctrl, bool rae, + struct nvme_telemetry_log **log, enum nvme_telemetry_da da, + size_t *size) +{ + enum nvme_telemetry_da max_da = 0; + int err = nvme_get_telemetry_max(fd, &max_da, NULL); + + if (err) + return err; + if (da > max_da) { + errno = ENOENT; + return -1; } -free: - free(log); - return err; + return nvme_get_telemetry_log(fd, create, ctrl, rae, 4096, da, log, size); } + int nvme_get_ctrl_telemetry(int fd, bool rae, struct nvme_telemetry_log **log, enum nvme_telemetry_da da, size_t *size) { - return nvme_get_telemetry_log(fd, false, true, rae, log, da, size); + return nvme_check_get_telemetry_log(fd, false, true, rae, log, da, size); } int nvme_get_host_telemetry(int fd, struct nvme_telemetry_log **log, enum nvme_telemetry_da da, size_t *size) { - return nvme_get_telemetry_log(fd, false, false, false, log, da, size); + return nvme_check_get_telemetry_log(fd, false, false, false, log, da, size); } int nvme_get_new_host_telemetry(int fd, struct nvme_telemetry_log **log, enum nvme_telemetry_da da, size_t *size) { - return nvme_get_telemetry_log(fd, true, false, false, log, da, size); + return nvme_check_get_telemetry_log(fd, true, false, false, log, da, size); } int nvme_get_lba_status_log(int fd, bool rae, struct nvme_lba_status_log **log) { - __u32 size = sizeof(struct nvme_lba_status_log); - void *buf, *tmp; + __u32 size; + _cleanup_free_ struct nvme_lba_status_log *buf; + void *tmp; int err; struct nvme_get_log_args args = { .args_size = sizeof(args), @@ -265,38 +311,42 @@ int nvme_get_lba_status_log(int fd, bool rae, struct nvme_lba_status_log **log) .ot = false, }; - buf = malloc(size); + buf = malloc(sizeof(*buf)); if (!buf) return -1; - *log = buf; - err = nvme_get_log_lba_status(fd, true, 0, size, buf); - if (err) - goto free; + err = nvme_get_log_lba_status(fd, true, 
0, sizeof(*buf), buf); + if (err) { + *log = NULL; + return err; + } - size = le32_to_cpu((*log)->lslplen); - if (!size) + size = le32_to_cpu(buf->lslplen); + if (!size) { + *log = buf; + buf = NULL; return 0; + } tmp = realloc(buf, size); if (!tmp) { - err = -1; - goto free; + *log = NULL; + return -1; } buf = tmp; - *log = buf; args.lid = NVME_LOG_LID_LBA_STATUS; args.log = buf; args.len = size; err = nvme_get_log_page(fd, 4096, &args); - if (!err) - return 0; + if (err) { + *log = NULL; + return err; + } -free: - *log = NULL; - free(buf); - return err; + *log = buf; + buf = NULL; + return 0; } static int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls, @@ -335,38 +385,48 @@ int nvme_namespace_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, int nvme_get_ana_log_len(int fd, size_t *analen) { - struct nvme_id_ctrl ctrl; + _cleanup_free_ struct nvme_id_ctrl *ctrl; int ret; - ret = nvme_identify_ctrl(fd, &ctrl); + ctrl = __nvme_alloc(sizeof(*ctrl)); + if (!ctrl) { + errno = ENOMEM; + return -1; + } + ret = nvme_identify_ctrl(fd, ctrl); if (ret) return ret; *analen = sizeof(struct nvme_ana_log) + - le32_to_cpu(ctrl.nanagrpid) * sizeof(struct nvme_ana_group_desc) + - le32_to_cpu(ctrl.mnan) * sizeof(__le32); + le32_to_cpu(ctrl->nanagrpid) * sizeof(struct nvme_ana_group_desc) + + le32_to_cpu(ctrl->mnan) * sizeof(__le32); return 0; } int nvme_get_logical_block_size(int fd, __u32 nsid, int *blksize) { - struct nvme_id_ns ns; + _cleanup_free_ struct nvme_id_ns *ns; __u8 flbas; int ret; - ret = nvme_identify_ns(fd, nsid, &ns); + ns = __nvme_alloc(sizeof(*ns)); + if (!ns) { + errno = ENOMEM; + return -1; + } + ret = nvme_identify_ns(fd, nsid, ns); if (ret) return ret; - nvme_id_ns_flbas_to_lbaf_inuse(ns.flbas, &flbas); - *blksize = 1 << ns.lbaf[flbas].ds; + nvme_id_ns_flbas_to_lbaf_inuse(ns->flbas, &flbas); + *blksize = 1 << ns->lbaf[flbas].ds; return 0; } static int __nvme_set_attr(const char *path, const char *value) { - int ret, fd; + _cleanup_fd_ int fd; fd = 
open(path, O_WRONLY); if (fd < 0) { @@ -376,23 +436,19 @@ static int __nvme_set_attr(const char *path, const char *value) #endif return -1; } - ret = write(fd, value, strlen(value)); - close(fd); - return ret; + return write(fd, value, strlen(value)); } int nvme_set_attr(const char *dir, const char *attr, const char *value) { - char *path; + _cleanup_free_ char *path = NULL; int ret; ret = asprintf(&path, "%s/%s", dir, attr); if (ret < 0) return -1; - ret = __nvme_set_attr(path, value); - free(path); - return ret; + return __nvme_set_attr(path, value); } static char *__nvme_get_attr(const char *path) @@ -426,7 +482,7 @@ static char *__nvme_get_attr(const char *path) char *nvme_get_attr(const char *dir, const char *attr) { - char *path, *value; + _cleanup_free_ char *path = NULL; int ret; ret = asprintf(&path, "%s/%s", dir, attr); @@ -435,9 +491,7 @@ char *nvme_get_attr(const char *dir, const char *attr) return NULL; } - value = __nvme_get_attr(path); - free(path); - return value; + return __nvme_get_attr(path); } char *nvme_get_subsys_attr(nvme_subsystem_t s, const char *attr) @@ -476,21 +530,80 @@ int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac, return 0; } -static int derive_nvme_keys(const char *hostnqn, const char *identity, - int hmac, unsigned char *configured, - unsigned char *psk, int key_len) +static int derive_retained_key(int hmac, const char *hostnqn, + unsigned char *generated, + unsigned char *retained, + size_t key_len) +{ + nvme_msg(NULL, LOG_ERR, "NVMe TLS is not supported; " + "recompile with OpenSSL support.\n"); + errno = ENOTSUP; + return -1; +} + +static int gen_tls_identity(const char *hostnqn, const char *subsysnqn, + int version, int hmac, char *identity, + unsigned char *retained, size_t key_len) { - errno = EOPNOTSUPP; + if (version != 0) { + nvme_msg(NULL, LOG_ERR, "NVMe TLS 2.0 is not supported; " + "recompile with OpenSSL support.\n"); + errno = ENOTSUP; + return -1; + } + sprintf(identity, "NVMe0R%02d %s %s", + hmac, 
hostnqn, subsysnqn); + return strlen(identity); +} + +static int derive_tls_key(int hmac, const char *identity, + unsigned char *retained, + unsigned char *psk, size_t key_len) +{ + nvme_msg(NULL, LOG_ERR, "NVMe TLS is not supported; " + "recompile with OpenSSL support.\n"); + errno = ENOTSUP; return -1; } #else /* CONFIG_OPENSSL */ -static int derive_retained_key(const EVP_MD *md, const char *hostnqn, +static const EVP_MD *select_hmac(int hmac, size_t *key_len) +{ + const EVP_MD *md = NULL; + + switch (hmac) { + case NVME_HMAC_ALG_SHA2_256: + md = EVP_sha256(); + *key_len = 32; + break; + case NVME_HMAC_ALG_SHA2_384: + md = EVP_sha384(); + *key_len = 48; + break; + default: + break; + } + return md; +} + +static DEFINE_CLEANUP_FUNC( + cleanup_evp_pkey_ctx, EVP_PKEY_CTX *, EVP_PKEY_CTX_free) +#define _cleanup_evp_pkey_ctx_ __cleanup__(cleanup_evp_pkey_ctx) + +static int derive_retained_key(int hmac, const char *hostnqn, unsigned char *generated, unsigned char *retained, size_t key_len) { - EVP_PKEY_CTX *ctx; - int ret; + const EVP_MD *md; + _cleanup_evp_pkey_ctx_ EVP_PKEY_CTX *ctx = NULL; + uint16_t length = key_len & 0xFFFF; + size_t hmac_len; + + md = select_hmac(hmac, &hmac_len); + if (!md || hmac_len > key_len) { + errno = EINVAL; + return -1; + } ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL); if (!ctx) { @@ -499,42 +612,60 @@ static int derive_retained_key(const EVP_MD *md, const char *hostnqn, } if (EVP_PKEY_derive_init(ctx) <= 0) { - ret = -ENOMEM; - goto out_free_ctx; - } - ret = -ENOKEY; - if (EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0) - goto out_free_ctx; - if (EVP_PKEY_CTX_set1_hkdf_key(ctx, generated, key_len) <= 0) - goto out_free_ctx; + errno = ENOMEM; + return -1; + } + if (EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0) { + errno = ENOKEY; + return -1; + } + if (EVP_PKEY_CTX_set1_hkdf_key(ctx, generated, key_len) <= 0) { + errno = ENOKEY; + return -1; + } if (EVP_PKEY_CTX_add1_hkdf_info(ctx, - (const unsigned char *)"tls13 ", 6) <= 0) - goto out_free_ctx; + 
(const unsigned char *)&length, 2) <= 0) { + errno = ENOKEY; + return -1; + } if (EVP_PKEY_CTX_add1_hkdf_info(ctx, - (const unsigned char *)"HostNQN", 7) <= 0) - goto out_free_ctx; + (const unsigned char *)"tls13 ", 6) <= 0) { + errno = ENOKEY; + return -1; + } if (EVP_PKEY_CTX_add1_hkdf_info(ctx, - (const unsigned char *)hostnqn, strlen(hostnqn)) <= 0) - goto out_free_ctx; - - if (EVP_PKEY_derive(ctx, retained, &key_len) > 0) - ret = key_len; + (const unsigned char *)"HostNQN", 7) <= 0) { + errno = ENOKEY; + return -1; + } + if (EVP_PKEY_CTX_add1_hkdf_info(ctx, + (const unsigned char *)hostnqn, strlen(hostnqn)) <= 0) { + errno = ENOKEY; + return -1; + } -out_free_ctx: - if (ret < 0) { - errno = -ret; - ret = -1; + if (EVP_PKEY_derive(ctx, retained, &key_len) <= 0) { + errno = ENOKEY; + return -1; } - EVP_PKEY_CTX_free(ctx); - return ret; + + return key_len; } -static int derive_tls_key(const EVP_MD *md, const char *identity, +static int derive_tls_key(int hmac, const char *identity, unsigned char *retained, unsigned char *psk, size_t key_len) { - EVP_PKEY_CTX *ctx; - int ret; + const EVP_MD *md; + _cleanup_evp_pkey_ctx_ EVP_PKEY_CTX *ctx = NULL; + size_t hmac_len; + uint16_t length = key_len & 0xFFFF; + + md = select_hmac(hmac, &hmac_len); + if (!md || hmac_len > key_len) { + errno = EINVAL; + return -1; + } ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL); if (!ctx) { @@ -543,85 +674,59 @@ static int derive_tls_key(const EVP_MD *md, const char *identity, } if (EVP_PKEY_derive_init(ctx) <= 0) { - ret = -ENOMEM; - goto out_free_ctx; - } - ret = -ENOKEY; - if (EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0) - goto out_free_ctx; - if (EVP_PKEY_CTX_set1_hkdf_key(ctx, retained, key_len) <= 0) - goto out_free_ctx; - if (EVP_PKEY_CTX_add1_hkdf_info(ctx, - (const unsigned char *)"tls13 ", 6) <= 0) - goto out_free_ctx; + errno = ENOMEM; + return -1; + } + if (EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0) { + errno = ENOKEY; + return -1; + } + if (EVP_PKEY_CTX_set1_hkdf_key(ctx, retained, 
key_len) <= 0) { + errno = ENOKEY; + return -1; + } if (EVP_PKEY_CTX_add1_hkdf_info(ctx, - (const unsigned char *)"nvme-tls-psk", 12) <= 0) - goto out_free_ctx; + (const unsigned char *)&length, 2) <= 0) { + errno = ENOKEY; + return -1; + } if (EVP_PKEY_CTX_add1_hkdf_info(ctx, - (const unsigned char *)identity, - strlen(identity)) <= 0) - goto out_free_ctx; - - if (EVP_PKEY_derive(ctx, psk, &key_len) > 0) - ret = key_len; - -out_free_ctx: - EVP_PKEY_CTX_free(ctx); - if (ret < 0) { - errno = -ret; - ret = -1; + (const unsigned char *)"tls13 ", 6) <= 0) { + errno = ENOKEY; + return -1; } - - return ret; -} - -static int derive_nvme_keys(const char *hostnqn, const char *identity, - int hmac, unsigned char *configured, - unsigned char *psk, int key_len) -{ - const EVP_MD *md; - unsigned char *retained; - int ret = -1; - - if (!hostnqn || !identity) { - errno = EINVAL; + if (EVP_PKEY_CTX_add1_hkdf_info(ctx, + (const unsigned char *)"nvme-tls-psk", 12) <= 0) { + errno = ENOKEY; return -1; } - - switch (hmac) { - case 1: - md = EVP_sha256(); - break; - case 2: - md = EVP_sha384(); - break; - default: - errno = EINVAL; + if (EVP_PKEY_CTX_add1_hkdf_info(ctx, + (const unsigned char *)identity, + strlen(identity)) <= 0) { + errno = ENOKEY; return -1; } - retained = malloc(key_len); - if (!retained) { - errno = ENOMEM; + if (EVP_PKEY_derive(ctx, psk, &key_len) <= 0) { + errno = ENOKEY; return -1; } - ret = derive_retained_key(md, hostnqn, configured, retained, key_len); - if (ret > 0) - ret = derive_tls_key(md, identity, retained, psk, key_len); - free(retained); - return ret; + + return key_len; } #endif /* CONFIG_OPENSSL */ #ifdef CONFIG_OPENSSL_1 +static DEFINE_CLEANUP_FUNC(cleanup_hmac_ctx, HMAC_CTX *, HMAC_CTX_free) +#define _cleanup_hmac_ctx_ __cleanup__(cleanup_hmac_ctx) + int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac, unsigned int key_len, unsigned char *secret, unsigned char *key) { const char hmac_seed[] = "NVMe-over-Fabrics"; - HMAC_CTX *hmac_ctx; + 
_cleanup_hmac_ctx_ HMAC_CTX *hmac_ctx; const EVP_MD *md; - int err = -1; ENGINE_load_builtin_engines(); ENGINE_register_all_complete(); @@ -629,14 +734,13 @@ int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac, hmac_ctx = HMAC_CTX_new(); if (!hmac_ctx) { errno = ENOMEM; - return err; + return -1; } switch (hmac) { case NVME_HMAC_ALG_NONE: memcpy(key, secret, key_len); - err = 0; - goto out; + return 0; case NVME_HMAC_ALG_SHA2_256: md = EVP_sha256(); break; @@ -648,82 +752,164 @@ int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac, break; default: errno = EINVAL; - goto out; + return -1; } if (!md) { errno = EINVAL; - goto out; + return -1; } if (!HMAC_Init_ex(hmac_ctx, secret, key_len, md, NULL)) { errno = ENOMEM; - goto out; + return -1; } if (!HMAC_Update(hmac_ctx, (unsigned char *)hostnqn, strlen(hostnqn))) { errno = ENOKEY; - goto out; + return -1; } if (!HMAC_Update(hmac_ctx, (unsigned char *)hmac_seed, strlen(hmac_seed))) { errno = ENOKEY; - goto out; + return -1; } if (!HMAC_Final(hmac_ctx, key, &key_len)) { errno = ENOKEY; - goto out; + return -1; } - err = 0; + return 0; +} -out: - HMAC_CTX_free(hmac_ctx); - return err; +static int gen_tls_identity(const char *hostnqn, const char *subsysnqn, + int version, int hmac, char *identity, + unsigned char *retained, size_t key_len) +{ + static const char hmac_seed[] = "NVMe-over-Fabrics"; + size_t hmac_len; + const EVP_MD *md = select_hmac(hmac, &hmac_len); + _cleanup_hmac_ctx_ HMAC_CTX *hmac_ctx = NULL; + _cleanup_free_ unsigned char *psk_ctx = NULL; + _cleanup_free_ char *enc_ctx = NULL; + size_t len; + + if (version == 0) { + sprintf(identity, "NVMe%01dR%02d %s %s", + version, hmac, hostnqn, subsysnqn); + return strlen(identity); + } + if (version > 1) { + errno = EINVAL; + return -1; + } + + hmac_ctx = HMAC_CTX_new(); + if (!hmac_ctx) { + errno = ENOMEM; + return -1; + } + if (!md) { + errno = EINVAL; + return -1; + } + + psk_ctx = malloc(key_len); + if (!psk_ctx) { + errno = ENOMEM; + 
return -1; + } + if (!HMAC_Init_ex(hmac_ctx, retained, key_len, md, NULL)) { + errno = ENOMEM; + return -1; + } + if (!HMAC_Update(hmac_ctx, (unsigned char *)hostnqn, + strlen(hostnqn))) { + errno = ENOKEY; + return -1; + } + if (!HMAC_Update(hmac_ctx, (unsigned char *)" ", 1)) { + errno = ENOKEY; + return -1; + } + if (!HMAC_Update(hmac_ctx, (unsigned char *)subsysnqn, + strlen(subsysnqn))) { + errno = ENOKEY; + return -1; + } + if (!HMAC_Update(hmac_ctx, (unsigned char *)" ", 1)) { + errno = ENOKEY; + return -1; + } + if (!HMAC_Update(hmac_ctx, (unsigned char *)hmac_seed, + strlen(hmac_seed))) { + errno = ENOKEY; + return -1; + } + if (!HMAC_Final(hmac_ctx, psk_ctx, (unsigned int *)&key_len)) { + errno = ENOKEY; + return -1; + } + enc_ctx = malloc(key_len * 2); + memset(enc_ctx, 0, key_len * 2); + len = base64_encode(psk_ctx, key_len, enc_ctx); + if (len < 0) { + errno = ENOKEY; + return len; + } + sprintf(identity, "NVMe%01dR%02d %s %s %s", + version, hmac, hostnqn, subsysnqn, enc_ctx); + return strlen(identity); } #endif /* !CONFIG_OPENSSL_1 */ #ifdef CONFIG_OPENSSL_3 +static DEFINE_CLEANUP_FUNC( + cleanup_ossl_lib_ctx, OSSL_LIB_CTX *, OSSL_LIB_CTX_free) +#define _cleanup_ossl_lib_ctx_ __cleanup__(cleanup_ossl_lib_ctx) +static DEFINE_CLEANUP_FUNC(cleanup_evp_mac_ctx, EVP_MAC_CTX *, EVP_MAC_CTX_free) +#define _cleanup_evp_mac_ctx_ __cleanup__(cleanup_evp_mac_ctx) +static DEFINE_CLEANUP_FUNC(cleanup_evp_mac, EVP_MAC *, EVP_MAC_free) +#define _cleanup_evp_mac_ __cleanup__(cleanup_evp_mac) + int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac, unsigned int key_len, unsigned char *secret, unsigned char *key) { const char hmac_seed[] = "NVMe-over-Fabrics"; OSSL_PARAM params[2], *p = params; - OSSL_LIB_CTX *lib_ctx; - EVP_MAC_CTX *mac_ctx = NULL; - EVP_MAC *mac = NULL; + _cleanup_ossl_lib_ctx_ OSSL_LIB_CTX *lib_ctx; + _cleanup_evp_mac_ctx_ EVP_MAC_CTX *mac_ctx = NULL; + _cleanup_evp_mac_ EVP_MAC *mac = NULL; char *progq = NULL; char *digest; size_t len; - 
int err = -1; lib_ctx = OSSL_LIB_CTX_new(); if (!lib_ctx) { errno = ENOMEM; - return err; + return -1; } mac = EVP_MAC_fetch(lib_ctx, OSSL_MAC_NAME_HMAC, progq); if (!mac) { errno = ENOMEM; - goto out; + return -1; } mac_ctx = EVP_MAC_CTX_new(mac); if (!mac_ctx) { errno = ENOMEM; - goto out; + return -1; } switch (hmac) { case NVME_HMAC_ALG_NONE: memcpy(key, secret, key_len); - err = 0; - goto out; + return 0; case NVME_HMAC_ALG_SHA2_256: digest = OSSL_DIGEST_NAME_SHA2_256; break; @@ -735,7 +921,7 @@ int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac, break; default: errno = EINVAL; - goto out; + return -1; } *p++ = OSSL_PARAM_construct_utf8_string(OSSL_MAC_PARAM_DIGEST, digest, @@ -744,42 +930,224 @@ int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac, if (!EVP_MAC_init(mac_ctx, secret, key_len, params)) { errno = ENOKEY; - goto out; + return -1; } if (!EVP_MAC_update(mac_ctx, (unsigned char *)hostnqn, strlen(hostnqn))) { errno = ENOKEY; - goto out; + return -1; } if (!EVP_MAC_update(mac_ctx, (unsigned char *)hmac_seed, strlen(hmac_seed))) { errno = ENOKEY; - goto out; + return -1; } if (!EVP_MAC_final(mac_ctx, key, &len, key_len)) { errno = ENOKEY; - goto out; + return -1; } if (len != key_len) { errno = EMSGSIZE; - goto out; + return -1; } - err = 0; + return 0; +} -out: - EVP_MAC_CTX_free(mac_ctx); - EVP_MAC_free(mac); - OSSL_LIB_CTX_free(lib_ctx); +static int gen_tls_identity(const char *hostnqn, const char *subsysnqn, + int version, int hmac, char *identity, + unsigned char *retained, size_t key_len) +{ + static const char hmac_seed[] = "NVMe-over-Fabrics"; + size_t hmac_len; + OSSL_PARAM params[2], *p = params; + _cleanup_ossl_lib_ctx_ OSSL_LIB_CTX *lib_ctx = NULL; + _cleanup_evp_mac_ctx_ EVP_MAC_CTX *mac_ctx = NULL; + _cleanup_evp_mac_ EVP_MAC *mac = NULL; + char *progq = NULL; + char *digest = NULL; + _cleanup_free_ unsigned char *psk_ctx = NULL; + _cleanup_free_ char *enc_ctx = NULL; + size_t len; - return err; + if (version == 0) { 
+ sprintf(identity, "NVMe%01dR%02d %s %s", + version, hmac, hostnqn, subsysnqn); + return strlen(identity); + } + if (version > 1) { + errno = EINVAL; + return -1; + } + + lib_ctx = OSSL_LIB_CTX_new(); + if (!lib_ctx) { + errno = ENOMEM; + return -1; + } + mac = EVP_MAC_fetch(lib_ctx, OSSL_MAC_NAME_HMAC, progq); + if (!mac) { + errno = ENOMEM; + return -1; + } + + mac_ctx = EVP_MAC_CTX_new(mac); + if (!mac_ctx) { + errno = ENOMEM; + return -1; + } + switch (hmac) { + case NVME_HMAC_ALG_SHA2_256: + digest = OSSL_DIGEST_NAME_SHA2_256; + break; + case NVME_HMAC_ALG_SHA2_384: + digest = OSSL_DIGEST_NAME_SHA2_384; + break; + default: + errno = EINVAL; + break; + } + if (!digest) + return -1; + *p++ = OSSL_PARAM_construct_utf8_string(OSSL_MAC_PARAM_DIGEST, + digest, 0); + *p = OSSL_PARAM_construct_end(); + + psk_ctx = malloc(key_len); + if (!psk_ctx) { + errno = ENOMEM; + return -1; + } + + if (!EVP_MAC_init(mac_ctx, retained, key_len, params)) { + errno = ENOKEY; + return -1; + } + if (!EVP_MAC_update(mac_ctx, (unsigned char *)hostnqn, + strlen(hostnqn))) { + errno = ENOKEY; + return -1; + } + if (!EVP_MAC_update(mac_ctx, (unsigned char *)" ", 1)) { + errno = ENOKEY; + return -1; + } + if (!EVP_MAC_update(mac_ctx, (unsigned char *)subsysnqn, + strlen(subsysnqn))) { + errno = ENOKEY; + return -1; + } + if (!EVP_MAC_update(mac_ctx, (unsigned char *)" ", 1)) { + errno = ENOKEY; + return -1; + } + if (!EVP_MAC_update(mac_ctx, (unsigned char *)hmac_seed, + strlen(hmac_seed))) { + errno = ENOKEY; + return -1; + } + if (!EVP_MAC_final(mac_ctx, psk_ctx, &hmac_len, key_len)) { + errno = ENOKEY; + return -1; + } + if (hmac_len > key_len) { + errno = EMSGSIZE; + return -1; + } + enc_ctx = malloc(hmac_len * 2); + memset(enc_ctx, 0, hmac_len * 2); + len = base64_encode(psk_ctx, hmac_len, enc_ctx); + if (len < 0) { + errno = ENOKEY; + return len; + } + sprintf(identity, "NVMe%01dR%02d %s %s %s", + version, hmac, hostnqn, subsysnqn, enc_ctx); + return strlen(identity); } #endif /* 
!CONFIG_OPENSSL_3 */ +static int derive_nvme_keys(const char *hostnqn, const char *subsysnqn, + char *identity, int version, + int hmac, unsigned char *configured, + unsigned char *psk, int key_len) +{ + _cleanup_free_ unsigned char *retained = NULL; + int ret = -1; + + if (!hostnqn || !subsysnqn || !identity || !psk) { + errno = EINVAL; + return -1; + } + + retained = malloc(key_len); + if (!retained) { + errno = ENOMEM; + return -1; + } + ret = derive_retained_key(hmac, hostnqn, configured, retained, key_len); + if (ret < 0) + return ret; + ret = gen_tls_identity(hostnqn, subsysnqn, version, hmac, + identity, retained, key_len); + if (ret < 0) + return ret; + return derive_tls_key(hmac, identity, retained, psk, key_len); +} + +static size_t nvme_identity_len(int hmac, int version, const char *hostnqn, + const char *subsysnqn) +{ + size_t len; + + len = strlen(hostnqn) + strlen(subsysnqn) + 12; + if (version == 1) { + len += 66; + if (hmac == NVME_HMAC_ALG_SHA2_384) + len += 32; + } else if (version > 1) { + errno = EINVAL; + return -1; + } + return len; +} + +char *nvme_generate_tls_key_identity(const char *hostnqn, const char *subsysnqn, + int version, int hmac, + unsigned char *configured_key, int key_len) +{ + char *identity; + size_t identity_len; + _cleanup_free_ unsigned char *psk = NULL; + int ret = -1; + + identity_len = nvme_identity_len(hmac, version, hostnqn, subsysnqn); + if (identity_len < 0) + return NULL; + + identity = malloc(identity_len); + if (!identity) + return NULL; + + psk = malloc(key_len); + if (!psk) + goto out_free_identity; + + memset(psk, 0, key_len); + ret = derive_nvme_keys(hostnqn, subsysnqn, identity, version, hmac, + configured_key, psk, key_len); +out_free_identity: + if (ret < 0) { + free(identity); + identity = NULL; + } + return identity; +} + #ifdef CONFIG_KEYUTILS long nvme_lookup_keyring(const char *keyring) { @@ -820,37 +1188,41 @@ int nvme_set_keyring(long key_id) return 0; } -long nvme_insert_tls_key(const char 
*keyring, const char *key_type, - const char *hostnqn, const char *subsysnqn, int hmac, - unsigned char *configured_key, int key_len) +long nvme_insert_tls_key_versioned(const char *keyring, const char *key_type, + const char *hostnqn, const char *subsysnqn, + int version, int hmac, + unsigned char *configured_key, int key_len) { - key_serial_t keyring_id, key = 0; - char *identity; - unsigned char *psk; + key_serial_t keyring_id, key; + _cleanup_free_ char *identity = NULL; + size_t identity_len; + _cleanup_free_ unsigned char *psk = NULL; int ret = -1; keyring_id = nvme_lookup_keyring(keyring); if (keyring_id == 0) return -1; - identity = malloc(strlen(hostnqn) + strlen(subsysnqn) + 12); + identity_len = nvme_identity_len(hmac, version, hostnqn, subsysnqn); + if (identity_len < 0) + return -1; + + identity = malloc(identity_len); if (!identity) { errno = ENOMEM; return -1; } - sprintf(identity, "NVMe0R%02d %s %s", hmac, hostnqn, subsysnqn); - psk = malloc(key_len); if (!psk) { errno = ENOMEM; - goto out_free_identity; + return 0; } memset(psk, 0, key_len); - ret = derive_nvme_keys(hostnqn, identity, hmac, + ret = derive_nvme_keys(hostnqn, subsysnqn, identity, version, hmac, configured_key, psk, key_len); if (ret != key_len) - goto out_free_psk; + return 0; key = keyctl_search(keyring_id, key_type, identity, 0); if (key > 0) { @@ -862,10 +1234,6 @@ long nvme_insert_tls_key(const char *keyring, const char *key_type, if (key < 0) key = 0; } -out_free_psk: - free(psk); -out_free_identity: - free(identity); return key; } @@ -902,10 +1270,23 @@ int nvme_set_keyring(long key_id) return -1; } +long nvme_insert_tls_key_versioned(const char *keyring, const char *key_type, + const char *hostnqn, const char *subsysnqn, + int version, int hmac, + unsigned char *configured_key, int key_len) +{ + nvme_msg(NULL, LOG_ERR, "key operations not supported; " + "recompile with keyutils support.\n"); + errno = ENOTSUP; + return -1; +} +#endif + long nvme_insert_tls_key(const char 
*keyring, const char *key_type, const char *hostnqn, const char *subsysnqn, int hmac, unsigned char *configured_key, int key_len) { - return derive_nvme_keys(NULL, NULL, 0, NULL, NULL, 0); + return nvme_insert_tls_key_versioned(keyring, key_type, + hostnqn, subsysnqn, 0, hmac, + configured_key, key_len); } -#endif diff --git a/src/nvme/linux.h b/src/nvme/linux.h index 37ba9d4..11ee76e 100644 --- a/src/nvme/linux.h +++ b/src/nvme/linux.h @@ -49,6 +49,37 @@ enum nvme_telemetry_da { }; /** + * nvme_get_telemetry_max() - Get telemetry limits + * @fd: File descriptor of nvme device + * @da: On success return max supported data area + * @max_data_tx: On success set to max transfer chunk supported by the controller + * + * Return: The nvme command status if a response was received (see + * &enum nvme_status_field) or -1 with errno set otherwise. + */ +int nvme_get_telemetry_max(int fd, enum nvme_telemetry_da *da, size_t *max_data_tx); + +/** + * nvme_get_telemetry_log() - Get specified telemetry log + * @fd: File descriptor of nvme device + * @create: Generate new host initated telemetry capture + * @ctrl: Get controller Initiated log + * @rae: Retain asynchronous events + * @max_data_tx: Set the max data transfer size to be used retrieving telemetry. + * @da: Log page data area, valid values: &enum nvme_telemetry_da. + * @log: On success, set to the value of the allocated and retrieved log. + * @size: Ptr to the telemetry log size, so it can be returned + * + * The total size allocated can be calculated as: + * (nvme_telemetry_log da size + 1) * NVME_LOG_TELEM_BLOCK_SIZE. + * + * Return: The nvme command status if a response was received (see + * &enum nvme_status_field) or -1 with errno set otherwise. 
+ */ +int nvme_get_telemetry_log(int fd, bool create, bool ctrl, bool rae, size_t max_data_tx, + enum nvme_telemetry_da da, struct nvme_telemetry_log **log, + size_t *size); +/** * nvme_get_ctrl_telemetry() - Get controller telemetry log * @fd: File descriptor of nvme device * @rae: Retain asynchronous events @@ -262,4 +293,46 @@ long nvme_insert_tls_key(const char *keyring, const char *key_type, const char *hostnqn, const char *subsysnqn, int hmac, unsigned char *configured_key, int key_len); +/** + * nvme_insert_tls_key_versioned() - Derive and insert TLS key + * @keyring: Keyring to use + * @key_type: Type of the resulting key + * @hostnqn: Host NVMe Qualified Name + * @subsysnqn: Subsystem NVMe Qualified Name + * @version: Key version to use + * @hmac: HMAC algorithm + * @configured_key: Configured key data to derive the key from + * @key_len: Length of @configured_key + * + * Derives a 'retained' TLS key as specified in NVMe TCP 1.0a (if + * @version s set to '0') or NVMe TP8028 (if @version is set to '1) and + * stores it as type @key_type in the keyring specified by @keyring. + * + * Return: The key serial number if the key could be inserted into + * the keyring or 0 with errno otherwise. + */ +long nvme_insert_tls_key_versioned(const char *keyring, const char *key_type, + const char *hostnqn, const char *subsysnqn, + int version, int hmac, + unsigned char *configured_key, int key_len); + +/** + * nvme_generate_tls_key_identity() - Generate the TLS key identity + * @hostnqn: Host NVMe Qualified Name + * @subsysnqn: Subsystem NVMe Qualified Name + * @version: Key version to use + * @hmac: HMAC algorithm + * @configured_key: Configured key data to derive the key from + * @key_len: Length of @configured_key + * + * Derives a 'retained' TLS key as specified in NVMe TCP and + * generate the corresponding TLs identity. + * + * Return: The string containing the TLS identity. It is the responsibility + * of the caller to free the returned string. 
+ */ +char *nvme_generate_tls_key_identity(const char *hostnqn, const char *subsysnqn, + int version, int hmac, + unsigned char *configured_key, int key_len); + #endif /* _LIBNVME_LINUX_H */ diff --git a/src/nvme/log.c b/src/nvme/log.c index e4697df..2ffca3e 100644 --- a/src/nvme/log.c +++ b/src/nvme/log.c @@ -26,11 +26,13 @@ #define LOG_CLOCK CLOCK_MONOTONIC #endif +static nvme_root_t root; + void __attribute__((format(printf, 4, 5))) __nvme_msg(nvme_root_t r, int lvl, const char *func, const char *format, ...) { - FILE *fp = r ? r->fp : stderr; + FILE *fp = stderr; va_list ap; char pidbuf[16]; char timebuf[32]; @@ -44,10 +46,16 @@ __nvme_msg(nvme_root_t r, int lvl, "[%s] <%s>%s ", "[%s] <%s> %s: ", }; - char *header __cleanup__(cleanup_charp) = NULL; - char *message __cleanup__(cleanup_charp) = NULL; + _cleanup_free_ char *header = NULL; + _cleanup_free_ char *message = NULL; int idx = 0; + if (!r) + r = root; + + if (r) + fp = r->fp; + if (r && lvl > r->log_level) return; @@ -90,3 +98,8 @@ void nvme_init_logging(nvme_root_t r, int lvl, bool log_pid, bool log_tstamp) r->log_pid = log_pid; r->log_timestamp = log_tstamp; } + +void nvme_set_root(nvme_root_t r) +{ + root = r; +} diff --git a/src/nvme/log.h b/src/nvme/log.h index 1cf797a..7c345f6 100644 --- a/src/nvme/log.h +++ b/src/nvme/log.h @@ -35,4 +35,17 @@ */ void nvme_init_logging(nvme_root_t r, int lvl, bool log_pid, bool log_tstamp); +/** + * nvme_set_root() - Set nvme_root_t context + * @r: nvme_root_t context + * + * In order to be able to log from code paths where no root object is passed in + * via the arguments use the the default one which can be set via this call. + * When creating a new root object with @nvme_create_root the global root object + * will be set as well. This means the global root object is always pointing to + * the latest created root object. Note the first @nvme_free_tree call will reset + * the global root object. 
+ */ +void nvme_set_root(nvme_root_t r); + #endif /* _LOG_H */ diff --git a/src/nvme/mi-mctp.c b/src/nvme/mi-mctp.c index 0c5972a..86c4c29 100644 --- a/src/nvme/mi-mctp.c +++ b/src/nvme/mi-mctp.c @@ -82,6 +82,8 @@ struct nvme_mi_transport_mctp { int net; __u8 eid; int sd; + void *resp_buf; + size_t resp_buf_size; }; static int ioctl_tag(int sd, unsigned long req, struct mctp_ioc_tag_ctl *ctl) @@ -175,60 +177,40 @@ struct nvme_mi_msg_resp_mpr { /* Check if this response was a More Processing Required response; if so, * populate the worst-case expected processing time, given in milliseconds. + * + * buf is the incoming message data, including type byte, but excluding + * the MIC which has been extracted into the mic argument already. */ -static bool nvme_mi_mctp_resp_is_mpr(struct nvme_mi_resp *resp, size_t len, +static bool nvme_mi_mctp_resp_is_mpr(void *buf, size_t len, __le32 mic, unsigned int *mpr_time) { - struct nvme_mi_admin_resp_hdr *admin_msg; struct nvme_mi_msg_resp_mpr *msg; - size_t clen; __u32 crc; - /* We need at least the minimal header plus checksum */ - if (len < sizeof(*msg) + sizeof(mic)) + /* We need at least the minimal header */ + if (len < sizeof(*msg)) return false; - msg = (struct nvme_mi_msg_resp_mpr *)resp->hdr; + msg = (struct nvme_mi_msg_resp_mpr *)buf; if (msg->status != NVME_MI_RESP_MPR) return false; - /* Find and verify the MIC from the response, which may not be laid out - * in resp as we expect. We have to preserve resp->hdr_len and - * resp->data_len, as we will need them for the eventual reply message. - * Because of that, we can't use verify_resp_mic here. - * - * If the packet was at the expected response size, then mic will - * be set already; if not, find it within the header/data buffers. - */ - /* Devices may send a MPR response as a full-sized Admin response, * rather than the minimal MI-only header. 
Allow this, but only if the * type indicates admin, and the allocated response header is the * correct size for an Admin response. */ - if (((msg->hdr.nmp >> 3) & 0xf) == NVME_MI_MT_ADMIN && - len == sizeof(*admin_msg) + sizeof(mic) && - resp->hdr_len == sizeof(*admin_msg)) { - if (resp->data_len) - mic = *(__le32 *)resp->data; - } else if (len == sizeof(*msg) + sizeof(mic)) { - if (resp->hdr_len > sizeof(*msg)) - mic = *(__le32 *)(msg + 1); - else if (resp->data_len) - mic = *(__le32 *)(resp->data); - } else { - return false; - } - - /* Since our response is just a header, we're guaranteed to have - * all data in resp->hdr. The response may be shorter than the expected - * header though, so clamp to len. + if (!(len == sizeof(*msg) || + ((msg->hdr.nmp >> 3 & 0x0f) == NVME_MI_MT_ADMIN && + len == sizeof(struct nvme_mi_admin_resp_hdr)))) + return false; + + /* Verify the MIC from the response. We're dealing with linear + * header data here, and need to preserve the resp pointer & size + * values, so can't use verify_resp_mic here. */ - len -= sizeof(mic); - clen = len < resp->hdr_len ? 
len : resp->hdr_len; - - crc = ~nvme_mi_crc32_update(0xffffffff, resp->hdr, clen); + crc = ~nvme_mi_crc32_update(0xffffffff, buf, len); if (le32_to_cpu(mic) != crc) return false; @@ -242,14 +224,14 @@ static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep, struct nvme_mi_req *req, struct nvme_mi_resp *resp) { + ssize_t len, resp_len, resp_hdr_len, resp_data_len; struct nvme_mi_transport_mctp *mctp; - struct iovec req_iov[3], resp_iov[3]; + struct iovec req_iov[3], resp_iov[1]; struct msghdr req_msg, resp_msg; int i, rc, errno_save, timeout; struct sockaddr_mctp addr; struct pollfd pollfds[1]; unsigned int mpr_time; - ssize_t len; __le32 mic; __u8 tag; @@ -306,20 +288,30 @@ static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep, goto out; } - resp_iov[0].iov_base = ((__u8 *)resp->hdr) + 1; - resp_iov[0].iov_len = resp->hdr_len - 1; - - resp_iov[1].iov_base = ((__u8 *)resp->data); - resp_iov[1].iov_len = resp->data_len; + resp_len = resp->hdr_len + resp->data_len + sizeof(mic); + if (resp_len > mctp->resp_buf_size) { + void *tmp = realloc(mctp->resp_buf, resp_len); + if (!tmp) { + errno_save = errno; + nvme_msg(ep->root, LOG_ERR, + "Failure allocating response buffer: %m\n"); + errno = errno_save; + rc = -1; + goto out; + } + mctp->resp_buf = tmp; + mctp->resp_buf_size = resp_len; + } - resp_iov[2].iov_base = &mic; - resp_iov[2].iov_len = sizeof(mic); + /* offset by one: the MCTP message type is excluded from the buffer */ + resp_iov[0].iov_base = mctp->resp_buf + 1; + resp_iov[0].iov_len = resp_len - 1; memset(&resp_msg, 0, sizeof(resp_msg)); resp_msg.msg_name = &addr; resp_msg.msg_namelen = sizeof(addr); resp_msg.msg_iov = resp_iov; - resp_msg.msg_iovlen = 3; + resp_msg.msg_iovlen = 1; pollfds[0].fd = mctp->sd; pollfds[0].events = POLLIN; @@ -333,13 +325,14 @@ retry: nvme_msg(ep->root, LOG_ERR, "Failed polling on MCTP socket: %m"); errno = errno_save; - return -1; + goto out; } if (rc == 0) { nvme_msg(ep->root, LOG_DEBUG, "Timeout on MCTP socket"); errno = ETIMEDOUT; - 
return -1; + rc = -1; + goto out; } rc = -1; @@ -361,7 +354,7 @@ retry: } /* Re-add the type byte, so we can work on aligned lengths from here */ - resp->hdr->type = MCTP_TYPE_NVME | MCTP_TYPE_MIC; + ((uint8_t *)mctp->resp_buf)[0] = MCTP_TYPE_NVME | MCTP_TYPE_MIC; len += 1; /* The smallest response data is 8 bytes: generic 4-byte message header @@ -375,21 +368,21 @@ retry: goto out; } - /* We can't have header/payload data that isn't a multiple of 4 bytes */ - if (len & 0x3) { - nvme_msg(ep->root, LOG_WARNING, - "Response message has unaligned length (%zd)!\n", - len); - errno = EPROTO; - goto out; - } + /* Start unpacking the linear resp buffer into the split header + data + * + MIC. We check for a MPR response before fully unpacking, as we'll + * need to preserve the resp layout if we need to retry the receive. + */ + + /* MIC is always at the tail */ + memcpy(&mic, mctp->resp_buf + len - sizeof(mic), sizeof(mic)); + len -= 4; /* Check for a More Processing Required response. This is a slight * layering violation, as we're pre-checking the MIC and inspecting * header fields. However, we need to do this in the transport in order * to keep the tag allocated and retry the recvmsg */ - if (nvme_mi_mctp_resp_is_mpr(resp, len, mic, &mpr_time)) { + if (nvme_mi_mctp_resp_is_mpr(mctp->resp_buf, len, mic, &mpr_time)) { nvme_msg(ep->root, LOG_DEBUG, "Received More Processing Required, waiting for response\n"); @@ -406,30 +399,20 @@ retry: goto retry; } - /* If we have a shorter than expected response, we need to find the - * MIC and the correct split between header & data. We know that the - * split is 4-byte aligned, so the MIC will be entirely within one - * of the iovecs. - */ - if (len == resp->hdr_len + resp->data_len + sizeof(mic)) { - /* Common case: expected data length. Header, data and MIC - * are already laid-out correctly. Nothing to do. */ - - } else if (len < resp->hdr_len + sizeof(mic)) { - /* Response is smaller than the expected header. 
MIC is - * somewhere in the header buf */ - resp->hdr_len = len - sizeof(mic); - resp->data_len = 0; - memcpy(&mic, ((uint8_t *)resp->hdr) + resp->hdr_len, - sizeof(mic)); - - } else { - /* We have a full header, but data is truncated - possibly - * zero bytes. MIC is somewhere in the data buf */ - resp->data_len = len - resp->hdr_len - sizeof(mic); - memcpy(&mic, ((uint8_t *)resp->data) + resp->data_len, - sizeof(mic)); - } + /* we expect resp->hdr_len bytes, but we may have less */ + resp_hdr_len = resp->hdr_len; + if (resp_hdr_len > len) + resp_hdr_len = len; + memcpy(resp->hdr, mctp->resp_buf, resp_hdr_len); + resp->hdr_len = resp_hdr_len; + len -= resp_hdr_len; + + /* any remaining bytes are the data payload */ + resp_data_len = resp->data_len; + if (resp_data_len > len) + resp_data_len = len; + memcpy(resp->data, mctp->resp_buf + resp_hdr_len, resp_data_len); + resp->data_len = resp_data_len; resp->mic = le32_to_cpu(mic); @@ -450,6 +433,7 @@ static void nvme_mi_mctp_close(struct nvme_mi_ep *ep) mctp = ep->transport_data; close(mctp->sd); + free(mctp->resp_buf); free(ep->transport_data); } @@ -488,15 +472,29 @@ nvme_mi_ep_t nvme_mi_open_mctp(nvme_root_t root, unsigned int netid, __u8 eid) return NULL; mctp = malloc(sizeof(*mctp)); - if (!mctp) - goto err_free_ep; + if (!mctp) { + errno_save = errno; + goto err_close_ep; + } + + memset(mctp, 0, sizeof(*mctp)); + mctp->sd = -1; + + mctp->resp_buf_size = 4096; + mctp->resp_buf = malloc(mctp->resp_buf_size); + if (!mctp->resp_buf) { + errno_save = errno; + goto err_free_mctp; + } mctp->net = netid; mctp->eid = eid; mctp->sd = ops.socket(AF_MCTP, SOCK_DGRAM, 0); - if (mctp->sd < 0) - goto err_free_ep; + if (mctp->sd < 0) { + errno_save = errno; + goto err_free_rspbuf; + } ep->transport = &nvme_mi_transport_mctp; ep->transport_data = mctp; @@ -512,10 +510,14 @@ nvme_mi_ep_t nvme_mi_open_mctp(nvme_root_t root, unsigned int netid, __u8 eid) return ep; -err_free_ep: - errno_save = errno; - nvme_mi_close(ep); 
+err_free_rspbuf: + free(mctp->resp_buf); +err_free_mctp: free(mctp); +err_close_ep: + /* the ep->transport is not set yet, so this will not call back + * into nvme_mi_mctp_close() */ + nvme_mi_close(ep); errno = errno_save; return NULL; } diff --git a/src/nvme/mi.c b/src/nvme/mi.c index 3799f35..82ed88a 100644 --- a/src/nvme/mi.c +++ b/src/nvme/mi.c @@ -413,11 +413,6 @@ int nvme_mi_submit(nvme_mi_ep_t ep, struct nvme_mi_req *req, return -1; } - if (resp->data_len & 0x3) { - errno = EINVAL; - return -1; - } - if (ep->transport->mic_enabled) nvme_mi_calc_req_mic(req); @@ -580,8 +575,10 @@ int nvme_mi_admin_xfer(nvme_mi_ctrl_t ctrl, return -1; } - /* must be aligned */ - if (resp_data_offset & 0x3) { + /* request and response lengths & offset must be aligned */ + if ((req_data_size & 0x3) || + (*resp_data_size & 0x3) || + (resp_data_offset & 0x3)) { errno = EINVAL; return -1; } @@ -1051,7 +1048,7 @@ int nvme_mi_admin_set_features(nvme_mi_ctrl_t ctrl, nvme_admin_set_features); req_hdr.cdw1 = cpu_to_le32(args->nsid); - req_hdr.cdw10 = cpu_to_le32((args->save ? 1 : 0) << 31 | + req_hdr.cdw10 = cpu_to_le32((__u32)!!args->save << 31 | (args->fid & 0xff)); req_hdr.cdw14 = cpu_to_le32(args->uuidx & 0x7f); req_hdr.cdw11 = cpu_to_le32(args->cdw11); @@ -1223,7 +1220,7 @@ int nvme_mi_admin_fw_commit(nvme_mi_ctrl_t ctrl, nvme_mi_admin_init_req(&req, &req_hdr, ctrl->id, nvme_admin_fw_commit); - req_hdr.cdw10 = cpu_to_le32(((args->bpid & 0x1) << 31) | + req_hdr.cdw10 = cpu_to_le32(((__u32)(args->bpid & 0x1) << 31) | ((args->action & 0x7) << 3) | ((args->slot & 0x7) << 0)); diff --git a/src/nvme/mi.h b/src/nvme/mi.h index 211cb29..bd26627 100644 --- a/src/nvme/mi.h +++ b/src/nvme/mi.h @@ -1395,7 +1395,6 @@ static inline int nvme_mi_admin_identify_primary_ctrl(nvme_mi_ctrl_t ctrl, * nvme_mi_admin_identify_secondary_ctrl_list() - Perform an Admin identify for * a secondary controller list. 
* @ctrl: Controller to process identify command - * @nsid: Namespace ID to specify list start * @cntid: Controller ID to specify list start * @list: List data to populate * @@ -1412,7 +1411,6 @@ static inline int nvme_mi_admin_identify_primary_ctrl(nvme_mi_ctrl_t ctrl, * See: &struct nvme_secondary_ctrl_list */ static inline int nvme_mi_admin_identify_secondary_ctrl_list(nvme_mi_ctrl_t ctrl, - __u32 nsid, __u16 cntid, struct nvme_secondary_ctrl_list *list) { @@ -1422,7 +1420,7 @@ static inline int nvme_mi_admin_identify_secondary_ctrl_list(nvme_mi_ctrl_t ctrl .args_size = sizeof(args), .cns = NVME_IDENTIFY_CNS_SECONDARY_CTRL_LIST, .csi = NVME_CSI_NVM, - .nsid = nsid, + .nsid = NVME_NSID_NONE, .cntid = cntid, .cns_specific_id = NVME_CNSSPECID_NONE, .uuidx = NVME_UUID_NONE, @@ -2109,6 +2107,41 @@ static inline int nvme_mi_admin_get_log_boot_partition(nvme_mi_ctrl_t ctrl, } /** + * nvme_mi_admin_get_log_phy_rx_eom() - Retrieve Physical Interface Receiver Eye Opening Measurement Log + * @ctrl: Controller to query + * @lsp: Log specific, controls action and measurement quality + * @controller: Target controller ID + * @len: The allocated size, minimum + * struct nvme_phy_rx_eom_log + * @log: User address to store the log page + * + * Return: The nvme command status if a response was received (see + * &enum nvme_status_field) or -1 with errno set otherwise + */ +static inline int nvme_mi_admin_get_log_phy_rx_eom(nvme_mi_ctrl_t ctrl, + __u8 lsp, __u16 controller, + __u32 len, + struct nvme_phy_rx_eom_log *log) +{ + struct nvme_get_log_args args = { + .lpo = 0, + .result = NULL, + .log = log, + .args_size = sizeof(args), + .lid = NVME_LOG_LID_PHY_RX_EOM, + .len = len, + .nsid = NVME_NSID_NONE, + .csi = NVME_CSI_NVM, + .lsi = controller, + .lsp = lsp, + .uuidx = NVME_UUID_NONE, + .rae = false, + .ot = false, + }; + return nvme_mi_admin_get_log(ctrl, &args); +} + +/** * nvme_mi_admin_get_log_discovery() - Retrieve Discovery log page * @ctrl: Controller to query * @rae: 
Retain asynchronous events diff --git a/src/nvme/nbft.c b/src/nvme/nbft.c index a1e17cd..2c87088 100644 --- a/src/nvme/nbft.c +++ b/src/nvme/nbft.c @@ -33,17 +33,15 @@ static __u8 csum(const __u8 *buffer, ssize_t length) static void format_ip_addr(char *buf, size_t buflen, __u8 *addr) { - struct in6_addr *addr_ipv6; + struct in6_addr addr_ipv6; - addr_ipv6 = (struct in6_addr *)addr; - if (addr_ipv6->s6_addr32[0] == 0 && - addr_ipv6->s6_addr32[1] == 0 && - ntohl(addr_ipv6->s6_addr32[2]) == 0xffff) + memcpy(&addr_ipv6, addr, sizeof(addr_ipv6)); + if (IN6_IS_ADDR_V4MAPPED(&addr_ipv6)) /* ipv4 */ - inet_ntop(AF_INET, &(addr_ipv6->s6_addr32[3]), buf, buflen); + inet_ntop(AF_INET, &addr_ipv6.s6_addr32[3], buf, buflen); else /* ipv6 */ - inet_ntop(AF_INET6, addr_ipv6, buf, buflen); + inet_ntop(AF_INET6, &addr_ipv6, buf, buflen); } static bool in_heap(struct nbft_header *header, struct nbft_heap_obj obj) @@ -199,15 +197,15 @@ static int read_ssns(struct nbft_info *nbft, verify(raw_ssns->structure_id == NBFT_DESC_SSNS, "invalid ID in SSNS descriptor"); + /* verify transport type */ + verify(raw_ssns->trtype == NBFT_TRTYPE_TCP, + "invalid transport type in SSNS descriptor"); + ssns = calloc(1, sizeof(*ssns)); if (!ssns) return -ENOMEM; ssns->index = le16_to_cpu(raw_ssns->index); - - /* transport type */ - verify(raw_ssns->trtype == NBFT_TRTYPE_TCP, - "invalid transport type in SSNS descriptor"); strncpy(ssns->transport, trtype_to_string(raw_ssns->trtype), sizeof(ssns->transport)); /* transport specific flags */ @@ -413,26 +411,29 @@ static int read_discovery(struct nbft_info *nbft, struct nbft_discovery *raw_discovery, struct nbft_info_discovery **d) { - struct nbft_info_discovery *discovery; + struct nbft_info_discovery *discovery = NULL; struct nbft_header *header = (struct nbft_header *)nbft->raw_nbft; + int r = -EINVAL; if (!(raw_discovery->flags & NBFT_DISCOVERY_VALID)) - return -EINVAL; + goto error; verify(raw_discovery->structure_id == NBFT_DESC_DISCOVERY, "invalid 
ID in discovery descriptor"); discovery = calloc(1, sizeof(struct nbft_info_discovery)); - if (!discovery) - return -ENOMEM; + if (!discovery) { + r = -ENOMEM; + goto error; + } discovery->index = raw_discovery->index; if (get_heap_obj(raw_discovery, discovery_ctrl_addr_obj, 1, &discovery->uri)) - return -EINVAL; + goto error; if (get_heap_obj(raw_discovery, discovery_ctrl_nqn_obj, 1, &discovery->nqn)) - return -EINVAL; + goto error; discovery->hfi = hfi_from_index(nbft, raw_discovery->hfi_index); if (raw_discovery->hfi_index && !discovery->hfi) @@ -447,7 +448,12 @@ static int read_discovery(struct nbft_info *nbft, nbft->filename, discovery->index); *d = discovery; - return 0; + r = 0; + +error: + if (r) + free(discovery); + return r; } static int read_security(struct nbft_info *nbft, diff --git a/src/nvme/private.h b/src/nvme/private.h index 809b3bb..ee9d738 100644 --- a/src/nvme/private.h +++ b/src/nvme/private.h @@ -106,6 +106,7 @@ struct nvme_subsystem { char *firmware; char *subsystype; char *application; + char *iopolicy; }; struct nvme_host { @@ -179,7 +180,9 @@ int json_dump_tree(nvme_root_t r); nvme_ctrl_t __nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport, const char *traddr, const char *host_traddr, const char *host_iface, const char *trsvcid, - nvme_ctrl_t p); + const char *subsysnqn, nvme_ctrl_t p); + +void *__nvme_alloc(size_t len); #if (LOG_FUNCNAME == 1) #define __nvme_log_func __func__ @@ -197,6 +200,11 @@ __nvme_msg(nvme_root_t r, int lvl, const char *func, const char *format, ...); format, ##__VA_ARGS__); \ } while (0) +#define root_from_ctrl(c) ((c)->s && (c)->s->h ? (c)->s->h->r : NULL) +#define root_from_ns(n) ((n)->s && (n)->s->h ? (n)->s->h->r : \ + (n)->c && (n)->c->s && (n)->c->s->h ? 
(n)->c->s->h->r : \ + NULL) + /* mi internal headers */ /* internal transport API */ diff --git a/src/nvme/tree.c b/src/nvme/tree.c index a2ac069..07a3c53 100644 --- a/src/nvme/tree.c +++ b/src/nvme/tree.c @@ -15,6 +15,7 @@ #include <fcntl.h> #include <libgen.h> #include <unistd.h> +#include <ifaddrs.h> #include <sys/types.h> #include <sys/stat.h> @@ -24,6 +25,7 @@ #include <ccan/endian/endian.h> #include <ccan/list/list.h> +#include "cleanup.h" #include "ioctl.h" #include "linux.h" #include "filters.h" @@ -34,6 +36,31 @@ #include "log.h" #include "private.h" +/** + * struct candidate_args - Used to look for a controller matching these parameters + * @transport: Transport type: loop, fc, rdma, tcp + * @traddr: Transport address (destination address) + * @trsvcid: Transport service ID + * @subsysnqn: Subsystem NQN + * @host_traddr: Host transport address (source address) + * @host_iface: Host interface for connection (tcp only) + * @iface_list: Interface list (tcp only) + * @addreq: Address comparison function (for traddr, host-traddr) + * @well_known_nqn: Set to "true" when @subsysnqn is the well-known NQN + */ +struct candidate_args { + const char *transport; + const char *traddr; + const char *trsvcid; + const char *subsysnqn; + const char *host_traddr; + const char *host_iface; + struct ifaddrs *iface_list; + bool (*addreq)(const char *, const char *); + bool well_known_nqn; +}; +typedef bool (*ctrl_match_t)(struct nvme_ctrl *c, struct candidate_args *candidate); + const char *nvme_slots_sysfs_dir = "/sys/bus/pci/slots"; static struct nvme_host *default_host; @@ -78,17 +105,24 @@ static bool streqcase0(const char *s1, const char *s2) return !strcasecmp(s1, s2); } -static inline void nvme_free_dirents(struct dirent **d, int i) +struct dirents { + struct dirent **ents; + int num; +}; + +static void cleanup_dirents(struct dirents *ents) { - while (i-- > 0) - free(d[i]); - free(d); + while (ents->num > 0) + free(ents->ents[--ents->num]); + free(ents->ents); } 
+#define _cleanup_dirents_ __cleanup__(cleanup_dirents) + nvme_host_t nvme_default_host(nvme_root_t r) { struct nvme_host *h; - char *hostnqn, *hostid; + _cleanup_free_ char *hostnqn, *hostid; hostnqn = nvmf_hostnqn_from_file(); if (!hostnqn) @@ -100,61 +134,55 @@ nvme_host_t nvme_default_host(nvme_root_t r) nvme_host_set_hostsymname(h, NULL); default_host = h; - free(hostnqn); - if (hostid) - free(hostid); return h; } int nvme_scan_topology(struct nvme_root *r, nvme_scan_filter_t f, void *f_args) { - struct dirent **subsys, **ctrls; - int i, num_subsys, num_ctrls, ret; + _cleanup_dirents_ struct dirents subsys = {}, ctrls = {}; + int i, ret; if (!r) return 0; - num_ctrls = nvme_scan_ctrls(&ctrls); - if (num_ctrls < 0) { + ctrls.num = nvme_scan_ctrls(&ctrls.ents); + if (ctrls.num < 0) { nvme_msg(r, LOG_DEBUG, "failed to scan ctrls: %s\n", strerror(errno)); - return num_ctrls; + return ctrls.num; } - for (i = 0; i < num_ctrls; i++) { - nvme_ctrl_t c = nvme_scan_ctrl(r, ctrls[i]->d_name); + for (i = 0; i < ctrls.num; i++) { + nvme_ctrl_t c = nvme_scan_ctrl(r, ctrls.ents[i]->d_name); if (!c) { nvme_msg(r, LOG_DEBUG, "failed to scan ctrl %s: %s\n", - ctrls[i]->d_name, strerror(errno)); + ctrls.ents[i]->d_name, strerror(errno)); continue; } if ((f) && !f(NULL, c, NULL, f_args)) { nvme_msg(r, LOG_DEBUG, "filter out controller %s\n", - ctrls[i]->d_name); + ctrls.ents[i]->d_name); nvme_free_ctrl(c); } } - nvme_free_dirents(ctrls, i); - - num_subsys = nvme_scan_subsystems(&subsys); - if (num_subsys < 0) { + subsys.num = nvme_scan_subsystems(&subsys.ents); + if (subsys.num < 0) { nvme_msg(r, LOG_DEBUG, "failed to scan subsystems: %s\n", strerror(errno)); - return num_subsys; + return subsys.num; } - for (i = 0; i < num_subsys; i++) { - ret = nvme_scan_subsystem(r, subsys[i]->d_name, f, f_args); + for (i = 0; i < subsys.num; i++) { + ret = nvme_scan_subsystem( + r, subsys.ents[i]->d_name, f, f_args); if (ret < 0) { nvme_msg(r, LOG_DEBUG, "failed to scan subsystem %s: %s\n", - 
subsys[i]->d_name, strerror(errno)); + subsys.ents[i]->d_name, strerror(errno)); } } - nvme_free_dirents(subsys, i); - return 0; } @@ -172,6 +200,7 @@ nvme_root_t nvme_create_root(FILE *fp, int log_level) r->fp = fp; list_head_init(&r->hosts); list_head_init(&r->endpoints); + nvme_set_root(r); return r; } @@ -234,8 +263,10 @@ const char *nvme_root_get_application(nvme_root_t r) void nvme_root_set_application(nvme_root_t r, const char *a) { - if (r->application) + if (r->application) { free(r->application); + r->application = NULL; + } if (a) r->application = strdup(a); } @@ -338,9 +369,18 @@ void nvme_free_tree(nvme_root_t r) free(r->config_file); if (r->application) free(r->application); + nvme_set_root(NULL); free(r); } +void nvme_root_release_fds(nvme_root_t r) +{ + struct nvme_host *h, *_h; + + nvme_for_each_host_safe(r, h, _h) + nvme_host_release_fds(h); +} + const char *nvme_subsystem_get_nqn(nvme_subsystem_t s) { return s->subsysnqn; @@ -368,12 +408,19 @@ const char *nvme_subsystem_get_application(nvme_subsystem_t s) void nvme_subsystem_set_application(nvme_subsystem_t s, const char *a) { - if (s->application) + if (s->application) { free(s->application); + s->application = NULL; + } if (a) s->application = strdup(a); } +const char *nvme_subsystem_get_iopolicy(nvme_subsystem_t s) +{ + return s->iopolicy; +} + nvme_ctrl_t nvme_subsystem_first_ctrl(nvme_subsystem_t s) { return list_top(&s->ctrls, struct nvme_ctrl, entry); @@ -412,7 +459,7 @@ nvme_path_t nvme_namespace_next_path(nvme_ns_t ns, nvme_path_t p) static void __nvme_free_ns(struct nvme_ns *n) { list_del_init(&n->entry); - close(n->fd); + nvme_ns_release_fd(n); free(n->generic_name); free(n->name); free(n->sysfs_dir); @@ -451,9 +498,23 @@ static void __nvme_free_subsystem(struct nvme_subsystem *s) free(s->subsystype); if (s->application) free(s->application); + if (s->iopolicy) + free(s->iopolicy); free(s); } +void nvme_subsystem_release_fds(struct nvme_subsystem *s) +{ + struct nvme_ctrl *c, *_c; + 
struct nvme_ns *n, *_n; + + nvme_subsystem_for_each_ctrl_safe(s, c, _c) + nvme_ctrl_release_fd(c); + + nvme_subsystem_for_each_ns_safe(s, n, _n) + nvme_ns_release_fd(n); +} + /* * Stub for SWIG */ @@ -524,6 +585,14 @@ static void __nvme_free_host(struct nvme_host *h) free(h); } +void nvme_host_release_fds(struct nvme_host *h) +{ + struct nvme_subsystem *s, *_s; + + nvme_for_each_subsystem_safe(h, s, _s) + nvme_subsystem_release_fds(s); +} + /* Stub for SWIG */ void nvme_free_host(struct nvme_host *h) { @@ -563,27 +632,26 @@ struct nvme_host *nvme_lookup_host(nvme_root_t r, const char *hostnqn, static int nvme_subsystem_scan_namespaces(nvme_root_t r, nvme_subsystem_t s, nvme_scan_filter_t f, void *f_args) { - struct dirent **namespaces; - int i, num_ns, ret; + _cleanup_dirents_ struct dirents namespaces = {}; + int i, ret; - num_ns = nvme_scan_subsystem_namespaces(s, &namespaces); - if (num_ns < 0) { + namespaces.num = nvme_scan_subsystem_namespaces(s, &namespaces.ents); + if (namespaces.num < 0) { nvme_msg(r, LOG_DEBUG, "failed to scan namespaces for subsys %s: %s\n", s->subsysnqn, strerror(errno)); - return num_ns; + return namespaces.num; } - for (i = 0; i < num_ns; i++) { + for (i = 0; i < namespaces.num; i++) { ret = nvme_subsystem_scan_namespace(r, s, - namespaces[i]->d_name, f, f_args); + namespaces.ents[i]->d_name, f, f_args); if (ret < 0) nvme_msg(r, LOG_DEBUG, "failed to scan namespace %s: %s\n", - namespaces[i]->d_name, strerror(errno)); + namespaces.ents[i]->d_name, strerror(errno)); } - nvme_free_dirents(namespaces, i); return 0; } @@ -610,15 +678,28 @@ static int nvme_init_subsystem(nvme_subsystem_t s, const char *name) s->sysfs_dir = (char *)path; if (s->h->r->application) s->application = strdup(s->h->r->application); + s->iopolicy = nvme_get_attr(path, "iopolicy"); return 0; } +static bool __nvme_scan_subsystem(struct nvme_root *r, nvme_subsystem_t s, + nvme_scan_filter_t f, void *f_args) +{ + if (f && !f(s, NULL, NULL, f_args)) { + nvme_msg(r, 
LOG_DEBUG, "filter out subsystem %s\n", s->name); + __nvme_free_subsystem(s); + return false; + } + nvme_subsystem_scan_namespaces(r, s, f, f_args); + return true; +} + static int nvme_scan_subsystem(struct nvme_root *r, const char *name, nvme_scan_filter_t f, void *f_args) { struct nvme_subsystem *s = NULL, *_s; - char *path, *subsysnqn; + _cleanup_free_ char *path = NULL, *subsysnqn = NULL; nvme_host_t h = NULL; int ret; @@ -628,7 +709,6 @@ static int nvme_scan_subsystem(struct nvme_root *r, const char *name, return ret; subsysnqn = nvme_get_attr(path, "subsysnqn"); - free(path); if (!subsysnqn) { errno = ENODEV; return -1; @@ -644,6 +724,10 @@ static int nvme_scan_subsystem(struct nvme_root *r, const char *name, continue; if (strcmp(_s->name, name)) continue; + if (!__nvme_scan_subsystem(r, _s, f, f_args)) { + errno = EINVAL; + return -1; + } s = _s; } } @@ -659,26 +743,18 @@ static int nvme_scan_subsystem(struct nvme_root *r, const char *name, s = nvme_alloc_subsystem(h, name, subsysnqn); if (!s) { errno = ENOMEM; + return -1; + } + if (!__nvme_scan_subsystem(r, s, f, f_args)) { + errno = EINVAL; + return -1; } } else if (strcmp(s->subsysnqn, subsysnqn)) { - nvme_msg(r, LOG_WARNING, "NQN mismatch for subsystem '%s'\n", + nvme_msg(r, LOG_DEBUG, "NQN mismatch for subsystem '%s'\n", name); - s = NULL; - free(subsysnqn); errno = EINVAL; return -1; } - free(subsysnqn); - if (!s) - return -1; - - if (f && !f(s, NULL, NULL, f_args)) { - nvme_msg(r, LOG_DEBUG, "filter out subsystem %s\n", name); - __nvme_free_subsystem(s); - return 0; - } - - nvme_subsystem_scan_namespaces(r, s, f, f_args); return 0; } @@ -740,7 +816,7 @@ static void nvme_subsystem_set_path_ns(nvme_subsystem_t s, nvme_path_t p) static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name) { struct nvme_path *p; - char *path, *grpid; + _cleanup_free_ char *path = NULL, *grpid = NULL; int ret; nvme_msg(r, LOG_DEBUG, "scan controller %s path %s\n", @@ -758,12 +834,13 @@ static int 
nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name) p = calloc(1, sizeof(*p)); if (!p) { errno = ENOMEM; - goto free_path; + return -1; } p->c = c; p->name = strdup(name); p->sysfs_dir = path; + path = NULL; p->ana_state = nvme_get_path_attr(p, "ana_state"); if (!p->ana_state) p->ana_state = strdup("optimized"); @@ -771,7 +848,6 @@ static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name) grpid = nvme_get_path_attr(p, "ana_grpid"); if (grpid) { sscanf(grpid, "%d", &p->grpid); - free(grpid); } list_node_init(&p->nentry); @@ -779,26 +855,29 @@ static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name) list_node_init(&p->entry); list_add(&c->paths, &p->entry); return 0; - -free_path: - free(path); - return -1; } int nvme_ctrl_get_fd(nvme_ctrl_t c) { - nvme_root_t r = c->s && c->s->h ? c->s->h->r : NULL; - if (c->fd < 0) { c->fd = nvme_open(c->name); if (c->fd < 0) - nvme_msg(r, LOG_ERR, + nvme_msg(root_from_ctrl(c), LOG_ERR, "Failed to open ctrl %s, errno %d\n", c->name, errno); } return c->fd; } +void nvme_ctrl_release_fd(nvme_ctrl_t c) +{ + if (c->fd < 0) + return; + + close(c->fd); + c->fd = -1; +} + nvme_subsystem_t nvme_ctrl_get_subsystem(nvme_ctrl_t c) { return c->s; @@ -824,6 +903,32 @@ const char *nvme_ctrl_get_address(nvme_ctrl_t c) return c->address ? c->address : ""; } +char *nvme_ctrl_get_src_addr(nvme_ctrl_t c, char *src_addr, size_t src_addr_len) +{ + size_t l; + char *p; + + if (!c->address) + return NULL; + + p = strstr(c->address, "src_addr="); + if (!p) + return NULL; + + p += strlen("src_addr="); + l = strcspn(p, ",%"); /* % to eliminate IPv6 scope (if present) */ + if (l >= src_addr_len) { + nvme_msg(root_from_ctrl(c), LOG_ERR, + "Buffer for src_addr is too small (%zu must be > %zu)\n", + src_addr_len, l); + return NULL; + } + + strncpy(src_addr, p, l); + src_addr[l] = '\0'; + return src_addr; +} + const char *nvme_ctrl_get_phy_slot(nvme_ctrl_t c) { return c->phy_slot ? 
c->phy_slot : ""; @@ -998,10 +1103,7 @@ nvme_path_t nvme_ctrl_next_path(nvme_ctrl_t c, nvme_path_t p) do { if (a) { free(a); (a) = NULL; } } while (0) void nvme_deconfigure_ctrl(nvme_ctrl_t c) { - if (c->fd >= 0) { - close(c->fd); - c->fd = -1; - } + nvme_ctrl_release_fd(c); FREE_CTRL_ATTR(c->name); FREE_CTRL_ATTR(c->sysfs_dir); FREE_CTRL_ATTR(c->firmware); @@ -1140,40 +1242,391 @@ struct nvme_ctrl *nvme_create_ctrl(nvme_root_t r, return c; } +/** + * _tcp_ctrl_match_host_traddr_no_src_addr() - Match host_traddr w/o src_addr + * @c: An existing controller instance + * @candidate: Candidate ctrl we're trying to match with @c. + * + * On kernels prior to 6.1 (i.e. src_addr is not available), try to match + * a candidate controller's host_traddr to that of an existing controller. + * + * This function takes an optimistic approach. In doubt, it will declare a + * match and return true. + * + * Return: true if @c->host_traddr matches @candidate->host_traddr. false otherwise. + */ +static bool _tcp_ctrl_match_host_traddr_no_src_addr(struct nvme_ctrl *c, struct candidate_args *candidate) +{ + if (c->cfg.host_traddr) + return candidate->addreq(candidate->host_traddr, c->cfg.host_traddr); + + /* If c->cfg.host_traddr is NULL, then the controller (c) + * uses the interface's primary address as the source + * address. If c->cfg.host_iface is defined we can + * determine the primary address associated with that + * interface and compare that to the candidate->host_traddr. + */ + if (c->cfg.host_iface) + return nvme_iface_primary_addr_matches(candidate->iface_list, + c->cfg.host_iface, + candidate->host_traddr); + + /* If both c->cfg.host_traddr and c->cfg.host_iface are + * NULL, we don't have enough information to make a + * 100% positive match. Regardless, let's be optimistic + * and assume that we have a match. 
+ */ + nvme_msg(root_from_ctrl(c), LOG_DEBUG, + "Not enough data, but assume %s matches candidate's host_traddr: %s\n", + nvme_ctrl_get_name(c), candidate->host_traddr); + + return true; +} + +/** + * _tcp_ctrl_match_host_iface_no_src_addr() - Match host_iface w/o src_addr + * @c: An existing controller instance + * @candidate: Candidate ctrl we're trying to match with @c. + * + * On kernels prior to 6.1 (i.e. src_addr is not available), try to match + * a candidate controller's host_iface to that of an existing controller. + * + * This function takes an optimistic approach. In doubt, it will declare a + * match and return true. + * + * Return: true if @c->host_iface matches @candidate->host_iface. false otherwise. + */ +static bool _tcp_ctrl_match_host_iface_no_src_addr(struct nvme_ctrl *c, struct candidate_args *candidate) +{ + if (c->cfg.host_iface) + return streq0(candidate->host_iface, c->cfg.host_iface); + + /* If c->cfg.host_traddr is not NULL we can infer the controller's (c) + * interface from it and compare it to the candidate->host_iface. + */ + if (c->cfg.host_traddr) { + const char *c_host_iface; + + c_host_iface = nvme_iface_matching_addr(candidate->iface_list, c->cfg.host_traddr); + return streq0(candidate->host_iface, c_host_iface); + } + + /* If both c->cfg.host_traddr and c->cfg.host_iface are + * NULL, we don't have enough information to make a + * 100% positive match. Regardless, let's be optimistic + * and assume that we have a match. + */ + nvme_msg(root_from_ctrl(c), LOG_DEBUG, + "Not enough data, but assume %s matches candidate's host_iface: %s\n", + nvme_ctrl_get_name(c), candidate->host_iface); + + return true; +} + +/** + * _tcp_opt_params_match_no_src_addr() - Match optional host_traddr/host_iface w/o src_addr + * @c: An existing controller instance + * @candidate: Candidate ctrl we're trying to match with @c. 
+ * + * Before kernel 6.1, the src_addr was not reported by the kernel which makes + * it hard to match a candidate's host_traddr and host_iface to an existing + * controller if that controller was created without specifying the + * host_traddr and/or host_iface. This function tries its best in the absence + * of a src_addr to match @c to @candidate. This may not be 100% accurate. + * Only the src_addr can provide 100% accuracy. + * + * This function takes an optimistic approach. In doubt, it will declare a + * match and return true. + * + * Return: true if @c matches @candidate. false otherwise. + */ +static bool _tcp_opt_params_match_no_src_addr(struct nvme_ctrl *c, struct candidate_args *candidate) +{ + /* Check host_traddr only if candidate is interested */ + if (candidate->host_traddr) { + if (!_tcp_ctrl_match_host_traddr_no_src_addr(c, candidate)) + return false; + } + + /* Check host_iface only if candidate is interested */ + if (candidate->host_iface) { + if (!_tcp_ctrl_match_host_iface_no_src_addr(c, candidate)) + return false; + } + + return true; +} + +/** + * _tcp_opt_params_match() - Match optional host_traddr/host_iface + * @c: An existing controller instance + * @candidate: Candidate ctrl we're trying to match with @c. + * + * The host_traddr and host_iface are optional for TCP. When they are not + * specified, the kernel looks up the destination IP address (traddr) in the + * routing table to determine the best interface for the connection. The + * kernel then retrieves the primary IP address assigned to that interface + * and uses that as the connection’s source address. + * + * An interface’s primary address is the default source address used for + * all connections made on that interface unless host-traddr is used to + * override the default. 
Kernel-selected interfaces and/or source addresses + * are hidden from user-space applications unless the kernel makes that + * information available through the "src_addr" attribute in the + * sysfs (kernel 6.1 or later). + * + * Sometimes, an application may force the interface by specifying the + * "host-iface" or may force a different source address (instead of the + * primary address) by providing the "host-traddr". + * + * If the candidate specifies the host_traddr and/or host_iface but they + * do not match the existing controller's host_traddr and/or host_iface + * (they could be NULL), we may still be able to find a match by taking + * the existing controller's src_addr into consideration since that + * parameter identifies the actual source address of the connection and + * therefore can be used to infer the interface of the connection. However, + * the src_addr can only be read from the nvme device's sysfs "address" + * attribute starting with kernel 6.1 (or kernels that backported the + * src_addr patch). + * + * For legacy kernels that do not provide the src_addr we must use a + * different algorithm to match the host_traddr and host_iface, but + * it's not 100% accurate. + * + * Return: true if @c matches @candidate. false otherwise. 
+ */ +static bool _tcp_opt_params_match(struct nvme_ctrl *c, struct candidate_args *candidate) +{ + char *src_addr, buffer[INET6_ADDRSTRLEN]; + + /* Check if src_addr is available (kernel 6.1 or later) */ + src_addr = nvme_ctrl_get_src_addr(c, buffer, sizeof(buffer)); + if (!src_addr) + return _tcp_opt_params_match_no_src_addr(c, candidate); + + /* Check host_traddr only if candidate is interested */ + if (candidate->host_traddr && + !candidate->addreq(candidate->host_traddr, src_addr)) + return false; + + /* Check host_iface only if candidate is interested */ + if (candidate->host_iface && + !streq0(candidate->host_iface, + nvme_iface_matching_addr(candidate->iface_list, src_addr))) + return false; + + return true; +} + +/** + * _tcp_match_ctrl() - Check if controller matches candidate (TCP only) + * @c: An existing controller instance + * @candidate: Candidate ctrl we're trying to match with @c. + * + * We want to determine if an existing controller can be re-used + * for the candidate controller we're trying to instantiate. + * + * For TCP, we do not have a match if the candidate's transport, traddr, + * trsvcid are not identical to those of the the existing controller. + * These 3 parameters are mandatory for a match. + * + * The host_traddr and host_iface are optional. When the candidate does + * not specify them (both NULL), we can ignore them. Otherwise, we must + * employ advanced investigation techniques to determine if there's a match. + * + * Return: true if a match is found, false otherwise. 
+ */ +static bool _tcp_match_ctrl(struct nvme_ctrl *c, struct candidate_args *candidate) +{ + if (!streq0(c->transport, candidate->transport)) + return false; + + if (!streq0(c->trsvcid, candidate->trsvcid)) + return false; + + if (!candidate->addreq(c->traddr, candidate->traddr)) + return false; + + if (candidate->well_known_nqn && !nvme_ctrl_is_discovery_ctrl(c)) + return false; + + if (candidate->subsysnqn && !streq0(c->subsysnqn, candidate->subsysnqn)) + return false; + + /* Check host_traddr / host_iface only if candidate is interested */ + if ((candidate->host_iface || candidate->host_traddr) && + !_tcp_opt_params_match(c, candidate)) + return false; + + return true; +} + +/** + * _match_ctrl() - Check if controller matches candidate (non TCP transport) + * @c: An existing controller instance + * @candidate: Candidate ctrl we're trying to match with @c. + * + * We want to determine if an existing controller can be re-used + * for the candidate controller we're trying to instantiate. This function + * is used for all transports except TCP. + * + * Return: true if a match is found, false otherwise. 
+ */ +static bool _match_ctrl(struct nvme_ctrl *c, struct candidate_args *candidate) +{ + if (!streq0(c->transport, candidate->transport)) + return false; + + if (candidate->traddr && c->traddr && + !candidate->addreq(c->traddr, candidate->traddr)) + return false; + + if (candidate->host_traddr && c->cfg.host_traddr && + !candidate->addreq(c->cfg.host_traddr, candidate->host_traddr)) + return false; + + if (candidate->host_iface && c->cfg.host_iface && + !streq0(c->cfg.host_iface, candidate->host_iface)) + return false; + + if (candidate->trsvcid && c->trsvcid && + !streq0(c->trsvcid, candidate->trsvcid)) + return false; + + if (candidate->well_known_nqn && !nvme_ctrl_is_discovery_ctrl(c)) + return false; + + if (candidate->subsysnqn && !streq0(c->subsysnqn, candidate->subsysnqn)) + return false; + + return true; +} +/** + * _candidate_init() - Init candidate and get the matching function + * + * @candidate: Candidate struct to initialize + * @transport: Transport name + * @traddr: Transport address + * @trsvcid: Transport service identifier + * @subsysnqn: Subsystem NQN + * @host_traddr: Host transport address + * @host_iface: Host interface name + * @host_iface: Host interface name + * + * The function _candidate_free() must be called to release resources once + * the candidate object is not longer required. + * + * Return: The matching function to use when comparing an existing + * controller to the candidate controller. 
+ */ +static ctrl_match_t _candidate_init(struct candidate_args *candidate, + const char *transport, + const char *traddr, + const char *trsvcid, + const char *subsysnqn, + const char *host_traddr, + const char *host_iface) +{ + memset(candidate, 0, sizeof(*candidate)); + + candidate->traddr = traddr; + candidate->trsvcid = trsvcid; + candidate->transport = transport; + candidate->subsysnqn = subsysnqn; + candidate->host_iface = host_iface; + candidate->host_traddr = host_traddr; + + if (streq0(subsysnqn, NVME_DISC_SUBSYS_NAME)) { + /* Since TP8013, the NQN of discovery controllers can be the + * well-known NQN (i.e. nqn.2014-08.org.nvmexpress.discovery) or + * a unique NQN. A DC created using the well-known NQN may later + * display a unique NQN when looked up in the sysfs. Therefore, + * ignore (i.e. set to NULL) the well-known NQN when looking for + * a match. + */ + candidate->subsysnqn = NULL; + candidate->well_known_nqn = true; + } + + if (streq0(transport, "tcp")) { + /* For TCP we may need to access the interface map. + * Let's retrieve and cache the map. + */ + if (getifaddrs(&candidate->iface_list) == -1) + candidate->iface_list = NULL; + + candidate->addreq = nvme_ipaddrs_eq; + return _tcp_match_ctrl; + } + + if (streq0(transport, "rdma")) { + candidate->addreq = nvme_ipaddrs_eq; + return _match_ctrl; + } + + /* All other transport types */ + candidate->addreq = streqcase0; + return _match_ctrl; +} + +/** + * _candidate_free() - Release resources allocated by _candidate_init() + * + * @candidate: data to free. 
+ */ +static void _candidate_free(struct candidate_args *candidate) +{ + freeifaddrs(candidate->iface_list); /* This is NULL-safe */ +} + +#define _cleanup_candidate_ __cleanup__(_candidate_free) + nvme_ctrl_t __nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport, const char *traddr, const char *host_traddr, const char *host_iface, const char *trsvcid, - nvme_ctrl_t p) - + const char *subsysnqn, nvme_ctrl_t p) { - struct nvme_ctrl *c; - bool (*addreq)(const char *, const char *); + struct nvme_ctrl *c, *matching_c = NULL; + _cleanup_candidate_ struct candidate_args candidate; + ctrl_match_t ctrl_match; - if (!strcmp(transport, "tcp") || !strcmp(transport, "rdma")) - addreq = nvme_ipaddrs_eq; /* IP address compare for TCP/RDMA */ - else - addreq = streqcase0; /* Case-insensitive for FC (n/a for loop) */ + /* Init candidate and get the matching function to use */ + ctrl_match = _candidate_init(&candidate, transport, traddr, trsvcid, + subsysnqn, host_traddr, host_iface); c = p ? nvme_subsystem_next_ctrl(s, p) : nvme_subsystem_first_ctrl(s); for (; c != NULL; c = nvme_subsystem_next_ctrl(s, c)) { - if (!streq0(c->transport, transport)) - continue; - if (traddr && c->traddr && - !addreq(c->traddr, traddr)) - continue; - if (host_traddr && c->cfg.host_traddr && - !addreq(c->cfg.host_traddr, host_traddr)) - continue; - if (host_iface && c->cfg.host_iface && - !streq0(c->cfg.host_iface, host_iface)) - continue; - if (trsvcid && c->trsvcid && - !streq0(c->trsvcid, trsvcid)) - continue; - return c; + if (ctrl_match(c, &candidate)) { + matching_c = c; + break; + } } - return NULL; + return matching_c; +} + +bool nvme_ctrl_config_match(struct nvme_ctrl *c, const char *transport, + const char *traddr, const char *trsvcid, + const char *subsysnqn, const char *host_traddr, + const char *host_iface) +{ + ctrl_match_t ctrl_match; + _cleanup_candidate_ struct candidate_args candidate; + + /* Init candidate and get the matching function to use */ + ctrl_match = 
_candidate_init(&candidate, transport, traddr, trsvcid, + subsysnqn, host_traddr, host_iface); + + return ctrl_match(c, &candidate); +} + +nvme_ctrl_t nvme_ctrl_find(nvme_subsystem_t s, const char *transport, + const char *traddr, const char *trsvcid, + const char *subsysnqn, const char *host_traddr, + const char *host_iface) +{ + return __nvme_lookup_ctrl(s, transport, traddr, host_traddr, host_iface, + trsvcid, subsysnqn, NULL/*p*/); } nvme_ctrl_t nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport, @@ -1188,7 +1641,7 @@ nvme_ctrl_t nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport, return NULL; c = __nvme_lookup_ctrl(s, transport, traddr, host_traddr, - host_iface, trsvcid, p); + host_iface, trsvcid, NULL, p); if (c) return c; @@ -1205,73 +1658,63 @@ nvme_ctrl_t nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport, static int nvme_ctrl_scan_paths(nvme_root_t r, struct nvme_ctrl *c) { - struct dirent **paths; - int i, ret; + _cleanup_dirents_ struct dirents paths = {}; + int i; - ret = nvme_scan_ctrl_namespace_paths(c, &paths); - if (ret < 0) - return ret; + paths.num = nvme_scan_ctrl_namespace_paths(c, &paths.ents); + if (paths.num < 0) + return paths.num; - for (i = 0; i < ret; i++) - nvme_ctrl_scan_path(r, c, paths[i]->d_name); + for (i = 0; i < paths.num; i++) + nvme_ctrl_scan_path(r, c, paths.ents[i]->d_name); - nvme_free_dirents(paths, i); return 0; } static int nvme_ctrl_scan_namespaces(nvme_root_t r, struct nvme_ctrl *c) { - struct dirent **namespaces; - int i, ret; + _cleanup_dirents_ struct dirents namespaces = {}; + int i; - ret = nvme_scan_ctrl_namespaces(c, &namespaces); - for (i = 0; i < ret; i++) - nvme_ctrl_scan_namespace(r, c, namespaces[i]->d_name); + namespaces.num = nvme_scan_ctrl_namespaces(c, &namespaces.ents); + for (i = 0; i < namespaces.num; i++) + nvme_ctrl_scan_namespace(r, c, namespaces.ents[i]->d_name); - nvme_free_dirents(namespaces, i); return 0; } static char *nvme_ctrl_lookup_subsystem_name(nvme_root_t r, 
const char *ctrl_name) { - struct dirent **subsys; - char *subsys_name = NULL; - int ret, i; + _cleanup_dirents_ struct dirents subsys = {}; + int i; - ret = nvme_scan_subsystems(&subsys); - if (ret < 0) + subsys.num = nvme_scan_subsystems(&subsys.ents); + if (subsys.num < 0) return NULL; - for (i = 0; i < ret; i++) { + for (i = 0; i < subsys.num; i++) { struct stat st; - char *path; + _cleanup_free_ char *path = NULL; if (asprintf(&path, "%s/%s/%s", nvme_subsys_sysfs_dir, - subsys[i]->d_name, ctrl_name) < 0) { + subsys.ents[i]->d_name, ctrl_name) < 0) { errno = ENOMEM; return NULL; } nvme_msg(r, LOG_DEBUG, "lookup subsystem %s\n", path); if (stat(path, &st) < 0) { - free(path); continue; } - subsys_name = strdup(subsys[i]->d_name); - free(path); - break; + return strdup(subsys.ents[i]->d_name); } - nvme_free_dirents(subsys, ret); - return subsys_name; + return NULL; } static char *nvme_ctrl_lookup_phy_slot(nvme_root_t r, const char *address) { - char *target_addr; - char *addr; - char *path; - int found = 0; + _cleanup_free_ char *target_addr = NULL; int ret; - DIR *slots_dir; + _cleanup_dir_ DIR *slots_dir = NULL; struct dirent *entry; if (!address) @@ -1289,25 +1732,20 @@ static char *nvme_ctrl_lookup_phy_slot(nvme_root_t r, const char *address) if (entry->d_type == DT_DIR && strncmp(entry->d_name, ".", 1) != 0 && strncmp(entry->d_name, "..", 2) != 0) { - ret = asprintf(&path, "/sys/bus/pci/slots/%s", entry->d_name); + _cleanup_free_ char *path = NULL; + _cleanup_free_ char *addr = NULL; + + ret = asprintf(&path, "%s/%s", + nvme_slots_sysfs_dir, entry->d_name); if (ret < 0) { errno = ENOMEM; return NULL; } addr = nvme_get_attr(path, "address"); - if (strcmp(addr, target_addr) == 0) { - found = 1; - free(path); - free(addr); - break; - } - free(path); - free(addr); + if (strcmp(addr, target_addr) == 0) + return strdup(entry->d_name); } } - free(target_addr); - if (found) - return strdup(entry->d_name); return NULL; } @@ -1361,8 +1799,9 @@ static int 
nvme_configure_ctrl(nvme_root_t r, nvme_ctrl_t c, const char *path, int nvme_init_ctrl(nvme_host_t h, nvme_ctrl_t c, int instance) { nvme_subsystem_t s; - char *subsys_name = NULL; - char *path, *name; + _cleanup_free_ char *subsys_name = NULL; + char *path; + _cleanup_free_ char *name = NULL; int ret; ret = asprintf(&name, "nvme%d", instance); @@ -1373,20 +1812,19 @@ int nvme_init_ctrl(nvme_host_t h, nvme_ctrl_t c, int instance) ret = asprintf(&path, "%s/nvme%d", nvme_ctrl_sysfs_dir, instance); if (ret < 0) { errno = ENOMEM; - goto out_free_name; + return ret; } ret = nvme_configure_ctrl(h->r, c, path, name); if (ret < 0) { free(path); - goto out_free_name; + return ret; } c->address = nvme_get_attr(path, "address"); if (!c->address && strcmp(c->transport, "loop")) { errno = ENVME_CONNECT_INVAL_TR; - ret = -1; - goto out_free_name; + return -1; } subsys_name = nvme_ctrl_lookup_subsystem_name(h->r, name); @@ -1395,23 +1833,17 @@ int nvme_init_ctrl(nvme_host_t h, nvme_ctrl_t c, int instance) "Failed to lookup subsystem name for %s\n", c->name); errno = ENVME_CONNECT_LOOKUP_SUBSYS_NAME; - ret = -1; - goto out_free_name; + return -1; } s = nvme_lookup_subsystem(h, subsys_name, c->subsysnqn); if (!s) { errno = ENVME_CONNECT_LOOKUP_SUBSYS; - ret = -1; - goto out_free_subsys; + return -1; } if (s->subsystype && !strcmp(s->subsystype, "discovery")) c->discovery_ctrl = true; c->s = s; list_add(&s->ctrls, &c->entry); -out_free_subsys: - free(subsys_name); - out_free_name: - free(name); return ret; } @@ -1419,8 +1851,10 @@ static nvme_ctrl_t nvme_ctrl_alloc(nvme_root_t r, nvme_subsystem_t s, const char *path, const char *name) { nvme_ctrl_t c, p; - char *addr = NULL, *address = NULL, *a, *e; - char *transport, *traddr = NULL, *trsvcid = NULL; + _cleanup_free_ char *addr = NULL, *address = NULL; + char *a, *e; + _cleanup_free_ char *transport; + char *traddr = NULL, *trsvcid = NULL; char *host_traddr = NULL, *host_iface = NULL; int ret; @@ -1432,7 +1866,8 @@ static 
nvme_ctrl_t nvme_ctrl_alloc(nvme_root_t r, nvme_subsystem_t s, /* Parse 'address' string into components */ addr = nvme_get_attr(path, "address"); if (!addr) { - char *rpath = NULL, *p = NULL, *_a = NULL; + _cleanup_free_ char *rpath = NULL; + char *p = NULL, *_a = NULL; /* loop transport might not have an address */ if (!strcmp(transport, "loop")) @@ -1440,14 +1875,12 @@ static nvme_ctrl_t nvme_ctrl_alloc(nvme_root_t r, nvme_subsystem_t s, /* Older kernel don't support pcie transport addresses */ if (strcmp(transport, "pcie")) { - free(transport); errno = ENXIO; return NULL; } /* Figure out the PCI address from the attribute path */ rpath = realpath(path, NULL); if (!rpath) { - free(transport); errno = ENOMEM; return NULL; } @@ -1462,7 +1895,6 @@ static nvme_ctrl_t nvme_ctrl_alloc(nvme_root_t r, nvme_subsystem_t s, } if (p) addr = strdup(p); - free(rpath); } else if (!strcmp(transport, "pcie")) { /* The 'address' string is the transport address */ traddr = addr; @@ -1500,16 +1932,13 @@ skip_address: } while (c); if (!c) c = p; - free(transport); - if (address) - free(address); if (!c && !p) { nvme_msg(r, LOG_ERR, "failed to lookup ctrl\n"); errno = ENODEV; - free(addr); return NULL; } c->address = addr; + addr = NULL; if (s->subsystype && !strcmp(s->subsystype, "discovery")) c->discovery_ctrl = true; ret = nvme_configure_ctrl(r, c, path, name); @@ -1521,8 +1950,9 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name) nvme_host_t h; nvme_subsystem_t s; nvme_ctrl_t c; - char *path; - char *hostnqn, *hostid, *subsysnqn, *subsysname; + _cleanup_free_ char *path = NULL; + _cleanup_free_ char *hostnqn = NULL, *hostid = NULL; + _cleanup_free_ char *subsysnqn = NULL, *subsysname = NULL; int ret; nvme_msg(r, LOG_DEBUG, "scan controller %s\n", name); @@ -1535,10 +1965,6 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name) hostnqn = nvme_get_attr(path, "hostnqn"); hostid = nvme_get_attr(path, "hostid"); h = nvme_lookup_host(r, hostnqn, hostid); - if 
(hostnqn) - free(hostnqn); - if (hostid) - free(hostid); if (h) { if (h->dhchap_key) free(h->dhchap_key); @@ -1551,7 +1977,6 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name) if (!h) { h = nvme_default_host(r); if (!h) { - free(path); errno = ENOMEM; return NULL; } @@ -1559,7 +1984,6 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name) subsysnqn = nvme_get_attr(path, "subsysnqn"); if (!subsysnqn) { - free(path); errno = ENXIO; return NULL; } @@ -1568,27 +1992,21 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name) nvme_msg(r, LOG_ERR, "failed to lookup subsystem for controller %s\n", name); - free(subsysnqn); - free(path); errno = ENXIO; return NULL; } s = nvme_lookup_subsystem(h, subsysname, subsysnqn); - free(subsysnqn); - free(subsysname); if (!s) { - free(path); errno = ENOMEM; return NULL; } c = nvme_ctrl_alloc(r, s, path, name); - if (!c) { - free(path); + if (!c) return NULL; - } + path = NULL; nvme_ctrl_scan_namespaces(r, c); nvme_ctrl_scan_paths(r, c); return c; @@ -1622,9 +2040,26 @@ static int nvme_bytes_to_lba(nvme_ns_t n, off_t offset, size_t count, int nvme_ns_get_fd(nvme_ns_t n) { + if (n->fd < 0) { + n->fd = nvme_open(n->name); + if (n->fd < 0) + nvme_msg(root_from_ns(n), LOG_ERR, + "Failed to open ns %s, errno %d\n", + n->name, errno); + } + return n->fd; } +void nvme_ns_release_fd(nvme_ns_t n) +{ + if (n->fd < 0) + return; + + close(n->fd); + n->fd = -1; +} + nvme_subsystem_t nvme_ns_get_subsystem(nvme_ns_t n) { return n->s; @@ -1887,57 +2322,164 @@ int nvme_ns_flush(nvme_ns_t n) return nvme_flush(nvme_ns_get_fd(n), nvme_ns_get_nsid(n)); } -static void nvme_ns_parse_descriptors(struct nvme_ns *n, - struct nvme_ns_id_desc *descs) +static int nvme_strtou64(const char *str, void *res) { - void *d = descs; - int i, len; + char *endptr; + __u64 v; - for (i = 0; i < NVME_IDENTIFY_DATA_SIZE; i += len) { - struct nvme_ns_id_desc *desc = d + i; + errno = 0; + v = strtoull(str, &endptr, 0); - if (!desc->nidl) - break; - len 
= desc->nidl + sizeof(*desc); + if (errno != 0) + return -errno; - switch (desc->nidt) { - case NVME_NIDT_EUI64: - memcpy(n->eui64, desc->nid, sizeof(n->eui64)); - break; - case NVME_NIDT_NGUID: - memcpy(n->nguid, desc->nid, sizeof(n->nguid)); - break; - case NVME_NIDT_UUID: - memcpy(n->uuid, desc->nid, sizeof(n->uuid)); - break; - case NVME_NIDT_CSI: - memcpy(&n->csi, desc->nid, sizeof(n->csi)); - break; + if (endptr == str) { + /* no digits found */ + return -EINVAL; + } + + *(__u64 *)res = v; + return 0; +} + +static int nvme_strtou32(const char *str, void *res) +{ + char *endptr; + __u32 v; + + errno = 0; + v = strtol(str, &endptr, 0); + + if (errno != 0) + return -errno; + + if (endptr == str) { + /* no digits found */ + return -EINVAL; + } + + *(__u32 *)res = v; + return 0; +} + +static int nvme_strtoi(const char *str, void *res) +{ + char *endptr; + int v; + + errno = 0; + v = strtol(str, &endptr, 0); + + if (errno != 0) + return -errno; + + if (endptr == str) { + /* no digits found */ + return -EINVAL; + } + + *(int *)res = v; + return 0; +} + +static int nvme_strtoeuid(const char *str, void *res) +{ + memcpy(res, str, 8); + return 0; +} + +static int nvme_strtouuid(const char *str, void *res) +{ + memcpy(res, str, NVME_UUID_LEN); + return 0; +} + +struct sysfs_attr_table { + void *var; + int (*parse)(const char *str, void *res); + bool mandatory; + const char *name; +}; + +#define GETSHIFT(x) (__builtin_ffsll(x) - 1) +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +static int parse_attrs(const char *path, struct sysfs_attr_table *tbl, int size) +{ + char *str; + int ret, i; + + for (i = 0; i < size; i++) { + struct sysfs_attr_table *e = &tbl[i]; + + str = nvme_get_attr(path, e->name); + if (!str) { + if (!e->mandatory) + continue; + return -ENOENT; } + ret = e->parse(str, e->var); + free(str); + if (ret) + return ret; } + + return 0; } -static int nvme_ns_init(struct nvme_ns *n) +static int nvme_ns_init(const char *path, struct nvme_ns *ns) { - 
struct nvme_id_ns ns = { }; - uint8_t buffer[NVME_IDENTIFY_DATA_SIZE] = { }; - struct nvme_ns_id_desc *descs = (void *)buffer; - uint8_t flbas; + _cleanup_free_ char *attr = NULL; + struct stat sb; int ret; - ret = nvme_ns_identify(n, &ns); + struct sysfs_attr_table base[] = { + { &ns->nsid, nvme_strtou32, true, "nsid" }, + { &ns->lba_count, nvme_strtou64, true, "size" }, + { &ns->lba_size, nvme_strtou64, true, "queue/physical_block_size" }, + { ns->eui64, nvme_strtoeuid, false, "eui" }, + { ns->nguid, nvme_strtouuid, false, "nguid" }, + { ns->uuid, nvme_strtouuid, false, "uuid" } + }; + + ret = parse_attrs(path, base, ARRAY_SIZE(base)); if (ret) return ret; - nvme_id_ns_flbas_to_lbaf_inuse(ns.flbas, &flbas); - n->lba_shift = ns.lbaf[flbas].ds; - n->lba_size = 1 << n->lba_shift; - n->lba_count = le64_to_cpu(ns.nsze); - n->lba_util = le64_to_cpu(ns.nuse); - n->meta_size = le16_to_cpu(ns.lbaf[flbas].ms); + ns->lba_shift = GETSHIFT(ns->lba_size); + + if (asprintf(&attr, "%s/csi", path) < 0) + return -errno; + ret = stat(attr, &sb); + if (ret == 0) { + /* only available on kernels >= 6.8 */ + struct sysfs_attr_table ext[] = { + { &ns->csi, nvme_strtoi, true, "csi" }, + { &ns->lba_util, nvme_strtou64, true, "nuse" }, + { &ns->meta_size, nvme_strtoi, true, "metadata_bytes"}, + + }; - if (!nvme_ns_identify_descs(n, descs)) - nvme_ns_parse_descriptors(n, descs); + ret = parse_attrs(path, ext, ARRAY_SIZE(ext)); + if (ret) + return ret; + } else { + struct nvme_id_ns *id; + uint8_t flbas; + + id = __nvme_alloc(sizeof(*ns)); + if (!id) + return -ENOMEM; + + ret = nvme_ns_identify(ns, id); + if (ret) + return ret; + + nvme_id_ns_flbas_to_lbaf_inuse(id->flbas, &flbas); + ns->lba_count = le64_to_cpu(id->nsze); + ns->lba_util = le64_to_cpu(id->nuse); + ns->meta_size = le16_to_cpu(id->lbaf[flbas].ms); + } return 0; } @@ -1956,7 +2498,7 @@ static void nvme_ns_set_generic_name(struct nvme_ns *n, const char *name) n->generic_name = strdup(generic_name); } -static nvme_ns_t 
nvme_ns_open(const char *name) +static nvme_ns_t nvme_ns_open(const char *sys_path, const char *name) { struct nvme_ns *n; @@ -1966,26 +2508,20 @@ static nvme_ns_t nvme_ns_open(const char *name) return NULL; } + n->fd = -1; n->name = strdup(name); - n->fd = nvme_open(n->name); - if (n->fd < 0) - goto free_ns; nvme_ns_set_generic_name(n, name); - if (nvme_get_nsid(n->fd, &n->nsid) < 0) - goto close_fd; - - if (nvme_ns_init(n) != 0) - goto close_fd; + if (nvme_ns_init(sys_path, n) != 0) + goto free_ns; list_head_init(&n->paths); list_node_init(&n->entry); + nvme_ns_release_fd(n); /* Do not leak fds */ return n; -close_fd: - close(n->fd); free_ns: free(n->generic_name); free(n->name); @@ -2020,9 +2556,9 @@ static char *nvme_ns_generic_to_blkdev(const char *generic) static struct nvme_ns *__nvme_scan_namespace(const char *sysfs_dir, const char *name) { struct nvme_ns *n; - char *path; + _cleanup_free_ char *path = NULL; int ret; - char *blkdev; + _cleanup_free_ char *blkdev = NULL; blkdev = nvme_ns_generic_to_blkdev(name); if (!blkdev) { @@ -2033,23 +2569,17 @@ static struct nvme_ns *__nvme_scan_namespace(const char *sysfs_dir, const char * ret = asprintf(&path, "%s/%s", sysfs_dir, blkdev); if (ret < 0) { errno = ENOMEM; - goto free_blkdev; + return NULL; } - n = nvme_ns_open(blkdev); + n = nvme_ns_open(path, blkdev); if (!n) - goto free_path; + return NULL; n->sysfs_dir = path; + path = NULL; - free(blkdev); return n; - -free_path: - free(path); -free_blkdev: - free(blkdev); - return NULL; } nvme_ns_t nvme_scan_namespace(const char *name) diff --git a/src/nvme/tree.h b/src/nvme/tree.h index bcf3636..a30e8eb 100644 --- a/src/nvme/tree.h +++ b/src/nvme/tree.h @@ -15,6 +15,7 @@ #include <stddef.h> #include <sys/types.h> +#include <netinet/in.h> #include "ioctl.h" #include "util.h" @@ -62,6 +63,17 @@ void nvme_root_set_application(nvme_root_t r, const char *a); const char *nvme_root_get_application(nvme_root_t r); /** + * nvme_root_release_fds - Close all opened file 
descriptors in the tree + * @r: &nvme_root_t object + * + * Controller and Namespace objects cache the file descriptors + * of opened nvme devices. This API can be used to close and + * clear all cached fds in the tree. + * + */ +void nvme_root_release_fds(nvme_root_t r); + +/** * nvme_free_tree() - Free root object * @r: &nvme_root_t object * @@ -295,6 +307,51 @@ nvme_ctrl_t nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport, const char *host_iface, const char *trsvcid, nvme_ctrl_t p); +/** + * nvme_ctrl_find() - Locate an existing controller + * @s: &nvme_subsystem_t object + * @transport: Transport name + * @traddr: Transport address + * @trsvcid: Transport service identifier + * @subsysnqn: Subsystem NQN + * @host_traddr: Host transport address + * @host_iface: Host interface name + * + * Lookup a controller in @s based on @transport, @traddr, @trsvcid, + * @subsysnqn, @host_traddr, and @host_iface. @transport must be specified, + * other fields may be required depending on the transport. Parameters set + * to NULL will be ignored. + * + * Unlike nvme_lookup_ctrl(), this function does not create a new object if + * an existing controller cannot be found. + * + * Return: Controller instance on success, NULL otherwise. + */ +nvme_ctrl_t nvme_ctrl_find(nvme_subsystem_t s, const char *transport, + const char *traddr, const char *trsvcid, + const char *subsysnqn, const char *host_traddr, + const char *host_iface); + +/** + * nvme_ctrl_config_match() - Check if ctrl @c matches config params + * @c: An existing controller instance + * @transport: Transport name + * @traddr: Transport address + * @trsvcid: Transport service identifier + * @subsysnqn: Subsystem NQN + * @host_traddr: Host transport address + * @host_iface: Host interface name + * + * Check that controller @c matches parameters: @transport, @traddr, + * @trsvcid, @subsysnqn, @host_traddr, and @host_iface. Parameters set + * to NULL will be ignored. 
+ * + * Return: true if there's a match, false otherwise. + */ +bool nvme_ctrl_config_match(struct nvme_ctrl *c, const char *transport, + const char *traddr, const char *trsvcid, + const char *subsysnqn, const char *host_traddr, + const char *host_iface); /** * nvme_create_ctrl() - Allocate an unconnected NVMe controller @@ -484,11 +541,25 @@ nvme_ns_t nvme_subsystem_next_ns(nvme_subsystem_t s, nvme_ns_t n); * nvme_ns_get_fd() - Get associated file descriptor * @n: Namespace instance * + * libnvme will open() the file (if not already opened) and keep + * an internal copy of the file descriptor. Following calls to + * this API retrieve the internal cached copy of the file + * descriptor. The file will remain opened and the fd will + * remain cached until the ns object is deleted or + * nvme_ns_release_fd() is called. + * * Return: File descriptor associated with @n or -1 */ int nvme_ns_get_fd(nvme_ns_t n); /** + * nvme_ns_release_fd() - Close fd and clear fd from ns object + * @n: Namespace instance + * + */ +void nvme_ns_release_fd(nvme_ns_t n); + +/** * nvme_ns_get_nsid() - NSID of a namespace * @n: Namespace instance * @@ -772,11 +843,25 @@ nvme_ns_t nvme_path_get_ns(nvme_path_t p); * nvme_ctrl_get_fd() - Get associated file descriptor * @c: Controller instance * + * libnvme will open() the file (if not already opened) and keep + * an internal copy of the file descriptor. Following calls to + * this API retrieve the internal cached copy of the file + * descriptor. The file will remain opened and the fd will + * remain cached until the controller object is deleted or + * nvme_ctrl_release_fd() is called. 
+ * * Return: File descriptor associated with @c or -1 */ int nvme_ctrl_get_fd(nvme_ctrl_t c); /** + * nvme_ctrl_release_fd() - Close fd and clear fd from controller object + * @c: Controller instance + * + */ +void nvme_ctrl_release_fd(nvme_ctrl_t c); + +/** * nvme_ctrl_get_name() - sysfs name of a controller * @c: Controller instance * @@ -802,6 +887,16 @@ const char *nvme_ctrl_get_sysfs_dir(nvme_ctrl_t c); const char *nvme_ctrl_get_address(nvme_ctrl_t c); /** + * nvme_ctrl_get_src_addr() - Extract src_addr from the c->address string + * @c: Controller instance + * @src_addr: Where to copy the src_addr. Size must be at least INET6_ADDRSTRLEN. + * @src_addr_len: Length of the buffer @src_addr. + * + * Return: Pointer to @src_addr on success. NULL on failure to extract the src_addr. + */ +char *nvme_ctrl_get_src_addr(nvme_ctrl_t c, char *src_addr, size_t src_addr_len); + +/** * nvme_ctrl_get_phy_slot() - PCI physical slot number of a controller * @c: Controller instance * @@ -827,7 +922,7 @@ const char *nvme_ctrl_get_firmware(nvme_ctrl_t c); const char *nvme_ctrl_get_model(nvme_ctrl_t c); /** - * nvme_ctrl_get_state() - Running state of an controller + * nvme_ctrl_get_state() - Running state of a controller * @c: Controller instance * * Return: String indicating the running state of @c @@ -1148,6 +1243,14 @@ const char *nvme_subsystem_get_application(nvme_subsystem_t s); void nvme_subsystem_set_application(nvme_subsystem_t s, const char *a); /** + * nvme_subsystem_get_iopolicy() - Return the IO policy of subsytem + * @s: nvme_subsystem_t object + * + * Return: IO policy used by current subsystem + */ +const char *nvme_subsystem_get_iopolicy(nvme_subsystem_t s); + +/** * nvme_scan_topology() - Scan NVMe topology and apply filter * @r: nvme_root_t object * @f: filter to apply @@ -1177,6 +1280,16 @@ const char *nvme_host_get_hostnqn(nvme_host_t h); const char *nvme_host_get_hostid(nvme_host_t h); /** + * nvme_host_release_fds() - Close all opened file descriptors 
under host + * @h: nvme_host_t object + * + * Controller and Namespace objects cache the file descriptors + * of opened nvme devices. This API can be used to close and + * clear all cached fds under this host. + */ +void nvme_host_release_fds(struct nvme_host *h); + +/** * nvme_free_host() - Free nvme_host_t object * @h: nvme_host_t object */ @@ -1293,6 +1406,18 @@ nvme_ns_t nvme_subsystem_lookup_namespace(struct nvme_subsystem *s, __u32 nsid); /** + * nvme_subsystem_release_fds() - Close all opened fds under subsystem + * @s: nvme_subsystem_t object + * + * Controller and Namespace objects cache the file descriptors + * of opened nvme devices. This API can be used to close and + * clear all cached fds under this subsystem. + * + */ +void nvme_subsystem_release_fds(struct nvme_subsystem *s); + + +/** * nvme_get_path_attr() - Read path sysfs attribute * @p: nvme_path_t object * @attr: sysfs attribute name diff --git a/src/nvme/types.h b/src/nvme/types.h index 3bf2237..29ac050 100644 --- a/src/nvme/types.h +++ b/src/nvme/types.h @@ -43,7 +43,7 @@ * Returns: The 'name' field from 'value' */ #define NVME_SET(value, name) \ - (((value) & NVME_##name##_MASK) << NVME_##name##_SHIFT) + (((__u32)(value) & NVME_##name##_MASK) << NVME_##name##_SHIFT) /** * enum nvme_constants - A place to stash various constant nvme values @@ -611,6 +611,19 @@ static const __u64 NVME_PMRMSC_CBA_MASK = 0xfffffffffffffull; #define NVME_PMRMSC_CMSE(pmrmsc) NVME_GET(pmrmsc, PMRMSC_CMSE) #define NVME_PMRMSC_CBA(pmrmsc) NVME_GET(pmrmsc, PMRMSC_CBA) +enum nvme_flbas { + NVME_FLBAS_LOWER_SHIFT = 0, + NVME_FLBAS_META_EXT_SHIFT = 4, + NVME_FLBAS_HIGHER_SHIFT = 5, + NVME_FLBAS_LOWER_MASK = 0xf, + NVME_FLBAS_META_EXT_MASK = 0x1, + NVME_FLBAS_HIGHER_MASK = 0x3, +}; + +#define NVME_FLBAS_LOWER(flbas) NVME_GET(flbas, FLBAS_LOWER) +#define NVME_FLBAS_META_EXT(flbas) NVME_GET(flbas, FLBAS_META_EXT) +#define NVME_FLBAS_HIGHER(flbas) NVME_GET(flbas, FLBAS_HIGHER) + /** * enum nvme_psd_flags - Possible flag 
values in nvme power state descriptor * @NVME_PSD_FLAGS_MXPS: Indicates the scale for the Maximum Power @@ -930,7 +943,10 @@ struct nvme_id_psd { * @maxcna: Maximum I/O Controller Namespace Attachments indicates the * maximum number of namespaces that are allowed to be attached to * this I/O controller. - * @rsvd564: Reserved + * @oaqd: Optimal Aggregated Queue Depth indicates the recommended maximum + * total number of outstanding I/O commands across all I/O queues + * on the controller for optimal operation. + * @rsvd568: Reserved * @subnqn: NVM Subsystem NVMe Qualified Name, UTF-8 null terminated string * @rsvd1024: Reserved * @ioccsz: I/O Queue Command Capsule Supported Size, defines the maximum @@ -1035,7 +1051,8 @@ struct nvme_id_ctrl { __le32 mnan; __u8 maxdna[16]; __le32 maxcna; - __u8 rsvd564[204]; + __le32 oaqd; + __u8 rsvd568[200]; char subnqn[NVME_NQN_LENGTH]; __u8 rsvd1024[768]; @@ -1489,6 +1506,14 @@ enum nvme_id_ctrl_cqes { * the Verify command. * @NVME_CTRL_ONCS_COPY: If set, then the controller supports * the copy command. + * @NVME_CTRL_ONCS_COPY_SINGLE_ATOMICITY: If set, then the write portion of a + * Copy command is performed as a single + * write command to which the same + * atomicity requirements that apply to + * a write command apply. + * @NVME_CTRL_ONCS_ALL_FAST_COPY: If set, then all copy operations for + * the Copy command are fast copy + * operations. */ enum nvme_id_ctrl_oncs { NVME_CTRL_ONCS_COMPARE = 1 << 0, @@ -1500,6 +1525,8 @@ enum nvme_id_ctrl_oncs { NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, NVME_CTRL_ONCS_VERIFY = 1 << 7, NVME_CTRL_ONCS_COPY = 1 << 8, + NVME_CTRL_ONCS_COPY_SINGLE_ATOMICITY = 1 << 9, + NVME_CTRL_ONCS_ALL_FAST_COPY = 1 << 10, }; /** @@ -1772,7 +1799,6 @@ enum nvme_lbaf_rp { * remains fixed throughout the life of the namespace and is * preserved across namespace and controller operations * @lbaf: LBA Format, see &struct nvme_lbaf. 
- * @lbstm: Logical Block Storage Tag Mask for end-to-end protection * @vs: Vendor Specific */ struct nvme_id_ns { @@ -1816,8 +1842,7 @@ struct nvme_id_ns { __u8 nguid[16]; __u8 eui64[8]; struct nvme_lbaf lbaf[64]; - __le64 lbstm; - __u8 vs[3704]; + __u8 vs[3712]; }; /** @@ -3075,11 +3100,13 @@ struct nvme_telemetry_log { /** * struct nvme_endurance_group_log - Endurance Group Information Log * @critical_warning: Critical Warning - * @rsvd1: Reserved + * @endurance_group_features: Endurance Group Features + * @rsvd2: Reserved * @avl_spare: Available Spare * @avl_spare_threshold: Available Spare Threshold * @percent_used: Percentage Used - * @rsvd6: Reserved + * @domain_identifier: Domain Identifier + * @rsvd8: Reserved * @endurance_estimate: Endurance Estimate * @data_units_read: Data Units Read * @data_units_written: Data Units Written @@ -3088,15 +3115,19 @@ struct nvme_telemetry_log { * @host_write_cmds: Host Write Commands * @media_data_integrity_err: Media and Data Integrity Errors * @num_err_info_log_entries: Number of Error Information Log Entries - * @rsvd160: Reserved + * @total_end_grp_cap: Total Endurance Group Capacity + * @unalloc_end_grp_cap: Unallocated Endurance Group Capacity + * @rsvd192: Reserved */ struct nvme_endurance_group_log { __u8 critical_warning; - __u8 rsvd1[2]; + __u8 endurance_group_features; + __u8 rsvd2; __u8 avl_spare; __u8 avl_spare_threshold; __u8 percent_used; - __u8 rsvd6[26]; + __le16 domain_identifier; + __u8 rsvd8[24]; __u8 endurance_estimate[16]; __u8 data_units_read[16]; __u8 data_units_written[16]; @@ -3105,7 +3136,9 @@ struct nvme_endurance_group_log { __u8 host_write_cmds[16]; __u8 media_data_integrity_err[16]; __u8 num_err_info_log_entries[16]; - __u8 rsvd160[352]; + __u8 total_end_grp_cap[16]; + __u8 unalloc_end_grp_cap[16]; + __u8 rsvd192[320]; }; /** @@ -3710,6 +3743,110 @@ struct nvme_boot_partition { }; /** + * struct nvme_eom_lane_desc - EOM Lane Descriptor + * @rsvd0: Reserved + * @mstatus: Measurement Status + 
* @lane: Lane number + * @eye: Eye number + * @top: Absolute number of rows from center to top edge of eye + * @bottom: Absolute number of rows from center to bottom edge of eye + * @left: Absolute number of rows from center to left edge of eye + * @right: Absolute number of rows from center to right edge of eye + * @nrows: Number of Rows + * @ncols: Number of Columns + * @edlen: Eye Data Length + * @rsvd18: Reserved + * @eye_desc: Printable Eye, Eye Data, and any Padding + */ +struct nvme_eom_lane_desc { + __u8 rsvd0; + __u8 mstatus; + __u8 lane; + __u8 eye; + __le16 top; + __le16 bottom; + __le16 left; + __le16 right; + __le16 nrows; + __le16 ncols; + __le16 edlen; + __u8 rsvd18[14]; + __u8 eye_desc[]; +}; + +/** + * struct nvme_phy_rx_eom_log - Physical Interface Receiver Eye Opening Measurement Log + * @lid: Log Identifier + * @eomip: EOM In Progress + * @hsize: Header Size + * @rsize: Result Size + * @eomdgn: EOM Data Generation Number + * @lr: Log Revision + * @odp: Optional Data Present + * @lanes: Number of lanes configured for this port + * @epl: Eyes Per Lane + * @lspfc: Log Specific Parameter Field Copy + * @li: Link Information + * @rsvd15: Reserved + * @lsic: Log Specific Identifier Copy + * @dsize: Descriptor Size + * @nd: Number of Descriptors + * @maxtb: Maximum Top Bottom + * @maxlr: Maximum Left Right + * @etgood: Estimated Time for Good Quality + * @etbetter: Estimated Time for Better Quality + * @etbest: Estimated Time for Best Quality + * @rsvd36: Reserved + * @descs: EOM Lane Descriptors + */ +struct nvme_phy_rx_eom_log { + __u8 lid; + __u8 eomip; + __le16 hsize; + __le32 rsize; + __u8 eomdgn; + __u8 lr; + __u8 odp; + __u8 lanes; + __u8 epl; + __u8 lspfc; + __u8 li; + __u8 rsvd15[3]; + __le16 lsic; + __le32 dsize; + __le16 nd; + __le16 maxtb; + __le16 maxlr; + __le16 etgood; + __le16 etbetter; + __le16 etbest; + __u8 rsvd36[28]; + struct nvme_eom_lane_desc descs[]; +}; + +/** + * enum nvme_eom_optional_data - EOM Optional Data Present Fields + 
* @NVME_EOM_EYE_DATA_PRESENT: Eye Data Present + * @NVME_EOM_PRINTABLE_EYE_PRESENT: Printable Eye Present + */ +enum nvme_eom_optional_data { + NVME_EOM_EYE_DATA_PRESENT = 1, + NVME_EOM_PRINTABLE_EYE_PRESENT = 1 << 1, +}; + +/** + * enum nvme_phy_rx_eom_progress - EOM In Progress Values + * @NVME_PHY_RX_EOM_NOT_STARTED: EOM Not Started + * @NVME_PHY_RX_EOM_IN_PROGRESS: EOM In Progress + * @NVME_PHY_RX_EOM_COMPLETED: EOM Completed + */ +enum nvme_phy_rx_eom_progress { + NVME_PHY_RX_EOM_NOT_STARTED = 0, + NVME_PHY_RX_EOM_IN_PROGRESS = 1, + NVME_PHY_RX_EOM_COMPLETED = 2, +}; + +/** * struct nvme_media_unit_stat_desc - Media Unit Status Descriptor * @muid: Media Unit Identifier * @domainid: Domain Identifier @@ -4604,11 +4741,19 @@ struct nvme_plm_config { /** * struct nvme_feat_host_behavior - Host Behavior Support - Data Structure * @acre: Advanced Command Retry Enable - * @rsvd1: Reserved + * @etdas: Extended Telemetry Data Area 4 Supported + * @lbafee: LBA Format Extension Enable + * @rsvd3: Reserved + * @cdfe: Copy Descriptor Formats Enable + * @rsvd6: Reserved */ struct nvme_feat_host_behavior { __u8 acre; - __u8 rsvd1[511]; + __u8 etdas; + __u8 lbafee; + __u8 rsvd3; + __u16 cdfe; + __u8 rsvd6[506]; }; /** @@ -4674,6 +4819,66 @@ struct nvme_copy_range_f1 { }; /** + * enum nvme_copy_range_sopt - NVMe Copy Range Source Options + * @NVME_COPY_SOPT_FCO: NVMe Copy Source Option Fast Copy Only + */ +enum nvme_copy_range_sopt { + NVME_COPY_SOPT_FCO = 1 << 15, +}; + +/** + * struct nvme_copy_range_f2 - Copy - Source Range Entries Descriptor Format 2h + * @snsid: Source Namespace Identifier + * @rsvd4: Reserved + * @slba: Starting LBA + * @nlb: Number of Logical Blocks + * @rsvd18: Reserved + * @sopt: Source Options + * @eilbrt: Expected Initial Logical Block Reference Tag / + * Expected Logical Block Storage Tag + * @elbatm: Expected Logical Block Application Tag Mask + * @elbat: Expected Logical Block Application Tag + */ +struct nvme_copy_range_f2 { + __le32 snsid; + 
__u8 rsvd4[4]; + __le64 slba; + __le16 nlb; + __u8 rsvd18[4]; + __le16 sopt; + __le32 eilbrt; + __le16 elbat; + __le16 elbatm; +}; + +/** + * struct nvme_copy_range_f3 - Copy - Source Range Entries Descriptor Format 3h + * @snsid: Source Namespace Identifier + * @rsvd4: Reserved + * @slba: Starting LBA + * @nlb: Number of Logical Blocks + * @rsvd18: Reserved + * @sopt: Source Options + * @rsvd24: Reserved + * @elbt: Expected Initial Logical Block Reference Tag / + * Expected Logical Block Storage Tag + * @elbatm: Expected Logical Block Application Tag Mask + * @elbat: Expected Logical Block Application Tag + */ +struct nvme_copy_range_f3 { + __le32 snsid; + __u8 rsvd4[4]; + __le64 slba; + __le16 nlb; + __u8 rsvd18[4]; + __le16 sopt; + __u8 rsvd24[2]; + __u8 elbt[10]; + __le16 elbat; + __le16 elbatm; +}; + +/** * struct nvme_registered_ctrl - Registered Controller Data Structure * @cntlid: Controller ID * @rcsts: Reservation Status @@ -6125,6 +6330,21 @@ struct nvme_mi_vpd_hdr { * @NVME_SC_INVALID_PI: Invalid Protection Information * @NVME_SC_READ_ONLY: Attempted Write to Read Only Range * @NVME_SC_CMD_SIZE_LIMIT_EXCEEDED: Command Size Limit Exceeded + * @NVME_SC_INCOMPATIBLE_NS: Incompatible Namespace or Format: At + * least one source namespace and the + * destination namespace have incompatible + * formats. + * @NVME_SC_FAST_COPY_NOT_POSSIBLE: Fast Copy Not Possible: The Fast Copy + * Only (FCO) bit was set to ‘1’ in a Source + * Range entry and the controller was not + * able to use fast copy operations to copy + * the specified data. + * @NVME_SC_OVERLAPPING_IO_RANGE: Overlapping I/O Range: A source logical + * block range overlaps the destination + * logical block range. + * @NVME_SC_INSUFFICIENT_RESOURCES: Insufficient Resources: A resource + * shortage prevented the controller from + * performing the requested copy. * @NVME_SC_CONNECT_FORMAT: Incompatible Format: The NVM subsystem * does not support the record format * specified by the host. 
@@ -6370,6 +6590,10 @@ enum nvme_status_field { NVME_SC_INVALID_PI = 0x81, NVME_SC_READ_ONLY = 0x82, NVME_SC_CMD_SIZE_LIMIT_EXCEEDED = 0x83, + NVME_SC_INCOMPATIBLE_NS = 0x85, + NVME_SC_FAST_COPY_NOT_POSSIBLE = 0x86, + NVME_SC_OVERLAPPING_IO_RANGE = 0x87, + NVME_SC_INSUFFICIENT_RESOURCES = 0x89, /* * I/O Command Set Specific - Fabrics commands: @@ -6501,7 +6725,7 @@ static inline __u32 nvme_status_get_type(int status) */ static inline __u32 nvme_status_get_value(int status) { - return status & ~(NVME_STATUS_TYPE_MASK << NVME_STATUS_TYPE_SHIFT); + return status & ~NVME_SET(NVME_STATUS_TYPE_MASK, STATUS_TYPE); } /** @@ -6688,6 +6912,7 @@ enum nvme_identify_cns { * @NVME_LOG_LID_FID_SUPPORTED_EFFECTS: Feature Identifiers Supported and Effects * @NVME_LOG_LID_MI_CMD_SUPPORTED_EFFECTS: NVMe-MI Commands Supported and Effects * @NVME_LOG_LID_BOOT_PARTITION: Boot Partition + * @NVME_LOG_LID_PHY_RX_EOM: Physical Interface Receiver Eye Opening Measurement * @NVME_LOG_LID_FDP_CONFIGS: FDP Configurations * @NVME_LOG_LID_FDP_RUH_USAGE: Reclaim Unit Handle Usage * @NVME_LOG_LID_FDP_STATS: FDP Statistics @@ -6719,6 +6944,7 @@ enum nvme_cmd_get_log_lid { NVME_LOG_LID_FID_SUPPORTED_EFFECTS = 0x12, NVME_LOG_LID_MI_CMD_SUPPORTED_EFFECTS = 0x13, NVME_LOG_LID_BOOT_PARTITION = 0x15, + NVME_LOG_LID_PHY_RX_EOM = 0x19, NVME_LOG_LID_FDP_CONFIGS = 0x20, NVME_LOG_LID_FDP_RUH_USAGE = 0x21, NVME_LOG_LID_FDP_STATS = 0x22, @@ -7012,7 +7238,7 @@ enum nvme_feat { NVME_FEAT_WP_WPS_SHIFT = 0, NVME_FEAT_WP_WPS_MASK = 0x7, NVME_FEAT_IOCSP_IOCSCI_SHIFT = 0, - NVME_FEAT_IOCSP_IOCSCI_MASK = 0xff, + NVME_FEAT_IOCSP_IOCSCI_MASK = 0x1ff, NVME_FEAT_FDP_ENABLED_SHIFT = 0, NVME_FEAT_FDP_ENABLED_MASK = 0x1, NVME_FEAT_FDP_INDEX_SHIFT = 8, @@ -7273,6 +7499,30 @@ enum nvme_log_ana_lsp { }; /** + * enum nvme_log_phy_rx_eom_action - Physical Interface Receiver Eye Opening Measurement Action + * @NVME_LOG_PHY_RX_EOM_READ: Read Log Data + * @NVME_LOG_PHY_RX_EOM_START_READ: Start Measurement and Read Log Data + * 
@NVME_LOG_PHY_RX_EOM_ABORT_CLEAR: Abort Measurement and Clear Log Data + */ +enum nvme_log_phy_rx_eom_action { + NVME_LOG_PHY_RX_EOM_READ = 0, + NVME_LOG_PHY_RX_EOM_START_READ = 1, + NVME_LOG_PHY_RX_EOM_ABORT_CLEAR = 2, +}; + +/** + * enum nvme_log_phy_rx_eom_quality - Physical Interface Receiver Eye Opening Measurement Quality + * @NVME_LOG_PHY_RX_EOM_GOOD: <= Better Quality + * @NVME_LOG_PHY_RX_EOM_BETTER: <= Best Quality, >= Good Quality + * @NVME_LOG_PHY_RX_EOM_BEST: >= Better Quality + */ +enum nvme_log_phy_rx_eom_quality { + NVME_LOG_PHY_RX_EOM_GOOD = 0, + NVME_LOG_PHY_RX_EOM_BETTER = 1, + NVME_LOG_PHY_RX_EOM_BEST = 2, +}; + +/** * enum nvme_pevent_log_action - Persistent Event Log - Action * @NVME_PEVENT_LOG_READ: Read Log Data * @NVME_PEVENT_LOG_EST_CTX_AND_READ: Establish Context and Read Log Data diff --git a/src/nvme/util.c b/src/nvme/util.c index 143cc31..45512ff 100644 --- a/src/nvme/util.c +++ b/src/nvme/util.c @@ -7,6 +7,7 @@ * Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com> */ +#include <stdlib.h> #include <stdio.h> #include <stdbool.h> #include <string.h> @@ -22,6 +23,7 @@ #include <ccan/endian/endian.h> +#include "cleanup.h" #include "private.h" #include "util.h" #include "log.h" @@ -290,6 +292,10 @@ static const char * const nvm_status[] = { [NVME_SC_INVALID_PI] = "Invalid Protection Information: The command's Protection Information Field settings are invalid for the namespace's Protection Information format", [NVME_SC_READ_ONLY] = "Attempted Write to Read Only Range: The LBA range specified contains read-only blocks", [NVME_SC_CMD_SIZE_LIMIT_EXCEEDED] = "Command Size Limit Exceeded", + [NVME_SC_INCOMPATIBLE_NS] = "Incompatible Namespace or Format", + [NVME_SC_FAST_COPY_NOT_POSSIBLE] = "Fast Copy Not Possible", + [NVME_SC_OVERLAPPING_IO_RANGE] = "Overlapping I/O Range", + [NVME_SC_INSUFFICIENT_RESOURCES] = "Insufficient Resources", [NVME_SC_ZNS_INVALID_OP_REQUEST] = "Invalid Zone Operation Request: The operation requested is invalid", 
[NVME_SC_ZNS_ZRWA_RESOURCES_UNAVAILABLE] = "ZRWA Resources Unavailable: No ZRWAs are available", [NVME_SC_ZNS_BOUNDARY_ERROR] = "Zoned Boundary Error: Invalid Zone Boundary crossing", @@ -385,6 +391,16 @@ const char *nvme_status_to_string(int status, bool fabrics) return s; } +static inline void nvme_init_copy_range_elbt(__u8 *elbt, __u64 eilbrt) +{ + int i; + + for (i = 0; i < 8; i++) + elbt[9 - i] = (eilbrt >> (8 * i)) & 0xff; + elbt[1] = 0; + elbt[0] = 0; +} + void nvme_init_copy_range(struct nvme_copy_range *copy, __u16 *nlbs, __u64 *slbas, __u32 *eilbrts, __u32 *elbatms, __u32 *elbats, __u16 nr) @@ -404,18 +420,51 @@ void nvme_init_copy_range_f1(struct nvme_copy_range_f1 *copy, __u16 *nlbs, __u64 *slbas, __u64 *eilbrts, __u32 *elbatms, __u32 *elbats, __u16 nr) { - int i, j; + int i; + + for (i = 0; i < nr; i++) { + copy[i].nlb = cpu_to_le16(nlbs[i]); + copy[i].slba = cpu_to_le64(slbas[i]); + copy[i].elbatm = cpu_to_le16(elbatms[i]); + copy[i].elbat = cpu_to_le16(elbats[i]); + nvme_init_copy_range_elbt(copy[i].elbt, eilbrts[i]); + } +} + +void nvme_init_copy_range_f2(struct nvme_copy_range_f2 *copy, __u32 *snsids, + __u16 *nlbs, __u64 *slbas, __u16 *sopts, + __u32 *eilbrts, __u32 *elbatms, __u32 *elbats, + __u16 nr) +{ + int i; for (i = 0; i < nr; i++) { + copy[i].snsid = cpu_to_le32(snsids[i]); copy[i].nlb = cpu_to_le16(nlbs[i]); copy[i].slba = cpu_to_le64(slbas[i]); + copy[i].sopt = cpu_to_le16(sopts[i]); + copy[i].eilbrt = cpu_to_le32(eilbrts[i]); copy[i].elbatm = cpu_to_le16(elbatms[i]); copy[i].elbat = cpu_to_le16(elbats[i]); - for (j = 0; j < 8; j++) - copy[i].elbt[9 - j] = (eilbrts[i] >> (8 * j)) & 0xff; - copy[i].elbt[1] = 0; - copy[i].elbt[0] = 0; - } + } +} + +void nvme_init_copy_range_f3(struct nvme_copy_range_f3 *copy, __u32 *snsids, + __u16 *nlbs, __u64 *slbas, __u16 *sopts, + __u64 *eilbrts, __u32 *elbatms, __u32 *elbats, + __u16 nr) +{ + int i; + + for (i = 0; i < nr; i++) { + copy[i].snsid = cpu_to_le32(snsids[i]); + copy[i].nlb = 
cpu_to_le16(nlbs[i]); + copy[i].slba = cpu_to_le64(slbas[i]); + copy[i].sopt = cpu_to_le16(sopts[i]); + copy[i].elbatm = cpu_to_le16(elbatms[i]); + copy[i].elbat = cpu_to_le16(elbats[i]); + nvme_init_copy_range_elbt(copy[i].elbt, eilbrts[i]); + } } void nvme_init_dsm_range(struct nvme_dsm_range *dsm, __u32 *ctx_attrs, @@ -708,7 +757,7 @@ char *kv_keymatch(const char *kv, const char *key) static size_t read_file(const char * fname, char *buffer, size_t *bufsz) { char *p; - FILE *file; + _cleanup_file_ FILE *file; size_t len; file = fopen(fname, "re"); @@ -716,7 +765,6 @@ static size_t read_file(const char * fname, char *buffer, size_t *bufsz) return 0; p = fgets(buffer, *bufsz, file); - fclose(file); if (!p) return 0; @@ -758,7 +806,7 @@ size_t get_entity_name(char *buffer, size_t bufsz) size_t get_entity_version(char *buffer, size_t bufsz) { - FILE *file; + _cleanup_file_ FILE *file; size_t num_bytes = 0; /* /proc/sys/kernel/ostype typically contains the string "Linux" */ @@ -808,7 +856,6 @@ size_t get_entity_version(char *buffer, size_t bufsz) if (s) ver_id_len = copy_value(ver_id, sizeof(ver_id), s); } - fclose(file); if (name_len) { /* Append a space */ @@ -881,14 +928,13 @@ int nvme_uuid_from_string(const char *str, unsigned char uuid[NVME_UUID_LEN]) int nvme_uuid_random(unsigned char uuid[NVME_UUID_LEN]) { - int f; + _cleanup_fd_ int f; ssize_t n; f = open("/dev/urandom", O_RDONLY); if (f < 0) return -errno; n = read(f, uuid, NVME_UUID_LEN); - close(f); if (n < 0) return -errno; else if (n != NVME_UUID_LEN) @@ -906,6 +952,46 @@ int nvme_uuid_random(unsigned char uuid[NVME_UUID_LEN]) } #ifdef HAVE_NETDB +static bool _nvme_ipaddrs_eq(struct sockaddr *addr1, struct sockaddr *addr2) +{ + struct sockaddr_in *sockaddr_v4; + struct sockaddr_in6 *sockaddr_v6; + + if (addr1->sa_family == AF_INET && addr2->sa_family == AF_INET) { + struct sockaddr_in *sockaddr1 = (struct sockaddr_in *)addr1; + struct sockaddr_in *sockaddr2 = (struct sockaddr_in *)addr2; + return 
sockaddr1->sin_addr.s_addr == sockaddr2->sin_addr.s_addr; + } + + if (addr1->sa_family == AF_INET6 && addr2->sa_family == AF_INET6) { + struct sockaddr_in6 *sockaddr1 = (struct sockaddr_in6 *)addr1; + struct sockaddr_in6 *sockaddr2 = (struct sockaddr_in6 *)addr2; + return !memcmp(&sockaddr1->sin6_addr, &sockaddr2->sin6_addr, sizeof(struct in6_addr)); + } + + switch (addr1->sa_family) { + case AF_INET: + sockaddr_v6 = (struct sockaddr_in6 *)addr2; + if (IN6_IS_ADDR_V4MAPPED(&sockaddr_v6->sin6_addr)) { + sockaddr_v4 = (struct sockaddr_in *)addr1; + return sockaddr_v4->sin_addr.s_addr == sockaddr_v6->sin6_addr.s6_addr32[3]; + } + break; + + case AF_INET6: + sockaddr_v6 = (struct sockaddr_in6 *)addr1; + if (IN6_IS_ADDR_V4MAPPED(&sockaddr_v6->sin6_addr)) { + sockaddr_v4 = (struct sockaddr_in *)addr2; + return sockaddr_v4->sin_addr.s_addr == sockaddr_v6->sin6_addr.s6_addr32[3]; + } + break; + + default: ; + } + + return false; +} + bool nvme_ipaddrs_eq(const char *addr1, const char *addr2) { bool result = false; @@ -924,37 +1010,7 @@ bool nvme_ipaddrs_eq(const char *addr1, const char *addr2) if (getaddrinfo(addr2, 0, &hint2, &info2) || !info2) goto ipaddrs_eq_fail; - if (info1->ai_family == AF_INET && info2->ai_family == AF_INET) { - struct sockaddr_in *sockaddr1 = (struct sockaddr_in *)(info1->ai_addr); - struct sockaddr_in *sockaddr2 = (struct sockaddr_in *)(info2->ai_addr); - result = sockaddr1->sin_addr.s_addr == sockaddr2->sin_addr.s_addr; - } else if (info1->ai_family == AF_INET6 && info2->ai_family == AF_INET6) { - struct sockaddr_in6 *sockaddr1 = (struct sockaddr_in6 *)(info1->ai_addr); - struct sockaddr_in6 *sockaddr2 = (struct sockaddr_in6 *)(info2->ai_addr); - result = !memcmp(&sockaddr1->sin6_addr, &sockaddr2->sin6_addr, sizeof(struct in6_addr)); - } else { - struct sockaddr_in *sockaddr_v4; - struct sockaddr_in6 *sockaddr_v6; - switch (info1->ai_family) { - case AF_INET: - sockaddr_v6 = (struct sockaddr_in6 *)(info2->ai_addr); - if 
(IN6_IS_ADDR_V4MAPPED(&sockaddr_v6->sin6_addr)) { - sockaddr_v4 = (struct sockaddr_in *)(info1->ai_addr); - result = sockaddr_v4->sin_addr.s_addr == sockaddr_v6->sin6_addr.s6_addr32[3]; - } - break; - - case AF_INET6: - sockaddr_v6 = (struct sockaddr_in6 *)(info1->ai_addr); - if (IN6_IS_ADDR_V4MAPPED(&sockaddr_v6->sin6_addr)) { - sockaddr_v4 = (struct sockaddr_in *)(info2->ai_addr); - result = sockaddr_v4->sin_addr.s_addr == sockaddr_v6->sin6_addr.s6_addr32[3]; - } - break; - - default: ; - } - } + result = _nvme_ipaddrs_eq(info1->ai_addr, info2->ai_addr); ipaddrs_eq_fail: if (info1) @@ -972,3 +1028,91 @@ bool nvme_ipaddrs_eq(const char *addr1, const char *addr2) return false; } #endif /* HAVE_NETDB */ + +#ifdef HAVE_NETDB +const char *nvme_iface_matching_addr(const struct ifaddrs *iface_list, const char *addr) +{ + const struct ifaddrs *iface_it; + struct addrinfo *info = NULL, hint = { .ai_flags = AI_NUMERICHOST, .ai_family = AF_UNSPEC }; + const char *iface_name = NULL; + + if (!iface_list || !addr || getaddrinfo(addr, 0, &hint, &info) || !info) + return NULL; + + /* Walk through the linked list */ + for (iface_it = iface_list; iface_it != NULL; iface_it = iface_it->ifa_next) { + struct sockaddr *ifaddr = iface_it->ifa_addr; + + if (ifaddr && (ifaddr->sa_family == AF_INET || ifaddr->sa_family == AF_INET6) && + _nvme_ipaddrs_eq(info->ai_addr, ifaddr)) { + iface_name = iface_it->ifa_name; + break; + } + } + + freeaddrinfo(info); + + return iface_name; +} + +bool nvme_iface_primary_addr_matches(const struct ifaddrs *iface_list, const char *iface, const char *addr) +{ + const struct ifaddrs *iface_it; + struct addrinfo *info = NULL, hint = { .ai_flags = AI_NUMERICHOST, .ai_family = AF_UNSPEC }; + bool match_found = false; + + if (!iface_list || !addr || getaddrinfo(addr, 0, &hint, &info) || !info) + return false; + + /* Walk through the linked list */ + for (iface_it = iface_list; iface_it != NULL; iface_it = iface_it->ifa_next) { + if (strcmp(iface, 
iface_it->ifa_name)) + continue; /* Not the interface we're looking for*/ + + /* The interface list is ordered in a way that the primary + * address is listed first. As soon as the parsed address + * matches the family of the address we're looking for, we + * have found the primary address for that family. + */ + if (iface_it->ifa_addr && (iface_it->ifa_addr->sa_family == info->ai_addr->sa_family)) { + match_found = _nvme_ipaddrs_eq(info->ai_addr, iface_it->ifa_addr); + break; + } + } + + freeaddrinfo(info); + + return match_found; +} + +#else /* HAVE_NETDB */ + +const char *nvme_iface_matching_addr(const struct ifaddrs *iface_list, const char *addr) +{ + nvme_msg(NULL, LOG_ERR, "no support for interface lookup; " + "recompile with libnss support.\n"); + + return NULL; +} + +bool nvme_iface_primary_addr_matches(const struct ifaddrs *iface_list, const char *iface, const char *addr) +{ + nvme_msg(NULL, LOG_ERR, "no support for interface lookup; " + "recompile with libnss support.\n"); + + return false; +} + +#endif /* HAVE_NETDB */ + +void *__nvme_alloc(size_t len) +{ + size_t _len = round_up(len, 0x1000); + void *p; + + if (posix_memalign((void *)&p, getpagesize(), _len)) + return NULL; + + memset(p, 0, _len); + return p; +} diff --git a/src/nvme/util.h b/src/nvme/util.h index 9d6faf3..16d5b9c 100644 --- a/src/nvme/util.h +++ b/src/nvme/util.h @@ -9,6 +9,8 @@ #ifndef _LIBNVME_UTIL_H #define _LIBNVME_UTIL_H +#include <ifaddrs.h> + #include "types.h" /** @@ -149,6 +151,40 @@ void nvme_init_copy_range_f1(struct nvme_copy_range_f1 *copy, __u16 *nlbs, __u32 *elbats, __u16 nr); /** + * nvme_init_copy_range_f2() - Constructs a copy range f2 structure + * @copy: Copy range array + * @snsids: Source namespace identifier + * @nlbs: Number of logical blocks + * @slbas: Starting LBA + * @sopts: Source options + * @eilbrts: Expected initial logical block reference tag + * @elbatms: Expected logical block application tag mask + * @elbats: Expected logical block application tag + 
* @nr: Number of descriptors to construct + */ +void nvme_init_copy_range_f2(struct nvme_copy_range_f2 *copy, __u32 *snsids, + __u16 *nlbs, __u64 *slbas, __u16 *sopts, + __u32 *eilbrts, __u32 *elbatms, __u32 *elbats, + __u16 nr); + +/** + * nvme_init_copy_range_f3() - Constructs a copy range f3 structure + * @copy: Copy range array + * @snsids: Source namespace identifier + * @nlbs: Number of logical blocks + * @slbas: Starting LBA + * @sopts: Source options + * @eilbrts: Expected initial logical block reference tag + * @elbatms: Expected logical block application tag mask + * @elbats: Expected logical block application tag + * @nr: Number of descriptors to construct + */ +void nvme_init_copy_range_f3(struct nvme_copy_range_f3 *copy, __u32 *snsids, + __u16 *nlbs, __u64 *slbas, __u16 *sopts, + __u64 *eilbrts, __u32 *elbatms, __u32 *elbats, + __u16 nr); + +/** * nvme_get_feature_length() - Retreive the command payload length for a * specific feature identifier * @fid: Feature identifier, see &enum nvme_features_id. @@ -447,8 +483,8 @@ static inline void nvme_feature_decode_namespace_write_protect(__u32 value, static inline void nvme_id_ns_flbas_to_lbaf_inuse(__u8 flbas, __u8 *lbaf_inuse) { - *lbaf_inuse = (((flbas & NVME_NS_FLBAS_HIGHER_MASK) >> 1) | - (flbas & NVME_NS_FLBAS_LOWER_MASK)); + *lbaf_inuse = ((NVME_FLBAS_HIGHER(flbas) << 4) | /* 2-bit HIGHER field -> index bits 5:4 */ + NVME_FLBAS_LOWER(flbas)); /* 4-bit LOWER field -> index bits 3:0 */ } struct nvme_root; @@ -639,4 +675,31 @@ int nvme_uuid_random(unsigned char uuid[NVME_UUID_LEN]); */ bool nvme_ipaddrs_eq(const char *addr1, const char *addr2); +/** + * nvme_iface_matching_addr - Get interface matching @addr + * @iface_list: Interface list returned by getifaddrs() + * @addr: Address to match + * + * Parse the interface list pointed to by @iface_list looking + * for the interface that has @addr as one of its assigned + * addresses. + * + * Return: The name of the interface that owns @addr or NULL.
+ */ +const char *nvme_iface_matching_addr(const struct ifaddrs *iface_list, const char *addr); + +/** + * nvme_iface_primary_addr_matches - Check that interface's primary address matches + * @iface_list: Interface list returned by getifaddrs() + * @iface: Interface to match + * @addr: Address to match + * + * Parse the interface list pointed to by @iface_list looking for + * interface @iface. Then get its primary address and check if it matches + * @addr. + * + * Return: true if a match is found, false otherwise. + */ +bool nvme_iface_primary_addr_matches(const struct ifaddrs *iface_list, const char *iface, const char *addr); + #endif /* _LIBNVME_UTIL_H */ |