diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-05 08:35:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-05 08:35:48 +0000 |
commit | fb4382cf6ceb11ce2c5781d14714e15e45022d03 (patch) | |
tree | 4d529e6152272fde35bc85955a300b4811f672ca /src/nvme | |
parent | Releasing debian version 1.9-1. (diff) | |
download | libnvme-fb4382cf6ceb11ce2c5781d14714e15e45022d03.tar.xz libnvme-fb4382cf6ceb11ce2c5781d14714e15e45022d03.zip |
Merging upstream version 1.10.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/nvme')
-rw-r--r-- | src/nvme/base64.c | 5 | ||||
-rw-r--r-- | src/nvme/fabrics.c | 184 | ||||
-rw-r--r-- | src/nvme/fabrics.h | 64 | ||||
-rw-r--r-- | src/nvme/ioctl.c | 168 | ||||
-rw-r--r-- | src/nvme/ioctl.h | 31 | ||||
-rw-r--r-- | src/nvme/json.c | 23 | ||||
-rw-r--r-- | src/nvme/linux.c | 209 | ||||
-rw-r--r-- | src/nvme/linux.h | 21 | ||||
-rw-r--r-- | src/nvme/log.c | 68 | ||||
-rw-r--r-- | src/nvme/log.h | 25 | ||||
-rw-r--r-- | src/nvme/mi-mctp.c | 4 | ||||
-rw-r--r-- | src/nvme/mi.c | 244 | ||||
-rw-r--r-- | src/nvme/mi.h | 29 | ||||
-rw-r--r-- | src/nvme/private.h | 24 | ||||
-rw-r--r-- | src/nvme/tree.c | 179 | ||||
-rw-r--r-- | src/nvme/tree.h | 47 | ||||
-rw-r--r-- | src/nvme/types.h | 85 | ||||
-rw-r--r-- | src/nvme/util.c | 2 |
18 files changed, 1163 insertions, 249 deletions
diff --git a/src/nvme/base64.c b/src/nvme/base64.c index 5fae829..0163fa1 100644 --- a/src/nvme/base64.c +++ b/src/nvme/base64.c @@ -7,6 +7,7 @@ * Author: Hannes Reinecke <hare@suse.de> */ +#include <stdint.h> #include <stdlib.h> #include <string.h> #include <errno.h> @@ -29,7 +30,7 @@ static const char base64_table[65] = int base64_encode(const unsigned char *src, int srclen, char *dst) { int i, bits = 0; - u_int32_t ac = 0; + uint32_t ac = 0; char *cp = dst; for (i = 0; i < srclen; i++) { @@ -64,7 +65,7 @@ int base64_encode(const unsigned char *src, int srclen, char *dst) */ int base64_decode(const char *src, int srclen, unsigned char *dst) { - u_int32_t ac = 0; + uint32_t ac = 0; int i, bits = 0; unsigned char *bp = dst; diff --git a/src/nvme/fabrics.c b/src/nvme/fabrics.c index 6738e9d..acf12bc 100644 --- a/src/nvme/fabrics.c +++ b/src/nvme/fabrics.c @@ -1342,27 +1342,42 @@ static int uuid_from_dmi(char *system_uuid) return ret; } -char *nvmf_hostnqn_generate() +char *nvmf_hostid_generate() { - char *hostnqn; int ret; char uuid_str[NVME_UUID_LEN_STRING]; unsigned char uuid[NVME_UUID_LEN]; ret = uuid_from_dmi(uuid_str); - if (ret < 0) { + if (ret < 0) ret = uuid_from_device_tree(uuid_str); - } if (ret < 0) { if (nvme_uuid_random(uuid) < 0) memset(uuid, 0, NVME_UUID_LEN); nvme_uuid_to_string(uuid, uuid_str); } - if (asprintf(&hostnqn, "nqn.2014-08.org.nvmexpress:uuid:%s", uuid_str) < 0) - return NULL; + return strdup(uuid_str); +} - return hostnqn; +char *nvmf_hostnqn_generate_from_hostid(char *hostid) +{ + char *hid = NULL; + char *hostnqn; + int ret; + + if (!hostid) + hostid = hid = nvmf_hostid_generate(); + + ret = asprintf(&hostnqn, "nqn.2014-08.org.nvmexpress:uuid:%s", hostid); + free(hid); + + return (ret < 0) ? 
NULL : hostnqn; +} + +char *nvmf_hostnqn_generate() +{ + return nvmf_hostnqn_generate_from_hostid(NULL); } static char *nvmf_read_file(const char *f, int len) @@ -1387,8 +1402,11 @@ char *nvmf_hostnqn_from_file() { char *hostnqn = getenv("LIBNVME_HOSTNQN"); - if (hostnqn) + if (hostnqn) { + if (!strcmp(hostnqn, "")) + return NULL; return strdup(hostnqn); + } return nvmf_read_file(NVMF_HOSTNQN_FILE, NVMF_NQN_SIZE); } @@ -1397,8 +1415,11 @@ char *nvmf_hostid_from_file() { char *hostid = getenv("LIBNVME_HOSTID"); - if (hostid) + if (hostid) { + if (!strcmp(hostid, "")) + return NULL; return strdup(hostid); + } return nvmf_read_file(NVMF_HOSTID_FILE, NVMF_HOSTID_SIZE); } @@ -1703,3 +1724,148 @@ int nvmf_register_ctrl(nvme_ctrl_t c, enum nvmf_dim_tas tas, __u32 *result) */ return nvmf_dim(c, tas, NVMF_TRTYPE_TCP, nvme_get_adrfam(c), "", NULL, result); } + +#define IS_XDIGIT(c) ((c >= '0' && c <= '9') || \ + (c >= 'A' && c <= 'F') || \ + (c >= 'a' && c <= 'f')) +#define XDIGIT_VAL(c) ((c >= '0' && c <= '9') ? c - '0' : ( \ + (c >= 'A' && c <= 'F') ? c - 'A' + 10 : c - 'a' + 10)) + +/* returns newly allocated string */ +static char *unescape_uri(const char *str, int len) +{ + char *dst; + int l; + int i, j; + + l = len > 0 ? len : strlen(str); + dst = malloc(l + 1); + for (i = 0, j = 0; i < l; i++, j++) { + if (str[i] == '%' && i + 2 < l && + IS_XDIGIT(str[i + 1]) && IS_XDIGIT(str[i + 2])) { + dst[j] = (XDIGIT_VAL(str[i + 1]) << 4) + + XDIGIT_VAL(str[i + 2]); + i += 2; + } else + dst[j] = str[i]; + } + dst[j] = '\0'; + return dst; +} + +struct nvme_fabrics_uri *nvme_parse_uri(const char *str) +{ + struct nvme_fabrics_uri *uri; + _cleanup_free_ char *scheme = NULL; + _cleanup_free_ char *authority = NULL; + _cleanup_free_ char *path = NULL; + _cleanup_free_ char *h = NULL; + const char *host; + int i; + + /* As defined in Boot Specification rev. 
1.0: + * + * section 1.5.7: NVMe-oF URI Format + * nvme+tcp://192.168.1.1:4420/ + * nvme+tcp://[FE80::1010]:4420/ + * + * section 3.1.2.5.3: DHCP Root-Path - a hierarchical NVMe-oF URI Format + * NVME<+PROTOCOL>://<SERVERNAME/IP>[:TRANSPORT PORT]/<SUBSYS NQN>/<NID> + * or + * NVME<+PROTOCOL>://<DISCOVERY CONTROLLER ADDRESS>[:DISCOVERY- + * -CONTROLLER PORT]/NQN.2014-08.ORG.NVMEXPRESS.DISCOVERY/<NID> + */ + + uri = calloc(1, sizeof(struct nvme_fabrics_uri)); + if (!uri) + return NULL; + + if (sscanf(str, "%m[^:/]://%m[^/?#]%ms", + &scheme, &authority, &path) < 2) { + nvme_free_uri(uri); + errno = EINVAL; + return NULL; + } + + if (sscanf(scheme, "%m[^+]+%ms", + &uri->scheme, &uri->protocol) < 1) { + nvme_free_uri(uri); + errno = EINVAL; + return NULL; + } + + /* split userinfo */ + host = strrchr(authority, '@'); + if (host) { + host++; + uri->userinfo = unescape_uri(authority, host - authority); + } else + host = authority; + + /* try matching IPv6 address first */ + if (sscanf(host, "[%m[^]]]:%d", + &uri->host, &uri->port) < 1) { + /* treat it as IPv4/hostname */ + if (sscanf(host, "%m[^:]:%d", + &h, &uri->port) < 1) { + nvme_free_uri(uri); + errno = EINVAL; + return NULL; + } + uri->host = unescape_uri(h, 0); + } + + /* split path into elements */ + if (path) { + char *e, *elem; + + /* separate the fragment */ + e = strrchr(path, '#'); + if (e) { + uri->fragment = unescape_uri(e + 1, 0); + *e = '\0'; + } + /* separate the query string */ + e = strrchr(path, '?'); + if (e) { + uri->query = unescape_uri(e + 1, 0); + *e = '\0'; + } + + /* count elements first */ + for (i = 0, e = path; *e; e++) + if (*e == '/' && *(e + 1) != '/') + i++; + uri->path_segments = calloc(i + 2, sizeof(char *)); + + i = 0; + elem = strtok_r(path, "/", &e); + if (elem) + uri->path_segments[i++] = unescape_uri(elem, 0); + while (elem && strlen(elem)) { + elem = strtok_r(NULL, "/", &e); + if (elem) + uri->path_segments[i++] = unescape_uri(elem, 0); + } + } + + return uri; +} + +void 
nvme_free_uri(struct nvme_fabrics_uri *uri) +{ + char **s; + + if (!uri) + return; + free(uri->scheme); + free(uri->protocol); + free(uri->userinfo); + free(uri->host); + for (s = uri->path_segments; s && *s; s++) + free(*s); + free(uri->path_segments); + free(uri->query); + free(uri->fragment); + free(uri); +} diff --git a/src/nvme/fabrics.h b/src/nvme/fabrics.h index 4ebeb35..8e26e9f 100644 --- a/src/nvme/fabrics.h +++ b/src/nvme/fabrics.h @@ -68,6 +68,28 @@ struct nvme_fabrics_config { }; /** + * struct nvme_fabrics_uri - Parsed URI structure + * @scheme: Scheme name (typically 'nvme') + * @protocol: Optional protocol/transport (e.g. 'tcp') + * @userinfo: Optional user information component of the URI authority + * @host: Host transport address + * @port: The port subcomponent or 0 if not specified + * @path_segments: NULL-terminated array of path segments + * @query: Optional query string component (separated by '?') + * @fragment: Optional fragment identifier component (separated by '#') + */ +struct nvme_fabrics_uri { + char *scheme; + char *protocol; + char *userinfo; + char *host; + int port; + char **path_segments; + char *query; + char *fragment; +}; + +/** * nvmf_trtype_str() - Decode TRTYPE field * @trtype: value to be decoded * @@ -258,6 +280,26 @@ struct nvmf_discovery_log *nvmf_get_discovery_wargs(struct nvme_get_discovery_ar char *nvmf_hostnqn_generate(); /** + * nvmf_hostnqn_generate_from_hostid() - Generate a host nqn from host identifier + * @hostid: Host identifier + * + * If @hostid is NULL, the function generates it based on the machine + * identifier. + * + * Return: On success, an NVMe Qualified Name for host identification. This + * name is based on the given host identifier. On failure, NULL. 
+ */ +char *nvmf_hostnqn_generate_from_hostid(char *hostid); + +/** + * nvmf_hostid_generate() - Generate a machine specific host identifier + * + * Return: On success, an identifier string based on the machine identifier to + * be used as NVMe Host Identifier, or NULL on failure. + */ +char *nvmf_hostid_generate(); + +/** * nvmf_hostnqn_from_file() - Reads the host nvm qualified name from the config * default location * @@ -324,4 +366,26 @@ bool nvmf_is_registration_supported(nvme_ctrl_t c); */ int nvmf_register_ctrl(nvme_ctrl_t c, enum nvmf_dim_tas tas, __u32 *result); +/** + * nvme_parse_uri() - Parse the URI string + * @str: URI string + * + * Parse the URI string as defined in the NVM Express Boot Specification. + * Supported URI elements looks as follows: + * + * nvme+tcp://user@host:port/subsys_nqn/nid?query=val#fragment + * + * Return: &nvme_fabrics_uri structure on success; NULL on failure with errno + * set. + */ +struct nvme_fabrics_uri *nvme_parse_uri(const char *str); + +/** + * nvme_free_uri() - Free the URI structure + * @uri: &nvme_fabrics_uri structure + * + * Free an &nvme_fabrics_uri structure. 
+ */ +void nvme_free_uri(struct nvme_fabrics_uri *uri); + #endif /* _LIBNVME_FABRICS_H */ diff --git a/src/nvme/ioctl.c b/src/nvme/ioctl.c index ce5a911..9707829 100644 --- a/src/nvme/ioctl.c +++ b/src/nvme/ioctl.c @@ -355,6 +355,131 @@ int nvme_get_log_page(int fd, __u32 xfer_len, struct nvme_get_log_args *args) return 0; } +static int read_ana_chunk(int fd, enum nvme_log_ana_lsp lsp, bool rae, + __u8 *log, __u8 **read, __u8 *to_read, __u8 *log_end) +{ + if (to_read > log_end) { + errno = ENOSPC; + return -1; + } + + while (*read < to_read) { + __u32 len = min(log_end - *read, NVME_LOG_PAGE_PDU_SIZE); + int ret; + + ret = nvme_get_log_ana(fd, lsp, rae, *read - log, len, *read); + if (ret) + return ret; + + *read += len; + } + return 0; +} + +static int try_read_ana(int fd, enum nvme_log_ana_lsp lsp, bool rae, + struct nvme_ana_log *log, __u8 *log_end, + __u8 *read, __u8 **to_read, bool *may_retry) +{ + __u16 ngrps = le16_to_cpu(log->ngrps); + + while (ngrps--) { + __u8 *group = *to_read; + int ret; + __le32 nnsids; + + *to_read += sizeof(*log->descs); + ret = read_ana_chunk(fd, lsp, rae, + (__u8 *)log, &read, *to_read, log_end); + if (ret) { + /* + * If the provided buffer isn't long enough, + * the log page may have changed while reading it + * and the computed length was inaccurate. + * Have the caller check chgcnt and retry. + */ + *may_retry = errno == ENOSPC; + return ret; + } + + /* + * struct nvme_ana_group_desc has 8-byte alignment + * but the group pointer is only 4-byte aligned. + * Don't dereference the misaligned pointer. 
+ */ + memcpy(&nnsids, + group + offsetof(struct nvme_ana_group_desc, nnsids), + sizeof(nnsids)); + *to_read += le32_to_cpu(nnsids) * sizeof(__le32); + ret = read_ana_chunk(fd, lsp, rae, + (__u8 *)log, &read, *to_read, log_end); + if (ret) { + *may_retry = errno == ENOSPC; + return ret; + } + } + + *may_retry = true; + return 0; +} + +int nvme_get_ana_log_atomic(int fd, bool rgo, bool rae, unsigned int retries, + struct nvme_ana_log *log, __u32 *len) +{ + const enum nvme_log_ana_lsp lsp = + rgo ? NVME_LOG_ANA_LSP_RGO_GROUPS_ONLY : 0; + /* Get Log Page can only fetch multiples of dwords */ + __u8 * const log_end = (__u8 *)log + (*len & -4); + __u8 *read = (__u8 *)log; + __u8 *to_read; + int ret; + + if (!retries) { + errno = EINVAL; + return -1; + } + + to_read = (__u8 *)log->descs; + ret = read_ana_chunk(fd, lsp, rae, + (__u8 *)log, &read, to_read, log_end); + if (ret) + return ret; + + do { + bool may_retry = false; + int saved_ret; + int saved_errno; + __le64 chgcnt; + + saved_ret = try_read_ana(fd, lsp, rae, log, log_end, + read, &to_read, &may_retry); + /* + * If the log page was read with multiple Get Log Page commands, + * chgcnt must be checked afterwards to ensure atomicity + */ + *len = to_read - (__u8 *)log; + if (*len <= NVME_LOG_PAGE_PDU_SIZE || !may_retry) + return saved_ret; + + saved_errno = errno; + chgcnt = log->chgcnt; + read = (__u8 *)log; + to_read = (__u8 *)log->descs; + ret = read_ana_chunk(fd, lsp, rae, + (__u8 *)log, &read, to_read, log_end); + if (ret) + return ret; + + if (log->chgcnt == chgcnt) { + /* Log hasn't changed; return try_read_ana() result */ + errno = saved_errno; + return saved_ret; + } + } while (--retries); + + errno = EAGAIN; + return -1; +} + int nvme_set_features(struct nvme_set_features_args *args) { __u32 cdw10 = NVME_SET(args->fid, FEATURES_CDW10_FID) | @@ -1670,35 +1795,30 @@ static int nvme_set_var_size_tags(__u32 *cmd_dw2, __u32 *cmd_dw3, __u32 *cmd_dw1 __u8 pif, __u8 sts, __u64 reftag, __u64 storage_tag) { __u32 
cdw2 = 0, cdw3 = 0, cdw14; - beint64_t be_reftag = cpu_to_be64(reftag); - beint64_t be_storage_tag = cpu_to_be64(storage_tag); switch (pif) { - /* 16b Protection Information */ - case 0: - cdw14 = be_reftag & 0xffffffff; - cdw14 |= ((be_storage_tag << (32 - sts)) & 0xffffffff); + case NVME_NVM_PIF_16B_GUARD: + cdw14 = reftag & 0xffffffff; + cdw14 |= ((storage_tag << (32 - sts)) & 0xffffffff); break; - /* 32b Protection Information */ - case 1: - cdw14 = be_reftag & 0xffffffff; - cdw3 = be_reftag >> 32; - cdw14 |= ((be_storage_tag << (80 - sts)) & 0xffff0000); + case NVME_NVM_PIF_32B_GUARD: + cdw14 = reftag & 0xffffffff; + cdw3 = reftag >> 32; + cdw14 |= ((storage_tag << (80 - sts)) & 0xffff0000); if (sts >= 48) - cdw3 |= ((be_storage_tag >> (sts - 48)) & 0xffffffff); + cdw3 |= ((storage_tag >> (sts - 48)) & 0xffffffff); else - cdw3 |= ((be_storage_tag << (48 - sts)) & 0xffffffff); - cdw2 = (be_storage_tag >> (sts - 16)) & 0xffff; + cdw3 |= ((storage_tag << (48 - sts)) & 0xffffffff); + cdw2 = (storage_tag >> (sts - 16)) & 0xffff; break; - /* 64b Protection Information */ - case 2: - cdw14 = be_reftag & 0xffffffff; - cdw3 = (be_reftag >> 32) & 0xffff; - cdw14 |= ((be_storage_tag << (48 - sts)) & 0xffffffff); + case NVME_NVM_PIF_64B_GUARD: + cdw14 = reftag & 0xffffffff; + cdw3 = (reftag >> 32) & 0xffff; + cdw14 |= ((storage_tag << (48 - sts)) & 0xffffffff); if (sts >= 16) - cdw3 |= ((be_storage_tag >> (sts - 16)) & 0xffff); + cdw3 |= ((storage_tag >> (sts - 16)) & 0xffff); else - cdw3 |= ((be_storage_tag << (16 - sts)) & 0xffff); + cdw3 |= ((storage_tag << (16 - sts)) & 0xffff); break; default: perror("Unsupported Protection Information Format"); @@ -1706,9 +1826,9 @@ static int nvme_set_var_size_tags(__u32 *cmd_dw2, __u32 *cmd_dw3, __u32 *cmd_dw1 return -1; } - *cmd_dw2 = cdw2; - *cmd_dw3 = cdw3; - *cmd_dw14 = cdw14; + *cmd_dw2 = cpu_to_be32(cdw2); + *cmd_dw3 = cpu_to_be32(cdw3); + *cmd_dw14 = cpu_to_be32(cdw14); return 0; } diff --git a/src/nvme/ioctl.h 
b/src/nvme/ioctl.h index be4c1b7..2ebd39c 100644 --- a/src/nvme/ioctl.h +++ b/src/nvme/ioctl.h @@ -1865,7 +1865,7 @@ static inline int nvme_get_log_fdp_events(int fd, __u16 egid, bool host_events, * the asymmetric namespace access information for ANA Groups that contain * namespaces that are attached to the controller processing the command. * - * See &struct nvme_ana_rsp_hdr for the definition of the returned structure. + * See &struct nvme_ana_log for the definition of the returned structure. * * Return: The nvme command status if a response was received (see * &enum nvme_status_field) or -1 with errno set otherwise. @@ -1887,7 +1887,7 @@ static inline int nvme_get_log_ana(int fd, enum nvme_log_ana_lsp lsp, bool rae, .lsi = NVME_LOG_LSI_NONE, .lsp = (__u8)lsp, .uuidx = NVME_UUID_NONE, - .rae = false, + .rae = rae, .ot = false, }; return nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, &args); @@ -1900,19 +1900,42 @@ static inline int nvme_get_log_ana(int fd, enum nvme_log_ana_lsp lsp, bool rae, * @len: The allocated length of the log page * @log: User address to store the ana group log * - * See &struct nvme_ana_group_desc for the definition of the returned structure. + * See &struct nvme_ana_log for the definition of the returned structure. * * Return: The nvme command status if a response was received (see * &enum nvme_status_field) or -1 with errno set otherwise. 
*/ static inline int nvme_get_log_ana_groups(int fd, bool rae, __u32 len, - struct nvme_ana_group_desc *log) + struct nvme_ana_log *log) { return nvme_get_log_ana(fd, NVME_LOG_ANA_LSP_RGO_GROUPS_ONLY, rae, 0, len, log); } /** + * nvme_get_ana_log_atomic() - Retrieve Asymmetric Namespace Access log page atomically + * @fd: File descriptor of nvme device + * @rgo: Whether to retrieve ANA groups only (no NSIDs) + * @rae: Whether to retain asynchronous events + * @retries: The maximum number of times to retry on log page changes + * @log: Pointer to a buffer to receive the ANA log page + * @len: Input: the length of the log page buffer. + * Output: the actual length of the ANA log page. + * + * See &struct nvme_ana_log for the definition of the returned structure. + * + * Return: If successful, returns 0 and sets *len to the actual log page length. + * If unsuccessful, returns the nvme command status if a response was received + * (see &enum nvme_status_field) or -1 with errno set otherwise. + * Sets errno = EINVAL if retries == 0. + * Sets errno = EAGAIN if unable to read the log page atomically + * because chgcnt changed during each of the retries attempts. + * Sets errno = ENOSPC if the full log page does not fit in the provided buffer. 
+ */ +int nvme_get_ana_log_atomic(int fd, bool rgo, bool rae, unsigned int retries, + struct nvme_ana_log *log, __u32 *len); + +/** * nvme_get_log_lba_status() - Retrieve LBA Status * @fd: File descriptor of nvme device * @rae: Retain asynchronous events diff --git a/src/nvme/json.c b/src/nvme/json.c index a02bd2d..2c769f2 100644 --- a/src/nvme/json.c +++ b/src/nvme/json.c @@ -339,6 +339,9 @@ static void json_update_port(struct json_object *ctrl_array, nvme_ctrl_t c) const char *transport, *value; transport = nvme_ctrl_get_transport(c); + if (!strcmp(transport, "pcie")) + return; + json_object_object_add(port_obj, "transport", json_object_new_string(transport)); value = nvme_ctrl_get_traddr(c); @@ -432,11 +435,13 @@ static void json_update_subsys(struct json_object *subsys_array, nvme_subsystem_for_each_ctrl(s, c) { json_update_port(port_array, c); } - if (json_object_array_length(port_array)) + if (json_object_array_length(port_array)) { json_object_object_add(subsys_obj, "ports", port_array); - else + json_object_array_add(subsys_array, subsys_obj); + } else { json_object_put(port_array); - json_object_array_add(subsys_array, subsys_obj); + json_object_put(subsys_obj); + } } int json_update_config(nvme_root_t r, const char *config_file) @@ -476,12 +481,14 @@ int json_update_config(nvme_root_t r, const char *config_file) nvme_for_each_subsystem(h, s) { json_update_subsys(subsys_array, s); } - if (json_object_array_length(subsys_array)) + if (json_object_array_length(subsys_array)) { json_object_object_add(host_obj, "subsystems", - subsys_array); - else + subsys_array); + json_object_array_add(json_root, host_obj); + } else { json_object_put(subsys_array); - json_object_array_add(json_root, host_obj); + json_object_put(host_obj); + } } if (!config_file) { ret = json_object_to_fd(1, json_root, JSON_C_TO_STRING_PRETTY); @@ -633,7 +640,7 @@ int json_dump_tree(nvme_root_t r) } json_object_object_add(json_root, "hosts", host_array); - ret = 
json_object_to_fd(fileno(r->fp), json_root, JSON_C_TO_STRING_PRETTY); + ret = json_object_to_fd(r->log.fd, json_root, JSON_C_TO_STRING_PRETTY); if (ret < 0) { nvme_msg(r, LOG_ERR, "Failed to write, %s\n", json_util_get_last_err()); diff --git a/src/nvme/linux.c b/src/nvme/linux.c index 25196fd..aff0544 100644 --- a/src/nvme/linux.c +++ b/src/nvme/linux.c @@ -18,7 +18,6 @@ #include <unistd.h> #ifdef CONFIG_OPENSSL -#include <openssl/engine.h> #include <openssl/evp.h> #include <openssl/hmac.h> #include <openssl/kdf.h> @@ -166,7 +165,7 @@ int nvme_get_telemetry_log(int fd, bool create, bool ctrl, bool rae, size_t max_ struct nvme_telemetry_log *telem; enum nvme_cmd_get_log_lid lid; - _cleanup_free_ void *log; + _cleanup_free_ void *log = NULL; void *tmp; int err; size_t dalb; @@ -296,8 +295,8 @@ int nvme_get_new_host_telemetry(int fd, struct nvme_telemetry_log **log, int nvme_get_lba_status_log(int fd, bool rae, struct nvme_lba_status_log **log) { + _cleanup_free_ struct nvme_lba_status_log *buf = NULL; __u32 size; - _cleanup_free_ struct nvme_lba_status_log *buf; void *tmp; int err; struct nvme_get_log_args args = { @@ -386,6 +385,16 @@ int nvme_namespace_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, NVME_DEFAULT_IOCTL_TIMEOUT); } +size_t nvme_get_ana_log_len_from_id_ctrl(const struct nvme_id_ctrl *id_ctrl, + bool rgo) +{ + __u32 nanagrpid = le32_to_cpu(id_ctrl->nanagrpid); + size_t size = sizeof(struct nvme_ana_log) + + nanagrpid * sizeof(struct nvme_ana_group_desc); + + return rgo ? 
size : size + le32_to_cpu(id_ctrl->mnan) * sizeof(__le32); +} + int nvme_get_ana_log_len(int fd, size_t *analen) { _cleanup_free_ struct nvme_id_ctrl *ctrl = NULL; @@ -400,9 +409,7 @@ int nvme_get_ana_log_len(int fd, size_t *analen) if (ret) return ret; - *analen = sizeof(struct nvme_ana_log) + - le32_to_cpu(ctrl->nanagrpid) * sizeof(struct nvme_ana_group_desc) + - le32_to_cpu(ctrl->mnan) * sizeof(__le32); + *analen = nvme_get_ana_log_len_from_id_ctrl(ctrl, false); return 0; } @@ -544,22 +551,18 @@ static int derive_retained_key(int hmac, const char *hostnqn, return -1; } -static int gen_tls_identity(const char *hostnqn, const char *subsysnqn, - int version, int hmac, char *identity, - unsigned char *retained, size_t key_len) +static int derive_psk_digest(const char *hostnqn, const char *subsysnqn, + int version, int hmac, + unsigned char *retained, size_t key_len, + char *digest, size_t digest_len) { - if (version != 0) { - nvme_msg(NULL, LOG_ERR, "NVMe TLS 2.0 is not supported; " - "recompile with OpenSSL support.\n"); - errno = ENOTSUP; - return -1; - } - sprintf(identity, "NVMe0R%02d %s %s", - hmac, hostnqn, subsysnqn); - return strlen(identity); + nvme_msg(NULL, LOG_ERR, "NVMe TLS 2.0 is not supported; " + "recompile with OpenSSL support.\n"); + errno = ENOTSUP; + return -1; } -static int derive_tls_key(int hmac, const char *identity, +static int derive_tls_key(int version, int hmac, const char *context, unsigned char *retained, unsigned char *psk, size_t key_len) { @@ -655,7 +658,7 @@ static int derive_retained_key(int hmac, const char *hostnqn, return key_len; } -static int derive_tls_key(int hmac, const char *identity, +static int derive_tls_key(int version, int hmac, const char *context, unsigned char *retained, unsigned char *psk, size_t key_len) { @@ -703,9 +706,20 @@ static int derive_tls_key(int hmac, const char *identity, errno = ENOKEY; return -1; } + if (version == 1) { + char hash_str[4]; + + sprintf(hash_str, "%02d ", hmac); + if 
(EVP_PKEY_CTX_add1_hkdf_info(ctx, + (const unsigned char *)hash_str, + strlen(hash_str)) <= 0) { + errno = ENOKEY; + return -1; + } + } if (EVP_PKEY_CTX_add1_hkdf_info(ctx, - (const unsigned char *)identity, - strlen(identity)) <= 0) { + (const unsigned char *)context, + strlen(context)) <= 0) { errno = ENOKEY; return -1; } @@ -731,9 +745,6 @@ int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac, _cleanup_hmac_ctx_ HMAC_CTX *hmac_ctx = NULL; const EVP_MD *md; - ENGINE_load_builtin_engines(); - ENGINE_register_all_complete(); - hmac_ctx = HMAC_CTX_new(); if (!hmac_ctx) { errno = ENOMEM; @@ -788,28 +799,18 @@ int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac, return 0; } -static int gen_tls_identity(const char *hostnqn, const char *subsysnqn, - int version, int hmac, char *identity, - unsigned char *retained, size_t key_len) +static int derive_psk_digest(const char *hostnqn, const char *subsysnqn, + int version, int hmac, + unsigned char *retained, size_t key_len, + char *digest, size_t digest_len) { static const char hmac_seed[] = "NVMe-over-Fabrics"; size_t hmac_len; const EVP_MD *md = select_hmac(hmac, &hmac_len); _cleanup_hmac_ctx_ HMAC_CTX *hmac_ctx = NULL; _cleanup_free_ unsigned char *psk_ctx = NULL; - _cleanup_free_ char *enc_ctx = NULL; size_t len; - if (version == 0) { - sprintf(identity, "NVMe%01dR%02d %s %s", - version, hmac, hostnqn, subsysnqn); - return strlen(identity); - } - if (version > 1) { - errno = EINVAL; - return -1; - } - hmac_ctx = HMAC_CTX_new(); if (!hmac_ctx) { errno = ENOMEM; @@ -856,17 +857,19 @@ static int gen_tls_identity(const char *hostnqn, const char *subsysnqn, errno = ENOKEY; return -1; } - enc_ctx = malloc(key_len * 2); - memset(enc_ctx, 0, key_len * 2); - len = base64_encode(psk_ctx, key_len, enc_ctx); + if (key_len * 2 > digest_len) { + errno = EINVAL; + return -1; + } + memset(digest, 0, digest_len); + len = base64_encode(psk_ctx, key_len, digest); if (len < 0) { errno = ENOKEY; return len; } - 
sprintf(identity, "NVMe%01dR%02d %s %s %s", - version, hmac, hostnqn, subsysnqn, enc_ctx); - return strlen(identity); + return strlen(digest); } + #endif /* !CONFIG_OPENSSL_1 */ #ifdef CONFIG_OPENSSL_3 @@ -961,9 +964,10 @@ int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac, return 0; } -static int gen_tls_identity(const char *hostnqn, const char *subsysnqn, - int version, int hmac, char *identity, - unsigned char *retained, size_t key_len) +static int derive_psk_digest(const char *hostnqn, const char *subsysnqn, + int version, int hmac, + unsigned char *retained, size_t key_len, + char *digest, size_t digest_len) { static const char hmac_seed[] = "NVMe-over-Fabrics"; size_t hmac_len; @@ -972,21 +976,10 @@ static int gen_tls_identity(const char *hostnqn, const char *subsysnqn, _cleanup_evp_mac_ctx_ EVP_MAC_CTX *mac_ctx = NULL; _cleanup_evp_mac_ EVP_MAC *mac = NULL; char *progq = NULL; - char *digest = NULL; + char *dig = NULL; _cleanup_free_ unsigned char *psk_ctx = NULL; - _cleanup_free_ char *enc_ctx = NULL; size_t len; - if (version == 0) { - sprintf(identity, "NVMe%01dR%02d %s %s", - version, hmac, hostnqn, subsysnqn); - return strlen(identity); - } - if (version > 1) { - errno = EINVAL; - return -1; - } - lib_ctx = OSSL_LIB_CTX_new(); if (!lib_ctx) { errno = ENOMEM; @@ -1005,19 +998,19 @@ static int gen_tls_identity(const char *hostnqn, const char *subsysnqn, } switch (hmac) { case NVME_HMAC_ALG_SHA2_256: - digest = OSSL_DIGEST_NAME_SHA2_256; + dig = OSSL_DIGEST_NAME_SHA2_256; break; case NVME_HMAC_ALG_SHA2_384: - digest = OSSL_DIGEST_NAME_SHA2_384; + dig = OSSL_DIGEST_NAME_SHA2_384; break; default: errno = EINVAL; break; } - if (!digest) + if (!dig) return -1; *p++ = OSSL_PARAM_construct_utf8_string(OSSL_MAC_PARAM_DIGEST, - digest, 0); + dig, 0); *p = OSSL_PARAM_construct_end(); psk_ctx = malloc(key_len); @@ -1061,18 +1054,38 @@ static int gen_tls_identity(const char *hostnqn, const char *subsysnqn, errno = EMSGSIZE; return -1; } - enc_ctx = 
malloc(hmac_len * 2); - memset(enc_ctx, 0, hmac_len * 2); - len = base64_encode(psk_ctx, hmac_len, enc_ctx); + if (hmac_len * 2 > digest_len) { + errno = EINVAL; + return -1; + } + memset(digest, 0, digest_len); + len = base64_encode(psk_ctx, hmac_len, digest); if (len < 0) { errno = ENOKEY; return len; } + return strlen(digest); +} +#endif /* !CONFIG_OPENSSL_3 */ + +static int gen_tls_identity(const char *hostnqn, const char *subsysnqn, + int version, int hmac, char *digest, + char *identity) +{ + if (version == 0) { + sprintf(identity, "NVMe%01dR%02d %s %s", + version, hmac, hostnqn, subsysnqn); + return strlen(identity); + } + if (version > 1) { + errno = EINVAL; + return -1; + } + sprintf(identity, "NVMe%01dR%02d %s %s %s", - version, hmac, hostnqn, subsysnqn, enc_ctx); + version, hmac, hostnqn, subsysnqn, digest); return strlen(identity); } -#endif /* !CONFIG_OPENSSL_3 */ static int derive_nvme_keys(const char *hostnqn, const char *subsysnqn, char *identity, int version, @@ -1080,6 +1093,8 @@ static int derive_nvme_keys(const char *hostnqn, const char *subsysnqn, unsigned char *psk, int key_len) { _cleanup_free_ unsigned char *retained = NULL; + _cleanup_free_ char *digest = NULL; + char *context = identity; int ret = -1; if (!hostnqn || !subsysnqn || !identity || !psk) { @@ -1095,11 +1110,28 @@ static int derive_nvme_keys(const char *hostnqn, const char *subsysnqn, ret = derive_retained_key(hmac, hostnqn, configured, retained, key_len); if (ret < 0) return ret; + + if (version == 1) { + size_t digest_len = 2 * key_len; + + digest = malloc(digest_len); + if (!digest) { + errno = ENOMEM; + return -1; + } + ret = derive_psk_digest(hostnqn, subsysnqn, version, hmac, + retained, key_len, + digest, digest_len); + if (ret) + return ret; + context = digest; + } ret = gen_tls_identity(hostnqn, subsysnqn, version, hmac, - identity, retained, key_len); + digest, identity); if (ret < 0) return ret; - return derive_tls_key(hmac, identity, retained, psk, key_len); + return 
derive_tls_key(version, hmac, context, retained, + psk, key_len); } static size_t nvme_identity_len(int hmac, int version, const char *hostnqn, @@ -1355,6 +1387,24 @@ long nvme_insert_tls_key_versioned(const char *keyring, const char *key_type, return key; } +long nvme_revoke_tls_key(const char *keyring, const char *key_type, + const char *identity) +{ + key_serial_t keyring_id; + long key; + + keyring_id = nvme_lookup_keyring(keyring); + if (keyring_id == 0) { + errno = ENOKEY; + return 0; + } + + key = keyctl_search(keyring_id, key_type, identity, 0); + if (key < 0) + return -1; + + return keyctl_revoke(key); +} #else long nvme_lookup_keyring(const char *keyring) { @@ -1419,6 +1469,15 @@ long nvme_insert_tls_key_versioned(const char *keyring, const char *key_type, errno = ENOTSUP; return -1; } + +long nvme_revoke_tls_key(const char *keyring, const char *key_type, + const char *identity) +{ + nvme_msg(NULL, LOG_ERR, "key operations not supported; " + "recompile with keyutils support.\n"); + errno = ENOTSUP; + return -1; +} #endif long nvme_insert_tls_key(const char *keyring, const char *key_type, @@ -1513,10 +1572,10 @@ unsigned char *nvme_import_tls_key(const char *encoded_key, int *key_len, return NULL; } crc = crc32(crc, decoded_key, decoded_len); - key_crc = ((u_int32_t)decoded_key[decoded_len]) | - ((u_int32_t)decoded_key[decoded_len + 1] << 8) | - ((u_int32_t)decoded_key[decoded_len + 2] << 16) | - ((u_int32_t)decoded_key[decoded_len + 3] << 24); + key_crc = ((uint32_t)decoded_key[decoded_len]) | + ((uint32_t)decoded_key[decoded_len + 1] << 8) | + ((uint32_t)decoded_key[decoded_len + 2] << 16) | + ((uint32_t)decoded_key[decoded_len + 3] << 24); if (key_crc != crc) { nvme_msg(NULL, LOG_ERR, "CRC mismatch (key %08x, crc %08x)", key_crc, crc); diff --git a/src/nvme/linux.h b/src/nvme/linux.h index bd74262..8e5e8ad 100644 --- a/src/nvme/linux.h +++ b/src/nvme/linux.h @@ -129,6 +129,16 @@ int nvme_get_new_host_telemetry(int fd, struct nvme_telemetry_log **log, 
enum nvme_telemetry_da da, size_t *size); /** + * nvme_get_ana_log_len_from_id_ctrl() - Retrieve maximum possible ANA log size + * @id_ctrl: Controller identify data + * @rgo: If true, return maximum log page size without NSIDs + * + * Return: A byte limit on the size of the controller's ANA log page + */ +size_t nvme_get_ana_log_len_from_id_ctrl(const struct nvme_id_ctrl *id_ctrl, + bool rgo); + +/** * nvme_get_ana_log_len() - Retrieve size of the current ANA log * @fd: File descriptor of nvme device * @analen: Pointer to where the length will be set on success @@ -402,6 +412,17 @@ char *nvme_generate_tls_key_identity(const char *hostnqn, const char *subsysnqn, unsigned char *configured_key, int key_len); /** + * nvme_revoke_tls_key() - Revoke TLS key from keyring + * @keyring: Keyring to use + * @key_type: Type of the key to revoke + * @identity: Key identity string + * + * Return: 0 on success or on failure -1 with errno set. + */ +long nvme_revoke_tls_key(const char *keyring, const char *key_type, + const char *identity); + +/** * nvme_export_tls_key() - Export a TLS key * @key_data: Raw data of the key * @key_len: Length of @key_data diff --git a/src/nvme/log.c b/src/nvme/log.c index c98d213..eaf74e1 100644 --- a/src/nvme/log.c +++ b/src/nvme/log.c @@ -26,13 +26,18 @@ #define LOG_CLOCK CLOCK_MONOTONIC #endif -static nvme_root_t root; +static struct nvme_log def_log = { + .fd = STDERR_FILENO, + .level = DEFAULT_LOGLEVEL, + .pid = false, + .timestamp = false, +}; void __attribute__((format(printf, 4, 5))) -__nvme_msg(nvme_root_t r, int lvl, +__nvme_msg(nvme_root_t r, int level, const char *func, const char *format, ...) 
{ - FILE *fp = stderr; + struct nvme_log *l; va_list ap; char pidbuf[16]; char timebuf[32]; @@ -50,18 +55,15 @@ __nvme_msg(nvme_root_t r, int lvl, _cleanup_free_ char *message = NULL; int idx = 0; - if (!r) - r = root; - if (r) - fp = r->fp; + l = &r->log; + else + l = &def_log; - if (r && lvl > r->log_level) - return; - if (!r && lvl > DEFAULT_LOGLEVEL) + if (level > l->level) return; - if (r && r->log_timestamp) { + if (l->timestamp) { struct timespec now; clock_gettime(LOG_CLOCK, &now); @@ -71,7 +73,7 @@ __nvme_msg(nvme_root_t r, int lvl, } else *timebuf = '\0'; - if (r && r->log_pid) { + if (l->pid) { snprintf(pidbuf, sizeof(pidbuf), "%ld", (long)getpid()); idx |= 1 << 1; } else @@ -89,42 +91,56 @@ __nvme_msg(nvme_root_t r, int lvl, message = NULL; va_end(ap); - fprintf(fp, "%s%s", + dprintf(l->fd, "%s%s", header ? header : "<error>", message ? message : "<error>"); } void nvme_init_logging(nvme_root_t r, int lvl, bool log_pid, bool log_tstamp) { - r->log_level = lvl; - r->log_pid = log_pid; - r->log_timestamp = log_tstamp; + r->log.level = lvl; + r->log.pid = log_pid; + r->log.timestamp = log_tstamp; } int nvme_get_logging_level(nvme_root_t r, bool *log_pid, bool *log_tstamp) { - if (!r) - r = root; - if (!r) - return DEFAULT_LOGLEVEL; + struct nvme_log *l; + + if (r) + l = &r->log; + else + l = &def_log; + if (log_pid) - *log_pid = r->log_pid; + *log_pid = l->pid; if (log_tstamp) - *log_tstamp = r->log_timestamp; - return r->log_level; + *log_tstamp = l->timestamp; + return l->level; +} + +void nvme_init_default_logging(FILE *fp, int level, bool log_pid, bool log_tstamp) +{ + def_log.fd = fileno(fp); + def_log.level = level; + def_log.pid = log_pid; + def_log.timestamp = log_tstamp; } void nvme_set_root(nvme_root_t r) { - root = r; + def_log.fd = r->log.fd; + def_log.level = r->log.level; + def_log.pid = r->log.pid; + def_log.timestamp = r->log.timestamp; } void nvme_set_debug(bool debug) { - root->log_level = debug ? 
LOG_DEBUG : DEFAULT_LOGLEVEL; + def_log.level = debug ? LOG_DEBUG : DEFAULT_LOGLEVEL; } bool nvme_get_debug(void) { - return root->log_level == LOG_DEBUG; + return def_log.level == LOG_DEBUG; } diff --git a/src/nvme/log.h b/src/nvme/log.h index cd243ea..80c642a 100644 --- a/src/nvme/log.h +++ b/src/nvme/log.h @@ -36,6 +36,18 @@ void nvme_init_logging(nvme_root_t r, int lvl, bool log_pid, bool log_tstamp); /** + * nvme_init_default_logging() - Initialize default (fallback) logging + * @fp: File descriptor for logging messages + * @lvl: Logging level to set + * @log_pid: Boolean to enable logging of the PID + * @log_tstamp: Boolean to enable logging of the timestamp + * + * Sets the default logging settings for the library in case the root object + * is absent. + */ +void nvme_init_default_logging(FILE *fp, int lvl, bool log_pid, bool log_tstamp); + +/** * nvme_get_logging_level() - Get current logging level * @r: nvme_root_t context * @log_pid: Pointer to store a current value of logging of @@ -59,24 +71,27 @@ int nvme_get_logging_level(nvme_root_t r, bool *log_pid, bool *log_tstamp); * will be set as well. This means the global root object is always pointing to * the latest created root object. Note the first @nvme_free_tree call will reset * the global root object. + * + * This function is deprecated. Use nvme_init_default_logging or/and + * nvme_init_logging instead. */ -void nvme_set_root(nvme_root_t r); +void nvme_set_root(nvme_root_t r) __attribute__((deprecated)); /** * nvme_set_debug - Set NVMe command debugging output * @debug: true to enable or false to disable * - * Don't use it, it's debricated. + * This function is deprecated. Use nvme_init_default_logging instead. */ -void nvme_set_debug(bool debug); +void nvme_set_debug(bool debug) __attribute__((deprecated)); /** * nvme_get_debug - Get NVMe command debugging output * - * Don't use it, it's debricated. + * This function is deprecated. Use nvme_get_logging_level instead. 
* * Return: false if disabled or true if enabled. */ -bool nvme_get_debug(void); +bool nvme_get_debug(void) __attribute__((deprecated)); #endif /* _LOG_H */ diff --git a/src/nvme/mi-mctp.c b/src/nvme/mi-mctp.c index 86c4c29..2f1c02d 100644 --- a/src/nvme/mi-mctp.c +++ b/src/nvme/mi-mctp.c @@ -506,8 +506,6 @@ nvme_mi_ep_t nvme_mi_open_mctp(nvme_root_t root, unsigned int netid, __u8 eid) */ ep->timeout = 5000; - nvme_mi_ep_probe(ep); - return ep; err_free_rspbuf: @@ -595,7 +593,7 @@ static int handle_mctp_endpoint(nvme_root_t root, const char* objpath, bool have_eid = false, have_net = false, have_nvmemi = false; mctp_eid_t eid; int net; - int rc; + int rc = 0; /* for each property */ for (;;) { diff --git a/src/nvme/mi.c b/src/nvme/mi.c index 84d51b0..d98c74a 100644 --- a/src/nvme/mi.c +++ b/src/nvme/mi.c @@ -11,6 +11,7 @@ #include <stdlib.h> #include <stdio.h> #include <time.h> +#include <unistd.h> #include <ccan/array_size/array_size.h> #include <ccan/endian/endian.h> @@ -41,18 +42,32 @@ static bool nvme_mi_probe_enabled_default(void) */ nvme_root_t nvme_mi_create_root(FILE *fp, int log_level) { - struct nvme_root *r = calloc(1, sizeof(*r)); + struct nvme_root *r; + int fd; + r = calloc(1, sizeof(*r)); if (!r) { + errno = ENOMEM; return NULL; } - r->log_level = log_level; - r->fp = stderr; + + if (fp) { + fd = fileno(fp); + if (fd < 0) { + free(r); + return NULL; + } + } else + fd = STDERR_FILENO; + + r->log.fd = fd; + r->log.level = log_level; + r->mi_probe_enabled = nvme_mi_probe_enabled_default(); - if (fp) - r->fp = fp; + list_head_init(&r->hosts); list_head_init(&r->endpoints); + return r; } @@ -128,6 +143,20 @@ void nvme_mi_ep_probe(struct nvme_mi_ep *ep) struct nvme_mi_ctrl *ctrl; int rc; + /* Ensure the probe occurs at most once. This isn't just to mitigate doubling + * a linear stream of commands, it also terminates recursion via the + * nvme_mi_submit() call issued by nvme_mi_admin_identify_partial() below. 
+ */ + if (ep->quirks_probed) + return; + + /* Mark ep->quirks as valid. Note that for the purpose of quirk probing, + * the quirk probe itself cannot rely on quirks, and so the fact that none are + * yet set is desirable. The request that triggered nvme_mi_submit() will have + * an initialised ep->quirks when we return from the root probe call. + */ + ep->quirks_probed = true; + if (!ep->root->mi_probe_enabled) return; @@ -250,6 +279,7 @@ struct nvme_mi_ep *nvme_mi_init_ep(nvme_root_t root) list_node_init(&ep->root_entry); ep->root = root; + ep->quirks_probed = false; ep->controllers_scanned = false; ep->timeout = default_timeout; ep->mprt_max = 0; @@ -327,7 +357,7 @@ int nvme_mi_scan_ep(nvme_mi_ep_t ep, bool force_rescan) rc = nvme_mi_mi_read_mi_data_ctrl_list(ep, 0, &list); if (rc) - return -1; + return rc; n_ctrl = le16_to_cpu(list.num); if (n_ctrl > NVME_ID_CTRL_LIST_MAX) { @@ -413,6 +443,8 @@ int nvme_mi_submit(nvme_mi_ep_t ep, struct nvme_mi_req *req, return -1; } + nvme_mi_ep_probe(ep); + if (ep->transport->mic_enabled) nvme_mi_calc_req_mic(req); @@ -639,6 +671,7 @@ int nvme_mi_admin_admin_passthru(nvme_mi_ctrl_t ctrl, __u8 opcode, __u8 flags, struct nvme_mi_admin_req_hdr req_hdr; struct nvme_mi_resp resp; struct nvme_mi_req req; + unsigned int timeout_save; int rc; int direction = opcode & 0x3; bool has_write_data = false; @@ -665,11 +698,6 @@ int nvme_mi_admin_admin_passthru(nvme_mi_ctrl_t ctrl, __u8 opcode, __u8 flags, has_read_data = true; } - if (timeout_ms > nvme_mi_ep_get_timeout(ctrl->ep)) { - /* Set timeout if user needs a bigger timeout */ - nvme_mi_ep_set_timeout(ctrl->ep, timeout_ms); - } - nvme_mi_admin_init_req(&req, &req_hdr, ctrl->id, opcode); req_hdr.cdw1 = cpu_to_le32(nsid); req_hdr.cdw2 = cpu_to_le32(cdw2); @@ -701,7 +729,17 @@ int nvme_mi_admin_admin_passthru(nvme_mi_ctrl_t ctrl, __u8 opcode, __u8 flags, resp.data_len = data_len; } + /* if the user has specified a custom timeout, save the current + * timeout and override + */ + if 
(timeout_ms != 0) { + timeout_save = nvme_mi_ep_get_timeout(ctrl->ep); + nvme_mi_ep_set_timeout(ctrl->ep, timeout_ms); + } rc = nvme_mi_submit(ctrl->ep, &req, &resp); + if (timeout_ms != 0) + nvme_mi_ep_set_timeout(ctrl->ep, timeout_save); + if (rc) return rc; @@ -896,6 +934,133 @@ int nvme_mi_admin_get_log(nvme_mi_ctrl_t ctrl, struct nvme_get_log_args *args) return nvme_mi_admin_get_log_page(ctrl, 4096, args); } +static int read_ana_chunk(nvme_mi_ctrl_t ctrl, enum nvme_log_ana_lsp lsp, bool rae, + __u8 *log, __u8 **read, __u8 *to_read, __u8 *log_end) +{ + if (to_read > log_end) { + errno = ENOSPC; + return -1; + } + + while (*read < to_read) { + __u32 len = min(log_end - *read, NVME_LOG_PAGE_PDU_SIZE); + int ret; + + ret = nvme_mi_admin_get_log_ana(ctrl, lsp, rae, + *read - log, len, *read); + if (ret) + return ret; + + *read += len; + } + return 0; +} + +static int try_read_ana(nvme_mi_ctrl_t ctrl, enum nvme_log_ana_lsp lsp, bool rae, + struct nvme_ana_log *log, __u8 *log_end, + __u8 *read, __u8 **to_read, bool *may_retry) +{ + __u16 ngrps = le16_to_cpu(log->ngrps); + + while (ngrps--) { + __u8 *group = *to_read; + int ret; + __le32 nnsids; + + *to_read += sizeof(*log->descs); + ret = read_ana_chunk(ctrl, lsp, rae, + (__u8 *)log, &read, *to_read, log_end); + if (ret) { + /* + * If the provided buffer isn't long enough, + * the log page may have changed while reading it + * and the computed length was inaccurate. + * Have the caller check chgcnt and retry. + */ + *may_retry = errno == ENOSPC; + return ret; + } + + /* + * struct nvme_ana_group_desc has 8-byte alignment + * but the group pointer is only 4-byte aligned. + * Don't dereference the misaligned pointer. 
+ */ + memcpy(&nnsids, + group + offsetof(struct nvme_ana_group_desc, nnsids), + sizeof(nnsids)); + *to_read += le32_to_cpu(nnsids) * sizeof(__le32); + ret = read_ana_chunk(ctrl, lsp, rae, + (__u8 *)log, &read, *to_read, log_end); + if (ret) { + *may_retry = errno == ENOSPC; + return ret; + } + } + + *may_retry = true; + return 0; +} + +int nvme_mi_admin_get_ana_log_atomic(nvme_mi_ctrl_t ctrl, bool rgo, bool rae, + unsigned int retries, + struct nvme_ana_log *log, __u32 *len) +{ + const enum nvme_log_ana_lsp lsp = + rgo ? NVME_LOG_ANA_LSP_RGO_GROUPS_ONLY : 0; + /* Get Log Page can only fetch multiples of dwords */ + __u8 * const log_end = (__u8 *)log + (*len & -4); + __u8 *read = (__u8 *)log; + __u8 *to_read; + int ret; + + if (!retries) { + errno = EINVAL; + return -1; + } + + to_read = (__u8 *)log->descs; + ret = read_ana_chunk(ctrl, lsp, rae, + (__u8 *)log, &read, to_read, log_end); + if (ret) + return ret; + + do { + bool may_retry = false; + int saved_ret; + int saved_errno; + __le64 chgcnt; + + saved_ret = try_read_ana(ctrl, lsp, rae, log, log_end, + read, &to_read, &may_retry); + /* + * If the log page was read with multiple Get Log Page commands, + * chgcnt must be checked afterwards to ensure atomicity + */ + *len = to_read - (__u8 *)log; + if (*len <= NVME_LOG_PAGE_PDU_SIZE || !may_retry) + return saved_ret; + + saved_errno = errno; + chgcnt = log->chgcnt; + read = (__u8 *)log; + to_read = (__u8 *)log->descs; + ret = read_ana_chunk(ctrl, lsp, rae, + (__u8 *)log, &read, to_read, log_end); + if (ret) + return ret; + + if (log->chgcnt == chgcnt) { + /* Log hasn't changed; return try_read_ana() result */ + errno = saved_errno; + return saved_ret; + } + } while (--retries); + + errno = EAGAIN; + return -1; +} + int nvme_mi_admin_security_send(nvme_mi_ctrl_t ctrl, struct nvme_security_send_args *args) { @@ -1003,8 +1168,10 @@ int nvme_mi_admin_get_features(nvme_mi_ctrl_t ctrl, struct nvme_mi_req req; int rc; - if (args->args_size < sizeof(*args)) - return 
-EINVAL; + if (args->args_size < sizeof(*args)) { + errno = EINVAL; + return -1; + } nvme_mi_admin_init_req(&req, &req_hdr, ctrl->id, nvme_admin_get_features); @@ -1042,8 +1209,10 @@ int nvme_mi_admin_set_features(nvme_mi_ctrl_t ctrl, struct nvme_mi_req req; int rc; - if (args->args_size < sizeof(*args)) - return -EINVAL; + if (args->args_size < sizeof(*args)) { + errno = EINVAL; + return -1; + } nvme_mi_admin_init_req(&req, &req_hdr, ctrl->id, nvme_admin_set_features); @@ -1140,8 +1309,10 @@ int nvme_mi_admin_ns_attach(nvme_mi_ctrl_t ctrl, struct nvme_mi_req req; int rc; - if (args->args_size < sizeof(*args)) - return -EINVAL; + if (args->args_size < sizeof(*args)) { + errno = EINVAL; + return -1; + } nvme_mi_admin_init_req(&req, &req_hdr, ctrl->id, nvme_admin_ns_attach); @@ -1173,17 +1344,20 @@ int nvme_mi_admin_fw_download(nvme_mi_ctrl_t ctrl, struct nvme_mi_req req; int rc; - if (args->args_size < sizeof(*args)) - return -EINVAL; - - if (args->data_len & 0x3) - return -EINVAL; + if (args->args_size < sizeof(*args)) { + errno = EINVAL; + return -1; + } - if (args->offset & 0x3) - return -EINVAL; + if ((args->data_len & 0x3) || (!args->data_len)) { + errno = EINVAL; + return -1; + } - if (!args->data_len) - return -EINVAL; + if (args->offset & 0x3) { + errno = EINVAL; + return -1; + } nvme_mi_admin_init_req(&req, &req_hdr, ctrl->id, nvme_admin_fw_download); @@ -1215,8 +1389,10 @@ int nvme_mi_admin_fw_commit(nvme_mi_ctrl_t ctrl, struct nvme_mi_req req; int rc; - if (args->args_size < sizeof(*args)) - return -EINVAL; + if (args->args_size < sizeof(*args)) { + errno = EINVAL; + return -1; + } nvme_mi_admin_init_req(&req, &req_hdr, ctrl->id, nvme_admin_fw_commit); @@ -1245,8 +1421,10 @@ int nvme_mi_admin_format_nvm(nvme_mi_ctrl_t ctrl, struct nvme_mi_req req; int rc; - if (args->args_size < sizeof(*args)) - return -EINVAL; + if (args->args_size < sizeof(*args)) { + errno = EINVAL; + return -1; + } nvme_mi_admin_init_req(&req, &req_hdr, ctrl->id, 
nvme_admin_format_nvm); @@ -1279,8 +1457,10 @@ int nvme_mi_admin_sanitize_nvm(nvme_mi_ctrl_t ctrl, struct nvme_mi_req req; int rc; - if (args->args_size < sizeof(*args)) - return -EINVAL; + if (args->args_size < sizeof(*args)) { + errno = EINVAL; + return -1; + } nvme_mi_admin_init_req(&req, &req_hdr, ctrl->id, nvme_admin_sanitize_nvm); diff --git a/src/nvme/mi.h b/src/nvme/mi.h index bd26627..ae32a90 100644 --- a/src/nvme/mi.h +++ b/src/nvme/mi.h @@ -1917,7 +1917,7 @@ static inline int nvme_mi_admin_get_log_predictable_lat_event(nvme_mi_ctrl_t ctr * the asymmetric namespace access information for ANA Groups that contain * namespaces that are attached to the controller processing the command. * - * See &struct nvme_ana_rsp_hdr for the definition of the returned structure. + * See &struct nvme_ana_log for the definition of the returned structure. * * Return: The nvme command status if a response was received (see * &enum nvme_status_field) or -1 with errno set otherwise. @@ -1938,7 +1938,7 @@ static inline int nvme_mi_admin_get_log_ana(nvme_mi_ctrl_t ctrl, .lsi = NVME_LOG_LSI_NONE, .lsp = (__u8)lsp, .uuidx = NVME_UUID_NONE, - .rae = false, + .rae = rae, .ot = false, }; return nvme_mi_admin_get_log(ctrl, &args); @@ -1966,6 +1966,31 @@ static inline int nvme_mi_admin_get_log_ana_groups(nvme_mi_ctrl_t ctrl, } /** + * nvme_mi_admin_get_ana_log_atomic() - Retrieve Asymmetric Namespace Access + * log page atomically + * @ctrl: Controller to query + * @rgo: Whether to retrieve ANA groups only (no NSIDs) + * @rae: Whether to retain asynchronous events + * @retries: The maximum number of times to retry on log page changes + * @log: Pointer to a buffer to receive the ANA log page + * @len: Input: the length of the log page buffer. + * Output: the actual length of the ANA log page. + * + * See &struct nvme_ana_log for the definition of the returned structure. + * + * Return: If successful, returns 0 and sets *len to the actual log page length. 
+ * If unsuccessful, returns the nvme command status if a response was received + * (see &enum nvme_status_field) or -1 with errno set otherwise. + * Sets errno = EINVAL if retries == 0. + * Sets errno = EAGAIN if unable to read the log page atomically + * because chgcnt changed during each of the retries attempts. + * Sets errno = ENOSPC if the full log page does not fit in the provided buffer. + */ +int nvme_mi_admin_get_ana_log_atomic(nvme_mi_ctrl_t ctrl, bool rgo, bool rae, + unsigned int retries, + struct nvme_ana_log *log, __u32 *len); + +/** * nvme_mi_admin_get_log_lba_status() - Retrieve LBA Status * @ctrl: Controller to query * @rae: Retain asynchronous events diff --git a/src/nvme/private.h b/src/nvme/private.h index dec3d85..3fa5aca 100644 --- a/src/nvme/private.h +++ b/src/nvme/private.h @@ -160,17 +160,22 @@ struct nvme_fabric_options { bool trsvcid; }; +struct nvme_log { + int fd; + int level; + bool pid; + bool timestamp; +}; + struct nvme_root { char *config_file; char *application; struct list_head hosts; struct list_head endpoints; /* MI endpoints */ - FILE *fp; - int log_level; - bool log_pid; - bool log_timestamp; + struct nvme_log log; bool modified; bool mi_probe_enabled; + bool create_only; struct nvme_fabric_options *options; }; @@ -198,14 +203,10 @@ void *__nvme_realloc(void *p, size_t len); #endif void __attribute__((format(printf, 4, 5))) -__nvme_msg(nvme_root_t r, int lvl, const char *func, const char *format, ...); +__nvme_msg(nvme_root_t r, int level, const char *func, const char *format, ...); -#define nvme_msg(r, lvl, format, ...) \ - do { \ - if ((lvl) <= MAX_LOGLEVEL) \ - __nvme_msg(r, lvl, __nvme_log_func, \ - format, ##__VA_ARGS__); \ - } while (0) +#define nvme_msg(r, level, format, ...) \ + __nvme_msg(r, level, __nvme_log_func, format, ##__VA_ARGS__) #define root_from_ctrl(c) ((c)->s && (c)->s->h ? (c)->s->h->r : NULL) #define root_from_ns(n) ((n)->s && (n)->s->h ? 
(n)->s->h->r : \ @@ -256,6 +257,7 @@ struct nvme_mi_ep { void *transport_data; struct list_node root_entry; struct list_head controllers; + bool quirks_probed; bool controllers_scanned; unsigned int timeout; unsigned int mprt_max; diff --git a/src/nvme/tree.c b/src/nvme/tree.c index eb9486d..3722461 100644 --- a/src/nvme/tree.c +++ b/src/nvme/tree.c @@ -117,16 +117,99 @@ static void cleanup_dirents(struct dirents *ents) #define _cleanup_dirents_ __cleanup__(cleanup_dirents) +static char *nvme_hostid_from_hostnqn(const char *hostnqn) +{ + const char *uuid; + + uuid = strstr(hostnqn, "uuid:"); + if (!uuid) + return NULL; + + return strdup(uuid + strlen("uuid:")); +} + +int nvme_host_get_ids(nvme_root_t r, + char *hostnqn_arg, char *hostid_arg, + char **hostnqn, char **hostid) +{ + _cleanup_free_ char *nqn = NULL; + _cleanup_free_ char *hid = NULL; + _cleanup_free_ char *hnqn = NULL; + nvme_host_t h; + + /* command line argumments */ + if (hostid_arg) + hid = strdup(hostid_arg); + if (hostnqn_arg) + hnqn = strdup(hostnqn_arg); + + /* JSON config: assume the first entry is the default host */ + h = nvme_first_host(r); + if (h) { + if (!hid) + hid = strdup(nvme_host_get_hostid(h)); + if (!hnqn) + hnqn = strdup(nvme_host_get_hostnqn(h)); + } + + /* /etc/nvme/hostid and/or /etc/nvme/hostnqn */ + if (!hid) + hid = nvmf_hostid_from_file(); + if (!hnqn) + hnqn = nvmf_hostnqn_from_file(); + + /* incomplete configuration, thus derive hostid from hostnqn */ + if (!hid && hnqn) + hid = nvme_hostid_from_hostnqn(hnqn); + + /* + * fallback to use either DMI information or device-tree. 
If all + * fails generate one + */ + if (!hid) { + hid = nvmf_hostid_generate(); + if (!hid) { + errno = -ENOMEM; + return -1; + } + + nvme_msg(r, LOG_DEBUG, + "warning: using auto generated hostid and hostnqn\n"); + } + + /* incomplete configuration, thus derive hostnqn from hostid */ + if (!hnqn) { + hnqn = nvmf_hostnqn_generate_from_hostid(hid); + if (!hnqn) { + errno = -ENOMEM; + return -1; + } + } + + /* sanity checks */ + nqn = nvme_hostid_from_hostnqn(hnqn); + if (nqn && strcmp(nqn, hid)) { + nvme_msg(r, LOG_DEBUG, + "warning: use hostid '%s' which does not match uuid in hostnqn '%s'\n", + hid, hnqn); + } + + *hostid = hid; + *hostnqn = hnqn; + hid = NULL; + hnqn = NULL; + + return 0; +} + nvme_host_t nvme_default_host(nvme_root_t r) { - struct nvme_host *h; _cleanup_free_ char *hostnqn = NULL; _cleanup_free_ char *hostid = NULL; + struct nvme_host *h; - hostnqn = nvmf_hostnqn_from_file(); - if (!hostnqn) - hostnqn = nvmf_hostnqn_generate(); - hostid = nvmf_hostid_from_file(); + if (nvme_host_get_ids(r, NULL, NULL, &hostnqn, &hostid)) + return NULL; h = nvme_lookup_host(r, hostnqn, hostid); @@ -187,25 +270,37 @@ int nvme_scan_topology(struct nvme_root *r, nvme_scan_filter_t f, void *f_args) nvme_root_t nvme_create_root(FILE *fp, int log_level) { - struct nvme_root *r = calloc(1, sizeof(*r)); + struct nvme_root *r; + int fd; + r = calloc(1, sizeof(*r)); if (!r) { errno = ENOMEM; return NULL; } - r->log_level = log_level; - r->fp = stderr; - if (fp) - r->fp = fp; + + if (fp) { + fd = fileno(fp); + if (fd < 0) { + free(r); + return NULL; + } + } else + fd = STDERR_FILENO; + + r->log.fd = fd; + r->log.level = log_level; + list_head_init(&r->hosts); list_head_init(&r->endpoints); - nvme_set_root(r); + return r; } int nvme_read_config(nvme_root_t r, const char *config_file) { int err = -1; + int tmp; if (!r || !config_file) { errno = ENODEV; @@ -217,13 +312,17 @@ int nvme_read_config(nvme_root_t r, const char *config_file) errno = ENOMEM; return err; } + + tmp = 
errno; err = json_read_config(r, config_file); /* * The json configuration file is optional, * so ignore errors when opening the file. */ - if (err < 0 && errno != EPROTO) - err = 0; + if (err < 0 && errno != EPROTO) { + errno = tmp; + return 0; + } return err; } @@ -270,6 +369,11 @@ void nvme_root_set_application(nvme_root_t r, const char *a) r->application = strdup(a); } +void nvme_root_skip_namespaces(nvme_root_t r) +{ + r->create_only = true; +} + nvme_host_t nvme_first_host(nvme_root_t r) { return list_top(&r->hosts, struct nvme_host, entry); @@ -361,14 +465,17 @@ void nvme_free_tree(nvme_root_t r) { struct nvme_host *h, *_h; - free(r->options); + if (!r) + return; + + if (r->options) + free(r->options); nvme_for_each_host_safe(r, h, _h) __nvme_free_host(h); if (r->config_file) free(r->config_file); if (r->application) free(r->application); - nvme_set_root(NULL); free(r); } @@ -538,7 +645,7 @@ struct nvme_subsystem *nvme_alloc_subsystem(struct nvme_host *h, list_head_init(&s->ctrls); list_head_init(&s->namespaces); list_node_init(&s->entry); - list_add(&h->subsystems, &s->entry); + list_add_tail(&h->subsystems, &s->entry); h->r->modified = true; return s; } @@ -622,7 +729,7 @@ struct nvme_host *nvme_lookup_host(nvme_root_t r, const char *hostnqn, list_head_init(&h->subsystems); list_node_init(&h->entry); h->r = r; - list_add(&r->hosts, &h->entry); + list_add_tail(&r->hosts, &h->entry); r->modified = true; return h; @@ -634,6 +741,12 @@ static int nvme_subsystem_scan_namespaces(nvme_root_t r, nvme_subsystem_t s, _cleanup_dirents_ struct dirents namespaces = {}; int i, ret; + if (r->create_only) { + nvme_msg(r, LOG_DEBUG, + "skipping namespace scan for subsys %s\n", + s->subsysnqn); + return 0; + } namespaces.num = nvme_scan_subsystem_namespaces(s, &namespaces.ents); if (namespaces.num < 0) { nvme_msg(r, LOG_DEBUG, @@ -806,7 +919,7 @@ static void nvme_subsystem_set_path_ns(nvme_subsystem_t s, nvme_path_t p) sprintf(n_name, "nvme%dn%d", i, nsid); 
nvme_subsystem_for_each_ns(s, n) { if (!strcmp(n_name, nvme_ns_get_name(n))) { - list_add(&n->paths, &p->nentry); + list_add_tail(&n->paths, &p->nentry); p->n = n; } } @@ -852,7 +965,7 @@ static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name) list_node_init(&p->nentry); nvme_subsystem_set_path_ns(c->s, p); list_node_init(&p->entry); - list_add(&c->paths, &p->entry); + list_add_tail(&c->paths, &p->entry); return 0; } @@ -1591,8 +1704,8 @@ nvme_ctrl_t __nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport, const char *host_iface, const char *trsvcid, const char *subsysnqn, nvme_ctrl_t p) { + _cleanup_candidate_ struct candidate_args candidate = {}; struct nvme_ctrl *c, *matching_c = NULL; - _cleanup_candidate_ struct candidate_args candidate; ctrl_match_t ctrl_match; /* Init candidate and get the matching function to use */ @@ -1615,8 +1728,8 @@ bool nvme_ctrl_config_match(struct nvme_ctrl *c, const char *transport, const char *subsysnqn, const char *host_traddr, const char *host_iface) { + _cleanup_candidate_ struct candidate_args candidate = {}; ctrl_match_t ctrl_match; - _cleanup_candidate_ struct candidate_args candidate; /* Init candidate and get the matching function to use */ ctrl_match = _candidate_init(&candidate, transport, traddr, trsvcid, @@ -1655,7 +1768,7 @@ nvme_ctrl_t nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport, host_traddr, host_iface, trsvcid); if (c) { c->s = s; - list_add(&s->ctrls, &c->entry); + list_add_tail(&s->ctrls, &c->entry); s->h->r->modified = true; } return c; @@ -1666,6 +1779,11 @@ static int nvme_ctrl_scan_paths(nvme_root_t r, struct nvme_ctrl *c) _cleanup_dirents_ struct dirents paths = {}; int i; + if (r->create_only) { + nvme_msg(r, LOG_DEBUG, + "skipping path scan for ctrl %s\n", c->name); + return 0; + } paths.num = nvme_scan_ctrl_namespace_paths(c, &paths.ents); if (paths.num < 0) return paths.num; @@ -1681,6 +1799,11 @@ static int nvme_ctrl_scan_namespaces(nvme_root_t r, struct 
nvme_ctrl *c) _cleanup_dirents_ struct dirents namespaces = {}; int i; + if (r->create_only) { + nvme_msg(r, LOG_DEBUG, "skipping namespace scan for ctrl %s\n", + c->name); + return 0; + } namespaces.num = nvme_scan_ctrl_namespaces(c, &namespaces.ents); for (i = 0; i < namespaces.num; i++) nvme_ctrl_scan_namespace(r, c, namespaces.ents[i]->d_name); @@ -1749,6 +1872,10 @@ static char *nvme_ctrl_lookup_phy_slot(nvme_root_t r, const char *address) return NULL; } addr = nvme_get_attr(path, "address"); + + /* some directories don't have an address entry */ + if (!addr) + continue; if (strcmp(addr, target_addr) == 0) return strdup(entry->d_name); } @@ -1861,7 +1988,7 @@ int nvme_init_ctrl(nvme_host_t h, nvme_ctrl_t c, int instance) if (s->subsystype && !strcmp(s->subsystype, "discovery")) c->discovery_ctrl = true; c->s = s; - list_add(&s->ctrls, &c->entry); + list_add_tail(&s->ctrls, &c->entry); return ret; } @@ -2635,7 +2762,7 @@ static int nvme_ctrl_scan_namespace(nvme_root_t r, struct nvme_ctrl *c, } n->s = c->s; n->c = c; - list_add(&c->namespaces, &n->entry); + list_add_tail(&c->namespaces, &n->entry); return 0; } @@ -2658,7 +2785,7 @@ static void nvme_subsystem_set_ns_path(nvme_subsystem_t s, nvme_ns_t n) if (ret != 3) continue; if (ns_ctrl == p_subsys && ns_nsid == p_nsid) { - list_add(&n->paths, &p->nentry); + list_add_tail(&n->paths, &p->nentry); p->n = n; } } @@ -2696,7 +2823,7 @@ static int nvme_subsystem_scan_namespace(nvme_root_t r, nvme_subsystem_t s, __nvme_free_ns(_n); } n->s = s; - list_add(&s->namespaces, &n->entry); + list_add_tail(&s->namespaces, &n->entry); nvme_subsystem_set_ns_path(s, n); return 0; } diff --git a/src/nvme/tree.h b/src/nvme/tree.h index 5e82579..1b583cd 100644 --- a/src/nvme/tree.h +++ b/src/nvme/tree.h @@ -63,6 +63,14 @@ void nvme_root_set_application(nvme_root_t r, const char *a); const char *nvme_root_get_application(nvme_root_t r); /** + * nvme_root_skip_namespaces - Skip namespace scanning + * @r: &nvme_root_t object + * + * 
Sets a flag to skip namespaces during scanning. + */ +void nvme_root_skip_namespaces(nvme_root_t r); + +/** * nvme_root_release_fds - Close all opened file descriptors in the tree * @r: &nvme_root_t object * @@ -161,14 +169,49 @@ bool nvme_host_is_pdc_enabled(nvme_host_t h, bool fallback); * nvme_default_host() - Initializes the default host * @r: &nvme_root_t object * - * Initializes the default host object based on the values in - * /etc/nvme/hostnqn and /etc/nvme/hostid and attaches it to @r. + * Initializes the default host object based on the hostnqn/hostid + * values returned by nvme_host_get_ids() and attaches it to @r. * * Return: &nvme_host_t object */ nvme_host_t nvme_default_host(nvme_root_t r); /** + * nvme_host_get_ids - Retrieve host ids from various sources + * + * @r: &nvme_root_t object + * @hostnqn_arg: Input hostnqn (command line) argument + * @hostid_arg: Input hostid (command line) argument + * @hostnqn: Output hostnqn + * @hostid: Output hostid + * + * nvme_host_get_ids figures out which hostnqn/hostid is to be used. + * There are several sources where this information can be retrieved. + * + * The order is: + * + * - Start with informartion from DMI or device-tree + * - Override hostnqn and hostid from /etc/nvme files + * - Override hostnqn or hostid with values from JSON + * configuration file. The first host entry in the file is + * considered the default host. + * - Override hostnqn or hostid with values from the command line + * (@hostnqn_arg, @hostid_arg). + * + * If the IDs are still NULL after the lookup algorithm, the function + * will generate random IDs. + * + * The function also verifies that hostnqn and hostid matches. The Linux + * NVMe implementation expects a 1:1 matching between the IDs. + * + * Return: 0 on success (@hostnqn and @hostid contain valid strings + * which the caller needs to free), -1 otherwise and errno is set. 
+ */ +int nvme_host_get_ids(nvme_root_t r, + char *hostnqn_arg, char *hostid_arg, + char **hostnqn, char **hostid); + +/** * nvme_first_subsystem() - Start subsystem iterator * @h: &nvme_host_t object * diff --git a/src/nvme/types.h b/src/nvme/types.h index 26e5e25..5fa969d 100644 --- a/src/nvme/types.h +++ b/src/nvme/types.h @@ -1570,6 +1570,8 @@ enum nvme_id_ctrl_oaes { * @NVME_CTRL_CTRATT_DEL_ENDURANCE_GROUPS: Delete Endurance Groups supported * @NVME_CTRL_CTRATT_DEL_NVM_SETS: Delete NVM Sets supported * @NVME_CTRL_CTRATT_ELBAS: Extended LBA Formats supported + * @NVME_CTRL_CTRATT_MEM: MDTS and Size Limits Exclude Metadata supported + * @NVME_CTRL_CTRATT_HMBR: HMB Restrict Non-Operational Power State Access * @NVME_CTRL_CTRATT_FDPS: Flexible Data Placement supported */ enum nvme_id_ctrl_ctratt { @@ -1589,6 +1591,8 @@ enum nvme_id_ctrl_ctratt { NVME_CTRL_CTRATT_DEL_ENDURANCE_GROUPS = 1 << 13, NVME_CTRL_CTRATT_DEL_NVM_SETS = 1 << 14, NVME_CTRL_CTRATT_ELBAS = 1 << 15, + NVME_CTRL_CTRATT_MEM = 1 << 16, + NVME_CTRL_CTRATT_HMBR = 1 << 17, NVME_CTRL_CTRATT_FDPS = 1 << 19, }; @@ -2298,7 +2302,7 @@ enum nvme_id_nsfeat { NVME_NS_FEAT_NATOMIC = 1 << 1, NVME_NS_FEAT_DULBE = 1 << 2, NVME_NS_FEAT_ID_REUSE = 1 << 3, - NVME_NS_FEAT_IO_OPT = 1 << 4, + NVME_NS_FEAT_IO_OPT = 3 << 4, }; /** @@ -2332,10 +2336,31 @@ enum nvme_id_ns_flbas { * the variable-sized storage tag/reference tag fields * @NVME_NVM_ELBAF_PIF_MASK: Mask to get the protection information format for * the extended LBA format. + * @NVME_NVM_ELBAF_QPIF_MASK: Mask to get the Qualified Protection Information + * Format. 
*/ enum nvme_nvm_id_ns_elbaf { NVME_NVM_ELBAF_STS_MASK = 127 << 0, NVME_NVM_ELBAF_PIF_MASK = 3 << 7, + NVME_NVM_ELBAF_QPIF_MASK = 15 << 9, +}; + +/** + * enum nvme_nvm_id_ns_pif - This field indicates the type of the Protection + * Information Format + * @NVME_NVM_PIF_16B_GUARD: 16-bit Guard Protection Information Format + * @NVME_NVM_PIF_32B_GUARD: 32-bit Guard Protection Information Format + * @NVME_NVM_PIF_64B_GUARD: 64-bit Guard Protection Information Format + * @NVME_NVM_PIF_QTYPE: If the Qualified Protection Information Format is supported + * and the Protection Information Format is set to 3, then the + * protection information format is taken from the Qualified + * Protection Information Format field. + */ +enum nvme_nvm_id_ns_pif { + NVME_NVM_PIF_16B_GUARD = 0, + NVME_NVM_PIF_32B_GUARD = 1, + NVME_NVM_PIF_64B_GUARD = 2, + NVME_NVM_PIF_QTYPE = 3, }; /** @@ -2683,32 +2708,44 @@ struct nvme_ns_list { * @dmrl: Dataset Management Ranges Limit * @dmrsl: Dataset Management Range Size Limit * @dmsl: Dataset Management Size Limit - * @rsvd16: reserved + * @rsvd16: Reserved + * @aocs: Admin Optional Command Support + * @rsvd20: Reserved */ struct nvme_id_ctrl_nvm { - __u8 vsl; - __u8 wzsl; - __u8 wusl; - __u8 dmrl; - __le32 dmrsl; - __le64 dmsl; - __u8 rsvd16[4080]; + __u8 vsl; + __u8 wzsl; + __u8 wusl; + __u8 dmrl; + __le32 dmrsl; + __le64 dmsl; + __u8 rsvd16[2]; + __le16 aocs; + __u8 rsvd20[4076]; }; /** * struct nvme_nvm_id_ns - NVME Command Set I/O Command Set Specific Identify Namespace Data Structure * @lbstm: Logical Block Storage Tag Mask * @pic: Protection Information Capabilities - * @rsvd9: Reserved + * @pifa: Protection Information Format Attribute + * @rsvd10: Reserved * @elbaf: List of Extended LBA Format Support - * @rsvd268: Reserved + * @npdgl: Namespace Preferred Deallocate Granularity Large + * @rsvd272: Reserved + * @tlbaag: Tracked LBA Allocation Granularity + * @rsvd296: Reserved */ struct nvme_nvm_id_ns { __le64 lbstm; __u8 pic; - __u8 rsvd9[3]; + __u8 pifa; + 
__u8 rsvd10[2]; __le32 elbaf[64]; - __u8 rsvd268[3828]; + __le32 npdgl; + __u8 rsvd272[20]; + __le32 tlbaag; + __u8 rsvd296[3800]; }; /** @@ -3847,6 +3884,8 @@ struct nvme_persistent_event_entry { * @NVME_PEL_SET_FEATURE_EVENT: Set Feature Event * @NVME_PEL_TELEMETRY_CRT: Telemetry Log Create Event * @NVME_PEL_THERMAL_EXCURSION_EVENT: Thermal Excursion Event + * @NVME_PEL_VENDOR_SPECIFIC_EVENT: Vendor Specific Event + * @NVME_PEL_TCG_DEFINED_EVENT: TCG Defined Event */ enum nvme_persistent_event_types { NVME_PEL_SMART_HEALTH_EVENT = 0x01, @@ -3862,6 +3901,8 @@ enum nvme_persistent_event_types { NVME_PEL_SET_FEATURE_EVENT = 0x0b, NVME_PEL_TELEMETRY_CRT = 0x0c, NVME_PEL_THERMAL_EXCURSION_EVENT = 0x0d, + NVME_PEL_VENDOR_SPECIFIC_EVENT = 0xde, + NVME_PEL_TCG_DEFINED_EVENT = 0xdf, }; /** @@ -4138,14 +4179,18 @@ struct nvme_lbas_ns_element { }; /** - * enum nvme_lba_status_atype - Potentially Unrecoverable LBAs - * @NVME_LBA_STATUS_ATYPE_SCAN_UNTRACKED: Potentially Unrecoverable LBAs - * @NVME_LBA_STATUS_ATYPE_SCAN_TRACKED: Potentially Unrecoverable LBAs - * associated with physical storage + * enum nvme_lba_status_atype - Action type the controller uses to return LBA status + * @NVME_LBA_STATUS_ATYPE_ALLOCATED: Return tracked allocated LBAs status + * @NVME_LBA_STATUS_ATYPE_SCAN_UNTRACKED: Perform scan and return Untracked and + * Tracked Potentially Unrecoverable LBAs + * status + * @NVME_LBA_STATUS_ATYPE_TRACKED: Return Tracked Potentially Unrecoverable + * LBAs associated with physical storage */ enum nvme_lba_status_atype { - NVME_LBA_STATUS_ATYPE_SCAN_UNTRACKED = 0x10, - NVME_LBA_STATUS_ATYPE_SCAN_TRACKED = 0x11, + NVME_LBA_STATUS_ATYPE_ALLOCATED = 0x2, + NVME_LBA_STATUS_ATYPE_SCAN_UNTRACKED = 0x10, + NVME_LBA_STATUS_ATYPE_TRACKED = 0x11, }; /** @@ -8248,6 +8293,7 @@ enum nvme_data_tfr { * @nvme_cmd_resv_acquire: Reservation Acquire * @nvme_cmd_io_mgmt_recv: I/O Management Receive * @nvme_cmd_resv_release: Reservation Release + * @nvme_cmd_cancel: Cancel * 
@nvme_cmd_copy: Copy * @nvme_cmd_io_mgmt_send: I/O Management Send * @nvme_zns_cmd_mgmt_send: Zone Management Send @@ -8268,6 +8314,7 @@ enum nvme_io_opcode { nvme_cmd_resv_acquire = 0x11, nvme_cmd_io_mgmt_recv = 0x12, nvme_cmd_resv_release = 0x15, + nvme_cmd_cancel = 0x18, nvme_cmd_copy = 0x19, nvme_cmd_io_mgmt_send = 0x1d, nvme_zns_cmd_mgmt_send = 0x79, diff --git a/src/nvme/util.c b/src/nvme/util.c index 7404509..ce0ce76 100644 --- a/src/nvme/util.c +++ b/src/nvme/util.c @@ -1144,7 +1144,7 @@ void *__nvme_realloc(void *p, size_t len) void *result = __nvme_alloc(len); - if (p) { + if (p && result) { memcpy(result, p, min(old_len, len)); free(p); } |