summary refs log tree commit diff stats
path: root/src/nvme
diff options
context:
space:
mode:
Diffstat (limited to 'src/nvme')
-rw-r--r-- src/nvme/base64.c 94
-rw-r--r-- src/nvme/base64.h 8
-rw-r--r-- src/nvme/cleanup.c 5
-rw-r--r-- src/nvme/cleanup.h 24
-rw-r--r-- src/nvme/fabrics.c 395
-rw-r--r-- src/nvme/ioctl.c 272
-rw-r--r-- src/nvme/ioctl.h 262
-rw-r--r-- src/nvme/json.c 28
-rw-r--r-- src/nvme/linux.c 829
-rw-r--r-- src/nvme/linux.h 73
-rw-r--r-- src/nvme/log.c 19
-rw-r--r-- src/nvme/log.h 13
-rw-r--r-- src/nvme/mi-mctp.c 180
-rw-r--r-- src/nvme/mi.c 15
-rw-r--r-- src/nvme/mi.h 39
-rw-r--r-- src/nvme/nbft.c 42
-rw-r--r-- src/nvme/private.h 10
-rw-r--r-- src/nvme/tree.c 1006
-rw-r--r-- src/nvme/tree.h 127
-rw-r--r-- src/nvme/types.h 282
-rw-r--r-- src/nvme/util.c 230
-rw-r--r-- src/nvme/util.h 67
22 files changed, 3033 insertions, 987 deletions
diff --git a/src/nvme/base64.c b/src/nvme/base64.c
new file mode 100644
index 0000000..5fae829
--- /dev/null
+++ b/src/nvme/base64.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * base64.c - RFC4648-compliant base64 encoding
+ *
+ * Copyright (c) 2020 SUSE LLC
+ *
+ * Author: Hannes Reinecke <hare@suse.de>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+
+/* Standard base64 alphabet; sized 65 so the array keeps its terminating NUL. */
+static const char base64_table[65] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/**
+ * base64_encode() - base64-encode some bytes
+ * @src: the bytes to encode
+ * @srclen: number of bytes to encode
+ * @dst: (output) the base64-encoded string. Not NUL-terminated.
+ *
+ * Encodes the input bytes using characters from the set [A-Za-z0-9+/] and
+ * pads the result with '=' so that its length is a multiple of four.
+ * The encoded string is roughly 4/3 times the size of the input string;
+ * @dst is written without a bounds check, so the caller must provide a
+ * buffer that is large enough.
+ *
+ * Return: length of the encoded string
+ */
+int base64_encode(const unsigned char *src, int srclen, char *dst)
+{
+ int i, bits = 0;
+ u_int32_t ac = 0;
+ char *cp = dst;
+
+ for (i = 0; i < srclen; i++) {
+ /* shift the next input byte into the accumulator */
+ ac = (ac << 8) | src[i];
+ bits += 8;
+ /* emit every complete 6-bit group, most significant first */
+ do {
+ bits -= 6;
+ *cp++ = base64_table[(ac >> bits) & 0x3f];
+ } while (bits >= 6);
+ }
+ /* 2 or 4 bits are left over: left-align them in one final symbol */
+ if (bits) {
+ *cp++ = base64_table[(ac << (6 - bits)) & 0x3f];
+ bits -= 6;
+ }
+ /* bits is 0, -2 or -4 here; emit one '=' per 2 bits of deficit */
+ while (bits < 0) {
+ *cp++ = '=';
+ bits += 2;
+ }
+
+ return cp - dst;
+}
+
+/**
+ * base64_decode() - base64-decode some bytes
+ * @src: the base64-encoded string to decode
+ * @srclen: number of bytes to decode
+ * @dst: (output) the decoded bytes.
+ *
+ * Decodes the base64-encoded bytes @src according to RFC 4648.
+ * '=' characters are consumed as padding (six zero bits each) and never
+ * produce output. @dst is written without a bounds check, so the caller
+ * must provide a buffer that is large enough.
+ *
+ * Return: number of decoded bytes, -EINVAL if @src contains a character
+ * that is not part of the base64 alphabet, or -EAGAIN if the input ends
+ * with left-over bits that are not all zero.
+ */
+int base64_decode(const char *src, int srclen, unsigned char *dst)
+{
+	u_int32_t ac = 0;
+	int i, bits = 0;
+	unsigned char *bp = dst;
+
+	for (i = 0; i < srclen; i++) {
+		const char *p = strchr(base64_table, src[i]);
+
+		if (src[i] == '=') {
+			ac = (ac << 6);
+			bits += 6;
+			if (bits >= 8)
+				bits -= 8;
+			continue;
+		}
+		/* strchr() also matches the table's NUL terminator */
+		if (!p || !src[i])
+			return -EINVAL;
+		ac = (ac << 6) | (p - base64_table);
+		bits += 6;
+		if (bits >= 8) {
+			bits -= 8;
+			*bp++ = (unsigned char)(ac >> bits);
+		}
+	}
+	/*
+	 * Bits that did not fill a whole byte must be zero. This has to be a
+	 * bitwise AND with the mask: a logical AND would reject every
+	 * unpadded input whose accumulator happens to be non-zero.
+	 */
+	if (ac & ((1 << bits) - 1))
+		return -EAGAIN;
+
+	return bp - dst;
+}
diff --git a/src/nvme/base64.h b/src/nvme/base64.h
new file mode 100644
index 0000000..c0f62e2
--- /dev/null
+++ b/src/nvme/base64.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _BASE64_H
+#define _BASE64_H
+
+int base64_encode(const unsigned char *src, int len, char *dst);
+int base64_decode(const char *src, int len, unsigned char *dst);
+
+#endif /* _BASE64_H */
diff --git a/src/nvme/cleanup.c b/src/nvme/cleanup.c
deleted file mode 100644
index e652e33..0000000
--- a/src/nvme/cleanup.c
+++ /dev/null
@@ -1,5 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1-or-later
-#include <stdlib.h>
-#include "cleanup.h"
-
-DEFINE_CLEANUP_FUNC(cleanup_charp, char *, free);
diff --git a/src/nvme/cleanup.h b/src/nvme/cleanup.h
index b7e1533..4327600 100644
--- a/src/nvme/cleanup.h
+++ b/src/nvme/cleanup.h
@@ -2,6 +2,11 @@
#ifndef __CLEANUP_H
#define __CLEANUP_H
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
#define __cleanup__(fn) __attribute__((cleanup(fn)))
#define DECLARE_CLEANUP_FUNC(name, type) \
@@ -14,6 +19,23 @@ DECLARE_CLEANUP_FUNC(name, type) \
free_fn(*__p); \
}
-DECLARE_CLEANUP_FUNC(cleanup_charp, char *);
+/*
+ * Cleanup handler behind _cleanup_free_: @p is the address of a pointer
+ * variable, and the pointer stored there is passed to free().
+ */
+static inline void freep(void *p)
+{
+ free(*(void **)p);
+}
+#define _cleanup_free_ __cleanup__(freep)
+
+static inline DEFINE_CLEANUP_FUNC(cleanup_file, FILE *, fclose)
+#define _cleanup_file_ __cleanup__(cleanup_file)
+
+static inline DEFINE_CLEANUP_FUNC(cleanup_dir, DIR *, closedir)
+#define _cleanup_dir_ __cleanup__(cleanup_dir)
+
+/*
+ * Cleanup handler behind _cleanup_fd_: closes the descriptor stored in @fd.
+ * Negative values are skipped, so a variable that still holds -1 or the
+ * result of a failed open() is left alone.
+ */
+static inline void cleanup_fd(int *fd)
+{
+ if (*fd >= 0)
+ close(*fd);
+}
+#define _cleanup_fd_ __cleanup__(cleanup_fd)
#endif
diff --git a/src/nvme/fabrics.c b/src/nvme/fabrics.c
index f0a06e8..4e042d8 100644
--- a/src/nvme/fabrics.c
+++ b/src/nvme/fabrics.c
@@ -32,6 +32,7 @@
#include <ccan/array_size/array_size.h>
#include <ccan/str/str.h>
+#include "cleanup.h"
#include "fabrics.h"
#include "linux.h"
#include "ioctl.h"
@@ -47,7 +48,7 @@
const char *nvmf_dev = "/dev/nvme-fabrics";
/**
- * strchomp() - Strip trailing white space
+ * strchomp() - Strip trailing spaces
* @str: String to strip
* @max: Maximum length of string
*/
@@ -55,11 +56,8 @@ static void strchomp(char *str, int max)
{
int i;
- for (i = max - 1; i >= 0; i--) {
- if (str[i] != '\0' && str[i] != ' ')
- return;
- else
- str[i] = '\0';
+ for (i = max - 1; i >= 0 && str[i] == ' '; i--) {
+ str[i] = '\0';
}
}
@@ -357,10 +355,16 @@ static int __add_argument(char **argstr, const char *tok, const char *arg)
return 0;
}
+static int __nvmf_supported_options(nvme_root_t r);
+/*
+ * nvmf_check_option() - test whether option @tok may be used
+ *
+ * True only when __nvmf_supported_options(@r) returns 0 and the @tok member
+ * of @r->options is non-zero. The call is evaluated first, so @r->options is
+ * not dereferenced when it fails (presumably that call is what fills in
+ * @r->options; confirm against its definition).
+ */
+#define nvmf_check_option(r, tok) \
+({ \
+ !__nvmf_supported_options(r) && (r)->options->tok; \
+})
+
#define add_bool_argument(o, argstr, tok, arg) \
({ \
int ret; \
- if (r->options->tok) { \
+ if (nvmf_check_option(r, tok)) { \
ret = __add_bool_argument(argstr, \
stringify(tok), \
arg); \
@@ -376,7 +380,7 @@ static int __add_argument(char **argstr, const char *tok, const char *arg)
#define add_int_argument(o, argstr, tok, arg, allow_zero) \
({ \
int ret; \
- if (r->options->tok) { \
+ if (nvmf_check_option(r, tok)) { \
ret = __add_int_argument(argstr, \
stringify(tok), \
arg, \
@@ -393,7 +397,7 @@ static int __add_argument(char **argstr, const char *tok, const char *arg)
#define add_int_or_minus_one_argument(o, argstr, tok, arg) \
({ \
int ret; \
- if (r->options->tok) { \
+ if (nvmf_check_option(r, tok)) { \
ret = __add_int_or_minus_one_argument(argstr, \
stringify(tok), \
arg); \
@@ -409,7 +413,7 @@ static int __add_argument(char **argstr, const char *tok, const char *arg)
#define add_argument(r, argstr, tok, arg) \
({ \
int ret; \
- if (r->options->tok) { \
+ if (nvmf_check_option(r, tok)) { \
ret = __add_argument(argstr, \
stringify(tok), \
arg); \
@@ -442,7 +446,6 @@ static int inet4_pton(const char *src, uint16_t port,
static int inet6_pton(nvme_root_t r, const char *src, uint16_t port,
struct sockaddr_storage *addr)
{
- int ret = -EINVAL;
struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
const char *scope = NULL;
char *p;
@@ -450,7 +453,7 @@ static int inet6_pton(nvme_root_t r, const char *src, uint16_t port,
if (strlen(src) > INET6_ADDRSTRLEN)
return -EINVAL;
- char *tmp = strdup(src);
+ _cleanup_free_ char *tmp = strdup(src);
if (!tmp) {
nvme_msg(r, LOG_ERR, "cannot copy: %s\n", src);
return -ENOMEM;
@@ -463,24 +466,20 @@ static int inet6_pton(nvme_root_t r, const char *src, uint16_t port,
}
if (inet_pton(AF_INET6, tmp, &addr6->sin6_addr) != 1)
- goto free_tmp;
+ return -EINVAL;
if (IN6_IS_ADDR_LINKLOCAL(&addr6->sin6_addr) && scope) {
addr6->sin6_scope_id = if_nametoindex(scope);
if (addr6->sin6_scope_id == 0) {
nvme_msg(r, LOG_ERR,
"can't find iface index for: %s (%m)\n", scope);
- goto free_tmp;
+ return -EINVAL;
}
}
addr6->sin6_family = AF_INET6;
addr6->sin6_port = htons(port);
- ret = 0;
-
-free_tmp:
- free(tmp);
- return ret;
+ return 0;
}
/**
@@ -655,7 +654,7 @@ static int build_options(nvme_host_t h, nvme_ctrl_t c, char **argstr)
static int __nvmf_supported_options(nvme_root_t r)
{
char buf[0x1000], *options, *p, *v;
- int fd, ret;
+ _cleanup_fd_ int fd = -1;
ssize_t len;
if (r->options)
@@ -684,14 +683,12 @@ static int __nvmf_supported_options(nvme_root_t r)
"Cannot read %s, using default options\n",
nvmf_dev);
*r->options = default_supported_options;
- ret = 0;
- goto out_close;
+ return 0;
}
nvme_msg(r, LOG_ERR, "Failed to read from %s: %s\n",
nvmf_dev, strerror(errno));
- ret = -ENVME_CONNECT_READ;
- goto out_close;
+ return -ENVME_CONNECT_READ;
}
buf[len] = '\0';
@@ -738,16 +735,13 @@ static int __nvmf_supported_options(nvme_root_t r)
parse_option(r, v, trsvcid);
}
nvme_msg(r, LOG_DEBUG, "\n");
- ret = 0;
-
-out_close:
- close(fd);
- return ret;
+ return 0;
}
static int __nvmf_add_ctrl(nvme_root_t r, const char *argstr)
{
- int ret, fd, len = strlen(argstr);
+ _cleanup_fd_ int fd;
+ int ret, len = strlen(argstr);
char buf[0x1000], *options, *p;
fd = open(nvmf_dev, O_RDWR);
@@ -765,31 +759,22 @@ static int __nvmf_add_ctrl(nvme_root_t r, const char *argstr)
nvmf_dev, strerror(errno));
switch (errno) {
case EALREADY:
- ret = -ENVME_CONNECT_ALREADY;
- break;
+ return -ENVME_CONNECT_ALREADY;
case EINVAL:
- ret = -ENVME_CONNECT_INVAL;
- break;
+ return -ENVME_CONNECT_INVAL;
case EADDRINUSE:
- ret = -ENVME_CONNECT_ADDRINUSE;
- break;
+ return -ENVME_CONNECT_ADDRINUSE;
case ENODEV:
- ret = -ENVME_CONNECT_NODEV;
- break;
+ return -ENVME_CONNECT_NODEV;
case EOPNOTSUPP:
- ret = -ENVME_CONNECT_OPNOTSUPP;
- break;
+ return -ENVME_CONNECT_OPNOTSUPP;
case ECONNREFUSED:
- ret = -ENVME_CONNECT_CONNREFUSED;
- break;
+ return -ENVME_CONNECT_CONNREFUSED;
case EADDRNOTAVAIL:
- ret = -ENVME_CONNECT_ADDRNOTAVAIL;
- break;
+ return -ENVME_CONNECT_ADDRNOTAVAIL;
default:
- ret = -ENVME_CONNECT_WRITE;
- break;
+ return -ENVME_CONNECT_WRITE;
}
- goto out_close;
}
memset(buf, 0x0, sizeof(buf));
@@ -797,8 +782,7 @@ static int __nvmf_add_ctrl(nvme_root_t r, const char *argstr)
if (len < 0) {
nvme_msg(r, LOG_ERR, "Failed to read from %s: %s\n",
nvmf_dev, strerror(errno));
- ret = -ENVME_CONNECT_READ;
- goto out_close;
+ return -ENVME_CONNECT_READ;
}
nvme_msg(r, LOG_DEBUG, "connect ctrl, response '%.*s'\n",
(int)strcspn(buf, "\n"), buf);
@@ -808,14 +792,33 @@ static int __nvmf_add_ctrl(nvme_root_t r, const char *argstr)
if (!*p)
continue;
if (sscanf(p, "instance=%d", &ret) == 1)
- goto out_close;
+ return ret;
}
nvme_msg(r, LOG_ERR, "Failed to parse ctrl info for \"%s\"\n", argstr);
- ret = -ENVME_CONNECT_PARSE;
-out_close:
- close(fd);
- return ret;
+ return -ENVME_CONNECT_PARSE;
+}
+
+/*
+ * lookup_context() - find the application owning a matching controller
+ * @r: root whose hosts and subsystems are searched
+ * @c: controller supplying the transport, traddr and trsvcid to match
+ *
+ * Walks every subsystem of every host under @r and returns the application
+ * of the first subsystem in which __nvme_lookup_ctrl() finds a controller
+ * for @c's transport, traddr and trsvcid. All other lookup keys are passed
+ * as NULL.
+ *
+ * Return: result of nvme_subsystem_get_application() for the first match,
+ * or NULL when no subsystem matches.
+ */
+static const char *lookup_context(nvme_root_t r, nvme_ctrl_t c)
+{
+
+ nvme_host_t h;
+ nvme_subsystem_t s;
+
+ nvme_for_each_host(r, h) {
+ nvme_for_each_subsystem(h, s) {
+ if (__nvme_lookup_ctrl(s, nvme_ctrl_get_transport(c),
+ nvme_ctrl_get_traddr(c),
+ NULL,
+ NULL,
+ nvme_ctrl_get_trsvcid(c),
+ NULL,
+ NULL))
+ return nvme_subsystem_get_application(s);
+ }
+ }
+
+ return NULL;
}
int nvmf_add_ctrl(nvme_host_t h, nvme_ctrl_t c,
@@ -823,7 +826,7 @@ int nvmf_add_ctrl(nvme_host_t h, nvme_ctrl_t c,
{
nvme_subsystem_t s;
const char *root_app, *app;
- char *argstr;
+ _cleanup_free_ char *argstr = NULL;
int ret;
/* highest prio have configs from command line */
@@ -839,6 +842,7 @@ int nvmf_add_ctrl(nvme_host_t h, nvme_ctrl_t c,
nvme_ctrl_get_host_traddr(c),
nvme_ctrl_get_host_iface(c),
nvme_ctrl_get_trsvcid(c),
+ NULL,
NULL);
if (fc) {
const char *key;
@@ -862,24 +866,9 @@ int nvmf_add_ctrl(nvme_host_t h, nvme_ctrl_t c,
root_app = nvme_root_get_application(h->r);
if (root_app) {
app = nvme_subsystem_get_application(s);
- if (!app && nvme_ctrl_is_discovery_ctrl(c)) {
- nvme_subsystem_t s;
- nvme_ctrl_t fc;
-
- nvme_for_each_subsystem(h, s) {
- fc = __nvme_lookup_ctrl(s, nvme_ctrl_get_transport(c),
- nvme_ctrl_get_traddr(c),
- NULL,
- NULL,
- nvme_ctrl_get_trsvcid(c),
- NULL);
-
- if (fc) {
- app = nvme_subsystem_get_application(s);
- break;
- }
- }
- }
+ if (!app && nvme_ctrl_is_discovery_ctrl(c))
+ app = lookup_context(h->r, c);
+
/*
* configuration is managed by an application,
* refuse to act on subsystems which either have
@@ -907,15 +896,11 @@ int nvmf_add_ctrl(nvme_host_t h, nvme_ctrl_t c,
free(traddr);
}
- ret = __nvmf_supported_options(h->r);
- if (ret)
- return ret;
ret = build_options(h, c, &argstr);
if (ret)
return ret;
ret = __nvmf_add_ctrl(h->r, argstr);
- free(argstr);
if (ret < 0) {
errno = -ret;
return -1;
@@ -1020,12 +1005,12 @@ nvme_ctrl_t nvmf_connect_disc_entry(nvme_host_t h,
return NULL;
}
- if (e->treq & NVMF_TREQ_DISABLE_SQFLOW)
+ if (e->treq & NVMF_TREQ_DISABLE_SQFLOW &&
+ nvmf_check_option(h->r, disable_sqflow))
c->cfg.disable_sqflow = true;
if (e->trtype == NVMF_TRTYPE_TCP &&
- (e->treq & NVMF_TREQ_REQUIRED ||
- e->treq & NVMF_TREQ_NOT_REQUIRED))
+ e->tsas.tcp.sectype != NVMF_TCP_SECTYPE_NONE)
c->cfg.tls = true;
ret = nvmf_add_ctrl(h, c, cfg);
@@ -1046,45 +1031,55 @@ nvme_ctrl_t nvmf_connect_disc_entry(nvme_host_t h,
return NULL;
}
-static struct nvmf_discovery_log *nvme_discovery_log(nvme_ctrl_t c,
- struct nvme_get_log_args *args,
- int max_retries)
+/*
+ * Most of nvmf_discovery_log is reserved, so only fetch the initial bytes.
+ * 8 bytes for GENCTR, 8 for NUMREC, and 2 for RECFMT.
+ * Since only multiples of 4 bytes are allowed, round 18 up to 20.
+ */
+#define DISCOVERY_HEADER_LEN 20
+
+static struct nvmf_discovery_log *nvme_discovery_log(
+ const struct nvme_get_discovery_args *args)
{
- nvme_root_t r = c->s && c->s->h ? c->s->h->r : NULL;
- struct nvmf_discovery_log *log = NULL;
- int ret, retries = 0;
- const char *name = nvme_ctrl_get_name(c);
+ nvme_root_t r = root_from_ctrl(args->c);
+ struct nvmf_discovery_log *log;
+ int retries = 0;
+ const char *name = nvme_ctrl_get_name(args->c);
uint64_t genctr, numrec;
- unsigned int size;
- int fd = nvme_ctrl_get_fd(c);
-
- args->fd = fd;
+ int fd = nvme_ctrl_get_fd(args->c);
+ struct nvme_get_log_args log_args = {
+ .result = args->result,
+ .args_size = sizeof(log_args),
+ .timeout = args->timeout,
+ .lid = NVME_LOG_LID_DISCOVER,
+ .nsid = NVME_NSID_NONE,
+ .csi = NVME_CSI_NVM,
+ .lsi = NVME_LOG_LSI_NONE,
+ .lsp = args->lsp,
+ .uuidx = NVME_UUID_NONE,
+ };
- do {
- size = sizeof(struct nvmf_discovery_log);
+ log = __nvme_alloc(sizeof(*log));
+ if (!log) {
+ nvme_msg(r, LOG_ERR,
+ "could not allocate memory for discovery log header\n");
+ errno = ENOMEM;
+ return NULL;
+ }
- free(log);
- log = calloc(1, size);
- if (!log) {
- nvme_msg(r, LOG_ERR,
- "could not allocate memory for discovery log header\n");
- errno = ENOMEM;
- return NULL;
- }
+ nvme_msg(r, LOG_DEBUG, "%s: get header (try %d/%d)\n",
+ name, retries, args->max_retries);
+ log_args.log = log;
+ log_args.len = DISCOVERY_HEADER_LEN;
+ if (nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, &log_args)) {
+ nvme_msg(r, LOG_INFO,
+ "%s: discover try %d/%d failed, error %d\n",
+ name, retries, args->max_retries, errno);
+ goto out_free_log;
+ }
- nvme_msg(r, LOG_DEBUG, "%s: get header (try %d/%d)\n",
- name, retries, max_retries);
- args->rae = true;
- args->lpo = 0;
- args->len = size;
- args->log = log;
- ret = nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, args);
- if (ret) {
- nvme_msg(r, LOG_INFO,
- "%s: discover try %d/%d failed, error %d\n",
- name, retries, max_retries, errno);
- goto out_free_log;
- }
+ do {
+ size_t entries_size;
numrec = le64_to_cpu(log->numrec);
genctr = le64_to_cpu(log->genctr);
@@ -1092,11 +1087,9 @@ static struct nvmf_discovery_log *nvme_discovery_log(nvme_ctrl_t c,
if (numrec == 0)
break;
- size = sizeof(struct nvmf_discovery_log) +
- sizeof(struct nvmf_disc_log_entry) * numrec;
-
free(log);
- log = calloc(1, size);
+ entries_size = sizeof(*log->entries) * numrec;
+ log = __nvme_alloc(sizeof(*log) + entries_size);
if (!log) {
nvme_msg(r, LOG_ERR,
"could not alloc memory for discovery log page\n");
@@ -1105,19 +1098,16 @@ static struct nvmf_discovery_log *nvme_discovery_log(nvme_ctrl_t c,
}
nvme_msg(r, LOG_DEBUG,
- "%s: get %" PRIu64
- " records (length %d genctr %" PRIu64 ")\n",
- name, numrec, size, genctr);
-
- args->rae = true;
- args->lpo = sizeof(struct nvmf_discovery_log);
- args->len = size - sizeof(struct nvmf_discovery_log);
- args->log = log->entries;
- ret = nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, args);
- if (ret) {
+ "%s: get %" PRIu64 " records (genctr %" PRIu64 ")\n",
+ name, numrec, genctr);
+
+ log_args.lpo = sizeof(*log);
+ log_args.log = log->entries;
+ log_args.len = entries_size;
+ if (nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, &log_args)) {
nvme_msg(r, LOG_INFO,
"%s: discover try %d/%d failed, error %d\n",
- name, retries, max_retries, errno);
+ name, retries, args->max_retries, errno);
goto out_free_log;
}
@@ -1127,19 +1117,17 @@ static struct nvmf_discovery_log *nvme_discovery_log(nvme_ctrl_t c,
*/
nvme_msg(r, LOG_DEBUG, "%s: get header again\n", name);
- args->rae = false;
- args->lpo = 0;
- args->len = sizeof(struct nvmf_discovery_log);
- args->log = log;
- ret = nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, args);
- if (ret) {
+ log_args.lpo = 0;
+ log_args.log = log;
+ log_args.len = DISCOVERY_HEADER_LEN;
+ if (nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, &log_args)) {
nvme_msg(r, LOG_INFO,
"%s: discover try %d/%d failed, error %d\n",
- name, retries, max_retries, errno);
+ name, retries, args->max_retries, errno);
goto out_free_log;
}
} while (genctr != le64_to_cpu(log->genctr) &&
- ++retries < max_retries);
+ ++retries < args->max_retries);
if (genctr != le64_to_cpu(log->genctr)) {
nvme_msg(r, LOG_INFO, "%s: discover genctr mismatch\n", name);
@@ -1159,87 +1147,31 @@ out_free_log:
return NULL;
}
-static void sanitize_discovery_log_entry(struct nvmf_disc_log_entry *e)
+static void sanitize_discovery_log_entry(struct nvmf_disc_log_entry *e)
{
- switch (e->trtype) {
- case NVMF_TRTYPE_RDMA:
- case NVMF_TRTYPE_TCP:
- switch (e->adrfam) {
- case NVMF_ADDR_FAMILY_IP4:
- case NVMF_ADDR_FAMILY_IP6:
- strchomp(e->traddr, NVMF_TRADDR_SIZE);
- strchomp(e->trsvcid, NVMF_TRSVCID_SIZE);
- break;
- }
- break;
- case NVMF_TRTYPE_FC:
- switch (e->adrfam) {
- case NVMF_ADDR_FAMILY_FC:
- strchomp(e->traddr, NVMF_TRADDR_SIZE);
- break;
- }
- break;
- case NVMF_TRTYPE_LOOP:
- strchomp(e->traddr, NVMF_TRADDR_SIZE);
- break;
- }
+ strchomp(e->trsvcid, sizeof(e->trsvcid));
+ strchomp(e->traddr, sizeof(e->traddr));
}
int nvmf_get_discovery_log(nvme_ctrl_t c, struct nvmf_discovery_log **logp,
int max_retries)
{
- struct nvmf_discovery_log *log;
-
- struct nvme_get_log_args args = {
- .args_size = sizeof(args),
- .fd = nvme_ctrl_get_fd(c),
- .nsid = NVME_NSID_NONE,
- .lsp = NVMF_LOG_DISC_LSP_NONE,
- .lsi = NVME_LOG_LSI_NONE,
- .uuidx = NVME_UUID_NONE,
+ struct nvme_get_discovery_args args = {
+ .c = c,
+ .max_retries = max_retries,
.timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
- .result = NULL,
- .lid = NVME_LOG_LID_DISCOVER,
- .log = NULL,
- .len = 0,
- .csi = NVME_CSI_NVM,
- .rae = false,
- .ot = false,
+ .lsp = NVMF_LOG_DISC_LSP_NONE,
};
- log = nvme_discovery_log(c, &args, max_retries);
- if (!log)
- return -1;
-
- for (int i = 0; i < le64_to_cpu(log->numrec); i++)
- sanitize_discovery_log_entry(&log->entries[i]);
-
- *logp = log;
- return 0;
+ *logp = nvmf_get_discovery_wargs(&args);
+ return *logp ? 0 : -1;
}
struct nvmf_discovery_log *nvmf_get_discovery_wargs(struct nvme_get_discovery_args *args)
{
struct nvmf_discovery_log *log;
- struct nvme_get_log_args _args = {
- .args_size = sizeof(_args),
- .fd = nvme_ctrl_get_fd(args->c),
- .nsid = NVME_NSID_NONE,
- .lsp = args->lsp,
- .lsi = NVME_LOG_LSI_NONE,
- .uuidx = NVME_UUID_NONE,
- .timeout = args->timeout,
- .result = args->result,
- .lid = NVME_LOG_LID_DISCOVER,
- .log = NULL,
- .len = 0,
- .csi = NVME_CSI_NVM,
- .rae = false,
- .ot = false,
- };
-
- log = nvme_discovery_log(args->c, &_args, args->max_retries);
+ log = nvme_discovery_log(args);
if (!log)
return NULL;
@@ -1254,7 +1186,7 @@ struct nvmf_discovery_log *nvmf_get_discovery_wargs(struct nvme_get_discovery_ar
static int uuid_from_device_tree(char *system_uuid)
{
ssize_t len;
- int f;
+ _cleanup_fd_ int f;
f = open(PATH_UUID_IBM, O_RDONLY);
if (f < 0)
@@ -1262,7 +1194,6 @@ static int uuid_from_device_tree(char *system_uuid)
memset(system_uuid, 0, NVME_UUID_LEN_STRING);
len = read(f, system_uuid, NVME_UUID_LEN_STRING - 1);
- close(f);
if (len < 0)
return -ENXIO;
@@ -1299,7 +1230,7 @@ static bool is_dmi_uuid_valid(const char *buf, size_t len)
static int uuid_from_dmi_entries(char *system_uuid)
{
int f;
- DIR *d;
+ _cleanup_dir_ DIR *d;
struct dirent *de;
char buf[512] = {0};
@@ -1350,7 +1281,6 @@ static int uuid_from_dmi_entries(char *system_uuid)
(uint8_t)buf[8 + 14], (uint8_t)buf[8 + 15]);
break;
}
- closedir(d);
return strlen(system_uuid) ? 0 : -ENXIO;
}
@@ -1364,10 +1294,9 @@ static int uuid_from_dmi_entries(char *system_uuid)
*/
static int uuid_from_product_uuid(char *system_uuid)
{
- FILE *stream;
+ _cleanup_file_ FILE *stream;
ssize_t nread;
- int ret;
- char *line = NULL;
+ _cleanup_free_ char *line = NULL;
size_t len = 0;
stream = fopen(PATH_DMI_PROD_UUID, "re");
@@ -1376,10 +1305,8 @@ static int uuid_from_product_uuid(char *system_uuid)
system_uuid[0] = '\0';
nread = getline(&line, &len, stream);
- if (nread != NVME_UUID_LEN_STRING) {
- ret = -ENXIO;
- goto out;
- }
+ if (nread != NVME_UUID_LEN_STRING)
+ return -ENXIO;
/* The kernel is handling the byte swapping according DMTF
* SMBIOS 3.0 Section 7.2.1 System UUID */
@@ -1387,13 +1314,7 @@ static int uuid_from_product_uuid(char *system_uuid)
memcpy(system_uuid, line, NVME_UUID_LEN_STRING - 1);
system_uuid[NVME_UUID_LEN_STRING - 1] = '\0';
- ret = 0;
-
-out:
- free(line);
- fclose(stream);
-
- return ret;
+ return 0;
}
/**
@@ -1443,7 +1364,8 @@ char *nvmf_hostnqn_generate()
static char *nvmf_read_file(const char *f, int len)
{
char buf[len];
- int ret, fd;
+ _cleanup_fd_ int fd;
+ int ret;
fd = open(f, O_RDONLY);
if (fd < 0)
@@ -1451,7 +1373,6 @@ static char *nvmf_read_file(const char *f, int len)
memset(buf, 0, len);
ret = read(fd, buf, len - 1);
- close (fd);
if (ret < 0 || !strlen(buf))
return NULL;
@@ -1575,7 +1496,7 @@ static int nvmf_dim(nvme_ctrl_t c, enum nvmf_dim_tas tas, __u8 trtype,
__u32 *result)
{
nvme_root_t r = c->s && c->s->h ? c->s->h->r : NULL;
- struct nvmf_dim_data *dim;
+ _cleanup_free_ struct nvmf_dim_data *dim = NULL;
struct nvmf_ext_die *die;
__u32 tdl;
__u32 tel;
@@ -1662,11 +1583,7 @@ static int nvmf_dim(nvme_ctrl_t c, enum nvmf_dim_tas tas, __u8 trtype,
args.data_len = tdl;
args.data = dim;
- ret = nvme_dim_send(&args);
-
- free(dim);
-
- return ret;
+ return nvme_dim_send(&args);
}
/**
@@ -1720,25 +1637,31 @@ static const char *dctype_str[] = {
*/
static int nvme_fetch_cntrltype_dctype_from_id(nvme_ctrl_t c)
{
- struct nvme_id_ctrl id = { 0 };
+ _cleanup_free_ struct nvme_id_ctrl *id;
int ret;
- ret = nvme_ctrl_identify(c, &id);
+ id = __nvme_alloc(sizeof(*id));
+ if (!id) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ ret = nvme_ctrl_identify(c, id);
if (ret)
return ret;
if (!c->cntrltype) {
- if (id.cntrltype > NVME_CTRL_CNTRLTYPE_ADMIN || !cntrltype_str[id.cntrltype])
+ if (id->cntrltype > NVME_CTRL_CNTRLTYPE_ADMIN || !cntrltype_str[id->cntrltype])
c->cntrltype = strdup("reserved");
else
- c->cntrltype = strdup(cntrltype_str[id.cntrltype]);
+ c->cntrltype = strdup(cntrltype_str[id->cntrltype]);
}
- if (!c->dctype) {
- if (id.dctype > NVME_CTRL_DCTYPE_CDC || !dctype_str[id.dctype])
+ if (!c->dctype) {
+ if (id->dctype > NVME_CTRL_DCTYPE_CDC || !dctype_str[id->dctype])
c->dctype = strdup("reserved");
else
- c->dctype = strdup(dctype_str[id.dctype]);
+ c->dctype = strdup(dctype_str[id->dctype]);
}
return 0;
}
diff --git a/src/nvme/ioctl.c b/src/nvme/ioctl.c
index b9710b3..9090b7e 100644
--- a/src/nvme/ioctl.c
+++ b/src/nvme/ioctl.c
@@ -13,9 +13,11 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <inttypes.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
+#include <sys/time.h>
#include <ccan/build_assert/build_assert.h>
#include <ccan/endian/endian.h>
@@ -23,6 +25,8 @@
#include "ioctl.h"
#include "util.h"
+static bool nvme_debug;
+
static int nvme_verify_chr(int fd)
{
static struct stat nvme_stat;
@@ -86,13 +90,62 @@ static int nvme_submit_passthru64(int fd, unsigned long ioctl_cmd,
return err;
}
+/*
+ * nvme_show_command() - print a passthru command and its outcome to stdout
+ * @cmd: the command as handed to the ioctl; printed field by field
+ * @err: value returned by the ioctl
+ * @start: time stamp taken before the ioctl was issued
+ * @end: time stamp taken after the ioctl returned
+ *
+ * The latency line is the difference between @end and @start in
+ * microseconds.
+ */
+static void nvme_show_command(struct nvme_passthru_cmd *cmd, int err, struct timeval start,
+			      struct timeval end)
+{
+	printf("opcode : %02x\n", cmd->opcode);
+	printf("flags : %02x\n", cmd->flags);
+	printf("rsvd1 : %04x\n", cmd->rsvd1);
+	printf("nsid : %08x\n", cmd->nsid);
+	printf("cdw2 : %08x\n", cmd->cdw2);
+	printf("cdw3 : %08x\n", cmd->cdw3);
+	printf("data_len : %08x\n", cmd->data_len);
+	printf("metadata_len : %08x\n", cmd->metadata_len);
+	printf("addr : %"PRIx64"\n", (uint64_t)(uintptr_t)cmd->addr);
+	printf("metadata : %"PRIx64"\n", (uint64_t)(uintptr_t)cmd->metadata);
+	printf("cdw10 : %08x\n", cmd->cdw10);
+	printf("cdw11 : %08x\n", cmd->cdw11);
+	printf("cdw12 : %08x\n", cmd->cdw12);
+	printf("cdw13 : %08x\n", cmd->cdw13);
+	printf("cdw14 : %08x\n", cmd->cdw14);
+	printf("cdw15 : %08x\n", cmd->cdw15);
+	printf("timeout_ms : %08x\n", cmd->timeout_ms);
+	printf("result : %08x\n", cmd->result);
+	printf("err : %d\n", err);
+	/*
+	 * The widths of time_t and suseconds_t are platform dependent and both
+	 * are signed, so convert explicitly to the type the format expects.
+	 */
+	printf("latency : %llu us\n",
+	       (unsigned long long)((end.tv_sec - start.tv_sec) * 1000000 +
+				    (end.tv_usec - start.tv_usec)));
+}
+
+/*
+ * nvme_set_debug() - set the file-local nvme_debug flag
+ * @debug: new value; nvme_submit_passthru() prints each command via
+ *         nvme_show_command() while the flag is true
+ */
+void nvme_set_debug(bool debug)
+{
+ nvme_debug = debug;
+}
+
+/*
+ * nvme_get_debug() - read the file-local nvme_debug flag
+ *
+ * Return: the value last stored by nvme_set_debug(), false if never set
+ * (the flag is a zero-initialised static).
+ */
+bool nvme_get_debug(void)
+{
+ return nvme_debug;
+}
+
static int nvme_submit_passthru(int fd, unsigned long ioctl_cmd,
struct nvme_passthru_cmd *cmd, __u32 *result)
{
- int err = ioctl(fd, ioctl_cmd, cmd);
+ struct timeval start;
+ struct timeval end;
+ int err;
+
+ if (nvme_get_debug())
+ gettimeofday(&start, NULL);
+
+ err = ioctl(fd, ioctl_cmd, cmd);
+
+ if (nvme_get_debug()) {
+ gettimeofday(&end, NULL);
+ nvme_show_command(cmd, err, start, end);
+ }
if (err >= 0 && result)
*result = cmd->result;
+
return err;
}
@@ -532,16 +585,18 @@ int nvme_set_features_power_mgmt(int fd, __u8 ps, __u8 wh, bool save,
__u32 *result)
{
__u32 value = NVME_SET(ps, FEAT_PWRMGMT_PS) |
- NVME_SET(wh, FEAT_PWRMGMT_PS);
+ NVME_SET(wh, FEAT_PWRMGMT_WH);
return __nvme_set_features(fd, NVME_FEAT_FID_POWER_MGMT, value, save,
result);
}
-int nvme_set_features_lba_range(int fd, __u32 nsid, __u32 nr_ranges, bool save,
+int nvme_set_features_lba_range(int fd, __u32 nsid, __u8 nr_ranges, bool save,
struct nvme_lba_range_type *data, __u32 *result)
{
- return -1;
+ return nvme_set_features_data(
+ fd, NVME_FEAT_FID_LBA_RANGE, nsid, nr_ranges - 1, save,
+ sizeof(*data), data, result);
}
int nvme_set_features_temp_thresh(int fd, __u16 tmpth, __u8 tmpsel,
@@ -562,8 +617,8 @@ int nvme_set_features_err_recovery(int fd, __u32 nsid, __u16 tler, bool dulbe,
__u32 value = NVME_SET(tler, FEAT_ERROR_RECOVERY_TLER) |
NVME_SET(!!dulbe, FEAT_ERROR_RECOVERY_DULBE);
- return __nvme_set_features(fd, NVME_FEAT_FID_ERR_RECOVERY, value, save,
- result);
+ return nvme_set_features_simple(
+ fd, NVME_FEAT_FID_ERR_RECOVERY, nsid, value, save, result);
}
int nvme_set_features_volatile_wc(int fd, bool wce, bool save, __u32 *result)
@@ -577,8 +632,8 @@ int nvme_set_features_volatile_wc(int fd, bool wce, bool save, __u32 *result)
int nvme_set_features_irq_coalesce(int fd, __u8 thr, __u8 time, bool save,
__u32 *result)
{
- __u32 value = NVME_SET(thr, FEAT_IRQC_TIME) |
- NVME_SET(time, FEAT_IRQC_THR);
+ __u32 value = NVME_SET(thr, FEAT_IRQC_THR) |
+ NVME_SET(time, FEAT_IRQC_TIME);
return __nvme_set_features(fd, NVME_FEAT_FID_IRQ_COALESCE, value, save,
result);
@@ -612,19 +667,31 @@ int nvme_set_features_async_event(int fd, __u32 events,
int nvme_set_features_auto_pst(int fd, bool apste, bool save,
struct nvme_feat_auto_pst *apst, __u32 *result)
{
- __u32 value = NVME_SET(!!apste, FEAT_APST_APSTE);
+ struct nvme_set_features_args args = {
+ .args_size = sizeof(args),
+ .fd = fd,
+ .fid = NVME_FEAT_FID_AUTO_PST,
+ .nsid = NVME_NSID_NONE,
+ .cdw11 = NVME_SET(!!apste, FEAT_APST_APSTE),
+ .save = save,
+ .uuidx = NVME_UUID_NONE,
+ .data = apst,
+ .data_len = sizeof(*apst),
+ .timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
+ .result = result,
+ };
- return __nvme_set_features(fd, NVME_FEAT_FID_AUTO_PST, value, save,
- result);
+ return nvme_set_features(&args);
}
int nvme_set_features_timestamp(int fd, bool save, __u64 timestamp)
{
__le64 t = cpu_to_le64(timestamp);
- struct nvme_timestamp ts;
+ struct nvme_timestamp ts = {};
struct nvme_set_features_args args = {
.args_size = sizeof(args),
.fd = fd,
+ .fid = NVME_FEAT_FID_TIMESTAMP,
.nsid = NVME_NSID_NONE,
.cdw11 = 0,
.cdw12 = 0,
@@ -694,8 +761,8 @@ int nvme_set_features_plm_config(int fd, bool plm, __u16 nvmsetid, bool save,
.save = save,
.uuidx = NVME_UUID_NONE,
.cdw15 = 0,
- .data_len = 0,
- .data = NULL,
+ .data_len = sizeof(*data),
+ .data = data,
.timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
.result = result,
};
@@ -746,7 +813,7 @@ int nvme_set_features_host_behavior(int fd, bool save,
.nsid = NVME_NSID_NONE,
.cdw11 = 0,
.cdw12 = 0,
- .save = save,
+ .save = false,
.uuidx = NVME_UUID_NONE,
.cdw15 = 0,
.data_len = sizeof(*data),
@@ -780,7 +847,7 @@ int nvme_set_features_sw_progress(int fd, __u8 pbslc, bool save,
result);
}
-int nvme_set_features_host_id(int fd, bool save, bool exhid, __u8 *hostid)
+int nvme_set_features_host_id(int fd, bool exhid, bool save, __u8 *hostid)
{
__u32 len = exhid ? 16 : 8;
__u32 value = !!exhid;
@@ -809,20 +876,42 @@ int nvme_set_features_resv_mask(int fd, __u32 mask, bool save, __u32 *result)
result);
}
+/*
+ * Variant of nvme_set_features_resv_mask() that takes a namespace id:
+ * forwards @nsid, @mask, @save and @result to nvme_set_features_simple()
+ * with feature id NVME_FEAT_FID_RESV_MASK and returns its result.
+ */
+int nvme_set_features_resv_mask2(int fd, __u32 nsid, __u32 mask, bool save,
+ __u32 *result)
+{
+ return nvme_set_features_simple(
+ fd, NVME_FEAT_FID_RESV_MASK, nsid, mask, save, result);
+}
+
int nvme_set_features_resv_persist(int fd, bool ptpl, bool save, __u32 *result)
{
return __nvme_set_features(fd, NVME_FEAT_FID_RESV_PERSIST, !!ptpl, save,
result);
}
+/*
+ * Variant of nvme_set_features_resv_persist() that takes a namespace id:
+ * forwards to nvme_set_features_simple() with feature id
+ * NVME_FEAT_FID_RESV_PERSIST. @ptpl is normalised to 0 or 1 before it is
+ * passed on as the feature value.
+ */
+int nvme_set_features_resv_persist2(int fd, __u32 nsid, bool ptpl, bool save,
+ __u32 *result)
+{
+ return nvme_set_features_simple(
+ fd, NVME_FEAT_FID_RESV_PERSIST, nsid, !!ptpl, save, result);
+}
+
int nvme_set_features_write_protect(int fd, enum nvme_feat_nswpcfg_state state,
bool save, __u32 *result)
{
return __nvme_set_features(fd, NVME_FEAT_FID_WRITE_PROTECT, state,
- save, result);
+ false, result);
+}
+
+/*
+ * Variant of nvme_set_features_write_protect() that takes a namespace id:
+ * forwards @nsid and @state to nvme_set_features_simple() with feature id
+ * NVME_FEAT_FID_WRITE_PROTECT.
+ *
+ * NOTE: @save is accepted but ignored; false is always passed as the save
+ * argument.
+ */
+int nvme_set_features_write_protect2(int fd, __u32 nsid,
+ enum nvme_feat_nswpcfg_state state,
+ bool save, __u32 *result)
+{
+ return nvme_set_features_simple(
+ fd, NVME_FEAT_FID_WRITE_PROTECT, nsid, state, false, result);
}
-int nvme_set_features_iocs_profile(int fd, __u8 iocsi, bool save)
+int nvme_set_features_iocs_profile(int fd, __u16 iocsi, bool save)
{
__u32 value = NVME_SET(iocsi, FEAT_IOCSP_IOCSCI);
@@ -898,8 +987,28 @@ int nvme_get_features_lba_range(int fd, enum nvme_get_features_sel sel,
.sel = sel,
.cdw11 = 0,
.uuidx = NVME_UUID_NONE,
- .data_len = 0,
- .data = NULL,
+ .data_len = sizeof(*data),
+ .data = data,
+ .timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
+ .result = result,
+ };
+
+ return nvme_get_features(&args);
+}
+
+int nvme_get_features_lba_range2(int fd, enum nvme_get_features_sel sel,
+ __u32 nsid, struct nvme_lba_range_type *data,
+ __u32 *result)
+{
+ struct nvme_get_features_args args = {
+ .args_size = sizeof(args),
+ .fd = fd,
+ .fid = NVME_FEAT_FID_LBA_RANGE,
+ .nsid = nsid,
+ .sel = sel,
+ .uuidx = NVME_UUID_NONE,
+ .data = data,
+ .data_len = sizeof(*data),
.timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
.result = result,
};
@@ -920,6 +1029,24 @@ int nvme_get_features_err_recovery(int fd, enum nvme_get_features_sel sel,
result);
}
+/*
+ * Variant of nvme_get_features_err_recovery() that takes a namespace id:
+ * issues nvme_get_features() for NVME_FEAT_FID_ERR_RECOVERY on @nsid with
+ * selector @sel, no UUID index, no data buffer and the default ioctl
+ * timeout. @result is handed through to nvme_get_features().
+ */
+int nvme_get_features_err_recovery2(int fd, enum nvme_get_features_sel sel,
+ __u32 nsid, __u32 *result)
+{
+
+ struct nvme_get_features_args args = {
+ .args_size = sizeof(args),
+ .fd = fd,
+ .fid = NVME_FEAT_FID_ERR_RECOVERY,
+ .nsid = nsid,
+ .sel = sel,
+ .uuidx = NVME_UUID_NONE,
+ .timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
+ .result = result,
+ };
+
+ return nvme_get_features(&args);
+}
+
int nvme_get_features_volatile_wc(int fd, enum nvme_get_features_sel sel,
__u32 *result)
{
@@ -945,7 +1072,7 @@ int nvme_get_features_irq_config(int fd, enum nvme_get_features_sel sel,
struct nvme_get_features_args args = {
.args_size = sizeof(args),
.fd = fd,
- .fid = NVME_FEAT_FID_LBA_RANGE,
+ .fid = NVME_FEAT_FID_IRQ_CONFIG,
.nsid = NVME_NSID_NONE,
.sel = sel,
.cdw11 = iv,
@@ -978,13 +1105,13 @@ int nvme_get_features_auto_pst(int fd, enum nvme_get_features_sel sel,
struct nvme_get_features_args args = {
.args_size = sizeof(args),
.fd = fd,
- .fid = NVME_FEAT_FID_LBA_RANGE,
+ .fid = NVME_FEAT_FID_AUTO_PST,
.nsid = NVME_NSID_NONE,
.sel = sel,
.cdw11 = 0,
.uuidx = NVME_UUID_NONE,
- .data_len = 0,
- .data = NULL,
+ .data_len = sizeof(*apst),
+ .data = apst,
.timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
.result = result,
};
@@ -998,6 +1125,26 @@ int nvme_get_features_host_mem_buf(int fd, enum nvme_get_features_sel sel,
return __nvme_get_features(fd, NVME_FEAT_FID_HOST_MEM_BUF, sel, result);
}
+/*
+ * Get Features for Host Memory Buffer that also passes @attrs as the
+ * command's data buffer (sizeof(*attrs) bytes). No namespace is selected.
+ */
+int nvme_get_features_host_mem_buf2(int fd, enum nvme_get_features_sel sel,
+				    struct nvme_host_mem_buf_attrs *attrs,
+				    __u32 *result)
+{
+	struct nvme_get_features_args req = {
+		.result = result,
+		.data = attrs,
+		.args_size = sizeof(req),
+		.fd = fd,
+		.timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
+		.nsid = NVME_NSID_NONE,
+		.sel = sel,
+		.data_len = sizeof(*attrs),
+		.fid = NVME_FEAT_FID_HOST_MEM_BUF,
+		.uuidx = NVME_UUID_NONE,
+	};
+
+	return nvme_get_features(&req);
+}
+
int nvme_get_features_timestamp(int fd, enum nvme_get_features_sel sel,
struct nvme_timestamp *ts)
{
@@ -1050,8 +1197,8 @@ int nvme_get_features_plm_config(int fd, enum nvme_get_features_sel sel,
.sel = sel,
.cdw11 = nvmsetid,
.uuidx = NVME_UUID_NONE,
- .data_len = 0,
- .data = NULL,
+ .data_len = sizeof(*data),
+ .data = data,
.timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
.result = result,
};
@@ -1098,8 +1245,8 @@ int nvme_get_features_host_behavior(int fd, enum nvme_get_features_sel sel,
.sel = sel,
.cdw11 = 0,
.uuidx = NVME_UUID_NONE,
- .data_len = 0,
- .data = NULL,
+ .data_len = sizeof(*data),
+ .data = data,
.timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
.result = result,
};
@@ -1122,7 +1269,7 @@ int nvme_get_features_endurance_event_cfg(int fd, enum nvme_get_features_sel sel
.fid = NVME_FEAT_FID_ENDURANCE_EVT_CFG,
.nsid = NVME_NSID_NONE,
.sel = sel,
- .cdw11 = 0,
+ .cdw11 = endgid,
.uuidx = NVME_UUID_NONE,
.data_len = 0,
.data = NULL,
@@ -1165,12 +1312,46 @@ int nvme_get_features_resv_mask(int fd, enum nvme_get_features_sel sel,
return __nvme_get_features(fd, NVME_FEAT_FID_RESV_MASK, sel, result);
}
+/*
+ * Get Features for the reservation notification mask of namespace @nsid.
+ * No data buffer is attached; the return value is that of nvme_get_features().
+ */
+int nvme_get_features_resv_mask2(int fd, enum nvme_get_features_sel sel,
+				 __u32 nsid, __u32 *result)
+{
+	struct nvme_get_features_args req = {
+		.result = result,
+		.args_size = sizeof(req),
+		.fd = fd,
+		.timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
+		.nsid = nsid,
+		.sel = sel,
+		.fid = NVME_FEAT_FID_RESV_MASK,
+		.uuidx = NVME_UUID_NONE,
+	};
+
+	return nvme_get_features(&req);
+}
+
int nvme_get_features_resv_persist(int fd, enum nvme_get_features_sel sel,
__u32 *result)
{
return __nvme_get_features(fd, NVME_FEAT_FID_RESV_PERSIST, sel, result);
}
+/*
+ * Get Features for the reservation persistence setting of namespace @nsid.
+ * No data buffer is attached; the return value is that of nvme_get_features().
+ */
+int nvme_get_features_resv_persist2(int fd, enum nvme_get_features_sel sel,
+				    __u32 nsid, __u32 *result)
+{
+	struct nvme_get_features_args req = {
+		.result = result,
+		.args_size = sizeof(req),
+		.fd = fd,
+		.timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
+		.nsid = nsid,
+		.sel = sel,
+		.fid = NVME_FEAT_FID_RESV_PERSIST,
+		.uuidx = NVME_UUID_NONE,
+	};
+
+	return nvme_get_features(&req);
+}
+
int nvme_get_features_write_protect(int fd, __u32 nsid,
enum nvme_get_features_sel sel,
__u32 *result)
@@ -1387,6 +1568,7 @@ int nvme_get_lba_status(struct nvme_get_lba_status_args *args)
.opcode = nvme_admin_get_lba_status,
.nsid = args->nsid,
.addr = (__u64)(uintptr_t)args->lbas,
+ .data_len = (args->mndw + 1) << 2,
.cdw10 = cdw10,
.cdw11 = cdw11,
.cdw12 = cdw12,
@@ -1655,33 +1837,35 @@ static int nvme_set_var_size_tags(__u32 *cmd_dw2, __u32 *cmd_dw3, __u32 *cmd_dw1
__u8 pif, __u8 sts, __u64 reftag, __u64 storage_tag)
{
__u32 cdw2 = 0, cdw3 = 0, cdw14;
+ beint64_t be_reftag = cpu_to_be64(reftag);
+ beint64_t be_storage_tag = cpu_to_be64(storage_tag);
switch (pif) {
/* 16b Protection Information */
case 0:
- cdw14 = reftag & 0xffffffff;
- cdw14 |= ((storage_tag << (32 - sts)) & 0xffffffff);
+ cdw14 = be_reftag & 0xffffffff;
+ cdw14 |= ((be_storage_tag << (32 - sts)) & 0xffffffff);
break;
/* 32b Protection Information */
case 1:
- cdw14 = reftag & 0xffffffff;
- cdw3 = reftag >> 32;
- cdw14 |= ((storage_tag << (80 - sts)) & 0xffff0000);
+ cdw14 = be_reftag & 0xffffffff;
+ cdw3 = be_reftag >> 32;
+ cdw14 |= ((be_storage_tag << (80 - sts)) & 0xffff0000);
if (sts >= 48)
- cdw3 |= ((storage_tag >> (sts - 48)) & 0xffffffff);
+ cdw3 |= ((be_storage_tag >> (sts - 48)) & 0xffffffff);
else
- cdw3 |= ((storage_tag << (48 - sts)) & 0xffffffff);
- cdw2 = (storage_tag >> (sts - 16)) & 0xffff;
+ cdw3 |= ((be_storage_tag << (48 - sts)) & 0xffffffff);
+ cdw2 = (be_storage_tag >> (sts - 16)) & 0xffff;
break;
/* 64b Protection Information */
case 2:
- cdw14 = reftag & 0xffffffff;
- cdw3 = (reftag >> 32) & 0xffff;
- cdw14 |= ((storage_tag << (48 - sts)) & 0xffffffff);
+ cdw14 = be_reftag & 0xffffffff;
+ cdw3 = (be_reftag >> 32) & 0xffff;
+ cdw14 |= ((be_storage_tag << (48 - sts)) & 0xffffffff);
if (sts >= 16)
- cdw3 |= ((storage_tag >> (sts - 16)) & 0xffff);
+ cdw3 |= ((be_storage_tag >> (sts - 16)) & 0xffff);
else
- cdw3 |= ((storage_tag << (16 - sts)) & 0xffff);
+ cdw3 |= ((be_storage_tag << (16 - sts)) & 0xffff);
break;
default:
perror("Unsupported Protection Information Format");
@@ -1793,6 +1977,10 @@ int nvme_copy(struct nvme_copy_args *args)
if (args->format == 1)
data_len = args->nr * sizeof(struct nvme_copy_range_f1);
+ else if (args->format == 2)
+ data_len = args->nr * sizeof(struct nvme_copy_range_f2);
+ else if (args->format == 3)
+ data_len = args->nr * sizeof(struct nvme_copy_range_f3);
else
data_len = args->nr * sizeof(struct nvme_copy_range);
diff --git a/src/nvme/ioctl.h b/src/nvme/ioctl.h
index 4d843bc..4a0698f 100644
--- a/src/nvme/ioctl.h
+++ b/src/nvme/ioctl.h
@@ -748,7 +748,6 @@ static inline int nvme_identify_primary_ctrl(int fd, __u16 cntid,
/**
* nvme_identify_secondary_ctrl_list() - Retrieves secondary controller list
* @fd: File descriptor of nvme device
- * @nsid: Namespace identifier
* @cntid: Return controllers starting at this identifier
* @sc_list: User space destination address to transfer the data
*
@@ -763,7 +762,7 @@ static inline int nvme_identify_primary_ctrl(int fd, __u16 cntid,
* Return: The nvme command status if a response was received (see
* &enum nvme_status_field) or -1 with errno set otherwise.
*/
-static inline int nvme_identify_secondary_ctrl_list(int fd, __u32 nsid,
+static inline int nvme_identify_secondary_ctrl_list(int fd,
__u16 cntid, struct nvme_secondary_ctrl_list *sc_list)
{
struct nvme_identify_args args = {
@@ -774,7 +773,7 @@ static inline int nvme_identify_secondary_ctrl_list(int fd, __u32 nsid,
.timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
.cns = NVME_IDENTIFY_CNS_SECONDARY_CTRL_LIST,
.csi = NVME_CSI_NVM,
- .nsid = nsid,
+ .nsid = NVME_NSID_NONE,
.cntid = cntid,
.cns_specific_id = NVME_CNSSPECID_NONE,
.uuidx = NVME_UUID_NONE,
@@ -981,21 +980,8 @@ static inline int nvme_identify_allocated_ns_list_csi(int fd, __u32 nsid,
static inline int nvme_identify_independent_identify_ns(int fd, __u32 nsid,
struct nvme_id_independent_id_ns *ns)
{
- struct nvme_identify_args args = {
- .result = NULL,
- .data = ns,
- .args_size = sizeof(args),
- .fd = fd,
- .timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
- .cns = NVME_IDENTIFY_CNS_CSI_INDEPENDENT_ID_NS,
- .csi = NVME_CSI_NVM,
- .nsid = nsid,
- .cntid = NVME_CNTLID_NONE,
- .cns_specific_id = NVME_CNSSPECID_NONE,
- .uuidx = NVME_UUID_NONE,
- };
-
- return nvme_identify(&args);
+ return nvme_identify_cns_nsid(
+ fd, NVME_IDENTIFY_CNS_CSI_INDEPENDENT_ID_NS, nsid, ns);
}
/**
@@ -1194,20 +1180,8 @@ static inline int nvme_identify_iocs(int fd, __u16 cntlid,
static inline int nvme_zns_identify_ns(int fd, __u32 nsid,
struct nvme_zns_id_ns *data)
{
- struct nvme_identify_args args = {
- .result = NULL,
- .data = data,
- .args_size = sizeof(args),
- .fd = fd,
- .timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
- .cns = NVME_IDENTIFY_CNS_CSI_NS,
- .csi = NVME_CSI_ZNS,
- .nsid = nsid,
- .cntid = NVME_CNTLID_NONE,
- .cns_specific_id = NVME_CNSSPECID_NONE,
- };
-
- return nvme_identify(&args);
+ return nvme_identify_ns_csi(
+ fd, nsid, NVME_UUID_NONE, NVME_CSI_ZNS, data);
}
/**
@@ -1946,7 +1920,7 @@ static inline int nvme_get_log_boot_partition(int fd, bool rae,
.nsid = NVME_NSID_NONE,
.csi = NVME_CSI_NVM,
.lsi = NVME_LOG_LSI_NONE,
- .lsp = NVME_LOG_LSP_NONE,
+ .lsp = lsp,
.uuidx = NVME_UUID_NONE,
.rae = rae,
.ot = false,
@@ -1955,6 +1929,41 @@ static inline int nvme_get_log_boot_partition(int fd, bool rae,
}
/**
+ * nvme_get_log_phy_rx_eom() - Retrieve Physical Interface Receiver Eye Opening Measurement Log
+ * @fd: File descriptor of nvme device
+ * @lsp: Log specific, controls action and measurement quality
+ * @controller: Target controller ID
+ * @len: The allocated size of @log; must be at least
+ * sizeof(struct nvme_phy_rx_eom_log)
+ * @log: User address to store the log page
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise
+ */
+static inline int nvme_get_log_phy_rx_eom(int fd, __u8 lsp, __u16 controller,
+		__u32 len, struct nvme_phy_rx_eom_log *log)
+{
+	struct nvme_get_log_args cmd = {
+		/* destination buffer, read from offset 0 of the log page */
+		.log = log,
+		.len = len,
+		.lpo = 0,
+		.result = NULL,
+		/* transport */
+		.args_size = sizeof(cmd),
+		.fd = fd,
+		.timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
+		/* log selection: the controller ID is passed in the LSI field */
+		.lid = NVME_LOG_LID_PHY_RX_EOM,
+		.lsi = controller,
+		.lsp = lsp,
+		.nsid = NVME_NSID_NONE,
+		.csi = NVME_CSI_NVM,
+		.uuidx = NVME_UUID_NONE,
+		.rae = false,
+		.ot = false,
+	};
+
+	return nvme_get_log_page(fd, NVME_LOG_PAGE_PDU_SIZE, &cmd);
+}
+
+/**
* nvme_get_log_discovery() - Retrieve Discovery log page
* @fd: File descriptor of nvme device
* @rae: Retain asynchronous events
@@ -2266,7 +2275,7 @@ int nvme_set_features_power_mgmt(int fd, __u8 ps, __u8 wh, bool save,
* Return: The nvme command status if a response was received (see
* &enum nvme_status_field) or -1 with errno set otherwise.
*/
-int nvme_set_features_lba_range(int fd, __u32 nsid, __u32 nr_ranges, bool save,
+int nvme_set_features_lba_range(int fd, __u32 nsid, __u8 nr_ranges, bool save,
struct nvme_lba_range_type *data, __u32 *result);
/**
@@ -2542,7 +2551,25 @@ int nvme_set_features_host_id(int fd, bool exhid, bool save, __u8 *hostid);
/**
* nvme_set_features_resv_mask() - Set reservation notification mask feature
+ *
+ * Deprecated: doesn't support specifying a NSID.
+ * Use nvme_set_features_resv_mask2() instead.
+ *
+ * @fd: File descriptor of nvme device
+ * @mask: Reservation Notification Mask Field
+ * @save: Save value across power states
+ * @result: The command completion result from CQE dword0
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise.
+ */
+int nvme_set_features_resv_mask(int fd, __u32 mask, bool save, __u32 *result)
+ __attribute__((deprecated));
+
+/**
+ * nvme_set_features_resv_mask2() - Set reservation notification mask feature
* @fd: File descriptor of nvme device
+ * @nsid: Namespace ID
* @mask: Reservation Notification Mask Field
* @save: Save value across power states
* @result: The command completion result from CQE dword0
@@ -2550,11 +2577,30 @@ int nvme_set_features_host_id(int fd, bool exhid, bool save, __u8 *hostid);
* Return: The nvme command status if a response was received (see
* &enum nvme_status_field) or -1 with errno set otherwise.
*/
-int nvme_set_features_resv_mask(int fd, __u32 mask, bool save, __u32 *result);
+int nvme_set_features_resv_mask2(int fd, __u32 nsid, __u32 mask, bool save,
+ __u32 *result);
/**
* nvme_set_features_resv_persist() - Set persist through power loss feature
+ *
+ * Deprecated: doesn't support specifying a NSID.
+ * Use nvme_set_features_resv_persist2() instead.
+ *
+ * @fd: File descriptor of nvme device
+ * @ptpl: Persist Through Power Loss
+ * @save: Save value across power states
+ * @result: The command completion result from CQE dword0
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise.
+ */
+int nvme_set_features_resv_persist(int fd, bool ptpl, bool save, __u32 *result)
+ __attribute__((deprecated));
+
+/**
+ * nvme_set_features_resv_persist2() - Set persist through power loss feature
* @fd: File descriptor of nvme device
+ * @nsid: Namespace ID
* @ptpl: Persist Through Power Loss
* @save: Save value across power states
* @result: The command completion result from CQE dword0
@@ -2562,10 +2608,15 @@ int nvme_set_features_resv_mask(int fd, __u32 mask, bool save, __u32 *result);
* Return: The nvme command status if a response was received (see
* &enum nvme_status_field) or -1 with errno set otherwise.
*/
-int nvme_set_features_resv_persist(int fd, bool ptpl, bool save, __u32 *result);
+int nvme_set_features_resv_persist2(int fd, __u32 nsid, bool ptpl, bool save,
+ __u32 *result);
/**
* nvme_set_features_write_protect() - Set write protect feature
+ *
+ * Deprecated: doesn't support specifying a NSID.
+ * Use nvme_set_features_write_protect2() instead.
+ *
* @fd: File descriptor of nvme device
* @state: Write Protection State
* @save: Save value across power states
@@ -2575,7 +2626,34 @@ int nvme_set_features_resv_persist(int fd, bool ptpl, bool save, __u32 *result);
* &enum nvme_status_field) or -1 with errno set otherwise.
*/
int nvme_set_features_write_protect(int fd, enum nvme_feat_nswpcfg_state state,
- bool save, __u32 *result);
+ bool save, __u32 *result)
+ __attribute__((deprecated));
+
+/**
+ * nvme_set_features_write_protect2() - Set write protect feature
+ * @fd: File descriptor of nvme device
+ * @nsid: Namespace ID
+ * @state: Write Protection State
+ * @save: Save value across power states
+ * @result: The command completion result from CQE dword0
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise.
+ */
+int nvme_set_features_write_protect2(int fd, __u32 nsid,
+ enum nvme_feat_nswpcfg_state state,
+ bool save, __u32 *result);
+
+/**
+ * nvme_set_features_iocs_profile() - Set I/O command set profile feature
+ * @fd: File descriptor of nvme device
+ * @iocsi: I/O Command Set Combination Index
+ * @save: Save value across power states
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise.
+ */
+int nvme_set_features_iocs_profile(int fd, __u16 iocsi, bool save);
/**
* nvme_get_features() - Retrieve a feature attribute
@@ -2660,6 +2738,10 @@ int nvme_get_features_power_mgmt(int fd, enum nvme_get_features_sel sel,
/**
* nvme_get_features_lba_range() - Get LBA range feature
+ *
+ * Deprecated: doesn't support specifying a NSID.
+ * Use nvme_get_features_lba_range2() instead.
+ *
* @fd: File descriptor of nvme device
* @sel: Select which type of attribute to return, see &enum nvme_get_features_sel
* @data: User address of feature data, if applicable
@@ -2670,7 +2752,22 @@ int nvme_get_features_power_mgmt(int fd, enum nvme_get_features_sel sel,
*/
int nvme_get_features_lba_range(int fd, enum nvme_get_features_sel sel,
struct nvme_lba_range_type *data,
- __u32 *result);
+ __u32 *result) __attribute__((deprecated));
+
+/**
+ * nvme_get_features_lba_range2() - Get LBA range feature
+ * @fd: File descriptor of nvme device
+ * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel
+ * @nsid: Namespace ID
+ * @data: Buffer to receive LBA Range Type data structure
+ * @result: The command completion result from CQE dword0
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise.
+ */
+int nvme_get_features_lba_range2(int fd, enum nvme_get_features_sel sel,
+ __u32 nsid, struct nvme_lba_range_type *data,
+ __u32 *result);
/**
* nvme_get_features_temp_thresh() - Get temperature threshold feature
@@ -2686,6 +2783,10 @@ int nvme_get_features_temp_thresh(int fd, enum nvme_get_features_sel sel,
/**
* nvme_get_features_err_recovery() - Get error recovery feature
+ *
+ * Deprecated: doesn't support specifying a NSID.
+ * Use nvme_get_features_err_recovery2() instead.
+ *
* @fd: File descriptor of nvme device
* @sel: Select which type of attribute to return, see &enum nvme_get_features_sel
* @result: The command completion result from CQE dword0
@@ -2694,7 +2795,20 @@ int nvme_get_features_temp_thresh(int fd, enum nvme_get_features_sel sel,
* &enum nvme_status_field) or -1 with errno set otherwise.
*/
int nvme_get_features_err_recovery(int fd, enum nvme_get_features_sel sel,
- __u32 *result);
+ __u32 *result) __attribute__((deprecated));
+
+/**
+ * nvme_get_features_err_recovery2() - Get error recovery feature
+ * @fd: File descriptor of nvme device
+ * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel
+ * @nsid: Namespace ID
+ * @result: The command completion result from CQE dword0
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise.
+ */
+int nvme_get_features_err_recovery2(int fd, enum nvme_get_features_sel sel,
+ __u32 nsid, __u32 *result);
/**
* nvme_get_features_volatile_wc() - Get volatile write cache feature
@@ -2784,6 +2898,10 @@ int nvme_get_features_auto_pst(int fd, enum nvme_get_features_sel sel,
/**
* nvme_get_features_host_mem_buf() - Get host memory buffer feature
+ *
+ * Deprecated: doesn't fetch the Host Memory Buffer Attributes data structure.
+ * Use nvme_get_features_host_mem_buf2() instead.
+ *
* @fd: File descriptor of nvme device
* @sel: Select which type of attribute to return, see &enum nvme_get_features_sel
* @result: The command completion result from CQE dword0
@@ -2792,7 +2910,21 @@ int nvme_get_features_auto_pst(int fd, enum nvme_get_features_sel sel,
* &enum nvme_status_field) or -1 with errno set otherwise.
*/
int nvme_get_features_host_mem_buf(int fd, enum nvme_get_features_sel sel,
- __u32 *result);
+ __u32 *result) __attribute__((deprecated));
+
+/**
+ * nvme_get_features_host_mem_buf2() - Get host memory buffer feature
+ * @fd: File descriptor of nvme device
+ * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel
+ * @attrs: Buffer for returned Host Memory Buffer Attributes
+ * @result: The command completion result from CQE dword0
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise.
+ */
+int nvme_get_features_host_mem_buf2(int fd, enum nvme_get_features_sel sel,
+ struct nvme_host_mem_buf_attrs *attrs,
+ __u32 *result);
/**
* nvme_get_features_timestamp() - Get timestamp feature
@@ -2957,6 +3089,10 @@ int nvme_get_features_host_id(int fd, enum nvme_get_features_sel sel,
/**
* nvme_get_features_resv_mask() - Get reservation mask feature
+ *
+ * Deprecated: doesn't support specifying a NSID.
+ * Use nvme_get_features_resv_mask2() instead.
+ *
* @fd: File descriptor of nvme device
* @sel: Select which type of attribute to return, see &enum nvme_get_features_sel
* @result: The command completion result from CQE dword0
@@ -2965,10 +3101,27 @@ int nvme_get_features_host_id(int fd, enum nvme_get_features_sel sel,
* &enum nvme_status_field) or -1 with errno set otherwise.
*/
int nvme_get_features_resv_mask(int fd, enum nvme_get_features_sel sel,
- __u32 *result);
+ __u32 *result) __attribute__((deprecated));
+
+/**
+ * nvme_get_features_resv_mask2() - Get reservation mask feature
+ * @fd: File descriptor of nvme device
+ * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel
+ * @nsid: Namespace ID
+ * @result: The command completion result from CQE dword0
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise.
+ */
+int nvme_get_features_resv_mask2(int fd, enum nvme_get_features_sel sel,
+ __u32 nsid, __u32 *result);
/**
* nvme_get_features_resv_persist() - Get reservation persist feature
+ *
+ * Deprecated: doesn't support specifying a NSID.
+ * Use nvme_get_features_resv_persist2() instead.
+ *
* @fd: File descriptor of nvme device
* @sel: Select which type of attribute to return, see &enum nvme_get_features_sel
* @result: The command completion result from CQE dword0
@@ -2977,7 +3130,20 @@ int nvme_get_features_resv_mask(int fd, enum nvme_get_features_sel sel,
* &enum nvme_status_field) or -1 with errno set otherwise.
*/
int nvme_get_features_resv_persist(int fd, enum nvme_get_features_sel sel,
- __u32 *result);
+ __u32 *result) __attribute__((deprecated));
+
+/**
+ * nvme_get_features_resv_persist2() - Get reservation persist feature
+ * @fd: File descriptor of nvme device
+ * @sel: Select which type of attribute to return, see &enum nvme_get_features_sel
+ * @nsid: Namespace ID
+ * @result: The command completion result from CQE dword0
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise.
+ */
+int nvme_get_features_resv_persist2(int fd, enum nvme_get_features_sel sel,
+ __u32 nsid, __u32 *result);
/**
* nvme_get_features_write_protect() - Get write protect feature
@@ -3881,4 +4047,16 @@ int nvme_zns_append(struct nvme_zns_append_args *args);
*/
int nvme_dim_send(struct nvme_dim_args *args);
+/**
+ * nvme_set_debug() - Set NVMe command debugging output
+ * @debug: true to enable or false to disable
+ */
+void nvme_set_debug(bool debug);
+
+/**
+ * nvme_get_debug() - Get NVMe command debugging output
+ *
+ * Return: false if disabled or true if enabled.
+ */
+bool nvme_get_debug(void);
#endif /* _LIBNVME_IOCTL_H */
diff --git a/src/nvme/json.c b/src/nvme/json.c
index 7a5a69e..4d0f987 100644
--- a/src/nvme/json.c
+++ b/src/nvme/json.c
@@ -14,6 +14,7 @@
#include <json.h>
+#include "cleanup.h"
#include "fabrics.h"
#include "log.h"
#include "private.h"
@@ -189,31 +190,34 @@ static void json_parse_host(nvme_root_t r, struct json_object *host_obj)
}
}
+static DEFINE_CLEANUP_FUNC(cleanup_tokener, json_tokener *, json_tokener_free)
+#define _cleanup_tokener_ __cleanup__(cleanup_tokener)
+
static struct json_object *parse_json(nvme_root_t r, int fd)
{
char buf[JSON_FILE_BUF_SIZE];
- struct json_object *obj = NULL;
+ struct json_object *obj;
char *str = NULL;
- json_tokener *tok = NULL;
+ _cleanup_tokener_ json_tokener *tok = NULL;
int ret;
- void *ptr = NULL;
+ _cleanup_free_ void *ptr = NULL;
int len = 0;
while ((ret = read(fd, buf, JSON_FILE_BUF_SIZE)) > 0) {
str = realloc(ptr, len + ret);
if (!str)
- goto out;
+ return NULL;
memcpy(&str[len], buf, ret);
len += ret;
ptr = str;
}
if (ret < 0 || !len)
- goto out;
+ return NULL;
tok = json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
if (!tok)
- goto out;
+ return NULL;
/* Enforce correctly formatted JSON */
tok->flags = JSON_TOKENER_STRICT;
@@ -222,10 +226,6 @@ static struct json_object *parse_json(nvme_root_t r, int fd)
if (!obj)
nvme_msg(r, LOG_DEBUG, "JSON parsing failed: %s\n",
json_util_get_last_err());
-out:
- if (tok)
- json_tokener_free(tok);
- free(ptr);
return obj;
}
@@ -335,21 +335,21 @@ static void json_update_port(struct json_object *ctrl_array, nvme_ctrl_t c)
* Store the keyring description in the JSON config file.
*/
if (cfg->keyring) {
- char *desc = nvme_describe_key_serial(cfg->keyring);
+ _cleanup_free_ char *desc =
+ nvme_describe_key_serial(cfg->keyring);
if (desc) {
json_object_object_add(port_obj, "keyring",
json_object_new_string(desc));
- free(desc);
}
}
if (cfg->tls_key) {
- char *desc = nvme_describe_key_serial(cfg->tls_key);
+ _cleanup_free_ char *desc =
+ nvme_describe_key_serial(cfg->tls_key);
if (desc) {
json_object_object_add(port_obj, "tls_key",
json_object_new_string(desc));
- free(desc);
}
}
diff --git a/src/nvme/linux.c b/src/nvme/linux.c
index c6eedc2..163086e 100644
--- a/src/nvme/linux.c
+++ b/src/nvme/linux.c
@@ -35,15 +35,17 @@
#include <ccan/endian/endian.h>
+#include "cleanup.h"
#include "linux.h"
#include "tree.h"
#include "log.h"
#include "private.h"
+#include "base64.h"
static int __nvme_open(const char *name)
{
- char *path;
- int fd, ret;
+ _cleanup_free_ char *path = NULL;
+ int ret;
ret = asprintf(&path, "%s/%s", "/dev", name);
if (ret < 0) {
@@ -51,9 +53,7 @@ static int __nvme_open(const char *name)
return -1;
}
- fd = open(path, O_RDONLY);
- free(path);
- return fd;
+ return open(path, O_RDONLY);
}
int nvme_open(const char *name)
@@ -122,17 +122,51 @@ int nvme_fw_download_seq(int fd, __u32 size, __u32 xfer, __u32 offset,
return err;
}
-static int nvme_get_telemetry_log(int fd, bool create, bool ctrl, bool rae,
- struct nvme_telemetry_log **buf, enum nvme_telemetry_da da,
- size_t *size)
+/*
+ * nvme_get_telemetry_max - Query telemetry limits from Identify Controller
+ * @fd:		File descriptor of nvme device
+ * @da:		Optional output: data area selected from the LPA field (area 3
+ *		when bit 3 is set, area 4 when bit 6 is set). It is left
+ *		unmodified when neither bit is set, so callers should
+ *		pre-initialize it.
+ * @data_tx:	Optional output: transfer size in bytes derived from MDTS, or 0
+ *		when MDTS is 0.
+ *
+ * Return: 0 on success, -1 with errno set to ENOMEM when the identify buffer
+ * cannot be allocated, otherwise the result of nvme_identify_ctrl().
+ */
+int nvme_get_telemetry_max(int fd, enum nvme_telemetry_da *da, size_t *data_tx)
+{
+	/* Initialized at declaration so the cleanup handler never sees garbage. */
+	_cleanup_free_ struct nvme_id_ctrl *id_ctrl =
+		__nvme_alloc(sizeof(*id_ctrl));
+	int err;
+
+	if (!id_ctrl) {
+		errno = ENOMEM;
+		return -1;
+	}
+	err = nvme_identify_ctrl(fd, id_ctrl);
+	if (err)
+		return err;
+
+	if (data_tx) {
+		/*
+		 * The size is (1 << MDTS) memory pages. CAP.MPSMIN is assumed
+		 * to be zero, i.e. a page is 4096 bytes (1 << 12). The shift is
+		 * done in size_t and saturates instead of overflowing: the
+		 * previous int arithmetic was undefined for MDTS >= 19.
+		 */
+		unsigned int shift = id_ctrl->mdts + 12;
+
+		if (!id_ctrl->mdts)
+			*data_tx = 0;
+		else if (shift < sizeof(size_t) * 8)
+			*data_tx = (size_t)1 << shift;
+		else
+			*data_tx = (size_t)-1;
+	}
+	if (da) {
+		/* Bit 6 is tested last so data area 4 wins when both are set. */
+		if (id_ctrl->lpa & 0x8)
+			*da = NVME_TELEMETRY_DA_3;
+		if (id_ctrl->lpa & 0x40)
+			*da = NVME_TELEMETRY_DA_4;
+	}
+	return 0;
+}
+
+int nvme_get_telemetry_log(int fd, bool create, bool ctrl, bool rae, size_t max_data_tx,
+ enum nvme_telemetry_da da, struct nvme_telemetry_log **buf,
+ size_t *size)
{
static const __u32 xfer = NVME_LOG_TELEM_BLOCK_SIZE;
struct nvme_telemetry_log *telem;
enum nvme_cmd_get_log_lid lid;
- struct nvme_id_ctrl id_ctrl;
- void *log, *tmp;
+ _cleanup_free_ void *log;
+ void *tmp;
int err;
+ size_t dalb;
struct nvme_get_log_args args = {
.args_size = sizeof(args),
.fd = fd,
@@ -167,89 +201,101 @@ static int nvme_get_telemetry_log(int fd, bool create, bool ctrl, bool rae,
}
if (err)
- goto free;
+ return err;
telem = log;
if (ctrl && !telem->ctrlavail) {
*buf = log;
+ log = NULL;
*size = xfer;
return 0;
}
switch (da) {
case NVME_TELEMETRY_DA_1:
+ dalb = le16_to_cpu(telem->dalb1);
+ break;
case NVME_TELEMETRY_DA_2:
+ dalb = le16_to_cpu(telem->dalb2);
+ break;
case NVME_TELEMETRY_DA_3:
/* dalb3 >= dalb2 >= dalb1 */
- *size = (le16_to_cpu(telem->dalb3) + 1) * xfer;
+ dalb = le16_to_cpu(telem->dalb3);
break;
case NVME_TELEMETRY_DA_4:
- err = nvme_identify_ctrl(fd, &id_ctrl);
- if (err) {
- perror("identify-ctrl");
- errno = EINVAL;
- goto free;
- }
-
- if (id_ctrl.lpa & 0x40) {
- *size = (le32_to_cpu(telem->dalb4) + 1) * xfer;
- } else {
- fprintf(stderr, "Data area 4 unsupported, bit 6 of Log Page Attributes not set\n");
- errno = EINVAL;
- err = -1;
- goto free;
- }
+ dalb = le32_to_cpu(telem->dalb4);
break;
default:
- fprintf(stderr, "Invalid data area parameter - %d\n", da);
errno = EINVAL;
- err = -1;
- goto free;
+ return -1;
}
+ if (dalb == 0) {
+ errno = ENOENT;
+ return -1;
+ }
+
+ *size = (dalb + 1) * xfer;
tmp = realloc(log, *size);
if (!tmp) {
errno = ENOMEM;
- err = -1;
- goto free;
+ return -1;
}
log = tmp;
args.lid = lid;
args.log = log;
args.len = *size;
- err = nvme_get_log_page(fd, 4096, &args);
- if (!err) {
- *buf = log;
- return 0;
+ err = nvme_get_log_page(fd, max_data_tx, &args);
+ if (err)
+ return err;
+
+ *buf = log;
+ log = NULL;
+ return 0;
+}
+
+
+static int nvme_check_get_telemetry_log(int fd, bool create, bool ctrl, bool rae,
+ struct nvme_telemetry_log **log, enum nvme_telemetry_da da,
+ size_t *size)
+{
+ enum nvme_telemetry_da max_da = 0;
+ int err = nvme_get_telemetry_max(fd, &max_da, NULL);
+
+ if (err)
+ return err;
+ if (da > max_da) {
+ errno = ENOENT;
+ return -1;
}
-free:
- free(log);
- return err;
+ return nvme_get_telemetry_log(fd, create, ctrl, rae, 4096, da, log, size);
}
+
int nvme_get_ctrl_telemetry(int fd, bool rae, struct nvme_telemetry_log **log,
enum nvme_telemetry_da da, size_t *size)
{
- return nvme_get_telemetry_log(fd, false, true, rae, log, da, size);
+ return nvme_check_get_telemetry_log(fd, false, true, rae, log, da, size);
}
int nvme_get_host_telemetry(int fd, struct nvme_telemetry_log **log,
enum nvme_telemetry_da da, size_t *size)
{
- return nvme_get_telemetry_log(fd, false, false, false, log, da, size);
+ return nvme_check_get_telemetry_log(fd, false, false, false, log, da, size);
}
int nvme_get_new_host_telemetry(int fd, struct nvme_telemetry_log **log,
enum nvme_telemetry_da da, size_t *size)
{
- return nvme_get_telemetry_log(fd, true, false, false, log, da, size);
+ return nvme_check_get_telemetry_log(fd, true, false, false, log, da, size);
}
int nvme_get_lba_status_log(int fd, bool rae, struct nvme_lba_status_log **log)
{
- __u32 size = sizeof(struct nvme_lba_status_log);
- void *buf, *tmp;
+ __u32 size;
+ _cleanup_free_ struct nvme_lba_status_log *buf;
+ void *tmp;
int err;
struct nvme_get_log_args args = {
.args_size = sizeof(args),
@@ -265,38 +311,42 @@ int nvme_get_lba_status_log(int fd, bool rae, struct nvme_lba_status_log **log)
.ot = false,
};
- buf = malloc(size);
+ buf = malloc(sizeof(*buf));
if (!buf)
return -1;
- *log = buf;
- err = nvme_get_log_lba_status(fd, true, 0, size, buf);
- if (err)
- goto free;
+ err = nvme_get_log_lba_status(fd, true, 0, sizeof(*buf), buf);
+ if (err) {
+ *log = NULL;
+ return err;
+ }
- size = le32_to_cpu((*log)->lslplen);
- if (!size)
+ size = le32_to_cpu(buf->lslplen);
+ if (!size) {
+ *log = buf;
+ buf = NULL;
return 0;
+ }
tmp = realloc(buf, size);
if (!tmp) {
- err = -1;
- goto free;
+ *log = NULL;
+ return -1;
}
buf = tmp;
- *log = buf;
args.lid = NVME_LOG_LID_LBA_STATUS;
args.log = buf;
args.len = size;
err = nvme_get_log_page(fd, 4096, &args);
- if (!err)
- return 0;
+ if (err) {
+ *log = NULL;
+ return err;
+ }
-free:
- *log = NULL;
- free(buf);
- return err;
+ *log = buf;
+ buf = NULL;
+ return 0;
}
static int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls,
@@ -335,38 +385,48 @@ int nvme_namespace_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls,
int nvme_get_ana_log_len(int fd, size_t *analen)
{
- struct nvme_id_ctrl ctrl;
+ _cleanup_free_ struct nvme_id_ctrl *ctrl;
int ret;
- ret = nvme_identify_ctrl(fd, &ctrl);
+ ctrl = __nvme_alloc(sizeof(*ctrl));
+ if (!ctrl) {
+ errno = ENOMEM;
+ return -1;
+ }
+ ret = nvme_identify_ctrl(fd, ctrl);
if (ret)
return ret;
*analen = sizeof(struct nvme_ana_log) +
- le32_to_cpu(ctrl.nanagrpid) * sizeof(struct nvme_ana_group_desc) +
- le32_to_cpu(ctrl.mnan) * sizeof(__le32);
+ le32_to_cpu(ctrl->nanagrpid) * sizeof(struct nvme_ana_group_desc) +
+ le32_to_cpu(ctrl->mnan) * sizeof(__le32);
return 0;
}
int nvme_get_logical_block_size(int fd, __u32 nsid, int *blksize)
{
- struct nvme_id_ns ns;
+ _cleanup_free_ struct nvme_id_ns *ns;
__u8 flbas;
int ret;
- ret = nvme_identify_ns(fd, nsid, &ns);
+ ns = __nvme_alloc(sizeof(*ns));
+ if (!ns) {
+ errno = ENOMEM;
+ return -1;
+ }
+ ret = nvme_identify_ns(fd, nsid, ns);
if (ret)
return ret;
- nvme_id_ns_flbas_to_lbaf_inuse(ns.flbas, &flbas);
- *blksize = 1 << ns.lbaf[flbas].ds;
+ nvme_id_ns_flbas_to_lbaf_inuse(ns->flbas, &flbas);
+ *blksize = 1 << ns->lbaf[flbas].ds;
return 0;
}
static int __nvme_set_attr(const char *path, const char *value)
{
- int ret, fd;
+ _cleanup_fd_ int fd;
fd = open(path, O_WRONLY);
if (fd < 0) {
@@ -376,23 +436,19 @@ static int __nvme_set_attr(const char *path, const char *value)
#endif
return -1;
}
- ret = write(fd, value, strlen(value));
- close(fd);
- return ret;
+ return write(fd, value, strlen(value));
}
int nvme_set_attr(const char *dir, const char *attr, const char *value)
{
- char *path;
+ _cleanup_free_ char *path = NULL;
int ret;
ret = asprintf(&path, "%s/%s", dir, attr);
if (ret < 0)
return -1;
- ret = __nvme_set_attr(path, value);
- free(path);
- return ret;
+ return __nvme_set_attr(path, value);
}
static char *__nvme_get_attr(const char *path)
@@ -426,7 +482,7 @@ static char *__nvme_get_attr(const char *path)
char *nvme_get_attr(const char *dir, const char *attr)
{
- char *path, *value;
+ _cleanup_free_ char *path = NULL;
int ret;
ret = asprintf(&path, "%s/%s", dir, attr);
@@ -435,9 +491,7 @@ char *nvme_get_attr(const char *dir, const char *attr)
return NULL;
}
- value = __nvme_get_attr(path);
- free(path);
- return value;
+ return __nvme_get_attr(path);
}
char *nvme_get_subsys_attr(nvme_subsystem_t s, const char *attr)
@@ -476,21 +530,80 @@ int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac,
return 0;
}
-static int derive_nvme_keys(const char *hostnqn, const char *identity,
- int hmac, unsigned char *configured,
- unsigned char *psk, int key_len)
+/*
+ * Fallback derive_retained_key(): per the message it logs, this variant is
+ * used when the library is built without OpenSSL support. None of the
+ * arguments are used and no key is produced.
+ *
+ * Return: always -1, with errno set to ENOTSUP.
+ */
+static int derive_retained_key(int hmac, const char *hostnqn,
+			       unsigned char *generated,
+			       unsigned char *retained,
+			       size_t key_len)
+{
+	nvme_msg(NULL, LOG_ERR,
+		 "NVMe TLS is not supported; recompile with OpenSSL support.\n");
+
+	/* errno is assigned after logging so the logger cannot clobber it */
+	errno = ENOTSUP;
+	return -1;
+}
+
+static int gen_tls_identity(const char *hostnqn, const char *subsysnqn,
+ int version, int hmac, char *identity,
+ unsigned char *retained, size_t key_len)
{
- errno = EOPNOTSUPP;
+ if (version != 0) {
+ nvme_msg(NULL, LOG_ERR, "NVMe TLS 2.0 is not supported; "
+ "recompile with OpenSSL support.\n");
+ errno = ENOTSUP;
+ return -1;
+ }
+ sprintf(identity, "NVMe0R%02d %s %s",
+ hmac, hostnqn, subsysnqn);
+ return strlen(identity);
+}
+
+static int derive_tls_key(int hmac, const char *identity,
+ unsigned char *retained,
+ unsigned char *psk, size_t key_len)
+{
+ nvme_msg(NULL, LOG_ERR, "NVMe TLS is not supported; "
+ "recompile with OpenSSL support.\n");
+ errno = ENOTSUP;
return -1;
}
#else /* CONFIG_OPENSSL */
-static int derive_retained_key(const EVP_MD *md, const char *hostnqn,
+/*
+ * select_hmac() - Map an NVMe HMAC identifier to an OpenSSL digest
+ * @hmac:	HMAC algorithm identifier
+ * @key_len:	Receives 32 for SHA2-256 or 48 for SHA2-384; left untouched
+ *		for any other identifier
+ *
+ * Return: The matching EVP_MD, or NULL if @hmac is not one of the two
+ * supported algorithms.
+ */
+static const EVP_MD *select_hmac(int hmac, size_t *key_len)
+{
+	if (hmac == NVME_HMAC_ALG_SHA2_256) {
+		*key_len = 32;
+		return EVP_sha256();
+	}
+
+	if (hmac == NVME_HMAC_ALG_SHA2_384) {
+		*key_len = 48;
+		return EVP_sha384();
+	}
+
+	return NULL;
+}
+
+static DEFINE_CLEANUP_FUNC(
+ cleanup_evp_pkey_ctx, EVP_PKEY_CTX *, EVP_PKEY_CTX_free)
+#define _cleanup_evp_pkey_ctx_ __cleanup__(cleanup_evp_pkey_ctx)
+
+static int derive_retained_key(int hmac, const char *hostnqn,
unsigned char *generated,
unsigned char *retained,
size_t key_len)
{
- EVP_PKEY_CTX *ctx;
- int ret;
+ const EVP_MD *md;
+ _cleanup_evp_pkey_ctx_ EVP_PKEY_CTX *ctx = NULL;
+ uint16_t length = key_len & 0xFFFF;
+ size_t hmac_len;
+
+ md = select_hmac(hmac, &hmac_len);
+ if (!md || hmac_len > key_len) {
+ errno = EINVAL;
+ return -1;
+ }
ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL);
if (!ctx) {
@@ -499,42 +612,60 @@ static int derive_retained_key(const EVP_MD *md, const char *hostnqn,
}
if (EVP_PKEY_derive_init(ctx) <= 0) {
- ret = -ENOMEM;
- goto out_free_ctx;
- }
- ret = -ENOKEY;
- if (EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0)
- goto out_free_ctx;
- if (EVP_PKEY_CTX_set1_hkdf_key(ctx, generated, key_len) <= 0)
- goto out_free_ctx;
+ errno = ENOMEM;
+ return -1;
+ }
+ if (EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0) {
+ errno = ENOKEY;
+ return -1;
+ }
+ if (EVP_PKEY_CTX_set1_hkdf_key(ctx, generated, key_len) <= 0) {
+ errno = ENOKEY;
+ return -1;
+ }
if (EVP_PKEY_CTX_add1_hkdf_info(ctx,
- (const unsigned char *)"tls13 ", 6) <= 0)
- goto out_free_ctx;
+ (const unsigned char *)&length, 2) <= 0) {
+ errno = ENOKEY;
+ return -1;
+ }
if (EVP_PKEY_CTX_add1_hkdf_info(ctx,
- (const unsigned char *)"HostNQN", 7) <= 0)
- goto out_free_ctx;
+ (const unsigned char *)"tls13 ", 6) <= 0) {
+ errno = ENOKEY;
+ return -1;
+ }
if (EVP_PKEY_CTX_add1_hkdf_info(ctx,
- (const unsigned char *)hostnqn, strlen(hostnqn)) <= 0)
- goto out_free_ctx;
-
- if (EVP_PKEY_derive(ctx, retained, &key_len) > 0)
- ret = key_len;
+ (const unsigned char *)"HostNQN", 7) <= 0) {
+ errno = ENOKEY;
+ return -1;
+ }
+ if (EVP_PKEY_CTX_add1_hkdf_info(ctx,
+ (const unsigned char *)hostnqn, strlen(hostnqn)) <= 0) {
+ errno = ENOKEY;
+ return -1;
+ }
-out_free_ctx:
- if (ret < 0) {
- errno = -ret;
- ret = -1;
+ if (EVP_PKEY_derive(ctx, retained, &key_len) <= 0) {
+ errno = ENOKEY;
+ return -1;
}
- EVP_PKEY_CTX_free(ctx);
- return ret;
+
+ return key_len;
}
-static int derive_tls_key(const EVP_MD *md, const char *identity,
+static int derive_tls_key(int hmac, const char *identity,
unsigned char *retained,
unsigned char *psk, size_t key_len)
{
- EVP_PKEY_CTX *ctx;
- int ret;
+ const EVP_MD *md;
+ _cleanup_evp_pkey_ctx_ EVP_PKEY_CTX *ctx = NULL;
+ size_t hmac_len;
+ uint16_t length = key_len & 0xFFFF;
+
+ md = select_hmac(hmac, &hmac_len);
+ if (!md || hmac_len > key_len) {
+ errno = EINVAL;
+ return -1;
+ }
ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL);
if (!ctx) {
@@ -543,85 +674,59 @@ static int derive_tls_key(const EVP_MD *md, const char *identity,
}
if (EVP_PKEY_derive_init(ctx) <= 0) {
- ret = -ENOMEM;
- goto out_free_ctx;
- }
- ret = -ENOKEY;
- if (EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0)
- goto out_free_ctx;
- if (EVP_PKEY_CTX_set1_hkdf_key(ctx, retained, key_len) <= 0)
- goto out_free_ctx;
- if (EVP_PKEY_CTX_add1_hkdf_info(ctx,
- (const unsigned char *)"tls13 ", 6) <= 0)
- goto out_free_ctx;
+ errno = ENOMEM;
+ return -1;
+ }
+ if (EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0) {
+ errno = ENOKEY;
+ return -1;
+ }
+ if (EVP_PKEY_CTX_set1_hkdf_key(ctx, retained, key_len) <= 0) {
+ errno = ENOKEY;
+ return -1;
+ }
if (EVP_PKEY_CTX_add1_hkdf_info(ctx,
- (const unsigned char *)"nvme-tls-psk", 12) <= 0)
- goto out_free_ctx;
+ (const unsigned char *)&length, 2) <= 0) {
+ errno = ENOKEY;
+ return -1;
+ }
if (EVP_PKEY_CTX_add1_hkdf_info(ctx,
- (const unsigned char *)identity,
- strlen(identity)) <= 0)
- goto out_free_ctx;
-
- if (EVP_PKEY_derive(ctx, psk, &key_len) > 0)
- ret = key_len;
-
-out_free_ctx:
- EVP_PKEY_CTX_free(ctx);
- if (ret < 0) {
- errno = -ret;
- ret = -1;
+ (const unsigned char *)"tls13 ", 6) <= 0) {
+ errno = ENOKEY;
+ return -1;
}
-
- return ret;
-}
-
-static int derive_nvme_keys(const char *hostnqn, const char *identity,
- int hmac, unsigned char *configured,
- unsigned char *psk, int key_len)
-{
- const EVP_MD *md;
- unsigned char *retained;
- int ret = -1;
-
- if (!hostnqn || !identity) {
- errno = EINVAL;
+ if (EVP_PKEY_CTX_add1_hkdf_info(ctx,
+ (const unsigned char *)"nvme-tls-psk", 12) <= 0) {
+ errno = ENOKEY;
return -1;
}
-
- switch (hmac) {
- case 1:
- md = EVP_sha256();
- break;
- case 2:
- md = EVP_sha384();
- break;
- default:
- errno = EINVAL;
+ if (EVP_PKEY_CTX_add1_hkdf_info(ctx,
+ (const unsigned char *)identity,
+ strlen(identity)) <= 0) {
+ errno = ENOKEY;
return -1;
}
- retained = malloc(key_len);
- if (!retained) {
- errno = ENOMEM;
+ if (EVP_PKEY_derive(ctx, psk, &key_len) <= 0) {
+ errno = ENOKEY;
return -1;
}
- ret = derive_retained_key(md, hostnqn, configured, retained, key_len);
- if (ret > 0)
- ret = derive_tls_key(md, identity, retained, psk, key_len);
- free(retained);
- return ret;
+
+ return key_len;
}
#endif /* CONFIG_OPENSSL */
#ifdef CONFIG_OPENSSL_1
+static DEFINE_CLEANUP_FUNC(cleanup_hmac_ctx, HMAC_CTX *, HMAC_CTX_free)
+#define _cleanup_hmac_ctx_ __cleanup__(cleanup_hmac_ctx)
+
int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac,
unsigned int key_len, unsigned char *secret,
unsigned char *key)
{
const char hmac_seed[] = "NVMe-over-Fabrics";
- HMAC_CTX *hmac_ctx;
+ _cleanup_hmac_ctx_ HMAC_CTX *hmac_ctx;
const EVP_MD *md;
- int err = -1;
ENGINE_load_builtin_engines();
ENGINE_register_all_complete();
@@ -629,14 +734,13 @@ int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac,
hmac_ctx = HMAC_CTX_new();
if (!hmac_ctx) {
errno = ENOMEM;
- return err;
+ return -1;
}
switch (hmac) {
case NVME_HMAC_ALG_NONE:
memcpy(key, secret, key_len);
- err = 0;
- goto out;
+ return 0;
case NVME_HMAC_ALG_SHA2_256:
md = EVP_sha256();
break;
@@ -648,82 +752,164 @@ int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac,
break;
default:
errno = EINVAL;
- goto out;
+ return -1;
}
if (!md) {
errno = EINVAL;
- goto out;
+ return -1;
}
if (!HMAC_Init_ex(hmac_ctx, secret, key_len, md, NULL)) {
errno = ENOMEM;
- goto out;
+ return -1;
}
if (!HMAC_Update(hmac_ctx, (unsigned char *)hostnqn,
strlen(hostnqn))) {
errno = ENOKEY;
- goto out;
+ return -1;
}
if (!HMAC_Update(hmac_ctx, (unsigned char *)hmac_seed,
strlen(hmac_seed))) {
errno = ENOKEY;
- goto out;
+ return -1;
}
if (!HMAC_Final(hmac_ctx, key, &key_len)) {
errno = ENOKEY;
- goto out;
+ return -1;
}
- err = 0;
+ return 0;
+}
-out:
- HMAC_CTX_free(hmac_ctx);
- return err;
+/*
+ * gen_tls_identity() - Build the TLS PSK identity string (HMAC_CTX variant)
+ * @hostnqn:	Host NQN, printed into the identity
+ * @subsysnqn:	Subsystem NQN, printed into the identity
+ * @version:	Identity version; only 0 and 1 are accepted
+ * @hmac:	HMAC algorithm identifier; printed into the identity and, for
+ *		version 1, mapped to a digest by select_hmac()
+ * @identity:	Output buffer. No size is passed in, so the caller must
+ *		provide enough room for the complete string.
+ * @retained:	Key used for the version 1 HMAC
+ * @key_len:	Length of @retained in bytes
+ *
+ * Version 0 yields "NVMe0R<hmac> <hostnqn> <subsysnqn>". Version 1 appends
+ * the base64 encoding of an HMAC, keyed with @retained, computed over
+ * "<hostnqn> <subsysnqn> NVMe-over-Fabrics".
+ *
+ * Return: Length of the identity string, or -1 with errno set.
+ */
+static int gen_tls_identity(const char *hostnqn, const char *subsysnqn,
+			    int version, int hmac, char *identity,
+			    unsigned char *retained, size_t key_len)
+{
+	static const char hmac_seed[] = "NVMe-over-Fabrics";
+	_cleanup_hmac_ctx_ HMAC_CTX *hmac_ctx = NULL;
+	_cleanup_free_ unsigned char *psk_ctx = NULL;
+	_cleanup_free_ char *enc_ctx = NULL;
+	unsigned int digest_len = 0;
+	const EVP_MD *md;
+	size_t hmac_len;
+	int len;
+
+	if (version == 0) {
+		sprintf(identity, "NVMe%01dR%02d %s %s",
+			version, hmac, hostnqn, subsysnqn);
+		return strlen(identity);
+	}
+	/*
+	 * Anything other than version 1 is invalid from here on. Negative
+	 * values must be rejected too, otherwise they would be processed
+	 * as if they were version 1.
+	 */
+	if (version != 1) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/*
+	 * HMAC_Final() writes a complete digest into psk_ctx, which is
+	 * key_len bytes long, so the digest must not be larger than that.
+	 */
+	md = select_hmac(hmac, &hmac_len);
+	if (!md || hmac_len > key_len) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	hmac_ctx = HMAC_CTX_new();
+	if (!hmac_ctx) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	psk_ctx = malloc(key_len);
+	if (!psk_ctx) {
+		errno = ENOMEM;
+		return -1;
+	}
+	if (!HMAC_Init_ex(hmac_ctx, retained, key_len, md, NULL)) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	/*
+	 * HMAC_Final() reports the digest size through an unsigned int; use
+	 * a variable of exactly that type instead of casting a size_t
+	 * pointer, which only happens to work on little-endian machines.
+	 */
+	if (!HMAC_Update(hmac_ctx, (unsigned char *)hostnqn,
+			 strlen(hostnqn)) ||
+	    !HMAC_Update(hmac_ctx, (unsigned char *)" ", 1) ||
+	    !HMAC_Update(hmac_ctx, (unsigned char *)subsysnqn,
+			 strlen(subsysnqn)) ||
+	    !HMAC_Update(hmac_ctx, (unsigned char *)" ", 1) ||
+	    !HMAC_Update(hmac_ctx, (unsigned char *)hmac_seed,
+			 strlen(hmac_seed)) ||
+	    !HMAC_Final(hmac_ctx, psk_ctx, &digest_len)) {
+		errno = ENOKEY;
+		return -1;
+	}
+
+	/*
+	 * base64_encode() does not NUL-terminate its output; the zeroed,
+	 * double-sized buffer supplies the terminator.
+	 */
+	enc_ctx = calloc(2, digest_len);
+	if (!enc_ctx) {
+		errno = ENOMEM;
+		return -1;
+	}
+	len = base64_encode(psk_ctx, digest_len, enc_ctx);
+	if (len < 0) {
+		errno = ENOKEY;
+		return -1;
+	}
+	sprintf(identity, "NVMe%01dR%02d %s %s %s",
+		version, hmac, hostnqn, subsysnqn, enc_ctx);
+	return strlen(identity);
}
#endif /* !CONFIG_OPENSSL_1 */
#ifdef CONFIG_OPENSSL_3
+static DEFINE_CLEANUP_FUNC(
+ cleanup_ossl_lib_ctx, OSSL_LIB_CTX *, OSSL_LIB_CTX_free)
+#define _cleanup_ossl_lib_ctx_ __cleanup__(cleanup_ossl_lib_ctx)
+static DEFINE_CLEANUP_FUNC(cleanup_evp_mac_ctx, EVP_MAC_CTX *, EVP_MAC_CTX_free)
+#define _cleanup_evp_mac_ctx_ __cleanup__(cleanup_evp_mac_ctx)
+static DEFINE_CLEANUP_FUNC(cleanup_evp_mac, EVP_MAC *, EVP_MAC_free)
+#define _cleanup_evp_mac_ __cleanup__(cleanup_evp_mac)
+
int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac,
unsigned int key_len, unsigned char *secret,
unsigned char *key)
{
const char hmac_seed[] = "NVMe-over-Fabrics";
OSSL_PARAM params[2], *p = params;
- OSSL_LIB_CTX *lib_ctx;
- EVP_MAC_CTX *mac_ctx = NULL;
- EVP_MAC *mac = NULL;
+ _cleanup_ossl_lib_ctx_ OSSL_LIB_CTX *lib_ctx;
+ _cleanup_evp_mac_ctx_ EVP_MAC_CTX *mac_ctx = NULL;
+ _cleanup_evp_mac_ EVP_MAC *mac = NULL;
char *progq = NULL;
char *digest;
size_t len;
- int err = -1;
lib_ctx = OSSL_LIB_CTX_new();
if (!lib_ctx) {
errno = ENOMEM;
- return err;
+ return -1;
}
mac = EVP_MAC_fetch(lib_ctx, OSSL_MAC_NAME_HMAC, progq);
if (!mac) {
errno = ENOMEM;
- goto out;
+ return -1;
}
mac_ctx = EVP_MAC_CTX_new(mac);
if (!mac_ctx) {
errno = ENOMEM;
- goto out;
+ return -1;
}
switch (hmac) {
case NVME_HMAC_ALG_NONE:
memcpy(key, secret, key_len);
- err = 0;
- goto out;
+ return 0;
case NVME_HMAC_ALG_SHA2_256:
digest = OSSL_DIGEST_NAME_SHA2_256;
break;
@@ -735,7 +921,7 @@ int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac,
break;
default:
errno = EINVAL;
- goto out;
+ return -1;
}
*p++ = OSSL_PARAM_construct_utf8_string(OSSL_MAC_PARAM_DIGEST,
digest,
@@ -744,42 +930,224 @@ int nvme_gen_dhchap_key(char *hostnqn, enum nvme_hmac_alg hmac,
if (!EVP_MAC_init(mac_ctx, secret, key_len, params)) {
errno = ENOKEY;
- goto out;
+ return -1;
}
if (!EVP_MAC_update(mac_ctx, (unsigned char *)hostnqn,
strlen(hostnqn))) {
errno = ENOKEY;
- goto out;
+ return -1;
}
if (!EVP_MAC_update(mac_ctx, (unsigned char *)hmac_seed,
strlen(hmac_seed))) {
errno = ENOKEY;
- goto out;
+ return -1;
}
if (!EVP_MAC_final(mac_ctx, key, &len, key_len)) {
errno = ENOKEY;
- goto out;
+ return -1;
}
if (len != key_len) {
errno = EMSGSIZE;
- goto out;
+ return -1;
}
- err = 0;
+ return 0;
+}
-out:
- EVP_MAC_CTX_free(mac_ctx);
- EVP_MAC_free(mac);
- OSSL_LIB_CTX_free(lib_ctx);
+/*
+ * gen_tls_identity() - Build the TLS PSK identity string (EVP_MAC variant)
+ * @hostnqn:	Host NQN, printed into the identity
+ * @subsysnqn:	Subsystem NQN, printed into the identity
+ * @version:	Identity version; only 0 and 1 are accepted
+ * @hmac:	HMAC algorithm identifier; printed into the identity and, for
+ *		version 1, used to select the SHA2-256 or SHA2-384 digest
+ * @identity:	Output buffer. No size is passed in, so the caller must
+ *		provide enough room for the complete string.
+ * @retained:	Key used for the version 1 HMAC
+ * @key_len:	Length of @retained in bytes; also the size of the buffer
+ *		that receives the MAC
+ *
+ * Version 0 yields "NVMe0R<hmac> <hostnqn> <subsysnqn>". Version 1 appends
+ * the base64 encoding of an HMAC, keyed with @retained, computed over
+ * "<hostnqn> <subsysnqn> NVMe-over-Fabrics".
+ *
+ * Return: Length of the identity string, or -1 with errno set.
+ */
+static int gen_tls_identity(const char *hostnqn, const char *subsysnqn,
+			    int version, int hmac, char *identity,
+			    unsigned char *retained, size_t key_len)
+{
+	static const char hmac_seed[] = "NVMe-over-Fabrics";
+	size_t hmac_len;
+	OSSL_PARAM params[2], *p = params;
+	_cleanup_ossl_lib_ctx_ OSSL_LIB_CTX *lib_ctx = NULL;
+	_cleanup_evp_mac_ctx_ EVP_MAC_CTX *mac_ctx = NULL;
+	_cleanup_evp_mac_ EVP_MAC *mac = NULL;
+	char *progq = NULL;
+	char *digest = NULL;
+	_cleanup_free_ unsigned char *psk_ctx = NULL;
+	_cleanup_free_ char *enc_ctx = NULL;
+	int len;
- return err;
+
+	if (version == 0) {
+		sprintf(identity, "NVMe%01dR%02d %s %s",
+			version, hmac, hostnqn, subsysnqn);
+		return strlen(identity);
+	}
+	/*
+	 * Anything other than version 1 is invalid from here on. Negative
+	 * values must be rejected too, otherwise they would be processed
+	 * as if they were version 1.
+	 */
+	if (version != 1) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/* Validate the algorithm before allocating any OpenSSL objects. */
+	switch (hmac) {
+	case NVME_HMAC_ALG_SHA2_256:
+		digest = OSSL_DIGEST_NAME_SHA2_256;
+		break;
+	case NVME_HMAC_ALG_SHA2_384:
+		digest = OSSL_DIGEST_NAME_SHA2_384;
+		break;
+	default:
+		errno = EINVAL;
+		return -1;
+	}
+
+	lib_ctx = OSSL_LIB_CTX_new();
+	if (!lib_ctx) {
+		errno = ENOMEM;
+		return -1;
+	}
+	mac = EVP_MAC_fetch(lib_ctx, OSSL_MAC_NAME_HMAC, progq);
+	if (!mac) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	mac_ctx = EVP_MAC_CTX_new(mac);
+	if (!mac_ctx) {
+		errno = ENOMEM;
+		return -1;
+	}
+	*p++ = OSSL_PARAM_construct_utf8_string(OSSL_MAC_PARAM_DIGEST,
+						digest, 0);
+	*p = OSSL_PARAM_construct_end();
+
+	psk_ctx = malloc(key_len);
+	if (!psk_ctx) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	if (!EVP_MAC_init(mac_ctx, retained, key_len, params) ||
+	    !EVP_MAC_update(mac_ctx, (unsigned char *)hostnqn,
+			    strlen(hostnqn)) ||
+	    !EVP_MAC_update(mac_ctx, (unsigned char *)" ", 1) ||
+	    !EVP_MAC_update(mac_ctx, (unsigned char *)subsysnqn,
+			    strlen(subsysnqn)) ||
+	    !EVP_MAC_update(mac_ctx, (unsigned char *)" ", 1) ||
+	    !EVP_MAC_update(mac_ctx, (unsigned char *)hmac_seed,
+			    strlen(hmac_seed)) ||
+	    !EVP_MAC_final(mac_ctx, psk_ctx, &hmac_len, key_len)) {
+		errno = ENOKEY;
+		return -1;
+	}
+	if (hmac_len > key_len) {
+		errno = EMSGSIZE;
+		return -1;
+	}
+
+	/*
+	 * base64_encode() does not NUL-terminate its output; the zeroed,
+	 * double-sized buffer supplies the terminator.
+	 */
+	enc_ctx = calloc(2, hmac_len);
+	if (!enc_ctx) {
+		errno = ENOMEM;
+		return -1;
+	}
+	len = base64_encode(psk_ctx, hmac_len, enc_ctx);
+	if (len < 0) {
+		errno = ENOKEY;
+		return -1;
+	}
+	sprintf(identity, "NVMe%01dR%02d %s %s %s",
+		version, hmac, hostnqn, subsysnqn, enc_ctx);
+	return strlen(identity);
}
#endif /* !CONFIG_OPENSSL_3 */
+/*
+ * derive_nvme_keys() - Run the three-step TLS key derivation
+ * @hostnqn:	Host NQN
+ * @subsysnqn:	Subsystem NQN
+ * @identity:	Output buffer for the generated TLS identity
+ * @version:	Identity version handed to gen_tls_identity()
+ * @hmac:	HMAC algorithm identifier
+ * @configured:	Configured key the retained key is derived from
+ * @psk:	Output buffer for the derived TLS key
+ * @key_len:	Key length in bytes; also the size of the temporary
+ *		retained-key buffer
+ *
+ * Calls derive_retained_key(), gen_tls_identity() and derive_tls_key() in
+ * that order and stops at the first one that reports a negative result.
+ *
+ * Return: The result of derive_tls_key() on success, otherwise the first
+ * negative result; -1 with errno set to EINVAL or ENOMEM when an argument
+ * is NULL or the temporary buffer cannot be allocated.
+ */
+static int derive_nvme_keys(const char *hostnqn, const char *subsysnqn,
+			    char *identity, int version,
+			    int hmac, unsigned char *configured,
+			    unsigned char *psk, int key_len)
+{
+	_cleanup_free_ unsigned char *retained = NULL;
+	int rc;
+
+	if (!(hostnqn && subsysnqn && identity && psk)) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	retained = malloc(key_len);
+	if (retained == NULL) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	rc = derive_retained_key(hmac, hostnqn, configured, retained, key_len);
+	if (rc >= 0)
+		rc = gen_tls_identity(hostnqn, subsysnqn, version, hmac,
+				      identity, retained, key_len);
+	if (rc >= 0)
+		rc = derive_tls_key(hmac, identity, retained, psk, key_len);
+
+	return rc;
+}
+
+/*
+ * nvme_identity_len() - Buffer size needed for a TLS identity string
+ * @hmac:	HMAC algorithm identifier
+ * @version:	Identity version
+ * @hostnqn:	Host NQN
+ * @subsysnqn:	Subsystem NQN
+ *
+ * The size is both NQN lengths plus 12; version 1 adds 66 more, and a
+ * further 32 when @hmac is NVME_HMAC_ALG_SHA2_384.
+ *
+ * Return: The size in bytes. For @version greater than 1 errno is set to
+ * EINVAL and -1 is returned; because the return type is unsigned that
+ * value reaches the caller as (size_t)-1.
+ */
+static size_t nvme_identity_len(int hmac, int version, const char *hostnqn,
+				const char *subsysnqn)
+{
+	const size_t base = strlen(hostnqn) + strlen(subsysnqn) + 12;
+	size_t extra = 0;
+
+	if (version > 1) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (version == 1)
+		extra = (hmac == NVME_HMAC_ALG_SHA2_384) ? 66 + 32 : 66;
+
+	return base + extra;
+}
+
+/*
+ * nvme_generate_tls_key_identity() - Generate the TLS key identity
+ * @hostnqn:		Host NVMe Qualified Name
+ * @subsysnqn:		Subsystem NVMe Qualified Name
+ * @version:		Key version to use
+ * @hmac:		HMAC algorithm
+ * @configured_key:	Configured key data to derive the key from
+ * @key_len:		Length of @configured_key
+ *
+ * Runs the key derivation into a temporary buffer and keeps only the
+ * identity string it produces.
+ *
+ * Return: A newly allocated identity string which the caller must free(),
+ * or NULL with errno set on failure.
+ */
+char *nvme_generate_tls_key_identity(const char *hostnqn, const char *subsysnqn,
+				     int version, int hmac,
+				     unsigned char *configured_key, int key_len)
+{
+	_cleanup_free_ unsigned char *psk = NULL;
+	size_t identity_len;
+	char *identity;
+	int saved_errno;
+
+	/*
+	 * nvme_identity_len() calls strlen() on both NQNs, so they must be
+	 * checked first. It also sizes a negative version like version 0,
+	 * while gen_tls_identity() would emit the longer version 1 string
+	 * for it, so negative versions are refused here.
+	 */
+	if (!hostnqn || !subsysnqn || version < 0) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	identity_len = nvme_identity_len(hmac, version, hostnqn, subsysnqn);
+	/* size_t is unsigned: the -1 error return arrives as (size_t)-1 */
+	if (identity_len == (size_t)-1)
+		return NULL;
+
+	identity = malloc(identity_len);
+	if (!identity) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	psk = calloc(1, key_len);
+	if (!psk) {
+		free(identity);
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	if (derive_nvme_keys(hostnqn, subsysnqn, identity, version, hmac,
+			     configured_key, psk, key_len) < 0) {
+		/* keep the errno reported by the failed derivation step */
+		saved_errno = errno;
+		free(identity);
+		errno = saved_errno;
+		return NULL;
+	}
+
+	return identity;
+}
+
#ifdef CONFIG_KEYUTILS
long nvme_lookup_keyring(const char *keyring)
{
@@ -820,37 +1188,41 @@ int nvme_set_keyring(long key_id)
return 0;
}
-long nvme_insert_tls_key(const char *keyring, const char *key_type,
- const char *hostnqn, const char *subsysnqn, int hmac,
- unsigned char *configured_key, int key_len)
+long nvme_insert_tls_key_versioned(const char *keyring, const char *key_type,
+ const char *hostnqn, const char *subsysnqn,
+ int version, int hmac,
+ unsigned char *configured_key, int key_len)
{
- key_serial_t keyring_id, key = 0;
- char *identity;
- unsigned char *psk;
+ key_serial_t keyring_id, key;
+ _cleanup_free_ char *identity = NULL;
+ size_t identity_len;
+ _cleanup_free_ unsigned char *psk = NULL;
int ret = -1;
keyring_id = nvme_lookup_keyring(keyring);
if (keyring_id == 0)
return -1;
- identity = malloc(strlen(hostnqn) + strlen(subsysnqn) + 12);
+ identity_len = nvme_identity_len(hmac, version, hostnqn, subsysnqn);
+ if (identity_len < 0)
+ return -1;
+
+ identity = malloc(identity_len);
if (!identity) {
errno = ENOMEM;
return -1;
}
- sprintf(identity, "NVMe0R%02d %s %s", hmac, hostnqn, subsysnqn);
-
psk = malloc(key_len);
if (!psk) {
errno = ENOMEM;
- goto out_free_identity;
+ return 0;
}
memset(psk, 0, key_len);
- ret = derive_nvme_keys(hostnqn, identity, hmac,
+ ret = derive_nvme_keys(hostnqn, subsysnqn, identity, version, hmac,
configured_key, psk, key_len);
if (ret != key_len)
- goto out_free_psk;
+ return 0;
key = keyctl_search(keyring_id, key_type, identity, 0);
if (key > 0) {
@@ -862,10 +1234,6 @@ long nvme_insert_tls_key(const char *keyring, const char *key_type,
if (key < 0)
key = 0;
}
-out_free_psk:
- free(psk);
-out_free_identity:
- free(identity);
return key;
}
@@ -902,10 +1270,23 @@ int nvme_set_keyring(long key_id)
return -1;
}
+/*
+ * Fallback nvme_insert_tls_key_versioned(): per the message it logs, this
+ * variant is used when the library is built without keyutils support.
+ * None of the arguments are used and no key is inserted.
+ *
+ * Return: always -1, with errno set to ENOTSUP.
+ */
+long nvme_insert_tls_key_versioned(const char *keyring, const char *key_type,
+				   const char *hostnqn, const char *subsysnqn,
+				   int version, int hmac,
+				   unsigned char *configured_key, int key_len)
+{
+	nvme_msg(NULL, LOG_ERR,
+		 "key operations not supported; recompile with keyutils support.\n");
+
+	/* errno is assigned after logging so the logger cannot clobber it */
+	errno = ENOTSUP;
+	return -1;
+}
+#endif
+
long nvme_insert_tls_key(const char *keyring, const char *key_type,
const char *hostnqn, const char *subsysnqn, int hmac,
unsigned char *configured_key, int key_len)
{
- return derive_nvme_keys(NULL, NULL, 0, NULL, NULL, 0);
+ return nvme_insert_tls_key_versioned(keyring, key_type,
+ hostnqn, subsysnqn, 0, hmac,
+ configured_key, key_len);
}
-#endif
diff --git a/src/nvme/linux.h b/src/nvme/linux.h
index 37ba9d4..11ee76e 100644
--- a/src/nvme/linux.h
+++ b/src/nvme/linux.h
@@ -49,6 +49,37 @@ enum nvme_telemetry_da {
};
/**
+ * nvme_get_telemetry_max() - Get telemetry limits
+ * @fd: File descriptor of nvme device
+ * @da: On success return max supported data area
+ * @max_data_tx: On success set to max transfer chunk supported by the controller
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise.
+ */
+int nvme_get_telemetry_max(int fd, enum nvme_telemetry_da *da, size_t *max_data_tx);
+
+/**
+ * nvme_get_telemetry_log() - Get specified telemetry log
+ * @fd: File descriptor of nvme device
+ * @create: Generate new host-initiated telemetry capture
+ * @ctrl: Get controller Initiated log
+ * @rae: Retain asynchronous events
+ * @max_data_tx: Set the max data transfer size to be used retrieving telemetry.
+ * @da: Log page data area, valid values: &enum nvme_telemetry_da.
+ * @log: On success, set to the value of the allocated and retrieved log.
+ * @size: Ptr to the telemetry log size, so it can be returned
+ *
+ * The total size allocated can be calculated as:
+ * (nvme_telemetry_log da size + 1) * NVME_LOG_TELEM_BLOCK_SIZE.
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise.
+ */
+int nvme_get_telemetry_log(int fd, bool create, bool ctrl, bool rae, size_t max_data_tx,
+ enum nvme_telemetry_da da, struct nvme_telemetry_log **log,
+ size_t *size);
+/**
* nvme_get_ctrl_telemetry() - Get controller telemetry log
* @fd: File descriptor of nvme device
* @rae: Retain asynchronous events
@@ -262,4 +293,46 @@ long nvme_insert_tls_key(const char *keyring, const char *key_type,
const char *hostnqn, const char *subsysnqn, int hmac,
unsigned char *configured_key, int key_len);
+/**
+ * nvme_insert_tls_key_versioned() - Derive and insert TLS key
+ * @keyring: Keyring to use
+ * @key_type: Type of the resulting key
+ * @hostnqn: Host NVMe Qualified Name
+ * @subsysnqn: Subsystem NVMe Qualified Name
+ * @version: Key version to use
+ * @hmac: HMAC algorithm
+ * @configured_key: Configured key data to derive the key from
+ * @key_len: Length of @configured_key
+ *
+ * Derives a 'retained' TLS key as specified in NVMe TCP 1.0a (if
+ * @version is set to '0') or NVMe TP8028 (if @version is set to '1') and
+ * stores it as type @key_type in the keyring specified by @keyring.
+ *
+ * Return: The key serial number if the key could be inserted into
+ * the keyring or 0 with errno otherwise.
+ */
+long nvme_insert_tls_key_versioned(const char *keyring, const char *key_type,
+ const char *hostnqn, const char *subsysnqn,
+ int version, int hmac,
+ unsigned char *configured_key, int key_len);
+
+/**
+ * nvme_generate_tls_key_identity() - Generate the TLS key identity
+ * @hostnqn: Host NVMe Qualified Name
+ * @subsysnqn: Subsystem NVMe Qualified Name
+ * @version: Key version to use
+ * @hmac: HMAC algorithm
+ * @configured_key: Configured key data to derive the key from
+ * @key_len: Length of @configured_key
+ *
+ * Derives a 'retained' TLS key as specified in NVMe TCP and
+ * generates the corresponding TLS identity.
+ *
+ * Return: The string containing the TLS identity. It is the responsibility
+ * of the caller to free the returned string.
+ */
+char *nvme_generate_tls_key_identity(const char *hostnqn, const char *subsysnqn,
+ int version, int hmac,
+ unsigned char *configured_key, int key_len);
+
#endif /* _LIBNVME_LINUX_H */
diff --git a/src/nvme/log.c b/src/nvme/log.c
index e4697df..2ffca3e 100644
--- a/src/nvme/log.c
+++ b/src/nvme/log.c
@@ -26,11 +26,13 @@
#define LOG_CLOCK CLOCK_MONOTONIC
#endif
+static nvme_root_t root;
+
void __attribute__((format(printf, 4, 5)))
__nvme_msg(nvme_root_t r, int lvl,
const char *func, const char *format, ...)
{
- FILE *fp = r ? r->fp : stderr;
+ FILE *fp = stderr;
va_list ap;
char pidbuf[16];
char timebuf[32];
@@ -44,10 +46,16 @@ __nvme_msg(nvme_root_t r, int lvl,
"[%s] <%s>%s ",
"[%s] <%s> %s: ",
};
- char *header __cleanup__(cleanup_charp) = NULL;
- char *message __cleanup__(cleanup_charp) = NULL;
+ _cleanup_free_ char *header = NULL;
+ _cleanup_free_ char *message = NULL;
int idx = 0;
+ if (!r)
+ r = root;
+
+ if (r)
+ fp = r->fp;
+
if (r && lvl > r->log_level)
return;
@@ -90,3 +98,8 @@ void nvme_init_logging(nvme_root_t r, int lvl, bool log_pid, bool log_tstamp)
r->log_pid = log_pid;
r->log_timestamp = log_tstamp;
}
+
+void nvme_set_root(nvme_root_t r)
+{
+ root = r; /* file-scope default root; __nvme_msg() falls back to it when called with a NULL root */
+}
diff --git a/src/nvme/log.h b/src/nvme/log.h
index 1cf797a..7c345f6 100644
--- a/src/nvme/log.h
+++ b/src/nvme/log.h
@@ -35,4 +35,17 @@
*/
void nvme_init_logging(nvme_root_t r, int lvl, bool log_pid, bool log_tstamp);
+/**
+ * nvme_set_root() - Set nvme_root_t context
+ * @r: nvme_root_t context
+ *
+ * In order to be able to log from code paths where no root object is passed in
+ * via the arguments, use the default one, which can be set via this call.
+ * When creating a new root object with @nvme_create_root the global root object
+ * will be set as well. This means the global root object is always pointing to
+ * the latest created root object. Note the first @nvme_free_tree call will reset
+ * the global root object.
+ */
+void nvme_set_root(nvme_root_t r);
+
#endif /* _LOG_H */
diff --git a/src/nvme/mi-mctp.c b/src/nvme/mi-mctp.c
index 0c5972a..86c4c29 100644
--- a/src/nvme/mi-mctp.c
+++ b/src/nvme/mi-mctp.c
@@ -82,6 +82,8 @@ struct nvme_mi_transport_mctp {
int net;
__u8 eid;
int sd;
+ void *resp_buf;
+ size_t resp_buf_size;
};
static int ioctl_tag(int sd, unsigned long req, struct mctp_ioc_tag_ctl *ctl)
@@ -175,60 +177,40 @@ struct nvme_mi_msg_resp_mpr {
/* Check if this response was a More Processing Required response; if so,
* populate the worst-case expected processing time, given in milliseconds.
+ *
+ * buf is the incoming message data, including type byte, but excluding
+ * the MIC which has been extracted into the mic argument already.
*/
-static bool nvme_mi_mctp_resp_is_mpr(struct nvme_mi_resp *resp, size_t len,
+static bool nvme_mi_mctp_resp_is_mpr(void *buf, size_t len,
__le32 mic, unsigned int *mpr_time)
{
- struct nvme_mi_admin_resp_hdr *admin_msg;
struct nvme_mi_msg_resp_mpr *msg;
- size_t clen;
__u32 crc;
- /* We need at least the minimal header plus checksum */
- if (len < sizeof(*msg) + sizeof(mic))
+ /* We need at least the minimal header */
+ if (len < sizeof(*msg))
return false;
- msg = (struct nvme_mi_msg_resp_mpr *)resp->hdr;
+ msg = (struct nvme_mi_msg_resp_mpr *)buf;
if (msg->status != NVME_MI_RESP_MPR)
return false;
- /* Find and verify the MIC from the response, which may not be laid out
- * in resp as we expect. We have to preserve resp->hdr_len and
- * resp->data_len, as we will need them for the eventual reply message.
- * Because of that, we can't use verify_resp_mic here.
- *
- * If the packet was at the expected response size, then mic will
- * be set already; if not, find it within the header/data buffers.
- */
-
/* Devices may send a MPR response as a full-sized Admin response,
* rather than the minimal MI-only header. Allow this, but only if the
* type indicates admin, and the allocated response header is the
* correct size for an Admin response.
*/
- if (((msg->hdr.nmp >> 3) & 0xf) == NVME_MI_MT_ADMIN &&
- len == sizeof(*admin_msg) + sizeof(mic) &&
- resp->hdr_len == sizeof(*admin_msg)) {
- if (resp->data_len)
- mic = *(__le32 *)resp->data;
- } else if (len == sizeof(*msg) + sizeof(mic)) {
- if (resp->hdr_len > sizeof(*msg))
- mic = *(__le32 *)(msg + 1);
- else if (resp->data_len)
- mic = *(__le32 *)(resp->data);
- } else {
- return false;
- }
-
- /* Since our response is just a header, we're guaranteed to have
- * all data in resp->hdr. The response may be shorter than the expected
- * header though, so clamp to len.
+ if (!(len == sizeof(*msg) ||
+ ((msg->hdr.nmp >> 3 & 0x0f) == NVME_MI_MT_ADMIN &&
+ len == sizeof(struct nvme_mi_admin_resp_hdr))))
+ return false;
+
+ /* Verify the MIC from the response. We're dealing with linear
+ * header data here, and need to preserve the resp pointer & size
+ * values, so can't use verify_resp_mic here.
*/
- len -= sizeof(mic);
- clen = len < resp->hdr_len ? len : resp->hdr_len;
-
- crc = ~nvme_mi_crc32_update(0xffffffff, resp->hdr, clen);
+ crc = ~nvme_mi_crc32_update(0xffffffff, buf, len);
if (le32_to_cpu(mic) != crc)
return false;
@@ -242,14 +224,14 @@ static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep,
struct nvme_mi_req *req,
struct nvme_mi_resp *resp)
{
+ ssize_t len, resp_len, resp_hdr_len, resp_data_len;
struct nvme_mi_transport_mctp *mctp;
- struct iovec req_iov[3], resp_iov[3];
+ struct iovec req_iov[3], resp_iov[1];
struct msghdr req_msg, resp_msg;
int i, rc, errno_save, timeout;
struct sockaddr_mctp addr;
struct pollfd pollfds[1];
unsigned int mpr_time;
- ssize_t len;
__le32 mic;
__u8 tag;
@@ -306,20 +288,30 @@ static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep,
goto out;
}
- resp_iov[0].iov_base = ((__u8 *)resp->hdr) + 1;
- resp_iov[0].iov_len = resp->hdr_len - 1;
-
- resp_iov[1].iov_base = ((__u8 *)resp->data);
- resp_iov[1].iov_len = resp->data_len;
+ resp_len = resp->hdr_len + resp->data_len + sizeof(mic);
+ if (resp_len > mctp->resp_buf_size) {
+ void *tmp = realloc(mctp->resp_buf, resp_len);
+ if (!tmp) {
+ errno_save = errno;
+ nvme_msg(ep->root, LOG_ERR,
+ "Failure allocating response buffer: %m\n");
+ errno = errno_save;
+ rc = -1;
+ goto out;
+ }
+ mctp->resp_buf = tmp;
+ mctp->resp_buf_size = resp_len;
+ }
- resp_iov[2].iov_base = &mic;
- resp_iov[2].iov_len = sizeof(mic);
+ /* offset by one: the MCTP message type is excluded from the buffer */
+ resp_iov[0].iov_base = mctp->resp_buf + 1;
+ resp_iov[0].iov_len = resp_len - 1;
memset(&resp_msg, 0, sizeof(resp_msg));
resp_msg.msg_name = &addr;
resp_msg.msg_namelen = sizeof(addr);
resp_msg.msg_iov = resp_iov;
- resp_msg.msg_iovlen = 3;
+ resp_msg.msg_iovlen = 1;
pollfds[0].fd = mctp->sd;
pollfds[0].events = POLLIN;
@@ -333,13 +325,14 @@ retry:
nvme_msg(ep->root, LOG_ERR,
"Failed polling on MCTP socket: %m");
errno = errno_save;
- return -1;
+ goto out;
}
if (rc == 0) {
nvme_msg(ep->root, LOG_DEBUG, "Timeout on MCTP socket");
errno = ETIMEDOUT;
- return -1;
+ rc = -1;
+ goto out;
}
rc = -1;
@@ -361,7 +354,7 @@ retry:
}
/* Re-add the type byte, so we can work on aligned lengths from here */
- resp->hdr->type = MCTP_TYPE_NVME | MCTP_TYPE_MIC;
+ ((uint8_t *)mctp->resp_buf)[0] = MCTP_TYPE_NVME | MCTP_TYPE_MIC;
len += 1;
/* The smallest response data is 8 bytes: generic 4-byte message header
@@ -375,21 +368,21 @@ retry:
goto out;
}
- /* We can't have header/payload data that isn't a multiple of 4 bytes */
- if (len & 0x3) {
- nvme_msg(ep->root, LOG_WARNING,
- "Response message has unaligned length (%zd)!\n",
- len);
- errno = EPROTO;
- goto out;
- }
+ /* Start unpacking the linear resp buffer into the split header + data
+ * + MIC. We check for a MPR response before fully unpacking, as we'll
+ * need to preserve the resp layout if we need to retry the receive.
+ */
+
+ /* MIC is always at the tail */
+ memcpy(&mic, mctp->resp_buf + len - sizeof(mic), sizeof(mic));
+ len -= 4;
/* Check for a More Processing Required response. This is a slight
* layering violation, as we're pre-checking the MIC and inspecting
* header fields. However, we need to do this in the transport in order
* to keep the tag allocated and retry the recvmsg
*/
- if (nvme_mi_mctp_resp_is_mpr(resp, len, mic, &mpr_time)) {
+ if (nvme_mi_mctp_resp_is_mpr(mctp->resp_buf, len, mic, &mpr_time)) {
nvme_msg(ep->root, LOG_DEBUG,
"Received More Processing Required, waiting for response\n");
@@ -406,30 +399,20 @@ retry:
goto retry;
}
- /* If we have a shorter than expected response, we need to find the
- * MIC and the correct split between header & data. We know that the
- * split is 4-byte aligned, so the MIC will be entirely within one
- * of the iovecs.
- */
- if (len == resp->hdr_len + resp->data_len + sizeof(mic)) {
- /* Common case: expected data length. Header, data and MIC
- * are already laid-out correctly. Nothing to do. */
-
- } else if (len < resp->hdr_len + sizeof(mic)) {
- /* Response is smaller than the expected header. MIC is
- * somewhere in the header buf */
- resp->hdr_len = len - sizeof(mic);
- resp->data_len = 0;
- memcpy(&mic, ((uint8_t *)resp->hdr) + resp->hdr_len,
- sizeof(mic));
-
- } else {
- /* We have a full header, but data is truncated - possibly
- * zero bytes. MIC is somewhere in the data buf */
- resp->data_len = len - resp->hdr_len - sizeof(mic);
- memcpy(&mic, ((uint8_t *)resp->data) + resp->data_len,
- sizeof(mic));
- }
+ /* we expect resp->hdr_len bytes, but we may have less */
+ resp_hdr_len = resp->hdr_len;
+ if (resp_hdr_len > len)
+ resp_hdr_len = len;
+ memcpy(resp->hdr, mctp->resp_buf, resp_hdr_len);
+ resp->hdr_len = resp_hdr_len;
+ len -= resp_hdr_len;
+
+ /* any remaining bytes are the data payload */
+ resp_data_len = resp->data_len;
+ if (resp_data_len > len)
+ resp_data_len = len;
+ memcpy(resp->data, mctp->resp_buf + resp_hdr_len, resp_data_len);
+ resp->data_len = resp_data_len;
resp->mic = le32_to_cpu(mic);
@@ -450,6 +433,7 @@ static void nvme_mi_mctp_close(struct nvme_mi_ep *ep)
mctp = ep->transport_data;
close(mctp->sd);
+ free(mctp->resp_buf);
free(ep->transport_data);
}
@@ -488,15 +472,29 @@ nvme_mi_ep_t nvme_mi_open_mctp(nvme_root_t root, unsigned int netid, __u8 eid)
return NULL;
mctp = malloc(sizeof(*mctp));
- if (!mctp)
- goto err_free_ep;
+ if (!mctp) {
+ errno_save = errno;
+ goto err_close_ep;
+ }
+
+ memset(mctp, 0, sizeof(*mctp));
+ mctp->sd = -1;
+
+ mctp->resp_buf_size = 4096;
+ mctp->resp_buf = malloc(mctp->resp_buf_size);
+ if (!mctp->resp_buf) {
+ errno_save = errno;
+ goto err_free_mctp;
+ }
mctp->net = netid;
mctp->eid = eid;
mctp->sd = ops.socket(AF_MCTP, SOCK_DGRAM, 0);
- if (mctp->sd < 0)
- goto err_free_ep;
+ if (mctp->sd < 0) {
+ errno_save = errno;
+ goto err_free_rspbuf;
+ }
ep->transport = &nvme_mi_transport_mctp;
ep->transport_data = mctp;
@@ -512,10 +510,14 @@ nvme_mi_ep_t nvme_mi_open_mctp(nvme_root_t root, unsigned int netid, __u8 eid)
return ep;
-err_free_ep:
- errno_save = errno;
- nvme_mi_close(ep);
+err_free_rspbuf:
+ free(mctp->resp_buf);
+err_free_mctp:
free(mctp);
+err_close_ep:
+ /* the ep->transport is not set yet, so this will not call back
+ * into nvme_mi_mctp_close() */
+ nvme_mi_close(ep);
errno = errno_save;
return NULL;
}
diff --git a/src/nvme/mi.c b/src/nvme/mi.c
index 3799f35..82ed88a 100644
--- a/src/nvme/mi.c
+++ b/src/nvme/mi.c
@@ -413,11 +413,6 @@ int nvme_mi_submit(nvme_mi_ep_t ep, struct nvme_mi_req *req,
return -1;
}
- if (resp->data_len & 0x3) {
- errno = EINVAL;
- return -1;
- }
-
if (ep->transport->mic_enabled)
nvme_mi_calc_req_mic(req);
@@ -580,8 +575,10 @@ int nvme_mi_admin_xfer(nvme_mi_ctrl_t ctrl,
return -1;
}
- /* must be aligned */
- if (resp_data_offset & 0x3) {
+ /* request and response lengths & offset must be aligned */
+ if ((req_data_size & 0x3) ||
+ (*resp_data_size & 0x3) ||
+ (resp_data_offset & 0x3)) {
errno = EINVAL;
return -1;
}
@@ -1051,7 +1048,7 @@ int nvme_mi_admin_set_features(nvme_mi_ctrl_t ctrl,
nvme_admin_set_features);
req_hdr.cdw1 = cpu_to_le32(args->nsid);
- req_hdr.cdw10 = cpu_to_le32((args->save ? 1 : 0) << 31 |
+ req_hdr.cdw10 = cpu_to_le32((__u32)!!args->save << 31 |
(args->fid & 0xff));
req_hdr.cdw14 = cpu_to_le32(args->uuidx & 0x7f);
req_hdr.cdw11 = cpu_to_le32(args->cdw11);
@@ -1223,7 +1220,7 @@ int nvme_mi_admin_fw_commit(nvme_mi_ctrl_t ctrl,
nvme_mi_admin_init_req(&req, &req_hdr, ctrl->id,
nvme_admin_fw_commit);
- req_hdr.cdw10 = cpu_to_le32(((args->bpid & 0x1) << 31) |
+ req_hdr.cdw10 = cpu_to_le32(((__u32)(args->bpid & 0x1) << 31) |
((args->action & 0x7) << 3) |
((args->slot & 0x7) << 0));
diff --git a/src/nvme/mi.h b/src/nvme/mi.h
index 211cb29..bd26627 100644
--- a/src/nvme/mi.h
+++ b/src/nvme/mi.h
@@ -1395,7 +1395,6 @@ static inline int nvme_mi_admin_identify_primary_ctrl(nvme_mi_ctrl_t ctrl,
* nvme_mi_admin_identify_secondary_ctrl_list() - Perform an Admin identify for
* a secondary controller list.
* @ctrl: Controller to process identify command
- * @nsid: Namespace ID to specify list start
* @cntid: Controller ID to specify list start
* @list: List data to populate
*
@@ -1412,7 +1411,6 @@ static inline int nvme_mi_admin_identify_primary_ctrl(nvme_mi_ctrl_t ctrl,
* See: &struct nvme_secondary_ctrl_list
*/
static inline int nvme_mi_admin_identify_secondary_ctrl_list(nvme_mi_ctrl_t ctrl,
- __u32 nsid,
__u16 cntid,
struct nvme_secondary_ctrl_list *list)
{
@@ -1422,7 +1420,7 @@ static inline int nvme_mi_admin_identify_secondary_ctrl_list(nvme_mi_ctrl_t ctrl
.args_size = sizeof(args),
.cns = NVME_IDENTIFY_CNS_SECONDARY_CTRL_LIST,
.csi = NVME_CSI_NVM,
- .nsid = nsid,
+ .nsid = NVME_NSID_NONE,
.cntid = cntid,
.cns_specific_id = NVME_CNSSPECID_NONE,
.uuidx = NVME_UUID_NONE,
@@ -2109,6 +2107,41 @@ static inline int nvme_mi_admin_get_log_boot_partition(nvme_mi_ctrl_t ctrl,
}
/**
+ * nvme_mi_admin_get_log_phy_rx_eom() - Retrieve Physical Interface Receiver Eye Opening Measurement Log
+ * @ctrl: Controller to query
+ * @lsp: Log specific, controls action and measurement quality
+ * @controller: Target controller ID
+ * @len: The allocated size, minimum
+ * struct nvme_phy_rx_eom_log
+ * @log: User address to store the log page
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise
+ */
+static inline int nvme_mi_admin_get_log_phy_rx_eom(nvme_mi_ctrl_t ctrl,
+ __u8 lsp, __u16 controller,
+ __u32 len,
+ struct nvme_phy_rx_eom_log *log)
+{
+ struct nvme_get_log_args args = {
+ .lpo = 0,
+ .result = NULL,
+ .log = log,
+ .args_size = sizeof(args),
+ .lid = NVME_LOG_LID_PHY_RX_EOM,
+ .len = len,
+ .nsid = NVME_NSID_NONE,
+ .csi = NVME_CSI_NVM,
+ .lsi = controller,
+ .lsp = lsp,
+ .uuidx = NVME_UUID_NONE,
+ .rae = false,
+ .ot = false,
+ };
+ return nvme_mi_admin_get_log(ctrl, &args);
+}
+
+/**
* nvme_mi_admin_get_log_discovery() - Retrieve Discovery log page
* @ctrl: Controller to query
* @rae: Retain asynchronous events
diff --git a/src/nvme/nbft.c b/src/nvme/nbft.c
index a1e17cd..2c87088 100644
--- a/src/nvme/nbft.c
+++ b/src/nvme/nbft.c
@@ -33,17 +33,15 @@ static __u8 csum(const __u8 *buffer, ssize_t length)
static void format_ip_addr(char *buf, size_t buflen, __u8 *addr)
{
- struct in6_addr *addr_ipv6;
+ struct in6_addr addr_ipv6;
- addr_ipv6 = (struct in6_addr *)addr;
- if (addr_ipv6->s6_addr32[0] == 0 &&
- addr_ipv6->s6_addr32[1] == 0 &&
- ntohl(addr_ipv6->s6_addr32[2]) == 0xffff)
+ memcpy(&addr_ipv6, addr, sizeof(addr_ipv6));
+ if (IN6_IS_ADDR_V4MAPPED(&addr_ipv6))
/* ipv4 */
- inet_ntop(AF_INET, &(addr_ipv6->s6_addr32[3]), buf, buflen);
+ inet_ntop(AF_INET, &addr_ipv6.s6_addr32[3], buf, buflen);
else
/* ipv6 */
- inet_ntop(AF_INET6, addr_ipv6, buf, buflen);
+ inet_ntop(AF_INET6, &addr_ipv6, buf, buflen);
}
static bool in_heap(struct nbft_header *header, struct nbft_heap_obj obj)
@@ -199,15 +197,15 @@ static int read_ssns(struct nbft_info *nbft,
verify(raw_ssns->structure_id == NBFT_DESC_SSNS,
"invalid ID in SSNS descriptor");
+ /* verify transport type */
+ verify(raw_ssns->trtype == NBFT_TRTYPE_TCP,
+ "invalid transport type in SSNS descriptor");
+
ssns = calloc(1, sizeof(*ssns));
if (!ssns)
return -ENOMEM;
ssns->index = le16_to_cpu(raw_ssns->index);
-
- /* transport type */
- verify(raw_ssns->trtype == NBFT_TRTYPE_TCP,
- "invalid transport type in SSNS descriptor");
strncpy(ssns->transport, trtype_to_string(raw_ssns->trtype), sizeof(ssns->transport));
/* transport specific flags */
@@ -413,26 +411,29 @@ static int read_discovery(struct nbft_info *nbft,
struct nbft_discovery *raw_discovery,
struct nbft_info_discovery **d)
{
- struct nbft_info_discovery *discovery;
+ struct nbft_info_discovery *discovery = NULL;
struct nbft_header *header = (struct nbft_header *)nbft->raw_nbft;
+ int r = -EINVAL;
if (!(raw_discovery->flags & NBFT_DISCOVERY_VALID))
- return -EINVAL;
+ goto error;
verify(raw_discovery->structure_id == NBFT_DESC_DISCOVERY,
"invalid ID in discovery descriptor");
discovery = calloc(1, sizeof(struct nbft_info_discovery));
- if (!discovery)
- return -ENOMEM;
+ if (!discovery) {
+ r = -ENOMEM;
+ goto error;
+ }
discovery->index = raw_discovery->index;
if (get_heap_obj(raw_discovery, discovery_ctrl_addr_obj, 1, &discovery->uri))
- return -EINVAL;
+ goto error;
if (get_heap_obj(raw_discovery, discovery_ctrl_nqn_obj, 1, &discovery->nqn))
- return -EINVAL;
+ goto error;
discovery->hfi = hfi_from_index(nbft, raw_discovery->hfi_index);
if (raw_discovery->hfi_index && !discovery->hfi)
@@ -447,7 +448,12 @@ static int read_discovery(struct nbft_info *nbft,
nbft->filename, discovery->index);
*d = discovery;
- return 0;
+ r = 0;
+
+error:
+ if (r)
+ free(discovery);
+ return r;
}
static int read_security(struct nbft_info *nbft,
diff --git a/src/nvme/private.h b/src/nvme/private.h
index 809b3bb..ee9d738 100644
--- a/src/nvme/private.h
+++ b/src/nvme/private.h
@@ -106,6 +106,7 @@ struct nvme_subsystem {
char *firmware;
char *subsystype;
char *application;
+ char *iopolicy;
};
struct nvme_host {
@@ -179,7 +180,9 @@ int json_dump_tree(nvme_root_t r);
nvme_ctrl_t __nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport,
const char *traddr, const char *host_traddr,
const char *host_iface, const char *trsvcid,
- nvme_ctrl_t p);
+ const char *subsysnqn, nvme_ctrl_t p);
+
+void *__nvme_alloc(size_t len);
#if (LOG_FUNCNAME == 1)
#define __nvme_log_func __func__
@@ -197,6 +200,11 @@ __nvme_msg(nvme_root_t r, int lvl, const char *func, const char *format, ...);
format, ##__VA_ARGS__); \
} while (0)
+#define root_from_ctrl(c) ((c)->s && (c)->s->h ? (c)->s->h->r : NULL)
+#define root_from_ns(n) ((n)->s && (n)->s->h ? (n)->s->h->r : \
+ (n)->c && (n)->c->s && (n)->c->s->h ? (n)->c->s->h->r : \
+ NULL)
+
/* mi internal headers */
/* internal transport API */
diff --git a/src/nvme/tree.c b/src/nvme/tree.c
index a2ac069..07a3c53 100644
--- a/src/nvme/tree.c
+++ b/src/nvme/tree.c
@@ -15,6 +15,7 @@
#include <fcntl.h>
#include <libgen.h>
#include <unistd.h>
+#include <ifaddrs.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -24,6 +25,7 @@
#include <ccan/endian/endian.h>
#include <ccan/list/list.h>
+#include "cleanup.h"
#include "ioctl.h"
#include "linux.h"
#include "filters.h"
@@ -34,6 +36,31 @@
#include "log.h"
#include "private.h"
+/**
+ * struct candidate_args - Used to look for a controller matching these parameters
+ * @transport: Transport type: loop, fc, rdma, tcp
+ * @traddr: Transport address (destination address)
+ * @trsvcid: Transport service ID
+ * @subsysnqn: Subsystem NQN
+ * @host_traddr: Host transport address (source address)
+ * @host_iface: Host interface for connection (tcp only)
+ * @iface_list: Interface list (tcp only)
+ * @addreq: Address comparison function (for traddr, host-traddr)
+ * @well_known_nqn: Set to "true" when @subsysnqn is the well-known NQN
+ */
+struct candidate_args {
+ const char *transport;
+ const char *traddr;
+ const char *trsvcid;
+ const char *subsysnqn;
+ const char *host_traddr;
+ const char *host_iface;
+ struct ifaddrs *iface_list;
+ bool (*addreq)(const char *, const char *);
+ bool well_known_nqn;
+};
+typedef bool (*ctrl_match_t)(struct nvme_ctrl *c, struct candidate_args *candidate);
+
const char *nvme_slots_sysfs_dir = "/sys/bus/pci/slots";
static struct nvme_host *default_host;
@@ -78,17 +105,24 @@ static bool streqcase0(const char *s1, const char *s2)
return !strcasecmp(s1, s2);
}
-static inline void nvme_free_dirents(struct dirent **d, int i)
+struct dirents {
+ struct dirent **ents;
+ int num;
+};
+
+static void cleanup_dirents(struct dirents *ents)
{
- while (i-- > 0)
- free(d[i]);
- free(d);
+ while (ents->num > 0)
+ free(ents->ents[--ents->num]);
+ free(ents->ents);
}
+#define _cleanup_dirents_ __cleanup__(cleanup_dirents)
+
nvme_host_t nvme_default_host(nvme_root_t r)
{
struct nvme_host *h;
- char *hostnqn, *hostid;
+ _cleanup_free_ char *hostnqn, *hostid;
hostnqn = nvmf_hostnqn_from_file();
if (!hostnqn)
@@ -100,61 +134,55 @@ nvme_host_t nvme_default_host(nvme_root_t r)
nvme_host_set_hostsymname(h, NULL);
default_host = h;
- free(hostnqn);
- if (hostid)
- free(hostid);
return h;
}
int nvme_scan_topology(struct nvme_root *r, nvme_scan_filter_t f, void *f_args)
{
- struct dirent **subsys, **ctrls;
- int i, num_subsys, num_ctrls, ret;
+ _cleanup_dirents_ struct dirents subsys = {}, ctrls = {};
+ int i, ret;
if (!r)
return 0;
- num_ctrls = nvme_scan_ctrls(&ctrls);
- if (num_ctrls < 0) {
+ ctrls.num = nvme_scan_ctrls(&ctrls.ents);
+ if (ctrls.num < 0) {
nvme_msg(r, LOG_DEBUG, "failed to scan ctrls: %s\n",
strerror(errno));
- return num_ctrls;
+ return ctrls.num;
}
- for (i = 0; i < num_ctrls; i++) {
- nvme_ctrl_t c = nvme_scan_ctrl(r, ctrls[i]->d_name);
+ for (i = 0; i < ctrls.num; i++) {
+ nvme_ctrl_t c = nvme_scan_ctrl(r, ctrls.ents[i]->d_name);
if (!c) {
nvme_msg(r, LOG_DEBUG, "failed to scan ctrl %s: %s\n",
- ctrls[i]->d_name, strerror(errno));
+ ctrls.ents[i]->d_name, strerror(errno));
continue;
}
if ((f) && !f(NULL, c, NULL, f_args)) {
nvme_msg(r, LOG_DEBUG, "filter out controller %s\n",
- ctrls[i]->d_name);
+ ctrls.ents[i]->d_name);
nvme_free_ctrl(c);
}
}
- nvme_free_dirents(ctrls, i);
-
- num_subsys = nvme_scan_subsystems(&subsys);
- if (num_subsys < 0) {
+ subsys.num = nvme_scan_subsystems(&subsys.ents);
+ if (subsys.num < 0) {
nvme_msg(r, LOG_DEBUG, "failed to scan subsystems: %s\n",
strerror(errno));
- return num_subsys;
+ return subsys.num;
}
- for (i = 0; i < num_subsys; i++) {
- ret = nvme_scan_subsystem(r, subsys[i]->d_name, f, f_args);
+ for (i = 0; i < subsys.num; i++) {
+ ret = nvme_scan_subsystem(
+ r, subsys.ents[i]->d_name, f, f_args);
if (ret < 0) {
nvme_msg(r, LOG_DEBUG,
"failed to scan subsystem %s: %s\n",
- subsys[i]->d_name, strerror(errno));
+ subsys.ents[i]->d_name, strerror(errno));
}
}
- nvme_free_dirents(subsys, i);
-
return 0;
}
@@ -172,6 +200,7 @@ nvme_root_t nvme_create_root(FILE *fp, int log_level)
r->fp = fp;
list_head_init(&r->hosts);
list_head_init(&r->endpoints);
+ nvme_set_root(r);
return r;
}
@@ -234,8 +263,10 @@ const char *nvme_root_get_application(nvme_root_t r)
void nvme_root_set_application(nvme_root_t r, const char *a)
{
- if (r->application)
+ if (r->application) {
free(r->application);
+ r->application = NULL;
+ }
if (a)
r->application = strdup(a);
}
@@ -338,9 +369,18 @@ void nvme_free_tree(nvme_root_t r)
free(r->config_file);
if (r->application)
free(r->application);
+ nvme_set_root(NULL);
free(r);
}
+void nvme_root_release_fds(nvme_root_t r)
+{
+ struct nvme_host *h, *_h;
+
+ nvme_for_each_host_safe(r, h, _h)
+ nvme_host_release_fds(h);
+}
+
const char *nvme_subsystem_get_nqn(nvme_subsystem_t s)
{
return s->subsysnqn;
@@ -368,12 +408,19 @@ const char *nvme_subsystem_get_application(nvme_subsystem_t s)
void nvme_subsystem_set_application(nvme_subsystem_t s, const char *a)
{
- if (s->application)
+ if (s->application) {
free(s->application);
+ s->application = NULL;
+ }
if (a)
s->application = strdup(a);
}
+const char *nvme_subsystem_get_iopolicy(nvme_subsystem_t s)
+{
+ return s->iopolicy;
+}
+
nvme_ctrl_t nvme_subsystem_first_ctrl(nvme_subsystem_t s)
{
return list_top(&s->ctrls, struct nvme_ctrl, entry);
@@ -412,7 +459,7 @@ nvme_path_t nvme_namespace_next_path(nvme_ns_t ns, nvme_path_t p)
static void __nvme_free_ns(struct nvme_ns *n)
{
list_del_init(&n->entry);
- close(n->fd);
+ nvme_ns_release_fd(n);
free(n->generic_name);
free(n->name);
free(n->sysfs_dir);
@@ -451,9 +498,23 @@ static void __nvme_free_subsystem(struct nvme_subsystem *s)
free(s->subsystype);
if (s->application)
free(s->application);
+ if (s->iopolicy)
+ free(s->iopolicy);
free(s);
}
+void nvme_subsystem_release_fds(struct nvme_subsystem *s)
+{
+ struct nvme_ctrl *c, *_c;
+ struct nvme_ns *n, *_n;
+
+ nvme_subsystem_for_each_ctrl_safe(s, c, _c)
+ nvme_ctrl_release_fd(c);
+
+ nvme_subsystem_for_each_ns_safe(s, n, _n)
+ nvme_ns_release_fd(n);
+}
+
/*
* Stub for SWIG
*/
@@ -524,6 +585,14 @@ static void __nvme_free_host(struct nvme_host *h)
free(h);
}
+void nvme_host_release_fds(struct nvme_host *h)
+{
+ struct nvme_subsystem *s, *_s;
+
+ nvme_for_each_subsystem_safe(h, s, _s)
+ nvme_subsystem_release_fds(s);
+}
+
/* Stub for SWIG */
void nvme_free_host(struct nvme_host *h)
{
@@ -563,27 +632,26 @@ struct nvme_host *nvme_lookup_host(nvme_root_t r, const char *hostnqn,
static int nvme_subsystem_scan_namespaces(nvme_root_t r, nvme_subsystem_t s,
nvme_scan_filter_t f, void *f_args)
{
- struct dirent **namespaces;
- int i, num_ns, ret;
+ _cleanup_dirents_ struct dirents namespaces = {};
+ int i, ret;
- num_ns = nvme_scan_subsystem_namespaces(s, &namespaces);
- if (num_ns < 0) {
+ namespaces.num = nvme_scan_subsystem_namespaces(s, &namespaces.ents);
+ if (namespaces.num < 0) {
nvme_msg(r, LOG_DEBUG,
"failed to scan namespaces for subsys %s: %s\n",
s->subsysnqn, strerror(errno));
- return num_ns;
+ return namespaces.num;
}
- for (i = 0; i < num_ns; i++) {
+ for (i = 0; i < namespaces.num; i++) {
ret = nvme_subsystem_scan_namespace(r, s,
- namespaces[i]->d_name, f, f_args);
+ namespaces.ents[i]->d_name, f, f_args);
if (ret < 0)
nvme_msg(r, LOG_DEBUG,
"failed to scan namespace %s: %s\n",
- namespaces[i]->d_name, strerror(errno));
+ namespaces.ents[i]->d_name, strerror(errno));
}
- nvme_free_dirents(namespaces, i);
return 0;
}
@@ -610,15 +678,28 @@ static int nvme_init_subsystem(nvme_subsystem_t s, const char *name)
s->sysfs_dir = (char *)path;
if (s->h->r->application)
s->application = strdup(s->h->r->application);
+ s->iopolicy = nvme_get_attr(path, "iopolicy");
return 0;
}
+static bool __nvme_scan_subsystem(struct nvme_root *r, nvme_subsystem_t s,
+ nvme_scan_filter_t f, void *f_args)
+{
+ if (f && !f(s, NULL, NULL, f_args)) {
+ nvme_msg(r, LOG_DEBUG, "filter out subsystem %s\n", s->name);
+ __nvme_free_subsystem(s);
+ return false;
+ }
+ nvme_subsystem_scan_namespaces(r, s, f, f_args);
+ return true;
+}
+
static int nvme_scan_subsystem(struct nvme_root *r, const char *name,
nvme_scan_filter_t f, void *f_args)
{
struct nvme_subsystem *s = NULL, *_s;
- char *path, *subsysnqn;
+ _cleanup_free_ char *path = NULL, *subsysnqn = NULL;
nvme_host_t h = NULL;
int ret;
@@ -628,7 +709,6 @@ static int nvme_scan_subsystem(struct nvme_root *r, const char *name,
return ret;
subsysnqn = nvme_get_attr(path, "subsysnqn");
- free(path);
if (!subsysnqn) {
errno = ENODEV;
return -1;
@@ -644,6 +724,10 @@ static int nvme_scan_subsystem(struct nvme_root *r, const char *name,
continue;
if (strcmp(_s->name, name))
continue;
+ if (!__nvme_scan_subsystem(r, _s, f, f_args)) {
+ errno = EINVAL;
+ return -1;
+ }
s = _s;
}
}
@@ -659,26 +743,18 @@ static int nvme_scan_subsystem(struct nvme_root *r, const char *name,
s = nvme_alloc_subsystem(h, name, subsysnqn);
if (!s) {
errno = ENOMEM;
+ return -1;
+ }
+ if (!__nvme_scan_subsystem(r, s, f, f_args)) {
+ errno = EINVAL;
+ return -1;
}
} else if (strcmp(s->subsysnqn, subsysnqn)) {
- nvme_msg(r, LOG_WARNING, "NQN mismatch for subsystem '%s'\n",
+ nvme_msg(r, LOG_DEBUG, "NQN mismatch for subsystem '%s'\n",
name);
- s = NULL;
- free(subsysnqn);
errno = EINVAL;
return -1;
}
- free(subsysnqn);
- if (!s)
- return -1;
-
- if (f && !f(s, NULL, NULL, f_args)) {
- nvme_msg(r, LOG_DEBUG, "filter out subsystem %s\n", name);
- __nvme_free_subsystem(s);
- return 0;
- }
-
- nvme_subsystem_scan_namespaces(r, s, f, f_args);
return 0;
}
@@ -740,7 +816,7 @@ static void nvme_subsystem_set_path_ns(nvme_subsystem_t s, nvme_path_t p)
static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name)
{
struct nvme_path *p;
- char *path, *grpid;
+ _cleanup_free_ char *path = NULL, *grpid = NULL;
int ret;
nvme_msg(r, LOG_DEBUG, "scan controller %s path %s\n",
@@ -758,12 +834,13 @@ static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name)
p = calloc(1, sizeof(*p));
if (!p) {
errno = ENOMEM;
- goto free_path;
+ return -1;
}
p->c = c;
p->name = strdup(name);
p->sysfs_dir = path;
+ path = NULL;
p->ana_state = nvme_get_path_attr(p, "ana_state");
if (!p->ana_state)
p->ana_state = strdup("optimized");
@@ -771,7 +848,6 @@ static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name)
grpid = nvme_get_path_attr(p, "ana_grpid");
if (grpid) {
sscanf(grpid, "%d", &p->grpid);
- free(grpid);
}
list_node_init(&p->nentry);
@@ -779,26 +855,29 @@ static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name)
list_node_init(&p->entry);
list_add(&c->paths, &p->entry);
return 0;
-
-free_path:
- free(path);
- return -1;
}
int nvme_ctrl_get_fd(nvme_ctrl_t c)
{
- nvme_root_t r = c->s && c->s->h ? c->s->h->r : NULL;
-
if (c->fd < 0) {
c->fd = nvme_open(c->name);
if (c->fd < 0)
- nvme_msg(r, LOG_ERR,
+ nvme_msg(root_from_ctrl(c), LOG_ERR,
"Failed to open ctrl %s, errno %d\n",
c->name, errno);
}
return c->fd;
}
+void nvme_ctrl_release_fd(nvme_ctrl_t c)
+{
+ if (c->fd < 0)
+ return;
+
+ close(c->fd);
+ c->fd = -1;
+}
+
nvme_subsystem_t nvme_ctrl_get_subsystem(nvme_ctrl_t c)
{
return c->s;
@@ -824,6 +903,32 @@ const char *nvme_ctrl_get_address(nvme_ctrl_t c)
return c->address ? c->address : "";
}
+char *nvme_ctrl_get_src_addr(nvme_ctrl_t c, char *src_addr, size_t src_addr_len)
+{
+ size_t l;
+ char *p;
+
+ if (!c->address)
+ return NULL;
+
+ p = strstr(c->address, "src_addr=");
+ if (!p)
+ return NULL;
+
+ p += strlen("src_addr=");
+ l = strcspn(p, ",%"); /* % to eliminate IPv6 scope (if present) */
+ if (l >= src_addr_len) {
+ nvme_msg(root_from_ctrl(c), LOG_ERR,
+ "Buffer for src_addr is too small (%zu must be > %zu)\n",
+ src_addr_len, l);
+ return NULL;
+ }
+
+ strncpy(src_addr, p, l);
+ src_addr[l] = '\0';
+ return src_addr;
+}
+
const char *nvme_ctrl_get_phy_slot(nvme_ctrl_t c)
{
return c->phy_slot ? c->phy_slot : "";
@@ -998,10 +1103,7 @@ nvme_path_t nvme_ctrl_next_path(nvme_ctrl_t c, nvme_path_t p)
do { if (a) { free(a); (a) = NULL; } } while (0)
void nvme_deconfigure_ctrl(nvme_ctrl_t c)
{
- if (c->fd >= 0) {
- close(c->fd);
- c->fd = -1;
- }
+ nvme_ctrl_release_fd(c);
FREE_CTRL_ATTR(c->name);
FREE_CTRL_ATTR(c->sysfs_dir);
FREE_CTRL_ATTR(c->firmware);
@@ -1140,40 +1242,391 @@ struct nvme_ctrl *nvme_create_ctrl(nvme_root_t r,
return c;
}
+/**
+ * _tcp_ctrl_match_host_traddr_no_src_addr() - Match host_traddr w/o src_addr
+ * @c: An existing controller instance
+ * @candidate: Candidate ctrl we're trying to match with @c.
+ *
+ * On kernels prior to 6.1 (i.e. src_addr is not available), try to match
+ * a candidate controller's host_traddr to that of an existing controller.
+ *
+ * This function takes an optimistic approach. In doubt, it will declare a
+ * match and return true.
+ *
+ * Return: true if @c->host_traddr matches @candidate->host_traddr. false otherwise.
+ */
+static bool _tcp_ctrl_match_host_traddr_no_src_addr(struct nvme_ctrl *c, struct candidate_args *candidate)
+{
+ if (c->cfg.host_traddr)
+ return candidate->addreq(candidate->host_traddr, c->cfg.host_traddr);
+
+ /* If c->cfg.host_traddr is NULL, then the controller (c)
+ * uses the interface's primary address as the source
+ * address. If c->cfg.host_iface is defined we can
+ * determine the primary address associated with that
+ * interface and compare that to the candidate->host_traddr.
+ */
+ if (c->cfg.host_iface)
+ return nvme_iface_primary_addr_matches(candidate->iface_list,
+ c->cfg.host_iface,
+ candidate->host_traddr);
+
+ /* If both c->cfg.host_traddr and c->cfg.host_iface are
+ * NULL, we don't have enough information to make a
+ * 100% positive match. Regardless, let's be optimistic
+ * and assume that we have a match.
+ */
+ nvme_msg(root_from_ctrl(c), LOG_DEBUG,
+ "Not enough data, but assume %s matches candidate's host_traddr: %s\n",
+ nvme_ctrl_get_name(c), candidate->host_traddr);
+
+ return true;
+}
+
+/**
+ * _tcp_ctrl_match_host_iface_no_src_addr() - Match host_iface w/o src_addr
+ * @c: An existing controller instance
+ * @candidate: Candidate ctrl we're trying to match with @c.
+ *
+ * On kernels prior to 6.1 (i.e. src_addr is not available), try to match
+ * a candidate controller's host_iface to that of an existing controller.
+ *
+ * This function takes an optimistic approach. In doubt, it will declare a
+ * match and return true.
+ *
+ * Return: true if @c->host_iface matches @candidate->host_iface. false otherwise.
+ */
+static bool _tcp_ctrl_match_host_iface_no_src_addr(struct nvme_ctrl *c, struct candidate_args *candidate)
+{
+ if (c->cfg.host_iface)
+ return streq0(candidate->host_iface, c->cfg.host_iface);
+
+ /* If c->cfg.host_traddr is not NULL we can infer the controller's (c)
+ * interface from it and compare it to the candidate->host_iface.
+ */
+ if (c->cfg.host_traddr) {
+ const char *c_host_iface;
+
+ c_host_iface = nvme_iface_matching_addr(candidate->iface_list, c->cfg.host_traddr);
+ return streq0(candidate->host_iface, c_host_iface);
+ }
+
+ /* If both c->cfg.host_traddr and c->cfg.host_iface are
+ * NULL, we don't have enough information to make a
+ * 100% positive match. Regardless, let's be optimistic
+ * and assume that we have a match.
+ */
+ nvme_msg(root_from_ctrl(c), LOG_DEBUG,
+ "Not enough data, but assume %s matches candidate's host_iface: %s\n",
+ nvme_ctrl_get_name(c), candidate->host_iface);
+
+ return true;
+}
+
+/**
+ * _tcp_opt_params_match_no_src_addr() - Match optional host_traddr/host_iface w/o src_addr
+ * @c: An existing controller instance
+ * @candidate: Candidate ctrl we're trying to match with @c.
+ *
+ * Before kernel 6.1, the src_addr was not reported by the kernel which makes
+ * it hard to match a candidate's host_traddr and host_iface to an existing
+ * controller if that controller was created without specifying the
+ * host_traddr and/or host_iface. This function tries its best in the absence
+ * of a src_addr to match @c to @candidate. This may not be 100% accurate.
+ * Only the src_addr can provide 100% accuracy.
+ *
+ * This function takes an optimistic approach. In doubt, it will declare a
+ * match and return true.
+ *
+ * Return: true if @c matches @candidate. false otherwise.
+ */
+static bool _tcp_opt_params_match_no_src_addr(struct nvme_ctrl *c, struct candidate_args *candidate)
+{
+ /* Check host_traddr only if candidate is interested */
+ if (candidate->host_traddr) {
+ if (!_tcp_ctrl_match_host_traddr_no_src_addr(c, candidate))
+ return false;
+ }
+
+ /* Check host_iface only if candidate is interested */
+ if (candidate->host_iface) {
+ if (!_tcp_ctrl_match_host_iface_no_src_addr(c, candidate))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * _tcp_opt_params_match() - Match optional host_traddr/host_iface
+ * @c: An existing controller instance
+ * @candidate: Candidate ctrl we're trying to match with @c.
+ *
+ * The host_traddr and host_iface are optional for TCP. When they are not
+ * specified, the kernel looks up the destination IP address (traddr) in the
+ * routing table to determine the best interface for the connection. The
+ * kernel then retrieves the primary IP address assigned to that interface
+ * and uses that as the connection’s source address.
+ *
+ * An interface’s primary address is the default source address used for
+ * all connections made on that interface unless host-traddr is used to
+ * override the default. Kernel-selected interfaces and/or source addresses
+ * are hidden from user-space applications unless the kernel makes that
+ * information available through the "src_addr" attribute in the
+ * sysfs (kernel 6.1 or later).
+ *
+ * Sometimes, an application may force the interface by specifying the
+ * "host-iface" or may force a different source address (instead of the
+ * primary address) by providing the "host-traddr".
+ *
+ * If the candidate specifies the host_traddr and/or host_iface but they
+ * do not match the existing controller's host_traddr and/or host_iface
+ * (they could be NULL), we may still be able to find a match by taking
+ * the existing controller's src_addr into consideration since that
+ * parameter identifies the actual source address of the connection and
+ * therefore can be used to infer the interface of the connection. However,
+ * the src_addr can only be read from the nvme device's sysfs "address"
+ * attribute starting with kernel 6.1 (or kernels that backported the
+ * src_addr patch).
+ *
+ * For legacy kernels that do not provide the src_addr we must use a
+ * different algorithm to match the host_traddr and host_iface, but
+ * it's not 100% accurate.
+ *
+ * Return: true if @c matches @candidate. false otherwise.
+ */
+static bool _tcp_opt_params_match(struct nvme_ctrl *c, struct candidate_args *candidate)
+{
+ char *src_addr, buffer[INET6_ADDRSTRLEN];
+
+ /* Check if src_addr is available (kernel 6.1 or later) */
+ src_addr = nvme_ctrl_get_src_addr(c, buffer, sizeof(buffer));
+ if (!src_addr)
+ return _tcp_opt_params_match_no_src_addr(c, candidate);
+
+ /* Check host_traddr only if candidate is interested */
+ if (candidate->host_traddr &&
+ !candidate->addreq(candidate->host_traddr, src_addr))
+ return false;
+
+ /* Check host_iface only if candidate is interested */
+ if (candidate->host_iface &&
+ !streq0(candidate->host_iface,
+ nvme_iface_matching_addr(candidate->iface_list, src_addr)))
+ return false;
+
+ return true;
+}
+
+/**
+ * _tcp_match_ctrl() - Check if controller matches candidate (TCP only)
+ * @c: An existing controller instance
+ * @candidate: Candidate ctrl we're trying to match with @c.
+ *
+ * We want to determine if an existing controller can be re-used
+ * for the candidate controller we're trying to instantiate.
+ *
+ * For TCP, we do not have a match if the candidate's transport, traddr,
+ * trsvcid are not identical to those of the existing controller.
+ * These 3 parameters are mandatory for a match.
+ *
+ * The host_traddr and host_iface are optional. When the candidate does
+ * not specify them (both NULL), we can ignore them. Otherwise, we must
+ * employ advanced investigation techniques to determine if there's a match.
+ *
+ * Return: true if a match is found, false otherwise.
+ */
+static bool _tcp_match_ctrl(struct nvme_ctrl *c, struct candidate_args *candidate)
+{
+ if (!streq0(c->transport, candidate->transport))
+ return false;
+
+ if (!streq0(c->trsvcid, candidate->trsvcid))
+ return false;
+
+ if (!candidate->addreq(c->traddr, candidate->traddr))
+ return false;
+
+ if (candidate->well_known_nqn && !nvme_ctrl_is_discovery_ctrl(c))
+ return false;
+
+ if (candidate->subsysnqn && !streq0(c->subsysnqn, candidate->subsysnqn))
+ return false;
+
+ /* Check host_traddr / host_iface only if candidate is interested */
+ if ((candidate->host_iface || candidate->host_traddr) &&
+ !_tcp_opt_params_match(c, candidate))
+ return false;
+
+ return true;
+}
+
+/**
+ * _match_ctrl() - Check if controller matches candidate (non TCP transport)
+ * @c: An existing controller instance
+ * @candidate: Candidate ctrl we're trying to match with @c.
+ *
+ * We want to determine if an existing controller can be re-used
+ * for the candidate controller we're trying to instantiate. This function
+ * is used for all transports except TCP.
+ *
+ * Return: true if a match is found, false otherwise.
+ */
+static bool _match_ctrl(struct nvme_ctrl *c, struct candidate_args *candidate)
+{
+ if (!streq0(c->transport, candidate->transport))
+ return false;
+
+ if (candidate->traddr && c->traddr &&
+ !candidate->addreq(c->traddr, candidate->traddr))
+ return false;
+
+ if (candidate->host_traddr && c->cfg.host_traddr &&
+ !candidate->addreq(c->cfg.host_traddr, candidate->host_traddr))
+ return false;
+
+ if (candidate->host_iface && c->cfg.host_iface &&
+ !streq0(c->cfg.host_iface, candidate->host_iface))
+ return false;
+
+ if (candidate->trsvcid && c->trsvcid &&
+ !streq0(c->trsvcid, candidate->trsvcid))
+ return false;
+
+ if (candidate->well_known_nqn && !nvme_ctrl_is_discovery_ctrl(c))
+ return false;
+
+ if (candidate->subsysnqn && !streq0(c->subsysnqn, candidate->subsysnqn))
+ return false;
+
+ return true;
+}
+/**
+ * _candidate_init() - Init candidate and get the matching function
+ *
+ * @candidate: Candidate struct to initialize
+ * @transport: Transport name
+ * @traddr: Transport address
+ * @trsvcid: Transport service identifier
+ * @subsysnqn: Subsystem NQN
+ * @host_traddr: Host transport address
+ * @host_iface: Host interface name
+ * @host_iface: Host interface name
+ *
+ * The function _candidate_free() must be called to release resources once
+ * the candidate object is no longer required.
+ *
+ * Return: The matching function to use when comparing an existing
+ * controller to the candidate controller.
+ */
+static ctrl_match_t _candidate_init(struct candidate_args *candidate,
+ const char *transport,
+ const char *traddr,
+ const char *trsvcid,
+ const char *subsysnqn,
+ const char *host_traddr,
+ const char *host_iface)
+{
+ memset(candidate, 0, sizeof(*candidate));
+
+ candidate->traddr = traddr;
+ candidate->trsvcid = trsvcid;
+ candidate->transport = transport;
+ candidate->subsysnqn = subsysnqn;
+ candidate->host_iface = host_iface;
+ candidate->host_traddr = host_traddr;
+
+ if (streq0(subsysnqn, NVME_DISC_SUBSYS_NAME)) {
+ /* Since TP8013, the NQN of discovery controllers can be the
+ * well-known NQN (i.e. nqn.2014-08.org.nvmexpress.discovery) or
+ * a unique NQN. A DC created using the well-known NQN may later
+ * display a unique NQN when looked up in the sysfs. Therefore,
+ * ignore (i.e. set to NULL) the well-known NQN when looking for
+ * a match.
+ */
+ candidate->subsysnqn = NULL;
+ candidate->well_known_nqn = true;
+ }
+
+ if (streq0(transport, "tcp")) {
+ /* For TCP we may need to access the interface map.
+ * Let's retrieve and cache the map.
+ */
+ if (getifaddrs(&candidate->iface_list) == -1)
+ candidate->iface_list = NULL;
+
+ candidate->addreq = nvme_ipaddrs_eq;
+ return _tcp_match_ctrl;
+ }
+
+ if (streq0(transport, "rdma")) {
+ candidate->addreq = nvme_ipaddrs_eq;
+ return _match_ctrl;
+ }
+
+ /* All other transport types */
+ candidate->addreq = streqcase0;
+ return _match_ctrl;
+}
+
+/**
+ * _candidate_free() - Release resources allocated by _candidate_init()
+ *
+ * @candidate: data to free.
+ */
+static void _candidate_free(struct candidate_args *candidate)
+{
+ freeifaddrs(candidate->iface_list); /* This is NULL-safe */
+}
+
+#define _cleanup_candidate_ __cleanup__(_candidate_free)
+
nvme_ctrl_t __nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport,
const char *traddr, const char *host_traddr,
const char *host_iface, const char *trsvcid,
- nvme_ctrl_t p)
-
+ const char *subsysnqn, nvme_ctrl_t p)
{
- struct nvme_ctrl *c;
- bool (*addreq)(const char *, const char *);
+ struct nvme_ctrl *c, *matching_c = NULL;
+ _cleanup_candidate_ struct candidate_args candidate;
+ ctrl_match_t ctrl_match;
- if (!strcmp(transport, "tcp") || !strcmp(transport, "rdma"))
- addreq = nvme_ipaddrs_eq; /* IP address compare for TCP/RDMA */
- else
- addreq = streqcase0; /* Case-insensitive for FC (n/a for loop) */
+ /* Init candidate and get the matching function to use */
+ ctrl_match = _candidate_init(&candidate, transport, traddr, trsvcid,
+ subsysnqn, host_traddr, host_iface);
c = p ? nvme_subsystem_next_ctrl(s, p) : nvme_subsystem_first_ctrl(s);
for (; c != NULL; c = nvme_subsystem_next_ctrl(s, c)) {
- if (!streq0(c->transport, transport))
- continue;
- if (traddr && c->traddr &&
- !addreq(c->traddr, traddr))
- continue;
- if (host_traddr && c->cfg.host_traddr &&
- !addreq(c->cfg.host_traddr, host_traddr))
- continue;
- if (host_iface && c->cfg.host_iface &&
- !streq0(c->cfg.host_iface, host_iface))
- continue;
- if (trsvcid && c->trsvcid &&
- !streq0(c->trsvcid, trsvcid))
- continue;
- return c;
+ if (ctrl_match(c, &candidate)) {
+ matching_c = c;
+ break;
+ }
}
- return NULL;
+ return matching_c;
+}
+
+bool nvme_ctrl_config_match(struct nvme_ctrl *c, const char *transport,
+ const char *traddr, const char *trsvcid,
+ const char *subsysnqn, const char *host_traddr,
+ const char *host_iface)
+{
+ ctrl_match_t ctrl_match;
+ _cleanup_candidate_ struct candidate_args candidate;
+
+ /* Init candidate and get the matching function to use */
+ ctrl_match = _candidate_init(&candidate, transport, traddr, trsvcid,
+ subsysnqn, host_traddr, host_iface);
+
+ return ctrl_match(c, &candidate);
+}
+
+nvme_ctrl_t nvme_ctrl_find(nvme_subsystem_t s, const char *transport,
+ const char *traddr, const char *trsvcid,
+ const char *subsysnqn, const char *host_traddr,
+ const char *host_iface)
+{
+ return __nvme_lookup_ctrl(s, transport, traddr, host_traddr, host_iface,
+ trsvcid, subsysnqn, NULL/*p*/);
}
nvme_ctrl_t nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport,
@@ -1188,7 +1641,7 @@ nvme_ctrl_t nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport,
return NULL;
c = __nvme_lookup_ctrl(s, transport, traddr, host_traddr,
- host_iface, trsvcid, p);
+ host_iface, trsvcid, NULL, p);
if (c)
return c;
@@ -1205,73 +1658,63 @@ nvme_ctrl_t nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport,
static int nvme_ctrl_scan_paths(nvme_root_t r, struct nvme_ctrl *c)
{
- struct dirent **paths;
- int i, ret;
+ _cleanup_dirents_ struct dirents paths = {};
+ int i;
- ret = nvme_scan_ctrl_namespace_paths(c, &paths);
- if (ret < 0)
- return ret;
+ paths.num = nvme_scan_ctrl_namespace_paths(c, &paths.ents);
+ if (paths.num < 0)
+ return paths.num;
- for (i = 0; i < ret; i++)
- nvme_ctrl_scan_path(r, c, paths[i]->d_name);
+ for (i = 0; i < paths.num; i++)
+ nvme_ctrl_scan_path(r, c, paths.ents[i]->d_name);
- nvme_free_dirents(paths, i);
return 0;
}
static int nvme_ctrl_scan_namespaces(nvme_root_t r, struct nvme_ctrl *c)
{
- struct dirent **namespaces;
- int i, ret;
+ _cleanup_dirents_ struct dirents namespaces = {};
+ int i;
- ret = nvme_scan_ctrl_namespaces(c, &namespaces);
- for (i = 0; i < ret; i++)
- nvme_ctrl_scan_namespace(r, c, namespaces[i]->d_name);
+ namespaces.num = nvme_scan_ctrl_namespaces(c, &namespaces.ents);
+ for (i = 0; i < namespaces.num; i++)
+ nvme_ctrl_scan_namespace(r, c, namespaces.ents[i]->d_name);
- nvme_free_dirents(namespaces, i);
return 0;
}
static char *nvme_ctrl_lookup_subsystem_name(nvme_root_t r,
const char *ctrl_name)
{
- struct dirent **subsys;
- char *subsys_name = NULL;
- int ret, i;
+ _cleanup_dirents_ struct dirents subsys = {};
+ int i;
- ret = nvme_scan_subsystems(&subsys);
- if (ret < 0)
+ subsys.num = nvme_scan_subsystems(&subsys.ents);
+ if (subsys.num < 0)
return NULL;
- for (i = 0; i < ret; i++) {
+ for (i = 0; i < subsys.num; i++) {
struct stat st;
- char *path;
+ _cleanup_free_ char *path = NULL;
if (asprintf(&path, "%s/%s/%s", nvme_subsys_sysfs_dir,
- subsys[i]->d_name, ctrl_name) < 0) {
+ subsys.ents[i]->d_name, ctrl_name) < 0) {
errno = ENOMEM;
return NULL;
}
nvme_msg(r, LOG_DEBUG, "lookup subsystem %s\n", path);
if (stat(path, &st) < 0) {
- free(path);
continue;
}
- subsys_name = strdup(subsys[i]->d_name);
- free(path);
- break;
+ return strdup(subsys.ents[i]->d_name);
}
- nvme_free_dirents(subsys, ret);
- return subsys_name;
+ return NULL;
}
static char *nvme_ctrl_lookup_phy_slot(nvme_root_t r, const char *address)
{
- char *target_addr;
- char *addr;
- char *path;
- int found = 0;
+ _cleanup_free_ char *target_addr = NULL;
int ret;
- DIR *slots_dir;
+ _cleanup_dir_ DIR *slots_dir = NULL;
struct dirent *entry;
if (!address)
@@ -1289,25 +1732,20 @@ static char *nvme_ctrl_lookup_phy_slot(nvme_root_t r, const char *address)
if (entry->d_type == DT_DIR &&
strncmp(entry->d_name, ".", 1) != 0 &&
strncmp(entry->d_name, "..", 2) != 0) {
- ret = asprintf(&path, "/sys/bus/pci/slots/%s", entry->d_name);
+ _cleanup_free_ char *path = NULL;
+ _cleanup_free_ char *addr = NULL;
+
+ ret = asprintf(&path, "%s/%s",
+ nvme_slots_sysfs_dir, entry->d_name);
if (ret < 0) {
errno = ENOMEM;
return NULL;
}
addr = nvme_get_attr(path, "address");
- if (strcmp(addr, target_addr) == 0) {
- found = 1;
- free(path);
- free(addr);
- break;
- }
- free(path);
- free(addr);
+ if (strcmp(addr, target_addr) == 0)
+ return strdup(entry->d_name);
}
}
- free(target_addr);
- if (found)
- return strdup(entry->d_name);
return NULL;
}
@@ -1361,8 +1799,9 @@ static int nvme_configure_ctrl(nvme_root_t r, nvme_ctrl_t c, const char *path,
int nvme_init_ctrl(nvme_host_t h, nvme_ctrl_t c, int instance)
{
nvme_subsystem_t s;
- char *subsys_name = NULL;
- char *path, *name;
+ _cleanup_free_ char *subsys_name = NULL;
+ char *path;
+ _cleanup_free_ char *name = NULL;
int ret;
ret = asprintf(&name, "nvme%d", instance);
@@ -1373,20 +1812,19 @@ int nvme_init_ctrl(nvme_host_t h, nvme_ctrl_t c, int instance)
ret = asprintf(&path, "%s/nvme%d", nvme_ctrl_sysfs_dir, instance);
if (ret < 0) {
errno = ENOMEM;
- goto out_free_name;
+ return ret;
}
ret = nvme_configure_ctrl(h->r, c, path, name);
if (ret < 0) {
free(path);
- goto out_free_name;
+ return ret;
}
c->address = nvme_get_attr(path, "address");
if (!c->address && strcmp(c->transport, "loop")) {
errno = ENVME_CONNECT_INVAL_TR;
- ret = -1;
- goto out_free_name;
+ return -1;
}
subsys_name = nvme_ctrl_lookup_subsystem_name(h->r, name);
@@ -1395,23 +1833,17 @@ int nvme_init_ctrl(nvme_host_t h, nvme_ctrl_t c, int instance)
"Failed to lookup subsystem name for %s\n",
c->name);
errno = ENVME_CONNECT_LOOKUP_SUBSYS_NAME;
- ret = -1;
- goto out_free_name;
+ return -1;
}
s = nvme_lookup_subsystem(h, subsys_name, c->subsysnqn);
if (!s) {
errno = ENVME_CONNECT_LOOKUP_SUBSYS;
- ret = -1;
- goto out_free_subsys;
+ return -1;
}
if (s->subsystype && !strcmp(s->subsystype, "discovery"))
c->discovery_ctrl = true;
c->s = s;
list_add(&s->ctrls, &c->entry);
-out_free_subsys:
- free(subsys_name);
- out_free_name:
- free(name);
return ret;
}
@@ -1419,8 +1851,10 @@ static nvme_ctrl_t nvme_ctrl_alloc(nvme_root_t r, nvme_subsystem_t s,
const char *path, const char *name)
{
nvme_ctrl_t c, p;
- char *addr = NULL, *address = NULL, *a, *e;
- char *transport, *traddr = NULL, *trsvcid = NULL;
+ _cleanup_free_ char *addr = NULL, *address = NULL;
+ char *a, *e;
+ _cleanup_free_ char *transport;
+ char *traddr = NULL, *trsvcid = NULL;
char *host_traddr = NULL, *host_iface = NULL;
int ret;
@@ -1432,7 +1866,8 @@ static nvme_ctrl_t nvme_ctrl_alloc(nvme_root_t r, nvme_subsystem_t s,
/* Parse 'address' string into components */
addr = nvme_get_attr(path, "address");
if (!addr) {
- char *rpath = NULL, *p = NULL, *_a = NULL;
+ _cleanup_free_ char *rpath = NULL;
+ char *p = NULL, *_a = NULL;
/* loop transport might not have an address */
if (!strcmp(transport, "loop"))
@@ -1440,14 +1875,12 @@ static nvme_ctrl_t nvme_ctrl_alloc(nvme_root_t r, nvme_subsystem_t s,
/* Older kernel don't support pcie transport addresses */
if (strcmp(transport, "pcie")) {
- free(transport);
errno = ENXIO;
return NULL;
}
/* Figure out the PCI address from the attribute path */
rpath = realpath(path, NULL);
if (!rpath) {
- free(transport);
errno = ENOMEM;
return NULL;
}
@@ -1462,7 +1895,6 @@ static nvme_ctrl_t nvme_ctrl_alloc(nvme_root_t r, nvme_subsystem_t s,
}
if (p)
addr = strdup(p);
- free(rpath);
} else if (!strcmp(transport, "pcie")) {
/* The 'address' string is the transport address */
traddr = addr;
@@ -1500,16 +1932,13 @@ skip_address:
} while (c);
if (!c)
c = p;
- free(transport);
- if (address)
- free(address);
if (!c && !p) {
nvme_msg(r, LOG_ERR, "failed to lookup ctrl\n");
errno = ENODEV;
- free(addr);
return NULL;
}
c->address = addr;
+ addr = NULL;
if (s->subsystype && !strcmp(s->subsystype, "discovery"))
c->discovery_ctrl = true;
ret = nvme_configure_ctrl(r, c, path, name);
@@ -1521,8 +1950,9 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name)
nvme_host_t h;
nvme_subsystem_t s;
nvme_ctrl_t c;
- char *path;
- char *hostnqn, *hostid, *subsysnqn, *subsysname;
+ _cleanup_free_ char *path = NULL;
+ _cleanup_free_ char *hostnqn = NULL, *hostid = NULL;
+ _cleanup_free_ char *subsysnqn = NULL, *subsysname = NULL;
int ret;
nvme_msg(r, LOG_DEBUG, "scan controller %s\n", name);
@@ -1535,10 +1965,6 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name)
hostnqn = nvme_get_attr(path, "hostnqn");
hostid = nvme_get_attr(path, "hostid");
h = nvme_lookup_host(r, hostnqn, hostid);
- if (hostnqn)
- free(hostnqn);
- if (hostid)
- free(hostid);
if (h) {
if (h->dhchap_key)
free(h->dhchap_key);
@@ -1551,7 +1977,6 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name)
if (!h) {
h = nvme_default_host(r);
if (!h) {
- free(path);
errno = ENOMEM;
return NULL;
}
@@ -1559,7 +1984,6 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name)
subsysnqn = nvme_get_attr(path, "subsysnqn");
if (!subsysnqn) {
- free(path);
errno = ENXIO;
return NULL;
}
@@ -1568,27 +1992,21 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name)
nvme_msg(r, LOG_ERR,
"failed to lookup subsystem for controller %s\n",
name);
- free(subsysnqn);
- free(path);
errno = ENXIO;
return NULL;
}
s = nvme_lookup_subsystem(h, subsysname, subsysnqn);
- free(subsysnqn);
- free(subsysname);
if (!s) {
- free(path);
errno = ENOMEM;
return NULL;
}
c = nvme_ctrl_alloc(r, s, path, name);
- if (!c) {
- free(path);
+ if (!c)
return NULL;
- }
+ path = NULL;
nvme_ctrl_scan_namespaces(r, c);
nvme_ctrl_scan_paths(r, c);
return c;
@@ -1622,9 +2040,26 @@ static int nvme_bytes_to_lba(nvme_ns_t n, off_t offset, size_t count,
int nvme_ns_get_fd(nvme_ns_t n)
{
+ if (n->fd < 0) {
+ n->fd = nvme_open(n->name);
+ if (n->fd < 0)
+ nvme_msg(root_from_ns(n), LOG_ERR,
+ "Failed to open ns %s, errno %d\n",
+ n->name, errno);
+ }
+
return n->fd;
}
+void nvme_ns_release_fd(nvme_ns_t n)
+{
+	/* A negative fd means nothing is cached, so there is nothing to close */
+	if (n->fd >= 0) {
+		close(n->fd);
+		n->fd = -1;
+	}
+}
+
nvme_subsystem_t nvme_ns_get_subsystem(nvme_ns_t n)
{
return n->s;
@@ -1887,57 +2322,164 @@ int nvme_ns_flush(nvme_ns_t n)
return nvme_flush(nvme_ns_get_fd(n), nvme_ns_get_nsid(n));
}
-static void nvme_ns_parse_descriptors(struct nvme_ns *n,
- struct nvme_ns_id_desc *descs)
+static int nvme_strtou64(const char *str, void *res)
{
- void *d = descs;
- int i, len;
+ char *endptr;
+ __u64 v;
- for (i = 0; i < NVME_IDENTIFY_DATA_SIZE; i += len) {
- struct nvme_ns_id_desc *desc = d + i;
+ errno = 0;
+ v = strtoull(str, &endptr, 0);
- if (!desc->nidl)
- break;
- len = desc->nidl + sizeof(*desc);
+ if (errno != 0)
+ return -errno;
- switch (desc->nidt) {
- case NVME_NIDT_EUI64:
- memcpy(n->eui64, desc->nid, sizeof(n->eui64));
- break;
- case NVME_NIDT_NGUID:
- memcpy(n->nguid, desc->nid, sizeof(n->nguid));
- break;
- case NVME_NIDT_UUID:
- memcpy(n->uuid, desc->nid, sizeof(n->uuid));
- break;
- case NVME_NIDT_CSI:
- memcpy(&n->csi, desc->nid, sizeof(n->csi));
- break;
+ if (endptr == str) {
+ /* no digits found */
+ return -EINVAL;
+ }
+
+ *(__u64 *)res = v;
+ return 0;
+}
+
+/**
+ * nvme_strtou32() - Parse an unsigned 32-bit integer from a string
+ * @str: NUL-terminated text to parse; the base is auto-detected (base 0)
+ * @res: Destination, written as a __u32 and only on success
+ *
+ * Return: 0 on success, -EINVAL if @str holds no digits, -ERANGE if the
+ * value does not fit in 32 bits, otherwise -errno as set by strtoul().
+ */
+static int nvme_strtou32(const char *str, void *res)
+{
+	char *endptr;
+	unsigned long v;
+
+	errno = 0;
+	/* Unsigned conversion: a signed one cannot represent the upper half
+	 * of the __u32 range where long is 32 bits wide.
+	 */
+	v = strtoul(str, &endptr, 0);
+
+	if (errno != 0)
+		return -errno;
+
+	if (endptr == str) {
+		/* no digits found */
+		return -EINVAL;
+	}
+
+	/* unsigned long may be wider than 32 bits; refuse to truncate */
+	if (v != (__u32)v)
+		return -ERANGE;
+
+	*(__u32 *)res = (__u32)v;
+	return 0;
+}
+
+/**
+ * nvme_strtoi() - Parse a signed int from a string
+ * @str: NUL-terminated text to parse; the base is auto-detected (base 0)
+ * @res: Destination, written as an int and only on success
+ *
+ * Return: 0 on success, -EINVAL if @str holds no digits, -ERANGE if the
+ * value does not fit in an int, otherwise -errno as set by strtol().
+ */
+static int nvme_strtoi(const char *str, void *res)
+{
+	char *endptr;
+	long v;
+
+	errno = 0;
+	v = strtol(str, &endptr, 0);
+
+	if (errno != 0)
+		return -errno;
+
+	if (endptr == str) {
+		/* no digits found */
+		return -EINVAL;
+	}
+
+	/* long may be wider than int; refuse to truncate */
+	if (v != (int)v)
+		return -ERANGE;
+
+	*(int *)res = (int)v;
+	return 0;
+}
+
+/**
+ * nvme_strtoeuid() - Store the leading bytes of an attribute string
+ * @str: NUL-terminated attribute text
+ * @res: Destination buffer of at least 8 bytes
+ *
+ * Copies at most the first 8 characters of @str into @res and zero-fills
+ * whatever is left when @str is shorter than 8 characters.
+ *
+ * NOTE(review): the characters are stored as-is, without decoding any
+ * textual (e.g. hex) representation - confirm this is what the consumers
+ * of the destination field expect.
+ *
+ * Return: always 0.
+ */
+static int nvme_strtoeuid(const char *str, void *res)
+{
+	const size_t want = 8;
+	size_t len = strlen(str);
+
+	/* never read past the terminating NUL of a short string */
+	if (len > want)
+		len = want;
+
+	memset(res, 0, want);
+	memcpy(res, str, len);
+	return 0;
+}
+
+/**
+ * nvme_strtouuid() - Store the leading bytes of an attribute string
+ * @str: NUL-terminated attribute text
+ * @res: Destination buffer of at least NVME_UUID_LEN bytes
+ *
+ * Copies at most the first NVME_UUID_LEN characters of @str into @res and
+ * zero-fills whatever is left when @str is shorter than that.
+ *
+ * NOTE(review): the characters are stored as-is, without decoding the
+ * textual UUID representation - confirm this is what the consumers of
+ * the destination field expect.
+ *
+ * Return: always 0.
+ */
+static int nvme_strtouuid(const char *str, void *res)
+{
+	size_t len = strlen(str);
+
+	/* never read past the terminating NUL of a short string */
+	if (len > NVME_UUID_LEN)
+		len = NVME_UUID_LEN;
+
+	memset(res, 0, NVME_UUID_LEN);
+	memcpy(res, str, len);
+	return 0;
+}
+
+/* One row of a table describing how to load a sysfs attribute: which
+ * attribute to read, how to convert its text and where to store the result.
+ * Tables of these rows are processed by parse_attrs().
+ */
+struct sysfs_attr_table {
+ /* destination handed to @parse as its 'res' argument */
+ void *var;
+ /* converter for the attribute text; a non-zero return aborts parsing */
+ int (*parse)(const char *str, void *res);
+ /* when true, a missing attribute makes parse_attrs() return -ENOENT;
+ * when false, a missing attribute is skipped
+ */
+ bool mandatory;
+ /* attribute name passed to nvme_get_attr() together with the path */
+ const char *name;
+};
+
+#define GETSHIFT(x) (__builtin_ffsll(x) - 1)
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+static int parse_attrs(const char *path, struct sysfs_attr_table *tbl, int size)
+{
+ char *str;
+ int ret, i;
+
+ for (i = 0; i < size; i++) {
+ struct sysfs_attr_table *e = &tbl[i];
+
+ str = nvme_get_attr(path, e->name);
+ if (!str) {
+ if (!e->mandatory)
+ continue;
+ return -ENOENT;
}
+ ret = e->parse(str, e->var);
+ free(str);
+ if (ret)
+ return ret;
}
+
+ return 0;
}
-static int nvme_ns_init(struct nvme_ns *n)
+/**
+ * nvme_ns_init() - Populate a namespace object from its sysfs attributes
+ * @path: sysfs directory holding the namespace attributes
+ * @ns: Namespace object to fill in
+ *
+ * Reads the mandatory attributes "nsid", "size" and
+ * "queue/physical_block_size" and the optional "eui", "nguid" and "uuid",
+ * then derives lba_shift from lba_size. When the "csi" attribute exists,
+ * "csi", "nuse" and "metadata_bytes" are read from sysfs as well; otherwise
+ * lba_count, lba_util and meta_size are taken from an Identify Namespace
+ * command.
+ *
+ * Return: 0 on success, a negative errno from attribute parsing, -ENOMEM
+ * on allocation failure, or the result of nvme_ns_identify() on failure.
+ */
+static int nvme_ns_init(const char *path, struct nvme_ns *ns)
{
- struct nvme_id_ns ns = { };
- uint8_t buffer[NVME_IDENTIFY_DATA_SIZE] = { };
- struct nvme_ns_id_desc *descs = (void *)buffer;
- uint8_t flbas;
+ _cleanup_free_ char *attr = NULL;
+ struct stat sb;
 int ret;
- ret = nvme_ns_identify(n, &ns);
+ struct sysfs_attr_table base[] = {
+ { &ns->nsid, nvme_strtou32, true, "nsid" },
+ { &ns->lba_count, nvme_strtou64, true, "size" },
+ { &ns->lba_size, nvme_strtou64, true, "queue/physical_block_size" },
+ { ns->eui64, nvme_strtoeuid, false, "eui" },
+ { ns->nguid, nvme_strtouuid, false, "nguid" },
+ { ns->uuid, nvme_strtouuid, false, "uuid" }
+ };
+
+ ret = parse_attrs(path, base, ARRAY_SIZE(base));
 if (ret)
 return ret;
- nvme_id_ns_flbas_to_lbaf_inuse(ns.flbas, &flbas);
- n->lba_shift = ns.lbaf[flbas].ds;
- n->lba_size = 1 << n->lba_shift;
- n->lba_count = le64_to_cpu(ns.nsze);
- n->lba_util = le64_to_cpu(ns.nuse);
- n->meta_size = le16_to_cpu(ns.lbaf[flbas].ms);
+ ns->lba_shift = GETSHIFT(ns->lba_size);
+
+ if (asprintf(&attr, "%s/csi", path) < 0) {
+ /* asprintf() leaves attr indeterminate on failure and need not
+ * set errno: reset the pointer for the cleanup handler and
+ * report a definite error instead of a possible 0.
+ */
+ attr = NULL;
+ return -ENOMEM;
+ }
+ ret = stat(attr, &sb);
+ if (ret == 0) {
+ /* only available on kernels >= 6.8 */
+ struct sysfs_attr_table ext[] = {
+ { &ns->csi, nvme_strtoi, true, "csi" },
+ { &ns->lba_util, nvme_strtou64, true, "nuse" },
+ { &ns->meta_size, nvme_strtoi, true, "metadata_bytes"},
+ };
- if (!nvme_ns_identify_descs(n, descs))
- nvme_ns_parse_descriptors(n, descs);
+ ret = parse_attrs(path, ext, ARRAY_SIZE(ext));
+ if (ret)
+ return ret;
+ } else {
+ /* released automatically on every return path below */
+ _cleanup_free_ struct nvme_id_ns *id = NULL;
+ uint8_t flbas;
+
+ /* size the buffer after the identify data, not the ns object */
+ id = __nvme_alloc(sizeof(*id));
+ if (!id)
+ return -ENOMEM;
+
+ ret = nvme_ns_identify(ns, id);
+ if (ret)
+ return ret;
+
+ nvme_id_ns_flbas_to_lbaf_inuse(id->flbas, &flbas);
+ ns->lba_count = le64_to_cpu(id->nsze);
+ ns->lba_util = le64_to_cpu(id->nuse);
+ ns->meta_size = le16_to_cpu(id->lbaf[flbas].ms);
+ }
 return 0;
}
@@ -1956,7 +2498,7 @@ static void nvme_ns_set_generic_name(struct nvme_ns *n, const char *name)
n->generic_name = strdup(generic_name);
}
-static nvme_ns_t nvme_ns_open(const char *name)
+static nvme_ns_t nvme_ns_open(const char *sys_path, const char *name)
{
struct nvme_ns *n;
@@ -1966,26 +2508,20 @@ static nvme_ns_t nvme_ns_open(const char *name)
return NULL;
}
+ n->fd = -1;
n->name = strdup(name);
- n->fd = nvme_open(n->name);
- if (n->fd < 0)
- goto free_ns;
nvme_ns_set_generic_name(n, name);
- if (nvme_get_nsid(n->fd, &n->nsid) < 0)
- goto close_fd;
-
- if (nvme_ns_init(n) != 0)
- goto close_fd;
+ if (nvme_ns_init(sys_path, n) != 0)
+ goto free_ns;
list_head_init(&n->paths);
list_node_init(&n->entry);
+ nvme_ns_release_fd(n); /* Do not leak fds */
return n;
-close_fd:
- close(n->fd);
free_ns:
free(n->generic_name);
free(n->name);
@@ -2020,9 +2556,9 @@ static char *nvme_ns_generic_to_blkdev(const char *generic)
static struct nvme_ns *__nvme_scan_namespace(const char *sysfs_dir, const char *name)
{
struct nvme_ns *n;
- char *path;
+ _cleanup_free_ char *path = NULL;
int ret;
- char *blkdev;
+ _cleanup_free_ char *blkdev = NULL;
blkdev = nvme_ns_generic_to_blkdev(name);
if (!blkdev) {
@@ -2033,23 +2569,17 @@ static struct nvme_ns *__nvme_scan_namespace(const char *sysfs_dir, const char *
ret = asprintf(&path, "%s/%s", sysfs_dir, blkdev);
if (ret < 0) {
errno = ENOMEM;
- goto free_blkdev;
+ return NULL;
}
- n = nvme_ns_open(blkdev);
+ n = nvme_ns_open(path, blkdev);
if (!n)
- goto free_path;
+ return NULL;
n->sysfs_dir = path;
+ path = NULL;
- free(blkdev);
return n;
-
-free_path:
- free(path);
-free_blkdev:
- free(blkdev);
- return NULL;
}
nvme_ns_t nvme_scan_namespace(const char *name)
diff --git a/src/nvme/tree.h b/src/nvme/tree.h
index bcf3636..a30e8eb 100644
--- a/src/nvme/tree.h
+++ b/src/nvme/tree.h
@@ -15,6 +15,7 @@
#include <stddef.h>
#include <sys/types.h>
+#include <netinet/in.h>
#include "ioctl.h"
#include "util.h"
@@ -62,6 +63,17 @@ void nvme_root_set_application(nvme_root_t r, const char *a);
const char *nvme_root_get_application(nvme_root_t r);
/**
+ * nvme_root_release_fds - Close all opened file descriptors in the tree
+ * @r: &nvme_root_t object
+ *
+ * Controller and Namespace objects cache the file descriptors
+ * of opened nvme devices. This API can be used to close and
+ * clear all cached fds in the tree.
+ *
+ */
+void nvme_root_release_fds(nvme_root_t r);
+
+/**
* nvme_free_tree() - Free root object
* @r: &nvme_root_t object
*
@@ -295,6 +307,51 @@ nvme_ctrl_t nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport,
const char *host_iface, const char *trsvcid,
nvme_ctrl_t p);
+/**
+ * nvme_ctrl_find() - Locate an existing controller
+ * @s: &nvme_subsystem_t object
+ * @transport: Transport name
+ * @traddr: Transport address
+ * @trsvcid: Transport service identifier
+ * @subsysnqn: Subsystem NQN
+ * @host_traddr: Host transport address
+ * @host_iface: Host interface name
+ *
+ * Lookup a controller in @s based on @transport, @traddr, @trsvcid,
+ * @subsysnqn, @host_traddr, and @host_iface. @transport must be specified,
+ * other fields may be required depending on the transport. Parameters set
+ * to NULL will be ignored.
+ *
+ * Unlike nvme_lookup_ctrl(), this function does not create a new object if
+ * an existing controller cannot be found.
+ *
+ * Return: Controller instance on success, NULL otherwise.
+ */
+nvme_ctrl_t nvme_ctrl_find(nvme_subsystem_t s, const char *transport,
+ const char *traddr, const char *trsvcid,
+ const char *subsysnqn, const char *host_traddr,
+ const char *host_iface);
+
+/**
+ * nvme_ctrl_config_match() - Check if ctrl @c matches config params
+ * @c: An existing controller instance
+ * @transport: Transport name
+ * @traddr: Transport address
+ * @trsvcid: Transport service identifier
+ * @subsysnqn: Subsystem NQN
+ * @host_traddr: Host transport address
+ * @host_iface: Host interface name
+ *
+ * Check that controller @c matches parameters: @transport, @traddr,
+ * @trsvcid, @subsysnqn, @host_traddr, and @host_iface. Parameters set
+ * to NULL will be ignored.
+ *
+ * Return: true if there's a match, false otherwise.
+ */
+bool nvme_ctrl_config_match(struct nvme_ctrl *c, const char *transport,
+ const char *traddr, const char *trsvcid,
+ const char *subsysnqn, const char *host_traddr,
+ const char *host_iface);
/**
* nvme_create_ctrl() - Allocate an unconnected NVMe controller
@@ -484,11 +541,25 @@ nvme_ns_t nvme_subsystem_next_ns(nvme_subsystem_t s, nvme_ns_t n);
* nvme_ns_get_fd() - Get associated file descriptor
* @n: Namespace instance
*
+ * libnvme will open() the file (if not already opened) and keep
+ * an internal copy of the file descriptor. Following calls to
+ * this API retrieve the internal cached copy of the file
+ * descriptor. The file will remain opened and the fd will
+ * remain cached until the ns object is deleted or
+ * nvme_ns_release_fd() is called.
+ *
* Return: File descriptor associated with @n or -1
*/
int nvme_ns_get_fd(nvme_ns_t n);
/**
+ * nvme_ns_release_fd() - Close fd and clear fd from ns object
+ * @n: Namespace instance
+ *
+ */
+void nvme_ns_release_fd(nvme_ns_t n);
+
+/**
* nvme_ns_get_nsid() - NSID of a namespace
* @n: Namespace instance
*
@@ -772,11 +843,25 @@ nvme_ns_t nvme_path_get_ns(nvme_path_t p);
* nvme_ctrl_get_fd() - Get associated file descriptor
* @c: Controller instance
*
+ * libnvme will open() the file (if not already opened) and keep
+ * an internal copy of the file descriptor. Following calls to
+ * this API retrieve the internal cached copy of the file
+ * descriptor. The file will remain opened and the fd will
+ * remain cached until the controller object is deleted or
+ * nvme_ctrl_release_fd() is called.
+ *
* Return: File descriptor associated with @c or -1
*/
int nvme_ctrl_get_fd(nvme_ctrl_t c);
/**
+ * nvme_ctrl_release_fd() - Close fd and clear fd from controller object
+ * @c: Controller instance
+ *
+ */
+void nvme_ctrl_release_fd(nvme_ctrl_t c);
+
+/**
* nvme_ctrl_get_name() - sysfs name of a controller
* @c: Controller instance
*
@@ -802,6 +887,16 @@ const char *nvme_ctrl_get_sysfs_dir(nvme_ctrl_t c);
const char *nvme_ctrl_get_address(nvme_ctrl_t c);
/**
+ * nvme_ctrl_get_src_addr() - Extract src_addr from the c->address string
+ * @c: Controller instance
+ * @src_addr: Where to copy the src_addr. Size must be at least INET6_ADDRSTRLEN.
+ * @src_addr_len: Length of the buffer @src_addr.
+ *
+ * Return: Pointer to @src_addr on success. NULL on failure to extract the src_addr.
+ */
+char *nvme_ctrl_get_src_addr(nvme_ctrl_t c, char *src_addr, size_t src_addr_len);
+
+/**
* nvme_ctrl_get_phy_slot() - PCI physical slot number of a controller
* @c: Controller instance
*
@@ -827,7 +922,7 @@ const char *nvme_ctrl_get_firmware(nvme_ctrl_t c);
const char *nvme_ctrl_get_model(nvme_ctrl_t c);
/**
- * nvme_ctrl_get_state() - Running state of an controller
+ * nvme_ctrl_get_state() - Running state of a controller
* @c: Controller instance
*
* Return: String indicating the running state of @c
@@ -1148,6 +1243,14 @@ const char *nvme_subsystem_get_application(nvme_subsystem_t s);
void nvme_subsystem_set_application(nvme_subsystem_t s, const char *a);
/**
+ * nvme_subsystem_get_iopolicy() - Return the IO policy of a subsystem
+ * @s: nvme_subsystem_t object
+ *
+ * Return: IO policy used by current subsystem
+ */
+const char *nvme_subsystem_get_iopolicy(nvme_subsystem_t s);
+
+/**
* nvme_scan_topology() - Scan NVMe topology and apply filter
* @r: nvme_root_t object
* @f: filter to apply
@@ -1177,6 +1280,16 @@ const char *nvme_host_get_hostnqn(nvme_host_t h);
const char *nvme_host_get_hostid(nvme_host_t h);
/**
+ * nvme_host_release_fds() - Close all opened file descriptors under host
+ * @h: nvme_host_t object
+ *
+ * Controller and Namespace objects cache the file descriptors
+ * of opened nvme devices. This API can be used to close and
+ * clear all cached fds under this host.
+ */
+void nvme_host_release_fds(struct nvme_host *h);
+
+/**
* nvme_free_host() - Free nvme_host_t object
* @h: nvme_host_t object
*/
@@ -1293,6 +1406,18 @@ nvme_ns_t nvme_subsystem_lookup_namespace(struct nvme_subsystem *s,
__u32 nsid);
/**
+ * nvme_subsystem_release_fds() - Close all opened fds under subsystem
+ * @s: nvme_subsystem_t object
+ *
+ * Controller and Namespace objects cache the file descriptors
+ * of opened nvme devices. This API can be used to close and
+ * clear all cached fds under this subsystem.
+ *
+ */
+void nvme_subsystem_release_fds(struct nvme_subsystem *s);
+
+
+/**
* nvme_get_path_attr() - Read path sysfs attribute
* @p: nvme_path_t object
* @attr: sysfs attribute name
diff --git a/src/nvme/types.h b/src/nvme/types.h
index 3bf2237..29ac050 100644
--- a/src/nvme/types.h
+++ b/src/nvme/types.h
@@ -43,7 +43,7 @@
* Returns: The 'name' field from 'value'
*/
#define NVME_SET(value, name) \
- (((value) & NVME_##name##_MASK) << NVME_##name##_SHIFT)
+ (((__u32)(value) & NVME_##name##_MASK) << NVME_##name##_SHIFT)
/**
* enum nvme_constants - A place to stash various constant nvme values
@@ -611,6 +611,19 @@ static const __u64 NVME_PMRMSC_CBA_MASK = 0xfffffffffffffull;
#define NVME_PMRMSC_CMSE(pmrmsc) NVME_GET(pmrmsc, PMRMSC_CMSE)
#define NVME_PMRMSC_CBA(pmrmsc) NVME_GET(pmrmsc, PMRMSC_CBA)
+/**
+ * enum nvme_flbas - Shift and mask values for the sub-fields of FLBAS
+ * @NVME_FLBAS_LOWER_SHIFT: Bit position of the LOWER field (bit 0)
+ * @NVME_FLBAS_META_EXT_SHIFT: Bit position of the META_EXT field (bit 4)
+ * @NVME_FLBAS_HIGHER_SHIFT: Bit position of the HIGHER field (bit 5)
+ * @NVME_FLBAS_LOWER_MASK: Mask of the LOWER field value (4 bits wide)
+ * @NVME_FLBAS_META_EXT_MASK: Mask of the META_EXT field value (1 bit wide)
+ * @NVME_FLBAS_HIGHER_MASK: Mask of the HIGHER field value (2 bits wide)
+ *
+ * These constants back the NVME_FLBAS_LOWER(), NVME_FLBAS_META_EXT() and
+ * NVME_FLBAS_HIGHER() accessors. NOTE(review): LOWER/HIGHER presumably
+ * hold the low and high bits of the LBA format index - confirm against
+ * the Identify Namespace FLBAS definition.
+ */
+enum nvme_flbas {
+ NVME_FLBAS_LOWER_SHIFT = 0,
+ NVME_FLBAS_META_EXT_SHIFT = 4,
+ NVME_FLBAS_HIGHER_SHIFT = 5,
+ NVME_FLBAS_LOWER_MASK = 0xf,
+ NVME_FLBAS_META_EXT_MASK = 0x1,
+ NVME_FLBAS_HIGHER_MASK = 0x3,
+};
+
+#define NVME_FLBAS_LOWER(flbas) NVME_GET(flbas, FLBAS_LOWER)
+#define NVME_FLBAS_META_EXT(flbas) NVME_GET(flbas, FLBAS_META_EXT)
+#define NVME_FLBAS_HIGHER(flbas) NVME_GET(flbas, FLBAS_HIGHER)
+
/**
* enum nvme_psd_flags - Possible flag values in nvme power state descriptor
* @NVME_PSD_FLAGS_MXPS: Indicates the scale for the Maximum Power
@@ -930,7 +943,10 @@ struct nvme_id_psd {
* @maxcna: Maximum I/O Controller Namespace Attachments indicates the
* maximum number of namespaces that are allowed to be attached to
* this I/O controller.
- * @rsvd564: Reserved
+ * @oaqd: Optimal Aggregated Queue Depth indicates the recommended maximum
+ * total number of outstanding I/O commands across all I/O queues
+ * on the controller for optimal operation.
+ * @rsvd568: Reserved
* @subnqn: NVM Subsystem NVMe Qualified Name, UTF-8 null terminated string
* @rsvd1024: Reserved
* @ioccsz: I/O Queue Command Capsule Supported Size, defines the maximum
@@ -1035,7 +1051,8 @@ struct nvme_id_ctrl {
__le32 mnan;
__u8 maxdna[16];
__le32 maxcna;
- __u8 rsvd564[204];
+ __le32 oaqd;
+ __u8 rsvd568[200];
char subnqn[NVME_NQN_LENGTH];
__u8 rsvd1024[768];
@@ -1489,6 +1506,14 @@ enum nvme_id_ctrl_cqes {
* the Verify command.
* @NVME_CTRL_ONCS_COPY: If set, then the controller supports
* the copy command.
+ * @NVME_CTRL_ONCS_COPY_SINGLE_ATOMICITY: If set, then the write portion of a
+ * Copy command is performed as a single
+ * write command to which the same
+ * atomicity requirements that apply to
+ * a write command apply.
+ * @NVME_CTRL_ONCS_ALL_FAST_COPY: If set, then all copy operations for
+ * the Copy command are fast copy
+ * operations.
*/
enum nvme_id_ctrl_oncs {
NVME_CTRL_ONCS_COMPARE = 1 << 0,
@@ -1500,6 +1525,8 @@ enum nvme_id_ctrl_oncs {
NVME_CTRL_ONCS_TIMESTAMP = 1 << 6,
NVME_CTRL_ONCS_VERIFY = 1 << 7,
NVME_CTRL_ONCS_COPY = 1 << 8,
+ NVME_CTRL_ONCS_COPY_SINGLE_ATOMICITY = 1 << 9,
+ NVME_CTRL_ONCS_ALL_FAST_COPY = 1 << 10,
};
/**
@@ -1772,7 +1799,6 @@ enum nvme_lbaf_rp {
* remains fixed throughout the life of the namespace and is
* preserved across namespace and controller operations
* @lbaf: LBA Format, see &struct nvme_lbaf.
- * @lbstm: Logical Block Storage Tag Mask for end-to-end protection
* @vs: Vendor Specific
*/
struct nvme_id_ns {
@@ -1816,8 +1842,7 @@ struct nvme_id_ns {
__u8 nguid[16];
__u8 eui64[8];
struct nvme_lbaf lbaf[64];
- __le64 lbstm;
- __u8 vs[3704];
+ __u8 vs[3712];
};
/**
@@ -3075,11 +3100,13 @@ struct nvme_telemetry_log {
/**
* struct nvme_endurance_group_log - Endurance Group Information Log
* @critical_warning: Critical Warning
- * @rsvd1: Reserved
+ * @endurance_group_features: Endurance Group Features
+ * @rsvd2: Reserved
* @avl_spare: Available Spare
* @avl_spare_threshold: Available Spare Threshold
* @percent_used: Percentage Used
- * @rsvd6: Reserved
+ * @domain_identifier: Domain Identifier
+ * @rsvd8: Reserved
* @endurance_estimate: Endurance Estimate
* @data_units_read: Data Units Read
* @data_units_written: Data Units Written
@@ -3088,15 +3115,19 @@ struct nvme_telemetry_log {
* @host_write_cmds: Host Write Commands
* @media_data_integrity_err: Media and Data Integrity Errors
* @num_err_info_log_entries: Number of Error Information Log Entries
- * @rsvd160: Reserved
+ * @total_end_grp_cap: Total Endurance Group Capacity
+ * @unalloc_end_grp_cap: Unallocated Endurance Group Capacity
+ * @rsvd192: Reserved
*/
struct nvme_endurance_group_log {
__u8 critical_warning;
- __u8 rsvd1[2];
+ __u8 endurance_group_features;
+ __u8 rsvd2;
__u8 avl_spare;
__u8 avl_spare_threshold;
__u8 percent_used;
- __u8 rsvd6[26];
+ __le16 domain_identifier;
+ __u8 rsvd8[24];
__u8 endurance_estimate[16];
__u8 data_units_read[16];
__u8 data_units_written[16];
@@ -3105,7 +3136,9 @@ struct nvme_endurance_group_log {
__u8 host_write_cmds[16];
__u8 media_data_integrity_err[16];
__u8 num_err_info_log_entries[16];
- __u8 rsvd160[352];
+ __u8 total_end_grp_cap[16];
+ __u8 unalloc_end_grp_cap[16];
+ __u8 rsvd192[320];
};
/**
@@ -3710,6 +3743,110 @@ struct nvme_boot_partition {
};
/**
+ * struct nvme_eom_lane_desc - EOM Lane Descriptor
+ * @rsvd0: Reserved
+ * @mstatus: Measurement Status
+ * @lane: Lane number
+ * @eye: Eye number
+ * @top: Absolute number of rows from center to top edge of eye
+ * @bottom: Absolute number of rows from center to bottom edge of eye
+ * @left: Absolute number of columns from center to left edge of eye
+ * @right: Absolute number of columns from center to right edge of eye
+ * @nrows: Number of Rows
+ * @ncols: Number of Columns
+ * @edlen: Eye Data Length
+ * @rsvd18: Reserved
+ * @eye_desc: Printable Eye, Eye Data, and any Padding
+ */
+struct nvme_eom_lane_desc {
+ __u8 rsvd0;
+ __u8 mstatus;
+ __u8 lane;
+ __u8 eye;
+ __le16 top;
+ __le16 bottom;
+ __le16 left;
+ __le16 right;
+ __le16 nrows;
+ __le16 ncols;
+ __le16 edlen;
+ __u8 rsvd18[14];
+ __u8 eye_desc[];
+};
+
+/**
+ * struct nvme_phy_rx_eom_log - Physical Interface Receiver Eye Opening Measurement Log
+ * @lid: Log Identifier
+ * @eomip: EOM In Progress
+ * @hsize: Header Size
+ * @rsize: Result Size
+ * @eomdgn: EOM Data Generation Number
+ * @lr: Log Revision
+ * @odp: Optional Data Present
+ * @lanes: Number of lanes configured for this port
+ * @epl: Eyes Per Lane
+ * @lspfc: Log Specific Parameter Field Copy
+ * @li: Link Information
+ * @rsvd15: Reserved
+ * @lsic: Log Specific Identifier Copy
+ * @dsize: Descriptor Size
+ * @nd: Number of Descriptors
+ * @maxtb: Maximum Top Bottom
+ * @maxlr: Maximum Left Right
+ * @etgood: Estimated Time for Good Quality
+ * @etbetter: Estimated Time for Better Quality
+ * @etbest: Estimated Time for Best Quality
+ * @rsvd36: Reserved
+ * @descs: EOM Lane Descriptors
+ */
+struct nvme_phy_rx_eom_log {
+ __u8 lid;
+ __u8 eomip;
+ __le16 hsize;
+ __le32 rsize;
+ __u8 eomdgn;
+ __u8 lr;
+ __u8 odp;
+ __u8 lanes;
+ __u8 epl;
+ __u8 lspfc;
+ __u8 li;
+ __u8 rsvd15[3];
+ __le16 lsic;
+ __le32 dsize;
+ __le16 nd;
+ __le16 maxtb;
+ __le16 maxlr;
+ __le16 etgood;
+ __le16 etbetter;
+ __le16 etbest;
+ __u8 rsvd36[28];
+ struct nvme_eom_lane_desc descs[];
+};
+
+/**
+ * enum nvme_eom_optional_data - EOM Optional Data Present Fields
+ * @NVME_EOM_EYE_DATA_PRESENT: Eye Data Present
+ * @NVME_EOM_PRINTABLE_EYE_PRESENT: Printable Eye Present
+ */
+enum nvme_eom_optional_data {
+ NVME_EOM_EYE_DATA_PRESENT = 1,
+ NVME_EOM_PRINTABLE_EYE_PRESENT = 1 << 1,
+};
+
+/**
+ * enum nvme_phy_rx_eom_progress - EOM In Progress Values
+ * @NVME_PHY_RX_EOM_NOT_STARTED: EOM Not Started
+ * @NVME_PHY_RX_EOM_IN_PROGRESS: EOM In Progress
+ * @NVME_PHY_RX_EOM_COMPLETED: EOM Completed
+ */
+enum nvme_phy_rx_eom_progress {
+ NVME_PHY_RX_EOM_NOT_STARTED = 0,
+ NVME_PHY_RX_EOM_IN_PROGRESS = 1,
+ NVME_PHY_RX_EOM_COMPLETED = 2,
+};
+
+/**
* struct nvme_media_unit_stat_desc - Media Unit Status Descriptor
* @muid: Media Unit Identifier
* @domainid: Domain Identifier
@@ -4604,11 +4741,19 @@ struct nvme_plm_config {
/**
* struct nvme_feat_host_behavior - Host Behavior Support - Data Structure
* @acre: Advanced Command Retry Enable
- * @rsvd1: Reserved
+ * @etdas: Extended Telemetry Data Area 4 Supported
+ * @lbafee: LBA Format Extension Enable
+ * @rsvd3: Reserved
+ * @cdfe: Copy Descriptor Formats Enable (16-bit, little-endian)
+ * @rsvd6: Reserved
*/
struct nvme_feat_host_behavior {
__u8 acre;
- __u8 rsvd1[511];
+ __u8 etdas;
+ __u8 lbafee;
+ __u8 rsvd3;
+ __le16 cdfe;
+ __u8 rsvd6[506];
};
/**
@@ -4674,6 +4819,66 @@ struct nvme_copy_range_f1 {
};
/**
+ * enum nvme_copy_range_sopt - NVMe Copy Range Source Options
+ * @NVME_COPY_SOPT_FCO: NVMe Copy Source Option Fast Copy Only
+ */
+enum nvme_copy_range_sopt {
+ /*
+ * Bit 15 of a 16-bit value. NOTE(review): presumably OR-ed into the
+ * sopt member of the format 2h/3h source range entries - confirm.
+ */
+ NVME_COPY_SOPT_FCO = 1 << 15,
+};
+
+/**
+ * struct nvme_copy_range_f2 - Copy - Source Range Entries Descriptor Format 2h
+ * @snsid: Source Namespace Identifier
+ * @rsvd4: Reserved
+ * @slba: Starting LBA
+ * @nlb: Number of Logical Blocks
+ * @rsvd18: Reserved
+ * @sopt: Source Options
+ * @eilbrt: Expected Initial Logical Block Reference Tag /
+ * Expected Logical Block Storage Tag
+ * @elbat: Expected Logical Block Application Tag
+ * @elbatm: Expected Logical Block Application Tag Mask
+ *
+ * Members are documented in declaration order; note that @elbat precedes
+ * @elbatm in the structure.
+ */
+struct nvme_copy_range_f2 {
+ __le32 snsid;
+ __u8 rsvd4[4];
+ __le64 slba;
+ __le16 nlb;
+ __u8 rsvd18[4];
+ __le16 sopt;
+ __le32 eilbrt;
+ __le16 elbat;
+ __le16 elbatm;
+};
+
+/**
+ * struct nvme_copy_range_f3 - Copy - Source Range Entries Descriptor Format 3h
+ * @snsid: Source Namespace Identifier
+ * @rsvd4: Reserved
+ * @slba: Starting LBA
+ * @nlb: Number of Logical Blocks
+ * @rsvd18: Reserved
+ * @sopt: Source Options
+ * @rsvd24: Reserved
+ * @elbt: Expected Initial Logical Block Reference Tag /
+ * Expected Logical Block Storage Tag (10-byte array;
+ * nvme_init_copy_range_f3() zeroes bytes 0-1 and stores the
+ * 64-bit tag in bytes 2-9, most significant byte first)
+ * @elbat: Expected Logical Block Application Tag
+ * @elbatm: Expected Logical Block Application Tag Mask
+ *
+ * Members are documented in declaration order; note that @elbat precedes
+ * @elbatm in the structure.
+ */
+struct nvme_copy_range_f3 {
+ __le32 snsid;
+ __u8 rsvd4[4];
+ __le64 slba;
+ __le16 nlb;
+ __u8 rsvd18[4];
+ __le16 sopt;
+ __u8 rsvd24[2];
+ __u8 elbt[10];
+ __le16 elbat;
+ __le16 elbatm;
+};
+
+/**
* struct nvme_registered_ctrl - Registered Controller Data Structure
* @cntlid: Controller ID
* @rcsts: Reservation Status
@@ -6125,6 +6330,21 @@ struct nvme_mi_vpd_hdr {
* @NVME_SC_INVALID_PI: Invalid Protection Information
* @NVME_SC_READ_ONLY: Attempted Write to Read Only Range
* @NVME_SC_CMD_SIZE_LIMIT_EXCEEDED: Command Size Limit Exceeded
+ * @NVME_SC_INCOMPATIBLE_NS: Incompatible Namespace or Format: At
+ * least one source namespace and the
+ * destination namespace have incompatible
+ * formats.
+ * @NVME_SC_FAST_COPY_NOT_POSSIBLE: Fast Copy Not Possible: The Fast Copy
+ * Only (FCO) bit was set to ‘1’ in a Source
+ * Range entry and the controller was not
+ * able to use fast copy operations to copy
+ * the specified data.
+ * @NVME_SC_OVERLAPPING_IO_RANGE: Overlapping I/O Range: A source logical
+ * block range overlaps the destination
+ * logical block range.
+ * @NVME_SC_INSUFFICIENT_RESOURCES: Insufficient Resources: A resource
+ * shortage prevented the controller from
+ * performing the requested copy.
* @NVME_SC_CONNECT_FORMAT: Incompatible Format: The NVM subsystem
* does not support the record format
* specified by the host.
@@ -6370,6 +6590,10 @@ enum nvme_status_field {
NVME_SC_INVALID_PI = 0x81,
NVME_SC_READ_ONLY = 0x82,
NVME_SC_CMD_SIZE_LIMIT_EXCEEDED = 0x83,
+ NVME_SC_INCOMPATIBLE_NS = 0x85,
+ NVME_SC_FAST_COPY_NOT_POSSIBLE = 0x86,
+ NVME_SC_OVERLAPPING_IO_RANGE = 0x87,
+ NVME_SC_INSUFFICIENT_RESOURCES = 0x89,
/*
* I/O Command Set Specific - Fabrics commands:
@@ -6501,7 +6725,7 @@ static inline __u32 nvme_status_get_type(int status)
*/
static inline __u32 nvme_status_get_value(int status)
{
- return status & ~(NVME_STATUS_TYPE_MASK << NVME_STATUS_TYPE_SHIFT);
+ return status & ~NVME_SET(NVME_STATUS_TYPE_MASK, STATUS_TYPE);
}
/**
@@ -6688,6 +6912,7 @@ enum nvme_identify_cns {
* @NVME_LOG_LID_FID_SUPPORTED_EFFECTS: Feature Identifiers Supported and Effects
* @NVME_LOG_LID_MI_CMD_SUPPORTED_EFFECTS: NVMe-MI Commands Supported and Effects
* @NVME_LOG_LID_BOOT_PARTITION: Boot Partition
+ * @NVME_LOG_LID_PHY_RX_EOM: Physical Interface Receiver Eye Opening Measurement
* @NVME_LOG_LID_FDP_CONFIGS: FDP Configurations
* @NVME_LOG_LID_FDP_RUH_USAGE: Reclaim Unit Handle Usage
* @NVME_LOG_LID_FDP_STATS: FDP Statistics
@@ -6719,6 +6944,7 @@ enum nvme_cmd_get_log_lid {
NVME_LOG_LID_FID_SUPPORTED_EFFECTS = 0x12,
NVME_LOG_LID_MI_CMD_SUPPORTED_EFFECTS = 0x13,
NVME_LOG_LID_BOOT_PARTITION = 0x15,
+ NVME_LOG_LID_PHY_RX_EOM = 0x19,
NVME_LOG_LID_FDP_CONFIGS = 0x20,
NVME_LOG_LID_FDP_RUH_USAGE = 0x21,
NVME_LOG_LID_FDP_STATS = 0x22,
@@ -7012,7 +7238,7 @@ enum nvme_feat {
NVME_FEAT_WP_WPS_SHIFT = 0,
NVME_FEAT_WP_WPS_MASK = 0x7,
NVME_FEAT_IOCSP_IOCSCI_SHIFT = 0,
- NVME_FEAT_IOCSP_IOCSCI_MASK = 0xff,
+ NVME_FEAT_IOCSP_IOCSCI_MASK = 0x1ff,
NVME_FEAT_FDP_ENABLED_SHIFT = 0,
NVME_FEAT_FDP_ENABLED_MASK = 0x1,
NVME_FEAT_FDP_INDEX_SHIFT = 8,
@@ -7273,6 +7499,30 @@ enum nvme_log_ana_lsp {
};
/**
+ * enum nvme_log_phy_rx_eom_action - Physical Interface Receiver Eye Opening Measurement Action
+ * @NVME_LOG_PHY_RX_EOM_READ: Read Log Data
+ * @NVME_LOG_PHY_RX_EOM_START_READ: Start Measurement and Read Log Data
+ * @NVME_LOG_PHY_RX_EOM_ABORT_CLEAR: Abort Measurement and Clear Log Data
+ */
+enum nvme_log_phy_rx_eom_action {
+ /*
+ * Plain numeric codes 0-2 (not bit masks).
+ * NOTE(review): presumably placed in the log specific parameter of
+ * the Get Log Page command - confirm against the caller.
+ */
+ NVME_LOG_PHY_RX_EOM_READ = 0,
+ NVME_LOG_PHY_RX_EOM_START_READ = 1,
+ NVME_LOG_PHY_RX_EOM_ABORT_CLEAR = 2,
+};
+
+/**
+ * enum nvme_log_phy_rx_eom_quality - Physical Interface Receiver Eye Opening Measurement Quality
+ * @NVME_LOG_PHY_RX_EOM_GOOD: <= Better Quality
+ * @NVME_LOG_PHY_RX_EOM_BETTER: <= Best Quality, >= Good Quality
+ * @NVME_LOG_PHY_RX_EOM_BEST: >= Better Quality
+ *
+ * The levels are ordered GOOD (0) < BETTER (1) < BEST (2).
+ * &struct nvme_phy_rx_eom_log carries one estimated-time field per level
+ * (etgood, etbetter, etbest).
+ * NOTE(review): presumably the measurement quality requested by the host;
+ * confirm against the caller.
+ */
+enum nvme_log_phy_rx_eom_quality {
+ NVME_LOG_PHY_RX_EOM_GOOD = 0,
+ NVME_LOG_PHY_RX_EOM_BETTER = 1,
+ NVME_LOG_PHY_RX_EOM_BEST = 2,
+};
+
+/**
* enum nvme_pevent_log_action - Persistent Event Log - Action
* @NVME_PEVENT_LOG_READ: Read Log Data
* @NVME_PEVENT_LOG_EST_CTX_AND_READ: Establish Context and Read Log Data
diff --git a/src/nvme/util.c b/src/nvme/util.c
index 143cc31..45512ff 100644
--- a/src/nvme/util.c
+++ b/src/nvme/util.c
@@ -7,6 +7,7 @@
* Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
*/
+#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
@@ -22,6 +23,7 @@
#include <ccan/endian/endian.h>
+#include "cleanup.h"
#include "private.h"
#include "util.h"
#include "log.h"
@@ -290,6 +292,10 @@ static const char * const nvm_status[] = {
[NVME_SC_INVALID_PI] = "Invalid Protection Information: The command's Protection Information Field settings are invalid for the namespace's Protection Information format",
[NVME_SC_READ_ONLY] = "Attempted Write to Read Only Range: The LBA range specified contains read-only blocks",
[NVME_SC_CMD_SIZE_LIMIT_EXCEEDED] = "Command Size Limit Exceeded",
+ [NVME_SC_INCOMPATIBLE_NS] = "Incompatible Namespace or Format",
+ [NVME_SC_FAST_COPY_NOT_POSSIBLE] = "Fast Copy Not Possible",
+ [NVME_SC_OVERLAPPING_IO_RANGE] = "Overlapping I/O Range",
+ [NVME_SC_INSUFFICIENT_RESOURCES] = "Insufficient Resources",
[NVME_SC_ZNS_INVALID_OP_REQUEST] = "Invalid Zone Operation Request: The operation requested is invalid",
[NVME_SC_ZNS_ZRWA_RESOURCES_UNAVAILABLE] = "ZRWA Resources Unavailable: No ZRWAs are available",
[NVME_SC_ZNS_BOUNDARY_ERROR] = "Zoned Boundary Error: Invalid Zone Boundary crossing",
@@ -385,6 +391,16 @@ const char *nvme_status_to_string(int status, bool fabrics)
return s;
}
+/*
+ * Fill a 10-byte tag array from a 64-bit reference tag: bytes 0-1 are
+ * cleared and bytes 2-9 receive @eilbrt, most significant byte first.
+ */
+static inline void nvme_init_copy_range_elbt(__u8 *elbt, __u64 eilbrt)
+{
+	int pos;
+
+	elbt[0] = 0;
+	elbt[1] = 0;
+
+	/* Walk backwards from the last byte, peeling off the low byte each time */
+	for (pos = 9; pos >= 2; pos--) {
+		elbt[pos] = eilbrt & 0xff;
+		eilbrt >>= 8;
+	}
+}
+
void nvme_init_copy_range(struct nvme_copy_range *copy, __u16 *nlbs,
__u64 *slbas, __u32 *eilbrts, __u32 *elbatms,
__u32 *elbats, __u16 nr)
@@ -404,18 +420,51 @@ void nvme_init_copy_range_f1(struct nvme_copy_range_f1 *copy, __u16 *nlbs,
__u64 *slbas, __u64 *eilbrts, __u32 *elbatms,
__u32 *elbats, __u16 nr)
{
- int i, j;
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ copy[i].nlb = cpu_to_le16(nlbs[i]);
+ copy[i].slba = cpu_to_le64(slbas[i]);
+ copy[i].elbatm = cpu_to_le16(elbatms[i]);
+ copy[i].elbat = cpu_to_le16(elbats[i]);
+ nvme_init_copy_range_elbt(copy[i].elbt, eilbrts[i]);
+ }
+}
+
+void nvme_init_copy_range_f2(struct nvme_copy_range_f2 *copy, __u32 *snsids,
+ __u16 *nlbs, __u64 *slbas, __u16 *sopts,
+ __u32 *eilbrts, __u32 *elbatms, __u32 *elbats,
+ __u16 nr)
+{
+ int i;
for (i = 0; i < nr; i++) {
+ copy[i].snsid = cpu_to_le32(snsids[i]);
copy[i].nlb = cpu_to_le16(nlbs[i]);
copy[i].slba = cpu_to_le64(slbas[i]);
+ copy[i].sopt = cpu_to_le16(sopts[i]);
+ copy[i].eilbrt = cpu_to_le32(eilbrts[i]);
copy[i].elbatm = cpu_to_le16(elbatms[i]);
copy[i].elbat = cpu_to_le16(elbats[i]);
- for (j = 0; j < 8; j++)
- copy[i].elbt[9 - j] = (eilbrts[i] >> (8 * j)) & 0xff;
- copy[i].elbt[1] = 0;
- copy[i].elbt[0] = 0;
- }
+ }
+}
+
+void nvme_init_copy_range_f3(struct nvme_copy_range_f3 *copy, __u32 *snsids,
+ __u16 *nlbs, __u64 *slbas, __u16 *sopts,
+ __u64 *eilbrts, __u32 *elbatms, __u32 *elbats,
+ __u16 nr)
+{
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ copy[i].snsid = cpu_to_le32(snsids[i]);
+ copy[i].nlb = cpu_to_le16(nlbs[i]);
+ copy[i].slba = cpu_to_le64(slbas[i]);
+ copy[i].sopt = cpu_to_le16(sopts[i]);
+ copy[i].elbatm = cpu_to_le16(elbatms[i]);
+ copy[i].elbat = cpu_to_le16(elbats[i]);
+ nvme_init_copy_range_elbt(copy[i].elbt, eilbrts[i]);
+ }
}
void nvme_init_dsm_range(struct nvme_dsm_range *dsm, __u32 *ctx_attrs,
@@ -708,7 +757,7 @@ char *kv_keymatch(const char *kv, const char *key)
static size_t read_file(const char * fname, char *buffer, size_t *bufsz)
{
char *p;
- FILE *file;
+ _cleanup_file_ FILE *file;
size_t len;
file = fopen(fname, "re");
@@ -716,7 +765,6 @@ static size_t read_file(const char * fname, char *buffer, size_t *bufsz)
return 0;
p = fgets(buffer, *bufsz, file);
- fclose(file);
if (!p)
return 0;
@@ -758,7 +806,7 @@ size_t get_entity_name(char *buffer, size_t bufsz)
size_t get_entity_version(char *buffer, size_t bufsz)
{
- FILE *file;
+ _cleanup_file_ FILE *file;
size_t num_bytes = 0;
/* /proc/sys/kernel/ostype typically contains the string "Linux" */
@@ -808,7 +856,6 @@ size_t get_entity_version(char *buffer, size_t bufsz)
if (s)
ver_id_len = copy_value(ver_id, sizeof(ver_id), s);
}
- fclose(file);
if (name_len) {
/* Append a space */
@@ -881,14 +928,13 @@ int nvme_uuid_from_string(const char *str, unsigned char uuid[NVME_UUID_LEN])
int nvme_uuid_random(unsigned char uuid[NVME_UUID_LEN])
{
- int f;
+ _cleanup_fd_ int f;
ssize_t n;
f = open("/dev/urandom", O_RDONLY);
if (f < 0)
return -errno;
n = read(f, uuid, NVME_UUID_LEN);
- close(f);
if (n < 0)
return -errno;
else if (n != NVME_UUID_LEN)
@@ -906,6 +952,46 @@ int nvme_uuid_random(unsigned char uuid[NVME_UUID_LEN])
}
#ifdef HAVE_NETDB
+/*
+ * _nvme_ipaddrs_eq() - Check whether two socket addresses carry the same IP
+ * @addr1: First address
+ * @addr2: Second address
+ *
+ * Two AF_INET or two AF_INET6 addresses are compared directly. For a mixed
+ * AF_INET/AF_INET6 pair the IPv6 address must be IPv4-mapped and its embedded
+ * IPv4 address must equal the IPv4 one. Any other combination of address
+ * families is reported as not equal, without reading either address.
+ *
+ * Return: true if both addresses designate the same IP address.
+ */
+static bool _nvme_ipaddrs_eq(struct sockaddr *addr1, struct sockaddr *addr2)
+{
+	struct sockaddr_in *sockaddr_v4;
+	struct sockaddr_in6 *sockaddr_v6;
+	in_addr_t mapped;
+
+	if (addr1->sa_family == AF_INET && addr2->sa_family == AF_INET) {
+		struct sockaddr_in *sockaddr1 = (struct sockaddr_in *)addr1;
+		struct sockaddr_in *sockaddr2 = (struct sockaddr_in *)addr2;
+
+		return sockaddr1->sin_addr.s_addr == sockaddr2->sin_addr.s_addr;
+	}
+
+	if (addr1->sa_family == AF_INET6 && addr2->sa_family == AF_INET6) {
+		struct sockaddr_in6 *sockaddr1 = (struct sockaddr_in6 *)addr1;
+		struct sockaddr_in6 *sockaddr2 = (struct sockaddr_in6 *)addr2;
+
+		return !memcmp(&sockaddr1->sin6_addr, &sockaddr2->sin6_addr, sizeof(struct in6_addr));
+	}
+
+	/*
+	 * Mixed families: only cast once both families are known, so that an
+	 * unrelated (and possibly shorter) sockaddr is never reinterpreted.
+	 */
+	if (addr1->sa_family == AF_INET && addr2->sa_family == AF_INET6) {
+		sockaddr_v4 = (struct sockaddr_in *)addr1;
+		sockaddr_v6 = (struct sockaddr_in6 *)addr2;
+	} else if (addr1->sa_family == AF_INET6 && addr2->sa_family == AF_INET) {
+		sockaddr_v6 = (struct sockaddr_in6 *)addr1;
+		sockaddr_v4 = (struct sockaddr_in *)addr2;
+	} else {
+		return false;
+	}
+
+	if (!IN6_IS_ADDR_V4MAPPED(&sockaddr_v6->sin6_addr))
+		return false;
+
+	/* The last four bytes of an IPv4-mapped address hold the IPv4 address */
+	memcpy(&mapped, &sockaddr_v6->sin6_addr.s6_addr[12], sizeof(mapped));
+
+	return sockaddr_v4->sin_addr.s_addr == mapped;
+}
+
bool nvme_ipaddrs_eq(const char *addr1, const char *addr2)
{
bool result = false;
@@ -924,37 +1010,7 @@ bool nvme_ipaddrs_eq(const char *addr1, const char *addr2)
if (getaddrinfo(addr2, 0, &hint2, &info2) || !info2)
goto ipaddrs_eq_fail;
- if (info1->ai_family == AF_INET && info2->ai_family == AF_INET) {
- struct sockaddr_in *sockaddr1 = (struct sockaddr_in *)(info1->ai_addr);
- struct sockaddr_in *sockaddr2 = (struct sockaddr_in *)(info2->ai_addr);
- result = sockaddr1->sin_addr.s_addr == sockaddr2->sin_addr.s_addr;
- } else if (info1->ai_family == AF_INET6 && info2->ai_family == AF_INET6) {
- struct sockaddr_in6 *sockaddr1 = (struct sockaddr_in6 *)(info1->ai_addr);
- struct sockaddr_in6 *sockaddr2 = (struct sockaddr_in6 *)(info2->ai_addr);
- result = !memcmp(&sockaddr1->sin6_addr, &sockaddr2->sin6_addr, sizeof(struct in6_addr));
- } else {
- struct sockaddr_in *sockaddr_v4;
- struct sockaddr_in6 *sockaddr_v6;
- switch (info1->ai_family) {
- case AF_INET:
- sockaddr_v6 = (struct sockaddr_in6 *)(info2->ai_addr);
- if (IN6_IS_ADDR_V4MAPPED(&sockaddr_v6->sin6_addr)) {
- sockaddr_v4 = (struct sockaddr_in *)(info1->ai_addr);
- result = sockaddr_v4->sin_addr.s_addr == sockaddr_v6->sin6_addr.s6_addr32[3];
- }
- break;
-
- case AF_INET6:
- sockaddr_v6 = (struct sockaddr_in6 *)(info1->ai_addr);
- if (IN6_IS_ADDR_V4MAPPED(&sockaddr_v6->sin6_addr)) {
- sockaddr_v4 = (struct sockaddr_in *)(info2->ai_addr);
- result = sockaddr_v4->sin_addr.s_addr == sockaddr_v6->sin6_addr.s6_addr32[3];
- }
- break;
-
- default: ;
- }
- }
+ result = _nvme_ipaddrs_eq(info1->ai_addr, info2->ai_addr);
ipaddrs_eq_fail:
if (info1)
@@ -972,3 +1028,91 @@ bool nvme_ipaddrs_eq(const char *addr1, const char *addr2)
return false;
}
#endif /* HAVE_NETDB */
+
+#ifdef HAVE_NETDB
+/*
+ * Return the name of the first entry in @iface_list whose IPv4/IPv6 address
+ * equals the numeric address @addr, or NULL when nothing matches or @addr
+ * cannot be parsed. The returned string belongs to @iface_list.
+ */
+const char *nvme_iface_matching_addr(const struct ifaddrs *iface_list, const char *addr)
+{
+	struct addrinfo hint = {
+		.ai_flags = AI_NUMERICHOST,
+		.ai_family = AF_UNSPEC,
+	};
+	struct addrinfo *resolved = NULL;
+	const struct ifaddrs *cur;
+	const char *match = NULL;
+
+	if (!iface_list || !addr)
+		return NULL;
+
+	if (getaddrinfo(addr, 0, &hint, &resolved) || !resolved)
+		return NULL;
+
+	for (cur = iface_list; cur; cur = cur->ifa_next) {
+		struct sockaddr *sa = cur->ifa_addr;
+
+		/* Entries without an address or with a non-IP family are skipped */
+		if (!sa)
+			continue;
+		if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
+			continue;
+		if (!_nvme_ipaddrs_eq(resolved->ai_addr, sa))
+			continue;
+
+		match = cur->ifa_name;
+		break;
+	}
+
+	freeaddrinfo(resolved);
+
+	return match;
+}
+
+/*
+ * Check whether the numeric address @addr is the primary address of
+ * interface @iface. The first @iface_list entry named @iface whose address
+ * family equals that of @addr is taken as the primary address and compared
+ * with @addr. Returns false when any argument is NULL, when @addr cannot be
+ * parsed, or when no such entry exists.
+ */
+bool nvme_iface_primary_addr_matches(const struct ifaddrs *iface_list, const char *iface, const char *addr)
+{
+	const struct ifaddrs *iface_it;
+	struct addrinfo *info = NULL, hint = { .ai_flags = AI_NUMERICHOST, .ai_family = AF_UNSPEC };
+	bool match_found = false;
+
+	/* @iface is handed to strcmp() below, so it has to be validated too */
+	if (!iface_list || !iface || !addr || getaddrinfo(addr, 0, &hint, &info) || !info)
+		return false;
+
+	/* Walk through the linked list */
+	for (iface_it = iface_list; iface_it != NULL; iface_it = iface_it->ifa_next) {
+		if (strcmp(iface, iface_it->ifa_name))
+			continue; /* Not the interface we're looking for */
+
+		/* The interface list is ordered in a way that the primary
+		 * address is listed first. As soon as the parsed address
+		 * matches the family of the address we're looking for, we
+		 * have found the primary address for that family.
+		 */
+		if (iface_it->ifa_addr && (iface_it->ifa_addr->sa_family == info->ai_addr->sa_family)) {
+			match_found = _nvme_ipaddrs_eq(info->ai_addr, iface_it->ifa_addr);
+			break;
+		}
+	}
+
+	freeaddrinfo(info);
+
+	return match_found;
+}
+
+#else /* HAVE_NETDB */
+
+/* Variant for builds without HAVE_NETDB: logs an error and finds nothing. */
+const char *nvme_iface_matching_addr(const struct ifaddrs *iface_list, const char *addr)
+{
+	const char *no_match = NULL;
+
+	nvme_msg(NULL, LOG_ERR,
+		 "no support for interface lookup; recompile with libnss support.\n");
+
+	return no_match;
+}
+
+/* Variant for builds without HAVE_NETDB: logs an error and never matches. */
+bool nvme_iface_primary_addr_matches(const struct ifaddrs *iface_list, const char *iface, const char *addr)
+{
+	bool matched = false;
+
+	nvme_msg(NULL, LOG_ERR,
+		 "no support for interface lookup; recompile with libnss support.\n");
+
+	return matched;
+}
+
+#endif /* HAVE_NETDB */
+
+/*
+ * __nvme_alloc() - Allocate a zero-filled, page-aligned buffer
+ * @len: Minimum number of bytes required
+ *
+ * The size is rounded up to a multiple of 0x1000 with round_up() and the
+ * buffer is aligned to getpagesize(). The whole rounded size is zeroed.
+ * The memory comes from posix_memalign() and is released with free().
+ *
+ * Return: The buffer, or NULL if the rounded size wrapped around or the
+ * allocation failed.
+ */
+void *__nvme_alloc(size_t len)
+{
+	size_t _len = round_up(len, 0x1000);
+	void *p;
+
+	/*
+	 * A rounded size below the request means the rounding overflowed;
+	 * allocating it would hand back a buffer shorter than @len.
+	 */
+	if (_len < len)
+		return NULL;
+
+	if (posix_memalign(&p, getpagesize(), _len))
+		return NULL;
+
+	memset(p, 0, _len);
+	return p;
+}
diff --git a/src/nvme/util.h b/src/nvme/util.h
index 9d6faf3..16d5b9c 100644
--- a/src/nvme/util.h
+++ b/src/nvme/util.h
@@ -9,6 +9,8 @@
#ifndef _LIBNVME_UTIL_H
#define _LIBNVME_UTIL_H
+#include <ifaddrs.h>
+
#include "types.h"
/**
@@ -149,6 +151,40 @@ void nvme_init_copy_range_f1(struct nvme_copy_range_f1 *copy, __u16 *nlbs,
__u32 *elbats, __u16 nr);
/**
+ * nvme_init_copy_range_f2() - Constructs a copy range f2 structure
+ * @copy: Copy range array
+ * @snsids: Source namespace identifier
+ * @nlbs: Number of logical blocks
+ * @slbas: Starting LBA
+ * @sopts: Source options
+ * @eilbrts: Expected initial logical block reference tag
+ * @elbatms: Expected logical block application tag mask
+ * @elbats: Expected logical block application tag
+ * @nr: Number of descriptors to construct
+ *
+ * @copy and every input array are indexed from 0 to @nr - 1, so each must
+ * hold at least @nr elements. @elbatms and @elbats are passed as 32-bit
+ * values but the descriptor fields they fill are 16 bits wide.
+ */
+void nvme_init_copy_range_f2(struct nvme_copy_range_f2 *copy, __u32 *snsids,
+ __u16 *nlbs, __u64 *slbas, __u16 *sopts,
+ __u32 *eilbrts, __u32 *elbatms, __u32 *elbats,
+ __u16 nr);
+
+/**
+ * nvme_init_copy_range_f3() - Constructs a copy range f3 structure
+ * @copy: Copy range array
+ * @snsids: Source namespace identifier
+ * @nlbs: Number of logical blocks
+ * @slbas: Starting LBA
+ * @sopts: Source options
+ * @eilbrts: Expected initial logical block reference tag (64-bit; stored
+ * in the last 8 bytes of the 10-byte elbt field, most
+ * significant byte first)
+ * @elbatms: Expected logical block application tag mask
+ * @elbats: Expected logical block application tag
+ * @nr: Number of descriptors to construct
+ *
+ * @copy and every input array are indexed from 0 to @nr - 1, so each must
+ * hold at least @nr elements. @elbatms and @elbats are passed as 32-bit
+ * values but the descriptor fields they fill are 16 bits wide.
+ */
+void nvme_init_copy_range_f3(struct nvme_copy_range_f3 *copy, __u32 *snsids,
+ __u16 *nlbs, __u64 *slbas, __u16 *sopts,
+ __u64 *eilbrts, __u32 *elbatms, __u32 *elbats,
+ __u16 nr);
+
+/**
* nvme_get_feature_length() - Retreive the command payload length for a
* specific feature identifier
* @fid: Feature identifier, see &enum nvme_features_id.
@@ -447,8 +483,8 @@ static inline void nvme_feature_decode_namespace_write_protect(__u32 value,
static inline void nvme_id_ns_flbas_to_lbaf_inuse(__u8 flbas, __u8 *lbaf_inuse)
{
- *lbaf_inuse = (((flbas & NVME_NS_FLBAS_HIGHER_MASK) >> 1) |
- (flbas & NVME_NS_FLBAS_LOWER_MASK));
+ *lbaf_inuse = ((NVME_FLBAS_HIGHER(flbas) >> 1) |
+ NVME_FLBAS_LOWER(flbas));
}
struct nvme_root;
@@ -639,4 +675,31 @@ int nvme_uuid_random(unsigned char uuid[NVME_UUID_LEN]);
*/
bool nvme_ipaddrs_eq(const char *addr1, const char *addr2);
+/**
+ * nvme_iface_matching_addr - Get interface matching @addr
+ * @iface_list: Interface list returned by getifaddrs()
+ * @addr: Address to match
+ *
+ * Parse the interface list pointed to by @iface_list looking
+ * for the interface that has @addr as one of its assigned
+ * addresses.
+ *
+ * The returned string is the ifa_name member of the matching
+ * @iface_list entry, so it must not be freed by the caller and
+ * remains valid only as long as @iface_list does.
+ *
+ * Return: The name of the interface that owns @addr or NULL.
+ */
+const char *nvme_iface_matching_addr(const struct ifaddrs *iface_list, const char *addr);
+
+/**
+ * nvme_iface_primary_addr_matches - Check that interface's primary address matches
+ * @iface_list: Interface list returned by getifaddrs()
+ * @iface: Interface to match
+ * @addr: Address to match
+ *
+ * Parse the interface list pointed to by @iface_list looking for
+ * interface @iface. Then get its primary address and check if it matches
+ * @addr. The primary address is the first address listed for @iface
+ * that has the same address family as @addr.
+ *
+ * Return: true if a match is found, false otherwise.
+ */
+bool nvme_iface_primary_addr_matches(const struct ifaddrs *iface_list, const char *iface, const char *addr);
+
#endif /* _LIBNVME_UTIL_H */