diff options
Diffstat (limited to 'src/nvme/tree.c')
-rw-r--r-- | src/nvme/tree.c | 1006 |
1 files changed, 768 insertions, 238 deletions
diff --git a/src/nvme/tree.c b/src/nvme/tree.c index a2ac069..07a3c53 100644 --- a/src/nvme/tree.c +++ b/src/nvme/tree.c @@ -15,6 +15,7 @@ #include <fcntl.h> #include <libgen.h> #include <unistd.h> +#include <ifaddrs.h> #include <sys/types.h> #include <sys/stat.h> @@ -24,6 +25,7 @@ #include <ccan/endian/endian.h> #include <ccan/list/list.h> +#include "cleanup.h" #include "ioctl.h" #include "linux.h" #include "filters.h" @@ -34,6 +36,31 @@ #include "log.h" #include "private.h" +/** + * struct candidate_args - Used to look for a controller matching these parameters + * @transport: Transport type: loop, fc, rdma, tcp + * @traddr: Transport address (destination address) + * @trsvcid: Transport service ID + * @subsysnqn: Subsystem NQN + * @host_traddr: Host transport address (source address) + * @host_iface: Host interface for connection (tcp only) + * @iface_list: Interface list (tcp only) + * @addreq: Address comparison function (for traddr, host-traddr) + * @well_known_nqn: Set to "true" when @subsysnqn is the well-known NQN + */ +struct candidate_args { + const char *transport; + const char *traddr; + const char *trsvcid; + const char *subsysnqn; + const char *host_traddr; + const char *host_iface; + struct ifaddrs *iface_list; + bool (*addreq)(const char *, const char *); + bool well_known_nqn; +}; +typedef bool (*ctrl_match_t)(struct nvme_ctrl *c, struct candidate_args *candidate); + const char *nvme_slots_sysfs_dir = "/sys/bus/pci/slots"; static struct nvme_host *default_host; @@ -78,17 +105,24 @@ static bool streqcase0(const char *s1, const char *s2) return !strcasecmp(s1, s2); } -static inline void nvme_free_dirents(struct dirent **d, int i) +struct dirents { + struct dirent **ents; + int num; +}; + +static void cleanup_dirents(struct dirents *ents) { - while (i-- > 0) - free(d[i]); - free(d); + while (ents->num > 0) + free(ents->ents[--ents->num]); + free(ents->ents); } +#define _cleanup_dirents_ __cleanup__(cleanup_dirents) + nvme_host_t 
nvme_default_host(nvme_root_t r) { struct nvme_host *h; - char *hostnqn, *hostid; + _cleanup_free_ char *hostnqn, *hostid; hostnqn = nvmf_hostnqn_from_file(); if (!hostnqn) @@ -100,61 +134,55 @@ nvme_host_t nvme_default_host(nvme_root_t r) nvme_host_set_hostsymname(h, NULL); default_host = h; - free(hostnqn); - if (hostid) - free(hostid); return h; } int nvme_scan_topology(struct nvme_root *r, nvme_scan_filter_t f, void *f_args) { - struct dirent **subsys, **ctrls; - int i, num_subsys, num_ctrls, ret; + _cleanup_dirents_ struct dirents subsys = {}, ctrls = {}; + int i, ret; if (!r) return 0; - num_ctrls = nvme_scan_ctrls(&ctrls); - if (num_ctrls < 0) { + ctrls.num = nvme_scan_ctrls(&ctrls.ents); + if (ctrls.num < 0) { nvme_msg(r, LOG_DEBUG, "failed to scan ctrls: %s\n", strerror(errno)); - return num_ctrls; + return ctrls.num; } - for (i = 0; i < num_ctrls; i++) { - nvme_ctrl_t c = nvme_scan_ctrl(r, ctrls[i]->d_name); + for (i = 0; i < ctrls.num; i++) { + nvme_ctrl_t c = nvme_scan_ctrl(r, ctrls.ents[i]->d_name); if (!c) { nvme_msg(r, LOG_DEBUG, "failed to scan ctrl %s: %s\n", - ctrls[i]->d_name, strerror(errno)); + ctrls.ents[i]->d_name, strerror(errno)); continue; } if ((f) && !f(NULL, c, NULL, f_args)) { nvme_msg(r, LOG_DEBUG, "filter out controller %s\n", - ctrls[i]->d_name); + ctrls.ents[i]->d_name); nvme_free_ctrl(c); } } - nvme_free_dirents(ctrls, i); - - num_subsys = nvme_scan_subsystems(&subsys); - if (num_subsys < 0) { + subsys.num = nvme_scan_subsystems(&subsys.ents); + if (subsys.num < 0) { nvme_msg(r, LOG_DEBUG, "failed to scan subsystems: %s\n", strerror(errno)); - return num_subsys; + return subsys.num; } - for (i = 0; i < num_subsys; i++) { - ret = nvme_scan_subsystem(r, subsys[i]->d_name, f, f_args); + for (i = 0; i < subsys.num; i++) { + ret = nvme_scan_subsystem( + r, subsys.ents[i]->d_name, f, f_args); if (ret < 0) { nvme_msg(r, LOG_DEBUG, "failed to scan subsystem %s: %s\n", - subsys[i]->d_name, strerror(errno)); + subsys.ents[i]->d_name, 
strerror(errno)); } } - nvme_free_dirents(subsys, i); - return 0; } @@ -172,6 +200,7 @@ nvme_root_t nvme_create_root(FILE *fp, int log_level) r->fp = fp; list_head_init(&r->hosts); list_head_init(&r->endpoints); + nvme_set_root(r); return r; } @@ -234,8 +263,10 @@ const char *nvme_root_get_application(nvme_root_t r) void nvme_root_set_application(nvme_root_t r, const char *a) { - if (r->application) + if (r->application) { free(r->application); + r->application = NULL; + } if (a) r->application = strdup(a); } @@ -338,9 +369,18 @@ void nvme_free_tree(nvme_root_t r) free(r->config_file); if (r->application) free(r->application); + nvme_set_root(NULL); free(r); } +void nvme_root_release_fds(nvme_root_t r) +{ + struct nvme_host *h, *_h; + + nvme_for_each_host_safe(r, h, _h) + nvme_host_release_fds(h); +} + const char *nvme_subsystem_get_nqn(nvme_subsystem_t s) { return s->subsysnqn; @@ -368,12 +408,19 @@ const char *nvme_subsystem_get_application(nvme_subsystem_t s) void nvme_subsystem_set_application(nvme_subsystem_t s, const char *a) { - if (s->application) + if (s->application) { free(s->application); + s->application = NULL; + } if (a) s->application = strdup(a); } +const char *nvme_subsystem_get_iopolicy(nvme_subsystem_t s) +{ + return s->iopolicy; +} + nvme_ctrl_t nvme_subsystem_first_ctrl(nvme_subsystem_t s) { return list_top(&s->ctrls, struct nvme_ctrl, entry); @@ -412,7 +459,7 @@ nvme_path_t nvme_namespace_next_path(nvme_ns_t ns, nvme_path_t p) static void __nvme_free_ns(struct nvme_ns *n) { list_del_init(&n->entry); - close(n->fd); + nvme_ns_release_fd(n); free(n->generic_name); free(n->name); free(n->sysfs_dir); @@ -451,9 +498,23 @@ static void __nvme_free_subsystem(struct nvme_subsystem *s) free(s->subsystype); if (s->application) free(s->application); + if (s->iopolicy) + free(s->iopolicy); free(s); } +void nvme_subsystem_release_fds(struct nvme_subsystem *s) +{ + struct nvme_ctrl *c, *_c; + struct nvme_ns *n, *_n; + + nvme_subsystem_for_each_ctrl_safe(s, 
c, _c) + nvme_ctrl_release_fd(c); + + nvme_subsystem_for_each_ns_safe(s, n, _n) + nvme_ns_release_fd(n); +} + /* * Stub for SWIG */ @@ -524,6 +585,14 @@ static void __nvme_free_host(struct nvme_host *h) free(h); } +void nvme_host_release_fds(struct nvme_host *h) +{ + struct nvme_subsystem *s, *_s; + + nvme_for_each_subsystem_safe(h, s, _s) + nvme_subsystem_release_fds(s); +} + /* Stub for SWIG */ void nvme_free_host(struct nvme_host *h) { @@ -563,27 +632,26 @@ struct nvme_host *nvme_lookup_host(nvme_root_t r, const char *hostnqn, static int nvme_subsystem_scan_namespaces(nvme_root_t r, nvme_subsystem_t s, nvme_scan_filter_t f, void *f_args) { - struct dirent **namespaces; - int i, num_ns, ret; + _cleanup_dirents_ struct dirents namespaces = {}; + int i, ret; - num_ns = nvme_scan_subsystem_namespaces(s, &namespaces); - if (num_ns < 0) { + namespaces.num = nvme_scan_subsystem_namespaces(s, &namespaces.ents); + if (namespaces.num < 0) { nvme_msg(r, LOG_DEBUG, "failed to scan namespaces for subsys %s: %s\n", s->subsysnqn, strerror(errno)); - return num_ns; + return namespaces.num; } - for (i = 0; i < num_ns; i++) { + for (i = 0; i < namespaces.num; i++) { ret = nvme_subsystem_scan_namespace(r, s, - namespaces[i]->d_name, f, f_args); + namespaces.ents[i]->d_name, f, f_args); if (ret < 0) nvme_msg(r, LOG_DEBUG, "failed to scan namespace %s: %s\n", - namespaces[i]->d_name, strerror(errno)); + namespaces.ents[i]->d_name, strerror(errno)); } - nvme_free_dirents(namespaces, i); return 0; } @@ -610,15 +678,28 @@ static int nvme_init_subsystem(nvme_subsystem_t s, const char *name) s->sysfs_dir = (char *)path; if (s->h->r->application) s->application = strdup(s->h->r->application); + s->iopolicy = nvme_get_attr(path, "iopolicy"); return 0; } +static bool __nvme_scan_subsystem(struct nvme_root *r, nvme_subsystem_t s, + nvme_scan_filter_t f, void *f_args) +{ + if (f && !f(s, NULL, NULL, f_args)) { + nvme_msg(r, LOG_DEBUG, "filter out subsystem %s\n", s->name); + 
__nvme_free_subsystem(s); + return false; + } + nvme_subsystem_scan_namespaces(r, s, f, f_args); + return true; +} + static int nvme_scan_subsystem(struct nvme_root *r, const char *name, nvme_scan_filter_t f, void *f_args) { struct nvme_subsystem *s = NULL, *_s; - char *path, *subsysnqn; + _cleanup_free_ char *path = NULL, *subsysnqn = NULL; nvme_host_t h = NULL; int ret; @@ -628,7 +709,6 @@ static int nvme_scan_subsystem(struct nvme_root *r, const char *name, return ret; subsysnqn = nvme_get_attr(path, "subsysnqn"); - free(path); if (!subsysnqn) { errno = ENODEV; return -1; @@ -644,6 +724,10 @@ static int nvme_scan_subsystem(struct nvme_root *r, const char *name, continue; if (strcmp(_s->name, name)) continue; + if (!__nvme_scan_subsystem(r, _s, f, f_args)) { + errno = EINVAL; + return -1; + } s = _s; } } @@ -659,26 +743,18 @@ static int nvme_scan_subsystem(struct nvme_root *r, const char *name, s = nvme_alloc_subsystem(h, name, subsysnqn); if (!s) { errno = ENOMEM; + return -1; + } + if (!__nvme_scan_subsystem(r, s, f, f_args)) { + errno = EINVAL; + return -1; } } else if (strcmp(s->subsysnqn, subsysnqn)) { - nvme_msg(r, LOG_WARNING, "NQN mismatch for subsystem '%s'\n", + nvme_msg(r, LOG_DEBUG, "NQN mismatch for subsystem '%s'\n", name); - s = NULL; - free(subsysnqn); errno = EINVAL; return -1; } - free(subsysnqn); - if (!s) - return -1; - - if (f && !f(s, NULL, NULL, f_args)) { - nvme_msg(r, LOG_DEBUG, "filter out subsystem %s\n", name); - __nvme_free_subsystem(s); - return 0; - } - - nvme_subsystem_scan_namespaces(r, s, f, f_args); return 0; } @@ -740,7 +816,7 @@ static void nvme_subsystem_set_path_ns(nvme_subsystem_t s, nvme_path_t p) static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name) { struct nvme_path *p; - char *path, *grpid; + _cleanup_free_ char *path = NULL, *grpid = NULL; int ret; nvme_msg(r, LOG_DEBUG, "scan controller %s path %s\n", @@ -758,12 +834,13 @@ static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, 
char *name) p = calloc(1, sizeof(*p)); if (!p) { errno = ENOMEM; - goto free_path; + return -1; } p->c = c; p->name = strdup(name); p->sysfs_dir = path; + path = NULL; p->ana_state = nvme_get_path_attr(p, "ana_state"); if (!p->ana_state) p->ana_state = strdup("optimized"); @@ -771,7 +848,6 @@ static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name) grpid = nvme_get_path_attr(p, "ana_grpid"); if (grpid) { sscanf(grpid, "%d", &p->grpid); - free(grpid); } list_node_init(&p->nentry); @@ -779,26 +855,29 @@ static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name) list_node_init(&p->entry); list_add(&c->paths, &p->entry); return 0; - -free_path: - free(path); - return -1; } int nvme_ctrl_get_fd(nvme_ctrl_t c) { - nvme_root_t r = c->s && c->s->h ? c->s->h->r : NULL; - if (c->fd < 0) { c->fd = nvme_open(c->name); if (c->fd < 0) - nvme_msg(r, LOG_ERR, + nvme_msg(root_from_ctrl(c), LOG_ERR, "Failed to open ctrl %s, errno %d\n", c->name, errno); } return c->fd; } +void nvme_ctrl_release_fd(nvme_ctrl_t c) +{ + if (c->fd < 0) + return; + + close(c->fd); + c->fd = -1; +} + nvme_subsystem_t nvme_ctrl_get_subsystem(nvme_ctrl_t c) { return c->s; @@ -824,6 +903,32 @@ const char *nvme_ctrl_get_address(nvme_ctrl_t c) return c->address ? c->address : ""; } +char *nvme_ctrl_get_src_addr(nvme_ctrl_t c, char *src_addr, size_t src_addr_len) +{ + size_t l; + char *p; + + if (!c->address) + return NULL; + + p = strstr(c->address, "src_addr="); + if (!p) + return NULL; + + p += strlen("src_addr="); + l = strcspn(p, ",%"); /* % to eliminate IPv6 scope (if present) */ + if (l >= src_addr_len) { + nvme_msg(root_from_ctrl(c), LOG_ERR, + "Buffer for src_addr is too small (%zu must be > %zu)\n", + src_addr_len, l); + return NULL; + } + + strncpy(src_addr, p, l); + src_addr[l] = '\0'; + return src_addr; +} + const char *nvme_ctrl_get_phy_slot(nvme_ctrl_t c) { return c->phy_slot ? 
c->phy_slot : ""; @@ -998,10 +1103,7 @@ nvme_path_t nvme_ctrl_next_path(nvme_ctrl_t c, nvme_path_t p) do { if (a) { free(a); (a) = NULL; } } while (0) void nvme_deconfigure_ctrl(nvme_ctrl_t c) { - if (c->fd >= 0) { - close(c->fd); - c->fd = -1; - } + nvme_ctrl_release_fd(c); FREE_CTRL_ATTR(c->name); FREE_CTRL_ATTR(c->sysfs_dir); FREE_CTRL_ATTR(c->firmware); @@ -1140,40 +1242,391 @@ struct nvme_ctrl *nvme_create_ctrl(nvme_root_t r, return c; } +/** + * _tcp_ctrl_match_host_traddr_no_src_addr() - Match host_traddr w/o src_addr + * @c: An existing controller instance + * @candidate: Candidate ctrl we're trying to match with @c. + * + * On kernels prior to 6.1 (i.e. src_addr is not available), try to match + * a candidate controller's host_traddr to that of an existing controller. + * + * This function takes an optimistic approach. In doubt, it will declare a + * match and return true. + * + * Return: true if @c->host_traddr matches @candidate->host_traddr. false otherwise. + */ +static bool _tcp_ctrl_match_host_traddr_no_src_addr(struct nvme_ctrl *c, struct candidate_args *candidate) +{ + if (c->cfg.host_traddr) + return candidate->addreq(candidate->host_traddr, c->cfg.host_traddr); + + /* If c->cfg.host_traddr is NULL, then the controller (c) + * uses the interface's primary address as the source + * address. If c->cfg.host_iface is defined we can + * determine the primary address associated with that + * interface and compare that to the candidate->host_traddr. + */ + if (c->cfg.host_iface) + return nvme_iface_primary_addr_matches(candidate->iface_list, + c->cfg.host_iface, + candidate->host_traddr); + + /* If both c->cfg.host_traddr and c->cfg.host_iface are + * NULL, we don't have enough information to make a + * 100% positive match. Regardless, let's be optimistic + * and assume that we have a match. 
+ */ + nvme_msg(root_from_ctrl(c), LOG_DEBUG, + "Not enough data, but assume %s matches candidate's host_traddr: %s\n", + nvme_ctrl_get_name(c), candidate->host_traddr); + + return true; +} + +/** + * _tcp_ctrl_match_host_iface_no_src_addr() - Match host_iface w/o src_addr + * @c: An existing controller instance + * @candidate: Candidate ctrl we're trying to match with @c. + * + * On kernels prior to 6.1 (i.e. src_addr is not available), try to match + * a candidate controller's host_iface to that of an existing controller. + * + * This function takes an optimistic approach. In doubt, it will declare a + * match and return true. + * + * Return: true if @c->host_iface matches @candidate->host_iface. false otherwise. + */ +static bool _tcp_ctrl_match_host_iface_no_src_addr(struct nvme_ctrl *c, struct candidate_args *candidate) +{ + if (c->cfg.host_iface) + return streq0(candidate->host_iface, c->cfg.host_iface); + + /* If c->cfg.host_traddr is not NULL we can infer the controller's (c) + * interface from it and compare it to the candidate->host_iface. + */ + if (c->cfg.host_traddr) { + const char *c_host_iface; + + c_host_iface = nvme_iface_matching_addr(candidate->iface_list, c->cfg.host_traddr); + return streq0(candidate->host_iface, c_host_iface); + } + + /* If both c->cfg.host_traddr and c->cfg.host_iface are + * NULL, we don't have enough information to make a + * 100% positive match. Regardless, let's be optimistic + * and assume that we have a match. + */ + nvme_msg(root_from_ctrl(c), LOG_DEBUG, + "Not enough data, but assume %s matches candidate's host_iface: %s\n", + nvme_ctrl_get_name(c), candidate->host_iface); + + return true; +} + +/** + * _tcp_opt_params_match_no_src_addr() - Match optional host_traddr/host_iface w/o src_addr + * @c: An existing controller instance + * @candidate: Candidate ctrl we're trying to match with @c. 
+ * + * Before kernel 6.1, the src_addr was not reported by the kernel which makes + * it hard to match a candidate's host_traddr and host_iface to an existing + * controller if that controller was created without specifying the + * host_traddr and/or host_iface. This function tries its best in the absence + * of a src_addr to match @c to @candidate. This may not be 100% accurate. + * Only the src_addr can provide 100% accuracy. + * + * This function takes an optimistic approach. In doubt, it will declare a + * match and return true. + * + * Return: true if @c matches @candidate. false otherwise. + */ +static bool _tcp_opt_params_match_no_src_addr(struct nvme_ctrl *c, struct candidate_args *candidate) +{ + /* Check host_traddr only if candidate is interested */ + if (candidate->host_traddr) { + if (!_tcp_ctrl_match_host_traddr_no_src_addr(c, candidate)) + return false; + } + + /* Check host_iface only if candidate is interested */ + if (candidate->host_iface) { + if (!_tcp_ctrl_match_host_iface_no_src_addr(c, candidate)) + return false; + } + + return true; +} + +/** + * _tcp_opt_params_match() - Match optional host_traddr/host_iface + * @c: An existing controller instance + * @candidate: Candidate ctrl we're trying to match with @c. + * + * The host_traddr and host_iface are optional for TCP. When they are not + * specified, the kernel looks up the destination IP address (traddr) in the + * routing table to determine the best interface for the connection. The + * kernel then retrieves the primary IP address assigned to that interface + * and uses that as the connection’s source address. + * + * An interface’s primary address is the default source address used for + * all connections made on that interface unless host-traddr is used to + * override the default. 
Kernel-selected interfaces and/or source addresses + * are hidden from user-space applications unless the kernel makes that + * information available through the "src_addr" attribute in the + * sysfs (kernel 6.1 or later). + * + * Sometimes, an application may force the interface by specifying the + * "host-iface" or may force a different source address (instead of the + * primary address) by providing the "host-traddr". + * + * If the candidate specifies the host_traddr and/or host_iface but they + * do not match the existing controller's host_traddr and/or host_iface + * (they could be NULL), we may still be able to find a match by taking + * the existing controller's src_addr into consideration since that + * parameter identifies the actual source address of the connection and + * therefore can be used to infer the interface of the connection. However, + * the src_addr can only be read from the nvme device's sysfs "address" + * attribute starting with kernel 6.1 (or kernels that backported the + * src_addr patch). + * + * For legacy kernels that do not provide the src_addr we must use a + * different algorithm to match the host_traddr and host_iface, but + * it's not 100% accurate. + * + * Return: true if @c matches @candidate. false otherwise. 
+ */ +static bool _tcp_opt_params_match(struct nvme_ctrl *c, struct candidate_args *candidate) +{ + char *src_addr, buffer[INET6_ADDRSTRLEN]; + + /* Check if src_addr is available (kernel 6.1 or later) */ + src_addr = nvme_ctrl_get_src_addr(c, buffer, sizeof(buffer)); + if (!src_addr) + return _tcp_opt_params_match_no_src_addr(c, candidate); + + /* Check host_traddr only if candidate is interested */ + if (candidate->host_traddr && + !candidate->addreq(candidate->host_traddr, src_addr)) + return false; + + /* Check host_iface only if candidate is interested */ + if (candidate->host_iface && + !streq0(candidate->host_iface, + nvme_iface_matching_addr(candidate->iface_list, src_addr))) + return false; + + return true; +} + +/** + * _tcp_match_ctrl() - Check if controller matches candidate (TCP only) + * @c: An existing controller instance + * @candidate: Candidate ctrl we're trying to match with @c. + * + * We want to determine if an existing controller can be re-used + * for the candidate controller we're trying to instantiate. + * + * For TCP, we do not have a match if the candidate's transport, traddr, + * trsvcid are not identical to those of the existing controller. + * These 3 parameters are mandatory for a match. + * + * The host_traddr and host_iface are optional. When the candidate does + * not specify them (both NULL), we can ignore them. Otherwise, we must + * employ advanced investigation techniques to determine if there's a match. + * + * Return: true if a match is found, false otherwise. 
+ */ +static bool _tcp_match_ctrl(struct nvme_ctrl *c, struct candidate_args *candidate) +{ + if (!streq0(c->transport, candidate->transport)) + return false; + + if (!streq0(c->trsvcid, candidate->trsvcid)) + return false; + + if (!candidate->addreq(c->traddr, candidate->traddr)) + return false; + + if (candidate->well_known_nqn && !nvme_ctrl_is_discovery_ctrl(c)) + return false; + + if (candidate->subsysnqn && !streq0(c->subsysnqn, candidate->subsysnqn)) + return false; + + /* Check host_traddr / host_iface only if candidate is interested */ + if ((candidate->host_iface || candidate->host_traddr) && + !_tcp_opt_params_match(c, candidate)) + return false; + + return true; +} + +/** + * _match_ctrl() - Check if controller matches candidate (non TCP transport) + * @c: An existing controller instance + * @candidate: Candidate ctrl we're trying to match with @c. + * + * We want to determine if an existing controller can be re-used + * for the candidate controller we're trying to instantiate. This function + * is used for all transports except TCP. + * + * Return: true if a match is found, false otherwise. 
+ */ +static bool _match_ctrl(struct nvme_ctrl *c, struct candidate_args *candidate) +{ + if (!streq0(c->transport, candidate->transport)) + return false; + + if (candidate->traddr && c->traddr && + !candidate->addreq(c->traddr, candidate->traddr)) + return false; + + if (candidate->host_traddr && c->cfg.host_traddr && + !candidate->addreq(c->cfg.host_traddr, candidate->host_traddr)) + return false; + + if (candidate->host_iface && c->cfg.host_iface && + !streq0(c->cfg.host_iface, candidate->host_iface)) + return false; + + if (candidate->trsvcid && c->trsvcid && + !streq0(c->trsvcid, candidate->trsvcid)) + return false; + + if (candidate->well_known_nqn && !nvme_ctrl_is_discovery_ctrl(c)) + return false; + + if (candidate->subsysnqn && !streq0(c->subsysnqn, candidate->subsysnqn)) + return false; + + return true; +} +/** + * _candidate_init() - Init candidate and get the matching function + * + * @candidate: Candidate struct to initialize + * @transport: Transport name + * @traddr: Transport address + * @trsvcid: Transport service identifier + * @subsysnqn: Subsystem NQN + * @host_traddr: Host transport address + * @host_iface: Host interface name + * @host_iface: Host interface name + * + * The function _candidate_free() must be called to release resources once + * the candidate object is no longer required. + * + * Return: The matching function to use when comparing an existing + * controller to the candidate controller. 
+ */ +static ctrl_match_t _candidate_init(struct candidate_args *candidate, + const char *transport, + const char *traddr, + const char *trsvcid, + const char *subsysnqn, + const char *host_traddr, + const char *host_iface) +{ + memset(candidate, 0, sizeof(*candidate)); + + candidate->traddr = traddr; + candidate->trsvcid = trsvcid; + candidate->transport = transport; + candidate->subsysnqn = subsysnqn; + candidate->host_iface = host_iface; + candidate->host_traddr = host_traddr; + + if (streq0(subsysnqn, NVME_DISC_SUBSYS_NAME)) { + /* Since TP8013, the NQN of discovery controllers can be the + * well-known NQN (i.e. nqn.2014-08.org.nvmexpress.discovery) or + * a unique NQN. A DC created using the well-known NQN may later + * display a unique NQN when looked up in the sysfs. Therefore, + * ignore (i.e. set to NULL) the well-known NQN when looking for + * a match. + */ + candidate->subsysnqn = NULL; + candidate->well_known_nqn = true; + } + + if (streq0(transport, "tcp")) { + /* For TCP we may need to access the interface map. + * Let's retrieve and cache the map. + */ + if (getifaddrs(&candidate->iface_list) == -1) + candidate->iface_list = NULL; + + candidate->addreq = nvme_ipaddrs_eq; + return _tcp_match_ctrl; + } + + if (streq0(transport, "rdma")) { + candidate->addreq = nvme_ipaddrs_eq; + return _match_ctrl; + } + + /* All other transport types */ + candidate->addreq = streqcase0; + return _match_ctrl; +} + +/** + * _candidate_free() - Release resources allocated by _candidate_init() + * + * @candidate: data to free. 
+ */ +static void _candidate_free(struct candidate_args *candidate) +{ + freeifaddrs(candidate->iface_list); /* This is NULL-safe */ +} + +#define _cleanup_candidate_ __cleanup__(_candidate_free) + nvme_ctrl_t __nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport, const char *traddr, const char *host_traddr, const char *host_iface, const char *trsvcid, - nvme_ctrl_t p) - + const char *subsysnqn, nvme_ctrl_t p) { - struct nvme_ctrl *c; - bool (*addreq)(const char *, const char *); + struct nvme_ctrl *c, *matching_c = NULL; + _cleanup_candidate_ struct candidate_args candidate; + ctrl_match_t ctrl_match; - if (!strcmp(transport, "tcp") || !strcmp(transport, "rdma")) - addreq = nvme_ipaddrs_eq; /* IP address compare for TCP/RDMA */ - else - addreq = streqcase0; /* Case-insensitive for FC (n/a for loop) */ + /* Init candidate and get the matching function to use */ + ctrl_match = _candidate_init(&candidate, transport, traddr, trsvcid, + subsysnqn, host_traddr, host_iface); c = p ? nvme_subsystem_next_ctrl(s, p) : nvme_subsystem_first_ctrl(s); for (; c != NULL; c = nvme_subsystem_next_ctrl(s, c)) { - if (!streq0(c->transport, transport)) - continue; - if (traddr && c->traddr && - !addreq(c->traddr, traddr)) - continue; - if (host_traddr && c->cfg.host_traddr && - !addreq(c->cfg.host_traddr, host_traddr)) - continue; - if (host_iface && c->cfg.host_iface && - !streq0(c->cfg.host_iface, host_iface)) - continue; - if (trsvcid && c->trsvcid && - !streq0(c->trsvcid, trsvcid)) - continue; - return c; + if (ctrl_match(c, &candidate)) { + matching_c = c; + break; + } } - return NULL; + return matching_c; +} + +bool nvme_ctrl_config_match(struct nvme_ctrl *c, const char *transport, + const char *traddr, const char *trsvcid, + const char *subsysnqn, const char *host_traddr, + const char *host_iface) +{ + ctrl_match_t ctrl_match; + _cleanup_candidate_ struct candidate_args candidate; + + /* Init candidate and get the matching function to use */ + ctrl_match = 
_candidate_init(&candidate, transport, traddr, trsvcid, + subsysnqn, host_traddr, host_iface); + + return ctrl_match(c, &candidate); +} + +nvme_ctrl_t nvme_ctrl_find(nvme_subsystem_t s, const char *transport, + const char *traddr, const char *trsvcid, + const char *subsysnqn, const char *host_traddr, + const char *host_iface) +{ + return __nvme_lookup_ctrl(s, transport, traddr, host_traddr, host_iface, + trsvcid, subsysnqn, NULL/*p*/); } nvme_ctrl_t nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport, @@ -1188,7 +1641,7 @@ nvme_ctrl_t nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport, return NULL; c = __nvme_lookup_ctrl(s, transport, traddr, host_traddr, - host_iface, trsvcid, p); + host_iface, trsvcid, NULL, p); if (c) return c; @@ -1205,73 +1658,63 @@ nvme_ctrl_t nvme_lookup_ctrl(nvme_subsystem_t s, const char *transport, static int nvme_ctrl_scan_paths(nvme_root_t r, struct nvme_ctrl *c) { - struct dirent **paths; - int i, ret; + _cleanup_dirents_ struct dirents paths = {}; + int i; - ret = nvme_scan_ctrl_namespace_paths(c, &paths); - if (ret < 0) - return ret; + paths.num = nvme_scan_ctrl_namespace_paths(c, &paths.ents); + if (paths.num < 0) + return paths.num; - for (i = 0; i < ret; i++) - nvme_ctrl_scan_path(r, c, paths[i]->d_name); + for (i = 0; i < paths.num; i++) + nvme_ctrl_scan_path(r, c, paths.ents[i]->d_name); - nvme_free_dirents(paths, i); return 0; } static int nvme_ctrl_scan_namespaces(nvme_root_t r, struct nvme_ctrl *c) { - struct dirent **namespaces; - int i, ret; + _cleanup_dirents_ struct dirents namespaces = {}; + int i; - ret = nvme_scan_ctrl_namespaces(c, &namespaces); - for (i = 0; i < ret; i++) - nvme_ctrl_scan_namespace(r, c, namespaces[i]->d_name); + namespaces.num = nvme_scan_ctrl_namespaces(c, &namespaces.ents); + for (i = 0; i < namespaces.num; i++) + nvme_ctrl_scan_namespace(r, c, namespaces.ents[i]->d_name); - nvme_free_dirents(namespaces, i); return 0; } static char *nvme_ctrl_lookup_subsystem_name(nvme_root_t r, 
const char *ctrl_name) { - struct dirent **subsys; - char *subsys_name = NULL; - int ret, i; + _cleanup_dirents_ struct dirents subsys = {}; + int i; - ret = nvme_scan_subsystems(&subsys); - if (ret < 0) + subsys.num = nvme_scan_subsystems(&subsys.ents); + if (subsys.num < 0) return NULL; - for (i = 0; i < ret; i++) { + for (i = 0; i < subsys.num; i++) { struct stat st; - char *path; + _cleanup_free_ char *path = NULL; if (asprintf(&path, "%s/%s/%s", nvme_subsys_sysfs_dir, - subsys[i]->d_name, ctrl_name) < 0) { + subsys.ents[i]->d_name, ctrl_name) < 0) { errno = ENOMEM; return NULL; } nvme_msg(r, LOG_DEBUG, "lookup subsystem %s\n", path); if (stat(path, &st) < 0) { - free(path); continue; } - subsys_name = strdup(subsys[i]->d_name); - free(path); - break; + return strdup(subsys.ents[i]->d_name); } - nvme_free_dirents(subsys, ret); - return subsys_name; + return NULL; } static char *nvme_ctrl_lookup_phy_slot(nvme_root_t r, const char *address) { - char *target_addr; - char *addr; - char *path; - int found = 0; + _cleanup_free_ char *target_addr = NULL; int ret; - DIR *slots_dir; + _cleanup_dir_ DIR *slots_dir = NULL; struct dirent *entry; if (!address) @@ -1289,25 +1732,20 @@ static char *nvme_ctrl_lookup_phy_slot(nvme_root_t r, const char *address) if (entry->d_type == DT_DIR && strncmp(entry->d_name, ".", 1) != 0 && strncmp(entry->d_name, "..", 2) != 0) { - ret = asprintf(&path, "/sys/bus/pci/slots/%s", entry->d_name); + _cleanup_free_ char *path = NULL; + _cleanup_free_ char *addr = NULL; + + ret = asprintf(&path, "%s/%s", + nvme_slots_sysfs_dir, entry->d_name); if (ret < 0) { errno = ENOMEM; return NULL; } addr = nvme_get_attr(path, "address"); - if (strcmp(addr, target_addr) == 0) { - found = 1; - free(path); - free(addr); - break; - } - free(path); - free(addr); + if (strcmp(addr, target_addr) == 0) + return strdup(entry->d_name); } } - free(target_addr); - if (found) - return strdup(entry->d_name); return NULL; } @@ -1361,8 +1799,9 @@ static int 
nvme_configure_ctrl(nvme_root_t r, nvme_ctrl_t c, const char *path, int nvme_init_ctrl(nvme_host_t h, nvme_ctrl_t c, int instance) { nvme_subsystem_t s; - char *subsys_name = NULL; - char *path, *name; + _cleanup_free_ char *subsys_name = NULL; + char *path; + _cleanup_free_ char *name = NULL; int ret; ret = asprintf(&name, "nvme%d", instance); @@ -1373,20 +1812,19 @@ int nvme_init_ctrl(nvme_host_t h, nvme_ctrl_t c, int instance) ret = asprintf(&path, "%s/nvme%d", nvme_ctrl_sysfs_dir, instance); if (ret < 0) { errno = ENOMEM; - goto out_free_name; + return ret; } ret = nvme_configure_ctrl(h->r, c, path, name); if (ret < 0) { free(path); - goto out_free_name; + return ret; } c->address = nvme_get_attr(path, "address"); if (!c->address && strcmp(c->transport, "loop")) { errno = ENVME_CONNECT_INVAL_TR; - ret = -1; - goto out_free_name; + return -1; } subsys_name = nvme_ctrl_lookup_subsystem_name(h->r, name); @@ -1395,23 +1833,17 @@ int nvme_init_ctrl(nvme_host_t h, nvme_ctrl_t c, int instance) "Failed to lookup subsystem name for %s\n", c->name); errno = ENVME_CONNECT_LOOKUP_SUBSYS_NAME; - ret = -1; - goto out_free_name; + return -1; } s = nvme_lookup_subsystem(h, subsys_name, c->subsysnqn); if (!s) { errno = ENVME_CONNECT_LOOKUP_SUBSYS; - ret = -1; - goto out_free_subsys; + return -1; } if (s->subsystype && !strcmp(s->subsystype, "discovery")) c->discovery_ctrl = true; c->s = s; list_add(&s->ctrls, &c->entry); -out_free_subsys: - free(subsys_name); - out_free_name: - free(name); return ret; } @@ -1419,8 +1851,10 @@ static nvme_ctrl_t nvme_ctrl_alloc(nvme_root_t r, nvme_subsystem_t s, const char *path, const char *name) { nvme_ctrl_t c, p; - char *addr = NULL, *address = NULL, *a, *e; - char *transport, *traddr = NULL, *trsvcid = NULL; + _cleanup_free_ char *addr = NULL, *address = NULL; + char *a, *e; + _cleanup_free_ char *transport; + char *traddr = NULL, *trsvcid = NULL; char *host_traddr = NULL, *host_iface = NULL; int ret; @@ -1432,7 +1866,8 @@ static 
nvme_ctrl_t nvme_ctrl_alloc(nvme_root_t r, nvme_subsystem_t s, /* Parse 'address' string into components */ addr = nvme_get_attr(path, "address"); if (!addr) { - char *rpath = NULL, *p = NULL, *_a = NULL; + _cleanup_free_ char *rpath = NULL; + char *p = NULL, *_a = NULL; /* loop transport might not have an address */ if (!strcmp(transport, "loop")) @@ -1440,14 +1875,12 @@ static nvme_ctrl_t nvme_ctrl_alloc(nvme_root_t r, nvme_subsystem_t s, /* Older kernel don't support pcie transport addresses */ if (strcmp(transport, "pcie")) { - free(transport); errno = ENXIO; return NULL; } /* Figure out the PCI address from the attribute path */ rpath = realpath(path, NULL); if (!rpath) { - free(transport); errno = ENOMEM; return NULL; } @@ -1462,7 +1895,6 @@ static nvme_ctrl_t nvme_ctrl_alloc(nvme_root_t r, nvme_subsystem_t s, } if (p) addr = strdup(p); - free(rpath); } else if (!strcmp(transport, "pcie")) { /* The 'address' string is the transport address */ traddr = addr; @@ -1500,16 +1932,13 @@ skip_address: } while (c); if (!c) c = p; - free(transport); - if (address) - free(address); if (!c && !p) { nvme_msg(r, LOG_ERR, "failed to lookup ctrl\n"); errno = ENODEV; - free(addr); return NULL; } c->address = addr; + addr = NULL; if (s->subsystype && !strcmp(s->subsystype, "discovery")) c->discovery_ctrl = true; ret = nvme_configure_ctrl(r, c, path, name); @@ -1521,8 +1950,9 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name) nvme_host_t h; nvme_subsystem_t s; nvme_ctrl_t c; - char *path; - char *hostnqn, *hostid, *subsysnqn, *subsysname; + _cleanup_free_ char *path = NULL; + _cleanup_free_ char *hostnqn = NULL, *hostid = NULL; + _cleanup_free_ char *subsysnqn = NULL, *subsysname = NULL; int ret; nvme_msg(r, LOG_DEBUG, "scan controller %s\n", name); @@ -1535,10 +1965,6 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name) hostnqn = nvme_get_attr(path, "hostnqn"); hostid = nvme_get_attr(path, "hostid"); h = nvme_lookup_host(r, hostnqn, hostid); - if 
(hostnqn) - free(hostnqn); - if (hostid) - free(hostid); if (h) { if (h->dhchap_key) free(h->dhchap_key); @@ -1551,7 +1977,6 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name) if (!h) { h = nvme_default_host(r); if (!h) { - free(path); errno = ENOMEM; return NULL; } @@ -1559,7 +1984,6 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name) subsysnqn = nvme_get_attr(path, "subsysnqn"); if (!subsysnqn) { - free(path); errno = ENXIO; return NULL; } @@ -1568,27 +1992,21 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name) nvme_msg(r, LOG_ERR, "failed to lookup subsystem for controller %s\n", name); - free(subsysnqn); - free(path); errno = ENXIO; return NULL; } s = nvme_lookup_subsystem(h, subsysname, subsysnqn); - free(subsysnqn); - free(subsysname); if (!s) { - free(path); errno = ENOMEM; return NULL; } c = nvme_ctrl_alloc(r, s, path, name); - if (!c) { - free(path); + if (!c) return NULL; - } + path = NULL; nvme_ctrl_scan_namespaces(r, c); nvme_ctrl_scan_paths(r, c); return c; @@ -1622,9 +2040,26 @@ static int nvme_bytes_to_lba(nvme_ns_t n, off_t offset, size_t count, int nvme_ns_get_fd(nvme_ns_t n) { + if (n->fd < 0) { + n->fd = nvme_open(n->name); + if (n->fd < 0) + nvme_msg(root_from_ns(n), LOG_ERR, + "Failed to open ns %s, errno %d\n", + n->name, errno); + } + return n->fd; } +void nvme_ns_release_fd(nvme_ns_t n) +{ + if (n->fd < 0) + return; + + close(n->fd); + n->fd = -1; +} + nvme_subsystem_t nvme_ns_get_subsystem(nvme_ns_t n) { return n->s; @@ -1887,57 +2322,164 @@ int nvme_ns_flush(nvme_ns_t n) return nvme_flush(nvme_ns_get_fd(n), nvme_ns_get_nsid(n)); } -static void nvme_ns_parse_descriptors(struct nvme_ns *n, - struct nvme_ns_id_desc *descs) +static int nvme_strtou64(const char *str, void *res) { - void *d = descs; - int i, len; + char *endptr; + __u64 v; - for (i = 0; i < NVME_IDENTIFY_DATA_SIZE; i += len) { - struct nvme_ns_id_desc *desc = d + i; + errno = 0; + v = strtoull(str, &endptr, 0); - if (!desc->nidl) - break; - len 
= desc->nidl + sizeof(*desc); + if (errno != 0) + return -errno; - switch (desc->nidt) { - case NVME_NIDT_EUI64: - memcpy(n->eui64, desc->nid, sizeof(n->eui64)); - break; - case NVME_NIDT_NGUID: - memcpy(n->nguid, desc->nid, sizeof(n->nguid)); - break; - case NVME_NIDT_UUID: - memcpy(n->uuid, desc->nid, sizeof(n->uuid)); - break; - case NVME_NIDT_CSI: - memcpy(&n->csi, desc->nid, sizeof(n->csi)); - break; + if (endptr == str) { + /* no digits found */ + return -EINVAL; + } + + *(__u64 *)res = v; + return 0; +} + +static int nvme_strtou32(const char *str, void *res) +{ + char *endptr; + __u32 v; + + errno = 0; + v = strtol(str, &endptr, 0); + + if (errno != 0) + return -errno; + + if (endptr == str) { + /* no digits found */ + return -EINVAL; + } + + *(__u32 *)res = v; + return 0; +} + +static int nvme_strtoi(const char *str, void *res) +{ + char *endptr; + int v; + + errno = 0; + v = strtol(str, &endptr, 0); + + if (errno != 0) + return -errno; + + if (endptr == str) { + /* no digits found */ + return -EINVAL; + } + + *(int *)res = v; + return 0; +} + +static int nvme_strtoeuid(const char *str, void *res) +{ + memcpy(res, str, 8); + return 0; +} + +static int nvme_strtouuid(const char *str, void *res) +{ + memcpy(res, str, NVME_UUID_LEN); + return 0; +} + +struct sysfs_attr_table { + void *var; + int (*parse)(const char *str, void *res); + bool mandatory; + const char *name; +}; + +#define GETSHIFT(x) (__builtin_ffsll(x) - 1) +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +static int parse_attrs(const char *path, struct sysfs_attr_table *tbl, int size) +{ + char *str; + int ret, i; + + for (i = 0; i < size; i++) { + struct sysfs_attr_table *e = &tbl[i]; + + str = nvme_get_attr(path, e->name); + if (!str) { + if (!e->mandatory) + continue; + return -ENOENT; } + ret = e->parse(str, e->var); + free(str); + if (ret) + return ret; } + + return 0; } -static int nvme_ns_init(struct nvme_ns *n) +static int nvme_ns_init(const char *path, struct nvme_ns *ns) { - 
struct nvme_id_ns ns = { }; - uint8_t buffer[NVME_IDENTIFY_DATA_SIZE] = { }; - struct nvme_ns_id_desc *descs = (void *)buffer; - uint8_t flbas; + _cleanup_free_ char *attr = NULL; + struct stat sb; int ret; - ret = nvme_ns_identify(n, &ns); + struct sysfs_attr_table base[] = { + { &ns->nsid, nvme_strtou32, true, "nsid" }, + { &ns->lba_count, nvme_strtou64, true, "size" }, + { &ns->lba_size, nvme_strtou64, true, "queue/physical_block_size" }, + { ns->eui64, nvme_strtoeuid, false, "eui" }, + { ns->nguid, nvme_strtouuid, false, "nguid" }, + { ns->uuid, nvme_strtouuid, false, "uuid" } + }; + + ret = parse_attrs(path, base, ARRAY_SIZE(base)); if (ret) return ret; - nvme_id_ns_flbas_to_lbaf_inuse(ns.flbas, &flbas); - n->lba_shift = ns.lbaf[flbas].ds; - n->lba_size = 1 << n->lba_shift; - n->lba_count = le64_to_cpu(ns.nsze); - n->lba_util = le64_to_cpu(ns.nuse); - n->meta_size = le16_to_cpu(ns.lbaf[flbas].ms); + ns->lba_shift = GETSHIFT(ns->lba_size); + + if (asprintf(&attr, "%s/csi", path) < 0) + return -errno; + ret = stat(attr, &sb); + if (ret == 0) { + /* only available on kernels >= 6.8 */ + struct sysfs_attr_table ext[] = { + { &ns->csi, nvme_strtoi, true, "csi" }, + { &ns->lba_util, nvme_strtou64, true, "nuse" }, + { &ns->meta_size, nvme_strtoi, true, "metadata_bytes"}, + + }; - if (!nvme_ns_identify_descs(n, descs)) - nvme_ns_parse_descriptors(n, descs); + ret = parse_attrs(path, ext, ARRAY_SIZE(ext)); + if (ret) + return ret; + } else { + struct nvme_id_ns *id; + uint8_t flbas; + + id = __nvme_alloc(sizeof(*ns)); + if (!id) + return -ENOMEM; + + ret = nvme_ns_identify(ns, id); + if (ret) + return ret; + + nvme_id_ns_flbas_to_lbaf_inuse(id->flbas, &flbas); + ns->lba_count = le64_to_cpu(id->nsze); + ns->lba_util = le64_to_cpu(id->nuse); + ns->meta_size = le16_to_cpu(id->lbaf[flbas].ms); + } return 0; } @@ -1956,7 +2498,7 @@ static void nvme_ns_set_generic_name(struct nvme_ns *n, const char *name) n->generic_name = strdup(generic_name); } -static nvme_ns_t 
nvme_ns_open(const char *name) +static nvme_ns_t nvme_ns_open(const char *sys_path, const char *name) { struct nvme_ns *n; @@ -1966,26 +2508,20 @@ static nvme_ns_t nvme_ns_open(const char *name) return NULL; } + n->fd = -1; n->name = strdup(name); - n->fd = nvme_open(n->name); - if (n->fd < 0) - goto free_ns; nvme_ns_set_generic_name(n, name); - if (nvme_get_nsid(n->fd, &n->nsid) < 0) - goto close_fd; - - if (nvme_ns_init(n) != 0) - goto close_fd; + if (nvme_ns_init(sys_path, n) != 0) + goto free_ns; list_head_init(&n->paths); list_node_init(&n->entry); + nvme_ns_release_fd(n); /* Do not leak fds */ return n; -close_fd: - close(n->fd); free_ns: free(n->generic_name); free(n->name); @@ -2020,9 +2556,9 @@ static char *nvme_ns_generic_to_blkdev(const char *generic) static struct nvme_ns *__nvme_scan_namespace(const char *sysfs_dir, const char *name) { struct nvme_ns *n; - char *path; + _cleanup_free_ char *path = NULL; int ret; - char *blkdev; + _cleanup_free_ char *blkdev = NULL; blkdev = nvme_ns_generic_to_blkdev(name); if (!blkdev) { @@ -2033,23 +2569,17 @@ static struct nvme_ns *__nvme_scan_namespace(const char *sysfs_dir, const char * ret = asprintf(&path, "%s/%s", sysfs_dir, blkdev); if (ret < 0) { errno = ENOMEM; - goto free_blkdev; + return NULL; } - n = nvme_ns_open(blkdev); + n = nvme_ns_open(path, blkdev); if (!n) - goto free_path; + return NULL; n->sysfs_dir = path; + path = NULL; - free(blkdev); return n; - -free_path: - free(path); -free_blkdev: - free(blkdev); - return NULL; } nvme_ns_t nvme_scan_namespace(const char *name) |