diff options
Diffstat (limited to 'nvme-topology.c')
-rw-r--r-- | nvme-topology.c | 608 |
1 files changed, 608 insertions, 0 deletions
diff --git a/nvme-topology.c b/nvme-topology.c new file mode 100644 index 0000000..d24ef6b --- /dev/null +++ b/nvme-topology.c @@ -0,0 +1,608 @@ +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/mman.h> + +#include "nvme.h" +#include "nvme-ioctl.h" + +static const char *dev = "/dev/"; +static const char *subsys_dir = "/sys/class/nvme-subsystem/"; + +char *get_nvme_subsnqn(char *path) +{ + char sspath[320], *subsysnqn; + int fd, ret; + + snprintf(sspath, sizeof(sspath), "%s/subsysnqn", path); + + fd = open(sspath, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Failed to open %s: %s\n", + sspath, strerror(errno)); + return NULL; + } + + subsysnqn = calloc(1, 256); + if (!subsysnqn) + goto close_fd; + + ret = read(fd, subsysnqn, 256); + if (ret < 0) { + fprintf(stderr, "Failed to read %s: %s\n", sspath, + strerror(errno)); + free(subsysnqn); + subsysnqn = NULL; + } else if (subsysnqn[strlen(subsysnqn) - 1] == '\n') { + subsysnqn[strlen(subsysnqn) - 1] = '\0'; + } + +close_fd: + close(fd); + return subsysnqn; +} + +char *nvme_get_ctrl_attr(char *path, const char *attr) +{ + char *attrpath, *value; + ssize_t ret; + int fd, i; + + ret = asprintf(&attrpath, "%s/%s", path, attr); + if (ret < 0) + return NULL; + + value = calloc(1, 1024); + if (!value) + goto err_free_path; + + fd = open(attrpath, O_RDONLY); + if (fd < 0) + goto err_free_value; + + ret = read(fd, value, 1024); + if (ret < 0) { + fprintf(stderr, "read :%s :%s\n", attrpath, strerror(errno)); + goto err_close_fd; + } + + if (value[strlen(value) - 1] == '\n') + value[strlen(value) - 1] = '\0'; + + for (i = 0; i < strlen(value); i++) { + if (value[i] == ',' ) + value[i] = ' '; + } + + close(fd); + free(attrpath); + return value; +err_close_fd: + close(fd); +err_free_value: + free(value); +err_free_path: + free(attrpath); + return NULL; +} + +static char *path_trim_last(char *path, char needle) +{ + int i; + i = strlen(path); + if (i>0 && path[i-1] == needle) // remove trailing slash + path[--i] = 0; + for (; i>0; i--) + if (path[i] == needle) { + path[i] = 0; + return path+i+1; + } + return NULL; +} + +static void legacy_get_pci_bdf(char *node, char *bdf) +{ + int ret; + char path[264], nodetmp[264]; + struct stat st; + char *p, *__path = path; + + bdf[0] = 0; + strcpy(nodetmp, node); + p = path_trim_last(nodetmp, '/'); + sprintf(path, "/sys/block/%s/device", p); + ret = readlink(path, nodetmp, sizeof(nodetmp)); + if (ret <= 0) + return; + nodetmp[ret] = 0; + /* The link value is either "device -> ../../../0000:86:00.0" or "device -> ../../nvme0" */ + (void) path_trim_last(path, '/'); + sprintf(path+strlen(path), "/%s/device", nodetmp); + ret = stat(path, &st); + if (ret < 0) + return; + if ((st.st_mode & S_IFLNK) == 0) { + /* follow the second link to get the PCI address */ + ret = readlink(path, __path, sizeof(path)); + if (ret <= 0) + return; + path[ret] = 0; + } + else + (void) path_trim_last(path, '/'); + + p = path_trim_last(path, '/'); + if (p && strlen(p) == 12) + strcpy(bdf, p); +} + +static int scan_namespace(struct nvme_namespace *n) +{ + int ret, fd; + char *path; + + ret = asprintf(&path, "%s%s", dev, n->name); + if (ret < 0) + return ret; + + fd = open(path, O_RDONLY); + if (fd < 0) + goto free; + + n->nsid = nvme_get_nsid(fd); + if (n->nsid < 0) + goto close_fd; + + ret = nvme_identify_ns(fd, n->nsid, 0, &n->ns); + if (ret < 0) + goto close_fd; +close_fd: + close(fd); +free: + free(path); + return 0; +} + +static char *get_nvme_ctrl_path_ana_state(char *path, int nsid) +{ + struct dirent **paths; + char *ana_state; + int i, n; + + ana_state = calloc(1, 16); + if (!ana_state) + return NULL; + + n = scandir(path, &paths, scan_ctrl_paths_filter, alphasort); + if (n <= 0) { + free(ana_state); + return NULL; + } + for (i = 0; i < n; i++) { + int id, cntlid, ns, fd; + char *ctrl_path; + ssize_t ret; + + if (sscanf(paths[i]->d_name, "nvme%dc%dn%d", + &id, &cntlid, &ns) != 3) { + if (sscanf(paths[i]->d_name, "nvme%dn%d", + &id, &ns) != 2) { + continue; + } + } + if (ns != nsid) + continue; + + ret = asprintf(&ctrl_path, "%s/%s/ana_state", + path, paths[i]->d_name); + if (ret < 0) { + free(ana_state); + ana_state = NULL; + break; + } + fd = open(ctrl_path, O_RDONLY); + if (fd < 0) { + free(ctrl_path); + free(ana_state); + ana_state = NULL; + break; + } + ret = read(fd, ana_state, 16); + if (ret < 0) { + fprintf(stderr, "Failed to read ANA state from %s\n", + ctrl_path); + free(ana_state); + ana_state = NULL; + } else if (ana_state[strlen(ana_state) - 1] == '\n') + ana_state[strlen(ana_state) - 1] = '\0'; + close(fd); + free(ctrl_path); + break; + } + for (i = 0; i < n; i++) + free(paths[i]); + free(paths); + return ana_state; +} + +static int scan_ctrl(struct nvme_ctrl *c, char *p, __u32 ns_instance) +{ + struct nvme_namespace *n; + struct dirent **ns; + char *path; + int i, fd, ret; + + ret = asprintf(&path, "%s/%s", p, c->name); + if (ret < 0) + return ret; + + c->address = nvme_get_ctrl_attr(path, "address"); + c->transport = nvme_get_ctrl_attr(path, "transport"); + c->state = nvme_get_ctrl_attr(path, "state"); + c->hostnqn = nvme_get_ctrl_attr(path, "hostnqn"); + c->hostid = nvme_get_ctrl_attr(path, "hostid"); + + if (ns_instance) + c->ana_state = get_nvme_ctrl_path_ana_state(path, ns_instance); + + ret = scandir(path, &ns, scan_namespace_filter, alphasort); + if (ret == -1) { + fprintf(stderr, "Failed to open %s: %s\n", path, strerror(errno)); + return errno; + } + + c->nr_namespaces = ret; + c->namespaces = calloc(c->nr_namespaces, sizeof(*n)); + for (i = 0; i < c->nr_namespaces; i++) { + n = &c->namespaces[i]; + n->name = strdup(ns[i]->d_name); + n->ctrl = c; + scan_namespace(n); + } + + while (i--) + free(ns[i]); + free(ns); + free(path); + + ret = asprintf(&path, "%s%s", dev, c->name); + if (ret < 0) + return ret; + + fd = open(path, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Failed to open %s\n", path); + goto free; + } + + ret = nvme_identify_ctrl(fd, &c->id); + if (ret < 0) + goto close_fd; +close_fd: + close(fd); +free: + free(path); + return 0; +} + +static int scan_subsystem(struct nvme_subsystem *s, __u32 ns_instance) +{ + struct dirent **ctrls, **ns; + struct nvme_namespace *n; + struct nvme_ctrl *c; + int i, ret; + char *path; + + ret = asprintf(&path, "%s%s", subsys_dir, s->name); + if (ret < 0) + return ret; + + s->subsysnqn = get_nvme_subsnqn(path); + ret = scandir(path, &ctrls, scan_ctrls_filter, alphasort); + if (ret == -1) { + fprintf(stderr, "Failed to open %s: %s\n", path, strerror(errno)); + return errno; + } + s->nr_ctrls = ret; + s->ctrls = calloc(s->nr_ctrls, sizeof(*c)); + for (i = 0; i < s->nr_ctrls; i++) { + c = &s->ctrls[i]; + c->name = strdup(ctrls[i]->d_name); + c->subsys = s; + scan_ctrl(c, path, ns_instance); + } + + while (i--) + free(ctrls[i]); + free(ctrls); + + ret = scandir(path, &ns, scan_namespace_filter, alphasort); + if (ret == -1) { + fprintf(stderr, "Failed to open %s: %s\n", path, strerror(errno)); + return errno; + } + + s->nr_namespaces = ret; + s->namespaces = calloc(s->nr_namespaces, sizeof(*n)); + for (i = 0; i < s->nr_namespaces; i++) { + n = &s->namespaces[i]; + n->name = strdup(ns[i]->d_name); + n->ctrl = &s->ctrls[0]; + scan_namespace(n); + } + + while (i--) + free(ns[i]); + free(ns); + + free(path); + return 0; +} + +static int verify_legacy_ns(struct nvme_namespace *n) +{ + struct nvme_ctrl *c = n->ctrl; + struct nvme_id_ctrl id; + char *path; + int ret, fd; + + ret = asprintf(&path, "%s%s", dev, n->name); + if (ret < 0) + return ret; + + if (!n->ctrl->transport && !n->ctrl->address) { + char tmp_address[64] = ""; + legacy_get_pci_bdf(path, tmp_address); + if (tmp_address[0]) { + if (asprintf(&n->ctrl->transport, "pcie") < 0) + return -1; + if (asprintf(&n->ctrl->address, "%s", tmp_address) < 0) + return -1; + } + } + + fd = open(path, O_RDONLY); + free (path); + + if (fd < 0) + return fd; + + ret = nvme_identify_ctrl(fd, &id); + close(fd); + + if (ret) + return ret; + + if (memcmp(id.mn, c->id.mn, sizeof(id.mn)) || + memcmp(id.sn, c->id.sn, sizeof(id.sn))) + return -ENODEV; + return 0; +} + +/* + * For pre-subsystem enabled kernel. Topology information is limited, but we can + * assume controller names are always a prefix to their namespaces, i.e. nvme0 + * is the controller to nvme0n1 for such older kernels. We will also assume + * every controller is its own subsystem. + */ +static int legacy_list(struct nvme_topology *t) +{ + struct nvme_ctrl *c; + struct nvme_subsystem *s; + struct nvme_namespace *n; + struct dirent **devices, **namespaces; + int ret = 0, fd, i; + char *path; + + t->nr_subsystems = scandir(dev, &devices, scan_ctrls_filter, alphasort); + if (t->nr_subsystems < 0) { + fprintf(stderr, "no NVMe device(s) detected.\n"); + return t->nr_subsystems; + } + + t->subsystems = calloc(t->nr_subsystems, sizeof(*s)); + for (i = 0; i < t->nr_subsystems; i++) { + int j; + + s = &t->subsystems[i]; + s->nr_ctrls = 1; + s->ctrls = calloc(s->nr_ctrls, sizeof(*c)); + s->name = strdup(devices[i]->d_name); + s->subsysnqn = strdup(s->name); + s->nr_namespaces = 0; + + c = s->ctrls; + c->name = strdup(s->name); + sscanf(c->name, "nvme%d", ¤t_index); + c->nr_namespaces = scandir(dev, &namespaces, scan_dev_filter, + alphasort); + c->namespaces = calloc(c->nr_namespaces, sizeof(*n)); + + ret = asprintf(&path, "%s%s", dev, c->name); + if (ret < 0) + continue; + ret = 0; + + fd = open(path, O_RDONLY); + if (fd > 0) { + nvme_identify_ctrl(fd, &c->id); + close(fd); + } + free(path); + + for (j = 0; j < c->nr_namespaces; j++) { + n = &c->namespaces[j]; + n->name = strdup(namespaces[j]->d_name); + n->ctrl = c; + scan_namespace(n); + ret = verify_legacy_ns(n); + if (ret) + goto free; + } + while (j--) + free(namespaces[j]); + free(namespaces); + } + +free: + while (i--) + free(devices[i]); + free(devices); + return ret; +} + +static void free_ctrl(struct nvme_ctrl *c) +{ + int i; + + for (i = 0; i < c->nr_namespaces; i++) { + struct nvme_namespace *n = &c->namespaces[i]; + free(n->name); + } + free(c->name); + free(c->transport); + free(c->address); + free(c->state); + free(c->hostnqn); + free(c->hostid); + free(c->ana_state); + free(c->namespaces); +} + +static void free_subsystem(struct nvme_subsystem *s) +{ + int i; + + for (i = 0; i < s->nr_ctrls; i++) + free_ctrl(&s->ctrls[i]); + for (i = 0; i < s->nr_namespaces; i++) { + struct nvme_namespace *n = &s->namespaces[i]; + free(n->name); + } + free(s->name); + free(s->subsysnqn); + free(s->ctrls); + free(s->namespaces); +} + +int scan_subsystems(struct nvme_topology *t, const char *subsysnqn, + __u32 ns_instance) +{ + struct nvme_subsystem *s; + struct dirent **subsys; + int i, j = 0; + + t->nr_subsystems = scandir(subsys_dir, &subsys, scan_subsys_filter, + alphasort); + if (t->nr_subsystems < 0) + return legacy_list(t); + + t->subsystems = calloc(t->nr_subsystems, sizeof(*s)); + for (i = 0; i < t->nr_subsystems; i++) { + s = &t->subsystems[j]; + s->name = strdup(subsys[i]->d_name); + scan_subsystem(s, ns_instance); + + if (!subsysnqn || !strcmp(s->subsysnqn, subsysnqn)) + j++; + else + free_subsystem(s); + } + t->nr_subsystems = j; + + while (i--) + free(subsys[i]); + free(subsys); + return 0; +} + +void free_topology(struct nvme_topology *t) +{ + int i; + + for (i = 0; i < t->nr_subsystems; i++) + free_subsystem(&t->subsystems[i]); + free(t->subsystems); +} + +char *nvme_char_from_block(char *dev) +{ + char *path = NULL; + char buf[256] = {0}; + int ret, id, nsid; + + ret = sscanf(dev, "nvme%dn%d", &id, &nsid); + switch (ret) { + case 1: + return strdup(dev); + break; + case 2: + if (asprintf(&path, "/sys/block/%s/device", dev) < 0) + path = NULL; + break; + default: + fprintf(stderr, "%s is not an nvme device\n", dev); + return NULL; + } + + if (!path) + return NULL; + + ret = readlink(path, buf, sizeof(buf)); + if (ret > 0) { + char *r = strdup(basename(buf)); + + free(path); + if (sscanf(r, "nvme%d", &id) != 1) { + fprintf(stderr, "%s is not a physical nvme controller\n", r); + free(r); + r = NULL; + } + return r; + } + + free(path); + ret = asprintf(&path, "nvme%d", id); + if (ret < 0) + return NULL; + return path; +} + +void *mmap_registers(const char *dev) +{ + int fd; + char *base, path[512]; + void *membase; + + base = nvme_char_from_block((char *)dev); + if (!base) + return NULL; + + sprintf(path, "/sys/class/nvme/%s/device/resource0", base); + fd = open(path, O_RDONLY); + if (fd < 0) { + sprintf(path, "/sys/class/misc/%s/device/resource0", base); + fd = open(path, O_RDONLY); + } + if (fd < 0) { + fprintf(stderr, "%s did not find a pci resource, open failed %s\n", + base, strerror(errno)); + free(base); + return NULL; + } + + membase = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd, 0); + if (membase == MAP_FAILED) { + fprintf(stderr, "%s failed to map. ", base); + fprintf(stderr, "Did your kernel enable CONFIG_IO_STRICT_DEVMEM?\n"); + membase = NULL; + } + + free(base); + close(fd); + return membase; +} + |