summaryrefslogtreecommitdiffstats
path: root/nvme-topology.c
diff options
context:
space:
mode:
Diffstat (limited to 'nvme-topology.c')
-rw-r--r--nvme-topology.c608
1 files changed, 608 insertions, 0 deletions
diff --git a/nvme-topology.c b/nvme-topology.c
new file mode 100644
index 0000000..d24ef6b
--- /dev/null
+++ b/nvme-topology.c
@@ -0,0 +1,608 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "nvme.h"
+#include "nvme-ioctl.h"
+
+/* Directory prefix for NVMe device nodes; device names are appended directly. */
+static const char *dev = "/dev/";
+/* sysfs directory scanned for subsystems; subsystem names are appended directly. */
+static const char *subsys_dir = "/sys/class/nvme-subsystem/";
+
+/*
+ * Read the "subsysnqn" attribute found in the directory @path.
+ *
+ * Returns a heap-allocated, NUL-terminated string with one trailing newline
+ * removed, or NULL when the attribute cannot be opened or read or when the
+ * allocation fails.  The caller owns the result and must free() it.
+ */
+char *get_nvme_subsnqn(char *path)
+{
+	char sspath[320], *subsysnqn;
+	size_t len;
+	ssize_t ret;
+	int fd;
+
+	snprintf(sspath, sizeof(sspath), "%s/subsysnqn", path);
+
+	fd = open(sspath, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "Failed to open %s: %s\n",
+			sspath, strerror(errno));
+		return NULL;
+	}
+
+	subsysnqn = calloc(1, 256);
+	if (!subsysnqn)
+		goto close_fd;
+
+	/* Read one byte less than allocated so the terminator always survives. */
+	ret = read(fd, subsysnqn, 255);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to read %s: %s\n", sspath,
+			strerror(errno));
+		free(subsysnqn);
+		subsysnqn = NULL;
+		goto close_fd;
+	}
+
+	/* An empty attribute gives len == 0; never index before the buffer. */
+	len = strlen(subsysnqn);
+	if (len > 0 && subsysnqn[len - 1] == '\n')
+		subsysnqn[len - 1] = '\0';
+
+close_fd:
+	close(fd);
+	return subsysnqn;
+}
+
+/*
+ * Read the attribute file @attr located in the directory @path.
+ *
+ * One trailing newline is removed and every ',' is replaced by a space.
+ * Returns a heap-allocated, NUL-terminated string the caller must free(),
+ * or NULL when the path cannot be built, the file cannot be opened or read,
+ * or an allocation fails.
+ */
+char *nvme_get_ctrl_attr(char *path, const char *attr)
+{
+	char *attrpath, *value, *p;
+	size_t len;
+	ssize_t ret;
+	int fd;
+
+	ret = asprintf(&attrpath, "%s/%s", path, attr);
+	if (ret < 0)
+		return NULL;
+
+	value = calloc(1, 1024);
+	if (!value)
+		goto err_free_path;
+
+	fd = open(attrpath, O_RDONLY);
+	if (fd < 0)
+		goto err_free_value;
+
+	/* Leave the last byte untouched so the buffer stays NUL-terminated. */
+	ret = read(fd, value, 1023);
+	if (ret < 0) {
+		fprintf(stderr, "read :%s :%s\n", attrpath, strerror(errno));
+		goto err_close_fd;
+	}
+
+	/* Guard against an empty attribute before looking at the last byte. */
+	len = strlen(value);
+	if (len > 0 && value[len - 1] == '\n')
+		value[len - 1] = '\0';
+
+	for (p = value; *p; p++) {
+		if (*p == ',')
+			*p = ' ';
+	}
+
+	close(fd);
+	free(attrpath);
+	return value;
+err_close_fd:
+	close(fd);
+err_free_value:
+	free(value);
+err_free_path:
+	free(attrpath);
+	return NULL;
+}
+
+/*
+ * Cut @path at its last @needle and return the part that followed it.
+ *
+ * A single trailing @needle is dropped first.  The string is modified in
+ * place: the separator that is found is overwritten with a terminator.
+ * Index 0 is never examined, so a separator in the very first position is
+ * not found.  Returns NULL when no separator is found.
+ */
+static char *path_trim_last(char *path, char needle)
+{
+	int pos = strlen(path);
+
+	/* remove trailing separator */
+	if (pos > 0 && path[pos - 1] == needle) {
+		pos--;
+		path[pos] = 0;
+	}
+
+	while (pos > 0) {
+		if (path[pos] == needle) {
+			path[pos] = 0;
+			return &path[pos + 1];
+		}
+		pos--;
+	}
+	return NULL;
+}
+
+/*
+ * Derive the PCI address of the block device node @node (e.g. a path whose
+ * last component is the block device name) by following the sysfs "device"
+ * links below /sys/block.
+ *
+ * @bdf receives the 12-character address on success and is left as an empty
+ * string otherwise; the caller must provide room for at least 13 bytes.
+ */
+static void legacy_get_pci_bdf(char *node, char *bdf)
+{
+	char path[1024], link[264], target[264];
+	struct stat st;
+	ssize_t len;
+	size_t used;
+	char *p;
+	int n;
+
+	bdf[0] = 0;
+
+	/* Work on a bounded private copy; path_trim_last() edits in place. */
+	n = snprintf(link, sizeof(link), "%s", node);
+	if (n < 0 || (size_t)n >= sizeof(link))
+		return;
+	p = path_trim_last(link, '/');
+	if (!p)
+		return;
+
+	n = snprintf(path, sizeof(path), "/sys/block/%s/device", p);
+	if (n < 0 || (size_t)n >= sizeof(path))
+		return;
+
+	/* readlink() does not terminate its output; keep one byte spare. */
+	len = readlink(path, link, sizeof(link) - 1);
+	if (len <= 0)
+		return;
+	link[len] = 0;
+
+	/* The link value is either "device -> ../../../0000:86:00.0" or "device -> ../../nvme0" */
+	(void) path_trim_last(path, '/');
+	used = strlen(path);
+	n = snprintf(path + used, sizeof(path) - used, "/%s/device", link);
+	if (n < 0 || (size_t)n >= sizeof(path) - used)
+		return;
+
+	if (stat(path, &st) < 0)
+		return;
+
+	/*
+	 * NOTE(review): stat() follows symbolic links, so st_mode describes
+	 * the link target here; the mask test is kept exactly as written.
+	 */
+	if ((st.st_mode & S_IFLNK) == 0) {
+		/* follow the second link to get the PCI address */
+		len = readlink(path, target, sizeof(target) - 1);
+		if (len <= 0)
+			return;
+		target[len] = 0;
+		p = path_trim_last(target, '/');
+	} else {
+		(void) path_trim_last(path, '/');
+		p = path_trim_last(path, '/');
+	}
+
+	if (p && strlen(p) == 12)
+		strcpy(bdf, p);
+}
+
+/*
+ * Fill in the namespace id and identify data of @n by querying its device
+ * node (the file-level device prefix followed by n->name).
+ *
+ * Returns the negative asprintf() result when the node path cannot be
+ * built and 0 otherwise; open and identify failures are not reported.
+ */
+static int scan_namespace(struct nvme_namespace *n)
+{
+	char *devnode;
+	int rc, fd;
+
+	rc = asprintf(&devnode, "%s%s", dev, n->name);
+	if (rc < 0)
+		return rc;
+
+	fd = open(devnode, O_RDONLY);
+	if (fd >= 0) {
+		n->nsid = nvme_get_nsid(fd);
+		/* only identify the namespace when an id was obtained */
+		if (n->nsid >= 0)
+			rc = nvme_identify_ns(fd, n->nsid, 0, &n->ns);
+		close(fd);
+	}
+
+	free(devnode);
+	return 0;
+}
+
+/*
+ * Look up the "ana_state" attribute of the path device below @path whose
+ * namespace number equals @nsid.
+ *
+ * Directory entries named "nvme<id>c<cntlid>n<ns>" or "nvme<id>n<ns>" are
+ * considered; the first one matching @nsid is read and the scan stops.
+ *
+ * Returns a heap-allocated string the caller must free().  The string is
+ * empty when no entry matches.  NULL is returned when nothing passes the
+ * directory filter, or on allocation, open or read failure.
+ */
+static char *get_nvme_ctrl_path_ana_state(char *path, int nsid)
+{
+	struct dirent **paths;
+	char *ana_state;
+	int i, n;
+
+	ana_state = calloc(1, 16);
+	if (!ana_state)
+		return NULL;
+
+	n = scandir(path, &paths, scan_ctrl_paths_filter, alphasort);
+	if (n <= 0) {
+		free(ana_state);
+		return NULL;
+	}
+	for (i = 0; i < n; i++) {
+		int id, cntlid, ns, fd;
+		char *ctrl_path;
+		ssize_t ret;
+		size_t len;
+
+		if (sscanf(paths[i]->d_name, "nvme%dc%dn%d",
+			   &id, &cntlid, &ns) != 3) {
+			if (sscanf(paths[i]->d_name, "nvme%dn%d",
+				   &id, &ns) != 2) {
+				continue;
+			}
+		}
+		if (ns != nsid)
+			continue;
+
+		ret = asprintf(&ctrl_path, "%s/%s/ana_state",
+			       path, paths[i]->d_name);
+		if (ret < 0) {
+			free(ana_state);
+			ana_state = NULL;
+			break;
+		}
+		fd = open(ctrl_path, O_RDONLY);
+		if (fd < 0) {
+			free(ctrl_path);
+			free(ana_state);
+			ana_state = NULL;
+			break;
+		}
+		/* Keep the final byte free so the result stays terminated. */
+		ret = read(fd, ana_state, 15);
+		if (ret < 0) {
+			fprintf(stderr, "Failed to read ANA state from %s\n",
+				ctrl_path);
+			free(ana_state);
+			ana_state = NULL;
+		} else {
+			/* An empty read must not index before the buffer. */
+			len = strlen(ana_state);
+			if (len > 0 && ana_state[len - 1] == '\n')
+				ana_state[len - 1] = '\0';
+		}
+		close(fd);
+		free(ctrl_path);
+		break;
+	}
+	for (i = 0; i < n; i++)
+		free(paths[i]);
+	free(paths);
+	return ana_state;
+}
+
+/*
+ * Populate controller @c from the directory "@p/<c->name>": attribute
+ * strings, optional ANA state (only when @ns_instance is non-zero), the
+ * namespaces found below it, and the identify-controller data read through
+ * the controller's device node.
+ *
+ * Returns 0 on success or when only the device node could not be opened or
+ * identified, and a non-zero value when a path cannot be built, the
+ * directory cannot be scanned, or the namespace array cannot be allocated.
+ */
+static int scan_ctrl(struct nvme_ctrl *c, char *p, __u32 ns_instance)
+{
+	struct nvme_namespace *n;
+	struct dirent **ns;
+	char *path;
+	int i, fd, nr, ret;
+
+	ret = asprintf(&path, "%s/%s", p, c->name);
+	if (ret < 0)
+		return ret;
+
+	c->address = nvme_get_ctrl_attr(path, "address");
+	c->transport = nvme_get_ctrl_attr(path, "transport");
+	c->state = nvme_get_ctrl_attr(path, "state");
+	c->hostnqn = nvme_get_ctrl_attr(path, "hostnqn");
+	c->hostid = nvme_get_ctrl_attr(path, "hostid");
+
+	if (ns_instance)
+		c->ana_state = get_nvme_ctrl_path_ana_state(path, ns_instance);
+
+	nr = scandir(path, &ns, scan_namespace_filter, alphasort);
+	if (nr == -1) {
+		/* Save errno first: fprintf() is allowed to change it. */
+		ret = errno;
+		fprintf(stderr, "Failed to open %s: %s\n", path, strerror(ret));
+		free(path);
+		return ret;
+	}
+	free(path);
+
+	/* Publish a count only for an array that really exists. */
+	c->namespaces = calloc(nr, sizeof(*n));
+	c->nr_namespaces = c->namespaces ? nr : 0;
+	for (i = 0; i < c->nr_namespaces; i++) {
+		n = &c->namespaces[i];
+		n->name = strdup(ns[i]->d_name);
+		n->ctrl = c;
+		scan_namespace(n);
+	}
+
+	for (i = 0; i < nr; i++)
+		free(ns[i]);
+	free(ns);
+
+	if (nr > 0 && !c->namespaces)
+		return -ENOMEM;
+
+	ret = asprintf(&path, "%s%s", dev, c->name);
+	if (ret < 0)
+		return ret;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "Failed to open %s\n", path);
+	} else {
+		/* Best effort: an identify failure is not reported. */
+		nvme_identify_ctrl(fd, &c->id);
+		close(fd);
+	}
+
+	free(path);
+	return 0;
+}
+
+/*
+ * Populate subsystem @s (whose name is already set) from its sysfs
+ * directory: the subsystem NQN, every controller found below it and every
+ * namespace found below it.  @ns_instance is handed to scan_ctrl() for each
+ * controller.  Namespaces listed directly under the subsystem are attached
+ * to the first controller slot.
+ *
+ * Returns 0 on success and a non-zero value when the path cannot be built,
+ * a directory scan fails, or an array cannot be allocated.
+ */
+static int scan_subsystem(struct nvme_subsystem *s, __u32 ns_instance)
+{
+	struct dirent **ctrls, **ns;
+	struct nvme_namespace *n;
+	struct nvme_ctrl *c;
+	int i, nr, ret;
+	char *path;
+
+	ret = asprintf(&path, "%s%s", subsys_dir, s->name);
+	if (ret < 0)
+		return ret;
+
+	s->subsysnqn = get_nvme_subsnqn(path);
+
+	nr = scandir(path, &ctrls, scan_ctrls_filter, alphasort);
+	if (nr == -1) {
+		/* Save errno first: fprintf() is allowed to change it. */
+		ret = errno;
+		fprintf(stderr, "Failed to open %s: %s\n", path, strerror(ret));
+		goto free_path;
+	}
+
+	/* Publish a count only for an array that really exists. */
+	s->ctrls = calloc(nr, sizeof(*c));
+	s->nr_ctrls = s->ctrls ? nr : 0;
+	for (i = 0; i < s->nr_ctrls; i++) {
+		c = &s->ctrls[i];
+		c->name = strdup(ctrls[i]->d_name);
+		c->subsys = s;
+		scan_ctrl(c, path, ns_instance);
+	}
+
+	for (i = 0; i < nr; i++)
+		free(ctrls[i]);
+	free(ctrls);
+
+	if (nr > 0 && !s->ctrls) {
+		ret = -ENOMEM;
+		goto free_path;
+	}
+
+	nr = scandir(path, &ns, scan_namespace_filter, alphasort);
+	if (nr == -1) {
+		ret = errno;
+		fprintf(stderr, "Failed to open %s: %s\n", path, strerror(ret));
+		goto free_path;
+	}
+
+	s->namespaces = calloc(nr, sizeof(*n));
+	s->nr_namespaces = s->namespaces ? nr : 0;
+	for (i = 0; i < s->nr_namespaces; i++) {
+		n = &s->namespaces[i];
+		n->name = strdup(ns[i]->d_name);
+		n->ctrl = &s->ctrls[0];
+		scan_namespace(n);
+	}
+
+	for (i = 0; i < nr; i++)
+		free(ns[i]);
+	free(ns);
+
+	ret = (nr > 0 && !s->namespaces) ? -ENOMEM : 0;
+
+free_path:
+	free(path);
+	return ret;
+}
+
+/*
+ * Check that namespace @n really belongs to the controller it was attached
+ * to by comparing the model and serial number reported through the
+ * namespace's device node with those stored for the controller.
+ *
+ * When the controller has neither a transport nor an address yet, both are
+ * filled in from the PCI address derived for the namespace, if one is found.
+ *
+ * Returns 0 on a match, -ENODEV on a mismatch, and another non-zero value
+ * when a string cannot be allocated, the node cannot be opened, or the
+ * identify command fails.
+ */
+static int verify_legacy_ns(struct nvme_namespace *n)
+{
+	struct nvme_ctrl *c = n->ctrl;
+	struct nvme_id_ctrl id;
+	char *path;
+	int ret, fd;
+
+	ret = asprintf(&path, "%s%s", dev, n->name);
+	if (ret < 0)
+		return ret;
+
+	if (!c->transport && !c->address) {
+		char tmp_address[64] = "";
+
+		legacy_get_pci_bdf(path, tmp_address);
+		if (tmp_address[0]) {
+			/*
+			 * strdup() leaves a well-defined NULL on failure, so
+			 * a later free of these members stays safe.
+			 */
+			c->transport = strdup("pcie");
+			c->address = strdup(tmp_address);
+			if (!c->transport || !c->address) {
+				free(path);
+				return -1;
+			}
+		}
+	}
+
+	fd = open(path, O_RDONLY);
+	free(path);
+
+	if (fd < 0)
+		return fd;
+
+	ret = nvme_identify_ctrl(fd, &id);
+	close(fd);
+
+	if (ret)
+		return ret;
+
+	if (memcmp(id.mn, c->id.mn, sizeof(id.mn)) ||
+	    memcmp(id.sn, c->id.sn, sizeof(id.sn)))
+		return -ENODEV;
+	return 0;
+}
+
+/* Free the first @count entries of a scandir() result and the array itself. */
+static void legacy_free_dirents(struct dirent **list, int count)
+{
+	while (count-- > 0)
+		free(list[count]);
+	free(list);
+}
+
+/*
+ * Build the single-controller subsystem @s for the legacy device @name and
+ * attach every namespace device that passes scan_dev_filter.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the non-zero
+ * result of verify_legacy_ns() for the first namespace that fails it.
+ */
+static int legacy_scan_subsystem(struct nvme_subsystem *s, const char *name)
+{
+	struct dirent **namespaces;
+	struct nvme_namespace *n;
+	struct nvme_ctrl *c;
+	int ret = 0, fd, j, nr_ns;
+	char *path;
+
+	s->nr_namespaces = 0;
+	s->ctrls = calloc(1, sizeof(*c));
+	if (!s->ctrls) {
+		s->nr_ctrls = 0;
+		return -ENOMEM;
+	}
+	s->nr_ctrls = 1;
+	c = s->ctrls;
+
+	s->name = strdup(name);
+	s->subsysnqn = strdup(name);
+	c->name = strdup(name);
+	if (!s->name || !s->subsysnqn || !c->name)
+		return -ENOMEM;
+
+	/*
+	 * current_index is set before the scan; presumably scan_dev_filter
+	 * (defined outside this file) uses it to select this controller's
+	 * namespaces - confirm against its definition.
+	 */
+	sscanf(c->name, "nvme%d", &current_index);
+	nr_ns = scandir(dev, &namespaces, scan_dev_filter, alphasort);
+	if (nr_ns < 0) {
+		/* A failed scan is treated as "no namespaces". */
+		namespaces = NULL;
+		nr_ns = 0;
+	}
+
+	c->namespaces = calloc(nr_ns, sizeof(*n));
+	if (!c->namespaces && nr_ns > 0) {
+		ret = -ENOMEM;
+		goto free_ns;
+	}
+	c->nr_namespaces = nr_ns;
+
+	if (asprintf(&path, "%s%s", dev, c->name) < 0) {
+		ret = -ENOMEM;
+		goto free_ns;
+	}
+
+	fd = open(path, O_RDONLY);
+	/* 0 is a valid descriptor; only negative values signal failure. */
+	if (fd >= 0) {
+		nvme_identify_ctrl(fd, &c->id);
+		close(fd);
+	}
+	free(path);
+
+	for (j = 0; j < nr_ns; j++) {
+		n = &c->namespaces[j];
+		n->name = strdup(namespaces[j]->d_name);
+		if (!n->name) {
+			ret = -ENOMEM;
+			break;
+		}
+		n->ctrl = c;
+		scan_namespace(n);
+		ret = verify_legacy_ns(n);
+		if (ret)
+			break;
+	}
+
+free_ns:
+	legacy_free_dirents(namespaces, nr_ns);
+	return ret;
+}
+
+/*
+ * For pre-subsystem enabled kernel. Topology information is limited, but we can
+ * assume controller names are always a prefix to their namespaces, i.e. nvme0
+ * is the controller to nvme0n1 for such older kernels. We will also assume
+ * every controller is its own subsystem.
+ *
+ * Returns 0 on success and a non-zero value on the first failure; scanning
+ * stops at that point and whatever was built so far stays in @t.
+ */
+static int legacy_list(struct nvme_topology *t)
+{
+	struct dirent **devices;
+	int ret = 0, nr, i;
+
+	nr = scandir(dev, &devices, scan_ctrls_filter, alphasort);
+	t->nr_subsystems = nr;
+	if (nr < 0) {
+		fprintf(stderr, "no NVMe device(s) detected.\n");
+		return nr;
+	}
+
+	t->subsystems = calloc(nr, sizeof(*t->subsystems));
+	if (!t->subsystems && nr > 0) {
+		t->nr_subsystems = 0;
+		ret = -ENOMEM;
+		goto free_devices;
+	}
+
+	for (i = 0; i < nr; i++) {
+		ret = legacy_scan_subsystem(&t->subsystems[i],
+					    devices[i]->d_name);
+		if (ret)
+			break;
+	}
+
+free_devices:
+	/* Release every entry, including those after an early exit. */
+	legacy_free_dirents(devices, nr);
+	return ret;
+}
+
+/*
+ * Release everything owned by controller @c: the name of each namespace,
+ * the namespace array and all attribute strings.  @c itself is not freed.
+ */
+static void free_ctrl(struct nvme_ctrl *c)
+{
+	int idx = c->nr_namespaces;
+
+	while (idx-- > 0)
+		free(c->namespaces[idx].name);
+	free(c->namespaces);
+
+	free(c->ana_state);
+	free(c->hostid);
+	free(c->hostnqn);
+	free(c->state);
+	free(c->address);
+	free(c->transport);
+	free(c->name);
+}
+
+/*
+ * Release everything owned by subsystem @s: its controllers, the names of
+ * its namespaces, both arrays and its own strings.  @s itself is not freed.
+ */
+static void free_subsystem(struct nvme_subsystem *s)
+{
+	int idx;
+
+	for (idx = 0; idx < s->nr_ctrls; idx++)
+		free_ctrl(&s->ctrls[idx]);
+
+	idx = s->nr_namespaces;
+	while (idx-- > 0)
+		free(s->namespaces[idx].name);
+
+	free(s->namespaces);
+	free(s->ctrls);
+	free(s->subsysnqn);
+	free(s->name);
+}
+
+/*
+ * Build the topology @t from the subsystem directory in sysfs.
+ *
+ * @subsysnqn: when non-NULL, only subsystems with exactly this NQN are kept.
+ * @ns_instance: handed to scan_subsystem() for every subsystem.
+ *
+ * When the subsystem directory cannot be scanned, the legacy scan is used
+ * instead and its result is returned.  Otherwise returns 0, or -ENOMEM when
+ * the subsystem array cannot be allocated.
+ */
+int scan_subsystems(struct nvme_topology *t, const char *subsysnqn,
+		    __u32 ns_instance)
+{
+	struct nvme_subsystem *s;
+	struct dirent **subsys;
+	int i, nr, j = 0;
+
+	nr = scandir(subsys_dir, &subsys, scan_subsys_filter, alphasort);
+	t->nr_subsystems = nr;
+	if (nr < 0)
+		return legacy_list(t);
+
+	t->subsystems = calloc(nr, sizeof(*s));
+	if (!t->subsystems && nr > 0) {
+		t->nr_subsystems = 0;
+		for (i = 0; i < nr; i++)
+			free(subsys[i]);
+		free(subsys);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr; i++) {
+		s = &t->subsystems[j];
+		s->name = strdup(subsys[i]->d_name);
+		scan_subsystem(s, ns_instance);
+
+		/* A subsystem whose NQN could not be read never matches. */
+		if (!subsysnqn ||
+		    (s->subsysnqn && !strcmp(s->subsysnqn, subsysnqn))) {
+			j++;
+		} else {
+			free_subsystem(s);
+			/*
+			 * The slot is reused by the next iteration; clear it
+			 * so no freed pointer or stale count survives.
+			 */
+			*s = (struct nvme_subsystem){ 0 };
+		}
+	}
+	t->nr_subsystems = j;
+
+	for (i = 0; i < nr; i++)
+		free(subsys[i]);
+	free(subsys);
+	return 0;
+}
+
+/*
+ * Release every subsystem held by @t together with the subsystem array.
+ * @t itself is not freed.
+ */
+void free_topology(struct nvme_topology *t)
+{
+	int idx = 0;
+
+	while (idx < t->nr_subsystems) {
+		free_subsystem(&t->subsystems[idx]);
+		idx++;
+	}
+	free(t->subsystems);
+}
+
+/*
+ * Map an NVMe device name to the name of its controller.
+ *
+ * @dev: a controller name ("nvme<id>") or a namespace name
+ *       ("nvme<id>n<nsid>"), without any directory prefix.  Note that this
+ *       parameter hides the file-level variable of the same name.
+ *
+ * A controller name is returned as a copy.  For a namespace name the sysfs
+ * "device" link of the block device is followed; when the link cannot be
+ * read, "nvme<id>" is built from the parsed id instead.
+ *
+ * Returns a heap-allocated string the caller must free(), or NULL when
+ * @dev is not an NVMe name, the link target is not named "nvme<id>", or an
+ * allocation fails.
+ */
+char *nvme_char_from_block(char *dev)
+{
+	char *path = NULL;
+	char buf[256] = {0};
+	int ret, id, nsid;
+	char *r;
+
+	ret = sscanf(dev, "nvme%dn%d", &id, &nsid);
+	switch (ret) {
+	case 1:
+		return strdup(dev);
+	case 2:
+		if (asprintf(&path, "/sys/block/%s/device", dev) < 0)
+			path = NULL;
+		break;
+	default:
+		fprintf(stderr, "%s is not an nvme device\n", dev);
+		return NULL;
+	}
+
+	if (!path)
+		return NULL;
+
+	/*
+	 * readlink() does not terminate its output; reading at most
+	 * sizeof(buf) - 1 bytes keeps the zero-filled last byte intact.
+	 */
+	ret = readlink(path, buf, sizeof(buf) - 1);
+	free(path);
+	if (ret > 0) {
+		r = strdup(basename(buf));
+		if (r && sscanf(r, "nvme%d", &id) != 1) {
+			fprintf(stderr, "%s is not a physical nvme controller\n", r);
+			free(r);
+			r = NULL;
+		}
+		return r;
+	}
+
+	ret = asprintf(&path, "nvme%d", id);
+	if (ret < 0)
+		return NULL;
+	return path;
+}
+
+/*
+ * Map the first page of the PCI "resource0" file that belongs to the
+ * controller of the NVMe device @dev, read-only and shared.
+ *
+ * The file is looked up below /sys/class/nvme first and below
+ * /sys/class/misc second.
+ *
+ * Returns the mapping, or NULL when the controller name cannot be
+ * determined, no resource file can be opened, or mmap() fails.
+ */
+void *mmap_registers(const char *dev)
+{
+	int fd = -1, n;
+	char *base, path[512];
+	void *membase;
+
+	base = nvme_char_from_block((char *)dev);
+	if (!base)
+		return NULL;
+
+	/* Reported below if the name is too long for the path buffer. */
+	errno = ENAMETOOLONG;
+
+	n = snprintf(path, sizeof(path), "/sys/class/nvme/%s/device/resource0", base);
+	if (n > 0 && (size_t)n < sizeof(path))
+		fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		n = snprintf(path, sizeof(path), "/sys/class/misc/%s/device/resource0", base);
+		if (n > 0 && (size_t)n < sizeof(path))
+			fd = open(path, O_RDONLY);
+	}
+	if (fd < 0) {
+		fprintf(stderr, "%s did not find a pci resource, open failed %s\n",
+			base, strerror(errno));
+		free(base);
+		return NULL;
+	}
+
+	membase = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd, 0);
+	if (membase == MAP_FAILED) {
+		fprintf(stderr, "%s failed to map. ", base);
+		fprintf(stderr, "Did your kernel enable CONFIG_IO_STRICT_DEVMEM?\n");
+		membase = NULL;
+	}
+
+	free(base);
+	close(fd);
+	return membase;
+}
+