summaryrefslogtreecommitdiffstats
path: root/utils/blkmapd/device-discovery.c
diff options
context:
space:
mode:
Diffstat (limited to 'utils/blkmapd/device-discovery.c')
-rw-r--r--utils/blkmapd/device-discovery.c580
1 files changed, 580 insertions, 0 deletions
diff --git a/utils/blkmapd/device-discovery.c b/utils/blkmapd/device-discovery.c
new file mode 100644
index 0000000..a565fdb
--- /dev/null
+++ b/utils/blkmapd/device-discovery.c
@@ -0,0 +1,580 @@
+/*
+ * device-discovery.c: main function, discovering device and processing
+ * pipe request from kernel.
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/select.h>
+#include <sys/inotify.h>
+#include <linux/kdev_t.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_ioctl.h>
+#include <scsi/sg.h>
+#include <signal.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <syslog.h>
+#include <dirent.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <errno.h>
+#include <libdevmapper.h>
+
+#include "device-discovery.h"
+#include "xcommon.h"
+#include "nfslib.h"
+#include "conffile.h"
+
+/* Size of one inotify event header, not counting the trailing name. */
+#define EVENT_SIZE (sizeof(struct inotify_event))
+/* Size in bytes of the buffer used to read a batch of inotify events. */
+#define EVENT_BUFSIZE (1024 * EVENT_SIZE)
+
+/* Default pipefs directory and the location of the daemon's pid file. */
+#define RPCPIPE_DIR NFS_STATEDIR "/rpc_pipefs"
+#define PID_FILE "/run/blkmapd.pid"
+
+/*
+ * Assign the string produced by f to w, but only when it is not NULL, so
+ * that w keeps its previous value otherwise. f is evaluated exactly once.
+ * The temporary has a deliberately unusual name so that it cannot capture
+ * a caller variable passed as w.
+ */
+#define CONF_SAVE(w, f) do {				\
+	char *conf_save_tmp_ = (f);			\
+	if (conf_save_tmp_ != NULL)			\
+		(w) = conf_save_tmp_;			\
+} while (0)
+
+/* Path buffers; main() fills them in from the pipefs directory. */
+static char bl_pipe_file[PATH_MAX];
+static char nfspipe_dir[PATH_MAX];
+static char rpcpipe_dir[PATH_MAX];
+
+/* Head of the list of discovered disks; rebuilt by bl_discover_devices(). */
+struct bl_disk *visible_disk_list;
+
+/* inotify descriptor, pipe file descriptor and the two directory watches. */
+int bl_watch_fd;
+int bl_pipe_fd;
+int nfs_pipedir_wfd;
+int rpc_pipedir_wfd;
+
+/* Descriptor of the pid file, or -1 while no pid file is held open. */
+int pidfd = -1;
+
+
+/*
+ * Search the singly linked list starting at paths for an entry whose
+ * full_path is equal to filepath.
+ *
+ * Returns the first matching entry, or NULL when nothing matches (which
+ * includes the case of an empty list).
+ */
+static struct bl_disk_path *bl_get_path(const char *filepath,
+					struct bl_disk_path *paths)
+{
+	struct bl_disk_path *entry;
+
+	for (entry = paths; entry != NULL; entry = entry->next) {
+		if (strcmp(entry->full_path, filepath) == 0)
+			return entry;
+	}
+	return NULL;
+}
+
+/*
+ * A multipath device can show up in state PASSIVE, ACTIVE or PSEUDO, with
+ * PSEUDO > ACTIVE > PASSIVE. The path with the highest state is the one
+ * used to create the pseudo device, so a disk's path has to be replaced
+ * whenever a path with a higher state turns up.
+ * If device-mapper multipath support is a must, pseudo devices should
+ * exist for each multipath device. If not, the active device path will
+ * be chosen for device creation.
+ *
+ * Returns 1 when a path in the given state should replace
+ * disk->valid_path (there is none yet, or its state is strictly lower
+ * than state), and 0 otherwise.
+ */
+static int bl_update_path(enum bl_path_state_e state, struct bl_disk *disk)
+{
+	const struct bl_disk_path *current = disk->valid_path;
+
+	if (current == NULL)
+		return 1;
+	return (current->state < state) ? 1 : 0;
+}
+
+/*
+ * Free every disk on visible_disk_list, including each disk's path
+ * entries (and their full_path strings) and its serial. On return
+ * visible_disk_list is NULL.
+ */
+static void bl_release_disk(void)
+{
+	struct bl_disk *cur;
+	struct bl_disk_path *entry, *following;
+
+	while ((cur = visible_disk_list) != NULL) {
+		for (entry = cur->paths; entry != NULL; entry = following) {
+			/* Remember the successor before the entry goes away. */
+			following = entry->next;
+			free(entry->full_path);
+			free(entry);
+		}
+		/* free(NULL) is a no-op, so a missing serial needs no test. */
+		free(cur->serial);
+		visible_disk_list = cur->next;
+		free(cur);
+	}
+}
+
+/*
+ * Probe the device node at filepath and record it on visible_disk_list.
+ *
+ * The device is opened read-only; devices that cannot be opened or
+ * stat'ed, or whose size turns out to be zero, are ignored. The path
+ * state is PASSIVE when no serial can be read, PSEUDO when the major
+ * number belongs to device-mapper, and otherwise whatever
+ * bldev_read_ap_state() reports.
+ *
+ * When a disk with the same serial is already on the list, filepath is
+ * added to that disk as an additional path (unless it is already known),
+ * and becomes the disk's valid path if bl_update_path() says so.
+ * Otherwise a new disk is pushed onto the front of the list and takes
+ * ownership of the serial.
+ *
+ * A device without a serial is never matched against other disks: it is
+ * always added as a disk of its own, with disk->serial left NULL.
+ */
+static void bl_add_disk(char *filepath)
+{
+	struct bl_disk *disk = NULL;
+	int fd = 0;
+	struct stat sb;
+	off_t size = 0;
+	struct bl_serial *serial = NULL;
+	enum bl_path_state_e ap_state;
+	struct bl_disk_path *diskpath = NULL, *path = NULL;
+	dev_t dev;
+
+	fd = open(filepath, O_RDONLY | O_LARGEFILE);
+	if (fd < 0)
+		return;
+
+	if (fstat(fd, &sb)) {
+		close(fd);
+		return;
+	}
+
+	/* Use st_size when it is set, otherwise ask through BLKGETSIZE. */
+	if (!sb.st_size)
+		ioctl(fd, BLKGETSIZE, &size);
+	else
+		size = sb.st_size;
+
+	if (!size) {
+		close(fd);
+		return;
+	}
+
+	dev = sb.st_rdev;
+	serial = bldev_read_serial(fd, filepath);
+	if (!serial) {
+		BL_LOG_ERR("%s: no serial found for %s\n",
+			   __func__, filepath);
+		ap_state = BL_PATH_STATE_PASSIVE;
+	} else if (dm_is_dm_major(major(dev)))
+		ap_state = BL_PATH_STATE_PSEUDO;
+	else
+		ap_state = bldev_read_ap_state(fd);
+	close(fd);
+
+	/*
+	 * Already scanned or a partition?
+	 * XXX: if released each time, maybe not need to compare
+	 *
+	 * The search only runs when this device has a serial, and it skips
+	 * list entries that have none, so a NULL serial is never
+	 * dereferenced. disk is NULL afterwards unless a match was found.
+	 */
+	if (serial != NULL) {
+		for (disk = visible_disk_list; disk != NULL;
+		     disk = disk->next) {
+			if (disk->serial == NULL)
+				continue;
+			if ((serial->len == disk->serial->len) &&
+			    !memcmp(serial->data, disk->serial->data,
+				    serial->len)) {
+				diskpath = bl_get_path(filepath, disk->paths);
+				break;
+			}
+		}
+	}
+
+	/* Known disk and known path: nothing to do. */
+	if (disk && diskpath) {
+		bl_free_scsi_string(serial);
+		return;
+	}
+
+	/* add path */
+	path = malloc(sizeof(struct bl_disk_path));
+	if (!path) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		goto out_err;
+	}
+	path->next = NULL;
+	path->state = ap_state;
+	path->full_path = strdup(filepath);
+	if (!path->full_path)
+		goto out_err;
+
+	if (!disk) {		/* add disk */
+		disk = malloc(sizeof(struct bl_disk));
+		if (!disk) {
+			BL_LOG_ERR("%s: Out of memory!\n", __func__);
+			goto out_err;
+		}
+		disk->next = visible_disk_list;
+		disk->dev = dev;
+		disk->size = size;
+		disk->serial = serial;	/* ownership moves to the disk */
+		disk->valid_path = path;
+		disk->paths = path;
+		visible_disk_list = disk;
+	} else {
+		path->next = disk->paths;
+		disk->paths = path;
+		/* check whether we need to update disk info */
+		if (bl_update_path(path->state, disk)) {
+			disk->dev = dev;
+			disk->size = size;
+			disk->valid_path = path;
+		}
+		/* The disk already has its own copy of the serial. */
+		bl_free_scsi_string(serial);
+	}
+	return;
+
+ out_err:
+	if (path) {
+		free(path->full_path);
+		free(path);
+	}
+	if (serial)
+		bl_free_scsi_string(serial);
+	return;
+}
+
+/*
+ * Rebuild visible_disk_list from scratch.
+ *
+ * The previous list is released, then /proc/partitions is read line by
+ * line. Every name that also has an entry under /sys/block is handed to
+ * bl_add_disk() as /dev/<name>; lines that do not parse and names without
+ * a /sys/block entry are skipped.
+ *
+ * Always returns 0, including when /proc/partitions cannot be opened.
+ */
+int bl_discover_devices(void)
+{
+	char line[PATH_MAX], name[NAME_MAX], devpath[PATH_MAX];
+	FILE *parts;
+
+	/* release previous list */
+	bl_release_disk();
+
+	/* scan all block devices */
+	parts = fopen("/proc/partitions", "r");
+	if (!parts)
+		return 0;
+
+	while (fgets(line, sizeof(line), parts) != NULL) {
+		/* Three numeric columns, then the device name. */
+		if (sscanf(line, "%*d %*d %*d %31s", name) != 1)
+			continue;
+
+		snprintf(devpath, sizeof(devpath), "/sys/block/%s", name);
+		if (access(devpath, F_OK) < 0)
+			continue;
+
+		snprintf(devpath, sizeof(devpath), "/dev/%s", name);
+		bl_add_disk(devpath);
+	}
+
+	fclose(parts);
+	return 0;
+}
+
+/*
+ * Process one kernel request from the pipe and write the reply back.
+ *
+ * A request is a struct bl_pipemsg_hdr followed by head.totallen bytes of
+ * payload. BL_DEVICE_MOUNT re-discovers devices and resolves the payload
+ * through process_deviceinfo(); BL_DEVICE_UMOUNT passes the payload to
+ * dm_device_remove_all(). A request that fails, or has an unknown type,
+ * is still answered, with status BL_DEVICE_REQUEST_ERR.
+ *
+ * return 0: request read and a reply written;
+ * return < 0: error (-EIO on a short read or write, -ENOMEM when the
+ *             payload buffer cannot be allocated)
+ */
+static int bl_disk_inquiry_process(int fd)
+{
+	int ret = 0;
+	struct bl_pipemsg_hdr head;
+	char *buf = NULL;
+	uint32_t major = 0, minor = 0;
+	uint16_t buflen;
+	struct bl_dev_msg reply;
+
+	/*
+	 * Clear the whole reply up front: on the error and umount paths
+	 * major/minor are never assigned, and uninitialised stack bytes
+	 * must not be written to the pipe.
+	 */
+	memset(&reply, 0, sizeof(reply));
+
+	/* read request */
+	if (atomicio(read, fd, &head, sizeof(head)) != sizeof(head)) {
+		/* Note that an error in this or the next read is pretty
+		 * catastrophic, as there is no good way to resync into
+		 * the pipe's stream.
+		 */
+		BL_LOG_ERR("Read pipefs head error!\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	buflen = head.totallen;
+	/* malloc(0) may return NULL; that is not an out-of-memory case. */
+	buf = malloc(buflen > 0 ? buflen : 1);
+	if (!buf) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (atomicio(read, fd, buf, buflen) != buflen) {
+		BL_LOG_ERR("Read pipefs content error!\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	reply.status = BL_DEVICE_REQUEST_PROC;
+
+	switch (head.type) {
+	case BL_DEVICE_MOUNT:
+		/*
+		 * It shouldn't be necessary to discover devices here, since
+		 * process_deviceinfo() will re-discover if it can't find
+		 * the devices it needs. But in the case of multipath
+		 * devices (ones that appear more than once, for example an
+		 * active and a standby LUN), this will re-order them in the
+		 * correct priority.
+		 */
+		bl_discover_devices();
+		if (!process_deviceinfo(buf, buflen, &major, &minor)) {
+			reply.status = BL_DEVICE_REQUEST_ERR;
+			break;
+		}
+		reply.major = major;
+		reply.minor = minor;
+		break;
+	case BL_DEVICE_UMOUNT:
+		/*
+		 * The payload is handed over as a uint64_t pointer, so it
+		 * has to hold at least one uint64_t before it may be read.
+		 */
+		if (buflen < sizeof(uint64_t) ||
+		    !dm_device_remove_all((uint64_t *) buf))
+			reply.status = BL_DEVICE_REQUEST_ERR;
+		break;
+	default:
+		reply.status = BL_DEVICE_REQUEST_ERR;
+		break;
+	}
+
+	/* write to pipefs */
+	if (atomicio((void *)write, fd, &reply, sizeof(reply))
+	    != sizeof(reply)) {
+		BL_LOG_ERR("Write pipefs error!\n");
+		ret = -EIO;
+	}
+
+ out:
+	free(buf);
+	return ret;
+}
+
+/*
+ * Add an inotify watch for creations and deletions inside dir on
+ * bl_watch_fd. The value returned by inotify_add_watch() is stored in
+ * *wd; when it is negative the failure is logged.
+ */
+static void bl_watch_dir(const char* dir, int *wd)
+{
+	int id = inotify_add_watch(bl_watch_fd, dir, IN_CREATE | IN_DELETE);
+
+	*wd = id;
+	if (id >= 0)
+		return;
+
+	BL_LOG_ERR("failed to watch %s: %s\n", dir, strerror(errno));
+}
+
+/*
+ * (Re)open the pipe file named by bl_pipe_file, closing any descriptor
+ * that is still held first. On failure bl_pipe_fd is left negative and
+ * the error is logged.
+ */
+static void bl_reopen_pipe(void)
+{
+	if (bl_pipe_fd >= 0)
+		close(bl_pipe_fd);
+	bl_pipe_fd = open(bl_pipe_file, O_RDWR);
+	if (bl_pipe_fd < 0)
+		BL_LOG_ERR("open %s failed: %s\n",
+			   bl_pipe_file, strerror(errno));
+}
+
+/* Close the pipe file if it is open and mark it as closed. */
+static void bl_close_pipe(void)
+{
+	if (bl_pipe_fd >= 0)
+		close(bl_pipe_fd);
+	bl_pipe_fd = -1;
+}
+
+/*
+ * Drain pending inotify events from bl_watch_fd and follow the pipe file
+ * as it comes and goes:
+ *  - an entry starting with "nfs" created in the rpc pipe directory:
+ *    watch the nfs pipe directory and (re)open the pipe file;
+ *  - such an entry deleted: drop that watch and close the pipe file;
+ *  - an entry starting with "blocklayout" created in the nfs pipe
+ *    directory: (re)open the pipe file;
+ *  - such an entry deleted: close the pipe file.
+ * All other events are ignored.
+ */
+static void bl_rpcpipe_cb(void)
+{
+	int rc, curr_byte = 0;
+	/* The buffer must be suitably aligned for struct inotify_event. */
+	char eventArr[EVENT_BUFSIZE]
+		__attribute__((aligned(__alignof__(struct inotify_event))));
+	struct inotify_event *event;
+
+	rc = read(bl_watch_fd, eventArr, sizeof(eventArr));
+	if (rc < 0) {
+		BL_LOG_ERR("read event fail: %s", strerror(errno));
+		return;
+	}
+
+	while (rc > curr_byte) {
+		event = (struct inotify_event *)&eventArr[curr_byte];
+		curr_byte += EVENT_SIZE + event->len;
+
+		/*
+		 * An event with len == 0 carries no name (for example
+		 * IN_IGNORED after a watch is removed); reading event->name
+		 * would run into the following event.
+		 */
+		if (event->len == 0)
+			continue;
+
+		if (event->wd == rpc_pipedir_wfd) {
+			if (strncmp(event->name, "nfs", 3))
+				continue;
+			if (event->mask & IN_CREATE) {
+				BL_LOG_WARNING("nfs pipe dir created\n");
+				bl_watch_dir(nfspipe_dir, &nfs_pipedir_wfd);
+				bl_reopen_pipe();
+			} else if (event->mask & IN_DELETE) {
+				BL_LOG_WARNING("nfs pipe dir deleted\n");
+				inotify_rm_watch(bl_watch_fd, nfs_pipedir_wfd);
+				nfs_pipedir_wfd = -1;
+				bl_close_pipe();
+			}
+		} else if (event->wd == nfs_pipedir_wfd) {
+			if (strncmp(event->name, "blocklayout", 11))
+				continue;
+			if (event->mask & IN_CREATE) {
+				BL_LOG_WARNING("blocklayout pipe file created\n");
+				bl_reopen_pipe();
+			} else if (event->mask & IN_DELETE) {
+				BL_LOG_WARNING("blocklayout pipe file deleted\n");
+				bl_close_pipe();
+			}
+		}
+	}
+}
+
+/*
+ * Wait for activity on the inotify descriptor and, when it is open, on
+ * the pipe file, and dispatch it. inotify events go to bl_rpcpipe_cb();
+ * otherwise a readable pipe goes to bl_disk_inquiry_process(). Only one
+ * of the two is serviced per wake-up.
+ *
+ * Returns the non-zero result of bl_disk_inquiry_process() as soon as it
+ * reports one, -errno when select() fails for a reason other than EINTR,
+ * and 0 if select() returns 0.
+ */
+static int bl_event_helper(void)
+{
+	fd_set readable;
+	int ret = 0, maxfd, nready;
+
+	for (;;) {
+		FD_ZERO(&readable);
+		FD_SET(bl_watch_fd, &readable);
+		if (bl_pipe_fd > 0)
+			FD_SET(bl_pipe_fd, &readable);
+
+		maxfd = bl_pipe_fd;
+		if (bl_watch_fd > bl_pipe_fd)
+			maxfd = bl_watch_fd;
+
+		nready = select(maxfd + 1, &readable, NULL, NULL, NULL);
+		if (nready == -1) {
+			/* Interrupted by a signal: simply wait again. */
+			if (errno == EINTR)
+				continue;
+			return -errno;
+		}
+		if (nready == 0)
+			return ret;
+
+		if (FD_ISSET(bl_watch_fd, &readable))
+			bl_rpcpipe_cb();
+		else if (bl_pipe_fd > 0 && FD_ISSET(bl_pipe_fd, &readable))
+			ret = bl_disk_inquiry_process(bl_pipe_fd);
+
+		if (ret)
+			return ret;
+	}
+}
+
+/*
+ * Signal handler installed by main() for SIGINT and SIGTERM: closes and
+ * removes the pid file when one is held open, logs the signal number and
+ * terminates the process with status 0.
+ * NOTE(review): logging and exit() are invoked from signal context here;
+ * confirm that this is acceptable for this daemon.
+ */
+static void sig_die(int signal)
+{
+	const int have_pidfile = (pidfd >= 0);
+
+	if (have_pidfile) {
+		close(pidfd);
+		unlink(PID_FILE);
+	}
+
+	BL_LOG_ERR("exit on signal(%d)\n", signal);
+	exit(0);
+}
+static void usage(void)
+{
+ fprintf(stderr, "Usage: blkmapd [-hdf]\n" );
+}
+/*
+ * Daemon entry point.
+ *
+ * Options:
+ *   -d  run device discovery once and exit
+ *   -f  stay in the foreground (log to stderr as well, no pid file)
+ *   -h  print usage and exit
+ *
+ * The pipefs directory defaults to RPCPIPE_DIR and can be overridden by
+ * the "pipefs-directory" setting in the "general" section of the
+ * configuration file. Unless -f is given the process forks; the child
+ * becomes the daemon and creates, locks and fills in PID_FILE itself, so
+ * that the lock lasts as long as the daemon does and the pid file can be
+ * removed again on exit.
+ */
+int main(int argc, char **argv)
+{
+	int opt, dflag = 0, fg = 0, ret = 1;
+	char pidbuf[64];
+	char *xrpcpipe_dir = NULL;
+
+	/* strlcpy always NUL-terminates, unlike strncpy. */
+	strlcpy(rpcpipe_dir, RPCPIPE_DIR, sizeof(rpcpipe_dir));
+	conf_init_file(NFS_CONFFILE);
+	CONF_SAVE(xrpcpipe_dir, conf_get_str("general", "pipefs-directory"));
+	if (xrpcpipe_dir != NULL)
+		strlcpy(rpcpipe_dir, xrpcpipe_dir, sizeof(rpcpipe_dir));
+
+	strlcpy(nfspipe_dir, rpcpipe_dir, sizeof(nfspipe_dir));
+	strlcat(nfspipe_dir, "/nfs", sizeof(nfspipe_dir));
+	strlcpy(bl_pipe_file, rpcpipe_dir, sizeof(bl_pipe_file));
+	strlcat(bl_pipe_file, "/nfs/blocklayout", sizeof(bl_pipe_file));
+
+	while ((opt = getopt(argc, argv, "hdf")) != -1) {
+		switch (opt) {
+		case 'd':
+			dflag = 1;
+			break;
+		case 'f':
+			fg = 1;
+			break;
+		case 'h':
+			usage();
+			exit(0);
+		default:
+			usage();
+			exit(1);
+		}
+	}
+
+	if (fg) {
+		openlog("blkmapd", LOG_PERROR, 0);
+	} else {
+		pid_t pid = fork();
+		size_t len;
+		int fd;
+
+		if (pid < 0) {
+			BL_LOG_ERR("fork error\n");
+			exit(1);
+		} else if (pid != 0) {
+			/* Parent: the child carries on as the daemon. */
+			exit(0);
+		}
+
+		(void)setsid();
+		if (chdir("/")) {
+			BL_LOG_ERR("chdir error\n");
+		}
+		fd = open("/dev/null", O_RDWR, 0);
+		if (fd >= 0) {
+			(void)dup2(fd, STDIN_FILENO);
+			(void)dup2(fd, STDOUT_FILENO);
+			(void)dup2(fd, STDERR_FILENO);
+
+			/* Do not close it if it is a standard descriptor. */
+			if (fd > STDERR_FILENO)
+				(void)close(fd);
+		}
+
+		openlog("blkmapd", LOG_PID, 0);
+
+		/*
+		 * The pid file is taken by the daemon itself: a lock taken
+		 * by the parent would be dropped as soon as the parent
+		 * exits, and pidfd would stay -1 in the daemon.
+		 */
+		pidfd = open(PID_FILE, O_WRONLY | O_CREAT, 0644);
+		if (pidfd < 0) {
+			BL_LOG_ERR("Create pid file %s failed\n", PID_FILE);
+			exit(1);
+		}
+
+		if (lockf(pidfd, F_TLOCK, 0) < 0) {
+			BL_LOG_ERR("Already running; Exiting!");
+			close(pidfd);
+			exit(1);
+		}
+		if (ftruncate(pidfd, 0) < 0)
+			BL_LOG_ERR("ftruncate on %s failed: %s\n",
+				   PID_FILE, strerror(errno));
+		snprintf(pidbuf, sizeof(pidbuf), "%d\n", (int)getpid());
+		len = strlen(pidbuf);
+		if (write(pidfd, pidbuf, len) != (ssize_t)len)
+			BL_LOG_ERR("write on %s failed: %s\n",
+				   PID_FILE, strerror(errno));
+	}
+
+	signal(SIGINT, sig_die);
+	signal(SIGTERM, sig_die);
+	signal(SIGHUP, SIG_IGN);
+
+	if (dflag) {
+		ret = bl_discover_devices();
+		goto out;
+	}
+
+	if ((bl_watch_fd = inotify_init()) < 0) {
+		BL_LOG_ERR("init inotify failed %s\n", strerror(errno));
+		goto out;
+	}
+
+	/* open pipe file */
+	bl_watch_dir(rpcpipe_dir, &rpc_pipedir_wfd);
+	bl_watch_dir(nfspipe_dir, &nfs_pipedir_wfd);
+
+	/*
+	 * A failure here is not fatal: the pipe file is opened again once
+	 * inotify reports that it has been created.
+	 */
+	bl_pipe_fd = open(bl_pipe_file, O_RDWR);
+	if (bl_pipe_fd < 0)
+		BL_LOG_ERR("open pipe file %s failed: %s\n", bl_pipe_file, strerror(errno));
+
+	while (1) {
+		/* discover device when needed */
+		bl_discover_devices();
+
+		ret = bl_event_helper();
+		if (ret < 0) {
+			/* what should we do with process error? */
+			BL_LOG_ERR("inquiry process return %d\n", ret);
+		}
+	}
+out:
+	if (pidfd >= 0) {
+		close(pidfd);
+		unlink(PID_FILE);
+	}
+
+	exit(ret);
+}