diff options
Diffstat (limited to '')
-rw-r--r-- | src/doveadm/doveadm-sis.c | 330 |
1 files changed, 330 insertions, 0 deletions
diff --git a/src/doveadm/doveadm-sis.c b/src/doveadm/doveadm-sis.c new file mode 100644 index 0000000..85e82e9 --- /dev/null +++ b/src/doveadm/doveadm-sis.c @@ -0,0 +1,330 @@ +/* Copyright (c) 2009-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "hex-binary.h" +#include "hostpid.h" +#include "randgen.h" +#include "read-full.h" +#include "fs-sis-common.h" +#include "doveadm.h" +#include "doveadm-print.h" + +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> +#include <dirent.h> +#include <sys/stat.h> + +/* Files are in <rootdir>/ha/sh/<hash>-<guid> + They may be hard linked to hashes/<hash> +*/ + +static const char *sis_get_dir(const char *rootdir, const char *hash) +{ + if (strlen(hash) < 4 || strchr(hash, '/') != NULL) + i_fatal("Invalid hash in filename: %s", hash); + return t_strdup_printf("%s/%c%c/%c%c", rootdir, + hash[0], hash[1], hash[2], hash[3]); +} + +static int +file_contents_equal(const char *path1, const char *path2, ino_t *path2_inode_r) +{ + struct stat st1, st2; + int fd1, fd2, ret = -1; + + *path2_inode_r = 0; + + /* do a byte-by-byte comparison for the files to find out if they're + the same or if this is a hash collision */ + fd1 = open(path1, O_RDONLY); + if (fd1 == -1) { + if (errno != ENOENT) + i_error("open(%s) failed: %m", path1); + return -1; + } + fd2 = open(path2, O_RDONLY); + if (fd2 == -1) { + if (errno != ENOENT) + i_error("open(%s) failed: %m", path2); + i_close_fd(&fd1); + return -1; + } + + if (fstat(fd1, &st1) < 0) + i_error("fstat(%s) failed: %m", path1); + else if (fstat(fd2, &st2) < 0) + i_error("fstat(%s) failed: %m", path1); + else if (st1.st_size != st2.st_size) + ret = 0; + else { + /* @UNSAFE: sizes match. compare. */ + unsigned char buf1[IO_BLOCK_SIZE], buf2[IO_BLOCK_SIZE]; + ssize_t ret1; + int ret2; + + while ((ret1 = read(fd1, buf1, sizeof(buf1))) > 0) { + i_assert((size_t)ret1 <= sizeof(buf2)); + if ((ret2 = read_full(fd2, buf2, ret1)) <= 0) { + if (ret2 < 0) + i_error("read(%s) failed: %m", path2); + else + ret = 0; + break; + } + if (memcmp(buf1, buf2, ret1) != 0) { + ret = 0; + break; + } + } + if (ret1 < 0) + i_error("read(%s) failed: %m", path1); + else if (ret1 == 0) + ret = 1; + *path2_inode_r = st2.st_ino; + } + + if (close(fd1) < 0) + i_error("close(%s) failed: %m", path1); + if (close(fd2) < 0) + i_error("close(%s) failed: %m", path2); + + return ret; +} + +static int +hardlink_replace(const char *src, const char *dest, ino_t src_inode) +{ + const char *p, *destdir, *tmppath; + unsigned char randbuf[8]; + struct stat st; + + p = strrchr(dest, '/'); + i_assert(p != NULL); + destdir = t_strdup_until(dest, p); + + random_fill(randbuf, sizeof(randbuf)); + tmppath = t_strdup_printf("%s/temp.%s.%s.%s", + destdir, my_hostname, my_pid, + binary_to_hex(randbuf, sizeof(randbuf))); + if (link(src, tmppath) < 0) { + if (errno == EMLINK) + return 0; + i_error("link(%s, %s) failed: %m", src, tmppath); + return -1; + } + if (stat(tmppath, &st) < 0) { + i_error("stat(%s) failed: %m", tmppath); + return -1; + } + if (st.st_ino != src_inode) { + i_unlink(tmppath); + return 0; + } + if (rename(tmppath, dest) < 0) { + i_error("rename(%s, %s) failed: %m", src, tmppath); + i_unlink(tmppath); + return -1; + } + return 1; +} + +static int sis_try_deduplicate(const char *rootdir, const char *fname) +{ + const char *p, *hash, *hashdir, *path, *hashes_dir, *hashes_path; + struct stat st; + ino_t inode; + int ret; + + /* fname should be in <hash>-<guid> format */ + p = strchr(fname, '-'); + i_assert(p != NULL); + + hash = t_strdup_until(fname, p); + hashdir = sis_get_dir(rootdir, hash); + path = t_strdup_printf("%s/%s", hashdir, fname); + + hashes_dir = t_strconcat(hashdir, "/", HASH_DIR_NAME, NULL); + hashes_path = t_strconcat(hashes_dir, "/", hash, NULL); + if (link(path, hashes_path) == 0) { + /* first file with this hash. we're done */ + return 0; + } + if (errno == ENOENT) { + /* either path was already deleted or hashes dir + doesn't exist */ + if (mkdir(hashes_dir, 0700) < 0) { + if (errno == EEXIST) + return 0; + i_error("mkdir(%s) failed: %m", hashes_dir); + return -1; + } + /* try again */ + if (link(path, hashes_path) == 0 || errno == ENOENT) + return 0; + } + if (errno != EEXIST) { + i_error("link(%s, %s) failed: %m", path, hashes_path); + return -1; + } + + /* need to do a byte-by-byte comparison. but check first if someone + else already had deduplicated the file. */ + if (stat(path, &st) < 0) { + if (errno == ENOENT) { + /* just got deleted */ + return 0; + } + i_error("stat(%s) failed: %m", path); + return -1; + } + if (st.st_nlink > 1) { + /* already deduplicated */ + return 0; + } + + ret = file_contents_equal(path, hashes_path, &inode); + if (ret < 0) { + if (errno == ENOENT) { + /* either path or hashes_path was deleted. */ + return sis_try_deduplicate(rootdir, fname); + } + return -1; + } + if (ret > 0) { + /* equal, replace with hard link */ + ret = hardlink_replace(hashes_path, path, inode); + if (ret > 0) + return 0; + else if (ret < 0) + return -1; + /* too many hard links or inode changed */ + } + + /* replace hashes link with this */ + return hardlink_replace(path, hashes_path, st.st_ino) < 0 ? -1 : 0; +} + +static void cmd_sis_deduplicate(struct doveadm_cmd_context *cctx) +{ + const char *rootdir, *queuedir; + DIR *dir; + struct dirent *d; + struct stat st, first_st; + string_t *path; + size_t dir_len; + int ret; + + if (!doveadm_cmd_param_str(cctx, "root-dir", &rootdir) || + !doveadm_cmd_param_str(cctx, "queue-dir", &queuedir)) + help_ver2(&doveadm_cmd_sis_deduplicate); + + /* go through the filenames in the queue dir and see if + we can deduplicate them. */ + if (stat(rootdir, &st) < 0) + i_fatal("stat(%s) failed: %m", rootdir); + + path = t_str_new(256); + str_append(path, queuedir); + str_append_c(path, '/'); + dir_len = str_len(path); + + dir = opendir(queuedir); + if (dir == NULL) + i_fatal("opendir(%s) failed: %m", queuedir); + + first_st.st_size = -1; + while ((d = readdir(dir)) != NULL) { + if (d->d_name[0] == '.') + continue; + + str_truncate(path, dir_len); + str_append(path, d->d_name); + + if (first_st.st_size < 0) { + if (stat(str_c(path), &first_st) < 0) + i_fatal("stat(%s) failed: %m", str_c(path)); + } + if (strchr(d->d_name, '-') == NULL || first_st.st_size != 0) { + i_fatal("%s is not a valid sis-queue file, " + "is the queue directory correct?", + str_c(path)); + } + + T_BEGIN { + ret = sis_try_deduplicate(rootdir, d->d_name); + } T_END; + if (ret == 0) + i_unlink(str_c(path)); + } + if (closedir(dir) < 0) + i_error("closedir(%s) failed: %m", queuedir); +} + +static void cmd_sis_find(struct doveadm_cmd_context *cctx) +{ + const char *rootdir, *path, *hash; + DIR *dir; + struct dirent *d; + struct stat st; + string_t *str; + size_t dir_len, hash_len; + + if (!doveadm_cmd_param_str(cctx, "root-dir", &rootdir) || + !doveadm_cmd_param_str(cctx, "hash", &hash) || + strlen(hash) < 4) + help_ver2(&doveadm_cmd_sis_find); + + if (stat(rootdir, &st) < 0) { + if (errno == ENOENT) + i_fatal("Attachment dir doesn't exist: %s", rootdir); + i_fatal("stat(%s) failed: %m", rootdir); + } + hash_len = strlen(hash); + + path = sis_get_dir(rootdir, hash); + str = t_str_new(256); + str_append(str, path); + str_append_c(str, '/'); + dir_len = str_len(str); + + dir = opendir(path); + if (dir == NULL) { + if (errno == ENOENT) + return; + i_fatal("opendir(%s) failed: %m", path); + } + + doveadm_print_init(DOVEADM_PRINT_TYPE_FLOW); + doveadm_print_header("path", "path", + DOVEADM_PRINT_HEADER_FLAG_HIDE_TITLE); + while ((d = readdir(dir)) != NULL) { + if (strncmp(d->d_name, hash, hash_len) == 0) { + str_truncate(str, dir_len); + str_append(str, d->d_name); + doveadm_print(str_c(str)); + } + } + if (closedir(dir) < 0) + i_error("closedir(%s) failed: %m", path); +} + +struct doveadm_cmd_ver2 doveadm_cmd_sis_deduplicate = { + .name = "sis deduplicate", + .cmd = cmd_sis_deduplicate, + .usage = "<root dir> <queue dir>", +DOVEADM_CMD_PARAMS_START +DOVEADM_CMD_PARAM('\0', "root-dir", CMD_PARAM_STR, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAM('\0', "queue-dir", CMD_PARAM_STR, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAMS_END +}; +struct doveadm_cmd_ver2 doveadm_cmd_sis_find = { + .name = "sis find", + .cmd = cmd_sis_find, + .usage = "<root dir> <hash>", +DOVEADM_CMD_PARAMS_START +DOVEADM_CMD_PARAM('\0', "root-dir", CMD_PARAM_STR, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAM('\0', "hash", CMD_PARAM_STR, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAMS_END +}; |