diff options
Diffstat (limited to 'ctdb/server/ctdb_mutex_fcntl_helper.c')
-rw-r--r-- | ctdb/server/ctdb_mutex_fcntl_helper.c | 794 |
1 files changed, 794 insertions, 0 deletions
diff --git a/ctdb/server/ctdb_mutex_fcntl_helper.c b/ctdb/server/ctdb_mutex_fcntl_helper.c new file mode 100644 index 0000000..84d3790 --- /dev/null +++ b/ctdb/server/ctdb_mutex_fcntl_helper.c @@ -0,0 +1,794 @@ +/* + CTDB mutex fcntl lock file helper + + Copyright (C) Martin Schwenke 2015 + + wait_for_parent() code from ctdb_lock_helper.c: + + Copyright (C) Amitay Isaacs 2013 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "system/filesys.h" +#include "system/network.h" +#include "system/wait.h" + +#include <tevent.h> + +#include "lib/util/sys_rw.h" +#include "lib/util/tevent_unix.h" +#include "lib/util/util.h" +#include "lib/util/smb_strtox.h" + +/* protocol.h is just needed for ctdb_sock_addr, which is used in system.h */ +#include "protocol/protocol.h" +#include "common/system.h" +#include "common/tmon.h" + +static char progpath[PATH_MAX]; +static char *progname = NULL; + +static int fcntl_lock_fd(int fd, bool block, off_t start) +{ + static struct flock lock = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_len = 1, + .l_pid = 0, + }; + int cmd = block ? F_SETLKW : F_SETLK; + + lock.l_start = start; + if (fcntl(fd, cmd, &lock) != 0) { + return errno; + } + + return 0; +} + +static char fcntl_lock(const char *file, int *outfd) +{ + int fd; + int ret; + + fd = open(file, O_RDWR|O_CREAT, 0600); + if (fd == -1) { + fprintf(stderr, "%s: Unable to open %s - (%s)\n", + progname, file, strerror(errno)); + return '3'; + } + + ret = fcntl_lock_fd(fd, false, 0); + if (ret != 0) { + close(fd); + if (ret == EACCES || ret == EAGAIN) { + /* Lock contention, fail silently */ + return '1'; + } + + /* Log an error for any other failure */ + fprintf(stderr, + "%s: Failed to get lock on '%s' - (%s)\n", + progname, + file, + strerror(ret)); + return '3'; + } + + *outfd = fd; + + return '0'; +} + +/* + * Wait and see if the parent exits + */ + +struct wait_for_parent_state { + struct tevent_context *ev; + pid_t ppid; +}; + +static void wait_for_parent_check(struct tevent_req *subreq); + +static struct tevent_req *wait_for_parent_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + pid_t ppid) +{ + struct tevent_req *req, *subreq; + struct wait_for_parent_state *state; + + req = tevent_req_create(mem_ctx, &state, struct wait_for_parent_state); + if (req == NULL) { + return NULL; + } + + state->ev = ev; + state->ppid = ppid; + + if (ppid == 1) { + fprintf(stderr, "parent == 1\n"); + tevent_req_done(req); + return tevent_req_post(req, ev); + } + + subreq = tevent_wakeup_send(state, ev, + tevent_timeval_current_ofs(5,0)); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, wait_for_parent_check, req); + + return req; +} + +static void wait_for_parent_check(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct wait_for_parent_state *state = tevent_req_data( + req, struct wait_for_parent_state); + bool status; + + status = tevent_wakeup_recv(subreq); + TALLOC_FREE(subreq); + if (! status) { + /* Ignore error */ + fprintf(stderr, "%s: tevent_wakeup_recv() failed\n", progname); + } + + if (kill(state->ppid, 0) == -1 && errno == ESRCH) { + fprintf(stderr, "parent gone\n"); + tevent_req_done(req); + return; + } + + subreq = tevent_wakeup_send(state, state->ev, + tevent_timeval_current_ofs(5,0)); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, wait_for_parent_check, req); +} + +static bool wait_for_parent_recv(struct tevent_req *req, int *perr) +{ + if (tevent_req_is_unix_error(req, perr)) { + return false; + } + + return true; +} + +/* + * Perform I/O on lock in a loop - complete when file removed or replaced + */ + +struct lock_io_check_state { + struct tevent_context *ev; + const char *lock_file; + ino_t inode; + unsigned long recheck_interval; +}; + +static void lock_io_check_loop(struct tevent_req *subreq); + +static struct tevent_req *lock_io_check_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + const char *lock_file, + ino_t inode, + unsigned long recheck_interval) +{ + struct tevent_req *req, *subreq; + struct lock_io_check_state *state; + + req = tevent_req_create(mem_ctx, &state, struct lock_io_check_state); + if (req == NULL) { + return NULL; + } + + state->ev = ev; + state->lock_file = lock_file; + state->inode = inode; + state->recheck_interval = recheck_interval; + + subreq = tevent_wakeup_send( + state, + ev, + tevent_timeval_current_ofs(state->recheck_interval, 0)); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, lock_io_check_loop, req); + + return req; +} + +static void lock_io_check_loop(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct lock_io_check_state *state = tevent_req_data( + req, struct lock_io_check_state); + bool status; + struct stat sb; + int fd = -1; + int ret; + + status = tevent_wakeup_recv(subreq); + TALLOC_FREE(subreq); + if (! status) { + /* Ignore error */ + fprintf(stderr, "%s: tevent_wakeup_recv() failed\n", progname); + } + + fd = open(state->lock_file, O_RDWR); + if (fd == -1) { + fprintf(stderr, + "%s: " + "lock lost - lock file \"%s\" open failed (ret=%d)\n", + progname, + state->lock_file, + errno); + goto done; + } + + ret = fstat(fd, &sb); + if (ret != 0) { + fprintf(stderr, + "%s: " + "lock lost - lock file \"%s\" check failed (ret=%d)\n", + progname, + state->lock_file, + errno); + goto done; + } + + if (sb.st_ino != state->inode) { + fprintf(stderr, + "%s: lock lost - lock file \"%s\" inode changed\n", + progname, + state->lock_file); + goto done; + } + + /* + * Attempt to lock a 2nd byte range. Using a blocking lock + * encourages ping timeouts if the cluster filesystem is in a + * bad state. It also makes testing easier. + */ + ret = fcntl_lock_fd(fd, true, 1); + if (ret != 0) { + fprintf(stderr, + "%s: " + "lock fail - lock file \"%s\" test lock error (%d)\n", + progname, + state->lock_file, + ret); + goto done; + } + + /* Unlock occurs on close */ + close(fd); + + subreq = tevent_wakeup_send( + state, + state->ev, + tevent_timeval_current_ofs(state->recheck_interval, 0)); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, lock_io_check_loop, req); + + return; + +done: + if (fd != -1) { + close(fd); + } + tevent_req_done(req); +} + +static bool lock_io_check_recv(struct tevent_req *req, int *perr) +{ + if (tevent_req_is_unix_error(req, perr)) { + return false; + } + + return true; +} + +struct lock_test_child_state { +}; + +static void lock_test_child_ping_done(struct tevent_req *subreq); +static void lock_test_child_io_check_done(struct tevent_req *subreq); + +static struct tevent_req *lock_test_child_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + const char *lock_file, + int fd, + ino_t inode, + unsigned long recheck_interval, + bool send_pings) +{ + struct tevent_req *req, *subreq; + struct lock_test_child_state *state; + unsigned int interval = send_pings ? 1 : 0; + + req = tevent_req_create(mem_ctx, &state, struct lock_test_child_state); + if (req == NULL) { + return NULL; + } + + subreq = tmon_ping_send(state, ev, fd, TMON_FD_BOTH, 0, interval); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, lock_test_child_ping_done, req); + + subreq = lock_io_check_send(state, + ev, + lock_file, + inode, + recheck_interval); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, lock_test_child_io_check_done, req); + + return req; +} + +static void lock_test_child_ping_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + bool status; + int err; + + status = tmon_ping_recv(subreq, &err); + TALLOC_FREE(subreq); + if (!status) { + tevent_req_error(req, err); + return; + } + + tevent_req_done(req); +} + +static void lock_test_child_io_check_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + bool status; + int err; + + status = lock_io_check_recv(subreq, &err); + TALLOC_FREE(subreq); + if (!status) { + tevent_req_error(req, err); + return; + } + + tevent_req_done(req); +} + +static bool lock_test_child_recv(struct tevent_req *req, int *perr) +{ + if (tevent_req_is_unix_error(req, perr)) { + /* Parent exit is expected */ + if (*perr == EPIPE) { + return true; + } + return false; + } + + return true; +} + +static void lock_test_child(const char *lock_file, + int lock_fd, + int pipe_fd, + unsigned long recheck_interval, + bool send_pings) +{ + struct tevent_context *ev; + struct tevent_req *req; + struct stat sb; + ino_t inode; + bool status; + int ret; + + ret = fstat(lock_fd, &sb); + if (ret != 0) { + fprintf(stderr, + "%s: lock lost - " + "lock file \"%s\" stat failed (ret=%d)\n", + progname, + lock_file, + errno); + _exit(1); + } + inode = sb.st_ino; + close(lock_fd); + + ev = tevent_context_init(NULL); + if (ev == NULL) { + fprintf(stderr, "%s: tevent_context_init() failed\n", progname); + _exit(1); + } + + req = lock_test_child_send(ev, + ev, + lock_file, + pipe_fd, + inode, + recheck_interval, + send_pings); + if (req == NULL) { + fprintf(stderr, + "%s: lock_test_child_send() failed\n", + progname); + _exit(1); + } + + tevent_req_poll(req, ev); + + status = lock_test_child_recv(req, &ret); + if (! status) { + fprintf(stderr, + "%s: lock_test_child_recv() failed (%d)\n", + progname, + ret); + _exit(1); + } + + _exit(0); +} + +struct lock_test_state { + int *lock_fdp; + int pipe_fd; + pid_t child_pid; +}; + +static void lock_test_ping_done(struct tevent_req *subreq); + +static struct tevent_req *lock_test_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + const char *lock_file, + int *fdp, + unsigned long recheck_interval, + unsigned long ping_timeout) +{ + struct tevent_req *req, *subreq; + struct lock_test_state *state; + pid_t pid; + int sv[2]; + int ret; + + req = tevent_req_create(mem_ctx, &state, struct lock_test_state); + if (req == NULL) { + return NULL; + } + + ret = socketpair(AF_UNIX, SOCK_STREAM, 0, sv); + if (ret != 0) { + fprintf(stderr, + "%s: socketpair() failed (errno=%d)\n", + progname, + errno); + tevent_req_error(req, errno); + return tevent_req_post(req, ev); + } + + pid = fork(); + if (pid == -1) { + + int err = errno; + fprintf(stderr, "%s: fork() failed (errno=%d)\n", progname, err); + close(sv[0]); + close(sv[1]); + tevent_req_error(req, err); + return tevent_req_post(req, ev); + } + if (pid == 0) { + /* Child */ + close(sv[0]); + TALLOC_FREE(ev); + + lock_test_child(lock_file, + *fdp, + sv[1], + recheck_interval, + ping_timeout != 0); + /* Above does not return */ + } + + /* Parent */ + close(sv[1]); + + state->lock_fdp = fdp; + state->pipe_fd = sv[0]; + state->child_pid = pid; + + subreq = tmon_ping_send(state, ev, sv[0], TMON_FD_BOTH, ping_timeout, 0); + if (tevent_req_nomem(subreq, req)) { + close(sv[0]); + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, lock_test_ping_done, req); + + return req; +} + +static void lock_test_ping_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct lock_test_state *state = tevent_req_data( + req, struct lock_test_state); + int wstatus; + bool status; + int err; + + status = tmon_ping_recv(subreq, &err); + TALLOC_FREE(subreq); + if (! status) { + switch (err) { + case EPIPE: + /* Child exit, child already printed message */ + break; + case ETIMEDOUT: + fprintf(stderr, + "%s: ping timeout from lock test child\n", + progname); + break; + default: + fprintf(stderr, + "%s: tmon_ping_recv() failed (%d)\n", + progname, + err); + } + /* Ignore error */ + } + + /* + * Lock checking child is gone or not sending pings. Release + * the lock, close this end of pipe, send SIGKILL to the child + * process and wait for the child to exit. + */ + close(*state->lock_fdp); + *state->lock_fdp = -1; + close(state->pipe_fd); + kill(state->child_pid, SIGKILL); + waitpid(state->child_pid, &wstatus, 0); + + tevent_req_done(req); +} + +static bool lock_test_recv(struct tevent_req *req, int *perr) +{ + if (tevent_req_is_unix_error(req, perr)) { + return false; + } + + return true; +} + +/* + * Wait for a reason to exit, indicating that parent has exited or I/O + * on lock failed + */ + +struct wait_for_exit_state { +}; + +static void wait_for_exit_parent_done(struct tevent_req *subreq); +static void wait_for_exit_lock_test_done(struct tevent_req *subreq); + +static struct tevent_req *wait_for_exit_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + pid_t ppid, + const char *lock_file, + int *fdp, + unsigned long recheck_interval, + unsigned long ping_timeout) +{ + struct tevent_req *req, *subreq; + struct wait_for_exit_state *state; + + req = tevent_req_create(mem_ctx, &state, struct wait_for_exit_state); + if (req == NULL) { + return NULL; + } + + subreq = wait_for_parent_send(state, ev, ppid); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, wait_for_exit_parent_done, req); + + if (recheck_interval > 0) { + subreq = lock_test_send(state, + ev, + lock_file, + fdp, + recheck_interval, + ping_timeout); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, + wait_for_exit_lock_test_done, + req); + } + + return req; +} + +static void wait_for_exit_parent_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + bool status; + int err; + + status = wait_for_parent_recv(subreq, &err); + TALLOC_FREE(subreq); + if (! status) { + /* Ignore error */ + fprintf(stderr, + "%s: " + "wait_for_parent_recv() failed (%d)\n", + progname, + err); + } + + tevent_req_done(req); +} + +static void wait_for_exit_lock_test_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + bool status; + int err; + + status = lock_test_recv(subreq, &err); + TALLOC_FREE(subreq); + if (! status) { + fprintf(stderr, + "%s: " + "lock_test_recv() failed (%d)\n", + progname, + err); + /* Ignore error, fall through to done */ + } + + tevent_req_done(req); +} + +static bool wait_for_exit_recv(struct tevent_req *req, int *perr) +{ + if (tevent_req_is_unix_error(req, perr)) { + return false; + } + + return true; +} + +static void usage(void) +{ + fprintf(stderr, + "Usage: %s <file> [recheck_interval [ping_timeout]]\n", + progname); +} + +int main(int argc, char *argv[]) +{ + struct tevent_context *ev; + char result; + int ppid; + const char *file = NULL; + unsigned long recheck_interval; + unsigned long ping_timeout; + int ret; + int fd = -1; + struct tevent_req *req; + bool status; + + strlcpy(progpath, argv[0], sizeof(progpath)); + progname = basename(progpath); + + if (argc < 2 || argc > 4) { + usage(); + exit(1); + } + + ev = tevent_context_init(NULL); + if (ev == NULL) { + fprintf(stderr, "locking: tevent_context_init() failed\n"); + exit(1); + } + + ppid = getppid(); + + file = argv[1]; + + recheck_interval = 5; + ping_timeout = 0; + if (argc >= 3) { + recheck_interval = smb_strtoul(argv[2], + NULL, + 10, + &ret, + SMB_STR_STANDARD); + if (ret != 0) { + usage(); + exit(1); + } + } + if (argc >= 4) { + ping_timeout = smb_strtoul(argv[3], + NULL, + 10, + &ret, + SMB_STR_STANDARD); + if (ret != 0) { + usage(); + exit(1); + } + } + + result = fcntl_lock(file, &fd); + sys_write(STDOUT_FILENO, &result, 1); + + if (result != '0') { + return 0; + } + + req = wait_for_exit_send(ev, + ev, + ppid, + file, + &fd, + recheck_interval, + ping_timeout); + if (req == NULL) { + fprintf(stderr, + "%s: wait_for_exit_send() failed\n", + progname); + exit(1); + } + + tevent_req_poll(req, ev); + + status = wait_for_exit_recv(req, &ret); + if (! status) { + fprintf(stderr, + "%s: wait_for_exit_recv() failed (%d)\n", + progname, + ret); + } + + if (fd != -1) { + close(fd); + } + + return 0; +} |