Diffstat (limited to 'ctdb/utils/ceph')

-rw-r--r--   ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c   457
-rwxr-xr-x   ctdb/utils/ceph/test_ceph_rados_reclock.sh       212

2 files changed, 669 insertions, 0 deletions
diff --git a/ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c b/ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c
new file mode 100644
index 0000000..7d868a3
--- /dev/null
+++ b/ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c
@@ -0,0 +1,457 @@
+/*
+   CTDB mutex helper using Ceph librados locks
+
+   Copyright (C) David Disseldorp 2016-2020
+
+   Based on ctdb_mutex_fcntl_helper.c, which is:
+   Copyright (C) Martin Schwenke 2015
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include "tevent.h"
+#include "talloc.h"
+#include "rados/librados.h"
+
+#define CTDB_MUTEX_CEPH_LOCK_NAME   "ctdb_reclock_mutex"
+#define CTDB_MUTEX_CEPH_LOCK_COOKIE CTDB_MUTEX_CEPH_LOCK_NAME
+#define CTDB_MUTEX_CEPH_LOCK_DESC   "CTDB cluster lock"
+/*
+ * During failover it may take up to <lock duration> seconds before the
+ * newly elected recovery master can obtain the lock.
+ */
+#define CTDB_MUTEX_CEPH_LOCK_DURATION_SECS_DEFAULT 10
+
+#define CTDB_MUTEX_STATUS_HOLDING "0"
+#define CTDB_MUTEX_STATUS_CONTENDED "1"
+#define CTDB_MUTEX_STATUS_TIMEOUT "2"
+#define CTDB_MUTEX_STATUS_ERROR "3"
+
+static char *progname = NULL;
+
+static int ctdb_mutex_rados_ctx_create(const char *ceph_cluster_name,
+                                       const char *ceph_auth_name,
+                                       const char *pool_name,
+                                       rados_t *_ceph_cluster,
+                                       rados_ioctx_t *_ioctx)
+{
+        rados_t ceph_cluster = NULL;
+        rados_ioctx_t ioctx = NULL;
+        int ret;
+
+        ret = rados_create2(&ceph_cluster, ceph_cluster_name, ceph_auth_name, 0);
+        if (ret < 0) {
+                fprintf(stderr, "%s: failed to initialise Ceph cluster %s as %s"
+                        " - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
+                        strerror(-ret));
+                return ret;
+        }
+
+        /* path=NULL tells librados to use default locations */
+        ret = rados_conf_read_file(ceph_cluster, NULL);
+        if (ret < 0) {
+                fprintf(stderr, "%s: failed to parse Ceph cluster config"
+                        " - (%s)\n", progname, strerror(-ret));
+                rados_shutdown(ceph_cluster);
+                return ret;
+        }
+
+        ret = rados_connect(ceph_cluster);
+        if (ret < 0) {
+                fprintf(stderr, "%s: failed to connect to Ceph cluster %s as %s"
+                        " - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
+                        strerror(-ret));
+                rados_shutdown(ceph_cluster);
+                return ret;
+        }
+
+
+        ret = rados_ioctx_create(ceph_cluster, pool_name, &ioctx);
+        if (ret < 0) {
+                fprintf(stderr, "%s: failed to create Ceph ioctx for pool %s"
+                        " - (%s)\n", progname, pool_name, strerror(-ret));
+                rados_shutdown(ceph_cluster);
+                return ret;
+        }
+
+        *_ceph_cluster = ceph_cluster;
+        *_ioctx = ioctx;
+
+        return 0;
+}
+
+static int ctdb_mutex_rados_lock(rados_ioctx_t *ioctx,
+                                 const char *oid,
+                                 uint64_t lock_duration_s,
+                                 uint8_t flags)
+{
+        int ret;
+        struct timeval tv = { lock_duration_s, 0 };
+
+        ret = rados_lock_exclusive(ioctx, oid,
+                                   CTDB_MUTEX_CEPH_LOCK_NAME,
+                                   CTDB_MUTEX_CEPH_LOCK_COOKIE,
+                                   CTDB_MUTEX_CEPH_LOCK_DESC,
+                                   lock_duration_s == 0 ? NULL : &tv,
+                                   flags);
+        if ((ret == -EEXIST) || (ret == -EBUSY)) {
+                /* lock contention */
+                return ret;
+        } else if (ret < 0) {
+                /* unexpected failure */
+                fprintf(stderr,
+                        "%s: Failed to get lock on RADOS object '%s' - (%s)\n",
+                        progname, oid, strerror(-ret));
+                return ret;
+        }
+
+        /* lock obtained */
+        return 0;
+}
+
+static int ctdb_mutex_rados_unlock(rados_ioctx_t *ioctx,
+                                   const char *oid)
+{
+        int ret;
+
+        ret = rados_unlock(ioctx, oid,
+                           CTDB_MUTEX_CEPH_LOCK_NAME,
+                           CTDB_MUTEX_CEPH_LOCK_COOKIE);
+        if (ret < 0) {
+                fprintf(stderr,
+                        "%s: Failed to drop lock on RADOS object '%s' - (%s)\n",
+                        progname, oid, strerror(-ret));
+                return ret;
+        }
+
+        return 0;
+}
+
+struct ctdb_mutex_rados_state {
+        bool holding_mutex;
+        const char *ceph_cluster_name;
+        const char *ceph_auth_name;
+        const char *pool_name;
+        const char *object;
+        uint64_t lock_duration_s;
+        int ppid;
+        struct tevent_context *ev;
+        struct tevent_signal *sigterm_ev;
+        struct tevent_signal *sigint_ev;
+        struct tevent_timer *ppid_timer_ev;
+        struct tevent_timer *renew_timer_ev;
+        rados_t ceph_cluster;
+        rados_ioctx_t ioctx;
+};
+
+static void ctdb_mutex_rados_sigterm_cb(struct tevent_context *ev,
+                                        struct tevent_signal *se,
+                                        int signum,
+                                        int count,
+                                        void *siginfo,
+                                        void *private_data)
+{
+        struct ctdb_mutex_rados_state *cmr_state = private_data;
+        int ret = 0;
+
+        if (!cmr_state->holding_mutex) {
+                fprintf(stderr, "Sigterm callback invoked without mutex!\n");
+                ret = -EINVAL;
+        }
+
+        talloc_free(cmr_state);
+        exit(ret ? 1 : 0);
+}
+
+static void ctdb_mutex_rados_ppid_timer_cb(struct tevent_context *ev,
+                                           struct tevent_timer *te,
+                                           struct timeval current_time,
+                                           void *private_data)
+{
+        struct ctdb_mutex_rados_state *cmr_state = private_data;
+        int ret = 0;
+
+        if (!cmr_state->holding_mutex) {
+                fprintf(stderr, "Timer callback invoked without mutex!\n");
+                ret = -EINVAL;
+                goto err_ctx_cleanup;
+        }
+
+        if ((kill(cmr_state->ppid, 0) == 0) || (errno != ESRCH)) {
+                /* parent still around, keep waiting */
+                cmr_state->ppid_timer_ev = tevent_add_timer(cmr_state->ev,
+                                                            cmr_state,
+                                                tevent_timeval_current_ofs(5, 0),
+                                                ctdb_mutex_rados_ppid_timer_cb,
+                                                cmr_state);
+                if (cmr_state->ppid_timer_ev == NULL) {
+                        fprintf(stderr, "Failed to create timer event\n");
+                        /* rely on signal cb */
+                }
+                return;
+        }
+
+        /* parent ended, drop lock (via destructor) and exit */
+err_ctx_cleanup:
+        talloc_free(cmr_state);
+        exit(ret ? 1 : 0);
+}
+
+#define USECS_IN_SEC 1000000
+
+static void ctdb_mutex_rados_lock_renew_timer_cb(struct tevent_context *ev,
+                                                 struct tevent_timer *te,
+                                                 struct timeval current_time,
+                                                 void *private_data)
+{
+        struct ctdb_mutex_rados_state *cmr_state = private_data;
+        struct timeval tv;
+        int ret;
+
+        ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object,
+                                    cmr_state->lock_duration_s,
+                                    LIBRADOS_LOCK_FLAG_RENEW);
+        if (ret == -EBUSY) {
+                /* should never get -EEXIST on renewal */
+                fprintf(stderr, "Lock contention during renew: %d\n", ret);
+                goto err_ctx_cleanup;
+        } else if (ret < 0) {
+                fprintf(stderr, "Lock renew failed\n");
+                goto err_ctx_cleanup;
+        }
+
+        tv = tevent_timeval_current_ofs(0,
+                        cmr_state->lock_duration_s * (USECS_IN_SEC / 2));
+        cmr_state->renew_timer_ev = tevent_add_timer(cmr_state->ev,
+                                                     cmr_state,
+                                                     tv,
+                                        ctdb_mutex_rados_lock_renew_timer_cb,
+                                                     cmr_state);
+        if (cmr_state->renew_timer_ev == NULL) {
+                fprintf(stderr, "Failed to create timer event\n");
+                goto err_ctx_cleanup;
+        }
+
+        return;
+
+err_ctx_cleanup:
+        /* drop lock (via destructor) and exit */
+        talloc_free(cmr_state);
+        exit(1);
+}
+
+static int ctdb_mutex_rados_state_destroy(struct ctdb_mutex_rados_state *cmr_state)
+{
+        if (cmr_state->holding_mutex) {
+                ctdb_mutex_rados_unlock(cmr_state->ioctx, cmr_state->object);
+        }
+        if (cmr_state->ioctx != NULL) {
+                rados_ioctx_destroy(cmr_state->ioctx);
+        }
+        if (cmr_state->ceph_cluster != NULL) {
+                rados_shutdown(cmr_state->ceph_cluster);
+        }
+        return 0;
+}
+
+/* register this host+service with ceph-mgr for visibility */
+static int ctdb_mutex_rados_mgr_reg(rados_t ceph_cluster)
+{
+        int ret;
+        uint64_t instance_guid;
+        char id_buf[128];
+
+        instance_guid = rados_get_instance_id(ceph_cluster);
+        ret = snprintf(id_buf, sizeof(id_buf), "%s:0x%016llx",
+                       "ctdb_mutex_ceph_rados_helper",
+                       (unsigned long long)instance_guid);
+        if (ret < 0 || ret >= sizeof(id_buf)) {
+                fprintf(stderr, "Ceph instance name too long\n");
+                return -ENAMETOOLONG;
+        }
+
+        ret = rados_service_register(ceph_cluster, "ctdb", id_buf, "");
+        if (ret < 0) {
+                fprintf(stderr, "failed to register service with ceph-mgr\n");
+                return ret;
+        }
+
+        return 0;
+}
+
+int main(int argc, char *argv[])
+{
+        int ret;
+        struct ctdb_mutex_rados_state *cmr_state;
+
+        progname = argv[0];
+
+        if ((argc != 5) && (argc != 6)) {
+                fprintf(stderr, "Usage: %s <Ceph Cluster> <Ceph user> "
+                                "<RADOS pool> <RADOS object> "
+                                "[lock duration secs]\n",
+                        progname);
+                ret = -EINVAL;
+                goto err_out;
+        }
+
+        ret = setvbuf(stdout, NULL, _IONBF, 0);
+        if (ret != 0) {
+                fprintf(stderr, "Failed to configure unbuffered stdout I/O\n");
+        }
+
+        cmr_state = talloc_zero(NULL, struct ctdb_mutex_rados_state);
+        if (cmr_state == NULL) {
+                fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+                ret = -ENOMEM;
+                goto err_out;
+        }
+
+        talloc_set_destructor(cmr_state, ctdb_mutex_rados_state_destroy);
+        cmr_state->ceph_cluster_name = argv[1];
+        cmr_state->ceph_auth_name = argv[2];
+        cmr_state->pool_name = argv[3];
+        cmr_state->object = argv[4];
+        if (argc == 6) {
+                /* optional lock duration provided */
+                char *endptr = NULL;
+                cmr_state->lock_duration_s = strtoull(argv[5], &endptr, 0);
+                if ((endptr == argv[5]) || (*endptr != '\0')) {
+                        fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+                        ret = -EINVAL;
+                        goto err_ctx_cleanup;
+                }
+        } else {
+                cmr_state->lock_duration_s
+                        = CTDB_MUTEX_CEPH_LOCK_DURATION_SECS_DEFAULT;
+        }
+
+        cmr_state->ppid = getppid();
+        if (cmr_state->ppid == 1) {
+                /*
+                 * The original parent is gone and the process has
+                 * been reparented to init. This can happen if the
+                 * helper is started just as the parent is killed
+                 * during shutdown. The error message doesn't need to
+                 * be stellar, since there won't be anything around to
+                 * capture and log it...
+                 */
+                fprintf(stderr, "%s: PPID == 1\n", progname);
+                ret = -EPIPE;
+                goto err_ctx_cleanup;
+        }
+
+        cmr_state->ev = tevent_context_init(cmr_state);
+        if (cmr_state->ev == NULL) {
+                fprintf(stderr, "tevent_context_init failed\n");
+                fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+                ret = -ENOMEM;
+                goto err_ctx_cleanup;
+        }
+
+        /* wait for sigterm */
+        cmr_state->sigterm_ev = tevent_add_signal(cmr_state->ev, cmr_state, SIGTERM, 0,
+                                                  ctdb_mutex_rados_sigterm_cb,
+                                                  cmr_state);
+        if (cmr_state->sigterm_ev == NULL) {
+                fprintf(stderr, "Failed to create term signal event\n");
+                fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+                ret = -ENOMEM;
+                goto err_ctx_cleanup;
+        }
+
+        cmr_state->sigint_ev = tevent_add_signal(cmr_state->ev, cmr_state, SIGINT, 0,
+                                                 ctdb_mutex_rados_sigterm_cb,
+                                                 cmr_state);
+        if (cmr_state->sigint_ev == NULL) {
+                fprintf(stderr, "Failed to create int signal event\n");
+                fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+                ret = -ENOMEM;
+                goto err_ctx_cleanup;
+        }
+
+        /* periodically check parent */
+        cmr_state->ppid_timer_ev = tevent_add_timer(cmr_state->ev, cmr_state,
+                                                    tevent_timeval_current_ofs(5, 0),
+                                                    ctdb_mutex_rados_ppid_timer_cb,
+                                                    cmr_state);
+        if (cmr_state->ppid_timer_ev == NULL) {
+                fprintf(stderr, "Failed to create timer event\n");
+                fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+                ret = -ENOMEM;
+                goto err_ctx_cleanup;
+        }
+
+        ret = ctdb_mutex_rados_ctx_create(cmr_state->ceph_cluster_name,
+                                          cmr_state->ceph_auth_name,
+                                          cmr_state->pool_name,
+                                          &cmr_state->ceph_cluster,
+                                          &cmr_state->ioctx);
+        if (ret < 0) {
+                fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+                goto err_ctx_cleanup;
+        }
+
+        ret = ctdb_mutex_rados_mgr_reg(cmr_state->ceph_cluster);
+        if (ret < 0) {
+                fprintf(stderr, "Failed to register with ceph-mgr\n");
+                /* ignore: ceph-mgr service registration is informational */
+        }
+
+        ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object,
+                                    cmr_state->lock_duration_s,
+                                    0);
+        if ((ret == -EEXIST) || (ret == -EBUSY)) {
+                fprintf(stdout, CTDB_MUTEX_STATUS_CONTENDED);
+                goto err_ctx_cleanup;
+        } else if (ret < 0) {
+                fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+                goto err_ctx_cleanup;
+        }
+        cmr_state->holding_mutex = true;
+
+        if (cmr_state->lock_duration_s != 0) {
+                /*
+                 * renew (reobtain) the lock, using a period of half the lock
+                 * duration. Convert to usecs to avoid rounding.
+                 */
+                struct timeval tv = tevent_timeval_current_ofs(0,
+                               cmr_state->lock_duration_s * (USECS_IN_SEC / 2));
+                cmr_state->renew_timer_ev = tevent_add_timer(cmr_state->ev,
+                                                             cmr_state,
+                                                             tv,
+                                        ctdb_mutex_rados_lock_renew_timer_cb,
+                                                             cmr_state);
+                if (cmr_state->renew_timer_ev == NULL) {
+                        fprintf(stderr, "Failed to create timer event\n");
+                        fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+                        ret = -ENOMEM;
+                        goto err_ctx_cleanup;
+                }
+        }
+
+        fprintf(stdout, CTDB_MUTEX_STATUS_HOLDING);
+
+        /* wait for the signal / timer events to do their work */
+        ret = tevent_loop_wait(cmr_state->ev);
+        if (ret < 0) {
+                goto err_ctx_cleanup;
+        }
+err_ctx_cleanup:
+        talloc_free(cmr_state);
+err_out:
+        return ret ? 1 : 0;
+}
diff --git a/ctdb/utils/ceph/test_ceph_rados_reclock.sh b/ctdb/utils/ceph/test_ceph_rados_reclock.sh
new file mode 100755
index 0000000..bfb9c32
--- /dev/null
+++ b/ctdb/utils/ceph/test_ceph_rados_reclock.sh
@@ -0,0 +1,212 @@
+#!/bin/bash
+# standalone test for ctdb_mutex_ceph_rados_helper
+#
+# Copyright (C) David Disseldorp 2016-2020
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+# XXX The following parameters may require configuration:
+CLUSTER="ceph"          # Name of the Ceph cluster under test
+USER="client.admin"     # Ceph user - a keyring must exist
+POOL="rbd"              # RADOS pool - must exist
+OBJECT="ctdb_reclock"   # RADOS object: target for lock requests
+
+# test procedure:
+# - using ctdb_mutex_ceph_rados_helper, take a lock on the Ceph RADOS object at
+#   CLUSTER/$POOL/$OBJECT using the Ceph keyring for $USER
+#   + confirm that lock is obtained, via ctdb_mutex_ceph_rados_helper "0" output
+# - check for ceph-mgr service registration
+# - check RADOS object lock state, using the "rados lock info" command
+# - attempt to obtain the lock again, using ctdb_mutex_ceph_rados_helper
+#   + confirm that the lock is not successfully taken ("1" output=contention)
+# - tell the first locker to drop the lock and exit, via SIGTERM
+# - once the first locker has exited, attempt to get the lock again
+#   + confirm that this attempt succeeds
+
+function _fail() {
+        echo "FAILED: $*"
+        exit 1
+}
+
+# this test requires the Ceph "rados" binary, and "jq" json parser
+which jq > /dev/null || exit 1
+which rados > /dev/null || exit 1
+which ceph > /dev/null || exit 1
+which ctdb_mutex_ceph_rados_helper || exit 1
+
+TMP_DIR="$(mktemp --directory)" || exit 1
+rados -p "$POOL" rm "$OBJECT"
+
+# explicitly disable lock expiry (duration=0), to ensure that we don't get
+# intermittent failures (due to renewal) from the lock state diff further down
+(ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" "$POOL" "$OBJECT" 0 \
+        > ${TMP_DIR}/first) &
+locker_pid=$!
+
+# TODO wait for ctdb_mutex_ceph_rados_helper to write one byte to stdout,
+# indicating lock acquisition success/failure
+sleep 1
+
+first_out=$(cat ${TMP_DIR}/first)
+[ "$first_out" == "0" ] \
+        || _fail "expected lock acquisition (0), but got $first_out"
+
+ceph service dump > ${TMP_DIR}/service_dump
+SERVICE_COUNT=$(jq -r '.services.ctdb.daemons | length' ${TMP_DIR}/service_dump)
+[ $SERVICE_COUNT -gt 0 ] || _fail "lock holder missing from ceph service dump"
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+        > ${TMP_DIR}/lock_state_first
+
+# echo "with lock: `cat ${TMP_DIR}/lock_state_first`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_first)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+        || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_first)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+        || _fail "unexpected lock type: $LOCK_TYPE"
+
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_first)"
+[ $LOCK_COUNT -eq 1 ] || _fail "expected 1 lock in rados state, got $LOCK_COUNT"
+LOCKER_COOKIE="$(jq -r '.lockers[0].cookie' ${TMP_DIR}/lock_state_first)"
+[ "$LOCKER_COOKIE" == "ctdb_reclock_mutex" ] \
+        || _fail "unexpected locker cookie: $LOCKER_COOKIE"
+LOCKER_DESC="$(jq -r '.lockers[0].description' ${TMP_DIR}/lock_state_first)"
+[ "$LOCKER_DESC" == "CTDB cluster lock" ] \
+        || _fail "unexpected locker description: $LOCKER_DESC"
+LOCKER_EXP="$(jq -r '.lockers[0].expiration' ${TMP_DIR}/lock_state_first)"
+[ "$LOCKER_EXP" == "0.000000" ] \
+        || _fail "unexpected locker expiration: $LOCKER_EXP"
+
+# second attempt while first is still holding the lock - expect failure
+ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" "$POOL" "$OBJECT" \
+        > ${TMP_DIR}/second
+second_out=$(cat ${TMP_DIR}/second)
+[ "$second_out" == "1" ] \
+        || _fail "expected lock contention (1), but got $second_out"
+
+# confirm lock state didn't change
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+        > ${TMP_DIR}/lock_state_second
+
+diff ${TMP_DIR}/lock_state_first ${TMP_DIR}/lock_state_second \
+        || _fail "unexpected lock state change"
+
+# tell first locker to drop the lock and terminate
+kill $locker_pid || exit 1
+
+wait $locker_pid &> /dev/null
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+        > ${TMP_DIR}/lock_state_third
+# echo "without lock: `cat ${TMP_DIR}/lock_state_third`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_third)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+        || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_third)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+        || _fail "unexpected lock type: $LOCK_TYPE"
+
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_third)"
+[ $LOCK_COUNT -eq 0 ] \
+        || _fail "didn\'t expect any locks in rados state, got $LOCK_COUNT"
+
+exec >${TMP_DIR}/third -- ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" "$POOL" "$OBJECT" &
+locker_pid=$!
+
+sleep 1
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+        > ${TMP_DIR}/lock_state_fourth
+# echo "with lock again: `cat ${TMP_DIR}/lock_state_fourth`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_fourth)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+        || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_fourth)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+        || _fail "unexpected lock type: $LOCK_TYPE"
+
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_fourth)"
+[ $LOCK_COUNT -eq 1 ] || _fail "expected 1 lock in rados state, got $LOCK_COUNT"
+LOCKER_COOKIE="$(jq -r '.lockers[0].cookie' ${TMP_DIR}/lock_state_fourth)"
+[ "$LOCKER_COOKIE" == "ctdb_reclock_mutex" ] \
+        || _fail "unexpected locker cookie: $LOCKER_COOKIE"
+LOCKER_DESC="$(jq -r '.lockers[0].description' ${TMP_DIR}/lock_state_fourth)"
+[ "$LOCKER_DESC" == "CTDB cluster lock" ] \
+        || _fail "unexpected locker description: $LOCKER_DESC"
+
+kill $locker_pid || exit 1
+wait $locker_pid &> /dev/null
+
+third_out=$(cat ${TMP_DIR}/third)
+[ "$third_out" == "0" ] \
+        || _fail "expected lock acquisition (0), but got $third_out"
+
+# test renew / expire behaviour using a 1s expiry (update period = 500ms)
+exec >${TMP_DIR}/forth -- ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" \
+        "$POOL" "$OBJECT" 1 &
+locker_pid=$!
+
+sleep 1
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+        > ${TMP_DIR}/lock_state_fifth_a
+#echo "with lock fifth: `cat ${TMP_DIR}/lock_state_fifth_a`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_fifth_a)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+        || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_fifth_a)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+        || _fail "unexpected lock type: $LOCK_TYPE"
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_fifth_a)"
+[ $LOCK_COUNT -eq 1 ] || _fail "expected 1 lock in rados state, got $LOCK_COUNT"
+LOCKER_EXP_A="$(jq -r '.lockers[0].expiration' ${TMP_DIR}/lock_state_fifth_a)"
+[ "$LOCKER_EXP_A" != "0.000000" ] \
+        || _fail "unexpected locker expiration: $LOCKER_EXP_A"
+sleep 1 # sleep until renewal
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+        > ${TMP_DIR}/lock_state_fifth_b
+LOCKER_EXP_B="$(jq -r '.lockers[0].expiration' ${TMP_DIR}/lock_state_fifth_b)"
+[ "$LOCKER_EXP_B" != "0.000000" ] \
+        || _fail "unexpected locker expiration: $LOCKER_EXP_B"
+#echo "lock expiration before renewal $LOCKER_EXP_A, after renewal $LOCKER_EXP_B"
+[ "$LOCKER_EXP_B" != "$LOCKER_EXP_A" ] \
+        || _fail "locker expiration matches: $LOCKER_EXP_B"
+
+# no chance to drop the lock, rely on expiry
+kill -KILL $locker_pid || exit 1
+wait $locker_pid &> /dev/null
+sleep 1 # sleep until lock expiry
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+        > ${TMP_DIR}/lock_state_sixth
+#echo "lock expiry sixth: `cat ${TMP_DIR}/lock_state_sixth`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_sixth)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+        || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_sixth)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+        || _fail "unexpected lock type: $LOCK_TYPE"
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_sixth)"
+[ $LOCK_COUNT -eq 0 ] || _fail "expected 0 locks in rados state, got $LOCK_COUNT"
+
+rm ${TMP_DIR}/*
+rmdir $TMP_DIR
+
+echo "$0: all tests passed"
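Usage note (not part of the patch above): CTDB runs cluster-mutex helpers such as this one by prefixing the helper command with "!" in its recovery/cluster lock setting; the helper prints a single status byte ("0" holding, "1" contended, "2" timeout, "3" error, per the CTDB_MUTEX_STATUS_* defines above) and then keeps running until it is signalled. The sketch below shows one way the helper might be configured and exercised by hand under those assumptions; the install path, pool and object names are illustrative only, and the exact option name and config file location depend on the CTDB version, so check the matching CTDB documentation.

# Hypothetical ctdb.conf fragment (recent CTDB releases; older releases use a
# different setting name/location):
#
#   [cluster]
#       recovery lock = !/usr/local/libexec/ctdb/ctdb_mutex_ceph_rados_helper ceph client.admin rbd ctdb_reclock

# Exercising the helper manually, mirroring the test script above
# (path, user, pool and object are assumed values):
/usr/local/libexec/ctdb/ctdb_mutex_ceph_rados_helper ceph client.admin rbd ctdb_reclock &
helper_pid=$!
sleep 1                                                  # wait for the status byte on stdout
rados -p rbd lock info ctdb_reclock ctdb_reclock_mutex   # inspect the exclusive RADOS lock
kill "$helper_pid"                                       # SIGTERM: helper drops the lock and exits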