diff options
Diffstat (limited to '')
-rw-r--r-- | ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c | 457 | ||||
-rwxr-xr-x | ctdb/utils/ceph/test_ceph_rados_reclock.sh | 212 | ||||
-rwxr-xr-x | ctdb/utils/etcd/ctdb_etcd_lock | 213 | ||||
-rw-r--r-- | ctdb/utils/nagios/README | 56 | ||||
-rwxr-xr-x | ctdb/utils/nagios/check_ctdb | 279 | ||||
-rw-r--r-- | ctdb/utils/ping_pong/ping_pong.c | 303 | ||||
-rw-r--r-- | ctdb/utils/pmda/Install | 36 | ||||
-rw-r--r-- | ctdb/utils/pmda/README | 84 | ||||
-rw-r--r-- | ctdb/utils/pmda/Remove | 29 | ||||
-rw-r--r-- | ctdb/utils/pmda/domain.h | 19 | ||||
-rw-r--r-- | ctdb/utils/pmda/help | 106 | ||||
-rw-r--r-- | ctdb/utils/pmda/pmda_ctdb.c | 559 | ||||
-rw-r--r-- | ctdb/utils/pmda/pmns | 73 | ||||
-rw-r--r-- | ctdb/utils/pmda/root | 10 | ||||
-rw-r--r-- | ctdb/utils/smnotify/smnotify.c | 151 | ||||
-rw-r--r-- | ctdb/utils/smnotify/smnotify.x | 21 | ||||
-rw-r--r-- | ctdb/utils/tdb/tdb_mutex_check.c | 160 |
17 files changed, 2768 insertions, 0 deletions
diff --git a/ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c b/ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c new file mode 100644 index 0000000..7d868a3 --- /dev/null +++ b/ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c @@ -0,0 +1,457 @@ +/* + CTDB mutex helper using Ceph librados locks + + Copyright (C) David Disseldorp 2016-2020 + + Based on ctdb_mutex_fcntl_helper.c, which is: + Copyright (C) Martin Schwenke 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" + +#include "tevent.h" +#include "talloc.h" +#include "rados/librados.h" + +#define CTDB_MUTEX_CEPH_LOCK_NAME "ctdb_reclock_mutex" +#define CTDB_MUTEX_CEPH_LOCK_COOKIE CTDB_MUTEX_CEPH_LOCK_NAME +#define CTDB_MUTEX_CEPH_LOCK_DESC "CTDB cluster lock" +/* + * During failover it may take up to <lock duration> seconds before the + * newly elected recovery master can obtain the lock. 
+ */ +#define CTDB_MUTEX_CEPH_LOCK_DURATION_SECS_DEFAULT 10 + +#define CTDB_MUTEX_STATUS_HOLDING "0" +#define CTDB_MUTEX_STATUS_CONTENDED "1" +#define CTDB_MUTEX_STATUS_TIMEOUT "2" +#define CTDB_MUTEX_STATUS_ERROR "3" + +static char *progname = NULL; + +static int ctdb_mutex_rados_ctx_create(const char *ceph_cluster_name, + const char *ceph_auth_name, + const char *pool_name, + rados_t *_ceph_cluster, + rados_ioctx_t *_ioctx) +{ + rados_t ceph_cluster = NULL; + rados_ioctx_t ioctx = NULL; + int ret; + + ret = rados_create2(&ceph_cluster, ceph_cluster_name, ceph_auth_name, 0); + if (ret < 0) { + fprintf(stderr, "%s: failed to initialise Ceph cluster %s as %s" + " - (%s)\n", progname, ceph_cluster_name, ceph_auth_name, + strerror(-ret)); + return ret; + } + + /* path=NULL tells librados to use default locations */ + ret = rados_conf_read_file(ceph_cluster, NULL); + if (ret < 0) { + fprintf(stderr, "%s: failed to parse Ceph cluster config" + " - (%s)\n", progname, strerror(-ret)); + rados_shutdown(ceph_cluster); + return ret; + } + + ret = rados_connect(ceph_cluster); + if (ret < 0) { + fprintf(stderr, "%s: failed to connect to Ceph cluster %s as %s" + " - (%s)\n", progname, ceph_cluster_name, ceph_auth_name, + strerror(-ret)); + rados_shutdown(ceph_cluster); + return ret; + } + + + ret = rados_ioctx_create(ceph_cluster, pool_name, &ioctx); + if (ret < 0) { + fprintf(stderr, "%s: failed to create Ceph ioctx for pool %s" + " - (%s)\n", progname, pool_name, strerror(-ret)); + rados_shutdown(ceph_cluster); + return ret; + } + + *_ceph_cluster = ceph_cluster; + *_ioctx = ioctx; + + return 0; +} + +static int ctdb_mutex_rados_lock(rados_ioctx_t *ioctx, + const char *oid, + uint64_t lock_duration_s, + uint8_t flags) +{ + int ret; + struct timeval tv = { lock_duration_s, 0 }; + + ret = rados_lock_exclusive(ioctx, oid, + CTDB_MUTEX_CEPH_LOCK_NAME, + CTDB_MUTEX_CEPH_LOCK_COOKIE, + CTDB_MUTEX_CEPH_LOCK_DESC, + lock_duration_s == 0 ? 
NULL : &tv, + flags); + if ((ret == -EEXIST) || (ret == -EBUSY)) { + /* lock contention */ + return ret; + } else if (ret < 0) { + /* unexpected failure */ + fprintf(stderr, + "%s: Failed to get lock on RADOS object '%s' - (%s)\n", + progname, oid, strerror(-ret)); + return ret; + } + + /* lock obtained */ + return 0; +} + +static int ctdb_mutex_rados_unlock(rados_ioctx_t *ioctx, + const char *oid) +{ + int ret; + + ret = rados_unlock(ioctx, oid, + CTDB_MUTEX_CEPH_LOCK_NAME, + CTDB_MUTEX_CEPH_LOCK_COOKIE); + if (ret < 0) { + fprintf(stderr, + "%s: Failed to drop lock on RADOS object '%s' - (%s)\n", + progname, oid, strerror(-ret)); + return ret; + } + + return 0; +} + +struct ctdb_mutex_rados_state { + bool holding_mutex; + const char *ceph_cluster_name; + const char *ceph_auth_name; + const char *pool_name; + const char *object; + uint64_t lock_duration_s; + int ppid; + struct tevent_context *ev; + struct tevent_signal *sigterm_ev; + struct tevent_signal *sigint_ev; + struct tevent_timer *ppid_timer_ev; + struct tevent_timer *renew_timer_ev; + rados_t ceph_cluster; + rados_ioctx_t ioctx; +}; + +static void ctdb_mutex_rados_sigterm_cb(struct tevent_context *ev, + struct tevent_signal *se, + int signum, + int count, + void *siginfo, + void *private_data) +{ + struct ctdb_mutex_rados_state *cmr_state = private_data; + int ret = 0; + + if (!cmr_state->holding_mutex) { + fprintf(stderr, "Sigterm callback invoked without mutex!\n"); + ret = -EINVAL; + } + + talloc_free(cmr_state); + exit(ret ? 
1 : 0); +} + +static void ctdb_mutex_rados_ppid_timer_cb(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval current_time, + void *private_data) +{ + struct ctdb_mutex_rados_state *cmr_state = private_data; + int ret = 0; + + if (!cmr_state->holding_mutex) { + fprintf(stderr, "Timer callback invoked without mutex!\n"); + ret = -EINVAL; + goto err_ctx_cleanup; + } + + if ((kill(cmr_state->ppid, 0) == 0) || (errno != ESRCH)) { + /* parent still around, keep waiting */ + cmr_state->ppid_timer_ev = tevent_add_timer(cmr_state->ev, + cmr_state, + tevent_timeval_current_ofs(5, 0), + ctdb_mutex_rados_ppid_timer_cb, + cmr_state); + if (cmr_state->ppid_timer_ev == NULL) { + fprintf(stderr, "Failed to create timer event\n"); + /* rely on signal cb */ + } + return; + } + + /* parent ended, drop lock (via destructor) and exit */ +err_ctx_cleanup: + talloc_free(cmr_state); + exit(ret ? 1 : 0); +} + +#define USECS_IN_SEC 1000000 + +static void ctdb_mutex_rados_lock_renew_timer_cb(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval current_time, + void *private_data) +{ + struct ctdb_mutex_rados_state *cmr_state = private_data; + struct timeval tv; + int ret; + + ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object, + cmr_state->lock_duration_s, + LIBRADOS_LOCK_FLAG_RENEW); + if (ret == -EBUSY) { + /* should never get -EEXIST on renewal */ + fprintf(stderr, "Lock contention during renew: %d\n", ret); + goto err_ctx_cleanup; + } else if (ret < 0) { + fprintf(stderr, "Lock renew failed\n"); + goto err_ctx_cleanup; + } + + tv = tevent_timeval_current_ofs(0, + cmr_state->lock_duration_s * (USECS_IN_SEC / 2)); + cmr_state->renew_timer_ev = tevent_add_timer(cmr_state->ev, + cmr_state, + tv, + ctdb_mutex_rados_lock_renew_timer_cb, + cmr_state); + if (cmr_state->renew_timer_ev == NULL) { + fprintf(stderr, "Failed to create timer event\n"); + goto err_ctx_cleanup; + } + + return; + +err_ctx_cleanup: + /* drop lock (via destructor) and exit 
*/ + talloc_free(cmr_state); + exit(1); +} + +static int ctdb_mutex_rados_state_destroy(struct ctdb_mutex_rados_state *cmr_state) +{ + if (cmr_state->holding_mutex) { + ctdb_mutex_rados_unlock(cmr_state->ioctx, cmr_state->object); + } + if (cmr_state->ioctx != NULL) { + rados_ioctx_destroy(cmr_state->ioctx); + } + if (cmr_state->ceph_cluster != NULL) { + rados_shutdown(cmr_state->ceph_cluster); + } + return 0; +} + +/* register this host+service with ceph-mgr for visibility */ +static int ctdb_mutex_rados_mgr_reg(rados_t ceph_cluster) +{ + int ret; + uint64_t instance_guid; + char id_buf[128]; + + instance_guid = rados_get_instance_id(ceph_cluster); + ret = snprintf(id_buf, sizeof(id_buf), "%s:0x%016llx", + "ctdb_mutex_ceph_rados_helper", + (unsigned long long)instance_guid); + if (ret < 0 || ret >= sizeof(id_buf)) { + fprintf(stderr, "Ceph instance name too long\n"); + return -ENAMETOOLONG; + } + + ret = rados_service_register(ceph_cluster, "ctdb", id_buf, ""); + if (ret < 0) { + fprintf(stderr, "failed to register service with ceph-mgr\n"); + return ret; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + int ret; + struct ctdb_mutex_rados_state *cmr_state; + + progname = argv[0]; + + if ((argc != 5) && (argc != 6)) { + fprintf(stderr, "Usage: %s <Ceph Cluster> <Ceph user> " + "<RADOS pool> <RADOS object> " + "[lock duration secs]\n", + progname); + ret = -EINVAL; + goto err_out; + } + + ret = setvbuf(stdout, NULL, _IONBF, 0); + if (ret != 0) { + fprintf(stderr, "Failed to configure unbuffered stdout I/O\n"); + } + + cmr_state = talloc_zero(NULL, struct ctdb_mutex_rados_state); + if (cmr_state == NULL) { + fprintf(stdout, CTDB_MUTEX_STATUS_ERROR); + ret = -ENOMEM; + goto err_out; + } + + talloc_set_destructor(cmr_state, ctdb_mutex_rados_state_destroy); + cmr_state->ceph_cluster_name = argv[1]; + cmr_state->ceph_auth_name = argv[2]; + cmr_state->pool_name = argv[3]; + cmr_state->object = argv[4]; + if (argc == 6) { + /* optional lock duration provided */ 
+ char *endptr = NULL; + cmr_state->lock_duration_s = strtoull(argv[5], &endptr, 0); + if ((endptr == argv[5]) || (*endptr != '\0')) { + fprintf(stdout, CTDB_MUTEX_STATUS_ERROR); + ret = -EINVAL; + goto err_ctx_cleanup; + } + } else { + cmr_state->lock_duration_s + = CTDB_MUTEX_CEPH_LOCK_DURATION_SECS_DEFAULT; + } + + cmr_state->ppid = getppid(); + if (cmr_state->ppid == 1) { + /* + * The original parent is gone and the process has + * been reparented to init. This can happen if the + * helper is started just as the parent is killed + * during shutdown. The error message doesn't need to + * be stellar, since there won't be anything around to + * capture and log it... + */ + fprintf(stderr, "%s: PPID == 1\n", progname); + ret = -EPIPE; + goto err_ctx_cleanup; + } + + cmr_state->ev = tevent_context_init(cmr_state); + if (cmr_state->ev == NULL) { + fprintf(stderr, "tevent_context_init failed\n"); + fprintf(stdout, CTDB_MUTEX_STATUS_ERROR); + ret = -ENOMEM; + goto err_ctx_cleanup; + } + + /* wait for sigterm */ + cmr_state->sigterm_ev = tevent_add_signal(cmr_state->ev, cmr_state, SIGTERM, 0, + ctdb_mutex_rados_sigterm_cb, + cmr_state); + if (cmr_state->sigterm_ev == NULL) { + fprintf(stderr, "Failed to create term signal event\n"); + fprintf(stdout, CTDB_MUTEX_STATUS_ERROR); + ret = -ENOMEM; + goto err_ctx_cleanup; + } + + cmr_state->sigint_ev = tevent_add_signal(cmr_state->ev, cmr_state, SIGINT, 0, + ctdb_mutex_rados_sigterm_cb, + cmr_state); + if (cmr_state->sigint_ev == NULL) { + fprintf(stderr, "Failed to create int signal event\n"); + fprintf(stdout, CTDB_MUTEX_STATUS_ERROR); + ret = -ENOMEM; + goto err_ctx_cleanup; + } + + /* periodically check parent */ + cmr_state->ppid_timer_ev = tevent_add_timer(cmr_state->ev, cmr_state, + tevent_timeval_current_ofs(5, 0), + ctdb_mutex_rados_ppid_timer_cb, + cmr_state); + if (cmr_state->ppid_timer_ev == NULL) { + fprintf(stderr, "Failed to create timer event\n"); + fprintf(stdout, CTDB_MUTEX_STATUS_ERROR); + ret = -ENOMEM; + 
goto err_ctx_cleanup; + } + + ret = ctdb_mutex_rados_ctx_create(cmr_state->ceph_cluster_name, + cmr_state->ceph_auth_name, + cmr_state->pool_name, + &cmr_state->ceph_cluster, + &cmr_state->ioctx); + if (ret < 0) { + fprintf(stdout, CTDB_MUTEX_STATUS_ERROR); + goto err_ctx_cleanup; + } + + ret = ctdb_mutex_rados_mgr_reg(cmr_state->ceph_cluster); + if (ret < 0) { + fprintf(stderr, "Failed to register with ceph-mgr\n"); + /* ignore: ceph-mgr service registration is informational */ + } + + ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object, + cmr_state->lock_duration_s, + 0); + if ((ret == -EEXIST) || (ret == -EBUSY)) { + fprintf(stdout, CTDB_MUTEX_STATUS_CONTENDED); + goto err_ctx_cleanup; + } else if (ret < 0) { + fprintf(stdout, CTDB_MUTEX_STATUS_ERROR); + goto err_ctx_cleanup; + } + cmr_state->holding_mutex = true; + + if (cmr_state->lock_duration_s != 0) { + /* + * renew (reobtain) the lock, using a period of half the lock + * duration. Convert to usecs to avoid rounding. + */ + struct timeval tv = tevent_timeval_current_ofs(0, + cmr_state->lock_duration_s * (USECS_IN_SEC / 2)); + cmr_state->renew_timer_ev = tevent_add_timer(cmr_state->ev, + cmr_state, + tv, + ctdb_mutex_rados_lock_renew_timer_cb, + cmr_state); + if (cmr_state->renew_timer_ev == NULL) { + fprintf(stderr, "Failed to create timer event\n"); + fprintf(stdout, CTDB_MUTEX_STATUS_ERROR); + ret = -ENOMEM; + goto err_ctx_cleanup; + } + } + + fprintf(stdout, CTDB_MUTEX_STATUS_HOLDING); + + /* wait for the signal / timer events to do their work */ + ret = tevent_loop_wait(cmr_state->ev); + if (ret < 0) { + goto err_ctx_cleanup; + } +err_ctx_cleanup: + talloc_free(cmr_state); +err_out: + return ret ? 
1 : 0; +} diff --git a/ctdb/utils/ceph/test_ceph_rados_reclock.sh b/ctdb/utils/ceph/test_ceph_rados_reclock.sh new file mode 100755 index 0000000..bfb9c32 --- /dev/null +++ b/ctdb/utils/ceph/test_ceph_rados_reclock.sh @@ -0,0 +1,212 @@ +#!/bin/bash +# standalone test for ctdb_mutex_ceph_rados_helper +# +# Copyright (C) David Disseldorp 2016-2020 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. + +# XXX The following parameters may require configuration: +CLUSTER="ceph" # Name of the Ceph cluster under test +USER="client.admin" # Ceph user - a keyring must exist +POOL="rbd" # RADOS pool - must exist +OBJECT="ctdb_reclock" # RADOS object: target for lock requests + +# test procedure: +# - using ctdb_mutex_ceph_rados_helper, take a lock on the Ceph RADOS object at +# CLUSTER/$POOL/$OBJECT using the Ceph keyring for $USER +# + confirm that lock is obtained, via ctdb_mutex_ceph_rados_helper "0" output +# - check for ceph-mgr service registration +# - check RADOS object lock state, using the "rados lock info" command +# - attempt to obtain the lock again, using ctdb_mutex_ceph_rados_helper +# + confirm that the lock is not successfully taken ("1" output=contention) +# - tell the first locker to drop the lock and exit, via SIGTERM +# - once the first locker has exited, attempt to get the lock again +# + confirm that this attempt succeeds + +function _fail() { + echo "FAILED: $*" + exit 
1 +} + +# this test requires the Ceph "rados" binary, and "jq" json parser +which jq > /dev/null || exit 1 +which rados > /dev/null || exit 1 +which ceph > /dev/null || exit 1 +which ctdb_mutex_ceph_rados_helper || exit 1 + +TMP_DIR="$(mktemp --directory)" || exit 1 +rados -p "$POOL" rm "$OBJECT" + +# explicitly disable lock expiry (duration=0), to ensure that we don't get +# intermittent failures (due to renewal) from the lock state diff further down +(ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" "$POOL" "$OBJECT" 0 \ + > ${TMP_DIR}/first) & +locker_pid=$! + +# TODO wait for ctdb_mutex_ceph_rados_helper to write one byte to stdout, +# indicating lock acquisition success/failure +sleep 1 + +first_out=$(cat ${TMP_DIR}/first) +[ "$first_out" == "0" ] \ + || _fail "expected lock acquisition (0), but got $first_out" + +ceph service dump > ${TMP_DIR}/service_dump +SERVICE_COUNT=$(jq -r '.services.ctdb.daemons | length' ${TMP_DIR}/service_dump) +[ $SERVICE_COUNT -gt 0 ] || _fail "lock holder missing from ceph service dump" + +rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \ + > ${TMP_DIR}/lock_state_first + +# echo "with lock: `cat ${TMP_DIR}/lock_state_first`" + +LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_first)" +[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \ + || _fail "unexpected lock name: $LOCK_NAME" +LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_first)" +[ "$LOCK_TYPE" == "exclusive" ] \ + || _fail "unexpected lock type: $LOCK_TYPE" + +LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_first)" +[ $LOCK_COUNT -eq 1 ] || _fail "expected 1 lock in rados state, got $LOCK_COUNT" +LOCKER_COOKIE="$(jq -r '.lockers[0].cookie' ${TMP_DIR}/lock_state_first)" +[ "$LOCKER_COOKIE" == "ctdb_reclock_mutex" ] \ + || _fail "unexpected locker cookie: $LOCKER_COOKIE" +LOCKER_DESC="$(jq -r '.lockers[0].description' ${TMP_DIR}/lock_state_first)" +[ "$LOCKER_DESC" == "CTDB cluster lock" ] \ + || _fail "unexpected locker description: $LOCKER_DESC" 
+LOCKER_EXP="$(jq -r '.lockers[0].expiration' ${TMP_DIR}/lock_state_first)" +[ "$LOCKER_EXP" == "0.000000" ] \ + || _fail "unexpected locker expiration: $LOCKER_EXP" + +# second attempt while first is still holding the lock - expect failure +ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" "$POOL" "$OBJECT" \ + > ${TMP_DIR}/second +second_out=$(cat ${TMP_DIR}/second) +[ "$second_out" == "1" ] \ + || _fail "expected lock contention (1), but got $second_out" + +# confirm lock state didn't change +rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \ + > ${TMP_DIR}/lock_state_second + +diff ${TMP_DIR}/lock_state_first ${TMP_DIR}/lock_state_second \ + || _fail "unexpected lock state change" + +# tell first locker to drop the lock and terminate +kill $locker_pid || exit 1 + +wait $locker_pid &> /dev/null + +rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \ + > ${TMP_DIR}/lock_state_third +# echo "without lock: `cat ${TMP_DIR}/lock_state_third`" + +LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_third)" +[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \ + || _fail "unexpected lock name: $LOCK_NAME" +LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_third)" +[ "$LOCK_TYPE" == "exclusive" ] \ + || _fail "unexpected lock type: $LOCK_TYPE" + +LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_third)" +[ $LOCK_COUNT -eq 0 ] \ + || _fail "didn\'t expect any locks in rados state, got $LOCK_COUNT" + +exec >${TMP_DIR}/third -- ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" "$POOL" "$OBJECT" & +locker_pid=$! 
+ +sleep 1 + +rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \ + > ${TMP_DIR}/lock_state_fourth +# echo "with lock again: `cat ${TMP_DIR}/lock_state_fourth`" + +LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_fourth)" +[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \ + || _fail "unexpected lock name: $LOCK_NAME" +LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_fourth)" +[ "$LOCK_TYPE" == "exclusive" ] \ + || _fail "unexpected lock type: $LOCK_TYPE" + +LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_fourth)" +[ $LOCK_COUNT -eq 1 ] || _fail "expected 1 lock in rados state, got $LOCK_COUNT" +LOCKER_COOKIE="$(jq -r '.lockers[0].cookie' ${TMP_DIR}/lock_state_fourth)" +[ "$LOCKER_COOKIE" == "ctdb_reclock_mutex" ] \ + || _fail "unexpected locker cookie: $LOCKER_COOKIE" +LOCKER_DESC="$(jq -r '.lockers[0].description' ${TMP_DIR}/lock_state_fourth)" +[ "$LOCKER_DESC" == "CTDB cluster lock" ] \ + || _fail "unexpected locker description: $LOCKER_DESC" + +kill $locker_pid || exit 1 +wait $locker_pid &> /dev/null + +third_out=$(cat ${TMP_DIR}/third) +[ "$third_out" == "0" ] \ + || _fail "expected lock acquisition (0), but got $third_out" + +# test renew / expire behaviour using a 1s expiry (update period = 500ms) +exec >${TMP_DIR}/forth -- ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" \ + "$POOL" "$OBJECT" 1 & +locker_pid=$! 
+ +sleep 1 + +rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \ + > ${TMP_DIR}/lock_state_fifth_a +#echo "with lock fifth: `cat ${TMP_DIR}/lock_state_fifth_a`" + +LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_fifth_a)" +[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \ + || _fail "unexpected lock name: $LOCK_NAME" +LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_fifth_a)" +[ "$LOCK_TYPE" == "exclusive" ] \ + || _fail "unexpected lock type: $LOCK_TYPE" +LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_fifth_a)" +[ $LOCK_COUNT -eq 1 ] || _fail "expected 1 lock in rados state, got $LOCK_COUNT" +LOCKER_EXP_A="$(jq -r '.lockers[0].expiration' ${TMP_DIR}/lock_state_fifth_a)" +[ "$LOCKER_EXP_A" != "0.000000" ] \ + || _fail "unexpected locker expiration: $LOCKER_EXP_A" +sleep 1 # sleep until renewal +rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \ + > ${TMP_DIR}/lock_state_fifth_b +LOCKER_EXP_B="$(jq -r '.lockers[0].expiration' ${TMP_DIR}/lock_state_fifth_b)" +[ "$LOCKER_EXP_B" != "0.000000" ] \ + || _fail "unexpected locker expiration: $LOCKER_EXP_B" +#echo "lock expiration before renewal $LOCKER_EXP_A, after renewal $LOCKER_EXP_B" +[ "$LOCKER_EXP_B" != "$LOCKER_EXP_A" ] \ + || _fail "locker expiration matches: $LOCKER_EXP_B" + +# no chance to drop the lock, rely on expiry +kill -KILL $locker_pid || exit 1 +wait $locker_pid &> /dev/null +sleep 1 # sleep until lock expiry + +rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \ + > ${TMP_DIR}/lock_state_sixth +#echo "lock expiry sixth: `cat ${TMP_DIR}/lock_state_sixth`" + +LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_sixth)" +[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \ + || _fail "unexpected lock name: $LOCK_NAME" +LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_sixth)" +[ "$LOCK_TYPE" == "exclusive" ] \ + || _fail "unexpected lock type: $LOCK_TYPE" +LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_sixth)" +[ $LOCK_COUNT -eq 0 ] || _fail "expected 0 locks in rados state, got 
$LOCK_COUNT" + +rm ${TMP_DIR}/* +rmdir $TMP_DIR + +echo "$0: all tests passed" diff --git a/ctdb/utils/etcd/ctdb_etcd_lock b/ctdb/utils/etcd/ctdb_etcd_lock new file mode 100755 index 0000000..dac2436 --- /dev/null +++ b/ctdb/utils/etcd/ctdb_etcd_lock @@ -0,0 +1,213 @@ +#!/usr/bin/env python3 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# Copyright (C) 2016 Jose A. Rivera <jarrpa@samba.org> +# Copyright (C) 2016 Ira Cooper <ira@samba.org> +"""CTDB mutex helper using etcd. + +This script is intended to be run as a mutex helper for CTDB. It will try to +connect to an existing etcd cluster and grab an etcd.Lock() to function as +CTDB's cluster lock. Please see ctdb/doc/cluster_mutex_helper.txt for +details on what we're SUPPOSED to be doing. :) To use this, include +the following line in the ctdb.conf: + + cluster lock = !/path/to/script + +You can also pass "-v", "-vv", or "-vvv" to include verbose output in the +CTDB log. Additional "v"s indicate increases in verbosity. + +This mutex helper expects the system Python interpreter to have access to the +etcd Python module. It also expects an etcd cluster to be configured and +running. 
To integrate with this, there is an optional config file of the +following format: + +key = value + +The following configuration variables (and their defaults) are defined for +use by this script: + +port = 2379 # connecting port for the etcd cluster +lock_ttl = 9 # seconds for TTL +refresh = 2 # seconds between attempts to maintain lock +locks_dir = _ctdb # where to store CTDB locks in etcd + # The final etcd directory for any given lock looks like: + # /_locks/{locks_dir}/{netbios name}/ + +In addition, any keyword parameter that can be used to configure an etcd +client may be specified and modified here. For more documentation on these +parameters, see here: https://github.com/jplana/python-etcd/ + +""" +import signal +import time +import sys +import os +import argparse +import logging +import subprocess + +import etcd + +# Helper Functions ------------------------------------------------------------ +# + + +def process_args(): + '''Process command-line arguments and return them. + ''' + parser = argparse.ArgumentParser( + description=__doc__, + epilog='', + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('-v', '--verbose', + action='count', + help='Display verbose output to stderr. ' + 'Default is no output.', + default=0, + ) + parser.add_argument('-c', '--config', + action='store', + help='Configuration file to use. The default behavior ' + 'is to look is the base CTDB configuration ' + 'directory, which can be overwritten by setting ' + 'the CTDB_BASE environment variable, for a file ' + 'called \'etcd\'. Default value is %(default)s.', + default=os.path.join(os.getenv('CTDB_BASE', + '/usr/local/etc/ctdb'), + 'etcd'), + ) + args = parser.parse_args() + + return args + + +def setup_logging(verbose): + '''Setup logging based on specified verbosity. 
+ ''' + + log_levels = [logging.ERROR, logging.WARNING, logging.DEBUG] + logging.basicConfig(level=log_levels[min(verbose, len(log_levels)-1)]) + + +def sigterm_handler(signum, frame): + """Handler for SIGTERM signals. + """ + sys.exit() + + +def print_nonl(out): + """Dumb shortcut for printing to stdout with no newline. + """ + sys.stdout.write(str(out)) + sys.stdout.flush() + + +def int_or_not(s): + """Try to convert input to an integer. + """ + try: + return int(s) + except ValueError: + return s + +# Mainline -------------------------------------------------------------------- +# + + +def main(): + args = process_args() + + setup_logging(args.verbose) + + # etcd config defaults + etcd_config = { + 'port': 2379, + 'locks_dir': '_ctdb', + 'lock_ttl': 9, + 'lock_refresh': 2, + } + # Find and read etcd config file + etcd_client_params = ( + 'host', + 'port', + 'srv_domain', + 'version_prefix', + 'read_timeout', + 'allow_redirect', + 'protocol', + 'cert', + 'ca_cert', + 'username', + 'password', + 'allow_reconnect', + 'use_proxies', + 'expected_cluster_id', + 'per_host_pool_size', + ) + if os.path.isfile(args.config): + f = open(args.config, 'r') + for line in f: + (key, value) = line.split("=", 1) + etcd_config[key.strip()] = int_or_not(value.strip()) + + # Minor hack: call out to shell to retrieve CTDB netbios name and PNN. 
+ tmp = subprocess.Popen("testparm -s --parameter-name 'netbios name'; \ + ctdb pnn", + shell=True, + universal_newlines=True, + stdout=subprocess.PIPE + ).stdout.read().strip() + nb_name, pnn = tmp.split() + + # Try to get and hold the lock + try: + client = etcd.Client( + **{k: etcd_config[k] for k in + set(etcd_client_params).intersection(etcd_config)}) + lock = etcd.Lock(client, etcd_config['locks_dir'] + "/" + nb_name) + lock._uuid = lock._uuid + "_" + pnn + logging.debug("Updated lock UUID: %s", lock.uuid) + ppid = os.getppid() + while True: + lock.acquire(blocking=False, lock_ttl=etcd_config['lock_ttl']) + if lock.is_acquired: + print_nonl(0) + else: + locks = "No locks found." + if logging.getLogger().getEffectiveLevel() == logging.DEBUG: + keys = client.read(lock.path, recursive=True) + if keys is not None: + locks = "Existing locks:\n " + locks += '\n '.join( + (child.key + ": " + child.value for child in + keys.children)) + logging.debug("Lock contention. %s", locks) + print_nonl(1) + break + os.kill(ppid, 0) + time.sleep(etcd_config['lock_refresh']) + except (OSError, SystemExit): + if lock is not None and lock.is_acquired: + lock.release() + except Exception: + print_nonl(3) + if logging.getLogger().getEffectiveLevel() == logging.DEBUG: + raise + + +if __name__ == "__main__": + signal.signal(signal.SIGTERM, sigterm_handler) + + main() diff --git a/ctdb/utils/nagios/README b/ctdb/utils/nagios/README new file mode 100644 index 0000000..99fa6dc --- /dev/null +++ b/ctdb/utils/nagios/README @@ -0,0 +1,56 @@ +check_ctdb 0.3 + +This nagios plugin is free software, and comes with ABSOLUTELY NO WARRANTY. +It may be used, redistributed and/or modified under the terms of the GNU +General Public Licence (see http://www.fsf.org/licensing/licenses/gpl.txt). 
+ +CTDB plugin + +Usage: check_ctdb -i <info> + [ -t <timeout> ] [ -w <warn_range> ] [ -c <crit_range> ] + [ -H <host> ] [-s] [ -l <login_name> ] + [ -V ] [ -h ] + + -?, --usage + Print usage information + -h, --help + Print detailed help screen + -V, --version + Print version information + --extra-opts=[section][@file] + Read options from an ini file. See http://nagiosplugins.org/extra-opts for usage + -i, --info=<info> + Information: One of scriptstatus or ping. + -H, --hostname=<host> + Host name or IP Address. + -s, --sudo + Use sudo. + -l, --login=<login_name> + The user to log in as on the remote machine. + -w, --warning=THRESHOLD + Warning threshold. See + http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT + for the threshold format. + -c, --critical=THRESHOLD + Critical threshold. See + http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT + for the threshold format. + -t, --timeout=INTEGER + Seconds before plugin times out (default: 30) + -v, --verbose + Show details for command-line debugging (can repeat up to 3 times) +Supported commands: + * scriptstatus : + check the ctdb scriptstatus command and return CRITICAL if one of the + scripts fails. + Perfdata count the number of scripts by state (ok, disabled, error, + total). + * ping : + check the ctdb ping command. + Perfdata count the number of nodes, the total ping time and the number + of clients. + Thresholds are checked against the number of nodes. 
+ + +Copyright (c) 2011 Nantes Metropole + diff --git a/ctdb/utils/nagios/check_ctdb b/ctdb/utils/nagios/check_ctdb new file mode 100755 index 0000000..7803f9a --- /dev/null +++ b/ctdb/utils/nagios/check_ctdb @@ -0,0 +1,279 @@ +#!/usr/bin/perl -w +# Nagios plugin to monitor CTDB (Clustered Trivial Database) +# +# License: GPL +# Copyright (c) 2011 Nantes Metropole +# Author: Mathieu Parent <math.parent@gmail.com> +# Contributor(s): - +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +use strict; +use warnings; +use vars qw($PROGNAME $VERSION $output $values $result); +use Nagios::Plugin; +use File::Basename; + +$PROGNAME = basename($0); +$VERSION = '0.4'; + +my $np = Nagios::Plugin->new( + usage => "Usage: %s -i <info>\n" + . " [ -t <timeout> ] [ -w <warn_range> ] [ -c <crit_range> ]\n" + . " [ -H <host> ] [-s] [ -l <login_name> ]\n" + . ' [ -V ] [ -h ]', + version => $VERSION, + plugin => $PROGNAME, + shortname => uc($PROGNAME), + blurb => 'CTDB plugin', + extra => "Supported commands:\n" + . " * scriptstatus :\n" + . " check the ctdb scriptstatus command and return CRITICAL if one of the\n" + . " scripts fails.\n" + . " Perfdata count the number of scripts by state (ok, disabled, error,\n" + . " total).\n" + . " * ping :\n" + . " check the ctdb ping command.\n" + . " Perfdata count the number of nodes, the total ping time and the number\n" + . " of clients.\n" + . " Thresholds are checked against the number of nodes.\n" + . 
"\n\nCopyright (c) 2011 Nantes Metropole", + timeout => 30, +); + +$np->add_arg( + spec => 'info|i=s', + help => "-i, --info=<info>\n" + . ' Information: One of scriptstatus or ping.', + required => 1, +); + +# Remote execution: -H names the ssh target host, -l the user to log in as. +$np->add_arg( + spec => 'hostname|H=s', + help => "-H, --hostname=<host>\n" + . ' Host name or IP Address.', + required => 0, +); + +$np->add_arg( + spec => 'sudo|s', + help => "-s, --sudo\n" + . ' Use sudo.', + required => 0, +); + +$np->add_arg( + spec => 'login|l=s', + help => "-l, --login=<login_name>\n" + . ' The user to log in as on the remote machine.', + required => 0, +); + +$np->add_arg( + spec => 'warning|w=s', + help => "-w, --warning=THRESHOLD\n" + . " Warning threshold. See\n" + . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n" + . ' for the threshold format.', + required => 0, +); + +$np->add_arg( + spec => 'critical|c=s', + help => "-c, --critical=THRESHOLD\n" + . " Critical threshold. See\n" + . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n" + . 
' for the threshold format.', + required => 0, +); + +$np->getopts; + +my $info = $np->opts->info; +my $hostname = $np->opts->hostname; +my $login = $np->opts->login; +my $sudo = $np->opts->sudo; +my $warning = $np->opts->warning; +my $critical = $np->opts->critical; +my $percw; +my $percc; + +$output = ""; + +if (defined($critical)) +{ + ($percc, $critical) = check_percantage($critical); + $critical = undef if ($critical eq ''); +} + +if (defined($warning)) +{ + ($percw, $warning) = check_percantage($warning); + $warning = undef if ($warning eq ''); +} + +$np->set_thresholds(critical => $critical, warning => $warning); + +my $stderr; + +# Run the given command (prefixed with sudo and/or ssh when requested) with +# its stdout readable on PIPE. STDERR is saved in OLDERR and redirected into +# $stderr. On failure $result/$output are set and STDERR is restored here, +# because callers skip safe_close_command() in that case. +sub safe_open_command { + unshift @_, "sudo" if $sudo; + if ($hostname) { + unshift @_, $hostname; + unshift @_, "-l", $login if $login; + unshift @_, "ssh"; + } + open(OLDERR, ">&", \*STDERR) or die "Can't dup STDERR: $!"; + $stderr = ""; + close STDERR; + open(STDERR, ">>", \$stderr) or die "Can't open STDERR: $!"; + if ($np->opts->verbose) { + print "Executing: @_\n"; + } + if (!open(PIPE, '-|', @_)) { + $result = CRITICAL; + $output .= "Cannot open command '@_': $! ($stderr). "; + # restore STDERR: dup OLDERR back with ">&" (plain ">" is a write-open, not a dup) + open(STDERR, ">&", \*OLDERR) or die "Can't dup OLDERR: $!"; + } +} + +sub safe_close_command { + close(PIPE); + + if ($? == -1) { + $result = CRITICAL; + $output .= "failed to execute: $!. "; + } elsif ($? & 127) { + $result = CRITICAL; + $output .= sprintf("child died with signal %d, %s coredump. ", + ($? & 127), ($? & 128) ? 'with' : 'without'); + } elsif ($? >> 8) { + if (($? >> 8) == 255) { + # ctdb returns -1=255 if any node is disconnected + $result = WARNING; + $output .= sprintf("child exited with value %d. ", $? >> 8) if $output eq ""; + } else { + $result = CRITICAL; + $output .= sprintf("child exited with value %d. ", $? 
>> 8); + } + } + # restore STDERR + open(STDERR, ">&OLDERR") or die "Can't dup OLDERR: $!"; +} + +# main : + +if ($info eq "scriptstatus") { + $result = OK; + safe_open_command('ctdb', '-X', 'scriptstatus'); + if ($result == OK) { + my $script_count = 0; + my $ok_script_count = 0; + my $disabled_script_count = 0; + my $error_script_count = 0; + while (<PIPE>) { + next if $. == 1; # Header + $script_count++; + chop; + # Fields are separated by a literal '|'. The pipe must be escaped: an + # unescaped "|" pattern is an empty regex alternation and splits the + # line between every character. + my ($col0, $type, $name, $code, $status, $start, $end, @error) = split(/\|/); + if ($col0 ne '') { + # Old version, before 30 Aug 2011 and commit a779d83a6213 + ($type, $name, $code, $status, $start, $end, @error) = ($col0, $type, $name, $code, $status, $start, $end, @error); + } + my $error = join(':', @error); + if ($error ne "") { + $output = "$output ;; " if $output; + $output = "$output$name ($status=$code): $error "; + if ($result != CRITICAL) { + $result = WARNING; + } + } + if ($status eq "OK") { + $ok_script_count++; + next; + } + if ($status eq "DISABLED") { + $disabled_script_count++; + next; + } + $error_script_count++; + $result = WARNING; + } + safe_close_command(); + $np->add_perfdata(label => "ok", value => $ok_script_count, uom => '', + min => 0, max => $script_count); + $np->add_perfdata(label => "disabled", value => $disabled_script_count, uom => '', + min => 0, max => $script_count); + $np->add_perfdata(label => "error", value => $error_script_count, uom => '', + min => 0, max => $script_count, warning => '0', critical => '0'); + $np->add_perfdata(label => "total", value => $script_count, uom => '', + min => 0, max => $script_count); + if ($result == OK) { + $result = $np->check_threshold(check => $error_script_count, warning => '0', critical => '0'); + } + } + $np->nagios_exit($result, $output); +} elsif ($info eq "ping") { + # Get expected nodes count + $result = OK; + safe_open_command('cat', '/etc/ctdb/nodes'); + 1 while( <PIPE> ); + my $max_nodes_count = $.; + safe_close_command(); + # ctdb ping + $result = OK; + 
safe_open_command('ctdb', '-n', 'all', 'ping'); + if ($result == OK) { + my $nodes_count = 0; + my $time_total = 0.0; + my $clients_count = 0; + while (<PIPE>) { + chop; + if ($_ =~ /^response from (\d+) time=([0-9.]+) sec \((\d+) clients\)$/) { + my ($node_id, $time, $clients) = ($1,$2,$3); + $nodes_count += 1; + $time_total += $time; + $clients_count += $clients; + } elsif ($_ =~ /^Unable to get ping response from node (\d+)$/) { + # + } else { + $result = CRITICAL; + $output .= "'$_' doesn't match regexp. " + } + } + $output .= sprintf("%d missing nodes. ", $max_nodes_count - $nodes_count) if $nodes_count < $max_nodes_count; + safe_close_command(); + $np->add_perfdata(label => "nodes", value => $nodes_count, uom => '', + min => 0, max => $max_nodes_count, warning => $warning, critical => $critical); + $np->add_perfdata(label => "ping_time", value => $time_total, uom => 's', + min => 0, max => undef); + $np->add_perfdata(label => "clients", value => $clients_count, uom => '', + min => 0, max => undef); + if ($result == OK) { + $result = $np->check_threshold(check => $nodes_count); + } + } + $np->nagios_exit($result, $output); +} else { + $np->nagios_exit(UNKNOWN, "Unknown command: '$info'"); +} + +sub check_percantage +{ + my ($number) = shift(@_); + my $perc = $number =~ s/\%//; + return ($perc, $number); +} + diff --git a/ctdb/utils/ping_pong/ping_pong.c b/ctdb/utils/ping_pong/ping_pong.c new file mode 100644 index 0000000..3d28f34 --- /dev/null +++ b/ctdb/utils/ping_pong/ping_pong.c @@ -0,0 +1,303 @@ +/* + A ping-pong fcntl byte range lock test + + Copyright (C) Andrew Tridgell 2002 + Copyright (C) Michael Adam 2012 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +/* + This measures the ping-pong byte range lock latency. It is + especially useful on a cluster of nodes sharing a common lock + manager as it will give some indication of the lock managers + performance under stress. + + tridge@samba.org, February 2002 + +*/ + +#define _XOPEN_SOURCE 500 + +#include <stdio.h> +#include <stdlib.h> +#include <sys/time.h> +#include <time.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <stdbool.h> + +static struct timeval tp1,tp2; + +static int do_reads, do_writes, use_mmap, do_check, do_brl_test; + +static void start_timer(void) +{ + gettimeofday(&tp1,NULL); +} + +static double end_timer(void) +{ + gettimeofday(&tp2,NULL); + return (tp2.tv_sec + (tp2.tv_usec*1.0e-6)) - + (tp1.tv_sec + (tp1.tv_usec*1.0e-6)); +} + +/* lock a byte range in a open file */ +static int lock_range(int fd, int offset, int len, bool wait) +{ + struct flock lock; + + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + lock.l_start = offset; + lock.l_len = len; + lock.l_pid = 0; + + return fcntl(fd, wait ? 
F_SETLKW : F_SETLK, &lock); +} + +/* check whether we could place a lock */ +static int check_lock(int fd, int offset, int len) +{ + struct flock lock; + int ret; + + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + lock.l_start = offset; + lock.l_len = len; + lock.l_pid = 0; + + ret = fcntl(fd, F_GETLK, &lock); + if (ret != 0) { + printf("error calling fcntl F_GETLCK: %s\n", strerror(errno)); + return -1; + } + + if (lock.l_type == F_UNLCK) { + /* we would be able to place the lock */ + return 0; + } + + /* we would not be able to place lock */ + printf("check_lock failed: lock held: " + "pid='%d', type='%d', start='%d', len='%d'\n", + (int)lock.l_pid, (int)lock.l_type, (int)lock.l_start, (int)lock.l_len); + return 1; +} + +/* unlock a byte range in a open file */ +static int unlock_range(int fd, int offset, int len) +{ + struct flock lock; + + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = offset; + lock.l_len = len; + lock.l_pid = 0; + + return fcntl(fd,F_SETLKW,&lock); +} + +/* run the ping pong test on fd */ +static void ping_pong(int fd, int num_locks) +{ + unsigned count = 0; + int i=0, loops=0; + unsigned char *val; + unsigned char incr=0, last_incr=0; + unsigned char *p = NULL; + int ret; + + ret = ftruncate(fd, num_locks+1); + if (ret == -1) { + printf("ftruncate failed: %s\n", strerror(errno)); + return; + } + + if (use_mmap) { + p = mmap(NULL, num_locks+1, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) { + printf("mmap failed: %s\n", strerror(errno)); + return; + } + } + + val = (unsigned char *)calloc(num_locks+1, sizeof(unsigned char)); + if (val == NULL) { + printf("calloc failed\n"); + if (use_mmap) { + munmap(p, num_locks+1); + } + return; + } + + start_timer(); + + ret = lock_range(fd, 0, 1, true); + if (ret != 0) { + printf("initial lock at 0 failed! - %s\n", strerror(errno)); + goto done; + } + + i = 0; + + while (1) { + if (lock_range(fd, (i+1) % num_locks, 1, true) != 0) { + printf("lock at %d failed! 
- %s\n", + (i+1) % num_locks, strerror(errno)); + } + if (do_check) { + ret = check_lock(fd, i, 1); + if (ret != 0) { + goto done; + } + } + if (do_reads) { + /* start from the last value seen so that a failed pread() yields incr == 0 instead of reading an indeterminate byte */ + unsigned char c = val[i]; + if (use_mmap) { + c = p[i]; + } else if (pread(fd, &c, 1, i) != 1) { + printf("read failed at %d\n", i); + } + incr = c - val[i]; + val[i] = c; + } + if (do_writes) { + char c = val[i] + 1; + if (use_mmap) { + p[i] = c; + } else if (pwrite(fd, &c, 1, i) != 1) { + printf("write failed at %d\n", i); + } + } + if (unlock_range(fd, i, 1) != 0) { + printf("unlock at %d failed! - %s\n", + i, strerror(errno)); + } + i = (i+1) % num_locks; + count++; + if (loops > num_locks && incr != last_incr) { + last_incr = incr; + printf("data increment = %u\n", incr); + fflush(stdout); + } + if (end_timer() > 1.0) { + printf("%8u locks/sec\r", + (unsigned)(2*count/end_timer())); + fflush(stdout); + start_timer(); + count=0; + } + loops++; + } + +done: + if (use_mmap) { + munmap(p, num_locks+1); + } + free(val); +} + +static void usage(void) +{ + printf("ping_pong -rwmc <file> <num_locks>\n"); + printf("ping_pong -l <file>\n\n"); + printf("Options\n"); + printf(" -r do reads\n"); + printf(" -w do writes\n"); + printf(" -m use mmap\n"); + printf(" -c check locks\n"); + printf(" -l test for working byte range locks\n"); +} + +int main(int argc, char *argv[]) +{ + char *fname; + int fd, num_locks; + int c; + + while ((c = getopt(argc, argv, "rwmcl")) != -1) { + switch (c){ + case 'w': + do_writes = 1; + break; + case 'r': + do_reads = 1; + break; + case 'm': + use_mmap = 1; + break; + case 'c': + do_check = 1; + break; + case 'l': + do_brl_test = 1; + break; + default: + fprintf(stderr, "Unknown option '%c'\n", c); + exit(1); + } + } + + argv += optind; + argc -= optind; + + if (argc < 1) { + usage(); + exit(1); + } + + fname = argv[0]; + + fd = open(fname, O_CREAT|O_RDWR, 0600); + if (fd == -1) { + /* report why the test file could not be opened instead of exiting silently */ + fprintf(stderr, "open of '%s' failed: %s\n", fname, strerror(errno)); + exit(1); + } + + if (do_brl_test) { + if (lock_range(fd, 0, 0, false) != 0) { + printf("file already locked, 
calling check_lock to tell us who has it locked:\n"); + (void)check_lock(fd, 0, 0); + printf("Working POSIX byte range locks\n"); + exit(0); + } + + printf("Holding lock, press any key to continue...\n"); + printf("You should run the same command on another node now.\n"); + (void)getchar(); + printf("Good bye.\n"); + exit(0); + } + + if (argc < 2) { + usage(); + exit(1); + } + + num_locks = atoi(argv[1]); + if (num_locks <= 0) { + printf("num_locks should be > 0\n"); + exit(1); + } + + ping_pong(fd, num_locks); + + return 0; +} diff --git a/ctdb/utils/pmda/Install b/ctdb/utils/pmda/Install new file mode 100644 index 0000000..a56a635 --- /dev/null +++ b/ctdb/utils/pmda/Install @@ -0,0 +1,36 @@ +#! /bin/sh +# +# Copyright (c) 1997 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Install the ctdb PMDA and/or PMNS +# + +. $PCP_DIR/etc/pcp.env +. 
$PCP_SHARE_DIR/lib/pmdaproc.sh + +iam=ctdb +pmda_interface=2 + +# runs as daemon and only supports pipe IPC +daemon_opt=true +dso_opt=false +pipe_opt=true +socket_opt=false + +pmdaSetup +pmdaInstall +exit 0 diff --git a/ctdb/utils/pmda/README b/ctdb/utils/pmda/README new file mode 100644 index 0000000..f8dbbbc --- /dev/null +++ b/ctdb/utils/pmda/README @@ -0,0 +1,84 @@ +CTDB PMDA +=========== + +This PMDA extracts metrics from the locally running ctdbd daemon for +export to PMCD. + +Note: + This PMDA may be remade from source and hence requires IDO (or + more specifically a C compiler) to be installed. + + Uses of make(1) may fail (without removing or clobbering files) + if the C compiler cannot be found. This is most likely to + happen when running the PMDA ./Install script. + + The only remedial action is to install the C compiler, or + hand-craft changes to the Makefile. + +Metrics +======= + +The file ./help contains descriptions for all of the metrics exported +by this PMDA. + +Once the PMDA has been installed, the following command will list all +the available metrics and their explanatory "help" text: + + $ pminfo -fT ctdb + +Installation +============ + + + # cd $PCP_PMDAS_DIR/ctdb + + + Check that there is no clash in the Performance Metrics Domain + defined in ./domain.h and the other PMDAs currently in use (see + $PCP_PMCDCONF_PATH). If there is, edit ./domain.h to choose another + domain number. + + + Then simply use + + # ./Install + + and choose both the "collector" and "monitor" installation + configuration options. 
+ + You will be prompted to choose either a daemon implementation + or a DSO implementation of the PMDA, and in the case of the daemon + variant to select an IPC method -- everything else is automated + +De-installation +=============== + + + Simply use + + # cd $PCP_PMDAS_DIR/ctdb + # ./Remove + +Troubleshooting +=============== + + + After installing or restarting the agent, the PMCD log file + ($PCP_LOG_DIR/pmcd/pmcd.log) and the PMDA log file + ($PCP_LOG_DIR/pmcd/pmda_ctdb.log) should be checked for any warnings + or errors. + + +Adding a New Metric +=================== + +This section walks through the development task of adding a new metric to the +CTDB PMDA. + + + Define the metric in the pmns file with a unique metric id. See the pmns(4) + man page for details. + + + Add a description of the metric to the help file. + + + Taking note of the previously assigned metric id, add a new entry to the + metrictab structure in pmda_ctdb.c. See the pmdaInit(3) man page for + details. + + + Ensure the counter is already a member of the ctdb_statistics structure. + Finally, add code to pmda_ctdb_fetch_cb() to handle fetch requests for the + newly defined metric. diff --git a/ctdb/utils/pmda/Remove b/ctdb/utils/pmda/Remove new file mode 100644 index 0000000..7d1c509 --- /dev/null +++ b/ctdb/utils/pmda/Remove @@ -0,0 +1,29 @@ +#! /bin/sh +# +# Copyright (c) 1997 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Remove the ctdb PMDA +# + +. $PCP_DIR/etc/pcp.env +. $PCP_SHARE_DIR/lib/pmdaproc.sh + +iam=ctdb + +pmdaSetup +pmdaRemove +exit 0 diff --git a/ctdb/utils/pmda/domain.h b/ctdb/utils/pmda/domain.h new file mode 100644 index 0000000..0bed7fe --- /dev/null +++ b/ctdb/utils/pmda/domain.h @@ -0,0 +1,19 @@ +/* domain.h + * + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#define CTDB 110 diff --git a/ctdb/utils/pmda/help b/ctdb/utils/pmda/help new file mode 100644 index 0000000..0e9984e --- /dev/null +++ b/ctdb/utils/pmda/help @@ -0,0 +1,106 @@ +# +# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. 
+# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# ctdb PMDA help file in the ASCII format +# +# lines beginning with a # are ignored +# lines beginning @ introduce a new entry of the form +# @ metric_name oneline-text +# help text goes +# here over multiple lines +# ... +# +# the metric_name is decoded against the default PMNS -- as a special case, +# a name of the form NNN.MM (for numeric NNN and MM) is interpreted as an +# instance domain identification, and the text describes the instance domain +# +# blank lines before the @ line are ignored +# + +@ ctdb.num_clients number of clients connected to ctdbd + +@ ctdb.frozen whether any databases are frozen + +@ ctdb.recovering whether recovery is active + +@ ctdb.client_packets_sent number of packets sent to all clients + +@ ctdb.client_packets_recv number of packets received from all clients + +@ ctdb.node_packets_sent number of packets sent to other nodes + +@ ctdb.node_packets_recv number of packets received from other nodes + +@ ctdb.keepalive_packets_sent number of keepalive packets sent to other nodes + +@ ctdb.keepalive_packets_recv number of keepalive packets received from other nodes + +@ ctdb.node.req_call number of node CTDB_REQ_CALL packets handled + +@ ctdb.node.reply_call number of node CTDB_REPLY_CALL packets handled + +@ ctdb.node.req_dmaster number of node CTDB_REQ_DMASTER packets handled + +@ ctdb.node.reply_dmaster number of node CTDB_REPLY_DMASTER packets handled + +@ ctdb.node.reply_error number of node CTDB_REPLY_ERROR packets handled + +@ ctdb.node.req_message number of node 
CTDB_REQ_MESSAGE packets handled + +@ ctdb.node.req_control number of node CTDB_REQ_CONTROL packets handled + +@ ctdb.node.reply_control number of node CTDB_REPLY_CONTROL packets handled + +@ ctdb.client.req_call number of client CTDB_REQ_CALL packets handled + +@ ctdb.client.req_message number of client CTDB_REQ_MESSAGE packets handled + +@ ctdb.client.req_control number of client CTDB_REQ_CONTROL packets handled + +@ ctdb.timeouts.call (counter not implemented) number of call timeouts + +@ ctdb.timeouts.control number of node control message request timeouts awaiting reply + +@ ctdb.timeouts.traverse number of database traversal timeouts + +@ ctdb.total_calls total number of client ctdb request calls received + +@ ctdb.pending_calls total number of client ctdb request calls in progress + +@ ctdb.lockwait_calls number of tdb chainlock lockwait calls + +@ ctdb.pending_lockwait_calls number of lockwait calls waiting for a lock + +@ ctdb.childwrite_calls number of childwrite calls + +@ ctdb.pending_childwrite_calls number of childwrite calls in progress + +@ ctdb.memory_used total size of the ctdbd null talloc pool + +@ ctdb.max_hop_count maximum hops performed by a CTDB_REQ_CALL packet + +@ ctdb.max_reclock_ctdbd maximum recovery lock latency during setrecmode + +@ ctdb.max_reclock_recd maximum recovery lock latency as reported by the recovery process + +@ ctdb.max_call_latency maximum time spent handling a client request call + +@ ctdb.max_lockwait_latency maximum time spent waiting for a tdb chainlock + +@ ctdb.max_childwrite_latency maximum time spent performing a childwrite + +@ ctdb.num_recoveries number of recoveries finished diff --git a/ctdb/utils/pmda/pmda_ctdb.c b/ctdb/utils/pmda/pmda_ctdb.c new file mode 100644 index 0000000..4f7933d --- /dev/null +++ b/ctdb/utils/pmda/pmda_ctdb.c @@ -0,0 +1,559 @@ +/* + * CTDB Performance Metrics Domain Agent (PMDA) for Performance Co-Pilot (PCP) + * + * Copyright (c) 1995,2004 Silicon Graphics, Inc. All Rights Reserved. 
+ * Copyright (c) 2011 David Disseldorp + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "replace.h" +#include "system/network.h" + +#include <talloc.h> +#include <tevent.h> +#include <tdb.h> + +#include "lib/util/time.h" +#include "lib/util/blocking.h" + +#include "client/client.h" +#include "client/client_sync.h" + +#include <pcp/pmapi.h> +#include <pcp/pmda.h> + +#ifdef HAVE___PMID_INT +#include <pcp/impl.h> + +#define pmID_cluster(id) id->cluster +#define pmID_item(id) id->item +#define pmGetProgname() pmProgname +#define pmSetProgname(a) __pmSetProgname(a) +#endif + +#include "domain.h" + +/* + * CTDB PMDA + * + * This PMDA connects to the locally running ctdbd daemon and pulls + * statistics for export via PCP. The ctdbd Unix domain socket path can be + * specified with the CTDB_SOCKET environment variable, otherwise the default + * path is used. + */ + +/* + * All metrics supported in this PMDA - one table entry for each. + * The 4th field specifies the serial number of the instance domain + * for the metric, and must be either PM_INDOM_NULL (denoting a + * metric that only ever has a single value), or the serial number + * of one of the instance domains declared in the instance domain table + * (i.e. in indomtab, above). 
+ */ +static pmdaMetric metrictab[] = { + /* num_clients */ + { NULL, { PMDA_PMID(0,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(0,0,0,0,0,0) }, }, + /* frozen */ + { NULL, { PMDA_PMID(0,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(0,0,0,0,0,0) }, }, + /* recovering */ + { NULL, { PMDA_PMID(0,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(0,0,0,0,0,0) }, }, + /* client_packets_sent */ + { NULL, { PMDA_PMID(0,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* client_packets_recv */ + { NULL, { PMDA_PMID(0,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* node_packets_sent */ + { NULL, { PMDA_PMID(0,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* node_packets_recv */ + { NULL, { PMDA_PMID(0,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* keepalive_packets_sent */ + { NULL, { PMDA_PMID(0,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* keepalive_packets_recv */ + { NULL, { PMDA_PMID(0,8), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* req_call */ + { NULL, { PMDA_PMID(1,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* reply_call */ + { NULL, { PMDA_PMID(1,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* req_dmaster */ + { NULL, { PMDA_PMID(1,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* reply_dmaster */ + { NULL, { PMDA_PMID(1,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* reply_error */ + { NULL, { PMDA_PMID(1,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* req_message */ + { NULL, { 
PMDA_PMID(1,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* req_control */ + { NULL, { PMDA_PMID(1,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* reply_control */ + { NULL, { PMDA_PMID(1,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* req_call */ + { NULL, { PMDA_PMID(2,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* req_message */ + { NULL, { PMDA_PMID(2,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* req_control */ + { NULL, { PMDA_PMID(2,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* call */ + { NULL, { PMDA_PMID(3,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,0) }, }, + /* control */ + { NULL, { PMDA_PMID(3,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,0) }, }, + /* traverse */ + { NULL, { PMDA_PMID(3,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,0) }, }, + /* total_calls */ + { NULL, { PMDA_PMID(0,9), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* pending_calls */ + { NULL, { PMDA_PMID(0,10), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(0,0,0,0,0,0) }, }, + /* locks.num_calls */ + { NULL, { PMDA_PMID(0,11), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* locks.num_pending */ + { NULL, { PMDA_PMID(0,12), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(0,0,0,0,0,0) }, }, + /* childwrite_calls */ + { NULL, { PMDA_PMID(0,13), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER, + PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, }, + /* pending_childwrite_calls */ + { NULL, { PMDA_PMID(0,14), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(0,0,0,0,0,0) }, }, + /* memory_used */ + 
{ NULL, { PMDA_PMID(0,15), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE,0,0) }, }, + /* max_hop_count */ + { NULL, { PMDA_PMID(0,16), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(0,0,0,0,0,0) }, }, + /* reclock.ctdbd.max */ + { NULL, { PMDA_PMID(0,17), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, }, + /* reclock.recd.max */ + { NULL, { PMDA_PMID(0,18), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, }, + /* call_latency.max */ + { NULL, { PMDA_PMID(0,19), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, }, + /* locks.latency.max */ + { NULL, { PMDA_PMID(0,20), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, }, + /* childwrite_latency.max */ + { NULL, { PMDA_PMID(0,21), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, }, + /* num_recoveries */ + { NULL, { PMDA_PMID(0,22), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT, + PMDA_PMUNITS(0,0,0,0,0,0) }, }, +}; + +static struct tevent_context *ev; +static struct ctdb_client_context *client; +static struct ctdb_statistics *stats; + +static void +pmda_ctdb_disconnected(void *args) +{ + fprintf(stderr, "ctdbd unreachable\n"); + TALLOC_FREE(client); +} + + +static int +pmda_ctdb_daemon_connect(void) +{ + const char *socket_name; + int ret; + + ev = tevent_context_init(NULL); + if (ev == NULL) { + fprintf(stderr, "Failed to init event ctx\n"); + return -1; + } + + socket_name = getenv("CTDB_SOCKET"); + if (socket_name == NULL) { + socket_name = CTDB_SOCKET; + } + + ret = ctdb_client_init(ev, ev, socket_name, &client); + if (ret != 0) { + fprintf(stderr, "Failed to connect to ctdb daemon via %s\n", + socket_name); + goto err_ev; + } + + ctdb_client_set_disconnect_callback(client, pmda_ctdb_disconnected, + NULL); + + return 0; + +err_ev: + talloc_free(ev); + client = NULL; + return -1; 
+} + +static void +pmda_ctdb_daemon_disconnect(void) +{ + TALLOC_FREE(client); + talloc_free(ev); +} + +static int +fill_base(unsigned int item, pmAtomValue *atom) +{ + switch (item) { + case 0: + atom->ul = stats->num_clients; + break; + case 1: + atom->ul = stats->frozen; + break; + case 2: + atom->ul = stats->recovering; + break; + case 3: + atom->ul = stats->client_packets_sent; + break; + case 4: + atom->ul = stats->client_packets_recv; + break; + case 5: + atom->ul = stats->node_packets_sent; + break; + case 6: + atom->ul = stats->node_packets_recv; + break; + case 7: + atom->ul = stats->keepalive_packets_sent; + break; + case 8: + atom->ul = stats->keepalive_packets_recv; + break; + case 9: + atom->ul = stats->total_calls; + break; + case 10: + atom->ul = stats->pending_calls; + break; + case 11: + atom->ul = stats->locks.num_calls; + break; + case 12: + atom->ul = stats->locks.num_pending; + break; + case 13: + atom->ul = stats->childwrite_calls; + break; + case 14: + atom->ul = stats->pending_childwrite_calls; + break; + case 15: + atom->ul = stats->memory_used; + break; + case 16: + atom->ul = stats->max_hop_count; + break; + case 17: + atom->d = stats->reclock.ctdbd.max; + break; + case 18: + atom->d = stats->reclock.recd.max; + break; + case 19: + atom->d = stats->call_latency.max; + break; + case 20: + atom->d = stats->locks.latency.max; + break; + case 21: + atom->d = stats->childwrite_latency.max; + break; + case 22: + atom->ul = stats->num_recoveries; + break; + default: + return PM_ERR_PMID; + } + + return 0; +} + +static int +fill_node(unsigned int item, pmAtomValue *atom) +{ + switch (item) { + case 0: + atom->ul = stats->node.req_call; + break; + case 1: + atom->ul = stats->node.reply_call; + break; + case 2: + atom->ul = stats->node.req_dmaster; + break; + case 3: + atom->ul = stats->node.reply_dmaster; + break; + case 4: + atom->ul = stats->node.reply_error; + break; + case 5: + atom->ul = stats->node.req_message; + break; + case 6: + atom->ul 
= stats->node.req_control; + break; + case 7: + atom->ul = stats->node.reply_control; + break; + default: + return PM_ERR_PMID; + } + + return 0; +} + + +static int +fill_client(unsigned int item, pmAtomValue *atom) +{ + switch (item) { + case 0: + atom->ul = stats->client.req_call; + break; + case 1: + atom->ul = stats->client.req_message; + break; + case 2: + atom->ul = stats->client.req_control; + break; + default: + return PM_ERR_PMID; + } + + return 0; +} + +static int +fill_timeout(unsigned int item, pmAtomValue *atom) +{ + switch (item) { + case 0: + atom->ul = stats->timeouts.call; + break; + case 1: + atom->ul = stats->timeouts.control; + break; + case 2: + atom->ul = stats->timeouts.traverse; + break; + default: + return PM_ERR_PMID; + } + + return 0; +} + +/* + * callback provided to pmdaFetch + */ +static int +pmda_ctdb_fetch_cb(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom) +{ + int ret; +#ifdef HAVE___PMID_INT + __pmID_int *id = (__pmID_int *)&(mdesc->m_desc.pmid); +#else + pmID id = *(pmID *)&(mdesc->m_desc.pmid); +#endif + + if (inst != PM_IN_NULL) { + return PM_ERR_INST; + } + + if (stats == NULL) { + fprintf(stderr, "stats not available\n"); + ret = PM_ERR_VALUE; + goto err_out; + } + + + switch (pmID_cluster(id)) { + case 0: + ret = fill_base(pmID_item(id), atom); + if (ret) { + goto err_out; + } + break; + case 1: + ret = fill_node(pmID_item(id), atom); + if (ret) { + goto err_out; + } + break; + case 2: + ret = fill_client(pmID_item(id), atom); + if (ret) { + goto err_out; + } + break; + case 3: + ret = fill_timeout(pmID_item(id), atom); + if (ret) { + goto err_out; + } + break; + default: + return PM_ERR_PMID; + } + + ret = 0; +err_out: + return ret; +} + +/* + * This routine is called once for each pmFetch(3) operation, so is a + * good place to do once-per-fetch functions, such as value caching or + * instance domain evaluation. 
+ */ +static int +pmda_ctdb_fetch(int numpmid, pmID pmidlist[], pmResult **resp, pmdaExt *pmda) +{ + int ret; + + if (client == NULL) { + fprintf(stderr, "attempting reconnect to ctdbd\n"); + ret = pmda_ctdb_daemon_connect(); + if (ret < 0) { + fprintf(stderr, "reconnect failed\n"); + return PM_ERR_VALUE; + } + } + + ret = ctdb_ctrl_statistics(client, ev, client, CTDB_CURRENT_NODE, + tevent_timeval_current_ofs(1,0), &stats); + if (ret != 0) { + fprintf(stderr, "ctdb control for statistics failed, reconnecting\n"); + pmda_ctdb_daemon_disconnect(); + ret = PM_ERR_VALUE; + goto err_out; + } + + ret = pmdaFetch(numpmid, pmidlist, resp, pmda); + + talloc_free(stats); +err_out: + return ret; +} + +void pmda_ctdb_init(pmdaInterface *dp); + +/* + * Initialise the agent + */ +void +pmda_ctdb_init(pmdaInterface *dp) +{ + if (dp->status != 0) { + return; + } + + dp->version.two.fetch = pmda_ctdb_fetch; + pmdaSetFetchCallBack(dp, pmda_ctdb_fetch_cb); + + pmdaInit(dp, NULL, 0, metrictab, + (sizeof(metrictab) / sizeof(metrictab[0]))); +} + +static char * +helpfile(void) +{ + static char buf[MAXPATHLEN]; + + if (!buf[0]) { + snprintf(buf, sizeof(buf), "%s/ctdb/help", + pmGetConfig("PCP_PMDAS_DIR")); + } + return buf; +} + +static void +usage(void) +{ + fprintf(stderr, "Usage: %s [options]\n\n", pmGetProgname()); + fputs("Options:\n" + " -d domain use domain (numeric) for metrics domain of PMDA\n" + " -l logfile write log into logfile rather than using default log name\n" + "\nExactly one of the following options may appear:\n" + " -i port expect PMCD to connect on given inet port (number or name)\n" + " -p expect PMCD to supply stdin/stdout (pipe)\n" + " -u socket expect PMCD to connect on given unix domain socket\n", + stderr); + exit(1); +} + +/* + * Set up the agent if running as a daemon. 
+ */ +int +main(int argc, char **argv) +{ + int err = 0; + char log_file[] = "pmda_ctdb.log"; + pmdaInterface dispatch; + + pmSetProgname(argv[0]); + + pmdaDaemon(&dispatch, PMDA_INTERFACE_2, argv[0], CTDB, + log_file, helpfile()); + + if (pmdaGetOpt(argc, argv, "d:i:l:pu:?", &dispatch, &err) != EOF) { + err++; + } + + if (err) { + usage(); + } + + pmdaOpenLog(&dispatch); + pmda_ctdb_init(&dispatch); + pmdaConnect(&dispatch); + pmdaMain(&dispatch); + + exit(0); +} + diff --git a/ctdb/utils/pmda/pmns b/ctdb/utils/pmda/pmns new file mode 100644 index 0000000..dc7e3ac --- /dev/null +++ b/ctdb/utils/pmda/pmns @@ -0,0 +1,73 @@ +/* + * Metrics for CTDB PMDA + * + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2011 David Disseldorp + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +ctdb { + num_clients CTDB:0:0 + frozen CTDB:0:1 + recovering CTDB:0:2 + client_packets_sent CTDB:0:3 + client_packets_recv CTDB:0:4 + node_packets_sent CTDB:0:5 + node_packets_recv CTDB:0:6 + keepalive_packets_sent CTDB:0:7 + keepalive_packets_recv CTDB:0:8 + node + client + timeouts + total_calls CTDB:0:9 + pending_calls CTDB:0:10 + lockwait_calls CTDB:0:11 + pending_lockwait_calls CTDB:0:12 + childwrite_calls CTDB:0:13 + pending_childwrite_calls CTDB:0:14 + memory_used CTDB:0:15 + max_hop_count CTDB:0:16 + max_reclock_ctdbd CTDB:0:17 + max_reclock_recd CTDB:0:18 + max_call_latency CTDB:0:19 + max_lockwait_latency CTDB:0:20 + max_childwrite_latency CTDB:0:21 + num_recoveries CTDB:0:22 +} + +ctdb.node { + req_call CTDB:1:0 + reply_call CTDB:1:1 + req_dmaster CTDB:1:2 + reply_dmaster CTDB:1:3 + reply_error CTDB:1:4 + req_message CTDB:1:5 + req_control CTDB:1:6 + reply_control CTDB:1:7 +} + +ctdb.client { + req_call CTDB:2:0 + req_message CTDB:2:1 + req_control CTDB:2:2 +} + +ctdb.timeouts { + call CTDB:3:0 + control CTDB:3:1 + traverse CTDB:3:2 +} + diff --git a/ctdb/utils/pmda/root b/ctdb/utils/pmda/root new file mode 100644 index 0000000..ff036ed --- /dev/null +++ b/ctdb/utils/pmda/root @@ -0,0 +1,10 @@ +/* + * fake "root" for validating the local PMNS subtree + */ + +#include <stdpmid> + +root { ctdb } + +#include "pmns" + diff --git a/ctdb/utils/smnotify/smnotify.c b/ctdb/utils/smnotify/smnotify.c new file mode 100644 index 0000000..5907bd6 --- /dev/null +++ b/ctdb/utils/smnotify/smnotify.c @@ -0,0 +1,151 @@ +/* + simple smnotify tool + + Copyright (C) Ronnie Sahlberg 2007 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software 
Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <stdlib.h> +#include "smnotify.h" +#include "popt.h" + +static char *client = NULL; +static const char *ip = NULL; +static char *server = NULL; +static int stateval = 0; +static int clientport = 0; +static int sendport = 0; + +static void usage(void) +{ + exit(0); +} + +static int create_socket(const char *addr, int port) +{ + int s; + struct sockaddr_in sock_in; + + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (s == -1) { + printf("Failed to open local socket\n"); + exit(10); + } + + bzero(&sock_in, sizeof(sock_in)); + sock_in.sin_family = AF_INET; + sock_in.sin_port = htons(port); + inet_aton(addr, &sock_in.sin_addr); + if (bind(s, (struct sockaddr *)&sock_in, sizeof(sock_in)) == -1) { + printf("Failed to bind to local socket\n"); + exit(10); + } + + return s; +} + +int main(int argc, const char *argv[]) +{ + struct poptOption popt_options[] = { + POPT_AUTOHELP + { "client", 'c', POPT_ARG_STRING, &client, 0, "remote client to send the notify to", "hostname/ip" }, + { "clientport", 0, POPT_ARG_INT, &clientport, 0, "clientport", "integer" }, + { "ip", 'i', POPT_ARG_STRING, &ip, 0, "local ip address to send the notification from", "ip" }, + { "sendport", 0, POPT_ARG_INT, &sendport, 0, "port to send the notify from", "integer" }, + { "server", 's', POPT_ARG_STRING, &server, 0, "servername to use in the notification", 
"hostname/ip" }, + { "stateval", 0, POPT_ARG_INT, &stateval, 0, "stateval", "integer" }, + POPT_TABLEEND + }; + int opt; + poptContext pc; + CLIENT *clnt; + int s; + struct sockaddr_in sock_cl; + struct timeval w; + struct status st; + + pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST); + + while ((opt = poptGetNextOpt(pc)) != -1) { + switch (opt) { + default: + fprintf(stderr, "Invalid option %s: %s\n", + poptBadOption(pc, 0), poptStrerror(opt)); + exit(1); + } + } + + if (client == NULL) { + printf("ERROR: client not specified\n"); + usage(); + } + + if (ip == NULL) { + printf("ERROR: ip not specified\n"); + usage(); + } + + if (server == NULL) { + printf("ERROR: server not specified\n"); + usage(); + } + + if (stateval == 0) { + printf("ERROR: stateval not specified\n"); + usage(); + } + + + /* Since we want to control from which address these packets are + sent we must create the socket ourself and use low-level rpc + calls. + */ + s = create_socket(ip, sendport); + + /* only wait for at most 3 seconds before giving up */ + alarm(3); + + /* Setup a sockaddr_in for the client we want to notify */ + bzero(&sock_cl, sizeof(sock_cl)); + sock_cl.sin_family = AF_INET; + sock_cl.sin_port = htons(clientport); + inet_aton(client, &sock_cl.sin_addr); + + w.tv_sec = 1; + w.tv_usec= 0; + + clnt = clntudp_create(&sock_cl, 100024, 1, w, &s); + if (clnt == NULL) { + printf("ERROR: failed to connect to client\n"); + exit(10); + } + + /* we don't want to wait for any reply */ + w.tv_sec = 0; + w.tv_usec = 0; + clnt_control(clnt, CLSET_TIMEOUT, (char *)&w); + + st.mon_name=server; + st.state=stateval; + sm_notify_1(&st, clnt); + + return 0; +} diff --git a/ctdb/utils/smnotify/smnotify.x b/ctdb/utils/smnotify/smnotify.x new file mode 100644 index 0000000..94239f8 --- /dev/null +++ b/ctdb/utils/smnotify/smnotify.x @@ -0,0 +1,21 @@ +#ifdef RPC_HDR +%#ifdef _AIX +%#include <rpc/rpc.h> +%#endif /* _AIX */ +#endif /* RPC_HDR */ + +const SM_MAXSTRLEN = 
1024; + +struct status { + string mon_name<SM_MAXSTRLEN>; + int state; +}; + + +program SMNOTIFY { + version SMVERSION { + void SM_NOTIFY(struct status) = 6; + } = 1; +} = 100024; + + diff --git a/ctdb/utils/tdb/tdb_mutex_check.c b/ctdb/utils/tdb/tdb_mutex_check.c new file mode 100644 index 0000000..440bd48 --- /dev/null +++ b/ctdb/utils/tdb/tdb_mutex_check.c @@ -0,0 +1,160 @@ +/* + Check the mutex lock information in tdb database + + Copyright (C) Amitay Isaacs 2015-2021 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <pthread.h> +#include <errno.h> + +#ifndef USE_TDB_MUTEX_LOCKING +#define USE_TDB_MUTEX_LOCKING 1 +#endif + +#include "lib/tdb/common/tdb_private.h" +#include "lib/tdb/common/mutex.c" + +static uint8_t *hex_decode(const char *hex_in, size_t *plen) +{ + size_t i; + int num; + uint8_t *buffer; + size_t len; + + len = strlen(hex_in) / 2; + if (len == 0) { + return NULL; + } + + buffer = malloc(len); + if (buffer == NULL) { + return NULL; + } + + for (i = 0; i < len; i++) { + sscanf(&hex_in[i*2], "%02X", &num); + buffer[i] = (uint8_t)num; + } + + *plen = len; + + return buffer; +} + +static int get_hash_chain(struct tdb_context *tdb, const char *hex_key) +{ + TDB_DATA key = { + .dsize = 0, + }; + unsigned int hash; + + key.dptr = hex_decode(hex_key, &key.dsize); + if (key.dptr == NULL || key.dsize == 0) { + return -1; + } + hash = tdb_jenkins_hash(&key); + free(key.dptr); + + return hash % tdb_hash_size(tdb); +} + +static void check_one(struct tdb_mutexes *mutexes, int chain) +{ + pthread_mutex_t *m; + int ret; + int pthread_mutex_consistent_np(pthread_mutex_t *); + + m = &mutexes->hashchains[chain+1]; + ret = pthread_mutex_trylock(m); + if (ret == 0) { + pthread_mutex_unlock(m); + return; + } + if (ret == EOWNERDEAD) { + ret = pthread_mutex_consistent_np(m); + if (ret != 0) { + printf("[%6d] consistent failed (%d)\n", chain, ret); + return; + } + ret = pthread_mutex_unlock(m); + if (ret != 0) { + printf("[%6d] unlock failed (%d)\n", chain, ret); + return; + } + printf("[%6d] cleaned\n", chain); + return; + } + if (ret == EBUSY) { + printf("[%6d] pid=%d\n", chain, m->__data.__owner); + return; + } + printf("[%6d] trylock failed (%d)\n", chain, ret); +} + +static void check_all(struct tdb_mutexes *mutexes, unsigned int hash_size) +{ + unsigned int i; + + for (i=0; i<hash_size; i++) { + check_one(mutexes, i); + } +} + +int main(int argc, char **argv) +{ + const char *tdb_file; + TDB_CONTEXT 
*tdb; + uint32_t tdb_flags; + int chain, i; + + if (argc < 2) { + printf("Usage %s <tdb file> [<key1> <key2>]\n", argv[0]); + exit(1); + } + + tdb_file = argv[1]; + + tdb_flags = TDB_MUTEX_LOCKING | TDB_INCOMPATIBLE_HASH | + TDB_CLEAR_IF_FIRST; + tdb = tdb_open(tdb_file, 0, tdb_flags, O_RDWR, 0); + if (tdb == NULL) { + printf("Error opening %s\n", tdb_file); + exit(1); + } + + if (tdb->mutexes == NULL) { + printf("Mutexes are not mmapped\n"); + exit(1); + } + + if (argc == 2) { + check_all(tdb->mutexes, tdb_hash_size(tdb)); + } else { + for (i=2; i<argc; i++) { + chain = get_hash_chain(tdb, argv[i]); + if (chain == -1) { + continue; + } + check_one(tdb->mutexes, chain); + } + } + + tdb_close(tdb); + return 0; +} |