summaryrefslogtreecommitdiffstats
path: root/ctdb/utils
diff options
context:
space:
mode:
Diffstat (limited to 'ctdb/utils')
-rw-r--r--ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c457
-rwxr-xr-xctdb/utils/ceph/test_ceph_rados_reclock.sh212
-rwxr-xr-xctdb/utils/etcd/ctdb_etcd_lock213
-rw-r--r--ctdb/utils/nagios/README56
-rwxr-xr-xctdb/utils/nagios/check_ctdb279
-rw-r--r--ctdb/utils/ping_pong/ping_pong.c303
-rw-r--r--ctdb/utils/pmda/Install36
-rw-r--r--ctdb/utils/pmda/README84
-rw-r--r--ctdb/utils/pmda/Remove29
-rw-r--r--ctdb/utils/pmda/domain.h19
-rw-r--r--ctdb/utils/pmda/help106
-rw-r--r--ctdb/utils/pmda/pmda_ctdb.c559
-rw-r--r--ctdb/utils/pmda/pmns73
-rw-r--r--ctdb/utils/pmda/root10
-rw-r--r--ctdb/utils/scsi_io/scsi_io.c1152
-rw-r--r--ctdb/utils/smnotify/smnotify.c151
-rw-r--r--ctdb/utils/smnotify/smnotify.x21
-rw-r--r--ctdb/utils/tdb/tdb_mutex_check.c160
18 files changed, 3920 insertions, 0 deletions
diff --git a/ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c b/ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c
new file mode 100644
index 0000000..7d868a3
--- /dev/null
+++ b/ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c
@@ -0,0 +1,457 @@
+/*
+ CTDB mutex helper using Ceph librados locks
+
+ Copyright (C) David Disseldorp 2016-2020
+
+ Based on ctdb_mutex_fcntl_helper.c, which is:
+ Copyright (C) Martin Schwenke 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include "tevent.h"
+#include "talloc.h"
+#include "rados/librados.h"
+
+/*
+ * Lock name, cookie and description handed to librados by
+ * ctdb_mutex_rados_lock() / ctdb_mutex_rados_unlock(). The cookie is the
+ * same string as the lock name.
+ */
+#define CTDB_MUTEX_CEPH_LOCK_NAME "ctdb_reclock_mutex"
+#define CTDB_MUTEX_CEPH_LOCK_COOKIE CTDB_MUTEX_CEPH_LOCK_NAME
+#define CTDB_MUTEX_CEPH_LOCK_DESC "CTDB cluster lock"
+/*
+ * During failover it may take up to <lock duration> seconds before the
+ * newly elected recovery master can obtain the lock.
+ */
+#define CTDB_MUTEX_CEPH_LOCK_DURATION_SECS_DEFAULT 10
+
+/*
+ * Status strings written (without a newline) to stdout by main().
+ * CTDB_MUTEX_STATUS_TIMEOUT is defined but never written by this file.
+ */
+#define CTDB_MUTEX_STATUS_HOLDING "0"
+#define CTDB_MUTEX_STATUS_CONTENDED "1"
+#define CTDB_MUTEX_STATUS_TIMEOUT "2"
+#define CTDB_MUTEX_STATUS_ERROR "3"
+
+/* argv[0]; assigned at the top of main() and used to prefix error messages */
+static char *progname = NULL;
+
+/*
+ * Create a Ceph cluster handle for @ceph_cluster_name / @ceph_auth_name, read
+ * the Ceph configuration, connect, and open an I/O context on @pool_name.
+ *
+ * On success 0 is returned and the handles are stored in @_ceph_cluster and
+ * @_ioctx. On failure the error is logged to stderr, any cluster handle that
+ * was created is shut down again, the negative librados return value is
+ * passed back and the output parameters are left untouched.
+ */
+static int ctdb_mutex_rados_ctx_create(const char *ceph_cluster_name,
+				       const char *ceph_auth_name,
+				       const char *pool_name,
+				       rados_t *_ceph_cluster,
+				       rados_ioctx_t *_ioctx)
+{
+	rados_t cluster = NULL;
+	rados_ioctx_t io = NULL;
+	int rc;
+
+	rc = rados_create2(&cluster, ceph_cluster_name, ceph_auth_name, 0);
+	if (rc < 0) {
+		/* nothing to tear down yet */
+		fprintf(stderr, "%s: failed to initialise Ceph cluster %s as %s"
+			" - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
+			strerror(-rc));
+		return rc;
+	}
+
+	/* path=NULL tells librados to use default locations */
+	rc = rados_conf_read_file(cluster, NULL);
+	if (rc < 0) {
+		fprintf(stderr, "%s: failed to parse Ceph cluster config"
+			" - (%s)\n", progname, strerror(-rc));
+		goto err_shutdown;
+	}
+
+	rc = rados_connect(cluster);
+	if (rc < 0) {
+		fprintf(stderr, "%s: failed to connect to Ceph cluster %s as %s"
+			" - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
+			strerror(-rc));
+		goto err_shutdown;
+	}
+
+	rc = rados_ioctx_create(cluster, pool_name, &io);
+	if (rc < 0) {
+		fprintf(stderr, "%s: failed to create Ceph ioctx for pool %s"
+			" - (%s)\n", progname, pool_name, strerror(-rc));
+		goto err_shutdown;
+	}
+
+	/* hand both handles over to the caller only once everything worked */
+	*_ceph_cluster = cluster;
+	*_ioctx = io;
+
+	return 0;
+
+err_shutdown:
+	rados_shutdown(cluster);
+	return rc;
+}
+
+/*
+ * Take the exclusive CTDB lock on RADOS object @oid, or renew it when
+ * LIBRADOS_LOCK_FLAG_RENEW is set in @flags.
+ *
+ * @ioctx is the I/O context handle itself. It used to be declared as
+ * "rados_ioctx_t *", which only compiled because the handle is an opaque
+ * pointer; every caller passes the handle, and that is what
+ * rados_lock_exclusive() expects.
+ * @lock_duration_s is the lock expiry in seconds; 0 passes a NULL duration
+ * to librados, i.e. no expiry is requested.
+ *
+ * Returns 0 once the lock is held. Contention (-EEXIST / -EBUSY) is returned
+ * silently so the caller can report it; any other negative value is logged
+ * to stderr before being returned.
+ */
+static int ctdb_mutex_rados_lock(rados_ioctx_t ioctx,
+				 const char *oid,
+				 uint64_t lock_duration_s,
+				 uint8_t flags)
+{
+	int ret;
+	struct timeval tv = { lock_duration_s, 0 };
+
+	ret = rados_lock_exclusive(ioctx, oid,
+				   CTDB_MUTEX_CEPH_LOCK_NAME,
+				   CTDB_MUTEX_CEPH_LOCK_COOKIE,
+				   CTDB_MUTEX_CEPH_LOCK_DESC,
+				   lock_duration_s == 0 ? NULL : &tv,
+				   flags);
+	if ((ret == -EEXIST) || (ret == -EBUSY)) {
+		/* lock contention */
+		return ret;
+	} else if (ret < 0) {
+		/* unexpected failure */
+		fprintf(stderr,
+			"%s: Failed to get lock on RADOS object '%s' - (%s)\n",
+			progname, oid, strerror(-ret));
+		return ret;
+	}
+
+	/* lock obtained */
+	return 0;
+}
+
+/*
+ * Drop the CTDB lock on RADOS object @oid.
+ *
+ * @ioctx is the I/O context handle itself. It used to be declared as
+ * "rados_ioctx_t *", which only compiled because the handle is an opaque
+ * pointer; the caller passes the handle, and that is what rados_unlock()
+ * expects.
+ *
+ * Returns 0 on success. On failure the error is logged to stderr and the
+ * negative librados return value is passed back.
+ */
+static int ctdb_mutex_rados_unlock(rados_ioctx_t ioctx,
+				   const char *oid)
+{
+	int ret;
+
+	ret = rados_unlock(ioctx, oid,
+			   CTDB_MUTEX_CEPH_LOCK_NAME,
+			   CTDB_MUTEX_CEPH_LOCK_COOKIE);
+	if (ret < 0) {
+		fprintf(stderr,
+			"%s: Failed to drop lock on RADOS object '%s' - (%s)\n",
+			progname, oid, strerror(-ret));
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * State of one helper process. Allocated with talloc in main(), which also
+ * installs ctdb_mutex_rados_state_destroy() as its destructor; freeing the
+ * state therefore drops the lock and tears down the Ceph handles.
+ */
+struct ctdb_mutex_rados_state {
+ bool holding_mutex; /* set by main() once the initial lock was obtained */
+ const char *ceph_cluster_name; /* argv[1] */
+ const char *ceph_auth_name; /* argv[2] */
+ const char *pool_name; /* argv[3] */
+ const char *object; /* argv[4]: RADOS object the lock is taken on */
+ uint64_t lock_duration_s; /* argv[5] or the default; 0 skips expiry and renewal */
+ int ppid; /* getppid() at startup, polled by the ppid timer */
+ struct tevent_context *ev; /* event loop run by main() */
+ struct tevent_signal *sigterm_ev; /* SIGTERM handler registration */
+ struct tevent_signal *sigint_ev; /* SIGINT handler registration */
+ struct tevent_timer *ppid_timer_ev; /* parent liveness check, re-armed every 5s */
+ struct tevent_timer *renew_timer_ev; /* lock renewal, only armed if duration != 0 */
+ rados_t ceph_cluster; /* NULL until ctdb_mutex_rados_ctx_create() succeeds */
+ rados_ioctx_t ioctx; /* NULL until ctdb_mutex_rados_ctx_create() succeeds */
+};
+
+/*
+ * tevent signal callback; main() registers it for both SIGTERM and SIGINT.
+ * Frees the helper state passed as @private_data and exits the process:
+ * status 0 when the mutex was held, 1 (after a message on stderr) when the
+ * signal arrived before the mutex was obtained.
+ */
+static void ctdb_mutex_rados_sigterm_cb(struct tevent_context *ev,
+					struct tevent_signal *se,
+					int signum,
+					int count,
+					void *siginfo,
+					void *private_data)
+{
+	struct ctdb_mutex_rados_state *state = private_data;
+	int exit_status = 0;
+
+	if (!state->holding_mutex) {
+		fprintf(stderr, "Sigterm callback invoked without mutex!\n");
+		exit_status = 1;
+	}
+
+	/* the state's talloc destructor takes care of the cleanup */
+	talloc_free(state);
+	exit(exit_status);
+}
+
+/*
+ * Periodic parent liveness check. While the parent recorded in the state
+ * still exists the timer is re-armed for another 5 seconds. Once kill(ppid, 0)
+ * fails with ESRCH the state is freed and the process exits with status 0.
+ * Being invoked without holding the mutex is treated as an error (exit 1).
+ */
+static void ctdb_mutex_rados_ppid_timer_cb(struct tevent_context *ev,
+					   struct tevent_timer *te,
+					   struct timeval current_time,
+					   void *private_data)
+{
+	struct ctdb_mutex_rados_state *state = private_data;
+	bool parent_gone;
+
+	if (!state->holding_mutex) {
+		fprintf(stderr, "Timer callback invoked without mutex!\n");
+		talloc_free(state);
+		exit(1);
+	}
+
+	/* only ESRCH proves the parent is gone; other errors count as alive */
+	parent_gone = (kill(state->ppid, 0) != 0) && (errno == ESRCH);
+	if (parent_gone) {
+		/* parent ended, drop lock (via destructor) and exit */
+		talloc_free(state);
+		exit(0);
+	}
+
+	/* parent still around, keep waiting */
+	state->ppid_timer_ev = tevent_add_timer(state->ev,
+						state,
+						tevent_timeval_current_ofs(5, 0),
+						ctdb_mutex_rados_ppid_timer_cb,
+						state);
+	if (state->ppid_timer_ev == NULL) {
+		fprintf(stderr, "Failed to create timer event\n");
+		/* rely on signal cb */
+	}
+}
+
+#define USECS_IN_SEC 1000000
+
+/*
+ * Timer callback that renews the lock and re-arms itself to fire again after
+ * half the lock duration. Any renewal or timer failure is fatal: the state is
+ * freed (dropping the lock via its destructor) and the process exits with
+ * status 1.
+ *
+ * The renewal period is passed to tevent as whole seconds plus at most half a
+ * second of microseconds. Multiplying the 64-bit duration by 500000 and
+ * passing it as microseconds truncates once the product no longer fits the
+ * 32-bit usecs argument of tevent_timeval_current_ofs().
+ */
+static void ctdb_mutex_rados_lock_renew_timer_cb(struct tevent_context *ev,
+						 struct tevent_timer *te,
+						 struct timeval current_time,
+						 void *private_data)
+{
+	struct ctdb_mutex_rados_state *cmr_state = private_data;
+	struct timeval tv;
+	int ret;
+
+	ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object,
+				    cmr_state->lock_duration_s,
+				    LIBRADOS_LOCK_FLAG_RENEW);
+	if (ret == -EBUSY) {
+		/* should never get -EEXIST on renewal */
+		fprintf(stderr, "Lock contention during renew: %d\n", ret);
+		goto err_ctx_cleanup;
+	} else if (ret < 0) {
+		fprintf(stderr, "Lock renew failed\n");
+		goto err_ctx_cleanup;
+	}
+
+	/* half the lock duration: d/2 seconds, plus 0.5s when d is odd */
+	tv = tevent_timeval_current_ofs(
+		cmr_state->lock_duration_s / 2,
+		(cmr_state->lock_duration_s % 2) * (USECS_IN_SEC / 2));
+	cmr_state->renew_timer_ev = tevent_add_timer(cmr_state->ev,
+						     cmr_state,
+						     tv,
+					ctdb_mutex_rados_lock_renew_timer_cb,
+						     cmr_state);
+	if (cmr_state->renew_timer_ev == NULL) {
+		fprintf(stderr, "Failed to create timer event\n");
+		goto err_ctx_cleanup;
+	}
+
+	return;
+
+err_ctx_cleanup:
+	/* drop lock (via destructor) and exit */
+	talloc_free(cmr_state);
+	exit(1);
+}
+
+/*
+ * talloc destructor for the helper state, installed by main(). Releases the
+ * RADOS lock if it is held, then destroys the I/O context and shuts down the
+ * cluster handle if they were created. Always returns 0.
+ */
+static int ctdb_mutex_rados_state_destroy(struct ctdb_mutex_rados_state *cmr_state)
+{
+	rados_ioctx_t io = cmr_state->ioctx;
+	rados_t cluster = cmr_state->ceph_cluster;
+
+	/* unlock first, while the I/O context is still usable */
+	if (cmr_state->holding_mutex) {
+		ctdb_mutex_rados_unlock(io, cmr_state->object);
+	}
+
+	if (io != NULL) {
+		rados_ioctx_destroy(io);
+	}
+
+	if (cluster != NULL) {
+		rados_shutdown(cluster);
+	}
+
+	return 0;
+}
+
+/*
+ * Register this host+service with ceph-mgr for visibility.
+ *
+ * The daemon name is "ctdb_mutex_ceph_rados_helper:0x<instance id>", where
+ * the instance id is the one librados reports for @ceph_cluster; the service
+ * name is "ctdb". Returns 0 on success, -ENAMETOOLONG if the daemon name does
+ * not fit the local buffer, or the negative value returned by
+ * rados_service_register().
+ */
+static int ctdb_mutex_rados_mgr_reg(rados_t ceph_cluster)
+{
+	char daemon_name[128];
+	uint64_t guid = rados_get_instance_id(ceph_cluster);
+	int len;
+	int rc;
+
+	len = snprintf(daemon_name, sizeof(daemon_name), "%s:0x%016llx",
+		       "ctdb_mutex_ceph_rados_helper",
+		       (unsigned long long)guid);
+	if ((len < 0) || ((size_t)len >= sizeof(daemon_name))) {
+		fprintf(stderr, "Ceph instance name too long\n");
+		return -ENAMETOOLONG;
+	}
+
+	rc = rados_service_register(ceph_cluster, "ctdb", daemon_name, "");
+	if (rc < 0) {
+		fprintf(stderr, "failed to register service with ceph-mgr\n");
+		return rc;
+	}
+
+	return 0;
+}
+
+/*
+ * Usage: <Ceph Cluster> <Ceph user> <RADOS pool> <RADOS object>
+ *        [lock duration secs]
+ *
+ * Takes the exclusive CTDB lock on the given RADOS object and reports the
+ * outcome as one status string on (unbuffered) stdout: HOLDING, CONTENDED or
+ * ERROR. While holding the lock the process stays in the tevent loop until
+ * SIGTERM/SIGINT arrives, the parent process disappears or a lock renewal
+ * fails; those callbacks free the state and exit themselves.
+ *
+ * The exit status is 0 only if every step including the event loop returned
+ * success, 1 otherwise (including lock contention).
+ */
+int main(int argc, char *argv[])
+{
+	int ret;
+	struct ctdb_mutex_rados_state *cmr_state;
+
+	progname = argv[0];
+
+	if ((argc != 5) && (argc != 6)) {
+		fprintf(stderr, "Usage: %s <Ceph Cluster> <Ceph user> "
+				"<RADOS pool> <RADOS object> "
+				"[lock duration secs]\n",
+			progname);
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	/* the status string must reach the parent as soon as it is written */
+	ret = setvbuf(stdout, NULL, _IONBF, 0);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to configure unbuffered stdout I/O\n");
+	}
+
+	cmr_state = talloc_zero(NULL, struct ctdb_mutex_rados_state);
+	if (cmr_state == NULL) {
+		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+		ret = -ENOMEM;
+		goto err_out;
+	}
+
+	/* from here on talloc_free(cmr_state) performs all cleanup */
+	talloc_set_destructor(cmr_state, ctdb_mutex_rados_state_destroy);
+	cmr_state->ceph_cluster_name = argv[1];
+	cmr_state->ceph_auth_name = argv[2];
+	cmr_state->pool_name = argv[3];
+	cmr_state->object = argv[4];
+	if (argc == 6) {
+		/* optional lock duration provided */
+		char *endptr = NULL;
+
+		errno = 0;
+		cmr_state->lock_duration_s = strtoull(argv[5], &endptr, 0);
+		/*
+		 * Besides trailing garbage, also reject out-of-range values
+		 * and negative numbers: strtoull() accepts a leading '-' and
+		 * silently wraps the result to a huge duration.
+		 */
+		if ((endptr == argv[5]) || (*endptr != '\0')
+		    || (errno == ERANGE)
+		    || (strchr(argv[5], '-') != NULL)) {
+			fprintf(stderr, "%s: invalid lock duration '%s'\n",
+				progname, argv[5]);
+			fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+			ret = -EINVAL;
+			goto err_ctx_cleanup;
+		}
+	} else {
+		cmr_state->lock_duration_s
+			= CTDB_MUTEX_CEPH_LOCK_DURATION_SECS_DEFAULT;
+	}
+
+	cmr_state->ppid = getppid();
+	if (cmr_state->ppid == 1) {
+		/*
+		 * The original parent is gone and the process has
+		 * been reparented to init. This can happen if the
+		 * helper is started just as the parent is killed
+		 * during shutdown. The error message doesn't need to
+		 * be stellar, since there won't be anything around to
+		 * capture and log it...
+		 */
+		fprintf(stderr, "%s: PPID == 1\n", progname);
+		ret = -EPIPE;
+		goto err_ctx_cleanup;
+	}
+
+	cmr_state->ev = tevent_context_init(cmr_state);
+	if (cmr_state->ev == NULL) {
+		fprintf(stderr, "tevent_context_init failed\n");
+		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+		ret = -ENOMEM;
+		goto err_ctx_cleanup;
+	}
+
+	/* wait for sigterm */
+	cmr_state->sigterm_ev = tevent_add_signal(cmr_state->ev, cmr_state,
+						  SIGTERM, 0,
+						  ctdb_mutex_rados_sigterm_cb,
+						  cmr_state);
+	if (cmr_state->sigterm_ev == NULL) {
+		fprintf(stderr, "Failed to create term signal event\n");
+		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+		ret = -ENOMEM;
+		goto err_ctx_cleanup;
+	}
+
+	/* SIGINT shares the SIGTERM handler */
+	cmr_state->sigint_ev = tevent_add_signal(cmr_state->ev, cmr_state,
+						 SIGINT, 0,
+						 ctdb_mutex_rados_sigterm_cb,
+						 cmr_state);
+	if (cmr_state->sigint_ev == NULL) {
+		fprintf(stderr, "Failed to create int signal event\n");
+		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+		ret = -ENOMEM;
+		goto err_ctx_cleanup;
+	}
+
+	/* periodically check parent */
+	cmr_state->ppid_timer_ev = tevent_add_timer(cmr_state->ev, cmr_state,
+					tevent_timeval_current_ofs(5, 0),
+					ctdb_mutex_rados_ppid_timer_cb,
+					cmr_state);
+	if (cmr_state->ppid_timer_ev == NULL) {
+		fprintf(stderr, "Failed to create timer event\n");
+		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+		ret = -ENOMEM;
+		goto err_ctx_cleanup;
+	}
+
+	ret = ctdb_mutex_rados_ctx_create(cmr_state->ceph_cluster_name,
+					  cmr_state->ceph_auth_name,
+					  cmr_state->pool_name,
+					  &cmr_state->ceph_cluster,
+					  &cmr_state->ioctx);
+	if (ret < 0) {
+		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+		goto err_ctx_cleanup;
+	}
+
+	ret = ctdb_mutex_rados_mgr_reg(cmr_state->ceph_cluster);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to register with ceph-mgr\n");
+		/* ignore: ceph-mgr service registration is informational */
+	}
+
+	ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object,
+				    cmr_state->lock_duration_s,
+				    0);
+	if ((ret == -EEXIST) || (ret == -EBUSY)) {
+		fprintf(stdout, CTDB_MUTEX_STATUS_CONTENDED);
+		goto err_ctx_cleanup;
+	} else if (ret < 0) {
+		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+		goto err_ctx_cleanup;
+	}
+	cmr_state->holding_mutex = true;
+
+	if (cmr_state->lock_duration_s != 0) {
+		/*
+		 * renew (reobtain) the lock, using a period of half the lock
+		 * duration. The period is passed as whole seconds plus half
+		 * a second for odd durations, so that large durations cannot
+		 * overflow the 32-bit usecs argument.
+		 */
+		struct timeval tv = tevent_timeval_current_ofs(
+			cmr_state->lock_duration_s / 2,
+			(cmr_state->lock_duration_s % 2) * (USECS_IN_SEC / 2));
+		cmr_state->renew_timer_ev = tevent_add_timer(cmr_state->ev,
+							     cmr_state,
+							     tv,
+					ctdb_mutex_rados_lock_renew_timer_cb,
+							     cmr_state);
+		if (cmr_state->renew_timer_ev == NULL) {
+			fprintf(stderr, "Failed to create timer event\n");
+			fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
+			ret = -ENOMEM;
+			goto err_ctx_cleanup;
+		}
+	}
+
+	fprintf(stdout, CTDB_MUTEX_STATUS_HOLDING);
+
+	/* wait for the signal / timer events to do their work */
+	ret = tevent_loop_wait(cmr_state->ev);
+
+err_ctx_cleanup:
+	talloc_free(cmr_state);
+err_out:
+	return ret ? 1 : 0;
+}
diff --git a/ctdb/utils/ceph/test_ceph_rados_reclock.sh b/ctdb/utils/ceph/test_ceph_rados_reclock.sh
new file mode 100755
index 0000000..bfb9c32
--- /dev/null
+++ b/ctdb/utils/ceph/test_ceph_rados_reclock.sh
@@ -0,0 +1,212 @@
+#!/bin/bash
+# standalone test for ctdb_mutex_ceph_rados_helper
+#
+# Copyright (C) David Disseldorp 2016-2020
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+# XXX The following parameters may require configuration:
+CLUSTER="ceph" # Name of the Ceph cluster under test
+USER="client.admin" # Ceph user - a keyring must exist
+POOL="rbd" # RADOS pool - must exist
+OBJECT="ctdb_reclock" # RADOS object: target for lock requests
+
+# test procedure:
+# - using ctdb_mutex_ceph_rados_helper, take a lock on the Ceph RADOS object at
+# $CLUSTER/$POOL/$OBJECT using the Ceph keyring for $USER
+# + confirm that lock is obtained, via ctdb_mutex_ceph_rados_helper "0" output
+# - check for ceph-mgr service registration
+# - check RADOS object lock state, using the "rados lock info" command
+# - attempt to obtain the lock again, using ctdb_mutex_ceph_rados_helper
+# + confirm that the lock is not successfully taken ("1" output=contention)
+# - tell the first locker to drop the lock and exit, via SIGTERM
+# - once the first locker has exited, attempt to get the lock again
+# + confirm that this attempt succeeds
+
+# Report a test failure on stdout and abort the whole script with status 1.
+# All arguments are joined into the failure message.
+function _fail() {
+	local msg="$*"
+
+	echo "FAILED: $msg"
+	exit 1
+}
+
+# this test requires the Ceph "rados" and "ceph" binaries, the "jq" json
+# parser and ctdb_mutex_ceph_rados_helper itself to be found via $PATH
+which jq > /dev/null || exit 1
+which rados > /dev/null || exit 1
+which ceph > /dev/null || exit 1
+which ctdb_mutex_ceph_rados_helper > /dev/null || exit 1
+
+# scratch directory for helper output and lock state dumps
+TMP_DIR="$(mktemp --directory)" || exit 1
+# remove the object first so that the test does not see earlier lock state;
+# the result is deliberately not checked
+rados -p "$POOL" rm "$OBJECT"
+
+# --- phase 1: first locker obtains the lock --------------------------------
+# explicitly disable lock expiry (duration=0), to ensure that we don't get
+# intermittent failures (due to renewal) from the lock state diff further down
+(ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" "$POOL" "$OBJECT" 0 \
+ > ${TMP_DIR}/first) &
+locker_pid=$!
+
+# TODO wait for ctdb_mutex_ceph_rados_helper to write one byte to stdout,
+# indicating lock acquisition success/failure
+sleep 1
+
+# the helper writes its status without a trailing newline; "0" means holding
+first_out=$(cat ${TMP_DIR}/first)
+[ "$first_out" == "0" ] \
+ || _fail "expected lock acquisition (0), but got $first_out"
+
+# the helper registers itself with ceph-mgr under the "ctdb" service name
+ceph service dump > ${TMP_DIR}/service_dump
+SERVICE_COUNT=$(jq -r '.services.ctdb.daemons | length' ${TMP_DIR}/service_dump)
+[ $SERVICE_COUNT -gt 0 ] || _fail "lock holder missing from ceph service dump"
+
+# lock name, cookie and description below must match the constants compiled
+# into ctdb_mutex_ceph_rados_helper
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+ > ${TMP_DIR}/lock_state_first
+
+# echo "with lock: `cat ${TMP_DIR}/lock_state_first`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_first)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+ || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_first)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+ || _fail "unexpected lock type: $LOCK_TYPE"
+
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_first)"
+[ $LOCK_COUNT -eq 1 ] || _fail "expected 1 lock in rados state, got $LOCK_COUNT"
+LOCKER_COOKIE="$(jq -r '.lockers[0].cookie' ${TMP_DIR}/lock_state_first)"
+[ "$LOCKER_COOKIE" == "ctdb_reclock_mutex" ] \
+ || _fail "unexpected locker cookie: $LOCKER_COOKIE"
+LOCKER_DESC="$(jq -r '.lockers[0].description' ${TMP_DIR}/lock_state_first)"
+[ "$LOCKER_DESC" == "CTDB cluster lock" ] \
+ || _fail "unexpected locker description: $LOCKER_DESC"
+# duration 0 was requested above, so the lock is expected to carry no expiry
+LOCKER_EXP="$(jq -r '.lockers[0].expiration' ${TMP_DIR}/lock_state_first)"
+[ "$LOCKER_EXP" == "0.000000" ] \
+ || _fail "unexpected locker expiration: $LOCKER_EXP"
+
+# --- phase 2: contention ----------------------------------------------------
+# second attempt while first is still holding the lock - expect failure
+# (run in the foreground, without a duration argument; the helper is expected
+# to report contention and exit on its own)
+ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" "$POOL" "$OBJECT" \
+ > ${TMP_DIR}/second
+second_out=$(cat ${TMP_DIR}/second)
+[ "$second_out" == "1" ] \
+ || _fail "expected lock contention (1), but got $second_out"
+
+# confirm lock state didn't change
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+ > ${TMP_DIR}/lock_state_second
+
+diff ${TMP_DIR}/lock_state_first ${TMP_DIR}/lock_state_second \
+ || _fail "unexpected lock state change"
+
+# --- phase 3: SIGTERM makes the first locker drop the lock ------------------
+# tell first locker to drop the lock and terminate
+kill $locker_pid || exit 1
+
+# only continue once the locker is really gone
+wait $locker_pid &> /dev/null
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+	> ${TMP_DIR}/lock_state_third
+# echo "without lock: `cat ${TMP_DIR}/lock_state_third`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_third)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+	|| _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_third)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+	|| _fail "unexpected lock type: $LOCK_TYPE"
+
+# no backslash before the apostrophe: inside double quotes it would be
+# printed literally as part of the failure message
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_third)"
+[ $LOCK_COUNT -eq 0 ] \
+	|| _fail "didn't expect any locks in rados state, got $LOCK_COUNT"
+
+exec >${TMP_DIR}/third -- ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" "$POOL" "$OBJECT" &
+locker_pid=$!
+
+sleep 1
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+ > ${TMP_DIR}/lock_state_fourth
+# echo "with lock again: `cat ${TMP_DIR}/lock_state_fourth`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_fourth)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+ || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_fourth)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+ || _fail "unexpected lock type: $LOCK_TYPE"
+
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_fourth)"
+[ $LOCK_COUNT -eq 1 ] || _fail "expected 1 lock in rados state, got $LOCK_COUNT"
+LOCKER_COOKIE="$(jq -r '.lockers[0].cookie' ${TMP_DIR}/lock_state_fourth)"
+[ "$LOCKER_COOKIE" == "ctdb_reclock_mutex" ] \
+ || _fail "unexpected locker cookie: $LOCKER_COOKIE"
+LOCKER_DESC="$(jq -r '.lockers[0].description' ${TMP_DIR}/lock_state_fourth)"
+[ "$LOCKER_DESC" == "CTDB cluster lock" ] \
+ || _fail "unexpected locker description: $LOCKER_DESC"
+
+kill $locker_pid || exit 1
+wait $locker_pid &> /dev/null
+
+third_out=$(cat ${TMP_DIR}/third)
+[ "$third_out" == "0" ] \
+ || _fail "expected lock acquisition (0), but got $third_out"
+
+# --- phase 5: renewal and expiry --------------------------------------------
+# test renew / expire behaviour using a 1s expiry (update period = 500ms)
+# (the helper's output file is written here but never read back)
+exec >${TMP_DIR}/forth -- ctdb_mutex_ceph_rados_helper "$CLUSTER" "$USER" \
+ "$POOL" "$OBJECT" 1 &
+locker_pid=$!
+
+sleep 1
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+ > ${TMP_DIR}/lock_state_fifth_a
+#echo "with lock fifth: `cat ${TMP_DIR}/lock_state_fifth_a`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_fifth_a)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+ || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_fifth_a)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+ || _fail "unexpected lock type: $LOCK_TYPE"
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_fifth_a)"
+[ $LOCK_COUNT -eq 1 ] || _fail "expected 1 lock in rados state, got $LOCK_COUNT"
+# a non-zero duration was requested, so an expiration time must be recorded
+LOCKER_EXP_A="$(jq -r '.lockers[0].expiration' ${TMP_DIR}/lock_state_fifth_a)"
+[ "$LOCKER_EXP_A" != "0.000000" ] \
+ || _fail "unexpected locker expiration: $LOCKER_EXP_A"
+sleep 1 # sleep until renewal
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+ > ${TMP_DIR}/lock_state_fifth_b
+LOCKER_EXP_B="$(jq -r '.lockers[0].expiration' ${TMP_DIR}/lock_state_fifth_b)"
+[ "$LOCKER_EXP_B" != "0.000000" ] \
+ || _fail "unexpected locker expiration: $LOCKER_EXP_B"
+#echo "lock expiration before renewal $LOCKER_EXP_A, after renewal $LOCKER_EXP_B"
+# a renewal in between must have changed the recorded expiration time
+[ "$LOCKER_EXP_B" != "$LOCKER_EXP_A" ] \
+ || _fail "locker expiration matches: $LOCKER_EXP_B"
+
+# no chance to drop the lock, rely on expiry
+kill -KILL $locker_pid || exit 1
+wait $locker_pid &> /dev/null
+sleep 1 # sleep until lock expiry
+
+rados -p "$POOL" lock info "$OBJECT" ctdb_reclock_mutex \
+ > ${TMP_DIR}/lock_state_sixth
+#echo "lock expiry sixth: `cat ${TMP_DIR}/lock_state_sixth`"
+
+LOCK_NAME="$(jq -r '.name' ${TMP_DIR}/lock_state_sixth)"
+[ "$LOCK_NAME" == "ctdb_reclock_mutex" ] \
+ || _fail "unexpected lock name: $LOCK_NAME"
+LOCK_TYPE="$(jq -r '.type' ${TMP_DIR}/lock_state_sixth)"
+[ "$LOCK_TYPE" == "exclusive" ] \
+ || _fail "unexpected lock type: $LOCK_TYPE"
+LOCK_COUNT="$(jq -r '.lockers | length' ${TMP_DIR}/lock_state_sixth)"
+[ $LOCK_COUNT -eq 0 ] || _fail "expected 0 locks in rados state, got $LOCK_COUNT"
+
+# only reached when every check passed; _fail exits without this cleanup
+rm ${TMP_DIR}/*
+rmdir $TMP_DIR
+
+echo "$0: all tests passed"
diff --git a/ctdb/utils/etcd/ctdb_etcd_lock b/ctdb/utils/etcd/ctdb_etcd_lock
new file mode 100755
index 0000000..dac2436
--- /dev/null
+++ b/ctdb/utils/etcd/ctdb_etcd_lock
@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Copyright (C) 2016 Jose A. Rivera <jarrpa@samba.org>
+# Copyright (C) 2016 Ira Cooper <ira@samba.org>
+"""CTDB mutex helper using etcd.
+
+This script is intended to be run as a mutex helper for CTDB. It will try to
+connect to an existing etcd cluster and grab an etcd.Lock() to function as
+CTDB's cluster lock. Please see ctdb/doc/cluster_mutex_helper.txt for
+details on what we're SUPPOSED to be doing. :) To use this, include
+the following line in the ctdb.conf:
+
+ cluster lock = !/path/to/script
+
+You can also pass "-v", "-vv", or "-vvv" to include verbose output in the
+CTDB log. Additional "v"s indicate increases in verbosity.
+
+This mutex helper expects the system Python interpreter to have access to the
+etcd Python module. It also expects an etcd cluster to be configured and
+running. To integrate with this, there is an optional config file of the
+following format:
+
+key = value
+
+The following configuration variables (and their defaults) are defined for
+use by this script:
+
+port = 2379 # connecting port for the etcd cluster
+lock_ttl = 9 # seconds for TTL
+lock_refresh = 2 # seconds between attempts to maintain lock
+locks_dir = _ctdb # where to store CTDB locks in etcd
+ # The final etcd directory for any given lock looks like:
+ # /_locks/{locks_dir}/{netbios name}/
+
+In addition, any keyword parameter that can be used to configure an etcd
+client may be specified and modified here. For more documentation on these
+parameters, see here: https://github.com/jplana/python-etcd/
+
+"""
+import signal
+import time
+import sys
+import os
+import argparse
+import logging
+import subprocess
+
+import etcd
+
+# Helper Functions ------------------------------------------------------------
+#
+
+
+def process_args():
+ '''Process command-line arguments and return them.
+ '''
+ parser = argparse.ArgumentParser(
+ description=__doc__,
+ epilog='',
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser.add_argument('-v', '--verbose',
+ action='count',
+ help='Display verbose output to stderr. '
+ 'Default is no output.',
+ default=0,
+ )
+ parser.add_argument('-c', '--config',
+ action='store',
+ help='Configuration file to use. The default behavior '
+ 'is to look is the base CTDB configuration '
+ 'directory, which can be overwritten by setting '
+ 'the CTDB_BASE environment variable, for a file '
+ 'called \'etcd\'. Default value is %(default)s.',
+ default=os.path.join(os.getenv('CTDB_BASE',
+ '/usr/local/etc/ctdb'),
+ 'etcd'),
+ )
+ args = parser.parse_args()
+
+ return args
+
+
+def setup_logging(verbose):
+ '''Setup logging based on specified verbosity.
+
+ verbose is the number of "-v" flags given: 0 selects ERROR, 1 selects
+ WARNING and 2 or more selects DEBUG.
+ '''
+
+ log_levels = [logging.ERROR, logging.WARNING, logging.DEBUG]
+ # clamp the index so that any higher verbosity maps to the last level
+ logging.basicConfig(level=log_levels[min(verbose, len(log_levels)-1)])
+
+
+def sigterm_handler(signum, frame):
+ """Handler for SIGTERM signals.
+
+ Calls sys.exit(), i.e. raises SystemExit. main() catches SystemExit
+ around its lock loop in order to release the lock.
+ """
+ sys.exit()
+
+
+def print_nonl(out):
+ """Dumb shortcut for printing to stdout with no newline.
+
+ out is converted with str() and stdout is flushed right away.
+ """
+ sys.stdout.write(str(out))
+ sys.stdout.flush()
+
+
+def int_or_not(s):
+ """Try to convert input to an integer.
+
+ Returns int(s) when that works; when int() raises ValueError the input
+ is returned unchanged.
+ """
+ try:
+ return int(s)
+ except ValueError:
+ return s
+
+# Mainline --------------------------------------------------------------------
+#
+
+
+def main():
+ args = process_args()
+
+ setup_logging(args.verbose)
+
+ # etcd config defaults
+ etcd_config = {
+ 'port': 2379,
+ 'locks_dir': '_ctdb',
+ 'lock_ttl': 9,
+ 'lock_refresh': 2,
+ }
+ # Find and read etcd config file
+ etcd_client_params = (
+ 'host',
+ 'port',
+ 'srv_domain',
+ 'version_prefix',
+ 'read_timeout',
+ 'allow_redirect',
+ 'protocol',
+ 'cert',
+ 'ca_cert',
+ 'username',
+ 'password',
+ 'allow_reconnect',
+ 'use_proxies',
+ 'expected_cluster_id',
+ 'per_host_pool_size',
+ )
+ if os.path.isfile(args.config):
+ f = open(args.config, 'r')
+ for line in f:
+ (key, value) = line.split("=", 1)
+ etcd_config[key.strip()] = int_or_not(value.strip())
+
+ # Minor hack: call out to shell to retrieve CTDB netbios name and PNN.
+ tmp = subprocess.Popen("testparm -s --parameter-name 'netbios name'; \
+ ctdb pnn",
+ shell=True,
+ universal_newlines=True,
+ stdout=subprocess.PIPE
+ ).stdout.read().strip()
+ nb_name, pnn = tmp.split()
+
+ # Try to get and hold the lock
+ try:
+ client = etcd.Client(
+ **{k: etcd_config[k] for k in
+ set(etcd_client_params).intersection(etcd_config)})
+ lock = etcd.Lock(client, etcd_config['locks_dir'] + "/" + nb_name)
+ lock._uuid = lock._uuid + "_" + pnn
+ logging.debug("Updated lock UUID: %s", lock.uuid)
+ ppid = os.getppid()
+ while True:
+ lock.acquire(blocking=False, lock_ttl=etcd_config['lock_ttl'])
+ if lock.is_acquired:
+ print_nonl(0)
+ else:
+ locks = "No locks found."
+ if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
+ keys = client.read(lock.path, recursive=True)
+ if keys is not None:
+ locks = "Existing locks:\n "
+ locks += '\n '.join(
+ (child.key + ": " + child.value for child in
+ keys.children))
+ logging.debug("Lock contention. %s", locks)
+ print_nonl(1)
+ break
+ os.kill(ppid, 0)
+ time.sleep(etcd_config['lock_refresh'])
+ except (OSError, SystemExit):
+ if lock is not None and lock.is_acquired:
+ lock.release()
+ except Exception:
+ print_nonl(3)
+ if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
+ raise
+
+
+if __name__ == "__main__":
+ # sigterm_handler turns SIGTERM into SystemExit, which main() catches in
+ # order to release the lock
+ signal.signal(signal.SIGTERM, sigterm_handler)
+
+ main()
diff --git a/ctdb/utils/nagios/README b/ctdb/utils/nagios/README
new file mode 100644
index 0000000..99fa6dc
--- /dev/null
+++ b/ctdb/utils/nagios/README
@@ -0,0 +1,56 @@
+check_ctdb 0.3
+
+This nagios plugin is free software, and comes with ABSOLUTELY NO WARRANTY.
+It may be used, redistributed and/or modified under the terms of the GNU
+General Public Licence (see http://www.fsf.org/licensing/licenses/gpl.txt).
+
+CTDB plugin
+
+Usage: check_ctdb -i <info>
+ [ -t <timeout> ] [ -w <warn_range> ] [ -c <crit_range> ]
+ [ -H <host> ] [-s] [ -l <login_name> ]
+ [ -V ] [ -h ]
+
+ -?, --usage
+ Print usage information
+ -h, --help
+ Print detailed help screen
+ -V, --version
+ Print version information
+ --extra-opts=[section][@file]
+ Read options from an ini file. See http://nagiosplugins.org/extra-opts for usage
+ -i, --info=<info>
+ Information: One of scriptstatus or ping.
+ -H, --hostname=<host>
+ Host name or IP Address.
+ -s, --sudo
+ Use sudo.
+ -l, --login=<login_name>
+ The user to log in as on the remote machine.
+ -w, --warning=THRESHOLD
+ Warning threshold. See
+ http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
+ for the threshold format.
+ -c, --critical=THRESHOLD
+ Critical threshold. See
+ http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
+ for the threshold format.
+ -t, --timeout=INTEGER
+ Seconds before plugin times out (default: 30)
+ -v, --verbose
+ Show details for command-line debugging (can repeat up to 3 times)
+Supported commands:
+ * scriptstatus :
+ check the ctdb scriptstatus command and return CRITICAL if one of the
+ scripts fails.
+ Perfdata count the number of scripts by state (ok, disabled, error,
+ total).
+ * ping :
+ check the ctdb ping command.
+ Perfdata count the number of nodes, the total ping time and the number
+ of clients.
+ Thresholds are checked against the number of nodes.
+
+
+Copyright (c) 2011 Nantes Metropole
+
diff --git a/ctdb/utils/nagios/check_ctdb b/ctdb/utils/nagios/check_ctdb
new file mode 100755
index 0000000..7803f9a
--- /dev/null
+++ b/ctdb/utils/nagios/check_ctdb
@@ -0,0 +1,279 @@
+#!/usr/bin/perl -w
+# Nagios plugin to monitor CTDB (Clustered Trivial Database)
+#
+# License: GPL
+# Copyright (c) 2011 Nantes Metropole
+# Author: Mathieu Parent <math.parent@gmail.com>
+# Contributor(s): -
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+use strict;
+use warnings;
+use vars qw($PROGNAME $VERSION $output $values $result);
+use Nagios::Plugin;
+use File::Basename;
+
+$PROGNAME = basename($0);
+$VERSION = '0.4';
+
+# Create the plugin object. The usage line, blurb and "extra" text below are
+# the strings shown by the plugin's help output; the plugin timeout is set
+# to 30 seconds.
+my $np = Nagios::Plugin->new(
+ usage => "Usage: %s -i <info>\n"
+ . " [ -t <timeout> ] [ -w <warn_range> ] [ -c <crit_range> ]\n"
+ . " [ -H <host> ] [-s] [ -l <login_name> ]\n"
+ . ' [ -V ] [ -h ]',
+ version => $VERSION,
+ plugin => $PROGNAME,
+ shortname => uc($PROGNAME),
+ blurb => 'CTDB plugin',
+ # Extended help: one paragraph per value accepted by -i/--info.
+ extra => "Supported commands:\n"
+ . " * scriptstatus :\n"
+ . " check the ctdb scriptstatus command and return CRITICAL if one of the\n"
+ . " scripts fails.\n"
+ . " Perfdata count the number of scripts by state (ok, disabled, error,\n"
+ . " total).\n"
+ . " * ping :\n"
+ . " check the ctdb ping command.\n"
+ . " Perfdata count the number of nodes, the total ping time and the number\n"
+ . " of clients.\n"
+ . " Thresholds are checked against the number of nodes.\n"
+ . "\n\nCopyright (c) 2011 Nantes Metropole",
+ timeout => 30,
+);
+
+# Command-line options. Only -i/--info is mandatory.
+# The <host>/<login_name> placeholders match the usage line
+# ("[ -H <host> ] [-s] [ -l <login_name> ]").
+
+# -i: which check to run (dispatched on in the main section).
+$np->add_arg(
+    spec => 'info|i=s',
+    help => "-i, --info=<info>\n"
+          . ' Information: One of scriptstatus or ping.',
+    required => 1,
+);
+
+# -H: when set, the ctdb command is run on this host through ssh.
+$np->add_arg(
+    spec => 'hostname|H=s',
+    help => "-H, --hostname=<host>\n"
+          . ' Host name or IP Address.',
+    required => 0,
+);
+
+# -s: prefix the ctdb command with sudo.
+$np->add_arg(
+    spec => 'sudo|s',
+    help => "-s, --sudo\n"
+          . ' Use sudo.',
+    required => 0,
+);
+
+# -l: user name handed to "ssh -l" (only used together with -H).
+$np->add_arg(
+    spec => 'login|l=s',
+    help => "-l, --login=<login_name>\n"
+          . ' The user to log in as on the remote machine.',
+    required => 0,
+);
+
+# -w: warning threshold.
+$np->add_arg(
+    spec => 'warning|w=s',
+    help => "-w, --warning=THRESHOLD\n"
+          . " Warning threshold. See\n"
+          . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n"
+          . ' for the threshold format.',
+    required => 0,
+);
+
+# -c: critical threshold.
+$np->add_arg(
+    spec => 'critical|c=s',
+    help => "-c, --critical=THRESHOLD\n"
+          . " Critical threshold. See\n"
+          . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n"
+          . ' for the threshold format.',
+    required => 0,
+);
+
+# Parse the command line and copy the option values into file-scoped
+# variables used by the helper subs and the main section below.
+$np->getopts;
+
+my $info = $np->opts->info;
+my $hostname = $np->opts->hostname;
+my $login = $np->opts->login;
+my $sudo = $np->opts->sudo;
+my $warning = $np->opts->warning;
+my $critical = $np->opts->critical;
+# Set to the result of the '%' substitution in check_percantage();
+# not read anywhere else in this script.
+my $percw;
+my $percc;
+
+# Message accumulated by the checks and passed to nagios_exit().
+$output = "";
+
+# Strip a '%' from the thresholds; a threshold that is empty afterwards is
+# treated as "not given".
+if (defined($critical))
+{
+ ($percc, $critical) = check_percantage($critical);
+ $critical = undef if ($critical eq '');
+}
+
+if (defined($warning))
+{
+ ($percw, $warning) = check_percantage($warning);
+ $warning = undef if ($warning eq '');
+}
+
+$np->set_thresholds(critical => $critical, warning => $warning);
+
+# In-memory buffer that STDERR is redirected to by safe_open_command().
+my $stderr;
+
+# Start a command and attach its standard output to the PIPE filehandle.
+#
+# Arguments: the command and its arguments, as a list.
+# "sudo" is prepended when -s was given; when -H was given the command is
+# additionally wrapped in "ssh [-l <login>] <host>".
+#
+# STDERR is saved in OLDERR and redirected to the in-memory $stderr buffer;
+# safe_close_command() restores it after a successful open.
+# On failure $result is set to CRITICAL, a message is appended to $output
+# and STDERR is restored here.
+sub safe_open_command {
+    unshift @_, "sudo" if $sudo;
+    if ($hostname) {
+        unshift @_, $hostname;
+        unshift @_, "-l", $login if $login;
+        unshift @_, "ssh";
+    }
+    open(OLDERR, ">&", \*STDERR) or die "Can't dup STDERR: $!";
+    $stderr = "";
+    close STDERR;
+    open(STDERR, ">>", \$stderr) or die "Can't open STDERR: $!";
+    if ($np->opts->verbose) {
+        print "Executing: @_\n";
+    }
+    if (!open(PIPE, '-|', @_)) {
+        $result = CRITICAL;
+        $output .= "Cannot open command '@_': $! ($stderr). ";
+        # restore STDERR: the mode must be ">&" (duplicate the saved
+        # handle), as in safe_close_command(); ">" does not dup OLDERR.
+        open(STDERR, ">&", \*OLDERR) or die "Can't dup OLDERR: $!";
+    }
+}
+
+# Close PIPE, translate the child's wait status ($?) into $result/$output
+# and restore the STDERR handle saved by safe_open_command().
+# $result is left untouched when the child exited with status 0.
+sub safe_close_command {
+ close(PIPE);
+
+ if ($? == -1) {
+ # the command could not be run at all
+ $result = CRITICAL;
+ $output .= "failed to execute: $!. ";
+ } elsif ($? & 127) {
+ # low 7 bits: terminating signal; bit 7: core dump flag
+ $result = CRITICAL;
+ $output .= sprintf("child died with signal %d, %s coredump. ",
+ ($? & 127), ($? & 128) ? 'with' : 'without');
+ } elsif ($? >> 8) {
+ # high byte: exit status of the child
+ if (($? >> 8) == 255) {
+ # ctdb returns -1=255 if any node is disconnected
+ $result = WARNING;
+ # only report the exit value if nothing more specific was collected
+ $output .= sprintf("child exited with value %d. ", $? >> 8) if $output eq "";
+ } else {
+ $result = CRITICAL;
+ $output .= sprintf("child exited with value %d. ", $? >> 8);
+ }
+ }
+ # restore STDERR
+ open(STDERR, ">&OLDERR") or die "Can't dup OLDERR: $!";
+}
+
+# main :
+
+# Dispatch on the -i/--info value. Each branch runs one ctdb command through
+# safe_open_command(), parses its output from PIPE, publishes perfdata and
+# leaves via nagios_exit().
+if ($info eq "scriptstatus") {
+    $result = OK;
+    safe_open_command('ctdb', '-X', 'scriptstatus');
+    if ($result == OK) {
+        my $script_count = 0;
+        my $ok_script_count = 0;
+        my $disabled_script_count = 0;
+        my $error_script_count = 0;
+        while (<PIPE>) {
+            next if $. == 1; # Header
+            $script_count++;
+            # chomp only removes a trailing newline; chop would eat a real
+            # character if the last line were not newline-terminated.
+            chomp;
+            # The separator has to be a regex matching a literal '|'.
+            # split("|") compiles to an empty alternation, which matches
+            # between every character and splits the line into single
+            # characters instead of fields.
+            my ($col0, $type, $name, $code, $status, $start, $end, @error) = split(/\|/, $_);
+            if ($col0 ne '') {
+                # Old version, before 30 Aug 2011 and commit a779d83a6213
+                ($type, $name, $code, $status, $start, $end, @error) = ($col0, $type, $name, $code, $status, $start, $end, @error);
+            }
+            # Everything after the end time is script output.
+            my $error = join(':', @error);
+            if ($error ne "") {
+                $output = "$output ;; " if $output;
+                $output = "$output$name ($status=$code): $error ";
+                if ($result != CRITICAL) {
+                    $result = WARNING;
+                }
+            }
+            if ($status eq "OK") {
+                $ok_script_count++;
+                next;
+            }
+            if ($status eq "DISABLED") {
+                $disabled_script_count++;
+                next;
+            }
+            # Any other status counts as an error.
+            $error_script_count++;
+            $result = WARNING;
+        }
+        safe_close_command();
+        $np->add_perfdata(label => "ok", value => $ok_script_count, uom => '',
+            min => 0, max => $script_count);
+        $np->add_perfdata(label => "disabled", value => $disabled_script_count, uom => '',
+            min => 0, max => $script_count);
+        $np->add_perfdata(label => "error", value => $error_script_count, uom => '',
+            min => 0, max => $script_count, warning => '0', critical => '0');
+        $np->add_perfdata(label => "total", value => $script_count, uom => '',
+            min => 0, max => $script_count);
+        if ($result == OK) {
+            $result = $np->check_threshold(check => $error_script_count, warning => '0', critical => '0');
+        }
+    }
+    $np->nagios_exit($result, $output);
+} elsif ($info eq "ping") {
+    # Get expected nodes count
+    $result = OK;
+    safe_open_command('cat', '/etc/ctdb/nodes');
+    # Drain the pipe; $. then holds the number of lines read.
+    1 while( <PIPE> );
+    my $max_nodes_count = $.;
+    safe_close_command();
+    # ctdb ping
+    $result = OK;
+    safe_open_command('ctdb', '-n', 'all', 'ping');
+    if ($result == OK) {
+        my $nodes_count = 0;
+        my $time_total = 0.0;
+        my $clients_count = 0;
+        while (<PIPE>) {
+            chomp;
+            if ($_ =~ /^response from (\d+) time=([0-9.]+) sec \((\d+) clients\)$/) {
+                my ($node_id, $time, $clients) = ($1,$2,$3);
+                $nodes_count += 1;
+                $time_total += $time;
+                $clients_count += $clients;
+            } elsif ($_ =~ /^Unable to get ping response from node (\d+)$/) {
+                # Unreachable node: not counted; reported below as missing.
+            } else {
+                $result = CRITICAL;
+                $output .= "'$_' doesn't match regexp. ";
+            }
+        }
+        $output .= sprintf("%d missing nodes. ", $max_nodes_count - $nodes_count) if $nodes_count < $max_nodes_count;
+        safe_close_command();
+        $np->add_perfdata(label => "nodes", value => $nodes_count, uom => '',
+            min => 0, max => $max_nodes_count, warning => $warning, critical => $critical);
+        $np->add_perfdata(label => "ping_time", value => $time_total, uom => 's',
+            min => 0, max => undef);
+        $np->add_perfdata(label => "clients", value => $clients_count, uom => '',
+            min => 0, max => undef);
+        if ($result == OK) {
+            # Uses the -w/-c thresholds registered with set_thresholds().
+            $result = $np->check_threshold(check => $nodes_count);
+        }
+    }
+    $np->nagios_exit($result, $output);
+} else {
+    $np->nagios_exit(UNKNOWN, "Unknown command: '$info'");
+}
+
+# Remove the first '%' from a threshold string.
+# Returns a two-element list: the result of the substitution (true when a
+# '%' was removed, false otherwise) followed by the stripped string.
+sub check_percantage
+{
+    my $threshold = $_[0];
+    my $had_percent = ($threshold =~ s/%//);
+    return ($had_percent, $threshold);
+}
+
diff --git a/ctdb/utils/ping_pong/ping_pong.c b/ctdb/utils/ping_pong/ping_pong.c
new file mode 100644
index 0000000..3d28f34
--- /dev/null
+++ b/ctdb/utils/ping_pong/ping_pong.c
@@ -0,0 +1,303 @@
+/*
+ A ping-pong fcntl byte range lock test
+
+ Copyright (C) Andrew Tridgell 2002
+ Copyright (C) Michael Adam 2012
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ This measures the ping-pong byte range lock latency. It is
+ especially useful on a cluster of nodes sharing a common lock
+ manager as it will give some indication of the lock managers
+ performance under stress.
+
+ tridge@samba.org, February 2002
+
+*/
+
+#define _XOPEN_SOURCE 500
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <time.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <stdbool.h>
+
+static struct timeval tp1,tp2;
+
+static int do_reads, do_writes, use_mmap, do_check, do_brl_test;
+
+/* record the current wall-clock time in tp1 as the start of an interval */
+static void start_timer(void)
+{
+ gettimeofday(&tp1,NULL);
+}
+
+/*
+ * Sample the current time into tp2 and return the number of seconds
+ * (with microsecond resolution) elapsed since the last start_timer().
+ */
+static double end_timer(void)
+{
+	double started;
+	double stopped;
+
+	gettimeofday(&tp2,NULL);
+	stopped = tp2.tv_sec + (tp2.tv_usec*1.0e-6);
+	started = tp1.tv_sec + (tp1.tv_usec*1.0e-6);
+	return stopped - started;
+}
+
+/*
+ * Request a write lock on len bytes of fd, starting at offset (relative to
+ * the start of the file).  With wait set the call uses F_SETLKW, otherwise
+ * F_SETLK.  Returns the result of fcntl().
+ */
+static int lock_range(int fd, int offset, int len, bool wait)
+{
+	const int cmd = wait ? F_SETLKW : F_SETLK;
+	struct flock fl = {
+		.l_type = F_WRLCK,
+		.l_whence = SEEK_SET,
+		.l_start = offset,
+		.l_len = len,
+		.l_pid = 0,
+	};
+
+	return fcntl(fd, cmd, &fl);
+}
+
+/*
+ * Check whether we could place a write lock on len bytes of fd starting at
+ * offset, using F_GETLK.
+ *
+ * Returns 0 if the lock could be placed, 1 if a conflicting lock is reported
+ * (its details are printed), and -1 if fcntl() itself failed.
+ */
+static int check_lock(int fd, int offset, int len)
+{
+	struct flock lock = {
+		.l_type = F_WRLCK,
+		.l_whence = SEEK_SET,
+		.l_start = offset,
+		.l_len = len,
+		.l_pid = 0,
+	};
+	int ret;
+
+	ret = fcntl(fd, F_GETLK, &lock);
+	if (ret != 0) {
+		/* name the command that was actually issued (F_GETLK) */
+		printf("error calling fcntl F_GETLK: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (lock.l_type == F_UNLCK) {
+		/* we would be able to place the lock */
+		return 0;
+	}
+
+	/* we would not be able to place lock */
+	printf("check_lock failed: lock held: "
+	       "pid='%d', type='%d', start='%d', len='%d'\n",
+	       (int)lock.l_pid, (int)lock.l_type, (int)lock.l_start, (int)lock.l_len);
+	return 1;
+}
+
+/*
+ * Release the lock on len bytes of fd starting at offset (relative to the
+ * start of the file).  Returns the result of fcntl().
+ */
+static int unlock_range(int fd, int offset, int len)
+{
+	struct flock fl = {
+		.l_type = F_UNLCK,
+		.l_whence = SEEK_SET,
+		.l_start = offset,
+		.l_len = len,
+		.l_pid = 0,
+	};
+
+	return fcntl(fd, F_SETLKW, &fl);
+}
+
+/*
+ * Run the ping pong test on fd.
+ *
+ * The file is resized to num_locks+1 bytes (and mapped when use_mmap is
+ * set).  After taking the lock on byte 0, each iteration takes the lock on
+ * byte (i+1) % num_locks, optionally checks (do_check), reads (do_reads)
+ * and writes (do_writes) byte i, releases byte i and moves on to the next
+ * byte.  Roughly once per second the lock rate is printed.
+ *
+ * The loop only ends when check_lock() returns non-zero; the function also
+ * returns early if setup or the initial lock fails.
+ */
+static void ping_pong(int fd, int num_locks)
+{
+	unsigned count = 0;
+	int i = 0, loops = 0;
+	unsigned char *val;
+	unsigned char incr = 0, last_incr = 0;
+	unsigned char *p = NULL;
+	int ret;
+
+	ret = ftruncate(fd, num_locks+1);
+	if (ret == -1) {
+		printf("ftruncate failed: %s\n", strerror(errno));
+		return;
+	}
+
+	if (use_mmap) {
+		p = mmap(NULL, num_locks+1, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+		if (p == MAP_FAILED) {
+			printf("mmap failed: %s\n", strerror(errno));
+			return;
+		}
+	}
+
+	/* last value seen for each byte, used to compute the increment */
+	val = calloc(num_locks+1, sizeof(*val));
+	if (val == NULL) {
+		printf("calloc failed\n");
+		if (use_mmap) {
+			munmap(p, num_locks+1);
+		}
+		return;
+	}
+
+	start_timer();
+
+	ret = lock_range(fd, 0, 1, true);
+	if (ret != 0) {
+		printf("initial lock at 0 failed! - %s\n", strerror(errno));
+		goto done;
+	}
+
+	i = 0;
+
+	while (1) {
+		if (lock_range(fd, (i+1) % num_locks, 1, true) != 0) {
+			printf("lock at %d failed! - %s\n",
+			       (i+1) % num_locks, strerror(errno));
+		}
+		if (do_check) {
+			ret = check_lock(fd, i, 1);
+			if (ret != 0) {
+				goto done;
+			}
+		}
+		if (do_reads) {
+			unsigned char c = 0;
+			bool have_byte = true;
+
+			if (use_mmap) {
+				c = p[i];
+			} else if (pread(fd, &c, 1, i) != 1) {
+				printf("read failed at %d\n", i);
+				have_byte = false;
+			}
+			/*
+			 * Only update the bookkeeping when a byte was really
+			 * read; a failed pread() leaves c without file data.
+			 */
+			if (have_byte) {
+				incr = c - val[i];
+				val[i] = c;
+			}
+		}
+		if (do_writes) {
+			char c = val[i] + 1;
+			if (use_mmap) {
+				p[i] = c;
+			} else if (pwrite(fd, &c, 1, i) != 1) {
+				printf("write failed at %d\n", i);
+			}
+		}
+		if (unlock_range(fd, i, 1) != 0) {
+			printf("unlock at %d failed! - %s\n",
+			       i, strerror(errno));
+		}
+		i = (i+1) % num_locks;
+		count++;
+		/* report a changed increment once every byte has been visited */
+		if (loops > num_locks && incr != last_incr) {
+			last_incr = incr;
+			printf("data increment = %u\n", incr);
+			fflush(stdout);
+		}
+		if (end_timer() > 1.0) {
+			/* two lock operations (lock + unlock) per iteration */
+			printf("%8u locks/sec\r",
+			       (unsigned)(2*count/end_timer()));
+			fflush(stdout);
+			start_timer();
+			count=0;
+		}
+		loops++;
+	}
+
+done:
+	if (use_mmap) {
+		munmap(p, num_locks+1);
+	}
+	free(val);
+}
+
+/* print the command line synopsis and option summary to stdout */
+static void usage(void)
+{
+	static const char text[] =
+		"ping_pong -rwmc <file> <num_locks>\n"
+		"ping_pong -l <file>\n\n"
+		"Options\n"
+		" -r do reads\n"
+		" -w do writes\n"
+		" -m use mmap\n"
+		" -c check locks\n"
+		" -l test for working byte range locks\n";
+
+	fputs(text, stdout);
+}
+
+/*
+ * Parse the options, open (creating if necessary) the test file and run
+ * either the byte range lock check (-l) or the ping pong test.
+ *
+ * Exits with status 1 on a usage error or when the file cannot be opened.
+ */
+int main(int argc, char *argv[])
+{
+	char *fname;
+	int fd, num_locks;
+	int c;
+
+	while ((c = getopt(argc, argv, "rwmcl")) != -1) {
+		switch (c){
+		case 'w':
+			do_writes = 1;
+			break;
+		case 'r':
+			do_reads = 1;
+			break;
+		case 'm':
+			use_mmap = 1;
+			break;
+		case 'c':
+			do_check = 1;
+			break;
+		case 'l':
+			do_brl_test = 1;
+			break;
+		default:
+			/*
+			 * getopt() returns '?' for an unrecognised option;
+			 * the offending character is in optopt.
+			 */
+			fprintf(stderr, "Unknown option '%c'\n", optopt);
+			exit(1);
+		}
+	}
+
+	argv += optind;
+	argc -= optind;
+
+	if (argc < 1) {
+		usage();
+		exit(1);
+	}
+
+	fname = argv[0];
+
+	fd = open(fname, O_CREAT|O_RDWR, 0600);
+	if (fd == -1) {
+		/* say why we are giving up instead of exiting silently */
+		fprintf(stderr, "Failed to open '%s': %s\n",
+			fname, strerror(errno));
+		exit(1);
+	}
+
+	if (do_brl_test) {
+		/* non-blocking attempt to lock from offset 0 with length 0 */
+		if (lock_range(fd, 0, 0, false) != 0) {
+			printf("file already locked, calling check_lock to tell us who has it locked:\n");
+			(void)check_lock(fd, 0, 0);
+			printf("Working POSIX byte range locks\n");
+			exit(0);
+		}
+
+		printf("Holding lock, press any key to continue...\n");
+		printf("You should run the same command on another node now.\n");
+		(void)getchar();
+		printf("Good bye.\n");
+		exit(0);
+	}
+
+	if (argc < 2) {
+		usage();
+		exit(1);
+	}
+
+	num_locks = atoi(argv[1]);
+	if (num_locks <= 0) {
+		printf("num_locks should be > 0\n");
+		exit(1);
+	}
+
+	ping_pong(fd, num_locks);
+
+	return 0;
+}
diff --git a/ctdb/utils/pmda/Install b/ctdb/utils/pmda/Install
new file mode 100644
index 0000000..a56a635
--- /dev/null
+++ b/ctdb/utils/pmda/Install
@@ -0,0 +1,36 @@
+#! /bin/sh
+#
+# Copyright (c) 1997 Silicon Graphics, Inc. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Install the ctdb PMDA and/or PMNS
+#
+
+. $PCP_DIR/etc/pcp.env
+. $PCP_SHARE_DIR/lib/pmdaproc.sh
+
+iam=ctdb
+pmda_interface=2
+
+# runs as daemon and only supports pipe IPC
+daemon_opt=true
+dso_opt=false
+pipe_opt=true
+socket_opt=false
+
+pmdaSetup
+pmdaInstall
+exit 0
diff --git a/ctdb/utils/pmda/README b/ctdb/utils/pmda/README
new file mode 100644
index 0000000..f8dbbbc
--- /dev/null
+++ b/ctdb/utils/pmda/README
@@ -0,0 +1,84 @@
+CTDB PMDA
+===========
+
+This PMDA extracts metrics from the locally running ctdbd daemon for
+export to PMCD.
+
+Note:
+ This PMDA may be remade from source and hence requires IDO (or
+ more specifically a C compiler) to be installed.
+
+ Uses of make(1) may fail (without removing or clobbering files)
+ if the C compiler cannot be found. This is most likely to
+ happen when running the PMDA ./Install script.
+
+ The only remedial action is to install the C compiler, or
+ hand-craft changes to the Makefile.
+
+Metrics
+=======
+
+The file ./help contains descriptions for all of the metrics exported
+by this PMDA.
+
+Once the PMDA has been installed, the following command will list all
+the available metrics and their explanatory "help" text:
+
+ $ pminfo -fT ctdb
+
+Installation
+============
+
+ + # cd $PCP_PMDAS_DIR/ctdb
+
+ + Check that there is no clash in the Performance Metrics Domain
+ defined in ./domain.h and the other PMDAs currently in use (see
+ $PCP_PMCDCONF_PATH). If there is, edit ./domain.h to choose another
+ domain number.
+
+ + Then simply use
+
+ # ./Install
+
+ and choose both the "collector" and "monitor" installation
+ configuration options.
+
+ You will be prompted to choose either a daemon implementation
+ or a DSO implementation of the PMDA, and in the case of the daemon
+ variant to select an IPC method -- everything else is automated
+
+De-installation
+===============
+
+ + Simply use
+
+ # cd $PCP_PMDAS_DIR/ctdb
+ # ./Remove
+
+Troubleshooting
+===============
+
+ + After installing or restarting the agent, the PMCD log file
+ ($PCP_LOG_DIR/pmcd/pmcd.log) and the PMDA log file
+ ($PCP_LOG_DIR/pmcd/pmda_ctdb.log) should be checked for any warnings
+ or errors.
+
+
+Adding a New Metric
+===================
+
+This section walks through the development task of adding a new metric to the
+CTDB PMDA.
+
+ + Define the metric in the pmns file with a unique metric id. See the pmns(4)
+ man page for details.
+
+ + Add a description of the metric to the help file.
+
+ + Taking note of the previously assigned metric id, add a new entry to the
+ metrictab structure in pmda_ctdb.c. See the pmdaInit(3) man page for
+ details.
+
+ + Ensure the counter is already a member of the ctdb_statistics structure.
+ Finally, add code to pmda_ctdb_fetch_cb() to handle fetch requests for the
+ newly defined metric.
diff --git a/ctdb/utils/pmda/Remove b/ctdb/utils/pmda/Remove
new file mode 100644
index 0000000..7d1c509
--- /dev/null
+++ b/ctdb/utils/pmda/Remove
@@ -0,0 +1,29 @@
+#! /bin/sh
+#
+# Copyright (c) 1997 Silicon Graphics, Inc. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Remove the ctdb PMDA
+#
+
+. $PCP_DIR/etc/pcp.env
+. $PCP_SHARE_DIR/lib/pmdaproc.sh
+
+iam=ctdb
+
+pmdaSetup
+pmdaRemove
+exit 0
diff --git a/ctdb/utils/pmda/domain.h b/ctdb/utils/pmda/domain.h
new file mode 100644
index 0000000..0bed7fe
--- /dev/null
+++ b/ctdb/utils/pmda/domain.h
@@ -0,0 +1,19 @@
+/* domain.h
+ *
+ * Copyright (c) 2004-2009 Silicon Graphics, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#define CTDB 110
diff --git a/ctdb/utils/pmda/help b/ctdb/utils/pmda/help
new file mode 100644
index 0000000..0e9984e
--- /dev/null
+++ b/ctdb/utils/pmda/help
@@ -0,0 +1,106 @@
+#
+# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# ctdb PMDA help file in the ASCII format
+#
+# lines beginning with a # are ignored
+# lines beginning @ introduce a new entry of the form
+# @ metric_name oneline-text
+# help text goes
+# here over multiple lines
+# ...
+#
+# the metric_name is decoded against the default PMNS -- as a special case,
+# a name of the form NNN.MM (for numeric NNN and MM) is interpreted as an
+# instance domain identification, and the text describes the instance domain
+#
+# blank lines before the @ line are ignored
+#
+
+@ ctdb.num_clients number of clients connected to ctdbd
+
+@ ctdb.frozen whether any databases are frozen
+
+@ ctdb.recovering whether recovery is active
+
+@ ctdb.client_packets_sent number of packets sent to all clients
+
+@ ctdb.client_packets_recv number of packets received from all clients
+
+@ ctdb.node_packets_sent number of packets sent to other nodes
+
+@ ctdb.node_packets_recv number of packets received from other nodes
+
+@ ctdb.keepalive_packets_sent number of keepalive packets sent to other nodes
+
+@ ctdb.keepalive_packets_recv number of keepalive packets received from other nodes
+
+@ ctdb.node.req_call number of node CTDB_REQ_CALL packets handled
+
+@ ctdb.node.reply_call number of node CTDB_REPLY_CALL packets handled
+
+@ ctdb.node.req_dmaster number of node CTDB_REQ_DMASTER packets handled
+
+@ ctdb.node.reply_dmaster number of node CTDB_REPLY_DMASTER packets handled
+
+@ ctdb.node.reply_error number of node CTDB_REPLY_ERROR packets handled
+
+@ ctdb.node.req_message number of node CTDB_REQ_MESSAGE packets handled
+
+@ ctdb.node.req_control number of node CTDB_REQ_CONTROL packets handled
+
+@ ctdb.node.reply_control number of node CTDB_REPLY_CONTROL packets handled
+
+@ ctdb.client.req_call number of client CTDB_REQ_CALL packets handled
+
+@ ctdb.client.req_message number of client CTDB_REQ_MESSAGE packets handled
+
+@ ctdb.client.req_control number of client CTDB_REQ_CONTROL packets handled
+
+@ ctdb.timeouts.call (counter not implemented) number of call timeouts
+
+@ ctdb.timeouts.control number of node control message request timeouts awaiting reply
+
+@ ctdb.timeouts.traverse number of database traversal timeouts
+
+@ ctdb.total_calls total number of client ctdb request calls received
+
+@ ctdb.pending_calls total number of client ctdb request calls in progress
+
+@ ctdb.lockwait_calls number of tdb chainlock lockwait calls
+
+@ ctdb.pending_lockwait_calls number of lockwait calls waiting for a lock
+
+@ ctdb.childwrite_calls number of childwrite calls
+
+@ ctdb.pending_childwrite_calls number of childwrite calls in progress
+
+@ ctdb.memory_used total size of the ctdbd null talloc pool
+
+@ ctdb.max_hop_count maximum hops performed by a CTDB_REQ_CALL packet
+
+@ ctdb.max_reclock_ctdbd maximum recovery lock latency during setrecmode
+
+@ ctdb.max_reclock_recd maximum recovery lock latency as reported by the recovery process
+
+@ ctdb.max_call_latency maximum time spent handling a client request call
+
+@ ctdb.max_lockwait_latency maximum time spent waiting for a tdb chainlock
+
+@ ctdb.max_childwrite_latency maximum time spent performing a childwrite
+
+@ ctdb.num_recoveries number of recoveries finished
diff --git a/ctdb/utils/pmda/pmda_ctdb.c b/ctdb/utils/pmda/pmda_ctdb.c
new file mode 100644
index 0000000..4f7933d
--- /dev/null
+++ b/ctdb/utils/pmda/pmda_ctdb.c
@@ -0,0 +1,559 @@
+/*
+ * CTDB Performance Metrics Domain Agent (PMDA) for Performance Co-Pilot (PCP)
+ *
+ * Copyright (c) 1995,2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2011 David Disseldorp
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+#include <tdb.h>
+
+#include "lib/util/time.h"
+#include "lib/util/blocking.h"
+
+#include "client/client.h"
+#include "client/client_sync.h"
+
+#include <pcp/pmapi.h>
+#include <pcp/pmda.h>
+
+#ifdef HAVE___PMID_INT
+#include <pcp/impl.h>
+
+#define pmID_cluster(id) id->cluster
+#define pmID_item(id) id->item
+#define pmGetProgname() pmProgname
+#define pmSetProgname(a) __pmSetProgname(a)
+#endif
+
+#include "domain.h"
+
+/*
+ * CTDB PMDA
+ *
+ * This PMDA connects to the locally running ctdbd daemon and pulls
+ * statistics for export via PCP. The ctdbd Unix domain socket path can be
+ * specified with the CTDB_SOCKET environment variable, otherwise the default
+ * path is used.
+ */
+
+/*
+ * All metrics supported in this PMDA - one table entry for each.
+ * Each PMDA_PMID(cluster,item) pair is dispatched on by
+ * pmda_ctdb_fetch_cb(): cluster 0 holds the top-level statistics,
+ * cluster 1 the per-node packet counters, cluster 2 the client packet
+ * counters and cluster 3 the timeout counters.
+ * The 4th field specifies the serial number of the instance domain
+ * for the metric, and must be either PM_INDOM_NULL (denoting a
+ * metric that only ever has a single value), or the serial number
+ * of one of the instance domains declared in an instance domain table.
+ * This PMDA declares no instance domain table (pmdaInit() is passed
+ * NULL for it), so every entry here uses PM_INDOM_NULL.
+ */
+static pmdaMetric metrictab[] = {
+ /* num_clients */
+ { NULL, { PMDA_PMID(0,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* frozen */
+ { NULL, { PMDA_PMID(0,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* recovering */
+ { NULL, { PMDA_PMID(0,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* client_packets_sent */
+ { NULL, { PMDA_PMID(0,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* client_packets_recv */
+ { NULL, { PMDA_PMID(0,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node_packets_sent */
+ { NULL, { PMDA_PMID(0,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node_packets_recv */
+ { NULL, { PMDA_PMID(0,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* keepalive_packets_sent */
+ { NULL, { PMDA_PMID(0,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* keepalive_packets_recv */
+ { NULL, { PMDA_PMID(0,8), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.req_call */
+ { NULL, { PMDA_PMID(1,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.reply_call */
+ { NULL, { PMDA_PMID(1,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.req_dmaster */
+ { NULL, { PMDA_PMID(1,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.reply_dmaster */
+ { NULL, { PMDA_PMID(1,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.reply_error */
+ { NULL, { PMDA_PMID(1,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.req_message */
+ { NULL, { PMDA_PMID(1,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.req_control */
+ { NULL, { PMDA_PMID(1,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* node.reply_control */
+ { NULL, { PMDA_PMID(1,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* client.req_call */
+ { NULL, { PMDA_PMID(2,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* client.req_message */
+ { NULL, { PMDA_PMID(2,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* client.req_control */
+ { NULL, { PMDA_PMID(2,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* timeouts.call */
+ { NULL, { PMDA_PMID(3,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,0) }, },
+ /* timeouts.control */
+ { NULL, { PMDA_PMID(3,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,0) }, },
+ /* timeouts.traverse */
+ { NULL, { PMDA_PMID(3,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,0) }, },
+ /* total_calls */
+ { NULL, { PMDA_PMID(0,9), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* pending_calls */
+ { NULL, { PMDA_PMID(0,10), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* locks.num_calls */
+ { NULL, { PMDA_PMID(0,11), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* locks.num_pending */
+ { NULL, { PMDA_PMID(0,12), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* childwrite_calls */
+ { NULL, { PMDA_PMID(0,13), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+ /* pending_childwrite_calls */
+ { NULL, { PMDA_PMID(0,14), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* memory_used */
+ { NULL, { PMDA_PMID(0,15), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE,0,0) }, },
+ /* max_hop_count */
+ { NULL, { PMDA_PMID(0,16), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+ /* reclock.ctdbd.max */
+ { NULL, { PMDA_PMID(0,17), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* reclock.recd.max */
+ { NULL, { PMDA_PMID(0,18), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* call_latency.max */
+ { NULL, { PMDA_PMID(0,19), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* locks.latency.max */
+ { NULL, { PMDA_PMID(0,20), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* childwrite_latency.max */
+ { NULL, { PMDA_PMID(0,21), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
+ /* num_recoveries */
+ { NULL, { PMDA_PMID(0,22), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) }, },
+};
+
+/* Event context created by pmda_ctdb_daemon_connect() for the ctdbd client. */
+static struct tevent_context *ev;
+/* Connection to ctdbd; NULL means "not connected" and triggers a reconnect
+ * on the next fetch. */
+static struct ctdb_client_context *client;
+/* Statistics snapshot filled in by pmda_ctdb_fetch() and read by the
+ * fill_*() helpers during that same fetch. */
+static struct ctdb_statistics *stats;
+
+/*
+ * Disconnect callback registered with the ctdb client: report the loss of
+ * ctdbd and drop the client so that the next fetch attempts a reconnect.
+ */
+static void
+pmda_ctdb_disconnected(void *args)
+{
+	(void)args;	/* registered with NULL private data */
+
+	fprintf(stderr, "ctdbd unreachable\n");
+	TALLOC_FREE(client);
+}
+
+
+/*
+ * Create a fresh event context and connect to the local ctdbd.
+ *
+ * The socket path is taken from the CTDB_SOCKET environment variable,
+ * falling back to the compiled-in CTDB_SOCKET default.
+ *
+ * Returns 0 on success. On failure returns -1 with both the global
+ * client and ev reset to NULL, so no stale pointer is left behind.
+ */
+static int
+pmda_ctdb_daemon_connect(void)
+{
+	const char *socket_name;
+	int ret;
+
+	ev = tevent_context_init(NULL);
+	if (ev == NULL) {
+		fprintf(stderr, "Failed to init event ctx\n");
+		return -1;
+	}
+
+	socket_name = getenv("CTDB_SOCKET");
+	if (socket_name == NULL) {
+		socket_name = CTDB_SOCKET;
+	}
+
+	ret = ctdb_client_init(ev, ev, socket_name, &client);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to connect to ctdb daemon via %s\n",
+			socket_name);
+		client = NULL;
+		/* TALLOC_FREE also NULLs ev, avoiding a dangling global */
+		TALLOC_FREE(ev);
+		return -1;
+	}
+
+	ctdb_client_set_disconnect_callback(client, pmda_ctdb_disconnected,
+					    NULL);
+
+	return 0;
+}
+
+/*
+ * Tear down the ctdbd connection and its event context.
+ *
+ * Both globals are reset to NULL so that nothing is left pointing at
+ * freed memory; client == NULL makes the next fetch reconnect.
+ */
+static void
+pmda_ctdb_daemon_disconnect(void)
+{
+	TALLOC_FREE(client);
+	TALLOC_FREE(ev);
+}
+
+/*
+ * Copy the cluster 0 (top-level) statistic selected by item from the
+ * global stats snapshot into atom.
+ *
+ * Items 17-21 are stored through atom->d; all other items through
+ * atom->ul, matching the types declared in metrictab.
+ *
+ * Returns 0 on success or PM_ERR_PMID for an unknown item.
+ */
+static int
+fill_base(unsigned int item, pmAtomValue *atom)
+{
+	switch (item) {
+	case 0:  atom->ul = stats->num_clients;              return 0;
+	case 1:  atom->ul = stats->frozen;                   return 0;
+	case 2:  atom->ul = stats->recovering;               return 0;
+	case 3:  atom->ul = stats->client_packets_sent;      return 0;
+	case 4:  atom->ul = stats->client_packets_recv;      return 0;
+	case 5:  atom->ul = stats->node_packets_sent;        return 0;
+	case 6:  atom->ul = stats->node_packets_recv;        return 0;
+	case 7:  atom->ul = stats->keepalive_packets_sent;   return 0;
+	case 8:  atom->ul = stats->keepalive_packets_recv;   return 0;
+	case 9:  atom->ul = stats->total_calls;              return 0;
+	case 10: atom->ul = stats->pending_calls;            return 0;
+	case 11: atom->ul = stats->locks.num_calls;          return 0;
+	case 12: atom->ul = stats->locks.num_pending;        return 0;
+	case 13: atom->ul = stats->childwrite_calls;         return 0;
+	case 14: atom->ul = stats->pending_childwrite_calls; return 0;
+	case 15: atom->ul = stats->memory_used;              return 0;
+	case 16: atom->ul = stats->max_hop_count;            return 0;
+	case 17: atom->d = stats->reclock.ctdbd.max;         return 0;
+	case 18: atom->d = stats->reclock.recd.max;          return 0;
+	case 19: atom->d = stats->call_latency.max;          return 0;
+	case 20: atom->d = stats->locks.latency.max;         return 0;
+	case 21: atom->d = stats->childwrite_latency.max;    return 0;
+	case 22: atom->ul = stats->num_recoveries;           return 0;
+	default:
+		return PM_ERR_PMID;
+	}
+}
+
+/*
+ * Copy the cluster 1 (stats->node.*) packet counter selected by item
+ * from the global stats snapshot into atom->ul.
+ *
+ * Returns 0 on success or PM_ERR_PMID for an unknown item.
+ */
+static int
+fill_node(unsigned int item, pmAtomValue *atom)
+{
+	switch (item) {
+	case 0: atom->ul = stats->node.req_call;      return 0;
+	case 1: atom->ul = stats->node.reply_call;    return 0;
+	case 2: atom->ul = stats->node.req_dmaster;   return 0;
+	case 3: atom->ul = stats->node.reply_dmaster; return 0;
+	case 4: atom->ul = stats->node.reply_error;   return 0;
+	case 5: atom->ul = stats->node.req_message;   return 0;
+	case 6: atom->ul = stats->node.req_control;   return 0;
+	case 7: atom->ul = stats->node.reply_control; return 0;
+	default:
+		return PM_ERR_PMID;
+	}
+}
+
+
+/*
+ * Copy the cluster 2 (stats->client.*) packet counter selected by item
+ * from the global stats snapshot into atom->ul.
+ *
+ * Returns 0 on success or PM_ERR_PMID for an unknown item.
+ */
+static int
+fill_client(unsigned int item, pmAtomValue *atom)
+{
+	switch (item) {
+	case 0: atom->ul = stats->client.req_call;    return 0;
+	case 1: atom->ul = stats->client.req_message; return 0;
+	case 2: atom->ul = stats->client.req_control; return 0;
+	default:
+		return PM_ERR_PMID;
+	}
+}
+
+/*
+ * Copy the cluster 3 (stats->timeouts.*) counter selected by item from
+ * the global stats snapshot into atom->ul.
+ *
+ * Returns 0 on success or PM_ERR_PMID for an unknown item.
+ */
+static int
+fill_timeout(unsigned int item, pmAtomValue *atom)
+{
+	switch (item) {
+	case 0: atom->ul = stats->timeouts.call;     return 0;
+	case 1: atom->ul = stats->timeouts.control;  return 0;
+	case 2: atom->ul = stats->timeouts.traverse; return 0;
+	default:
+		return PM_ERR_PMID;
+	}
+}
+
+/*
+ * Callback provided to pmdaFetch: produce the value of one metric.
+ *
+ * The metric's PMID cluster selects the fill_*() helper and its item
+ * number selects the statistic within that cluster.
+ *
+ * Returns 0 on success, PM_ERR_INST if an instance was requested,
+ * PM_ERR_VALUE if no statistics snapshot is available, or PM_ERR_PMID
+ * for an unknown cluster or item.
+ */
+static int
+pmda_ctdb_fetch_cb(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom)
+{
+#ifdef HAVE___PMID_INT
+	__pmID_int *id = (__pmID_int *)&(mdesc->m_desc.pmid);
+#else
+	pmID id = *(pmID *)&(mdesc->m_desc.pmid);
+#endif
+
+	/* every metric in metrictab uses PM_INDOM_NULL */
+	if (inst != PM_IN_NULL) {
+		return PM_ERR_INST;
+	}
+
+	if (stats == NULL) {
+		fprintf(stderr, "stats not available\n");
+		return PM_ERR_VALUE;
+	}
+
+	switch (pmID_cluster(id)) {
+	case 0:
+		return fill_base(pmID_item(id), atom);
+	case 1:
+		return fill_node(pmID_item(id), atom);
+	case 2:
+		return fill_client(pmID_item(id), atom);
+	case 3:
+		return fill_timeout(pmID_item(id), atom);
+	default:
+		return PM_ERR_PMID;
+	}
+}
+
+/*
+ * This routine is called once for each pmFetch(3) operation, so is a
+ * good place to do once-per-fetch functions, such as value caching or
+ * instance domain evaluation.
+ *
+ * It (re)connects to ctdbd if needed, pulls one statistics snapshot into
+ * the global stats, lets pmdaFetch() read it through pmda_ctdb_fetch_cb()
+ * and then releases the snapshot again.
+ *
+ * Returns the pmdaFetch() result, or PM_ERR_VALUE if ctdbd cannot be
+ * reached or the statistics control fails.
+ */
+static int
+pmda_ctdb_fetch(int numpmid, pmID pmidlist[], pmResult **resp, pmdaExt *pmda)
+{
+	int ret;
+
+	if (client == NULL) {
+		fprintf(stderr, "attempting reconnect to ctdbd\n");
+		ret = pmda_ctdb_daemon_connect();
+		if (ret < 0) {
+			fprintf(stderr, "reconnect failed\n");
+			return PM_ERR_VALUE;
+		}
+	}
+
+	ret = ctdb_ctrl_statistics(client, ev, client, CTDB_CURRENT_NODE,
+				   tevent_timeval_current_ofs(1,0), &stats);
+	if (ret != 0) {
+		fprintf(stderr, "ctdb control for statistics failed, reconnecting\n");
+		pmda_ctdb_daemon_disconnect();
+		/*
+		 * NOTE(review): client is passed as the third argument,
+		 * presumably the talloc parent of stats, so freeing client
+		 * above would release anything allocated - confirm. Just
+		 * forget the pointer so the fetch callback sees "no stats".
+		 */
+		stats = NULL;
+		return PM_ERR_VALUE;
+	}
+
+	ret = pmdaFetch(numpmid, pmidlist, resp, pmda);
+
+	/* free and NULL the snapshot so it cannot be read after release */
+	TALLOC_FREE(stats);
+	return ret;
+}
+
+void pmda_ctdb_init(pmdaInterface *dp);
+
+/*
+ * Initialise the agent: install the fetch entry point and the per-metric
+ * fetch callback, then register metrictab with no instance domains.
+ * Does nothing if the dispatch structure already carries an error status.
+ */
+void
+pmda_ctdb_init(pmdaInterface *dp)
+{
+	if (dp->status == 0) {
+		dp->version.two.fetch = pmda_ctdb_fetch;
+		pmdaSetFetchCallBack(dp, pmda_ctdb_fetch_cb);
+
+		pmdaInit(dp, NULL, 0, metrictab,
+			 (sizeof(metrictab) / sizeof(metrictab[0])));
+	}
+}
+
+/*
+ * Return the path of this PMDA's help file, "<PCP_PMDAS_DIR>/ctdb/help".
+ * The path is built on first use and cached in a static buffer.
+ */
+static char *
+helpfile(void)
+{
+	static char path[MAXPATHLEN];
+
+	if (path[0] == '\0') {
+		snprintf(path, sizeof(path), "%s/ctdb/help",
+			 pmGetConfig("PCP_PMDAS_DIR"));
+	}
+
+	return path;
+}
+
+/*
+ * Print the command-line synopsis to stderr and exit with status 1.
+ */
+static void
+usage(void)
+{
+	static const char options[] =
+		"Options:\n"
+		" -d domain use domain (numeric) for metrics domain of PMDA\n"
+		" -l logfile write log into logfile rather than using default log name\n"
+		"\nExactly one of the following options may appear:\n"
+		" -i port expect PMCD to connect on given inet port (number or name)\n"
+		" -p expect PMCD to supply stdin/stdout (pipe)\n"
+		" -u socket expect PMCD to connect on given unix domain socket\n";
+
+	fprintf(stderr, "Usage: %s [options]\n\n", pmGetProgname());
+	fputs(options, stderr);
+	exit(1);
+}
+
+/*
+ * Set up the agent if running as a daemon.
+ *
+ * Parses the standard PMDA options, then opens the log, registers the
+ * metrics, connects to PMCD and enters the PMDA main loop. Any option
+ * error, or a leftover option that pmdaGetOpt() does not consume, prints
+ * the usage text and exits.
+ */
+int
+main(int argc, char **argv)
+{
+	char log_file[] = "pmda_ctdb.log";
+	pmdaInterface dispatch;
+	int err = 0;
+
+	pmSetProgname(argv[0]);
+
+	pmdaDaemon(&dispatch, PMDA_INTERFACE_2, argv[0], CTDB,
+		   log_file, helpfile());
+
+	/* anything other than EOF means an option this PMDA does not handle */
+	if (pmdaGetOpt(argc, argv, "d:i:l:pu:?", &dispatch, &err) != EOF) {
+		err += 1;
+	}
+	if (err != 0) {
+		usage();
+	}
+
+	pmdaOpenLog(&dispatch);
+	pmda_ctdb_init(&dispatch);
+	pmdaConnect(&dispatch);
+	pmdaMain(&dispatch);
+
+	exit(0);
+}
+
diff --git a/ctdb/utils/pmda/pmns b/ctdb/utils/pmda/pmns
new file mode 100644
index 0000000..dc7e3ac
--- /dev/null
+++ b/ctdb/utils/pmda/pmns
@@ -0,0 +1,73 @@
+/*
+ * Metrics for CTDB PMDA
+ *
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2011 David Disseldorp
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+ctdb {
+ num_clients CTDB:0:0
+ frozen CTDB:0:1
+ recovering CTDB:0:2
+ client_packets_sent CTDB:0:3
+ client_packets_recv CTDB:0:4
+ node_packets_sent CTDB:0:5
+ node_packets_recv CTDB:0:6
+ keepalive_packets_sent CTDB:0:7
+ keepalive_packets_recv CTDB:0:8
+ node
+ client
+ timeouts
+ total_calls CTDB:0:9
+ pending_calls CTDB:0:10
+ lockwait_calls CTDB:0:11
+ pending_lockwait_calls CTDB:0:12
+ childwrite_calls CTDB:0:13
+ pending_childwrite_calls CTDB:0:14
+ memory_used CTDB:0:15
+ max_hop_count CTDB:0:16
+ max_reclock_ctdbd CTDB:0:17
+ max_reclock_recd CTDB:0:18
+ max_call_latency CTDB:0:19
+ max_lockwait_latency CTDB:0:20
+ max_childwrite_latency CTDB:0:21
+ num_recoveries CTDB:0:22
+}
+
+ctdb.node {
+ req_call CTDB:1:0
+ reply_call CTDB:1:1
+ req_dmaster CTDB:1:2
+ reply_dmaster CTDB:1:3
+ reply_error CTDB:1:4
+ req_message CTDB:1:5
+ req_control CTDB:1:6
+ reply_control CTDB:1:7
+}
+
+ctdb.client {
+ req_call CTDB:2:0
+ req_message CTDB:2:1
+ req_control CTDB:2:2
+}
+
+ctdb.timeouts {
+ call CTDB:3:0
+ control CTDB:3:1
+ traverse CTDB:3:2
+}
+
diff --git a/ctdb/utils/pmda/root b/ctdb/utils/pmda/root
new file mode 100644
index 0000000..ff036ed
--- /dev/null
+++ b/ctdb/utils/pmda/root
@@ -0,0 +1,10 @@
+/*
+ * fake "root" for validating the local PMNS subtree
+ */
+
+#include <stdpmid>
+
+root { ctdb }
+
+#include "pmns"
+
diff --git a/ctdb/utils/scsi_io/scsi_io.c b/ctdb/utils/scsi_io/scsi_io.c
new file mode 100644
index 0000000..c558860
--- /dev/null
+++ b/ctdb/utils/scsi_io/scsi_io.c
@@ -0,0 +1,1152 @@
+/* a tool to open a scsi device and issue some useful commands
+ such as INQUIRY and helpers to call various PERSISTENT RESERVATION
+ functions
+
+ Copyright ronnie sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* very incomplete and needs to be enhanced with nice command line options
+ to drive it.
+ we need access to an array that supports the PERSISTENT RESERVATION cdb's
+ before we can proceed
+*/
+/* scsi bugs:
+ INQUIRY takes a 2 byte allocation_length parameter but it appears that
+ it only looks at the low byte. If you specify 0x00ff all is well
+ but if you specify 0x0100 it gets confused and returns garbage data
+ for (e.g.) SupportedVPDPages. Same goes for UnitSerialNumber and probably all
+ other inq pages as well.
+
+*/
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <scsi/sg.h>
+#include "popt.h"
+
+
+/* Timeout handed to every SG_IO request issued by scsi_io(). */
+#define SCSI_TIMEOUT 5000 /* ms */
+
+/*
+ * Settings consumed by the command helpers below.
+ * NOTE(review): presumably filled in by popt option parsing elsewhere in
+ * this file (--command, --device, --key, --scope, --type) - confirm.
+ * A scope or type of -1, or a NULL key, means the option was not given.
+ */
+static char *command = NULL;
+static char *device = NULL;
+static char *key = NULL;
+static char *rmkey = NULL;
+static int scope = -1;
+static int type = -1;
+
+/*
+ * Human readable names for the 4-bit SCSI sense key, indexed by key value.
+ * Key 0xc is obsolete and key 0xf is reserved in SPC-3, so both stay
+ * "unknown".
+ */
+const char *sensetable[16]={
+	"no sense",
+	"recovered error",
+	"not ready",
+	"medium error",
+	"hardware error",
+	"illegal request",
+	"unit attention",
+	"data protect",
+	"blank check",
+	"vendor specific",
+	"copy aborted",
+	"aborted command",
+	"unknown",
+	"volume overflow",
+	"miscompare",
+	"unknown"
+};
+
+/*
+ * Issue one SCSI command through the Linux SG_IO ioctl.
+ *
+ * fd         open descriptor of the SCSI (generic) device
+ * cdb        command descriptor block, cdb_size bytes long
+ * xfer_dir   SG_DXFER_* transfer direction
+ * data       data-in/data-out buffer of *data_size bytes
+ * sense      buffer for sense data; on entry *sense_len is its capacity,
+ *            on return *sense_len is the number of sense bytes written
+ *            (0 if none)
+ *
+ * Returns 0 when the command completed or when sense data was returned
+ * (check *sense_len), -1 if the ioctl itself failed, -2/-3/-4 when the
+ * SCSI status, host status or driver status respectively is non-zero.
+ */
+int scsi_io(int fd, unsigned char *cdb, unsigned char cdb_size, int xfer_dir, unsigned char *data, unsigned int *data_size, unsigned char *sense, unsigned int *sense_len)
+{
+	sg_io_hdr_t hdr;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.interface_id = 'S';
+
+	/* the command itself */
+	hdr.cmdp = cdb;
+	hdr.cmd_len = cdb_size;
+
+	/* sense buffer, used only if the command reports an error */
+	hdr.sbp = sense;
+	hdr.mx_sb_len = *sense_len;
+	*sense_len = 0;
+
+	/* data phase: direction, buffer and buffer size */
+	hdr.dxfer_direction = xfer_dir;
+	hdr.dxferp = data;
+	hdr.dxfer_len = *data_size;
+
+	/* SCSI timeout in ms */
+	hdr.timeout = SCSI_TIMEOUT;
+
+	if (ioctl(fd, SG_IO, &hdr) < 0) {
+		perror("SG_IO ioctl failed");
+		return -1;
+	}
+
+	/* sense data takes precedence over the raw status fields */
+	if ((hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK && hdr.sb_len_wr > 0) {
+		*sense_len = hdr.sb_len_wr;
+		return 0;
+	}
+
+	if (hdr.masked_status) {
+		printf("status=0x%x\n", hdr.status);
+		printf("masked_status=0x%x\n", hdr.masked_status);
+		return -2;
+	}
+
+	if (hdr.host_status) {
+		printf("host_status=0x%x\n", hdr.host_status);
+		return -3;
+	}
+
+	if (hdr.driver_status) {
+		printf("driver_status=0x%x\n", hdr.driver_status);
+		return -4;
+	}
+
+#if 0
+	{
+		int i;
+		printf("CDB:\n");
+		for (i = 0; i < cdb_size; i++) {
+			printf("0x%02x ", cdb[i]);
+			if ((i % 8) == 7) printf("\n");
+		}
+		printf("\n");
+	}
+	{
+		int i;
+		printf("DATA:\n");
+		for (i = 0; i < 96; i++) {
+			printf("0x%02x ", data[i]);
+			if ((i % 8) == 7) printf("\n");
+		}
+		printf("\n");
+	}
+#endif
+
+	return 0;
+}
+
+/*
+ * One entry of a value -> description lookup table. Tables are terminated
+ * by an entry whose string is NULL; val_to_str() stops there.
+ */
+typedef struct _value_string_t {
+ int value;
+ const char *string;
+} value_string_t;
+
+
+
+/* Names for the INQUIRY peripheral device type (low 5 bits of byte 0). */
+value_string_t peripheral_device_types[] = {
+ {0, "SBC : Direct Access Block device"},
+ {1, "SSC : Sequential Access Device"},
+ {5, "MMC : Multimedia Device"},
+ {17,"OSD : Object Based Storage"},
+ {0,NULL}
+};
+
+/* Names for the INQUIRY version field (byte 2). */
+value_string_t scsi_versions[] = {
+ {0, "No conformance to any standard claimed"},
+ {3, "SPC"},
+ {4, "SPC-2"},
+ {5, "SPC-3"},
+ {0,NULL}
+};
+
+/* Names for the VPD page codes listed by the Supported VPD Pages page. */
+value_string_t vpd_pages[] = {
+ {0x00, "Supported VPD Pages"},
+ {0x80, "Unit Serial number"},
+ {0x83, "Device Identification"},
+ {0,NULL}
+};
+
+/*
+ * Look up v in the NULL-string-terminated table vs.
+ * Returns the matching description, or "" if vs is NULL or v is not
+ * present in the table.
+ */
+const char *val_to_str(value_string_t *vs, int v)
+{
+	value_string_t *entry;
+
+	if (vs == NULL) {
+		return "";
+	}
+
+	for (entry = vs; entry->string != NULL; entry++) {
+		if (entry->value == v) {
+			return entry->string;
+		}
+	}
+
+	return "";
+}
+
+/*
+ * Print the sense data returned by a failed command.
+ *
+ * Fixed format, current-error sense data (response code 0x70, with or
+ * without the VALID bit 0x80 set) is decoded field by field, but only
+ * when at least 15 bytes were returned so that every decoded byte
+ * (up to sense[14]) was actually written by the device. The raw bytes
+ * are always dumped in hex afterwards.
+ */
+void print_sense_data(unsigned char *sense, int sense_len)
+{
+	int i;
+	unsigned char asc, ascq;
+
+	printf("Device returned sense information\n");
+	/* bit 7 of byte 0 is the VALID flag, not part of the response code */
+	if(sense_len>=15 && (sense[0]&0x7f)==0x70){
+		printf("filemark:%d eom:%d ili:%d sense-key:0x%02x (%s)\n",
+			!!(sense[2]&0x80),
+			!!(sense[2]&0x40),
+			!!(sense[2]&0x20),
+			sense[2]&0x0f,
+			sensetable[sense[2]&0x0f]);
+		printf("command specific info: 0x%02x 0x%02x 0x%02x 0x%02x\n",
+			sense[8],sense[9],sense[10],sense[11]);
+
+		asc=sense[12];
+		printf("additional sense code:0x%02x\n", asc);
+
+		ascq=sense[13];
+		printf("additional sense code qualifier:0x%02x\n", ascq);
+
+		printf("field replacable unit code:0x%02x\n", sense[14]);
+
+		if((asc==0x20)&&(ascq==0x00)){
+			printf("INVALID COMMAND OPERATION CODE\n");
+		}
+	}
+
+	printf("Sense data:\n");
+	for(i=0;i<sense_len;i++){
+		printf("0x%02x ", sense[i]);
+		if((i%8)==7){
+			printf("\n");
+		}
+	}
+	printf("\n");
+}
+
+/* Print label, then len bytes of buf as raw characters, then a newline. */
+static void print_inquiry_chars(const char *label, const unsigned char *buf, int len)
+{
+	int i;
+
+	printf("%s", label);
+	for(i=0;i<len;i++){
+		printf("%c", buf[i]);
+	}
+	printf("\n");
+}
+
+/*
+ * Issue a standard INQUIRY (opcode 0x12) and print the decoded response.
+ *
+ * The response buffer is zeroed first so that a device returning fewer
+ * bytes than requested results in zero fields being printed rather than
+ * uninitialised stack contents.
+ *
+ * Returns 0 on success, -1 if the command failed or returned sense data.
+ */
+int scsi_inquiry(int fd)
+{
+	unsigned char cdb[]={0x12,0,0,0,0,0};
+
+	unsigned char data[96];
+	unsigned int data_size=sizeof(data);
+
+	unsigned char sense[32];
+	unsigned int sense_len=sizeof(sense);
+
+	int res;
+
+	memset(data, 0, sizeof(data));
+
+	/* allocation length */
+	cdb[3]=(data_size>>8)&0xff;
+	cdb[4]=data_size&0xff;
+
+	printf("Standard INQUIRY Data:\n");
+
+	res=scsi_io(fd, cdb, sizeof(cdb), SG_DXFER_FROM_DEV, data, &data_size, sense, &sense_len);
+	if(res){
+		printf("SCSI_IO failed\n");
+		return -1;
+	}
+	if(sense_len){
+		print_sense_data(sense, sense_len);
+		return -1;
+	}
+
+	/* Peripheral Qualifier */
+	printf("Peripheral Qualifier:%c%c%cb\n",
+		'0'+!!(data[0]&0x80),
+		'0'+!!(data[0]&0x40),
+		'0'+!!(data[0]&0x20));
+
+	/* Peripheral Device Type */
+	printf("Peripheral Device Type: 0x%02x (%s)\n",
+		data[0]&0x1f,
+		val_to_str(peripheral_device_types, data[0]&0x1f));
+
+	/* RMB */
+	printf("RMB: %s device\n", data[1]&0x80?"REMOVABLE":"NON-REMOVABLE");
+
+	/* SCSI Version */
+	printf("SCSI Version: 0x%02x (%s)\n",
+		data[2],
+		val_to_str(scsi_versions, data[2]));
+
+	/* NormACA, HiSUP, Response Data Format */
+	printf("NormACA:%d HiSup:%d ResponseDataFormat:%d\n",
+		!!(data[3]&0x20),
+		!!(data[3]&0x10),
+		data[3]&0x0f);
+
+	switch(data[3]&0x0f){
+	/*SPC-2/SPC-3/SPC-4*/
+	case 2:
+	/*SPC (not strictly correct but we print it like 2 anyway)*/
+	case 1:
+		/* SCCS ... */
+		printf("SCCS:%d ACC:%d TPGS:%c%cb 3PC:%d PROTECT:%d\n",
+			!!(data[5]&0x80),
+			!!(data[5]&0x40),
+			'0'+!!(data[5]&0x20),
+			'0'+!!(data[5]&0x10),
+			!!(data[5]&0x08),
+			!!(data[5]&0x01));
+
+		/* Encserv ... */
+		printf("Encserv:%d VS:%d MultiP:%d ADDR16:%d\n",
+			!!(data[6]&0x40),
+			!!(data[6]&0x20),
+			!!(data[6]&0x10),
+			!!(data[6]&0x01));
+
+		/* WBUS16 ... */
+		printf("WBUS16:%d SYNC:%d CmdQue:%d VS:%d\n",
+			!!(data[7]&0x20),
+			!!(data[7]&0x10),
+			!!(data[7]&0x02),
+			!!(data[7]&0x01));
+
+		/* T10 vendor Identification */
+		print_inquiry_chars("Vendor:", &data[8], 8);
+
+		/* Product Identification */
+		print_inquiry_chars("Product:", &data[16], 16);
+
+		/* Product Revision Level */
+		print_inquiry_chars("Product Revision:", &data[32], 4);
+
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Issue INQUIRY with EVPD=1, page 0x00 and list the supported VPD pages.
+ *
+ * The page length reported by the device (byte 3) can be as large as 255,
+ * which together with the 4 byte header exceeds the 255 byte buffer, so
+ * the listing is clamped to the buffer size. The buffer is zeroed first
+ * so a short response never exposes uninitialised memory.
+ *
+ * Returns 0 on success, -1 if the command failed or returned sense data.
+ */
+int scsi_inquiry_supported_vpd_pages(int fd)
+{
+	unsigned char cdb[]={0x12,0x01,0,0,0,0};
+
+	unsigned char data[0xff];
+	unsigned int data_size=sizeof(data);
+
+	unsigned char sense[32];
+	unsigned int sense_len=sizeof(sense);
+
+	int res;
+	unsigned int end, i;
+
+	memset(data, 0, sizeof(data));
+
+	/* allocation length */
+	cdb[3]=(data_size>>8)&0xff;
+	cdb[4]=data_size&0xff;
+
+	printf("INQUIRY Supported VPD Pages:\n");
+
+	res=scsi_io(fd, cdb, sizeof(cdb), SG_DXFER_FROM_DEV, data, &data_size, sense, &sense_len);
+	if(res){
+		printf("SCSI_IO failed\n");
+		return -1;
+	}
+	if(sense_len){
+		print_sense_data(sense, sense_len);
+		return -1;
+	}
+
+	/* Page Length: the page list follows the 4 byte header */
+	end=4+(unsigned int)data[3];
+	if(end>sizeof(data)){
+		end=sizeof(data);
+	}
+
+	/* Pages */
+	for(i=4;i<end;i++){
+		printf("Page:%02xh (%s)\n",
+			data[i],
+			val_to_str(vpd_pages, data[i]));
+	}
+
+	return 0;
+}
+
+/*
+ * Issue INQUIRY with EVPD=1, page 0x80 and print the unit serial number.
+ *
+ * The page length reported by the device (byte 3) can be as large as 255,
+ * which together with the 4 byte header exceeds the 255 byte buffer, so
+ * the output is clamped to the buffer size. The buffer is zeroed first
+ * so a short response never exposes uninitialised memory.
+ *
+ * Returns 0 on success, -1 if the command failed or returned sense data.
+ */
+int scsi_inquiry_unit_serial_number(int fd)
+{
+	unsigned char cdb[]={0x12,0x01,0x80,0,0,0};
+
+	unsigned char data[0x00ff];
+	unsigned int data_size=sizeof(data);
+
+	unsigned char sense[32];
+	unsigned int sense_len=sizeof(sense);
+
+	int res;
+	unsigned int end, i;
+
+	memset(data, 0, sizeof(data));
+
+	/* allocation length */
+	cdb[3]=(data_size>>8)&0xff;
+	cdb[4]=data_size&0xff;
+
+	printf("INQUIRY Unit Serial Number:\n");
+
+	res=scsi_io(fd, cdb, sizeof(cdb), SG_DXFER_FROM_DEV, data, &data_size, sense, &sense_len);
+	if(res){
+		printf("SCSI_IO failed\n");
+		return -1;
+	}
+	if(sense_len){
+		print_sense_data(sense, sense_len);
+		return -1;
+	}
+
+	/* Page Length: the serial number follows the 4 byte header */
+	end=4+(unsigned int)data[3];
+	if(end>sizeof(data)){
+		end=sizeof(data);
+	}
+
+	/* Unit Serial Number */
+	printf("Unit Serial Number:");
+	for(i=4;i<end;i++){
+		printf("%c",data[i]&0xff);
+	}
+	printf("\n");
+
+	return 0;
+}
+
+/*
+ * Issue PERSISTENT RESERVE IN, service action READ KEYS (0), and print
+ * the generation counter, the additional length and every registered key.
+ *
+ * The additional length is a 32-bit value supplied by the device, while
+ * only 255 bytes are requested. Keys are therefore printed only while a
+ * complete 8 byte key lies inside both the reported length and the local
+ * buffer. The buffer is zeroed first so a short response never exposes
+ * uninitialised memory.
+ *
+ * Returns 0 on success, -1 if the command failed or returned sense data.
+ */
+int scsi_persistent_reserve_in_read_keys(int fd)
+{
+	unsigned char cdb[]={0x5e,0,0,0,0,0,0,0,0,0};
+
+	unsigned char data[0x00ff];
+	unsigned int data_size=sizeof(data);
+
+	unsigned char sense[32];
+	unsigned int sense_len=sizeof(sense);
+	unsigned char service_action=0;
+	int res;
+	unsigned long i;
+	unsigned long prgeneration, additional_length;
+
+	memset(data, 0, sizeof(data));
+
+	cdb[1]=service_action;
+	/* allocation length */
+	cdb[7]=(data_size>>8)&0xff;
+	cdb[8]=data_size&0xff;
+
+	printf("PERSISTENT RESERVE IN: READ KEYS\n");
+
+	res=scsi_io(fd, cdb, sizeof(cdb), SG_DXFER_FROM_DEV, data, &data_size, sense, &sense_len);
+	if(res){
+		printf("SCSI_IO failed\n");
+		return -1;
+	}
+	if(sense_len){
+		print_sense_data(sense, sense_len);
+		return -1;
+	}
+
+	/* PRGeneration */
+	prgeneration=data[0];
+	prgeneration<<=8;prgeneration|=data[1];
+	prgeneration<<=8;prgeneration|=data[2];
+	prgeneration<<=8;prgeneration|=data[3];
+	printf("PRGeneration:%lu\n", prgeneration);
+
+	/* Additional Length */
+	additional_length=data[4];
+	additional_length<<=8;additional_length|=data[5];
+	additional_length<<=8;additional_length|=data[6];
+	additional_length<<=8;additional_length|=data[7];
+	printf("Additional Length:%lu\n", additional_length);
+
+	/* print the registered keys that fit in the 8 byte header + buffer */
+	for(i=0;(i+8<=additional_length)&&(i+16<=sizeof(data));i+=8){
+		printf("Key:%02x%02x%02x%02x%02x%02x%02x%02x\n",
+			data[i+8],
+			data[i+9],
+			data[i+10],
+			data[i+11],
+			data[i+12],
+			data[i+13],
+			data[i+14],
+			data[i+15]);
+	}
+	if(additional_length>sizeof(data)-8){
+		printf("Key list truncated to %u byte buffer\n",
+			(unsigned int)sizeof(data));
+	}
+
+	return 0;
+}
+
+/*
+ * Issue PERSISTENT RESERVE IN, service action READ RESERVATION (1), and
+ * print the generation counter and additional length. When the additional
+ * length is exactly 16, the reservation key (bytes 8-15) and the
+ * scope/type nibbles of byte 21 are printed as well.
+ *
+ * Returns 0 on success, -1 if the command failed or returned sense data.
+ */
+int scsi_persistent_reserve_in_read_reservation(int fd)
+{
+	unsigned char cdb[]={0x5e,0,0,0,0,0,0,0,0,0};
+	unsigned char service_action=1;
+
+	unsigned int data_size=0x00ff;
+	unsigned char data[data_size];
+
+	unsigned int sense_len=32;
+	unsigned char sense[sense_len];
+
+	unsigned long prgeneration, additional_length;
+	int res;
+
+	cdb[1]=service_action;
+	/* allocation length, big-endian */
+	cdb[7]=(data_size>>8)&0xff;
+	cdb[8]=data_size&0xff;
+
+	printf("PERSISTENT RESERVE IN: READ RESERVATION\n");
+
+	res=scsi_io(fd, cdb, sizeof(cdb), SG_DXFER_FROM_DEV, data, &data_size, sense, &sense_len);
+	if(res){
+		printf("SCSI_IO failed\n");
+		return -1;
+	}
+	if(sense_len){
+		print_sense_data(sense, sense_len);
+		return -1;
+	}
+
+	/* PRGeneration: big-endian 32-bit value in bytes 0-3 */
+	prgeneration=((unsigned long)data[0]<<24)
+		    |((unsigned long)data[1]<<16)
+		    |((unsigned long)data[2]<<8)
+		    | (unsigned long)data[3];
+	printf("PRGeneration:%lu\n", prgeneration);
+
+	/* Additional Length: big-endian 32-bit value in bytes 4-7 */
+	additional_length=((unsigned long)data[4]<<24)
+			 |((unsigned long)data[5]<<16)
+			 |((unsigned long)data[6]<<8)
+			 | (unsigned long)data[7];
+	printf("Additional Length:%lu\n", additional_length);
+
+	if(additional_length!=16){
+		return 0;
+	}
+
+	printf("Key:%02x%02x%02x%02x%02x%02x%02x%02x\n",
+		data[8], data[9], data[10], data[11],
+		data[12], data[13], data[14], data[15]);
+	printf("Scope:%xh Type:%xh\n",data[21]>>4,data[21]&0x0f);
+
+	return 0;
+}
+
+/*
+ * Issue PERSISTENT RESERVE IN, service action REPORT CAPABILITIES (2),
+ * and print the decoded capability flags and reservation type mask.
+ *
+ * The type mask spans bytes 4 and 5: the EX_AC_AR bit is bit 0 of byte 5,
+ * all other type bits live in byte 4.
+ *
+ * Returns 0 on success, -1 if the command failed or returned sense data.
+ */
+int scsi_persistent_reserve_in_report_capabilities(int fd)
+{
+	unsigned char cdb[]={0x5e,0,0,0,0,0,0,0,0,0};
+
+	unsigned int data_size=0x00ff;
+	unsigned char data[data_size];
+
+	unsigned int sense_len=32;
+	unsigned char sense[sense_len];
+	unsigned char service_action=2;
+	int res;
+	unsigned short length, type_mask;
+
+	cdb[1]=service_action;
+	/* allocation length */
+	cdb[7]=(data_size>>8)&0xff;
+	cdb[8]=data_size&0xff;
+
+	printf("PERSISTENT RESERVE IN: REPORT CAPABILITIES\n");
+
+	res=scsi_io(fd, cdb, sizeof(cdb), SG_DXFER_FROM_DEV, data, &data_size, sense, &sense_len);
+	if(res){
+		printf("SCSI_IO failed\n");
+		return -1;
+	}
+	if(sense_len){
+		print_sense_data(sense, sense_len);
+		return -1;
+	}
+
+	/* Length */
+	length=data[0];
+	length<<=8;length|=data[1];
+	printf("Length:%d\n", length);
+
+	/* CRH ... */
+	printf("CRH:%d SIP_C:%d ATP_C:%d PTPL_C:%d\n",
+		!!(data[2]&0x10),
+		!!(data[2]&0x08),
+		!!(data[2]&0x04),
+		!!(data[2]&0x01));
+
+	/* TMV ... */
+	printf("TMV:%d ALLOW_COMMANDS:%c%c%cb PTPL_A:%d\n",
+		!!(data[3]&0x80),
+		'0'+(!!(data[3]&0x40)),
+		'0'+(!!(data[3]&0x20)),
+		'0'+(!!(data[3]&0x10)),
+		!!(data[3]&0x01));
+
+	/* Persistent Reservation Type Mask */
+	type_mask=data[4];
+	type_mask<<=8;type_mask|=data[5];
+	printf("Persistent Reservation Type Mask:0x%04x\n", type_mask);
+	printf("WR_EX_AR:%d EX_AC_RO:%d WR_EX_RO:%d EX_AC:%d WR_EX:%d EX_AC_AR:%d\n",
+		!!(data[4]&0x80),
+		!!(data[4]&0x40),
+		!!(data[4]&0x20),
+		!!(data[4]&0x08),
+		!!(data[4]&0x02),
+		!!(data[5]&0x01));	/* EX_AC_AR is in the second mask byte */
+
+	return 0;
+}
+
+/*
+ * Issue PERSISTENT RESERVE IN, service action READ FULL STATUS (3), and
+ * print the generation counter and additional length of the response.
+ * The status descriptors that follow the header are not decoded yet.
+ *
+ * Returns 0 on success, -1 if the command failed or returned sense data.
+ */
+int scsi_persistent_reserve_in_read_full_status(int fd)
+{
+	unsigned char cdb[]={0x5e,0,0,0,0,0,0,0,0,0};
+	unsigned char service_action=3;
+
+	unsigned int data_size=0x00ff;
+	unsigned char data[data_size];
+
+	unsigned int sense_len=32;
+	unsigned char sense[sense_len];
+
+	unsigned long prgeneration, additional_length;
+	int res;
+
+	cdb[1]=service_action;
+	/* allocation length, big-endian */
+	cdb[7]=(data_size>>8)&0xff;
+	cdb[8]=data_size&0xff;
+
+	printf("PERSISTENT RESERVE IN: READ FULL STATUS\n");
+
+	res=scsi_io(fd, cdb, sizeof(cdb), SG_DXFER_FROM_DEV, data, &data_size, sense, &sense_len);
+	if(res){
+		printf("SCSI_IO failed\n");
+		return -1;
+	}
+	if(sense_len){
+		print_sense_data(sense, sense_len);
+		return -1;
+	}
+
+	/* PRGeneration: big-endian 32-bit value in bytes 0-3 */
+	prgeneration=((unsigned long)data[0]<<24)
+		    |((unsigned long)data[1]<<16)
+		    |((unsigned long)data[2]<<8)
+		    | (unsigned long)data[3];
+	printf("PRGeneration:%lu\n", prgeneration);
+
+	/* Additional Length: big-endian 32-bit value in bytes 4-7 */
+	additional_length=((unsigned long)data[4]<<24)
+			 |((unsigned long)data[5]<<16)
+			 |((unsigned long)data[6]<<8)
+			 | (unsigned long)data[7];
+	printf("Additional Length:%lu\n", additional_length);
+
+/*XXX*/
+
+	return 0;
+}
+
+/*
+ * Issue PERSISTENT RESERVE OUT, service action CLEAR (3), using the
+ * scope, type and key given on the command line.
+ *
+ * The process exits with status 10 if scope, type or key is missing, or
+ * if key is not a valid hexadecimal number.
+ *
+ * Returns 0 on success, -1 if the command failed or returned sense data.
+ */
+int scsi_persistent_reserve_out_clear(int fd)
+{
+	unsigned char cdb[]={0x5f,0,0,0,0,0,0,0,0,0};
+
+	unsigned int data_size=24;
+	unsigned char data[data_size];
+
+	unsigned int sense_len=32;
+	unsigned char sense[sense_len];
+	unsigned char service_action=3;
+	int res;
+
+	/* unsigned to match the %llx conversion used below */
+	unsigned long long k;
+
+	if (scope==-1) {
+		printf("Must specify scope\n");
+		printf("scsi_io --device=<DEVICE> --command=clear --scope=<SCOPE> --type=<TYPE> --key=<KEY>\n");
+		_exit(10);
+	}
+	if (type==-1) {
+		printf("Must specify type\n");
+		printf("scsi_io --device=<DEVICE> --command=clear --scope=<SCOPE> --type=<TYPE> --key=<KEY>\n");
+		_exit(10);
+	}
+	if (!key) {
+		printf("Must specify key\n");
+		printf("scsi_io --device=<DEVICE> --command=clear --scope=<SCOPE> --type=<TYPE> --key=<KEY>\n");
+		_exit(10);
+	}
+
+	/* refuse to send an uninitialised key if the string does not parse */
+	if (sscanf(key, "%llx", &k) != 1) {
+		printf("Invalid key\n");
+		printf("scsi_io --device=<DEVICE> --command=clear --scope=<SCOPE> --type=<TYPE> --key=<KEY>\n");
+		_exit(10);
+	}
+
+	cdb[1]=service_action;
+	cdb[2]=(scope<<4)|type;
+	/* parameter list length */
+	cdb[7]=(data_size>>8)&0xff;
+	cdb[8]=data_size&0xff;
+
+	/* also leaves the Service Action Key (bytes 8-15) all zero */
+	memset(data, 0, data_size);
+
+	/* Reservation Key */
+	data[0]=(k>>56)&0xff;
+	data[1]=(k>>48)&0xff;
+	data[2]=(k>>40)&0xff;
+	data[3]=(k>>32)&0xff;
+	data[4]=(k>>24)&0xff;
+	data[5]=(k>>16)&0xff;
+	data[6]=(k>> 8)&0xff;
+	data[7]=(k    )&0xff;
+
+	/* Spec_ip_ti=0 all_tg_pt=1 aptpl=0 */
+	data[20]=0x04;
+
+	printf("PERSISTENT RESERVE OUT: CLEAR\n");
+
+	res=scsi_io(fd, cdb, sizeof(cdb), SG_DXFER_TO_DEV, data, &data_size, sense, &sense_len);
+	if(res){
+		printf("SCSI_IO failed\n");
+		return -1;
+	}
+	if(sense_len){
+		print_sense_data(sense, sense_len);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Send PERSISTENT RESERVE OUT (opcode 0x5f) with service action 1
+ * (RESERVE).
+ *
+ * Reads the global scope, type and key options.  The key is parsed as a
+ * hexadecimal number and stored big-endian in the Reservation Key field
+ * (bytes 0-7) of the 24 byte parameter list; the Service Action Key
+ * (bytes 8-15) stays zero.
+ *
+ * Exits with status 10 if an option is missing or the key is not valid
+ * hexadecimal.  Returns 0 on success, -1 if scsi_io() fails or sense
+ * data is returned.
+ */
+int scsi_persistent_reserve_out_reserve(int fd)
+{
+	static const char synopsis[]="scsi_io --device=<DEVICE> --command=reserve --scope=<SCOPE> --type=<TYPE> --key=<KEY>\n";
+	unsigned char cdb[]={0x5f,0,0,0,0,0,0,0,0,0};
+	unsigned int data_size=24;
+	unsigned char data[24];
+	unsigned int sense_len=32;
+	unsigned char sense[32];
+	unsigned char service_action=1;
+	const char *missing=NULL;
+	/* unsigned: matches %llx and keeps the shifts below well defined */
+	unsigned long long k=0;
+	int res, i;
+
+	if (scope==-1) {
+		missing="scope";
+	} else if (type==-1) {
+		missing="type";
+	} else if (!key) {
+		missing="key";
+	}
+	if (missing) {
+		printf("Must specify %s\n", missing);
+		fputs(synopsis, stdout);
+		fflush(stdout);	/* _exit() does not flush stdio buffers */
+		_exit(10);
+	}
+
+	if (sscanf(key, "%llx", &k) != 1) {
+		printf("Invalid key '%s', expected a hexadecimal number\n", key);
+		fputs(synopsis, stdout);
+		fflush(stdout);
+		_exit(10);
+	}
+
+	cdb[1]=service_action;
+	cdb[2]=(scope<<4)|type;
+	cdb[7]=(data_size>>8)&0xff;
+	cdb[8]=data_size&0xff;
+
+	memset(data, 0, sizeof(data));
+
+	/* Reservation Key, most significant byte first */
+	for (i=0;i<8;i++) {
+		data[i]=(k>>(56-8*i))&0xff;
+	}
+
+	/* Service Action Key (bytes 8-15) is left as zero */
+
+	/* Spec_ip_ti=0 all_tg_pt=1 aptpl=0 */
+	data[20]=0x04;
+
+	printf("PERSISTENT RESERVE OUT: RESERVE\n");
+
+	res=scsi_io(fd, cdb, sizeof(cdb), SG_DXFER_TO_DEV, data, &data_size, sense, &sense_len);
+	if(res){
+		printf("SCSI_IO failed\n");
+		return -1;
+	}
+	if(sense_len){
+		print_sense_data(sense, sense_len);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Send PERSISTENT RESERVE OUT (opcode 0x5f) with service action 4
+ * (PREEMPT).
+ *
+ * Reads the global scope, type, key and rmkey options.  Both keys are
+ * parsed as hexadecimal numbers and stored big-endian in the 24 byte
+ * parameter list: key in the Reservation Key field (bytes 0-7) and
+ * rmkey in the Service Action Key field (bytes 8-15).
+ *
+ * Exits with status 10 if an option is missing or a key is not valid
+ * hexadecimal.  Returns 0 on success, -1 if scsi_io() fails or sense
+ * data is returned.
+ */
+int scsi_persistent_reserve_out_preempt(int fd)
+{
+	static const char synopsis[]="scsi_io --device=<DEVICE> --command=preempt --scope=<SCOPE> --type=<TYPE> --key=<KEY> --rmkey=<KEY>\n";
+	unsigned char cdb[]={0x5f,0,0,0,0,0,0,0,0,0};
+	unsigned int data_size=24;
+	unsigned char data[24];
+	unsigned int sense_len=32;
+	unsigned char sense[32];
+	unsigned char service_action=4;
+	const char *missing=NULL;
+	/* unsigned: matches %llx and keeps the shifts below well defined */
+	unsigned long long k=0, rk=0;
+	int res, i;
+
+	if (scope==-1) {
+		missing="scope";
+	} else if (type==-1) {
+		missing="type";
+	} else if (!key) {
+		missing="key";
+	} else if (!rmkey) {
+		missing="rmkey";
+	}
+	if (missing) {
+		printf("Must specify %s\n", missing);
+		fputs(synopsis, stdout);
+		fflush(stdout);	/* _exit() does not flush stdio buffers */
+		_exit(10);
+	}
+
+	if (sscanf(key, "%llx", &k) != 1) {
+		printf("Invalid key '%s', expected a hexadecimal number\n", key);
+		fputs(synopsis, stdout);
+		fflush(stdout);
+		_exit(10);
+	}
+	if (sscanf(rmkey, "%llx", &rk) != 1) {
+		printf("Invalid rmkey '%s', expected a hexadecimal number\n", rmkey);
+		fputs(synopsis, stdout);
+		fflush(stdout);
+		_exit(10);
+	}
+
+	cdb[1]=service_action;
+	cdb[2]=(scope<<4)|type;
+	cdb[7]=(data_size>>8)&0xff;
+	cdb[8]=data_size&0xff;
+
+	memset(data, 0, sizeof(data));
+
+	for (i=0;i<8;i++) {
+		/* Reservation Key, most significant byte first */
+		data[i]=(k>>(56-8*i))&0xff;
+		/* Service Action Key: the registration to remove */
+		data[8+i]=(rk>>(56-8*i))&0xff;
+	}
+
+	/* Spec_ip_ti=0 all_tg_pt=1 aptpl=0 */
+	data[20]=0x04;
+
+	/* the banner used to be a copy of the RESERVE one */
+	printf("PERSISTENT RESERVE OUT: PREEMPT\n");
+
+	res=scsi_io(fd, cdb, sizeof(cdb), SG_DXFER_TO_DEV, data, &data_size, sense, &sense_len);
+	if(res){
+		printf("SCSI_IO failed\n");
+		return -1;
+	}
+	if(sense_len){
+		print_sense_data(sense, sense_len);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Send PERSISTENT RESERVE OUT (opcode 0x5f) with service action 6
+ * (REGISTER AND IGNORE EXISTING KEY).
+ *
+ * Reads the global scope, type and key options.  The key is parsed as a
+ * hexadecimal number and stored big-endian in the Service Action Key
+ * field (bytes 8-15) of the 24 byte parameter list; the Reservation Key
+ * (bytes 0-7) stays zero.
+ *
+ * Exits with status 10 if an option is missing or the key is not valid
+ * hexadecimal.  Returns 0 on success, -1 if scsi_io() fails or sense
+ * data is returned.
+ */
+int scsi_persistent_reserve_out_register_and_ignore_existing_key(int fd)
+{
+	static const char synopsis[]="scsi_io --device=<DEVICE> --command=registerkey --scope=<SCOPE> --type=<TYPE> --key=<KEY>\n";
+	unsigned char cdb[]={0x5f,0,0,0,0,0,0,0,0,0};
+	unsigned int data_size=24;
+	unsigned char data[24];
+	unsigned int sense_len=32;
+	unsigned char sense[32];
+	unsigned char service_action=6;
+	const char *missing=NULL;
+	/* unsigned: matches %llx and keeps the shifts below well defined */
+	unsigned long long k=0;
+	int res, i;
+
+	if (scope==-1) {
+		missing="scope";
+	} else if (type==-1) {
+		missing="type";
+	} else if (!key) {
+		missing="key";
+	}
+	if (missing) {
+		printf("Must specify %s\n", missing);
+		fputs(synopsis, stdout);
+		fflush(stdout);	/* _exit() does not flush stdio buffers */
+		_exit(10);
+	}
+
+	if (sscanf(key, "%llx", &k) != 1) {
+		printf("Invalid key '%s', expected a hexadecimal number\n", key);
+		fputs(synopsis, stdout);
+		fflush(stdout);
+		_exit(10);
+	}
+
+	cdb[1]=service_action;
+	cdb[2]=(scope<<4)|type;
+	cdb[7]=(data_size>>8)&0xff;
+	cdb[8]=data_size&0xff;
+
+	memset(data, 0, sizeof(data));
+
+	/* Reservation Key (bytes 0-7) is left as zero */
+
+	/* Service Action Key: the key to register, most significant byte first */
+	for (i=0;i<8;i++) {
+		data[8+i]=(k>>(56-8*i))&0xff;
+	}
+
+	/* Spec_ip_ti=0 all_tg_pt=1 aptpl=0 */
+	data[20]=0x04;
+
+	printf("PERSISTENT RESERVE OUT: REGISTER AND IGNORE EXISTING KEY\n");
+
+	res=scsi_io(fd, cdb, sizeof(cdb), SG_DXFER_TO_DEV, data, &data_size, sense, &sense_len);
+	if(res){
+		printf("SCSI_IO failed\n");
+		return -1;
+	}
+	if(sense_len){
+		print_sense_data(sense, sense_len);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Remove a registration by sending PERSISTENT RESERVE OUT (opcode 0x5f)
+ * with service action 6 (REGISTER AND IGNORE EXISTING KEY) and an
+ * all-zero Service Action Key.
+ * NOTE(review): SPC-3 describes a zero Service Action Key as
+ * "unregister" - confirm against the spec before changing this.
+ *
+ * Reads the global scope, type and key options.  The key is parsed as a
+ * hexadecimal number and stored big-endian in the Reservation Key field
+ * (bytes 0-7) of the 24 byte parameter list.
+ *
+ * Exits with status 10 if an option is missing or the key is not valid
+ * hexadecimal.  Returns 0 on success, -1 if scsi_io() fails or sense
+ * data is returned.
+ */
+int scsi_persistent_reserve_out_unregister_key(int fd)
+{
+	static const char synopsis[]="scsi_io --device=<DEVICE> --command=unregisterkey --scope=<SCOPE> --type=<TYPE> --key=<KEY>\n";
+	unsigned char cdb[]={0x5f,0,0,0,0,0,0,0,0,0};
+	unsigned int data_size=24;
+	unsigned char data[24];
+	unsigned int sense_len=32;
+	unsigned char sense[32];
+	unsigned char service_action=6;
+	const char *missing=NULL;
+	/* unsigned: matches %llx and keeps the shifts below well defined */
+	unsigned long long k=0;
+	int res, i;
+
+	if (scope==-1) {
+		missing="scope";
+	} else if (type==-1) {
+		missing="type";
+	} else if (!key) {
+		missing="key";
+	}
+	if (missing) {
+		printf("Must specify %s\n", missing);
+		fputs(synopsis, stdout);
+		fflush(stdout);	/* _exit() does not flush stdio buffers */
+		_exit(10);
+	}
+
+	if (sscanf(key, "%llx", &k) != 1) {
+		printf("Invalid key '%s', expected a hexadecimal number\n", key);
+		fputs(synopsis, stdout);
+		fflush(stdout);
+		_exit(10);
+	}
+
+	cdb[1]=service_action;
+	cdb[2]=(scope<<4)|type;
+	cdb[7]=(data_size>>8)&0xff;
+	cdb[8]=data_size&0xff;
+
+	memset(data, 0, sizeof(data));
+
+	/* Reservation Key, most significant byte first */
+	for (i=0;i<8;i++) {
+		data[i]=(k>>(56-8*i))&0xff;
+	}
+
+	/* Service Action Key (bytes 8-15) is left as zero */
+
+	/* Spec_ip_ti=0 all_tg_pt=1 aptpl=0 */
+	data[20]=0x04;
+
+	printf("PERSISTENT RESERVE OUT: UNREGISTER KEY\n");
+
+	res=scsi_io(fd, cdb, sizeof(cdb), SG_DXFER_TO_DEV, data, &data_size, sense, &sense_len);
+	if(res){
+		printf("SCSI_IO failed\n");
+		return -1;
+	}
+	if(sense_len){
+		print_sense_data(sense, sense_len);
+		return -1;
+	}
+
+	return 0;
+}
+
+
+
+
+/*
+ * Open dev read/write and check that it answers SG_GET_VERSION_NUM with
+ * a version of at least 30000.
+ *
+ * Returns the open file descriptor, or -1 after printing the reason.
+ * The descriptor is closed again when the version check fails.
+ */
+int open_scsi_device(const char *dev)
+{
+	int fd, vers;
+
+	fd=open(dev, O_RDWR);
+	if(fd<0){
+		printf("ERROR could not open device %s\n", dev);
+		return -1;
+	}
+	if ((ioctl(fd, SG_GET_VERSION_NUM, &vers) < 0) || (vers < 30000)) {
+		/* name the rejected device instead of a literal "/dev" */
+		printf("%s is not an sg device, or old sg driver\n", dev);
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+/* Signature shared by every command handler: takes the open device fd. */
+typedef int (*scsi_func_t)(int fd);
+/* One entry of the command table. */
+typedef struct _cmds_t {
+ const char *cmd;	/* name matched against the --command option */
+ scsi_func_t func;	/* handler invoked by main() */
+ const char *comment;	/* description printed by usage() */
+} cmds_t;
+/* Table of supported commands; searched linearly by main() and listed by usage(). */
+cmds_t cmds[] = {
+ {"inq", scsi_inquiry, "Standard INQUIRY output"},
+ {"vpd", scsi_inquiry_supported_vpd_pages, "Supported VPD Pages"},
+ {"usn", scsi_inquiry_unit_serial_number, "Unit serial number"},
+ {"readkeys", scsi_persistent_reserve_in_read_keys, "Read SCSI Reservation Keys"},
+ {"readrsvr", scsi_persistent_reserve_in_read_reservation, "Read SCSI Reservation Data"},
+ {"reportcap", scsi_persistent_reserve_in_report_capabilities, "Report reservation Capabilities"},
+ {"registerkey", scsi_persistent_reserve_out_register_and_ignore_existing_key, "Register and ignore existing key"},
+ {"unregisterkey", scsi_persistent_reserve_out_unregister_key, "Unregister a key"},
+ {"clear", scsi_persistent_reserve_out_clear, "Clear all reservations and registrations"},
+ {"reserve", scsi_persistent_reserve_out_reserve, "Reserve"},
+ {"preempt", scsi_persistent_reserve_out_preempt, "Preempt (remove someone elses registration)"},
+};
+
+/* Print the command line synopsis followed by one line per cmds[] entry. */
+void usage(void)
+{
+	const size_t count=sizeof(cmds)/sizeof(cmds[0]);
+	size_t idx;
+
+	printf("Usage: scsi_io --command <command> --device <device>\n");
+	printf("Commands:\n");
+	for (idx=0;idx<count;idx++){
+		const cmds_t *entry=&cmds[idx];
+
+		printf(" %s %s\n", entry->cmd, entry->comment);
+	}
+}
+
+
+/*
+ * Parse the command line, look the requested command up in cmds[], open
+ * the sg device and run the command against it.
+ *
+ * Exit status:
+ *   0   the command handler returned 0
+ *   1   invalid option, or the command handler reported failure
+ *   10  missing/unknown command, missing device, or device open failure
+ */
+int main(int argc, const char *argv[])
+{
+	size_t i;
+	int fd, opt, res;
+	scsi_func_t func=NULL;
+	struct poptOption popt_options[] = {
+		POPT_AUTOHELP
+		{ "scope", 's', POPT_ARG_INT, &scope, 0, "scope", "integer" },
+		{ "type", 't', POPT_ARG_INT, &type, 0, "type", "integer" },
+		{ "key", 'k', POPT_ARG_STRING, &key, 0, "key", "key" },
+		{ "rmkey", 'r', POPT_ARG_STRING, &rmkey, 0, "rmkey", "rmkey" },
+		{ "command", 'c', POPT_ARG_STRING, &command, 0, "command", "command" },
+		{ "device", 'd', POPT_ARG_STRING, &device, 0, "device", "device" },
+		POPT_TABLEEND
+	};
+	poptContext pc;
+
+	pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
+
+	/* every option stores straight into its variable, so anything
+	 * other than -1 (end of options) is a parse error */
+	while ((opt = poptGetNextOpt(pc)) != -1) {
+		fprintf(stderr, "Invalid option %s: %s\n",
+			poptBadOption(pc, 0), poptStrerror(opt));
+		_exit(1);
+	}
+
+	if (!command) {
+		printf("Must specify the command\n");
+		usage();
+		fflush(stdout);	/* _exit() does not flush stdio buffers */
+		_exit(10);
+	}
+
+	if (!device) {
+		printf("Must specify the device\n");
+		usage();
+		fflush(stdout);
+		_exit(10);
+	}
+
+	/* validate the command before touching the device */
+	for (i=0;i<sizeof(cmds)/sizeof(cmds[0]);i++){
+		if(!strcmp(cmds[i].cmd, command)) {
+			func = cmds[i].func;
+			break;
+		}
+	}
+	if (!func) {
+		printf("Unrecognized command : %s\n", command);
+		usage();
+		fflush(stdout);
+		_exit(10);
+	}
+
+	fd=open_scsi_device(device);
+	if(fd<0){
+		printf("Could not open SCSI device %s\n",device);
+		usage();
+		fflush(stdout);
+		_exit(10);
+	}
+
+	res=func(fd);
+
+	close(fd);
+	poptFreeContext(pc);
+
+	/* let callers and scripts see whether the SCSI command worked */
+	return res==0 ? 0 : 1;
+}
diff --git a/ctdb/utils/smnotify/smnotify.c b/ctdb/utils/smnotify/smnotify.c
new file mode 100644
index 0000000..5907bd6
--- /dev/null
+++ b/ctdb/utils/smnotify/smnotify.c
@@ -0,0 +1,151 @@
+/*
+ simple smnotify tool
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <stdlib.h>
+#include "smnotify.h"
+#include "popt.h"
+
+/* Command line option values; main() hands their addresses to popt. */
+static char *client = NULL;	/* --client: address the notify is sent to */
+static const char *ip = NULL;	/* --ip: local address the socket is bound to */
+static char *server = NULL;	/* --server: copied into status.mon_name */
+static int stateval = 0;	/* --stateval: copied into status.state; 0 is treated as "not given" */
+static int clientport = 0;	/* --clientport: destination UDP port */
+static int sendport = 0;	/* --sendport: local UDP port to bind */
+
+/*
+ * Print a usage summary and terminate the process.
+ *
+ * main() only calls this after reporting a missing argument, so the
+ * exit status is non-zero to let callers detect the failure.
+ */
+static void usage(void)
+{
+	fprintf(stderr,
+		"Usage: smnotify --client=<hostname/ip> --ip=<ip> "
+		"--server=<hostname/ip> --stateval=<integer> "
+		"[--clientport=<integer>] [--sendport=<integer>]\n");
+	exit(1);
+}
+
+/*
+ * Create a UDP socket bound to the local IPv4 address addr and port.
+ *
+ * addr must be in a form accepted by inet_aton().  An unparsable
+ * address is rejected; it used to leave the address zeroed, which
+ * silently bound the socket to 0.0.0.0.
+ *
+ * Returns the socket descriptor.  Any failure prints a message and
+ * exits with status 10.
+ */
+static int create_socket(const char *addr, int port)
+{
+	int s;
+	struct sockaddr_in sock_in;
+
+	s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+	if (s == -1) {
+		printf("Failed to open local socket\n");
+		exit(10);
+	}
+
+	memset(&sock_in, 0, sizeof(sock_in));
+	sock_in.sin_family = AF_INET;
+	sock_in.sin_port = htons(port);
+	if (inet_aton(addr, &sock_in.sin_addr) == 0) {
+		printf("Invalid local address %s\n", addr);
+		close(s);
+		exit(10);
+	}
+	if (bind(s, (struct sockaddr *)&sock_in, sizeof(sock_in)) == -1) {
+		printf("Failed to bind to local socket\n");
+		close(s);
+		exit(10);
+	}
+
+	return s;
+}
+
+/*
+ * Send one SM_NOTIFY call (via sm_notify_1) over UDP to --client,
+ * using a socket bound to the local address --ip and port --sendport.
+ *
+ * --client, --ip, --server and a non-zero --stateval are required.
+ * No reply is waited for: the RPC timeout is set to zero before the
+ * call, and an alarm(3) is armed before the RPC client is created.
+ *
+ * Exit status: 0 after the call has been issued, 1 for an invalid
+ * option, 10 if the client address is invalid or the RPC client cannot
+ * be created; missing arguments terminate through usage().
+ */
+int main(int argc, const char *argv[])
+{
+	struct poptOption popt_options[] = {
+		POPT_AUTOHELP
+		{ "client", 'c', POPT_ARG_STRING, &client, 0, "remote client to send the notify to", "hostname/ip" },
+		{ "clientport", 0, POPT_ARG_INT, &clientport, 0, "clientport", "integer" },
+		{ "ip", 'i', POPT_ARG_STRING, &ip, 0, "local ip address to send the notification from", "ip" },
+		{ "sendport", 0, POPT_ARG_INT, &sendport, 0, "port to send the notify from", "integer" },
+		{ "server", 's', POPT_ARG_STRING, &server, 0, "servername to use in the notification", "hostname/ip" },
+		{ "stateval", 0, POPT_ARG_INT, &stateval, 0, "stateval", "integer" },
+		POPT_TABLEEND
+	};
+	int opt;
+	poptContext pc;
+	CLIENT *clnt;
+	int s;
+	struct sockaddr_in sock_cl;
+	struct timeval w;
+	struct status st;
+
+	pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
+
+	/* every option stores straight into its variable, so anything
+	 * other than -1 (end of options) is a parse error */
+	while ((opt = poptGetNextOpt(pc)) != -1) {
+		fprintf(stderr, "Invalid option %s: %s\n",
+			poptBadOption(pc, 0), poptStrerror(opt));
+		exit(1);
+	}
+
+	if (client == NULL) {
+		printf("ERROR: client not specified\n");
+		usage();
+	}
+
+	if (ip == NULL) {
+		printf("ERROR: ip not specified\n");
+		usage();
+	}
+
+	if (server == NULL) {
+		printf("ERROR: server not specified\n");
+		usage();
+	}
+
+	if (stateval == 0) {
+		printf("ERROR: stateval not specified\n");
+		usage();
+	}
+
+	/* Set up a sockaddr_in for the client we want to notify.  Do this
+	   before creating the socket so a bad address fails early instead
+	   of sending the notify to 0.0.0.0.
+	*/
+	memset(&sock_cl, 0, sizeof(sock_cl));
+	sock_cl.sin_family = AF_INET;
+	sock_cl.sin_port = htons(clientport);
+	if (inet_aton(client, &sock_cl.sin_addr) == 0) {
+		printf("ERROR: invalid client address %s\n", client);
+		exit(10);
+	}
+
+	/* Since we want to control from which address these packets are
+	   sent we must create the socket ourselves and use low-level rpc
+	   calls.
+	*/
+	s = create_socket(ip, sendport);
+
+	/* only wait for at most 3 seconds before giving up */
+	alarm(3);
+
+	w.tv_sec = 1;
+	w.tv_usec= 0;
+
+	clnt = clntudp_create(&sock_cl, 100024, 1, w, &s);
+	if (clnt == NULL) {
+		printf("ERROR: failed to connect to client\n");
+		exit(10);
+	}
+
+	/* we don't want to wait for any reply */
+	w.tv_sec = 0;
+	w.tv_usec = 0;
+	clnt_control(clnt, CLSET_TIMEOUT, (char *)&w);
+
+	st.mon_name=server;
+	st.state=stateval;
+	sm_notify_1(&st, clnt);
+
+	clnt_destroy(clnt);
+	poptFreeContext(pc);
+
+	return 0;
+}
diff --git a/ctdb/utils/smnotify/smnotify.x b/ctdb/utils/smnotify/smnotify.x
new file mode 100644
index 0000000..94239f8
--- /dev/null
+++ b/ctdb/utils/smnotify/smnotify.x
@@ -0,0 +1,21 @@
+/*
+ * rpcgen interface definition for the single call used by smnotify.c
+ * (which invokes it as sm_notify_1).
+ */
+#ifdef RPC_HDR
+%#ifdef _AIX
+%#include <rpc/rpc.h>
+%#endif /* _AIX */
+#endif /* RPC_HDR */
+
+/* Upper bound on the length of status.mon_name. */
+const SM_MAXSTRLEN = 1024;
+
+/* Argument of SM_NOTIFY; smnotify.c fills it from --server and --stateval. */
+struct status {
+ string mon_name<SM_MAXSTRLEN>;
+ int state;
+};
+
+
+/* Program 100024, version 1, procedure 6 - presumably the NSM (statd) notify call; confirm against the NSM protocol. */
+program SMNOTIFY {
+ version SMVERSION {
+ void SM_NOTIFY(struct status) = 6;
+ } = 1;
+} = 100024;
+
+
diff --git a/ctdb/utils/tdb/tdb_mutex_check.c b/ctdb/utils/tdb/tdb_mutex_check.c
new file mode 100644
index 0000000..4da0c40
--- /dev/null
+++ b/ctdb/utils/tdb/tdb_mutex_check.c
@@ -0,0 +1,160 @@
+/*
+ Check the mutex lock information in tdb database
+
+ Copyright (C) Amitay Isaacs 2015-2021
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <errno.h>
+
+#ifndef USE_TDB_MUTEX_LOCKING
+#define USE_TDB_MUTEX_LOCKING 1
+#endif
+
+#include "lib/tdb/common/tdb_private.h"
+#include "lib/tdb/common/mutex.c"
+
+/* Return the value (0-15) of one hexadecimal digit, or -1 if c is not one. */
+static int hex_nibble(char c)
+{
+	if (c >= '0' && c <= '9') {
+		return c - '0';
+	}
+	if (c >= 'a' && c <= 'f') {
+		return c - 'a' + 10;
+	}
+	if (c >= 'A' && c <= 'F') {
+		return c - 'A' + 10;
+	}
+	return -1;
+}
+
+/*
+ * Decode a string of hexadecimal digit pairs into a newly allocated
+ * byte buffer.
+ *
+ * hex_in: NUL-terminated string; a trailing unpaired character is
+ *         ignored.
+ * plen:   receives the number of decoded bytes; only written on
+ *         success.
+ *
+ * Returns the buffer, which the caller must free(), or NULL if the
+ * input holds no complete pair, contains a non-hexadecimal character,
+ * or memory cannot be allocated.
+ */
+static uint8_t *hex_decode(const char *hex_in, size_t *plen)
+{
+	size_t i;
+	uint8_t *buffer;
+	size_t len;
+
+	len = strlen(hex_in) / 2;
+	if (len == 0) {
+		return NULL;
+	}
+
+	buffer = malloc(len);
+	if (buffer == NULL) {
+		return NULL;
+	}
+
+	for (i = 0; i < len; i++) {
+		int hi = hex_nibble(hex_in[i*2]);
+		int lo = hex_nibble(hex_in[i*2 + 1]);
+
+		/* reject bad digits rather than emit an undefined byte */
+		if (hi < 0 || lo < 0) {
+			free(buffer);
+			return NULL;
+		}
+		buffer[i] = (uint8_t)((hi << 4) | lo);
+	}
+
+	*plen = len;
+
+	return buffer;
+}
+
+/*
+ * Map a hex-encoded key to its hash chain number in tdb.
+ *
+ * The key is decoded with hex_decode(), hashed with tdb_jenkins_hash()
+ * and reduced modulo tdb_hash_size(tdb).
+ *
+ * Returns the chain number, or -1 if the key could not be decoded.
+ */
+static int get_hash_chain(struct tdb_context *tdb, const char *hex_key)
+{
+	TDB_DATA decoded = { .dsize = 0 };
+	unsigned int digest;
+
+	decoded.dptr = hex_decode(hex_key, &decoded.dsize);
+	if (decoded.dsize == 0 || decoded.dptr == NULL) {
+		return -1;
+	}
+
+	digest = tdb_jenkins_hash(&decoded);
+	/* the decoded key is only needed for hashing */
+	free(decoded.dptr);
+
+	return digest % tdb_hash_size(tdb);
+}
+
+/*
+ * Probe the mutex of one hash chain with pthread_mutex_trylock() and
+ * report anything other than "free":
+ *   - acquired:   unlocked again, nothing printed
+ *   - EOWNERDEAD: marked consistent and unlocked, "cleaned" is printed
+ *   - EBUSY:      the mutex's __owner field is printed
+ *   - otherwise:  the trylock error code is printed
+ */
+static void check_one(struct tdb_mutexes *mutexes, int chain)
+{
+	int pthread_mutex_consistent_np(pthread_mutex_t *);
+	/* chain N uses slot N+1 - presumably slot 0 is reserved for another
+	 * lock; confirm against lib/tdb/common/mutex.c */
+	pthread_mutex_t *lock = &mutexes->hashchains[chain+1];
+	int rc = pthread_mutex_trylock(lock);
+
+	switch (rc) {
+	case 0:
+		pthread_mutex_unlock(lock);
+		return;
+
+	case EOWNERDEAD:
+		rc = pthread_mutex_consistent_np(lock);
+		if (rc != 0) {
+			printf("[%6d] consistent failed (%d)\n", chain, rc);
+			return;
+		}
+		rc = pthread_mutex_unlock(lock);
+		if (rc != 0) {
+			printf("[%6d] unlock failed (%d)\n", chain, rc);
+			return;
+		}
+		printf("[%6d] cleaned\n", chain);
+		return;
+
+	case EBUSY:
+		printf("[%6d] pid=%d\n", chain, lock->__data.__owner);
+		return;
+
+	default:
+		printf("[%6d] trylock failed (%d)\n", chain, rc);
+		return;
+	}
+}
+
+/* Run check_one() on chains 0 .. hash_size-1, in ascending order. */
+static void check_all(struct tdb_mutexes *mutexes, unsigned int hash_size)
+{
+	unsigned int chain = 0;
+
+	while (chain < hash_size) {
+		check_one(mutexes, chain);
+		chain++;
+	}
+}
+
+/*
+ * Usage: <prog> <tdb file> [<key1> <key2> ...]
+ *
+ * Opens the tdb with mutex locking enabled.  With no keys every hash
+ * chain mutex is checked; otherwise only the chains that the given
+ * hex-encoded keys hash to are checked.  Keys that fail to decode are
+ * skipped silently.
+ *
+ * Exits with status 1 on a usage error, if the tdb cannot be opened or
+ * if it has no mutex area mapped; returns 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+	const uint32_t tdb_flags = TDB_MUTEX_LOCKING | TDB_INCOMPATIBLE_HASH |
+				   TDB_CLEAR_IF_FIRST;
+	const char *tdb_file;
+	TDB_CONTEXT *tdb;
+	int arg;
+
+	if (argc < 2) {
+		printf("Usage %s <tdb file> [<key1> <key2>]\n", argv[0]);
+		exit(1);
+	}
+	tdb_file = argv[1];
+
+	tdb = tdb_open(tdb_file, 0, tdb_flags, O_RDWR, 0);
+	if (tdb == NULL) {
+		printf("Error opening %s\n", tdb_file);
+		exit(1);
+	}
+
+	if (tdb->mutexes == NULL) {
+		printf("Mutexes are not mmaped\n");
+		exit(1);
+	}
+
+	if (argc == 2) {
+		/* no keys given: walk every chain */
+		check_all(tdb->mutexes, tdb_hash_size(tdb));
+		tdb_close(tdb);
+		return 0;
+	}
+
+	for (arg=2; arg<argc; arg++) {
+		int chain = get_hash_chain(tdb, argv[arg]);
+
+		if (chain != -1) {
+			check_one(tdb->mutexes, chain);
+		}
+	}
+
+	tdb_close(tdb);
+	return 0;
+}