summaryrefslogtreecommitdiffstats
path: root/ctdb/tests/INTEGRATION/simple
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--ctdb/tests/INTEGRATION/simple/README2
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.000.onnode.sh12
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh38
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.002.tunables.sh67
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.003.ping.sh34
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.004.getpid.sh55
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh66
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.010.statistics.sh17
-rwxr-xr-xctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh62
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh26
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh26
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh24
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh30
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh30
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh29
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh24
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh20
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh80
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh92
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh53
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh34
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh48
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh39
-rwxr-xr-xctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh55
-rwxr-xr-xctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh42
-rwxr-xr-xctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh74
-rwxr-xr-xctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh18
-rwxr-xr-xctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh16
-rwxr-xr-xctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh76
29 files changed, 1189 insertions, 0 deletions
diff --git a/ctdb/tests/INTEGRATION/simple/README b/ctdb/tests/INTEGRATION/simple/README
new file mode 100644
index 0000000..3ac738d
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/README
@@ -0,0 +1,2 @@
+Simple integration tests. These can be run against a pool of CTDB
+daemons running on the local machine - aka "local daemons".
diff --git a/ctdb/tests/INTEGRATION/simple/basics.000.onnode.sh b/ctdb/tests/INTEGRATION/simple/basics.000.onnode.sh
new file mode 100755
index 0000000..4ca6e46
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.000.onnode.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+# Use 'onnode' to confirm connectivity between all cluster nodes
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+echo "Checking connectivity between nodes..."
+onnode all onnode -p all hostname
diff --git a/ctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh b/ctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh
new file mode 100755
index 0000000..aafe27e
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb listnodes' shows the list of nodes
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+# Node 0's list is the reference that every other node is compared with.
+try_command_on_node -v 0 "$CTDB listnodes"
+
+num_nodes=$(wc -l <"$outfile")
+
+# Each line should look like an IP address.  The alternation is grouped
+# so that "^" and "$" anchor both address families; ungrouped it would
+# mean "starts with IPv4" OR "ends with IPv6" and let stray text through.
+ipv4_pat='[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+'
+ipv6_pat='[[:xdigit:]]+:[[:xdigit:]:]+[[:xdigit:]]+'
+sanity_check_output \
+    2 \
+    "^(${ipv4_pat}|${ipv6_pat})\$"
+
+out_0="$out"
+
+echo "Checking other nodes..."
+
+# Node 0 has already been queried, so start the comparison at node 1.
+n=1
+while [ "$n" -lt "$num_nodes" ] ; do
+    printf 'Node %s: ' "$n"
+    try_command_on_node "$n" "$CTDB listnodes"
+    if [ "$out_0" = "$out" ] ; then
+        echo "OK"
+    else
+        echo "DIFFERs from node 0:"
+        echo "$out"
+        exit 1
+    fi
+    n=$((n + 1))
+done
diff --git a/ctdb/tests/INTEGRATION/simple/basics.002.tunables.sh b/ctdb/tests/INTEGRATION/simple/basics.002.tunables.sh
new file mode 100755
index 0000000..6f362c6
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.002.tunables.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+
+# Verify the operation of "ctdb listvars", "ctdb getvar", "ctdb setvar"
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node -v 0 "$CTDB listvars"
+
+sanity_check_output \
+ 5 \
+ '^[[:alpha:]][[:alnum:]]+[[:space:]]*=[[:space:]]*[[:digit:]]+$'
+
+echo "Verifying all variable values using \"ctdb getvar\"..."
+
+while read var x val ; do
+ try_command_on_node 0 "$CTDB getvar $var"
+
+ val2="${out#*= }"
+
+ if [ "$val" != "$val2" ] ; then
+ echo "MISMATCH on $var: $val != $val2"
+ exit 1
+ fi
+done <"$outfile"
+
+echo "GOOD: all tunables match"
+
+var="RecoverTimeout"
+
+try_command_on_node -v 0 $CTDB getvar $var
+
+val="${out#*= }"
+
+echo "Going to try incrementing it..."
+
+incr=$(($val + 1))
+
+try_command_on_node 0 $CTDB setvar $var $incr
+
+echo "That seemed to work, let's check the value..."
+
+try_command_on_node -v 0 $CTDB getvar $var
+
+newval="${out#*= }"
+
+if [ "$incr" != "$newval" ] ; then
+ echo "Nope, that didn't work..."
+ exit 1
+fi
+
+echo "Look's good! Now verifying with \"ctdb listvars\""
+try_command_on_node -v 0 "$CTDB listvars | grep '^$var'"
+
+check="${out#*= }"
+
+if [ "$incr" != "$check" ] ; then
+ echo "Nope, that didn't work..."
+ exit 1
+fi
+
+echo "Look's good! Putting the old value back..."
+cmd="$CTDB setvar $var $val"
+try_command_on_node 0 $cmd
diff --git a/ctdb/tests/INTEGRATION/simple/basics.003.ping.sh b/ctdb/tests/INTEGRATION/simple/basics.003.ping.sh
new file mode 100755
index 0000000..8071762
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.003.ping.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Verify the operation of the 'ctdb ping' command
+#
+# 1. Run the 'ctdb ping' command on one of the nodes and verify that it
+# shows valid and expected output.
+# 2. Shutdown one of the cluster nodes, using the 'ctdb shutdown'
+# command.
+# 3. Run the 'ctdb ping -n <node>' command from another node to this
+# node.
+# 4. Verify that the command is not successful since the ctdb daemon is
+# not running on the node.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node -v 0 "$CTDB ping -n 1"
+
+sanity_check_output \
+ 1 \
+ '^response from 1 time=-?[.0-9]+ sec[[:space:]]+\([[:digit:]]+ clients\)$'
+
+ctdb_onnode -v 1 "shutdown"
+
+wait_until_node_has_status 1 disconnected 30 0
+
+try_command_on_node -v 0 "! $CTDB ping -n 1"
+
+sanity_check_output \
+ 1 \
+ "(: ctdb_control error: ('ctdb_control to disconnected node'|'node is disconnected')|Unable to get ping response from node 1|Node 1 is DISCONNECTED|ctdb_control for getpnn failed|: Can not access node. Node is not operational\.|Node 1 has status DISCONNECTED\|UNHEALTHY\|INACTIVE$)"
diff --git a/ctdb/tests/INTEGRATION/simple/basics.004.getpid.sh b/ctdb/tests/INTEGRATION/simple/basics.004.getpid.sh
new file mode 100755
index 0000000..27025df
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.004.getpid.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb getpid' works as expected
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+echo "There are $num_nodes nodes..."
+
+# Call getpid a few different ways and make sure the answer is always the same.
+
+try_command_on_node -v 0 "onnode -q all $CTDB getpid"
+pids_onnode="$out"
+
+cmd=""
+n=0
+while [ $n -lt $num_nodes ] ; do
+ cmd="${cmd}${cmd:+; }$CTDB getpid -n $n"
+ n=$(($n + 1))
+done
+try_command_on_node -v 0 "( $cmd )"
+pids_getpid_n="$out"
+
+if [ "$pids_onnode" = "$pids_getpid_n" ] ; then
+ echo "They're the same... cool!"
+else
+ die "Error: they differ."
+fi
+
+echo "Checking each PID for validity"
+
+n=0
+while [ $n -lt $num_nodes ] ; do
+ read pid
+ try_command_on_node $n "ls -l /proc/${pid}/exe | sed -e 's@.*/@@'"
+ echo -n "Node ${n}, PID ${pid} looks to be running \"$out\" - "
+ case "$out" in
+ ctdbd) : ;;
+ memcheck*)
+ if [ -z "$VALGRIND" ] ; then
+ die "BAD"
+ fi
+ ;;
+ *) die "BAD"
+ esac
+
+ echo "GOOD!"
+
+ n=$(($n + 1))
+done <<<"$pids_onnode"
diff --git a/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh b/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh
new file mode 100755
index 0000000..c6212fd
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb process-exists' shows correct information
+
+# The implementation is creative about how it gets PIDs for existing and
+# non-existing processes.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+test_node=1
+srvid=0xAE00000012345678
+
+# Execute a ctdb client on $test_node that will last for 60 seconds.
+# It should still be there when we check.
+try_command_on_node -v $test_node \
+ "$CTDB_TEST_WRAPPER exec dummy_client -n 10 -S ${srvid} >/dev/null 2>&1 & echo \$!"
+client_pid="$out"
+
+# Exit hook: kill the dummy client started on $test_node, if its PID
+# was recorded in $client_pid.
+cleanup ()
+{
+    # Nothing was started (or its PID is unknown), so nothing to kill
+    [ -n "$client_pid" ] || return 0
+
+    onnode $test_node kill -9 "$client_pid"
+}
+
+ctdb_test_exit_hook_add cleanup
+
+echo "Waiting until PID $client_pid is registered on node $test_node"
+status=0
+wait_until 30 try_command_on_node $test_node \
+ "$CTDB process-exists ${client_pid}" || status=$?
+echo "$out"
+
+if [ $status -eq 0 ] ; then
+ echo "OK"
+else
+ die "BAD"
+fi
+
+echo "Checking for PID $client_pid with SRVID $srvid on node $test_node"
+status=0
+try_command_on_node $test_node \
+ "$CTDB process-exists ${client_pid} ${srvid}" || status=$?
+echo "$out"
+
+if [ $status -eq 0 ] ; then
+ echo "OK"
+else
+ die "BAD"
+fi
+
+echo "Checking for PID $client_pid with SRVID $client_pid on node $test_node"
+try_command_on_node -v $test_node \
+ "! $CTDB process-exists ${client_pid} ${client_pid}"
+
+# Now get the PID of the ctdb daemon itself on the test node.
+# This is not a ctdb client and process-exists should return error.
+# $CTDB is used, as in every other invocation in this test, instead of
+# a bare "ctdb" so that the same tool is exercised throughout.
+try_command_on_node $test_node "$CTDB getpid"
+pid="$out"
+
+echo "Checking for PID $pid on node $test_node"
+try_command_on_node -v $test_node "! $CTDB process-exists ${pid}"
diff --git a/ctdb/tests/INTEGRATION/simple/basics.010.statistics.sh b/ctdb/tests/INTEGRATION/simple/basics.010.statistics.sh
new file mode 100755
index 0000000..d97e035
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.010.statistics.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb statistics' works as expected
+
+# This is pretty superficial and could do more validation.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+pattern='^(CTDB version 1|Current time of statistics[[:space:]]*:.*|Statistics collected since[[:space:]]*:.*|Gathered statistics for [[:digit:]]+ nodes|[[:space:]]+[[:alpha:]_]+[[:space:]]+[[:digit:]]+|[[:space:]]+(node|client|timeouts|locks)|[[:space:]]+([[:alpha:]_]+_latency|max_reclock_[[:alpha:]]+)[[:space:]]+[[:digit:]-]+\.[[:digit:]]+[[:space:]]sec|[[:space:]]*(locks_latency|reclock_ctdbd|reclock_recd|call_latency|lockwait_latency|childwrite_latency)[[:space:]]+MIN/AVG/MAX[[:space:]]+[-.[:digit:]]+/[-.[:digit:]]+/[-.[:digit:]]+ sec out of [[:digit:]]+|[[:space:]]+(hop_count_buckets|lock_buckets):[[:space:][:digit:]]+)$'
+
+try_command_on_node -v 1 "$CTDB statistics"
+
+sanity_check_output 40 "$pattern"
diff --git a/ctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh b/ctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh
new file mode 100755
index 0000000..51f34d9
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb statisticsreset' works as expected
+
+# This is pretty superficial. It just checks that a few particular
+# items reduce.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+# Print the value of one statistic from the command output saved in
+# $outfile.  Only the first matching line is used.
+#
+# Arguments: $1 - statistic name, e.g. "req_control"
+# Outputs:   the matching line with its leading whitespace, the label
+#            and the following whitespace removed, on stdout
+get_stat ()
+{
+    local label="$1"
+
+    # sed reads the file itself, so no cat is needed.  The substitution
+    # consumes everything up to and including the first digit and puts
+    # that digit back, so the rest of the line - the complete number -
+    # is what gets printed.
+    sed -rn -e "s@^[[:space:]]+${label}[[:space:]]+([[:digit:]])@\1@p" \
+        "$outfile" |
+        head -n 1
+}
+
+# Check that a statistic is lower after a reset than it was before.
+#
+# Arguments: $1 - statistic name (used in messages only)
+#            $2 - value before
+#            $3 - value after
+# Prints a GOOD line if $3 < $2, otherwise calls die with a BAD message.
+check_reduced ()
+{
+    local label="$1"
+    local before="$2"
+    local after="$3"
+
+    # Operands are quoted so that an empty value (statistic not found)
+    # cannot change the number of arguments that "[" sees.
+    if [ "$after" -lt "$before" ] ; then
+        echo "GOOD: ${label} reduced from ${before} to ${after}"
+    else
+        die "BAD: ${label} did not reduce from ${before} to ${after}"
+    fi
+}
+
+n=0
+while [ $n -lt $num_nodes ] ; do
+ echo "Getting initial statistics for node ${n}..."
+
+ try_command_on_node -v $n $CTDB statistics
+
+ before_req_control=$(get_stat "req_control")
+ before_reply_control=$(get_stat "reply_control")
+ before_node_packets_recv=$(get_stat "node_packets_recv")
+
+ try_command_on_node $n $CTDB statisticsreset
+
+ try_command_on_node -v $n $CTDB statistics
+
+ after_req_control=$(get_stat "req_control")
+ after_reply_control=$(get_stat "reply_control")
+ after_node_packets_recv=$(get_stat "node_packets_recv")
+
+ check_reduced "req_control" "$before_req_control" "$after_req_control"
+ check_reduced "reply_control" "$before_reply_control" "$after_reply_control"
+ check_reduced "node_packets_recv" "$before_node_packets_recv" "$after_node_packets_recv"
+
+ n=$(($n + 1))
+done
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh b/ctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh
new file mode 100755
index 0000000..180b4ae
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb stop' causes a node to yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Stopping leader ${leader}..."
+ctdb_onnode "$test_node" stop -n "$leader"
+
+wait_until_node_has_status "$leader" stopped
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh b/ctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh
new file mode 100755
index 0000000..234869c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb ban' causes a node to yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Banning leader ${leader}..."
+ctdb_onnode "$test_node" ban 300 -n "$leader"
+
+wait_until_node_has_status "$leader" banned
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh b/ctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh
new file mode 100755
index 0000000..94bcf27
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb setleaderrole off' causes a node to yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Removing leader capability from leader ${leader}..."
+ctdb_onnode "$test_node" setleaderrole off -n "$leader"
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh
new file mode 100755
index 0000000..95f522d
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb stop' causes a node to yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+ctdb_nodes_start_custom -C "cluster lock"
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Stopping leader ${leader}..."
+ctdb_onnode "$test_node" stop -n "$leader"
+
+wait_until_node_has_status "$leader" stopped
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh
new file mode 100755
index 0000000..0ef4e2b
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb ban' causes a node to yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+ctdb_nodes_start_custom -C "cluster lock"
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Banning leader ${leader}..."
+ctdb_onnode "$test_node" ban 300 -n "$leader"
+
+wait_until_node_has_status "$leader" banned
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh
new file mode 100755
index 0000000..4489bc5
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+# Verify that removing the leader capability causes a node to
+# yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+ctdb_nodes_start_custom -C "cluster lock"
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Removing leader capability from leader ${leader}..."
+ctdb_onnode "$test_node" setleaderrole off -n "$leader"
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh b/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh
new file mode 100755
index 0000000..3a76654
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+# Verify that "ctdb getreclock" gets the recovery lock correctly
+
+# Make sure the recovery lock is consistent across all nodes.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+echo "Check that recovery lock is set the same on all nodes..."
+ctdb_onnode all getreclock
+
+# outfile is set above by ctdb_onnode
+# shellcheck disable=SC2154
+n=$(sort -u "$outfile" | wc -l | tr -d '[:space:]')
+
+case "$n" in
+0) echo "GOOD: Recovery lock is unset on all nodes" ;;
+1) echo "GOOD: All nodes have the same recovery lock setting" ;;
+*) ctdb_test_fail "BAD: Recovery lock setting differs across nodes" ;;
+esac
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh b/ctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh
new file mode 100755
index 0000000..d043c7e
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+# Check that CTDB operates correctly if the recovery lock is configured
+# as a command.
+
+# This test works only with local daemons. On a real cluster it has
+# no way of updating configuration.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+echo "Starting CTDB with recovery lock command configured..."
+ctdb_nodes_start_custom -R
+
+echo "Good, that seems to work!"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh
new file mode 100755
index 0000000..9088a80
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+
+# Verify that the cluster recovers if the recovery lock is removed.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+echo "Starting CTDB with cluster lock recheck interval set to 5s..."
+ctdb_nodes_start_custom -r 5
+
+# Succeed if the generation shown in the "status" output of a node
+# differs from a previously recorded generation.
+#
+# Arguments: $1 - node to query
+#            $2 - generation to compare against
+# Sets:      generation_new (deliberately not local, see below)
+# Returns:   0 if the generation differs, 1 if it is the same
+generation_has_changed ()
+{
+    local node="$1"
+    local generation_init="$2"
+
+    # Not local: leaked on purpose so that the test can print it
+    generation_new=""
+
+    ctdb_onnode "$node" status
+
+    # $outfile set by ctdb_onnode() above
+    # shellcheck disable=SC2154
+    generation_new=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile")
+
+    if [ "$generation_new" = "$generation_init" ] ; then
+        return 1
+    fi
+
+    return 0
+}
+
+select_test_node
+
+echo "Get recovery lock setting"
+# shellcheck disable=SC2154
+# $test_node set by select_test_node() above
+ctdb_onnode "$test_node" getreclock
+# shellcheck disable=SC2154
+# $out set by ctdb_onnode() above
+reclock_setting="$out"
+
+if [ -z "$reclock_setting" ] ; then
+ ctdb_test_skip "Recovery lock is not set"
+fi
+
+t="${reclock_setting% 5}"
+reclock="${t##* }"
+
+if [ ! -f "$reclock" ] ; then
+ ctdb_test_error "Recovery lock file \"${reclock}\" is missing"
+fi
+
+echo "Recovery lock setting is \"${reclock_setting}\""
+echo "Recovery lock file is \"${reclock}\""
+echo
+
+leader_get "$test_node"
+
+generation_get
+
+echo "Remove recovery lock"
+rm "$reclock"
+echo
+
+# This will mean an election has taken place and a recovery has occurred
+wait_until_generation_has_changed "$test_node"
+
+# shellcheck disable=SC2154
+# $leader set by leader_get() above
+leader_old="$leader"
+
+leader_get "$test_node"
+
+# Either outcome is acceptable; report exactly one of them.  The new
+# leader is in $leader, as refreshed by leader_get() just above.
+if [ "$leader" != "$leader_old" ] ; then
+    echo "OK: Leader has changed to node ${leader}"
+else
+    echo "GOOD: Leader is still node ${leader}"
+fi
+echo
+
+cluster_is_healthy
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh b/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh
new file mode 100755
index 0000000..147547d
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+
+# Verify that if the directory containing the cluster lock is moved
+# then the current cluster leader no longer claims to be leader, and
+# no other node claims to be leader. Confirm that if the directory is
+# moved back then a node will become leader.
+
+# This simulates the cluster filesystem containing the cluster lock
+# being unmounted and remounted.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+echo "Starting CTDB with cluster lock recheck interval set to 5s..."
+ctdb_nodes_start_custom -r 5
+
+select_test_node
+
+echo "Get cluster lock setting"
+# shellcheck disable=SC2154
+# $test_node set by select_test_node() above
+ctdb_onnode "$test_node" getreclock
+# shellcheck disable=SC2154
+# $out set by ctdb_onnode() above
+reclock_setting="$out"
+
+if [ -z "$reclock_setting" ] ; then
+ ctdb_test_skip "Cluster lock is not set"
+fi
+
+t="${reclock_setting% 5}"
+reclock="${t##* }"
+
+if [ ! -f "$reclock" ] ; then
+ ctdb_test_error "Cluster lock file \"${reclock}\" is missing"
+fi
+
+echo "Cluster lock setting is \"${reclock_setting}\""
+echo "Cluster lock file is \"${reclock}\""
+echo
+
+leader_get "$test_node"
+
+dir=$(dirname "$reclock")
+
+echo "Rename cluster lock directory"
+mv "$dir" "${dir}.$$"
+
+wait_until_leader_has_changed "$test_node"
+echo
+
+# shellcheck disable=SC2154
+# $leader set by leader_get() & wait_until_leader_has_changed(), above
+if [ "$leader" != "UNKNOWN" ]; then
+ test_fail "BAD: leader is ${leader}"
+fi
+
+echo "OK: leader is UNKNOWN"
+echo
+
+echo 'Get "leader timeout":'
+conf_tool="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb-config"
+# shellcheck disable=SC2154
+# $test_node set by select_test_node() above
+try_command_on_node "$test_node" "${conf_tool} get cluster 'leader timeout'"
+# shellcheck disable=SC2154
+# $out set by try_command_on_node() above
+leader_timeout="$out"
+echo "Leader timeout is ${leader_timeout}s"
+echo
+
+sleep_time=$((2 * leader_timeout))
+echo "Waiting for ${sleep_time}s to confirm leader stays UNKNOWN"
+sleep_for $sleep_time
+
+leader_get "$test_node"
+if [ "$leader" = "UNKNOWN" ]; then
+ echo "OK: leader is UNKNOWN"
+ echo
+else
+ test_fail "BAD: leader is ${leader}"
+fi
+
+echo "Restore cluster lock directory"
+mv "${dir}.$$" "$dir"
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh b/ctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh
new file mode 100755
index 0000000..b841f5b
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+
+# Run the message_ring test and sanity check the output
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+echo "Running message_ring on all $num_nodes nodes."
+try_command_on_node -v -p all $CTDB_TEST_WRAPPER $VALGRIND message_ring -n $num_nodes
+
+# Get the last line of output.
+last=$(tail -n 1 "$outfile")
+
+pat='^(Waiting for cluster|Ring\[[[:digit:]]+\]: [[:digit:]]+(\.[[:digit:]]+)? msgs/sec \(\+ve=[[:digit:]]+ -ve=[[:digit:]]+\))$'
+sanity_check_output 1 "$pat"
+
+# $last should look like this:
+# Ring[1]: 10670.93 msgs/sec (+ve=53391 -ve=53373)
+stuff="${last##Ring\[*\]: }"
+mps="${stuff% msgs/sec*}"
+
+if [ ${mps%.*} -ge 10 ] ; then
+ echo "OK: $mps msgs/sec >= 10 msgs/sec"
+else
+ echo "BAD: $mps msgs/sec < 10 msgs/sec"
+ exit 1
+fi
+
+stuff="${stuff#*msgs/sec (+ve=}"
+positive="${stuff%% *}"
+
+if [ $positive -ge 10 ] ; then
+ echo "OK: +ive ($positive) >= 10"
+else
+ echo "BAD: +ive ($positive) < 10"
+ exit 1
+fi
+
+stuff="${stuff#*-ve=}"
+negative="${stuff%)}"
+
+if [ $negative -ge 10 ] ; then
+ echo "OK: -ive ($negative) >= 10"
+else
+ echo "BAD: -ive ($negative) < 10"
+ exit 1
+fi
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh b/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh
new file mode 100755
index 0000000..f86d080
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Run tunnel_test and sanity check the output
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+echo "Running tunnel_test on all $num_nodes nodes."
+try_command_on_node -v -p all $CTDB_TEST_WRAPPER $VALGRIND \
+ tunnel_test -t 30 -n $num_nodes
+
+# Get the last line of output.
+last=$(tail -n 1 "$outfile")
+
+pat='^(Waiting for cluster|pnn\[[[:digit:]]+\] [[:digit:]]+(\.[[:digit:]]+)? msgs/sec)$'
+sanity_check_output 1 "$pat"
+
+# $last should look like this:
+# pnn[2] 85400.00 msgs/sec
+stuff="${last##pnn\[*\] }"
+mps="${stuff% msgs/sec}"
+
+if [ ${mps%.*} -ge 10 ] ; then
+ echo "OK: $mps msgs/sec >= 10 msgs/sec"
+else
+ echo "BAD: $mps msgs/sec < 10 msgs/sec"
+ exit 1
+fi
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh b/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh
new file mode 100755
index 0000000..7bca58c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Verify that nothing bad occurs if a node stalls and the leader
+# broadcast timeout triggers
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node
+echo
+
+echo 'Get "leader timeout":'
+conf_tool="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb-config"
+# shellcheck disable=SC2154
+# $test_node set by select_test_node() above
+try_command_on_node "$test_node" "${conf_tool} get cluster 'leader timeout'"
+# shellcheck disable=SC2154
+# $out set by try_command_on_node() above
+leader_timeout="$out"
+echo "Leader timeout is ${leader_timeout} seconds"
+echo
+
+# Assume leader timeout is reasonable and doesn't cause node to be
+# disconnected
+stall_time=$((leader_timeout * 2))
+
+generation_get "$test_node"
+
+echo "Get ctdbd PID on node ${test_node}..."
+ctdb_onnode -v "$test_node" "getpid"
+ctdbd_pid="$out"
+echo
+
+echo "Sending SIGSTOP to ctdbd on ${test_node}"
+try_command_on_node "$test_node" "kill -STOP ${ctdbd_pid}"
+
+sleep_for "$stall_time"
+
+echo "Sending SIGCONT to ctdbd on ${test_node}"
+try_command_on_node "$test_node" "kill -CONT ${ctdbd_pid}"
+echo
+
+wait_until_generation_has_changed "$test_node"
+
+cluster_is_healthy
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh b/ctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh
new file mode 100755
index 0000000..2835e55
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+# Verify an error occurs if a ctdb command is run against a node
+# without a ctdbd
+
+# That is, check that an error message is printed if an attempt is made
+# to execute a ctdb command against a node that is not running ctdbd.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+test_node=1
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+echo "There are $num_nodes nodes."
+
+echo "Shutting down node ${test_node}..."
+try_command_on_node $test_node $CTDB shutdown
+
+wait_until_node_has_status $test_node disconnected 30 0
+
+wait_until_node_has_status 0 recovered 30 0
+
+# Any one of these messages counts as the expected "disconnected node"
+# error.
+pat="ctdb_control error: 'ctdb_control to disconnected node'|ctdb_control error: 'node is disconnected'|Node $test_node is DISCONNECTED|Node $test_node has status DISCONNECTED\|UNHEALTHY\|INACTIVE"
+
+# Each command is run from node 0 against the shut-down node and is
+# expected to fail.  $i is left unquoted, presumably so that "ban 0"
+# is passed on as two words.
+for i in ip disable enable "ban 0" unban listvars ; do
+    try_command_on_node -v 0 ! $CTDB $i -n $test_node
+
+    # grep -E is the supported spelling of the obsolescent egrep
+    if grep -E -q "$pat" "$outfile" ; then
+        echo "OK: \"ctdb ${i}\" fails with expected \"disconnected node\" message"
+    else
+        echo "BAD: \"ctdb ${i}\" does not fail with expected \"disconnected node\" message"
+        exit 1
+    fi
+done
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh b/ctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh
new file mode 100755
index 0000000..be71750
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+
+# Check that the CTDB version consistency checking operates correctly
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init
+
+select_test_node
+
+try_command_on_node -v "$test_node" ctdb version
+version="$out"
+
+major="${version%%.*}"
+rest="${version#*.}"
+minor="${rest%%.*}"
+
+echo "Node ${test_node} has version ${major}.${minor}"
+
+# Unchanged version - this should work
+export CTDB_TEST_SAMBA_VERSION=$(( (major << 16) | minor ))
+printf '\nRestarting node %d with CTDB_TEST_SAMBA_VERSION=0x%08x\n' \
+ "$test_node" \
+ "$CTDB_TEST_SAMBA_VERSION"
+ctdb_nodes_restart "$test_node"
+wait_until_ready
+echo "GOOD: ctdbd restarted successfully on node ${test_node}"
+
+d="$CTDB_SCRIPTS_HELPER_BINDIR"
+try_command_on_node "$test_node" "${d}/ctdb-path" "pidfile" "ctdbd"
+pidfile="$out"
+
+# Changed major version - this should fail
+export CTDB_TEST_SAMBA_VERSION=$(( ((major + 1) << 16) | minor ))
+printf '\nRestarting node %d with CTDB_TEST_SAMBA_VERSION=0x%08x\n' \
+ "$test_node" \
+ "$CTDB_TEST_SAMBA_VERSION"
+ctdb_nodes_restart "$test_node"
+echo "Will use PID file ${pidfile} to check for ctdbd exit"
+wait_until 30 ! test -f "$pidfile"
+echo "GOOD: ctdbd exited early on node ${test_node}"
+
+# Changed minor version - this should fail
+export CTDB_TEST_SAMBA_VERSION=$(( (major << 16) | (minor + 1) ))
+printf '\nRestarting node %d with CTDB_TEST_SAMBA_VERSION=0x%08x\n' \
+ "$test_node" \
+ "$CTDB_TEST_SAMBA_VERSION"
+ctdb_nodes_start "$test_node"
+echo "Will use PID file ${pidfile} to check for ctdbd exit"
+wait_until 30 ! test -f "$pidfile"
+echo "GOOD: ctdbd exited early on node ${test_node}"
diff --git a/ctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh b/ctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh
new file mode 100755
index 0000000..2220a20
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb getdebug' works as expected
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+try_command_on_node -v 1 "onnode -q all $CTDB getdebug"
+getdebug_onnode="$out"
+
+sanity_check_output \
+ $num_nodes \
+ '^(ERROR|WARNING|NOTICE|INFO|DEBUG)$'
+
+cmd=""
+n=0
+while [ $n -lt $num_nodes ] ; do
+ cmd="${cmd}${cmd:+; }$CTDB getdebug -n $n"
+ n=$(($n + 1))
+done
+try_command_on_node -v 1 "$cmd"
+getdebug_n="$out"
+
+if [ "$getdebug_onnode" = "$getdebug_n" ] ; then
+ echo "They're the same... cool!"
+else
+ die "Error: they differ."
+fi
+
+seps=""
+nl="
+"
+while read line ; do
+ t=$(echo "$line" | sed -r -e 's@Node [[:digit:]]+ is at debug level ([[:alpha:]]+) \((-?[[:digit:]]+)\)$@\|\1\|\2|@')
+ seps="${seps}${seps:+${nl}}|Name|Level|${nl}${t}"
+done <<<"$getdebug_onnode"
diff --git a/ctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh b/ctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh
new file mode 100755
index 0000000..dd5949e
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb setdebug' works as expected.
+
+# This is a little superficial. It checks that CTDB thinks the debug
+# level has been changed but doesn't actually check that logging occurs
+# at the new level.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node
+
+# Sets check_debug to the output of "getdebug" run on node $1
+get_debug ()
+{
+	local node="$1"
+	# Local, so any value the caller holds in $out is left alone
+	local out
+
+	try_command_on_node -v "$node" "$CTDB getdebug"
+	check_debug="$out"
+}
+
+# Set debug level $2 on node $1; die unless getdebug then reports ${3:-$2}
+set_and_check_debug ()
+{
+	local node="$1"
+	local level="$2"
+	local levelstr="${3:-$level}"
+	# Local: get_debug assigns check_debug in the caller's scope
+	local check_debug
+
+	echo "Setting debug level on node ${node} to ${level}."
+	try_command_on_node "$node" "$CTDB setdebug ${level}"
+	get_debug "$node"
+	if [ "$levelstr" != "$check_debug" ] ; then
+		die "BAD: Debug level \"$levelstr\" != \"$check_debug\"."
+	fi
+}
+
+get_debug $test_node
+initial_debug="$check_debug"
+
+levels="ERROR WARNING NOTICE INFO DEBUG"
+
+for new_debug in $levels ; do
+ [ "$initial_debug" != "$new_debug" ] || continue
+
+ echo
+ set_and_check_debug $test_node "$new_debug"
+done
+
+# Numeric levels: after setting number $i, getdebug must report the name
+# $new_debug.  Nothing is skipped: initial_debug is a name, never a number
+while read -r new_debug i ; do
+	echo
+	set_and_check_debug "$test_node" "$i" "$new_debug"
+done <<EOF
+ERROR 0
+WARNING 1
+WARNING 2
+NOTICE 3
+NOTICE 4
+INFO 5
+INFO 6
+INFO 7
+INFO 8
+INFO 9
+DEBUG 10
+EOF
diff --git a/ctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh b/ctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh
new file mode 100755
index 0000000..6205c27
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb dumpmemory' shows expected output
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+pat='^([[:space:]].+[[:space:]]+contains[[:space:]]+[[:digit:]]+ bytes in[[:space:]]+[[:digit:]]+ blocks \(ref [[:digit:]]+\)[[:space:]]+0x[[:xdigit:]]+|[[:space:]]+reference to: .+|full talloc report on .+ \(total[[:space:]]+[[:digit:]]+ bytes in [[:digit:]]+ blocks\))$'
+
+try_command_on_node -v 0 "$CTDB dumpmemory"
+sanity_check_output 10 "$pat"
+
+echo
+try_command_on_node -v 0 "$CTDB rddumpmemory"
+sanity_check_output 10 "$pat"
diff --git a/ctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh b/ctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh
new file mode 100755
index 0000000..4fdf61c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+# Check that CTDB operates correctly if there are 0 event scripts
+
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+ctdb_nodes_start_custom --no-event-scripts
+
+echo "Good, that seems to work!"
diff --git a/ctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh b/ctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh
new file mode 100755
index 0000000..046989c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+
+# Verify CTDB's debugging of timed out eventscripts
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init
+
+select_test_node
+
+####################
+
+echo "Setting monitor events to time out..."
+try_command_on_node $test_node 'echo $CTDB_BASE'
+ctdb_base="$out"
+script_options="${ctdb_base}/script.options"
+debug_output="${ctdb_base}/debug-hung-script.log"
+# Register removal of both files with the test exit hook
+ctdb_test_exit_hook_add "onnode $test_node rm -f $script_options"
+ctdb_test_exit_hook_add "onnode $test_node rm -f $debug_output"
+
+try_command_on_node -i "$test_node" tee "$script_options" <<EOF
+CTDB_RUN_TIMEOUT_MONITOR=yes
+CTDB_DEBUG_HUNG_SCRIPT_LOGFILE='$debug_output'
+CTDB_DEBUG_HUNG_SCRIPT_STACKPAT='exportfs|rpcinfo|sleep'
+CTDB_SCRIPT_VARDIR='$ctdb_base'
+EOF
+
+####################
+
+wait_for_monitor_event $test_node
+
+echo "Waiting for debugging output to appear..."
+# test -s rather than -f/-e: succeed only once the log file exists and
+# is non-empty
+wait_until 60 test -s "$debug_output"
+
+printf '\n%s\n' "Debugging output:"
+cat "$debug_output"
+echo
+
+echo "Checking output of hung script debugging..."
+
+# Can we actually read kernel stacks?  If not, stackpat stays empty and
+# no stack trace is demanded from the debug output
+stackpat=''
+if try_command_on_node $test_node "cat /proc/$$/stack >/dev/null 2>&1" ; then
+	stackpat='
+---- Stack trace of interesting process [0-9]*\\[sleep\\] ----
+[<[0-9a-f]*>] .*sleep+.*
+'
+fi
+
+# IFS="" keeps leading blanks; read without -r strips one backslash level
+while IFS="" read expect ; do
+	[ -n "$expect" ] || continue
+	if ! grep -q -- "^${expect}\$" "$debug_output" ; then
+		printf 'BAD: output does not contain "%s"\n' "$expect"
+		exit 1
+	fi
+	printf 'GOOD: output contains "%s"\n' "$expect"
+done <<EOF
+===== Start of hung script debug for PID=".*", event="monitor" =====
+===== End of hung script debug for PID=".*", event="monitor" =====
+pstree -p -a .*:
+00\\\\.test\\\\.script,.*
+ *\`-sleep,.*
+${stackpat}
+---- ctdb scriptstatus monitor: ----
+00\\.test *TIMEDOUT.*
+ *OUTPUT: Sleeping for [0-9]* seconds\\\\.\\\\.\\\\.
+EOF