author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-05 17:47:29 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-05 17:47:29 +0000
commit     4f5791ebd03eaec1c7da0865a383175b05102712 (patch)
tree       8ce7b00f7a76baa386372422adebbe64510812d4 /ctdb/tests/INTEGRATION/simple
parent     Initial commit. (diff)
download   samba-4f5791ebd03eaec1c7da0865a383175b05102712.tar.xz
           samba-4f5791ebd03eaec1c7da0865a383175b05102712.zip
Adding upstream version 2:4.17.12+dfsg. (tags: upstream/2%4.17.12+dfsg, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ctdb/tests/INTEGRATION/simple')
29 files changed, 1189 insertions, 0 deletions
diff --git a/ctdb/tests/INTEGRATION/simple/README b/ctdb/tests/INTEGRATION/simple/README
new file mode 100644
index 0000000..3ac738d
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/README
@@ -0,0 +1,2 @@
+Simple integration tests.  These can be run against a pool of CTDB
+daemons running on the local machine - aka "local daemons".
diff --git a/ctdb/tests/INTEGRATION/simple/basics.000.onnode.sh b/ctdb/tests/INTEGRATION/simple/basics.000.onnode.sh
new file mode 100755
index 0000000..4ca6e46
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.000.onnode.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+# Use 'onnode' to confirm connectivity between all cluster nodes
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+echo "Checking connectivity between nodes..."
+onnode all onnode -p all hostname
diff --git a/ctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh b/ctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh
new file mode 100755
index 0000000..aafe27e
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb listnodes' shows the list of nodes
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node -v 0 "$CTDB listnodes"
+
+num_nodes=$(wc -l <"$outfile")
+
+# Each line should look like an IP address.
+ipv4_pat='[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+'
+ipv6_pat='[[:xdigit:]]+:[[:xdigit:]:]+[[:xdigit:]]+'
+sanity_check_output \
+	2 \
+	"^${ipv4_pat}|${ipv6_pat}\$"
+
+out_0="$out"
+
+echo "Checking other nodes..."
+
+n=1
+while [ $n -lt $num_nodes ] ; do
+	echo -n "Node ${n}: "
+	try_command_on_node $n "$CTDB listnodes"
+	if [ "$out_0" = "$out" ] ; then
+		echo "OK"
+	else
+		echo "DIFFERS from node 0:"
+		echo "$out"
+		exit 1
+	fi
+	n=$(($n + 1))
+done
diff --git a/ctdb/tests/INTEGRATION/simple/basics.002.tunables.sh b/ctdb/tests/INTEGRATION/simple/basics.002.tunables.sh
new file mode 100755
index 0000000..6f362c6
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.002.tunables.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+
+# Verify the operation of "ctdb listvars", "ctdb getvar", "ctdb setvar"
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node -v 0 "$CTDB listvars"
+
+sanity_check_output \
+	5 \
+	'^[[:alpha:]][[:alnum:]]+[[:space:]]*=[[:space:]]*[[:digit:]]+$'
+
+echo "Verifying all variable values using \"ctdb getvar\"..."
+
+while read var x val ; do
+	try_command_on_node 0 "$CTDB getvar $var"
+
+	val2="${out#*= }"
+
+	if [ "$val" != "$val2" ] ; then
+		echo "MISMATCH on $var: $val != $val2"
+		exit 1
+	fi
+done <"$outfile"
+
+echo "GOOD: all tunables match"
+
+var="RecoverTimeout"
+
+try_command_on_node -v 0 $CTDB getvar $var
+
+val="${out#*= }"
+
+echo "Going to try incrementing it..."
+
+incr=$(($val + 1))
+
+try_command_on_node 0 $CTDB setvar $var $incr
+
+echo "That seemed to work, let's check the value..."
+
+try_command_on_node -v 0 $CTDB getvar $var
+
+newval="${out#*= }"
+
+if [ "$incr" != "$newval" ] ; then
+	echo "Nope, that didn't work..."
+	exit 1
+fi
+
+echo "Looks good! Now verifying with \"ctdb listvars\""
+try_command_on_node -v 0 "$CTDB listvars | grep '^$var'"
+
+check="${out#*= }"
+
+if [ "$incr" != "$check" ] ; then
+	echo "Nope, that didn't work..."
+	exit 1
+fi
+
+echo "Looks good! Putting the old value back..."
+cmd="$CTDB setvar $var $val"
+try_command_on_node 0 $cmd
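Note: the tunables test above parses the "Name = value" lines printed by "ctdb getvar" with POSIX parameter expansion ("${out#*= }") instead of spawning awk or cut. A minimal standalone sketch of the idiom (the variable names here are illustrative, not part of the commit):

    line='RecoverTimeout = 120'   # sample "ctdb getvar" output
    val="${line#*= }"             # strip up to and including "= "  -> 120
    name="${line%% *}"            # strip from the first space on   -> RecoverTimeout
    echo "$name is $val"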
+cmd="$CTDB setvar $var $val" +try_command_on_node 0 $cmd diff --git a/ctdb/tests/INTEGRATION/simple/basics.003.ping.sh b/ctdb/tests/INTEGRATION/simple/basics.003.ping.sh new file mode 100755 index 0000000..8071762 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/basics.003.ping.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# Verify the operation of the 'ctdb ping' command +# +# 1. Run the 'ctdb ping' command on one of the nodes and verify that it +# shows valid and expected output. +# 2. Shutdown one of the cluster nodes, using the 'ctdb shutdown' +# command. +# 3. Run the 'ctdb ping -n <node>' command from another node to this +# node. +# 4. Verify that the command is not successful since th ctdb daemon is +# not running on the node. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node -v 0 "$CTDB ping -n 1" + +sanity_check_output \ + 1 \ + '^response from 1 time=-?[.0-9]+ sec[[:space:]]+\([[:digit:]]+ clients\)$' + +ctdb_onnode -v 1 "shutdown" + +wait_until_node_has_status 1 disconnected 30 0 + +try_command_on_node -v 0 "! $CTDB ping -n 1" + +sanity_check_output \ + 1 \ + "(: ctdb_control error: ('ctdb_control to disconnected node'|'node is disconnected')|Unable to get ping response from node 1|Node 1 is DISCONNECTED|ctdb_control for getpnn failed|: Can not access node. Node is not operational\.|Node 1 has status DISCONNECTED\|UNHEALTHY\|INACTIVE$)" diff --git a/ctdb/tests/INTEGRATION/simple/basics.004.getpid.sh b/ctdb/tests/INTEGRATION/simple/basics.004.getpid.sh new file mode 100755 index 0000000..27025df --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/basics.004.getpid.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb getpid' works as expected + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" +echo "There are $num_nodes nodes..." + +# Call getpid a few different ways and make sure the answer is always the same. + +try_command_on_node -v 0 "onnode -q all $CTDB getpid" +pids_onnode="$out" + +cmd="" +n=0 +while [ $n -lt $num_nodes ] ; do + cmd="${cmd}${cmd:+; }$CTDB getpid -n $n" + n=$(($n + 1)) +done +try_command_on_node -v 0 "( $cmd )" +pids_getpid_n="$out" + +if [ "$pids_onnode" = "$pids_getpid_n" ] ; then + echo "They're the same... cool!" +else + die "Error: they differ." +fi + +echo "Checking each PID for validity" + +n=0 +while [ $n -lt $num_nodes ] ; do + read pid + try_command_on_node $n "ls -l /proc/${pid}/exe | sed -e 's@.*/@@'" + echo -n "Node ${n}, PID ${pid} looks to be running \"$out\" - " + case "$out" in + ctdbd) : ;; + memcheck*) + if [ -z "$VALGRIND" ] ; then + die "BAD" + fi + ;; + *) die "BAD" + esac + + echo "GOOD!" + + n=$(($n + 1)) +done <<<"$pids_onnode" diff --git a/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh b/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh new file mode 100755 index 0000000..c6212fd --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb process-exists' shows correct information + +# The implementation is creative about how it gets PIDs for existing and +# non-existing processes. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +test_node=1 +srvid=0xAE00000012345678 + +# Execute a ctdb client on $test_node that will last for 60 seconds. +# It should still be there when we check. 
diff --git a/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh b/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh
new file mode 100755
index 0000000..c6212fd
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb process-exists' shows correct information
+
+# The implementation is creative about how it gets PIDs for existing and
+# non-existing processes.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+test_node=1
+srvid=0xAE00000012345678
+
+# Execute a ctdb client on $test_node that will last for 60 seconds.
+# It should still be there when we check.
+try_command_on_node -v $test_node \
+	"$CTDB_TEST_WRAPPER exec dummy_client -n 10 -S ${srvid} >/dev/null 2>&1 & echo \$!"
+client_pid="$out"
+
+cleanup ()
+{
+	if [ -n "$client_pid" ] ; then
+		onnode $test_node kill -9 "$client_pid"
+	fi
+}
+
+ctdb_test_exit_hook_add cleanup
+
+echo "Waiting until PID $client_pid is registered on node $test_node"
+status=0
+wait_until 30 try_command_on_node $test_node \
+	"$CTDB process-exists ${client_pid}" || status=$?
+echo "$out"
+
+if [ $status -eq 0 ] ; then
+	echo "OK"
+else
+	die "BAD"
+fi
+
+echo "Checking for PID $client_pid with SRVID $srvid on node $test_node"
+status=0
+try_command_on_node $test_node \
+	"$CTDB process-exists ${client_pid} ${srvid}" || status=$?
+echo "$out"
+
+if [ $status -eq 0 ] ; then
+	echo "OK"
+else
+	die "BAD"
+fi
+
+echo "Checking for PID $client_pid with SRVID $client_pid on node $test_node"
+try_command_on_node -v $test_node \
+	"! $CTDB process-exists ${client_pid} ${client_pid}"
+
+# Now just echo the PID of the ctdb daemon on test node.
+# This is not a ctdb client and process-exists should return error.
+try_command_on_node $test_node "ctdb getpid"
+pid="$out"
+
+echo "Checking for PID $pid on node $test_node"
+try_command_on_node -v $test_node "! $CTDB process-exists ${pid}"
diff --git a/ctdb/tests/INTEGRATION/simple/basics.010.statistics.sh b/ctdb/tests/INTEGRATION/simple/basics.010.statistics.sh
new file mode 100755
index 0000000..d97e035
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.010.statistics.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb statistics' works as expected
+
+# This is pretty superficial and could do more validation.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+pattern='^(CTDB version 1|Current time of statistics[[:space:]]*:.*|Statistics collected since[[:space:]]*:.*|Gathered statistics for [[:digit:]]+ nodes|[[:space:]]+[[:alpha:]_]+[[:space:]]+[[:digit:]]+|[[:space:]]+(node|client|timeouts|locks)|[[:space:]]+([[:alpha:]_]+_latency|max_reclock_[[:alpha:]]+)[[:space:]]+[[:digit:]-]+\.[[:digit:]]+[[:space:]]sec|[[:space:]]*(locks_latency|reclock_ctdbd|reclock_recd|call_latency|lockwait_latency|childwrite_latency)[[:space:]]+MIN/AVG/MAX[[:space:]]+[-.[:digit:]]+/[-.[:digit:]]+/[-.[:digit:]]+ sec out of [[:digit:]]+|[[:space:]]+(hop_count_buckets|lock_buckets):[[:space:][:digit:]]+)$'
+
+try_command_on_node -v 1 "$CTDB statistics"
+
+sanity_check_output 40 "$pattern"
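Note: sanity_check_output is a harness helper from integration.bash; judging from its use above, it asserts a minimum number of output lines, each matching an extended regex. A rough standalone equivalent under that assumed reading of its contract (the real helper may differ):

    sanity_check_sketch ()
    {
    	local min_lines="$1" pat="$2" file="$3"

    	[ "$(wc -l <"$file")" -ge "$min_lines" ] || return 1
    	! grep -vEq -- "$pat" "$file"    # fail if any line does not match
    }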
diff --git a/ctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh b/ctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh
new file mode 100755
index 0000000..51f34d9
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb statisticsreset' works as expected
+
+# This is pretty superficial.  It just checks that a few particular
+# items reduce.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+get_stat ()
+{
+	local label="$1"
+
+	cat "$outfile" |
+	sed -rn -e "s@^[[:space:]]+${label}[[:space:]]+([[:digit:]])@\1@p" |
+	head -1
+}
+
+check_reduced ()
+{
+	local label="$1"
+	local before="$2"
+	local after="$3"
+
+	if [ $after -lt $before ] ; then
+		echo "GOOD: ${label} reduced from ${before} to ${after}"
+	else
+		die "BAD: ${label} did not reduce from ${before} to ${after}"
+	fi
+}
+
+n=0
+while [ $n -lt $num_nodes ] ; do
+	echo "Getting initial statistics for node ${n}..."
+
+	try_command_on_node -v $n $CTDB statistics
+
+	before_req_control=$(get_stat "req_control")
+	before_reply_control=$(get_stat "reply_control")
+	before_node_packets_recv=$(get_stat "node_packets_recv")
+
+	try_command_on_node $n $CTDB statisticsreset
+
+	try_command_on_node -v $n $CTDB statistics
+
+	after_req_control=$(get_stat "req_control")
+	after_reply_control=$(get_stat "reply_control")
+	after_node_packets_recv=$(get_stat "node_packets_recv")
+
+	check_reduced "req_control" "$before_req_control" "$after_req_control"
+	check_reduced "reply_control" "$before_reply_control" "$after_reply_control"
+	check_reduced "node_packets_recv" "$before_node_packets_recv" "$after_node_packets_recv"
+
+	n=$(($n + 1))
+done
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh b/ctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh
new file mode 100755
index 0000000..180b4ae
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb stop' causes a node to yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Stopping leader ${leader}..."
+ctdb_onnode "$test_node" stop -n "$leader"
+
+wait_until_node_has_status "$leader" stopped
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh b/ctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh
new file mode 100755
index 0000000..234869c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb ban' causes a node to yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Banning leader ${leader}..."
+ctdb_onnode "$test_node" ban 300 -n "$leader"
+
+wait_until_node_has_status "$leader" banned
+
+wait_until_leader_has_changed "$test_node"
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh b/ctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh
new file mode 100755
index 0000000..94bcf27
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+# Verify that removing the leader capability causes a node to yield the leader role
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+# This is the node used to execute commands
+select_test_node
+echo
+
+# test_node set by select_test_node()
+# shellcheck disable=SC2154
+leader_get "$test_node"
+
+# leader set by leader_get()
+# shellcheck disable=SC2154
+echo "Removing leader capability from leader ${leader}..."
+ctdb_onnode "$test_node" setleaderrole off -n "$leader"
+
+wait_until_leader_has_changed "$test_node"
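Note: the cluster.00x tests above all converge through wait_until_* helpers defined in integration.bash. Conceptually these poll a condition command until a timeout expires, roughly like this sketch (an illustration of the pattern only, not the harness's actual implementation):

    wait_until_sketch ()
    {
    	local timeout="$1" ; shift

    	local t=0
    	while [ $t -lt $timeout ] ; do
    		"$@" && return 0    # condition command succeeded
    		sleep 1
    		t=$(($t + 1))
    	done
    	return 1                    # timed out
    }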
+ctdb_onnode "$test_node" setleaderrole off -n "$leader" + +wait_until_leader_has_changed "$test_node" diff --git a/ctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh new file mode 100755 index 0000000..95f522d --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb stop' causes a node to yield the leader role + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -n + +ctdb_nodes_start_custom -C "cluster lock" + +# This is the node used to execute commands +select_test_node +echo + +# test_node set by select_test_node() +# shellcheck disable=SC2154 +leader_get "$test_node" + +# leader set by leader_get() +# shellcheck disable=SC2154 +echo "Stopping leader ${leader}..." +ctdb_onnode "$test_node" stop -n "$leader" + +wait_until_node_has_status "$leader" stopped + +wait_until_leader_has_changed "$test_node" diff --git a/ctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh new file mode 100755 index 0000000..0ef4e2b --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb ban' causes a node to yield the leader role + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -n + +ctdb_nodes_start_custom -C "cluster lock" + +# This is the node used to execute commands +select_test_node +echo + +# test_node set by select_test_node() +# shellcheck disable=SC2154 +leader_get "$test_node" + +# leader set by leader_get() +# shellcheck disable=SC2154 +echo "Banning leader ${leader}..." +ctdb_onnode "$test_node" ban 300 -n "$leader" + +wait_until_node_has_status "$leader" banned + +wait_until_leader_has_changed "$test_node" diff --git a/ctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh new file mode 100755 index 0000000..4489bc5 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +# Verify that removing the the leader capability causes a node to +# yield the leader role + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -n + +ctdb_nodes_start_custom -C "cluster lock" + +# This is the node used to execute commands +select_test_node +echo + +# test_node set by select_test_node() +# shellcheck disable=SC2154 +leader_get "$test_node" + +# leader set by leader_get() +# shellcheck disable=SC2154 +echo "Removing leader capability from leader ${leader}..." +ctdb_onnode "$test_node" setleaderrole off -n "$leader" + +wait_until_leader_has_changed "$test_node" diff --git a/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh b/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh new file mode 100755 index 0000000..3a76654 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# Verify that "ctdb getreclock" gets the recovery lock correctly + +# Make sure the recovery lock is consistent across all nodes. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +echo "Check that recovery lock is set the same on all nodes..." 
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh b/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh
new file mode 100755
index 0000000..3a76654
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+# Verify that "ctdb getreclock" gets the recovery lock correctly
+
+# Make sure the recovery lock is consistent across all nodes.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+echo "Check that recovery lock is set the same on all nodes..."
+ctdb_onnode all getreclock
+
+# outfile is set above by ctdb_onnode
+# shellcheck disable=SC2154
+n=$(sort -u "$outfile" | wc -l | tr -d '[:space:]')
+
+case "$n" in
+0) echo "GOOD: Recovery lock is unset on all nodes" ;;
+1) echo "GOOD: All nodes have the same recovery lock setting" ;;
+*) ctdb_test_fail "BAD: Recovery lock setting differs across nodes" ;;
+esac
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh b/ctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh
new file mode 100755
index 0000000..d043c7e
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+# Check that CTDB operates correctly if the recovery lock is configured
+# as a command.
+
+# This test works only with local daemons.  On a real cluster it has
+# no way of updating configuration.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+echo "Starting CTDB with recovery lock command configured..."
+ctdb_nodes_start_custom -R
+
+echo "Good, that seems to work!"
-f "$reclock" ] ; then + ctdb_test_error "Recovery lock file \"${reclock}\" is missing" +fi + +echo "Recovery lock setting is \"${reclock_setting}\"" +echo "Recovery lock file is \"${reclock}\"" +echo + +leader_get "$test_node" + +generation_get + +echo "Remove recovery lock" +rm "$reclock" +echo + +# This will mean an election has taken place and a recovery has occured +wait_until_generation_has_changed "$test_node" + +# shellcheck disable=SC2154 +# $leader set by leader_get() above +leader_old="$leader" + +leader_get "$test_node" + +if [ "$leader" != "$leader_old" ] ; then + echo "OK: Leader has changed to node ${leader_new}" +fi +echo "GOOD: Leader is still node ${leader}" +echo + +cluster_is_healthy diff --git a/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh b/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh new file mode 100755 index 0000000..147547d --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +# Verify that if the directory containing the cluster lock is moved +# then the current cluster leader no longer claims to be leader, and +# no other node claims to be leader. Confirm that if the directory is +# moved back then a node will become leader. + +# This simulates the cluster filesystem containing the cluster lock +# being unmounted and remounted. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -n + +echo "Starting CTDB with cluster lock recheck interval set to 5s..." +ctdb_nodes_start_custom -r 5 + +select_test_node + +echo "Get cluster lock setting" +# shellcheck disable=SC2154 +# $test_node set by select_test_node() above +ctdb_onnode "$test_node" getreclock +# shellcheck disable=SC2154 +# $out set by ctdb_onnode() above +reclock_setting="$out" + +if [ -z "$reclock_setting" ] ; then + ctdb_test_skip "Cluster lock is not set" +fi + +t="${reclock_setting% 5}" +reclock="${t##* }" + +if [ ! 
-f "$reclock" ] ; then + ctdb_test_error "Cluster lock file \"${reclock}\" is missing" +fi + +echo "Cluster lock setting is \"${reclock_setting}\"" +echo "Cluster lock file is \"${reclock}\"" +echo + +leader_get "$test_node" + +dir=$(dirname "$reclock") + +echo "Rename cluster lock directory" +mv "$dir" "${dir}.$$" + +wait_until_leader_has_changed "$test_node" +echo + +# shellcheck disable=SC2154 +# $leader set by leader_get() & wait_until_leader_has_changed(), above +if [ "$leader" != "UNKNOWN" ]; then + test_fail "BAD: leader is ${leader}" +fi + +echo "OK: leader is UNKNOWN" +echo + +echo 'Get "leader timeout":' +conf_tool="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb-config" +# shellcheck disable=SC2154 +# $test_node set by select_test_node() above +try_command_on_node "$test_node" "${conf_tool} get cluster 'leader timeout'" +# shellcheck disable=SC2154 +# $out set by ctdb_onnode() above +leader_timeout="$out" +echo "Leader timeout is ${leader_timeout}s" +echo + +sleep_time=$((2 * leader_timeout)) +echo "Waiting for ${sleep_time}s to confirm leader stays UNKNOWN" +sleep_for $sleep_time + +leader_get "$test_node" +if [ "$leader" = "UNKNOWN" ]; then + echo "OK: leader is UNKNOWN" + echo +else + test_fail "BAD: leader is ${leader}" +fi + +echo "Restore cluster lock directory" +mv "${dir}.$$" "$dir" + +wait_until_leader_has_changed "$test_node" diff --git a/ctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh b/ctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh new file mode 100755 index 0000000..b841f5b --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash + +# Run the message_ring test and sanity check the output + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +echo "Running message_ring on all $num_nodes nodes." +try_command_on_node -v -p all $CTDB_TEST_WRAPPER $VALGRIND message_ring -n $num_nodes + +# Get the last line of output. +last=$(tail -n 1 "$outfile") + +pat='^(Waiting for cluster|Ring\[[[:digit:]]+\]: [[:digit:]]+(\.[[:digit:]]+)? msgs/sec \(\+ve=[[:digit:]]+ -ve=[[:digit:]]+\))$' +sanity_check_output 1 "$pat" + +# $last should look like this: +# Ring[1]: 10670.93 msgs/sec (+ve=53391 -ve=53373) +stuff="${last##Ring\[*\]: }" +mps="${stuff% msgs/sec*}" + +if [ ${mps%.*} -ge 10 ] ; then + echo "OK: $mps msgs/sec >= 10 msgs/sec" +else + echo "BAD: $mps msgs/sec < 10 msgs/sec" + exit 1 +fi + +stuff="${stuff#*msgs/sec (+ve=}" +positive="${stuff%% *}" + +if [ $positive -ge 10 ] ; then + echo "OK: +ive ($positive) >= 10" +else + echo "BAD: +ive ($positive) < 10" + exit 1 +fi + +stuff="${stuff#*-ve=}" +negative="${stuff%)}" + +if [ $negative -ge 10 ] ; then + echo "OK: -ive ($negative) >= 10" +else + echo "BAD: -ive ($negative) < 10" + exit 1 +fi diff --git a/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh b/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh new file mode 100755 index 0000000..f86d080 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# Run tunnel_test and sanity check the output + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +echo "Running tunnel_test on all $num_nodes nodes." +try_command_on_node -v -p all $CTDB_TEST_WRAPPER $VALGRIND \ + tunnel_test -t 30 -n $num_nodes + +# Get the last line of output. 
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh b/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh
new file mode 100755
index 0000000..f86d080
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Run tunnel_test and sanity check the output
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+echo "Running tunnel_test on all $num_nodes nodes."
+try_command_on_node -v -p all $CTDB_TEST_WRAPPER $VALGRIND \
+	tunnel_test -t 30 -n $num_nodes
+
+# Get the last line of output.
+last=$(tail -n 1 "$outfile")
+
+pat='^(Waiting for cluster|pnn\[[[:digit:]]+\] [[:digit:]]+(\.[[:digit:]]+)? msgs/sec)$'
+sanity_check_output 1 "$pat"
+
+# $last should look like this:
+#    pnn[2] 10670.93 msgs/sec
+stuff="${last##pnn\[*\] }"
+mps="${stuff% msgs/sec}"
+
+if [ ${mps%.*} -ge 10 ] ; then
+	echo "OK: $mps msgs/sec >= 10 msgs/sec"
+else
+	echo "BAD: $mps msgs/sec < 10 msgs/sec"
+	exit 1
+fi
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh b/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh
new file mode 100755
index 0000000..7bca58c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Verify that nothing bad occurs if a node stalls and the leader
+# broadcast timeout triggers
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node
+echo
+
+echo 'Get "leader timeout":'
+conf_tool="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb-config"
+# shellcheck disable=SC2154
+# $test_node set by select_test_node() above
+try_command_on_node "$test_node" "${conf_tool} get cluster 'leader timeout'"
+# shellcheck disable=SC2154
+# $out set by ctdb_onnode() above
+leader_timeout="$out"
+echo "Leader timeout is ${leader_timeout} seconds"
+echo
+
+# Assume leader timeout is reasonable and doesn't cause node to be
+# disconnected
+stall_time=$((leader_timeout * 2))
+
+generation_get "$test_node"
+
+echo "Get ctdbd PID on node ${test_node}..."
+ctdb_onnode -v "$test_node" "getpid"
+ctdbd_pid="$out"
+echo
+
+echo "Sending SIGSTOP to ctdbd on ${test_node}"
+try_command_on_node "$test_node" "kill -STOP ${ctdbd_pid}"
+
+sleep_for "$stall_time"
+
+echo "Sending SIGCONT to ctdbd on ${test_node}"
+try_command_on_node "$test_node" "kill -CONT ${ctdbd_pid}"
+echo
+
+wait_until_generation_has_changed "$test_node"
+
+cluster_is_healthy
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh b/ctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh
new file mode 100755
index 0000000..2835e55
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+# Verify an error occurs if a ctdb command is run against a node
+# without a ctdbd
+
+# That is, check that an error message is printed if an attempt is made
+# to execute a ctdb command against a node that is not running ctdbd.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+test_node=1
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+echo "There are $num_nodes nodes."
+
+echo "Shutting down node ${test_node}..."
+try_command_on_node $test_node $CTDB shutdown
+
+wait_until_node_has_status $test_node disconnected 30 0
+
+wait_until_node_has_status 0 recovered 30 0
+
+pat="ctdb_control error: 'ctdb_control to disconnected node'|ctdb_control error: 'node is disconnected'|Node $test_node is DISCONNECTED|Node $test_node has status DISCONNECTED\|UNHEALTHY\|INACTIVE"
+
+for i in ip disable enable "ban 0" unban listvars ; do
+	try_command_on_node -v 0 ! $CTDB $i -n $test_node
+
+	if egrep -q "$pat" "$outfile" ; then
+		echo "OK: \"ctdb ${i}\" fails with expected \"disconnected node\" message"
+	else
+		echo "BAD: \"ctdb ${i}\" does not fail with expected \"disconnected node\" message"
+		exit 1
+	fi
+done
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh b/ctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh
new file mode 100755
index 0000000..be71750
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+
+# Check that the CTDB version consistency checking operates correctly
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init
+
+select_test_node
+
+try_command_on_node -v "$test_node" ctdb version
+version="$out"
+
+major="${version%%.*}"
+rest="${version#*.}"
+minor="${rest%%.*}"
+
+echo "Node ${test_node} has version ${major}.${minor}"
+
+# Unchanged version - this should work
+export CTDB_TEST_SAMBA_VERSION=$(( (major << 16) | minor ))
+printf '\nRestarting node %d with CTDB_TEST_SAMBA_VERSION=0x%08x\n' \
+	"$test_node" \
+	"$CTDB_TEST_SAMBA_VERSION"
+ctdb_nodes_restart "$test_node"
+wait_until_ready
+echo "GOOD: ctdbd restarted successfully on node ${test_node}"
+
+d="$CTDB_SCRIPTS_HELPER_BINDIR"
+try_command_on_node "$test_node" "${d}/ctdb-path" "pidfile" "ctdbd"
+pidfile="$out"
+
+# Changed major version - this should fail
+export CTDB_TEST_SAMBA_VERSION=$(( ((major + 1) << 16) | minor ))
+printf '\nRestarting node %d with CTDB_TEST_SAMBA_VERSION=0x%08x\n' \
+	"$test_node" \
+	"$CTDB_TEST_SAMBA_VERSION"
+ctdb_nodes_restart "$test_node"
+echo "Will use PID file ${pidfile} to check for ctdbd exit"
+wait_until 30 ! test -f "$pidfile"
+echo "GOOD: ctdbd exited early on node ${test_node}"
+
+# Changed minor version - this should fail
+export CTDB_TEST_SAMBA_VERSION=$(( (major << 16) | (minor + 1) ))
+printf '\nRestarting node %d with CTDB_TEST_SAMBA_VERSION=0x%08x\n' \
+	"$test_node" \
+	"$CTDB_TEST_SAMBA_VERSION"
+ctdb_nodes_start "$test_node"
+echo "Will use PID file ${pidfile} to check for ctdbd exit"
+wait_until 30 ! test -f "$pidfile"
+echo "GOOD: ctdbd exited early on node ${test_node}"
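Note: the packing in cluster.091 above is plain bit arithmetic. For version 4.17, (4 << 16) | 17 = 0x00040011, which is exactly what the printf '%08x' lines display; bumping the major gives 0x00050011 and bumping the minor gives 0x00040012, the two values the test expects ctdbd to reject at startup.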
diff --git a/ctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh b/ctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh
new file mode 100755
index 0000000..2220a20
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb getdebug' works as expected
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+try_command_on_node 0 "$CTDB listnodes | wc -l"
+num_nodes="$out"
+
+try_command_on_node -v 1 "onnode -q all $CTDB getdebug"
+getdebug_onnode="$out"
+
+sanity_check_output \
+	$num_nodes \
+	'^(ERROR|WARNING|NOTICE|INFO|DEBUG)$'
+
+cmd=""
+n=0
+while [ $n -lt $num_nodes ] ; do
+	cmd="${cmd}${cmd:+; }$CTDB getdebug -n $n"
+	n=$(($n + 1))
+done
+try_command_on_node -v 1 "$cmd"
+getdebug_n="$out"
+
+if [ "$getdebug_onnode" = "$getdebug_n" ] ; then
+	echo "They're the same... cool!"
+else
+	die "Error: they differ."
+fi
+
+seps=""
+nl="
+"
+while read line ; do
+	t=$(echo "$line" | sed -r -e 's@Node [[:digit:]]+ is at debug level ([[:alpha:]]+) \((-?[[:digit:]]+)\)$@\|\1\|\2|@')
+	seps="${seps}${seps:+${nl}}|Name|Level|${nl}${t}"
+done <<<"$getdebug_onnode"
diff --git a/ctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh b/ctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh
new file mode 100755
index 0000000..dd5949e
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb setdebug' works as expected.
+
+# This is a little superficial.  It checks that CTDB thinks the debug
+# level has been changed but doesn't actually check that logging occurs
+# at the new level.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node
+
+get_debug ()
+{
+	# Sets: check_debug
+	local node="$1"
+
+	local out
+
+	try_command_on_node -v $node "$CTDB getdebug"
+	check_debug="$out"
+}
+
+set_and_check_debug ()
+{
+	local node="$1"
+	local level="$2"
+	local levelstr="${3:-$level}"
+
+	echo "Setting debug level on node ${node} to ${level}."
+	try_command_on_node $node "$CTDB setdebug ${level}"
+
+	local check_debug
+	get_debug $node
+
+	if [ "$levelstr" != "$check_debug" ] ; then
+		die "BAD: Debug level \"$levelstr\" != \"$check_debug\"."
+	fi
+}
+
+get_debug $test_node
+initial_debug="$check_debug"
+
+levels="ERROR WARNING NOTICE INFO DEBUG"
+
+for new_debug in $levels ; do
+	[ "$initial_debug" != "$new_debug" ] || continue
+
+	echo
+	set_and_check_debug $test_node "$new_debug"
+done
+
+while read new_debug i ; do
+	[ "$initial_debug" != "$i" ] || continue
+
+	echo
+	set_and_check_debug $test_node "$i" "$new_debug"
+done <<EOF
+ERROR 0
+WARNING 1
+WARNING 2
+NOTICE 3
+NOTICE 4
+INFO 5
+INFO 6
+INFO 7
+INFO 8
+INFO 9
+DEBUG 10
+EOF
diff --git a/ctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh b/ctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh
new file mode 100755
index 0000000..6205c27
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+# Verify that 'ctdb dumpmemory' shows expected output
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+pat='^([[:space:]].+[[:space:]]+contains[[:space:]]+[[:digit:]]+ bytes in[[:space:]]+[[:digit:]]+ blocks \(ref [[:digit:]]+\)[[:space:]]+0x[[:xdigit:]]+|[[:space:]]+reference to: .+|full talloc report on .+ \(total[[:space:]]+[[:digit:]]+ bytes in [[:digit:]]+ blocks\))$'
+
+try_command_on_node -v 0 "$CTDB dumpmemory"
+sanity_check_output 10 "$pat"
+
+echo
+try_command_on_node -v 0 "$CTDB rddumpmemory"
+sanity_check_output 10 "$pat"
diff --git a/ctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh b/ctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh
new file mode 100755
index 0000000..4fdf61c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+# Check that CTDB operates correctly if there are 0 event scripts
+
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -n
+
+ctdb_nodes_start_custom --no-event-scripts
+
+echo "Good, that seems to work!"
diff --git a/ctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh b/ctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh
new file mode 100755
index 0000000..046989c
--- /dev/null
+++ b/ctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+
+# Verify CTDB's debugging of timed out eventscripts
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init
+
+select_test_node
+
+####################
+
+echo "Setting monitor events to time out..."
+try_command_on_node $test_node 'echo $CTDB_BASE'
+ctdb_base="$out"
+script_options="${ctdb_base}/script.options"
+ctdb_test_exit_hook_add "onnode $test_node rm -f $script_options"
+
+debug_output="${ctdb_base}/debug-hung-script.log"
+ctdb_test_exit_hook_add "onnode $test_node rm -f $debug_output"
+
+try_command_on_node -i "$test_node" tee "$script_options" <<EOF
+CTDB_RUN_TIMEOUT_MONITOR=yes
+CTDB_DEBUG_HUNG_SCRIPT_LOGFILE='$debug_output'
+CTDB_DEBUG_HUNG_SCRIPT_STACKPAT='exportfs|rpcinfo|sleep'
+CTDB_SCRIPT_VARDIR='$ctdb_base'
+EOF
+
+####################
+
+wait_for_monitor_event $test_node
+
+echo "Waiting for debugging output to appear..."
+# Use test -s because the file may exist but still be empty
+wait_until 60 test -s "$debug_output"
+
+echo
+echo "Debugging output:"
+cat "$debug_output"
+echo
+
+echo "Checking output of hung script debugging..."
+
+# Can we actually read kernel stacks
+if try_command_on_node $test_node "cat /proc/$$/stack >/dev/null 2>&1" ; then
+	stackpat='
+---- Stack trace of interesting process [0-9]*\\[sleep\\] ----
+[<[0-9a-f]*>] .*sleep+.*
+'
+else
+	stackpat=''
+fi
+
+while IFS="" read pattern ; do
+	[ -n "$pattern" ] || continue
+	if grep -q -- "^${pattern}\$" "$debug_output" ; then
+		printf 'GOOD: output contains "%s"\n' "$pattern"
+	else
+		printf 'BAD: output does not contain "%s"\n' "$pattern"
+		exit 1
+	fi
+done <<EOF
+===== Start of hung script debug for PID=".*", event="monitor" =====
+===== End of hung script debug for PID=".*", event="monitor" =====
+pstree -p -a .*:
+00\\\\.test\\\\.script,.*
+ *\`-sleep,.*
+${stackpat}
+---- ctdb scriptstatus monitor: ----
+00\\.test *TIMEDOUT.*
+ *OUTPUT: Sleeping for [0-9]* seconds\\\\.\\\\.\\\\.
+EOF
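Note: these scripts are not self-contained; each one sources "${TEST_SCRIPTS_DIR}/integration.bash" and relies on harness functions such as ctdb_test_init and try_command_on_node. In a Samba checkout they are normally driven by the CTDB test runner rather than executed directly; a sketch of an invocation, with both the path and the argument form being assumptions to verify against the tree in use:

    cd ctdb
    ./tests/run_tests.sh tests/INTEGRATION/simple    # run the whole directory against local daemons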