diff options
Diffstat (limited to 'ctdb/tests/INTEGRATION')
69 files changed, 4632 insertions, 0 deletions
diff --git a/ctdb/tests/INTEGRATION/database/basics.001.attach.sh b/ctdb/tests/INTEGRATION/database/basics.001.attach.sh new file mode 100755 index 0000000..1fbffc5 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/basics.001.attach.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb getdbmap' operates as expected + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +select_test_node + +# test_node set by select_test_node() above +# shellcheck disable=SC2154 +ctdb_onnode -v "$test_node" getdbmap + +dbid='dbid:0x[[:xdigit:]]+' +name='name:[^[:space:]]+' +path='path:[^[:space:]]+' +opts='( (PERSISTENT|STICKY|READONLY|REPLICATED|UNHEALTHY))*' +line="${dbid} ${name} ${path}${opts}" +dbmap_pattern="^(Number of databases:[[:digit:]]+|${line})\$" + +# outfile set by ctdb_onnode() above +# shellcheck disable=SC2154 +num_db_init=$(sed -n -e '1s/.*://p' "$outfile") + +sanity_check_output $(($num_db_init + 1)) "$dbmap_pattern" + +for i in $(seq 1 5) ; do + f="attach_test_${i}.tdb" + echo "Creating test database: $f" + ctdb_onnode "$test_node" "attach ${f}" + + ctdb_onnode "$test_node" getdbmap + sanity_check_output $((num_db_init + 1)) "$dbmap_pattern" + num=$(sed -n -e '1s/^.*://p' "$outfile") + if [ "$num" = $((num_db_init + i)) ] ; then + echo "OK: correct number of additional databases" + else + ctdb_test_fail "BAD: no additional database" + fi + if awk '{print $2}' "$outfile" | grep -Fqx "name:$f" ; then + echo "OK: getdbmap knows about \"$f\"" + else + ctdb_test_fail "BAD: getdbmap does not know about \"$f\"" + fi +done diff --git a/ctdb/tests/INTEGRATION/database/basics.002.attach.sh b/ctdb/tests/INTEGRATION/database/basics.002.attach.sh new file mode 100755 index 0000000..6a5c812 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/basics.002.attach.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash + +# Verify that databases are attached a node joins the cluster: +# 1. Shut down CTDB on one node +# 2. Attach test databases +# 3. Check that databases are attached on all up nodes +# 4. Start CTDB on the node where it is shut down +# 5. Verify that the test databases are attached on this node +# 6. Restart one of the nodes +# 7. Verify that the test databases are attached on this node + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +###################################################################### + +try_command_on_node 0 "$CTDB listnodes -X | wc -l" +numnodes="$out" +lastnode=$(( numnodes - 1 )) + +###################################################################### + +# Confirm that the database is attached with appropriate flags +check_db_once () +{ + local pnn="$1" + local db="$2" + + try_command_on_node "$pnn" $CTDB getdbmap + if grep -qF "name:${db}" "$outfile" >/dev/null ; then + return 0 + else + return 1 + fi +} + +check_db () +{ + local pnn="$1" + local db="$2" + local flag="$3" + + local flags + + echo "Waiting until database ${db} is attached on node ${pnn}" + wait_until 10 check_db_once "$pnn" "$db" + + flags=$(awk -v db="$db" '$2 == "name:" db {print $4}' "$outfile") + if [ "$flags" = "$flag" ]; then + echo "GOOD: db ${db} attached on node ${pnn} with flag $flag" + else + echo "BAD: db ${db} attached on node ${pnn} with wrong flag" + cat "$outfile" + exit 1 + fi +} + +###################################################################### + +testdb1="test_volatile.tdb" +testdb2="test_persistent.tdb" +testdb3="test_replicated.tdb" + +test_node="0" + +echo "Shutting down node $test_node" +ctdb_nodes_stop "$test_node" +sleep 1 +wait_until_node_has_status 1 recovered +try_command_on_node -v 1 $CTDB status + +echo "Create test databases" +try_command_on_node 1 $CTDB attach "$testdb1" +try_command_on_node 1 $CTDB attach "$testdb2" persistent +try_command_on_node 1 $CTDB attach "$testdb3" replicated + +echo +echo "Checking if database is attached with correct flags" +for node in $(seq 0 $lastnode) ; do + if [ $node -ne $test_node ] ; then + check_db $node $testdb1 "" + check_db $node $testdb2 PERSISTENT + check_db $node $testdb3 REPLICATED + fi +done + +###################################################################### + +echo +echo "Start node $test_node" +ctdb_nodes_start "$test_node" +sleep 1 +wait_until_ready + +echo +echo "Checking if database is attached with correct flags" +check_db $test_node $testdb1 "" +check_db $test_node $testdb2 PERSISTENT +check_db $test_node $testdb3 REPLICATED + +###################################################################### + +echo +echo "Restarting node $test_node" +ctdb_nodes_restart "$test_node" +sleep 1 +wait_until_ready + +echo +echo "Checking if database is attached with correct flags" +check_db $test_node $testdb1 "" +check_db $test_node $testdb2 PERSISTENT +check_db $test_node $testdb3 REPLICATED diff --git a/ctdb/tests/INTEGRATION/database/basics.003.detach.sh b/ctdb/tests/INTEGRATION/database/basics.003.detach.sh new file mode 100755 index 0000000..cb44955 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/basics.003.detach.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb detach' works as expected: +# 1. Attach test databases +# 2. Detach test databases +# 3. Confirm test databases are not attached + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +###################################################################### + +try_command_on_node 0 "$CTDB listnodes -X | wc -l" +numnodes="$out" + +###################################################################### + +# Confirm that the database is attached +check_db_once () +{ + local db="$1" + + local num_db + + try_command_on_node all "$CTDB getdbmap" + num_db=$(grep -cF "name:${db}" "$outfile") || true + if [ "$num_db" -eq "$numnodes" ]; then + return 0 + else + return 1 + fi +} + +check_db () +{ + local db="$1" + + echo "Waiting until database ${db} is attached on all nodes" + wait_until 10 check_db_once "$db" +} + +# Confirm that no nodes have databases attached +check_no_db_once () +{ + local db="$1" + + local num_db + + try_command_on_node all "$CTDB getdbmap" + num_db=$(grep -cF "name:${db}" "$outfile") || true + if [ "$num_db" -eq 0 ]; then + return 0 + else + return 1 + fi +} + +check_no_db () +{ + local db="$1" + + echo "Waiting until database ${db} is detached on all nodes" + wait_until 10 check_no_db_once "$db" +} + +###################################################################### + +testdb1="detach_test1.tdb" +testdb2="detach_test2.tdb" +testdb3="detach_test3.tdb" +testdb4="detach_test4.tdb" + +echo "Create test databases" +for db in "$testdb1" "$testdb2" "$testdb3" "$testdb4" ; do + echo " $db" + try_command_on_node 0 $CTDB attach "$db" +done + +for db in "$testdb1" "$testdb2" "$testdb3" "$testdb4" ; do + check_db "$db" +done + +###################################################################### + +echo +echo "Ensuring AllowClientDBAttach=1 on all nodes" +try_command_on_node all $CTDB setvar AllowClientDBAttach 1 + +echo "Check failure detaching single test database $testdb1" +try_command_on_node 1 "! $CTDB detach $testdb1" +check_db "$testdb1" + +echo +echo "Setting AllowClientDBAttach=0 on node 0" +try_command_on_node 0 $CTDB setvar AllowClientDBAttach 0 + +echo "Check failure detaching single test database $testdb1" +try_command_on_node 1 "! $CTDB detach $testdb1" +check_db "$testdb1" + +echo +echo "Setting AllowClientDBAttach=0 on all nodes" +try_command_on_node all $CTDB setvar AllowClientDBAttach 0 + +echo "Check detaching single test database $testdb1" +try_command_on_node 1 "$CTDB detach $testdb1" +check_no_db "$testdb1" + +###################################################################### + +echo +echo "Detach multiple test databases" +echo " $testdb2, $testdb3, $testdb4" +try_command_on_node 0 $CTDB detach $testdb2 $testdb3 $testdb4 + +for db in "$testdb2" "$testdb3" "$testdb4" ; do + check_no_db "$db" +done + +###################################################################### + +echo +echo "Attach a single test database" +try_command_on_node all $CTDB setvar AllowClientDBAttach 1 +try_command_on_node 0 $CTDB attach $testdb1 +check_db "$testdb1" + +echo +echo "Write a key to database" +try_command_on_node 0 $CTDB writekey $testdb1 foo bar +try_command_on_node 0 $CTDB catdb $testdb1 +num_keys=$(sed -n -e 's/Dumped \([0-9]*\) records/\1/p' "$outfile") || true +if [ -n "$num_keys" -a $num_keys -eq 1 ]; then + echo "GOOD: Key added to database" +else + echo "BAD: Key did not get added to database" + cat "$outfile" + exit 1 +fi + +echo +echo "Detach test database" +try_command_on_node all $CTDB setvar AllowClientDBAttach 0 +try_command_on_node 0 $CTDB detach $testdb1 +check_no_db "$testdb1" + +echo +echo "Re-attach test database" +try_command_on_node all $CTDB setvar AllowClientDBAttach 1 +try_command_on_node 0 $CTDB attach $testdb1 +check_db "$testdb1" + +echo +echo "Check if the database is empty" +try_command_on_node 0 $CTDB catdb $testdb1 +num_keys=$(sed -n -e 's/Dumped \([0-9]*\) records/\1/p' "$outfile") || true +if [ -n "$num_keys" -a $num_keys -eq 0 ]; then + echo "GOOD: Database $testdb1 is empty" +else + echo "BAD: Database $testdb1 is not empty" + cat "$outfile" + exit 1 +fi diff --git a/ctdb/tests/INTEGRATION/database/basics.004.wipe.sh b/ctdb/tests/INTEGRATION/database/basics.004.wipe.sh new file mode 100755 index 0000000..115d64c --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/basics.004.wipe.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb wipedb' can clear a persistent database: +# 1. Verify that the status on all of the ctdb nodes is 'OK'. +# 2. Create a persistent test database +# 3. Add some records to node 0 and node 1 +# 4. Run wipedb on node 0 +# 5. verify the database is empty on both node 0 and 1 + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +# 2. +test_db="persistent_test.tdb" +echo "Create persistent test database \"$test_db\"" +try_command_on_node 0 $CTDB attach "$test_db" persistent + +# 3. +# add one record to node 0 key==ABC data==ABC +echo "Store key(ABC) data(ABC) on node 0" +db_ctdb_tstore 0 "$test_db" "ABC" "ABC" + +# add one record to node 1 key==DEF data==DEF +echo "Store key(DEF) data(DEF) on node 1" +db_ctdb_tstore 1 "$test_db" "DEF" "DEF" + +# 4. +echo "Wipe database" +try_command_on_node 0 $CTDB wipedb "$test_db" + +# check that the database is wiped +num_records=$(db_ctdb_cattdb_count_records 1 "$test_db") +if [ $num_records = "0" ] ; then + echo "OK: Database was wiped" +else + echo "BAD: We did not end up with an empty database" + exit 1 +fi + +echo "Force a recovery" +try_command_on_node 0 $CTDB recover + +# check that the database is wiped +num_records=$(db_ctdb_cattdb_count_records 1 "$test_db") +if [ $num_records = "0" ] ; then + echo "OK: Database was wiped" +else + echo "BAD: We did not end up with an empty database" + exit 1 +fi diff --git a/ctdb/tests/INTEGRATION/database/basics.010.backup_restore.sh b/ctdb/tests/INTEGRATION/database/basics.010.backup_restore.sh new file mode 100755 index 0000000..8c469d4 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/basics.010.backup_restore.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash + +# Confirm that 'ctdb restoredb' works correctly: +# 1. Create a persistent test database +# 2. Add some records to test database +# 3. Backup database +# 4. Wipe database and verify the database is empty on all nodes +# 5. Restore database and make sure all the records are restored +# 6. Make sure no recovery has been triggered + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 $CTDB status +generation=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile") + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +# 2. +test_db="restoredb_test.tdb" +test_dump=$(mktemp) +echo $test_dump +echo "Create persistent test database \"$test_db\"" +try_command_on_node 0 $CTDB attach "$test_db" persistent +try_command_on_node 0 $CTDB wipedb "$test_db" + +# 3. +# add 10,000 records to database +echo "Adding 10000 records to database" +( +for i in $(seq 1 10000) ; do + echo "\"key$i\" \"value$i\"" +done +) | try_command_on_node -i 0 $CTDB ptrans "$test_db" + +num_records=$(db_ctdb_cattdb_count_records 1 "$test_db") +if [ $num_records = "10000" ] ; then + echo "OK: Records added" +else + echo "BAD: We did not end up with 10000 records" + echo "num records = $num_records" + exit 1 +fi + +ctdb_test_exit_hook_add "rm -f $test_dump" + +# 4. +echo "Backup database" +try_command_on_node 0 $CTDB backupdb "$test_db" "$test_dump" + +# 5. +echo "Wipe database" +try_command_on_node 0 $CTDB wipedb "$test_db" + +# check that the database is restored +num_records=$(db_ctdb_cattdb_count_records 1 "$test_db") +if [ $num_records = "0" ] ; then + echo "OK: Database was wiped" +else + echo "BAD: We did not end up with an empty database" + echo "num records = $num_records" + exit 1 +fi + +# 6. +echo "Restore database" +try_command_on_node 0 $CTDB restoredb "$test_dump" "$test_db" + +# check that the database is restored +num_records=$(db_ctdb_cattdb_count_records 1 "$test_db") +if [ $num_records = "10000" ] ; then + echo "OK: Database was restored" +else + echo "BAD: We did not end up with 10000 records" + echo "num records = $num_records" + exit 1 +fi + +# 7. +wait_until_ready + +try_command_on_node 0 $CTDB status +new_generation=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile") + +echo "Old generation = $generation" +echo "New generation = $new_generation" + +if [ "$generation" = "$new_generation" ]; then + echo "OK: Database recovery not triggered." +else + echo "BAD: Database recovery triggered." + exit 1 +fi diff --git a/ctdb/tests/INTEGRATION/database/fetch.001.ring.sh b/ctdb/tests/INTEGRATION/database/fetch.001.ring.sh new file mode 100755 index 0000000..4d7d392 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/fetch.001.ring.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# Run the fetch_ring test and sanity check the output + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +echo "Running fetch_ring on all $num_nodes nodes." +testprog_onnode -v -p all \ + fetch_ring -n "$num_nodes" -D "fetch_ring.tdb" -k "testkey" + +pat='^(Waiting for cluster|Fetch\[[[:digit:]]+\]: [[:digit:]]+(\.[[:digit:]]+)? msgs/sec)$' +sanity_check_output 1 "$pat" + +# Get the last line of output. +last=$(tail -n 1 "$outfile") + +# $last should look like this: +# Fetch[1]: 10670.93 msgs/sec +stuff="${last##*Fetch\[*\]: }" +mps="${stuff% msgs/sec*}" + +if [ ${mps%.*} -ge 10 ] ; then + echo "OK: $mps msgs/sec >= 10 msgs/sec" +else + echo "BAD: $mps msgs/sec < 10 msgs/sec" + exit 1 +fi diff --git a/ctdb/tests/INTEGRATION/database/fetch.002.ring-hotkeys.sh b/ctdb/tests/INTEGRATION/database/fetch.002.ring-hotkeys.sh new file mode 100755 index 0000000..6d44253 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/fetch.002.ring-hotkeys.sh @@ -0,0 +1,161 @@ +#!/usr/bin/env bash + +# Run the fetch_ring test, sanity check the output and check hot keys +# statistics + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +testdb="fetch_ring.tdb" + +ctdb_get_all_pnns +# $all_pnns is set above +# shellcheck disable=SC2154 +num_nodes=$(echo "$all_pnns" | wc -w | tr -d '[:space:]') +first=$(echo "$all_pnns" | sed -n -e '1p') + +get_key () +{ + _n="$1" + + echo "testkey${_n}" +} + +run_fetch_ring () +{ + _timelimit="$1" + _key_num="$2" + + _key=$(get_key "$_key_num") + _base_cmd="fetch_ring -n ${num_nodes} -D ${testdb}" + _cmd="${_base_cmd} -t ${_timelimit} -k ${_key}" + echo "Running \"${_cmd}\" on all $num_nodes nodes." + testprog_onnode -v -p all "$_cmd" + + _pat='^(Waiting for cluster|Fetch\[[[:digit:]]+\]: [[:digit:]]+(\.[[:digit:]]+)? msgs/sec)$' + sanity_check_output 1 "$_pat" + + # Get the last line of output. + # $outfile is set above by testprog_onnode() + # shellcheck disable=SC2154 + _last=$(tail -n 1 "$outfile") + + # $last should look like this: + # Fetch[1]: 10670.93 msgs/sec + _stuff="${_last##*Fetch\[*\]: }" + _mps="${_stuff% msgs/sec*}" + + if [ "${_mps%.*}" -ge 10 ] ; then + echo "OK: ${_mps} msgs/sec >= 10 msgs/sec" + else + ctdb_test_fail "BAD: ${_mps} msgs/sec < 10 msgs/sec" + fi +} + +check_hot_keys () +{ + _pnn="$1" + _first_key="$2" + _num_keys="$3" + + echo + echo "Checking hot keys on node ${_pnn}" + + ctdb_onnode "$_pnn" dbstatistics "$testdb" + + # Get hot keys with a non-empty key + _hotkeys=$(grep -Ex '[[:space:]]+Count:[[:digit:]]+ Key:[[:xdigit:]]+' \ + "$outfile") || true + + # Check that there are the right number of non-empty slots + if [ -z "$_hotkeys" ] ; then + _num=0 + else + _num=$(echo "$_hotkeys" | wc -l | tr -d '[:space:]') + fi + _msg="hot key slots in use = ${_num}" + if [ "$_num_keys" -ne "$_num" ] ; then + echo + cat "$outfile" + ctdb_test_fail "BAD: ${_msg} (expected ${_num_keys})" + fi + echo "GOOD: ${_msg}" + + # No hot keys? Done... + if [ "$_num" = 0 ] ; then + return + fi + + # Check that hot key counts are correctly sorted + # + # Try to be as POSIX as possible + # shellcheck disable=SC2001 + _counts=$(echo "$_hotkeys" | \ + sed -e 's|.*Count:\([[:digit:]][[:digit:]]*\).*|\1|') + _counts_sorted=$(echo "$_counts" | sort -n) + if [ "$_counts" != "$_counts_sorted" ] ; then + echo + cat "$outfile" + ctdb_test_fail "BAD: hot keys not sorted" + fi + echo "GOOD: hot key counts are correctly sorted" + + # Check that all keys are considered hot + for _j in $(seq "$_first_key" $((_first_key + _num_keys - 1))) ; do + _key=$(get_key "$_j") + _key_hex=$(printf '%s' "$_key" | \ + od -A n -t x1 | \ + tr -d '[:space:]') + if ! echo "$_hotkeys" | grep -q "Key:${_key_hex}\$" ; then + echo + cat "$outfile" + ctdb_test_fail "BAD: key \"${_key}\" is not a hot key" + fi + done + echo "GOOD: all keys are listed as hot keys" +} + +# Run fetch_ring for each of 10 keys. After each run confirm that all +# keys used so far are considered hot keys (and do other hot key +# sanity checks) on all nodes. +for i in $(seq 1 10) ; do + run_fetch_ring 5 "$i" + + for pnn in $all_pnns ; do + check_hot_keys "$pnn" 1 "$i" + done + + echo +done + +echo +echo "Resetting statistics on node ${first}" +ctdb_onnode "$first" statisticsreset + +# Ensure that only node $first has had statistics reset +for pnn in $all_pnns ; do + if [ "$pnn" = "$first" ] ; then + check_hot_keys "$pnn" 1 0 + else + check_hot_keys "$pnn" 1 10 + fi +done + +echo + +# Run fetch_ring for each of 3 new keys. After each run confirm that +# the new keys used so far are considered hot keys (and do other hot +# key sanity checks) on node $first. +# +# Note that nothing can be said about hot keys on other nodes, since +# they may be an arbitrary blend of old and new keys. +for i in $(seq 1 3) ; do + run_fetch_ring 5 $((100 + i)) + + check_hot_keys 0 101 "$i" + + echo +done diff --git a/ctdb/tests/INTEGRATION/database/readonly.001.basic.sh b/ctdb/tests/INTEGRATION/database/readonly.001.basic.sh new file mode 100755 index 0000000..aeb9740 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/readonly.001.basic.sh @@ -0,0 +1,178 @@ +#!/usr/bin/env bash + +# Test support for read-only records + +# Read-only records can be activated at runtime using a ctdb command. +# If read-only records are not activated, then any attempt to fetch a +# read-only copy should be automatically upgraded to a read-write +# fetch_locked(). + +# If read-only delegations are present, then any attempt to acquire a +# read-write fetch_lock will trigger revocation of all delegations +# before the fetch_locked(). + +# 1. Create a test database and some records +# 2. Try to fetch read-only records, this should not result in any delegations +# 3. Activate read-only support +# 4. Try to fetch read-only records, this should result in delegations +# 5. Do a fetchlock and the delegations should be revoked +# 6. Try to fetch read-only records, this should result in delegations + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +###################################################################### + +# Confirm that no nodes have databases with read-only delegations +check_no_readonly () +{ + try_command_on_node all $CTDB cattdb $testdb + local ro_flags="RO_HAVE_READONLY|RO_HAVE_DELEGATIONS" + local numreadonly=$(grep -c -E "$ro_flags" "$outfile") || true + if [ $numreadonly -eq 0 ] ; then + echo "GOOD: no read-only delegations" + else + echo "BAD: there are read-only delegations" + cat "$outfile" + exit 1 + fi +} + +# Check that the test record has the correct read-only flags on the +# given nodes. The first node is the dmaster, which should know there +# are delegations but should not be flagged as having a read-only +# copy. Subsequent nodes should have a read-only copy but not know +# about any (other) delegations. +check_readonly () +{ + local dmaster="$1" ; shift + local others="$*" + + local count + + try_command_on_node $dmaster $CTDB cattdb $testdb + count=$(grep -c -E "RO_HAVE_DELEGATIONS" "$outfile") || true + if [ $count -eq 1 ] ; then + echo "GOOD: dmaster ${dmaster} has read-only delegations" + else + echo "BAD: dmaster ${dmaster} has no read-only delegations" + cat "$outfile" + exit 1 + fi + count=$(grep -c -E "RO_HAVE_READONLY" "$outfile") || true + if [ $count -ne 0 ] ; then + echo "BAD: dmaster ${dmaster} has a read-only copy" + cat "$outfile" + exit 1 + fi + + local o + for o in $others ; do + try_command_on_node $o $CTDB cattdb $testdb + count=$(grep -c -E "RO_HAVE_READONLY" "$outfile") || true + if [ $count -eq 1 ] ; then + echo "GOOD: node ${o} has a read-only copy" + else + echo "BAD: node ${o} has no read-only copy" + cat "$outfile" + exit 1 + fi + count=$(grep -c -E "RO_HAVE_DELEGATIONS" "$outfile") || true + if [ $count -ne 0 ] ; then + echo "BAD: other node ${o} has read-only delegations" + cat "$outfile" + exit 1 + fi + done +} + +###################################################################### + +echo "Get list of nodes..." +ctdb_onnode 0 "-X listnodes" +all_nodes=$(awk -F'|' '{print $2}' "$outfile") + +###################################################################### + +testdb="test.tdb" +echo "Create test database \"${testdb}\"" +try_command_on_node 0 $CTDB attach $testdb + +echo "Create some records..." +try_command_on_node all $CTDB_TEST_WRAPPER $VALGRIND update_record \ + -D ${testdb} -k testkey + +###################################################################### + +echo "Try some readonly fetches, these should all be upgraded to full fetchlocks..." +try_command_on_node all $CTDB_TEST_WRAPPER $VALGRIND fetch_readonly \ + -D ${testdb} -k testkey + +check_no_readonly + +###################################################################### + +echo "Activate read-only record support for \"$testdb\"..." +try_command_on_node all $CTDB setdbreadonly $testdb + +# Database should be tagged as READONLY +try_command_on_node 0 $CTDB getdbmap +db_details=$(awk -v db="$testdb" '$2 == foo="name:" db { print }' "$outfile") +if grep -q "READONLY" <<<"$db_details" ; then + echo "GOOD: read-only record support is enabled" +else + echo "BAD: could not activate read-only support" + echo "$db_details" + exit 1 +fi + +###################################################################### + +echo "Create 1 read-only delegation ..." +# dmaster=1 +try_command_on_node 1 $CTDB_TEST_WRAPPER $VALGRIND update_record \ + -D ${testdb} -k testkey + +# Fetch read-only to node 0 +try_command_on_node 0 $CTDB_TEST_WRAPPER $VALGRIND fetch_readonly \ + -D ${testdb} -k testkey + +check_readonly 1 0 + +###################################################################### + +echo "Verify that a fetchlock revokes read-only delegations..." +# Node 1 becomes dmaster +try_command_on_node 1 $CTDB_TEST_WRAPPER $VALGRIND update_record \ + -D ${testdb} -k testkey + +check_no_readonly + +###################################################################### + +echo "Create more read-only delegations..." +dmaster=1 +try_command_on_node $dmaster $CTDB_TEST_WRAPPER $VALGRIND update_record \ + -D ${testdb} -k testkey + +others="" +for n in $all_nodes ; do + if [ "$n" != "$dmaster" ] ; then + # Fetch read-only copy to this node + try_command_on_node $n $CTDB_TEST_WRAPPER $VALGRIND fetch_readonly \ + -D ${testdb} -k testkey + others="${others} ${n}" + fi +done + +check_readonly $dmaster $others + +###################################################################### + +echo "Verify that a recovery will revoke the delegations..." +try_command_on_node 0 $CTDB recover + +check_no_readonly diff --git a/ctdb/tests/INTEGRATION/database/recovery.001.volatile.sh b/ctdb/tests/INTEGRATION/database/recovery.001.volatile.sh new file mode 100755 index 0000000..d7aaa3b --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/recovery.001.volatile.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash + +# Test that recovery correctly handles RSNs + +# Recovery can under certain circumstances lead to old record copies +# resurrecting: Recovery selects the newest record copy purely by RSN. At +# the end of the recovery, the leader is the dmaster for all +# records in all (non-persistent) databases. And the other nodes locally +# hold the complete copy of the databases. The bug is that the recovery +# process does not increment the RSN on the leader at the end of +# the recovery. Now clients acting directly on the leader will +# directly change a record's content on the leader without migration +# and hence without RSN bump. So a subsequent recovery can not tell that +# the leader's copy is newer than the copies on the other nodes, since +# their RSN is the same. Hence, if the leader is not node 0 (or more +# precisely not the active node with the lowest node number), the recovery +# will choose copies from nodes with lower number and stick to these. + +# 1. Create a test database +# 2. Add a record with value value1 on leader +# 3. Force a recovery +# 4. Update the record with value value2 on leader +# 5. Force a recovery +# 6. Confirm that the value is value2 + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +# +# Main test +# +TESTDB="rec_test.tdb" + +status=0 + +# Make sure node 0 is not the leader +echo "find out which node is leader" +ctdb_onnode 0 leader +leader="$out" +if [ "$leader" = "0" ]; then + echo "node 0 is leader, disable leader role on node 0" + # + # Note: + # It should be sufficient to run "ctdb setleaderrole off" + # on node 0 and wait for election and recovery to finish. + # But there were problems related to this in this automatic + # test, so for now use "ctdb stop" and "ctdb continue". + # + echo "stop node 0" + try_command_on_node 0 $CTDB stop + wait_until_node_has_status 0 stopped + echo "continue node 0" + try_command_on_node 0 $CTDB continue + wait_until_node_has_status 0 notstopped + + ctdb_onnode 0 leader + leader="$out" + if [ "$leader" = "0" ]; then + echo "failed to move leader to different node" + exit 1 + fi +fi + +echo "Leader:${leader}" + +# Create a temporary non-persistent database to test with +echo "create test database $TESTDB" +ctdb_onnode "$leader" attach "$TESTDB" + +# Wipe Test database +echo "wipe test database" +ctdb_onnode "$leader" wipedb "$TESTDB" + +# Add a record key=test1 data=value1 +echo "store key(test1) data(value1)" +ctdb_onnode "$leader" writekey "$TESTDB" test1 value1 + +# Fetch a record key=test1 +echo "read key(test1)" +ctdb_onnode "$leader" readkey "$TESTDB" test1 +cat "$outfile" + +# Do a recovery +echo "force recovery" +ctdb_onnode "$leader" recover + +wait_until_node_has_status "$leader" recovered + +# Add a record key=test1 data=value2 +echo "store key(test1) data(value2)" +ctdb_onnode "$leader" writekey "$TESTDB" test1 value2 + +# Fetch a record key=test1 +echo "read key(test1)" +ctdb_onnode "$leader" readkey "$TESTDB" test1 +cat "$outfile" + +# Do a recovery +echo "force recovery" +ctdb_onnode "$leader" recover + +wait_until_node_has_status "$leader" recovered + +# Verify record key=test1 +echo "read key(test1)" +ctdb_onnode "$leader" readkey "$TESTDB" test1 +cat "$outfile" +if [ "$out" = "Data: size:6 ptr:[value2]" ]; then + echo "GOOD: Recovery did not corrupt database" +else + echo "BAD: Recovery corrupted database" + status=1 +fi + +exit $status diff --git a/ctdb/tests/INTEGRATION/database/recovery.002.large.sh b/ctdb/tests/INTEGRATION/database/recovery.002.large.sh new file mode 100755 index 0000000..4736071 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/recovery.002.large.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash + +# Test recovery of large volatile and persistent databases + +# Recovery now uses DB_PULL and DB_PUSH_START/DB_PUSH_CONFIRM +# controls. This sends the records in batches of ~RecBufferSizeLimit +# in size at a time. Test that large databases are re-assembled +# correctly. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +# +# Main test +# +TEST1DB="large_persistent_db.tdb" +TEST2DB="large_volatile_db.tdb" +RECDATA=$(onnode 0 mktemp) + +# Create a persistent database to test +echo "create persistent test database $TEST1DB" +try_command_on_node 0 $CTDB attach $TEST1DB persistent + +# Wipe Test database +echo "wipe test database $TEST1DB" +try_command_on_node 0 $CTDB wipedb $TEST1DB + +# Create dummy record data +echo "creating dummy record data" +onnode 0 dd if=/dev/urandom of=$RECDATA bs=10K count=1 + +# Add 345 records +echo "Adding 345 records" +for i in $(seq 1 345) ; do + try_command_on_node 0 $CTDB pstore $TEST1DB record$i $RECDATA || exit 1 +done + +num_records=$(db_ctdb_cattdb_count_records 0 $TEST1DB) +if [ $num_records = "345" ] ; then + echo "OK: records added correctly" +else + echo "BAD: persistent database has $num_records of 345 records" + try_command_on_node -v 0 "$CTDB cattdb $TEST1DB | tail -n 1" + exit 1 +fi + +# Create a volatile database to test +echo "create volatile test database $TEST2DB" +try_command_on_node 0 $CTDB attach $TEST2DB + +# Wipe Test database +echo "wipe test database $TEST2DB" +try_command_on_node 0 $CTDB wipedb $TEST2DB + +# Create dummy record data +v1="1234567890" +v2="$v1$v1$v1$v1$v1$v1$v1$v1$v1$v1" +v3="$v2$v2$v2$v2$v2$v2$v2$v2$v2$v2" + +# Add 1234 records +echo "Adding 1234 records" +for i in $(seq 1 1234) ; do + try_command_on_node 0 $CTDB writekey $TEST2DB record$i $v3 || exit 1 +done + +num_records=$(db_ctdb_cattdb_count_records 0 $TEST2DB) +if [ $num_records = "1234" ] ; then + echo "OK: records added correctly" +else + echo "BAD: volatile database has $num_records of 1234 records" + try_command_on_node -v 0 "$CTDB cattdb $TEST2DB | tail -n 1" + exit 1 +fi + +echo +leader_get 0 +# Set RecBufferSizeLimit to 10000 +ctdb_onnode "$leader" setvar RecBufferSizeLimit 10000 + +# Do a recovery +echo "force recovery" +try_command_on_node 0 $CTDB recover + +wait_until_node_has_status 0 recovered 30 + +# check that there are correct number of records +num_records=$(db_ctdb_cattdb_count_records 0 $TEST1DB) +if [ $num_records = "345" ] ; then + echo "OK: persistent database recovered correctly" +else + echo "BAD: persistent database has $num_records of 345 records" + try_command_on_node -v 0 "$CTDB cattdb $TEST1DB | tail -n 1" + exit 1 +fi + +num_records=$(db_ctdb_cattdb_count_records 0 $TEST2DB) +if [ $num_records = "1234" ] ; then + echo "OK: volatile database recovered correctly" +else + echo "BAD: volatile database has $num_records of 1234 records" + try_command_on_node -v 0 "$CTDB cattdb $TEST2DB | tail -n 1" + exit 1 +fi diff --git a/ctdb/tests/INTEGRATION/database/recovery.003.no_resurrect.sh b/ctdb/tests/INTEGRATION/database/recovery.003.no_resurrect.sh new file mode 100755 index 0000000..b314d4d --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/recovery.003.no_resurrect.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +# Ensure recovery doesn't resurrect deleted records from recently +# inactive nodes + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +testdb="rec_test.tdb" + +echo "Getting list of nodes..." +ctdb_get_all_pnns + +first=$(echo "$all_pnns" | sed -n -e '1p') +second=$(echo "$all_pnns" | sed -n -e '2p') +notfirst=$(echo "$all_pnns" | tail -n +2) + +echo "Create/wipe test database ${testdb}" +try_command_on_node $first $CTDB attach "$testdb" +try_command_on_node $first $CTDB wipedb "$testdb" + +echo "store key(test1) data(value1)" +try_command_on_node $first $CTDB writekey "$testdb" test1 value1 + +echo "Migrate key(test1) to all nodes" +try_command_on_node all $CTDB readkey "$testdb" test1 + +echo "Stop node ${first}" +try_command_on_node $first $CTDB stop +wait_until_node_has_status $first stopped + +echo "Delete key(test1)" +try_command_on_node $second $CTDB deletekey "$testdb" test1 + +database_has_zero_records () +{ + # shellcheck disable=SC2086 + # $notfirst can be multi-word + check_cattdb_num_records "$testdb" 0 "$notfirst" +} + +echo "Trigger a recovery" +try_command_on_node "$second" $CTDB recover + +echo "Checking that database has 0 records" +database_has_zero_records + +echo "Continue node ${first}" +try_command_on_node $first $CTDB continue +wait_until_node_has_status $first notstopped + +echo "Get database contents" +try_command_on_node -v $first $CTDB catdb "$testdb" + +if grep -q '^key(' "$outfile" ; then + echo "BAD: Deleted record has been resurrected" + exit 1 +fi + +echo "GOOD: Deleted record is still gone" diff --git a/ctdb/tests/INTEGRATION/database/recovery.010.persistent.sh b/ctdb/tests/INTEGRATION/database/recovery.010.persistent.sh new file mode 100755 index 0000000..d13a9a5 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/recovery.010.persistent.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash + +# Ensure that persistent databases are correctly recovered by database +# sequence number +# +# 1. Create and wipe a persistent test database +# 2. Directly add a single record to the database on each node +# 3. Trigger a recover +# 4. Ensure that the database contains only a single record +# +# Repeat but with sequence numbers set by hand on each node + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +add_record_per_node () +{ + _i=0 + while [ $_i -lt $num_nodes ] ; do + _k="KEY${_i}" + _d="DATA${_i}" + echo "Store key(${_k}) data(${_d}) on node ${_i}" + db_ctdb_tstore $_i "$test_db" "$_k" "$_d" + _i=$(($_i + 1)) + done +} + +test_db="persistent_test.tdb" +echo "Create persistent test database \"$test_db\"" +try_command_on_node 0 $CTDB attach "$test_db" persistent + +# 3, +# If no __db_sequence_number__ recover whole database +# + +echo +echo "Test that no __db_sequence_number__ does not blend the database during recovery" + +# wipe database +echo "Wipe the test database" +try_command_on_node 0 $CTDB wipedb "$test_db" + +add_record_per_node + +# force a recovery +echo force a recovery +try_command_on_node 0 $CTDB recover + +# Check that we now have 1 record on node 0 +num_records=$(db_ctdb_cattdb_count_records 0 "$test_db") +if [ $num_records = "1" ] ; then + echo "OK: databases were not blended" +else + echo "BAD: we did not end up with the expected single record after the recovery" + exit 1 +fi + + +# 4, +# If __db_sequence_number__ recover whole database +# + +echo +echo test that __db_sequence_number__ does not blend the database during recovery + +# wipe database +echo wipe the test database +try_command_on_node 0 $CTDB wipedb persistent_test.tdb + +add_record_per_node + +echo "Add __db_sequence_number__==5 record to all nodes" +pnn=0 +while [ $pnn -lt $num_nodes ] ; do + db_ctdb_tstore_dbseqnum $pnn "$test_db" 5 + pnn=$(($pnn + 1)) +done + +echo "Set __db_sequence_number__ to 7 on node 0" +db_ctdb_tstore_dbseqnum 0 "$test_db" 7 + +echo "Set __db_sequence_number__ to 8 on node 1" +db_ctdb_tstore_dbseqnum 1 "$test_db" 8 + + +# force a recovery +echo force a recovery +try_command_on_node 0 $CTDB recover + +# check that we now have both records on node 0 +num_records=$(db_ctdb_cattdb_count_records 0 "$test_db") +if [ $num_records = "1" ] ; then + echo "OK: databases were not blended" +else + echo "BAD: we did not end up with the expected single record after the recovery" + exit 1 +fi diff --git a/ctdb/tests/INTEGRATION/database/recovery.011.continue.sh b/ctdb/tests/INTEGRATION/database/recovery.011.continue.sh new file mode 100755 index 0000000..3376e06 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/recovery.011.continue.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +# Confirm that the deleted records are not resurrected after recovery +# +# 1. Create a persistent database +# 2. Add a record and update it few times. +# 3. Delete the record +# 4. Use "ctdb stop" to stop one of the nodes +# 5. Add a record with same key. +# 6. Continue on the stopped node +# 7. Confirm that the record still exists + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +do_test() +{ +# Wipe Test database +echo "wipe test database" +try_command_on_node 0 $CTDB wipedb $TESTDB + +# Add a record key=test1 data=value1 +# and update values +for value in value1 value2 value3 value4 value5 ; do + echo "store key(test1) data($value)" + echo "\"test1\" \"$value\"" | try_command_on_node -i 0 $CTDB ptrans "$TESTDB" +done + +# Delete record +echo "delete key(test1)" +try_command_on_node 0 $CTDB pdelete $TESTDB test1 + +# Stop a node +echo "stop node 1" +try_command_on_node 1 $CTDB stop + +wait_until_node_has_status 1 stopped + +# Add a record key=test1 data=value2 +echo "store key(test1) data(newvalue1)" +echo '"test1" "newvalue1"' | try_command_on_node -i 0 $CTDB ptrans "$TESTDB" + +# Continue node +echo "contine node 1" +try_command_on_node 1 $CTDB continue + +wait_until_node_has_status 1 notstopped + +} + +# +# Main test +# +TESTDB="persistent_test.tdb" + +status=0 + +# Create a temporary persistent database to test with +echo "create persistent test database $TESTDB" +try_command_on_node 0 $CTDB attach $TESTDB persistent + +do_test +if try_command_on_node 0 $CTDB pfetch $TESTDB test1 ; then + echo "GOOD: Record was not deleted (recovery by sequence number worked)" +else + echo "BAD: Record was deleted" + status=1 +fi + +exit $status diff --git a/ctdb/tests/INTEGRATION/database/scripts/local.bash b/ctdb/tests/INTEGRATION/database/scripts/local.bash new file mode 100644 index 0000000..ae2e0d5 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/scripts/local.bash @@ -0,0 +1,116 @@ +# Hey Emacs, this is a -*- shell-script -*- !!! :-) + +check_cattdb_num_records () +{ + local db="$1" + local num="$2" + local nodes="$3" + + # $nodes has embedded newlines - put list on 1 line for printing + local t + t=$(echo "$nodes" | xargs) + echo "Confirm that ${db} has ${num} record(s) on node(s): ${t}" + + local ret=0 + local node + for node in $nodes ; do + local num_found + + num_found=$(db_ctdb_cattdb_count_records "$node" "$db") + if [ "$num_found" = "$num" ] ; then + continue + fi + + printf 'BAD: %s on node %d has %d record(s), expected %d\n' \ + "$db" "$node" "$num_found" "$num" + ctdb_onnode -v "$node" "cattdb $db" + ret=1 + done + + return $ret +} + +_key_dmaster_check () +{ + local node="$1" + local db="$2" + local key="$3" + local dmaster="${4:-${node}}" + + testprog_onnode "$node" "ctdb-db-test local-read ${db} ${key}" + + # shellcheck disable=SC2154 + # $outfile is set above by try_command_on_node() + grep -Fqx "dmaster: ${dmaster}" "$outfile" +} + +_key_dmaster_fail () +{ + local dmaster="$1" + + echo "BAD: node ${dmaster} is not dmaster" + # shellcheck disable=SC2154 + # $outfile is set by the caller via _key_dmaster_check() + cat "$outfile" + ctdb_test_fail +} + +vacuum_test_key_dmaster () +{ + local node="$1" + local db="$2" + local key="$3" + local dmaster="${4:-${node}}" + + if ! _key_dmaster_check "$node" "$db" "$key" "$dmaster" ; then + _key_dmaster_fail "$dmaster" + fi +} + +vacuum_test_wait_key_dmaster () +{ + local node="$1" + local db="$2" + local key="$3" + local dmaster="${4:-${node}}" + + if ! wait_until 30 \ + _key_dmaster_check "$node" "$db" "$key" "$dmaster" ; then + _key_dmaster_fail "$dmaster" + fi +} + +vacuum_confirm_key_empty_dmaster () +{ + local node="$1" + local db="$2" + local key="$3" + local dmaster="${4:-${node}}" + + echo "Confirm record key=\"${key}\" is empty and dmaster=${dmaster}" + + vacuum_test_key_dmaster "$node" "$db" "$key" "$dmaster" + + if ! grep -Fqx 'data(0) = ""' "$outfile" ; then + echo "BAD: record not empty" + cat "$outfile" + ctdb_test_fail + fi +} + +db_confirm_key_has_value () +{ + local node="$1" + local db="$2" + local key="$3" + local val="$4" + + local out + + ctdb_onnode "$node" "readkey ${db} ${key}" + outv=$(echo "$out" | sed -n 's|^Data: size:.* ptr:\[\(.*\)\]$|\1|p') + if [ "$val" != "$outv" ] ; then + ctdb_test_fail \ + "BAD: value for \"${key}\"=\"${outv}\" (not \"${val}\")" + fi +} diff --git a/ctdb/tests/INTEGRATION/database/transaction.001.ptrans.sh b/ctdb/tests/INTEGRATION/database/transaction.001.ptrans.sh new file mode 100755 index 0000000..556e523 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/transaction.001.ptrans.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash + +# Verify that the 'ctdb ptrans' works as expected +# +# Pipe some operation to ctdb ptrans and validate the TDB contents +# with ctdb catdb + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +TESTDB="ptrans_test.tdb" + +# Create a temporary persistent database to test with +echo "create persistent test database $TESTDB" +try_command_on_node 0 $CTDB attach $TESTDB persistent + +# Wipe Test database +echo "wipe test database" +try_command_on_node 0 $CTDB wipedb $TESTDB + +########## + +echo "Adding 3 records" + +items=' +"key1" "value1" +"key2" "value1" +"key3" "value1"' + +echo "$items" | try_command_on_node -i 0 $CTDB ptrans "$TESTDB" + +try_command_on_node 0 $CTDB catdb "$TESTDB" + +n=$(grep -c '^key.*= "key.*"' "$outfile" || true) + +if [ $n -ne 3 ] ; then + echo "BAD: expected 3 keys in..." + cat "$outfile" + exit 1 +else + echo "GOOD: 3 records were inserted" +fi + +########## + +echo "Deleting 1 record, updating 1, adding 1 new record, 1 bogus input line" + +items=' +"key1" "" +"key2" "value2" +"key3" +"key4" "value1"' + +echo "$items" | try_command_on_node -i 0 $CTDB ptrans "$TESTDB" + +try_command_on_node 0 $CTDB catdb "$TESTDB" + +n=$(grep -c '^key.*= "key.*"' "$outfile" || true) + +if [ $n -ne 3 ] ; then + echo "BAD: expected 3 keys in..." + cat "$outfile" + exit 1 +else + echo "GOOD: 3 records found" +fi + +########## + +echo "Verifying records" + +while read key value ; do + try_command_on_node 0 $CTDB pfetch "$TESTDB" "$key" + if [ "$value" != "$out" ] ; then + echo "BAD: for key \"$key\" expected \"$value\" but got \"$out\"" + exit 1 + else + echo "GOOD: for key \"$key\" got \"$out\"" + fi +done <<EOF +key2 value2 +key3 value1 +key4 value1 +EOF + +########## + +echo "Deleting all records" + +items=' +"key2" "" +"key3" "" +"key4" ""' + +echo "$items" | try_command_on_node -i 0 $CTDB ptrans "$TESTDB" + +try_command_on_node 0 $CTDB catdb "$TESTDB" + +n=$(grep -c '^key.*= "key.*"' "$outfile" || true) + +if [ $n -ne 0 ] ; then + echo "BAD: expected 0 keys in..." + cat "$outfile" + exit 1 +else + echo "GOOD: 0 records found" +fi diff --git a/ctdb/tests/INTEGRATION/database/transaction.002.loop.sh b/ctdb/tests/INTEGRATION/database/transaction.002.loop.sh new file mode 100755 index 0000000..d633c7c --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/transaction.002.loop.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +# Verify that the transaction_loop test succeeds + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +TESTDB="persistent_trans.tdb" + +try_command_on_node 0 "$CTDB attach $TESTDB persistent" +try_command_on_node 0 "$CTDB wipedb $TESTDB" + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +if [ -z "$CTDB_TEST_TIMELIMIT" ] ; then + CTDB_TEST_TIMELIMIT=30 +fi + +t="$CTDB_TEST_WRAPPER $VALGRIND transaction_loop \ + -n ${num_nodes} -t ${CTDB_TEST_TIMELIMIT} \ + -D ${TESTDB} -T persistent -k testkey" + +echo "Running transaction_loop on all $num_nodes nodes." +try_command_on_node -v -p all "$t" diff --git a/ctdb/tests/INTEGRATION/database/transaction.003.loop_recovery.sh b/ctdb/tests/INTEGRATION/database/transaction.003.loop_recovery.sh new file mode 100755 index 0000000..05aadba --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/transaction.003.loop_recovery.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +# Verify that the transaction_loop test succeeds with recoveries. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +recovery_loop() +{ + local COUNT=1 + + while true ; do + echo Recovery $COUNT + try_command_on_node 0 $CTDB recover + sleep 2 + COUNT=$((COUNT + 1)) + done +} + +recovery_loop_start() +{ + recovery_loop >/dev/null & + RECLOOP_PID=$! + ctdb_test_exit_hook_add "kill $RECLOOP_PID >/dev/null 2>&1" +} + +TESTDB="persistent_trans.tdb" + +try_command_on_node 0 "$CTDB attach $TESTDB persistent" +try_command_on_node 0 "$CTDB wipedb $TESTDB" + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +if [ -z "$CTDB_TEST_TIMELIMIT" ] ; then + CTDB_TEST_TIMELIMIT=30 +fi + +t="$CTDB_TEST_WRAPPER $VALGRIND transaction_loop \ + -n ${num_nodes} -t ${CTDB_TEST_TIMELIMIT} \ + -D ${TESTDB} -T persistent -k testkey" + +echo "Starting recovery loop" +recovery_loop_start + +echo "Running transaction_loop on all $num_nodes nodes." +try_command_on_node -v -p all "$t" diff --git a/ctdb/tests/INTEGRATION/database/transaction.004.update_record.sh b/ctdb/tests/INTEGRATION/database/transaction.004.update_record.sh new file mode 100755 index 0000000..528303a --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/transaction.004.update_record.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash + +# Verify that "ctdb update_record_persistent" creates new records and +# updates existing records in a persistent database +# +# 1. Create and wipe a persistent test database +# 2. Do a recovery +# 3. Confirm that the database is empty +# 4. Create a new record using "ctdb update_record_persistent" +# 5. Confirm the record exists in the database using "ctdb cattdb" +# 6. Update the record's value using "ctdb update_record_persistent" +# 7. Confirm that the original value no longer exists using "ctdb cattdb" + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +test_db="persistent_test.tdb" + +# create a temporary persistent database to test with +echo "Create persistent test database \"$test_db\"" +try_command_on_node 0 $CTDB attach "$test_db" persistent + + +# 3. +echo "Wipe the persistent test database" +try_command_on_node 0 $CTDB wipedb "$test_db" +echo "Force a recovery" +try_command_on_node 0 $CTDB recover + +# check that the database is wiped +num_records=$(db_ctdb_cattdb_count_records 1 "$test_db") +if [ $num_records = "0" ] ; then + echo "OK: database was wiped" +else + echo "BAD: we did not end up with an empty database" + exit 1 +fi + +# 4. +echo "Create a new record in the persistent database using UPDATE_RECORD" +try_command_on_node 0 $CTDB_TEST_WRAPPER $VALGRIND update_record_persistent \ + -D "$test_db" -k "Update_Record_Persistent" -v "FirstValue" + +try_command_on_node 0 "$CTDB cattdb "$test_db" | grep 'FirstValue' | wc -l" +if [ "$out" = 1 ] ; then + echo "GOOD: we did not find the record after the create/update" +else + echo "BAD: we did find the record after the create/update" + exit 1 +fi + +# 5. +echo Modify an existing record in the persistent database using UPDATE_RECORD +try_command_on_node 0 $CTDB_TEST_WRAPPER $VALGRIND update_record_persistent \ + -D "$test_db" -k "Update_Record_Persistent" -v "SecondValue" + +try_command_on_node 0 "$CTDB cattdb "$test_db" | grep 'FirstValue' | wc -l" +if [ "$out" = 0 ] ; then + echo "GOOD: did not find old record after the modify/update" +else + echo "BAD: we still found the old record after the modify/update" + exit 1 +fi + +try_command_on_node 0 "$CTDB cattdb "$test_db" | grep 'SecondValue' | wc -l" +if [ "$out" = 1 ] ; then + echo "GOOD: found the record after the modify/update" +else + echo "BAD: could not find the record after the modify/update" + exit 1 +fi + +echo "Wipe the persistent test databases and clean up" +try_command_on_node 0 $CTDB wipedb "$test_db" diff --git a/ctdb/tests/INTEGRATION/database/transaction.010.loop_recovery.sh b/ctdb/tests/INTEGRATION/database/transaction.010.loop_recovery.sh new file mode 100755 index 0000000..9de6c34 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/transaction.010.loop_recovery.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +# Verify that the transaction_loop test succeeds with recoveries for +# replicated databases + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +recovery_loop() +{ + local COUNT=1 + + while true ; do + echo Recovery $COUNT + try_command_on_node 0 $CTDB recover + sleep 2 + COUNT=$((COUNT + 1)) + done +} + +recovery_loop_start() +{ + recovery_loop >/dev/null & + RECLOOP_PID=$! + ctdb_test_exit_hook_add "kill $RECLOOP_PID >/dev/null 2>&1" +} + +TESTDB="replicated_trans.tdb" + +try_command_on_node 0 "$CTDB attach $TESTDB replicated" +try_command_on_node 0 "$CTDB wipedb $TESTDB" + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +if [ -z "$CTDB_TEST_TIMELIMIT" ] ; then + CTDB_TEST_TIMELIMIT=30 +fi + +t="$CTDB_TEST_WRAPPER $VALGRIND transaction_loop \ + -n ${num_nodes} -t ${CTDB_TEST_TIMELIMIT} \ + -D ${TESTDB} -T replicated -k testkey" + +echo "Starting recovery loop" +recovery_loop_start + +echo "Running transaction_loop on all $num_nodes nodes." +try_command_on_node -v -p all "$t" diff --git a/ctdb/tests/INTEGRATION/database/traverse.001.one.sh b/ctdb/tests/INTEGRATION/database/traverse.001.one.sh new file mode 100755 index 0000000..1b3b7c2 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/traverse.001.one.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash + +# Confirm that traverses of volatile databases work as expected + +# This is a very simple example. It writes a single record, updates it +# on another node and then confirms that the correct value is found when +# traversing. It then repeats this after removing the LMASTER role from +# the node where the value is updated. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +# +# Main test +# +TESTDB="traverse_db.tdb" + +echo "create volatile test database $TESTDB" +try_command_on_node 0 $CTDB attach "$TESTDB" + +echo "wipe test database $TESTDB" +try_command_on_node 0 $CTDB wipedb "$TESTDB" + +echo "write foo=bar0 on node 0" +try_command_on_node 0 $CTDB writekey "$TESTDB" "foo" "bar0" + +echo "write foo=bar1 on node 1" +try_command_on_node 1 $CTDB writekey "$TESTDB" "foo" "bar1" + +echo + +check_db_num_records () +{ + local node="$1" + local db="$2" + local n="$3" + + echo "Checking on node ${node} to ensure ${db} has ${n} records..." + try_command_on_node "$node" "${CTDB} catdb ${db}" + + num=$(sed -n -e 's|^Dumped \(.*\) records$|\1|p' "$outfile") + if [ "$num" = "$n" ] ; then + echo "OK: Number of records=${num}" + echo + else + echo "BAD: There were ${num} (!= ${n}) records" + cat "$outfile" + exit 1 + fi +} + +check_db_num_records 0 "$TESTDB" 1 +check_db_num_records 1 "$TESTDB" 1 + +cat <<EOF + +Again, this time with 10 records, rewriting 5 of them on the 2nd node + +EOF + +echo "wipe test database $TESTDB" +try_command_on_node 0 $CTDB wipedb "$TESTDB" + +for i in $(seq 0 9) ; do + k="foo${i}" + v="bar${i}@0" + echo "write ${k}=${v} on node 0" + try_command_on_node 0 "${CTDB} writekey ${TESTDB} ${k} ${v}" +done + +for i in $(seq 1 5) ; do + k="foo${i}" + v="bar${i}@1" + echo "write ${k}=${v} on node 1" + try_command_on_node 1 "${CTDB} writekey ${TESTDB} ${k} ${v}" +done + +check_db_num_records 0 "$TESTDB" 10 +check_db_num_records 1 "$TESTDB" 10 + +cat <<EOF + +Again, this time with lmaster role off on node 1 + +EOF + +echo "wipe test database $TESTDB" +try_command_on_node 0 $CTDB wipedb "$TESTDB" + +echo "switching off lmaster role on node 1" +try_command_on_node 1 $CTDB setlmasterrole off + +try_command_on_node -v 1 $CTDB getcapabilities + +wait_until_node_has_status 1 notlmaster 10 0 + +echo "write foo=bar0 on node 0" +try_command_on_node 0 $CTDB writekey "$TESTDB" "foo" "bar0" + +echo "write foo=bar1 on node 1" +try_command_on_node 1 $CTDB writekey "$TESTDB" "foo" "bar1" + +echo + +check_db_num_records 0 "$TESTDB" 1 +check_db_num_records 1 "$TESTDB" 1 + +if grep -q "^data(4) = \"bar1\"\$" "$outfile" ; then + echo "OK: Data from node 1 was returned" +else + echo "BAD: Data from node 1 was not returned" + exit 1 +fi diff --git a/ctdb/tests/INTEGRATION/database/traverse.002.many.sh b/ctdb/tests/INTEGRATION/database/traverse.002.many.sh new file mode 100755 index 0000000..fb0dc98 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/traverse.002.many.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +# Test cluster wide traverse code +# +# 1. Create a volatile test database +# 2. Add records on different nodes +# 3. Use "ctdb catdb" to confirm that all added records are present + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 "$CTDB listnodes" +num_nodes=$(echo "$out" | wc -l) + +num_records=1000 + +TESTDB="traverse_test.tdb" + +echo "create test database $TESTDB" +try_command_on_node 0 $CTDB attach $TESTDB + +echo "wipe test database $TESTDB" +try_command_on_node 0 $CTDB wipedb $TESTDB + +echo "Add $num_records records to database" +i=0 +while [ $i -lt $num_records ]; do + key=$(printf "key-%04x" $i) + value="value-$i" + + n=$[ $i % $num_nodes ] + try_command_on_node $n $CTDB writekey $TESTDB $key $value + + i=$[ $i + 1 ] +done + +echo "Start a traverse and collect records" +try_command_on_node 0 $CTDB catdb $TESTDB + +num_read=$(tail -n 1 "$outfile" | cut -d\ -f2) +if [ $num_read -eq $num_records ]; then + echo "GOOD: All $num_records records retrieved" + status=0 +else + echo "BAD: Only $num_read/$num_records records retrieved" + status=1 +fi + +exit $status diff --git a/ctdb/tests/INTEGRATION/database/vacuum.001.fast.sh b/ctdb/tests/INTEGRATION/database/vacuum.001.fast.sh new file mode 100755 index 0000000..27a2225 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/vacuum.001.fast.sh @@ -0,0 +1,159 @@ +#!/usr/bin/env bash + +# Ensure that vacuuming deletes records on all nodes + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +vacuum_test () +{ + local db="$1" + local num_records="$2" + local delete_from_lmaster="${3:-false}" + + local t + if "$delete_from_lmaster" ; then + t="lmaster" + else + t="non-lmaster" + fi + + echo + echo '............................................................' + printf 'Creating %d record(s)\n' "$num_records" + printf 'Testing vacuuming of 1 record deleted from %s\n' "$t" + echo '............................................................' + + echo + echo "Stall vacuuming on all nodes" + ctdb_onnode -p all "setvar VacuumInterval 99999" + + echo + echo "Getting list of nodes..." + local all_pnns + ctdb_get_all_pnns + + local first + first=$(echo "$all_pnns" | sed -n -e '1p') + + echo + echo "Create/wipe test database ${db}" + ctdb_onnode "$first" "attach ${db}" + ctdb_onnode "$first" "wipedb ${db}" + + echo + echo "Write ${num_records} records to ${db}" + local i + for i in $(seq 1 "$num_records") ; do + ctdb_onnode "$first" "writekey ${db} test${i} value${i}" + done + + echo + echo "Migrate record(s) to all nodes" + for i in $(seq 1 "$num_records") ; do + ctdb_onnode all "readkey ${db} test${i}" + done + + echo + echo "Confirm that all nodes have all the records" + check_cattdb_num_records "$db" "$num_records" "$all_pnns" + + local key="test1" + echo + echo "Delete key ${key}" + + echo " Find lmaster for key \"${key}\"" + testprog_onnode "$first" "ctdb-db-test get-lmaster ${key}" + # out is set above + # shellcheck disable=SC2154 + lmaster="$out" + echo " lmaster=${lmaster}" + + if "$delete_from_lmaster" ; then + echo " Delete key ${key} on lmaster node ${lmaster}" + dnode="$lmaster" + else + for i in $all_pnns ; do + if [ "$i" != "$lmaster" ] ; then + dnode="$i" + break + fi + done + echo " Delete key ${key} on non-lmaster node ${dnode}" + fi + ctdb_onnode "$dnode" "deletekey ${db} ${key}" + + echo + vacuum_confirm_key_empty_dmaster "$dnode" "$db" "$key" + + echo + echo "Confirm all records still exist on all nodes" + check_cattdb_num_records "$db" "$num_records" "$all_pnns" + + if ! "$delete_from_lmaster" ; then + # Ask the lmaster to fetch the deleted record + echo + echo "Vacuum on non-lmaster node ${dnode}" + testprog_onnode "$dnode" "ctdb-db-test vacuum ${db}" + + echo + vacuum_confirm_key_empty_dmaster "$dnode" "$db" "$key" + + # Fetch the record and put it in the delete queue in + # the main daemon for processing in next vacuuming run + # on the lmaster + echo + echo "Vacuum on lmaster node ${lmaster}" + testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + + echo + echo "Confirm all records still exist on all node nodes" + check_cattdb_num_records "$db" "$num_records" "$all_pnns" + + echo + vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key" + fi + + echo + # In the delete-from-lmaster case, the record is already in + # the lmaster's delete-queue so only a single run is needed + echo "Vacuum on lmaster node ${lmaster}" + testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + + echo + echo "Confirm a record has been deleted on all nodes" + local n=$((num_records - 1)) + check_cattdb_num_records "$db" "$n" "$all_pnns" + + echo + echo "Confirm all other records still exist with expected values" + local i + for i in $(seq 1 "$num_records") ; do + local k="test${i}" + local v="value${i}" + + if [ "$k" = "$key" ] ; then + continue + fi + + db_confirm_key_has_value "$first" "$db" "$k" "$v" + done + echo "GOOD" +} + +testdb="vacuum_test.tdb" + +# 1 record, delete from non-lmaster +vacuum_test "$testdb" 1 false + +# 10 records, delete from non-lmaster +vacuum_test "$testdb" 10 false + +# 1 record, delete from lmaster +vacuum_test "$testdb" 1 true + +# 10 records, delete from lmaster +vacuum_test "$testdb" 10 true diff --git a/ctdb/tests/INTEGRATION/database/vacuum.002.full.sh b/ctdb/tests/INTEGRATION/database/vacuum.002.full.sh new file mode 100755 index 0000000..0dc8372 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/vacuum.002.full.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash + +# Ensure a full vacuuming run deletes records + +# Create some records, delete some of them on their lmaster (with a +# test tool that doesn't do SCHEDULE_FOR_DELETION), run some fast +# vacuuming runs (to ensure they don't delete records that haven't +# been added to the delete queue) and then try a full vacuuming run, +# which will actually do a traverse of the database to find empty +# records and delete them. Confirm that records that haven't been +# deleted are still there, with expected values. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +db="vacuum_test.tdb" + +echo "Stall vacuuming on all nodes" +ctdb_onnode -p all "setvar VacuumInterval 99999" + +echo +echo "Getting list of nodes..." +ctdb_get_all_pnns + +# all_pnns is set above by ctdb_get_all_pnns() +# shellcheck disable=SC2154 +first=$(echo "$all_pnns" | sed -n -e '1p') + +echo +echo "Create/wipe test database ${db}" +ctdb_onnode "$first" "attach ${db}" +ctdb_onnode "$first" "wipedb ${db}" + +echo +echo "Create records in ${db}" +for i in $(seq 1 10) ; do + ctdb_onnode "$first" "writekey ${db} delete${i} value${i}" + ctdb_onnode "$first" "writekey ${db} keep${i} value${i}" +done + +echo +echo "Migrate record(s) to all nodes" +for i in $(seq 1 10) ; do + ctdb_onnode all "readkey ${db} delete${i}" + ctdb_onnode all "readkey ${db} keep${i}" +done + +echo +echo "Confirm that all nodes have all the records" +check_cattdb_num_records "$db" 20 "$all_pnns" + +echo +echo "Delete all 10 records from their lmaster node" +for i in $(seq 1 10) ; do + key="delete${i}" + + testprog_onnode "$first" "ctdb-db-test get-lmaster ${key}" + # $out is set above by testprog_onnode() + # shellcheck disable=SC2154 + lmaster="$out" + + echo + echo "Delete ${key} from lmaster node ${lmaster}" + testprog_onnode "$lmaster" \ + "ctdb-db-test fetch-local-delete $db ${key}" + + vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key" +done + +echo "Do fast vacuuming run on all nodes" +testprog_onnode "all" "ctdb-db-test vacuum ${db}" + +echo +echo "Confirm all records still exist on all nodes" +check_cattdb_num_records "$db" 20 "$all_pnns" + +echo +echo "Do full vacuuming run on all nodes" +testprog_onnode "all" "ctdb-db-test vacuum ${db} full" + +echo +echo "Confirm 10 records exist on all nodes" +check_cattdb_num_records "$db" 10 "$all_pnns" + +echo +echo "Confirm that remaining records still exist with expected values" +for i in $(seq 1 10) ; do + k="keep${i}" + v="value${i}" + + db_confirm_key_has_value "$first" "$db" "$k" "$v" +done +echo "GOOD" diff --git a/ctdb/tests/INTEGRATION/database/vacuum.003.recreate.sh b/ctdb/tests/INTEGRATION/database/vacuum.003.recreate.sh new file mode 100755 index 0000000..acb7b13 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/vacuum.003.recreate.sh @@ -0,0 +1,139 @@ +#!/usr/bin/env bash + +# Ensure that vacuuming does not delete a record that is recreated +# before vacuuming completes. This needs at least 3 nodes. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +db="vacuum_test.tdb" + +echo "Stall vacuuming on all nodes" +ctdb_onnode -p all "setvar VacuumInterval 99999" + +echo +echo "Getting list of nodes..." +ctdb_get_all_pnns + +# all_pnns is set above by ctdb_get_all_pnns() +# shellcheck disable=SC2154 +first=$(echo "$all_pnns" | sed -n -e '1p') + +echo +echo "Create/wipe test database ${db}" +ctdb_onnode "$first" "attach ${db}" +ctdb_onnode "$first" "wipedb ${db}" + +echo +echo "Create a record in ${db}" +ctdb_onnode "$first" "writekey ${db} key value1" + +echo +echo "Migrate record to all nodes" +ctdb_onnode all "readkey ${db} key" + +echo +echo "Confirm that all nodes have the record" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo +echo "Determine lmaster node for key" +testprog_onnode "$first" "ctdb-db-test get-lmaster key" +# $out is set above by testprog_onnode() +# shellcheck disable=SC2154 +lmaster="$out" +echo "lmaster=${lmaster}" + +non_lmaster="" +# Find a non-lmaster node +for i in $all_pnns ; do + if [ "$i" != "$lmaster" ] ; then + non_lmaster="$i" + break + fi +done +if [ -z "$non_lmaster" ] ; then + ctdb_test_fail "Could not find non-lmaster node for key" +fi + +another_non_lmaster="" +# Find another non-lmaster node +for i in $all_pnns ; do + if [ "$i" != "$lmaster" ] && [ "$i" != "$non_lmaster" ] ; then + another_non_lmaster="$i" + break + fi +done +if [ -z "$another_non_lmaster" ] ; then + ctdb_test_fail "Could not find another non-lmaster node for key" +fi + +vacuum_test () +{ + local db="$1" + local key="$2" + local val="$3" + local dnode="$4" + local rnode="$5" + local rrun="$6" + + echo + echo '............................................................' + printf 'Delete key %s on node %d\n' "$key" "$dnode" + printf 'Recreate on node %d after %d vacuuming run(s)\n' \ + "$rnode" "$rrun" + echo '............................................................' + + echo + echo "Delete key \"${key}\" from node ${dnode}" + ctdb_onnode "$dnode" "deletekey ${db} ${key}" + + if [ "$rrun" -eq 0 ] ; then + echo "Recreate record on node ${rnode}" + ctdb_onnode "$rnode" "writekey ${db} ${key} ${val}" + fi + + echo "Do a fast vacuuming run on node ${dnode}" + testprog_onnode "$dnode" "ctdb-db-test vacuum ${db}" + + if [ "$rrun" -eq 1 ] ; then + echo "Recreate record on node ${rnode}" + ctdb_onnode "$rnode" "writekey ${db} ${key} ${val}" + fi + + echo "Do a fast vacuuming run on lmaster node ${lmaster}" + testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + + if [ "$rrun" -eq 2 ] ; then + echo "Recreate record on node ${rnode}" + ctdb_onnode "$rnode" "writekey ${db} ${key} ${val}" + fi + + echo "Do a fast vacuuming run on lmaster node ${lmaster}" + testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + + echo + echo "Confirm the record still exists on all nodes" + check_cattdb_num_records "$db" 1 "$all_pnns" + + echo + echo "Confirm the record contains correct value" + db_confirm_key_has_value "$first" "$db" "$key" "$val" +} + +vacuum_test "$db" "key" "value01" "$non_lmaster" "$non_lmaster" 0 +vacuum_test "$db" "key" "value02" "$non_lmaster" "$another_non_lmaster" 0 +vacuum_test "$db" "key" "value03" "$non_lmaster" "$lmaster" 0 +vacuum_test "$db" "key" "value04" "$lmaster" "$non_lmaster" 0 +vacuum_test "$db" "key" "value05" "$lmaster" "$lmaster" 0 + +vacuum_test "$db" "key" "value06" "$non_lmaster" "$non_lmaster" 1 +vacuum_test "$db" "key" "value07" "$non_lmaster" "$lmaster" 1 +vacuum_test "$db" "key" "value08" "$non_lmaster" "$another_non_lmaster" 1 + +vacuum_test "$db" "key" "value09" "$non_lmaster" "$non_lmaster" 2 +vacuum_test "$db" "key" "value10" "$non_lmaster" "$lmaster" 2 +vacuum_test "$db" "key" "value11" "$non_lmaster" "$another_non_lmaster" 2 diff --git a/ctdb/tests/INTEGRATION/database/vacuum.030.locked.sh b/ctdb/tests/INTEGRATION/database/vacuum.030.locked.sh new file mode 100755 index 0000000..3862526 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/vacuum.030.locked.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash + +# Confirm that a record is not vacuumed if it is locked when the 1st +# fast vacuuming run occurs on the node on which it was deleted, but +# is dropped from the delete queue + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +db="vacuum_test.tdb" +key="key" + +echo "Stall vacuuming on all nodes" +ctdb_onnode -p all "setvar VacuumInterval 99999" + +echo +echo "Getting list of nodes..." +ctdb_get_all_pnns + +# all_pnns is set above by ctdb_get_all_pnns() +# shellcheck disable=SC2154 +first=$(echo "$all_pnns" | sed -n -e '1p') + +echo +echo "Determine lmaster node for key" +testprog_onnode "$first" "ctdb-db-test get-lmaster key" +# $out is set above by testprog_onnode() +# shellcheck disable=SC2154 +lmaster="$out" +echo "lmaster=${lmaster}" + +non_lmaster="" +# Find a non-lmaster node +for i in $all_pnns ; do + if [ "$i" != "$lmaster" ] ; then + non_lmaster="$i" + break + fi +done +if [ -z "$non_lmaster" ] ; then + ctdb_test_fail "Could not find non-lmaster node for key" +fi + +echo "............................................................" +echo "Delete key ${key} on non-lmaster node ${non_lmaster}" +echo "Lock on node ${non_lmaster} during 1st vacuuming run" +echo "............................................................" + +echo + +echo "Create/wipe test database ${db}" +ctdb_onnode "$first" "attach ${db}" +ctdb_onnode "$first" "wipedb ${db}" + +echo "Create a record in ${db}" +ctdb_onnode "$first" "writekey ${db} ${key} value1" + +echo "Migrate record to all nodes" +ctdb_onnode all "readkey ${db} ${key}" + +echo "Confirm that all nodes have the record" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo + +echo "Delete key \"${key}\" from node ${non_lmaster}" +ctdb_onnode "$non_lmaster" "deletekey $db ${key}" + +echo "Lock record on node ${non_lmaster}" +testprog_onnode "$non_lmaster" "ctdb-db-test local-lock ${db} ${key}" +pid="${out#OK }" +ctdb_test_cleanup_pid_set "$non_lmaster" "$pid" + +echo "Do a fast vacuuming run on node ${non_lmaster}" +testprog_onnode "$non_lmaster" "ctdb-db-test vacuum ${db}" + +echo "Kill lock process ${pid} on node ${non_lmaster}" +try_command_on_node "$non_lmaster" "kill ${pid}" +ctdb_test_cleanup_pid_clear + +echo + +# If the record is still in the delete queue then this will process it +echo "Do a fast vacuuming run on node ${non_lmaster}" +testprog_onnode "$non_lmaster" "ctdb-db-test vacuum ${db}" + +echo "Do a fast vacuuming run on lmaster node ${lmaster}" +testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + +echo "Do a fast vacuuming run on lmaster node ${lmaster}" +testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + +echo + +echo "Confirm the record still exists on all nodes" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo +vacuum_confirm_key_empty_dmaster "$non_lmaster" "$db" "$key" diff --git a/ctdb/tests/INTEGRATION/database/vacuum.031.locked.sh b/ctdb/tests/INTEGRATION/database/vacuum.031.locked.sh new file mode 100755 index 0000000..d16482e --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/vacuum.031.locked.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash + +# Confirm that a record is vacuumed if it is locked on the deleting +# node when the 2nd fast vacuuming run occurs, but vacuuming is +# delayed until the lock is released + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +db="vacuum_test.tdb" +key="key" + +echo "Stall vacuuming on all nodes" +ctdb_onnode -p all "setvar VacuumInterval 99999" + +echo +echo "Getting list of nodes..." +ctdb_get_all_pnns + +# all_pnns is set above by ctdb_get_all_pnns() +# shellcheck disable=SC2154 +first=$(echo "$all_pnns" | sed -n -e '1p') + +echo +echo "Determine lmaster node for key" +testprog_onnode "$first" "ctdb-db-test get-lmaster key" +# $out is set above by testprog_onnode() +# shellcheck disable=SC2154 +lmaster="$out" +echo "lmaster=${lmaster}" + +non_lmaster="" +# Find a non-lmaster node +for i in $all_pnns ; do + if [ "$i" != "$lmaster" ] ; then + non_lmaster="$i" + break + fi +done +if [ -z "$non_lmaster" ] ; then + ctdb_test_fail "Could not find non-lmaster node for key" +fi + +echo "............................................................" +echo "Delete key ${key} on node ${non_lmaster}" +echo "Lock on non-lmaster node ${non_lmaster} during 2nd vacuuming run" +echo "............................................................" + +echo + +echo "Create/wipe test database ${db}" +ctdb_onnode "$first" "attach ${db}" +ctdb_onnode "$first" "wipedb ${db}" + +echo "Create a record in ${db}" +ctdb_onnode "$first" "writekey ${db} ${key} value1" + +echo "Migrate record to all nodes" +ctdb_onnode all "readkey ${db} ${key}" + +echo "Confirm that all nodes have the record" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo + +echo "Delete key \"${key}\" from node ${non_lmaster}" +ctdb_onnode "$non_lmaster" "deletekey $db ${key}" + +echo +echo "Do a fast vacuuming run on node ${non_lmaster}" +testprog_onnode "$non_lmaster" "ctdb-db-test vacuum ${db}" + +echo +echo "Confirm that all nodes still have the record" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo +echo "Lock record on non-lmaster node ${non_lmaster}" +testprog_onnode "$non_lmaster" "ctdb-db-test local-lock ${db} ${key}" +pid="${out#OK }" +ctdb_test_cleanup_pid_set "$non_lmaster" "$pid" + +echo +echo "Do a fast vacuuming run on lmaster node ${lmaster} - THIS WILL FAIL" +status=0 +testprog_onnode "$lmaster" "ctdb-db-test -t 10 vacuum ${db}" || status=$? + +if [ $status -ne 110 ] ; then + ctdb_test_fail "$out" +fi + +echo "Confirm record key=\"${key}\" has dmaster=${non_lmaster}" +vacuum_test_key_dmaster "$lmaster" "$db" "$key" "$non_lmaster" + +echo "Kill lock process ${pid} on node ${non_lmaster}" +try_command_on_node "$non_lmaster" "kill ${pid}" +ctdb_test_cleanup_pid_clear + +echo "Wait until record is migrated to lmaster node ${lmaster}" +vacuum_test_wait_key_dmaster "$lmaster" "$db" "$key" + +echo +echo "Confirm that all nodes still have the record" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo "Do a fast vacuuming run on node ${lmaster}" +testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + +echo +echo "Confirm that the record is gone from all nodes" +check_cattdb_num_records "$db" 0 "$all_pnns" diff --git a/ctdb/tests/INTEGRATION/database/vacuum.032.locked.sh b/ctdb/tests/INTEGRATION/database/vacuum.032.locked.sh new file mode 100755 index 0000000..481d1d4 --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/vacuum.032.locked.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash + +# Confirm that a record is not vacuumed if it is locked on the lmaster +# when the 3rd fast vacuuming run occurs, but is dropped from the +# lmaster delete queue + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +db="vacuum_test.tdb" +key="key" + +echo "Stall vacuuming on all nodes" +ctdb_onnode -p all "setvar VacuumInterval 99999" + +echo +echo "Getting list of nodes..." +ctdb_get_all_pnns + +# all_pnns is set above by ctdb_get_all_pnns() +# shellcheck disable=SC2154 +first=$(echo "$all_pnns" | sed -n -e '1p') + +echo +echo "Determine lmaster node for key" +testprog_onnode "$first" "ctdb-db-test get-lmaster key" +# $out is set above by testprog_onnode() +# shellcheck disable=SC2154 +lmaster="$out" +echo "lmaster=${lmaster}" + +non_lmaster="" +# Find a non-lmaster node +for i in $all_pnns ; do + if [ "$i" != "$lmaster" ] ; then + non_lmaster="$i" + break + fi +done +if [ -z "$non_lmaster" ] ; then + ctdb_test_fail "Could not find non-lmaster node for key" +fi + +echo "............................................................" +echo "Delete key ${key} on node ${non_lmaster}" +echo "Lock on lmaster node ${lmaster} during 3rd vacuuming run" +echo "............................................................" + +echo + +echo "Create/wipe test database ${db}" +ctdb_onnode "$first" "attach ${db}" +ctdb_onnode "$first" "wipedb ${db}" + +echo "Create a record in ${db}" +ctdb_onnode "$first" "writekey ${db} ${key} value1" + +echo "Migrate record to all nodes" +ctdb_onnode all "readkey ${db} ${key}" + +echo "Confirm that all nodes have the record" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo + +echo "Delete key \"${key}\" from node ${non_lmaster}" +ctdb_onnode "$non_lmaster" "deletekey $db ${key}" + +echo "Do a fast vacuuming run on node ${non_lmaster}" +testprog_onnode "$non_lmaster" "ctdb-db-test vacuum ${db}" + +echo "Do a fast vacuuming run on lmaster node ${lmaster}" +testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + +echo "Lock record on lmaster node ${lmaster}" +testprog_onnode "$lmaster" "ctdb-db-test local-lock ${db} ${key}" +pid="${out#OK }" +ctdb_test_cleanup_pid_set "$lmaster" "$pid" + +echo "Do a fast vacuuming run on node ${lmaster}" +testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + +echo "Kill lock process ${pid} on node ${lmaster}" +try_command_on_node "$lmaster" "kill ${pid}" +ctdb_test_cleanup_pid_clear + +echo + +# If the record is still in the delete queue then this will process it +echo "Do a fast vacuuming run on lmaster node ${lmaster}" +testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + +echo + +echo "Confirm the record still exists on all nodes" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo +vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key" diff --git a/ctdb/tests/INTEGRATION/database/vacuum.033.locked.sh b/ctdb/tests/INTEGRATION/database/vacuum.033.locked.sh new file mode 100755 index 0000000..63d7d1f --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/vacuum.033.locked.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash + +# Confirm that a record is not vacuumed if it is locked on the +# deleting node when the 3rd fast vacuuming run occurs, but is dropped +# from the lmaster delete list + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +db="vacuum_test.tdb" +key="key" + +echo "Stall vacuuming on all nodes" +ctdb_onnode -p all "setvar VacuumInterval 99999" + +echo +echo "Getting list of nodes..." +ctdb_get_all_pnns + +# all_pnns is set above by ctdb_get_all_pnns() +# shellcheck disable=SC2154 +first=$(echo "$all_pnns" | sed -n -e '1p') + +echo +echo "Determine lmaster node for key" +testprog_onnode "$first" "ctdb-db-test get-lmaster key" +# $out is set above by testprog_onnode() +# shellcheck disable=SC2154 +lmaster="$out" +echo "lmaster=${lmaster}" + +non_lmaster="" +# Find a non-lmaster node +for i in $all_pnns ; do + if [ "$i" != "$lmaster" ] ; then + non_lmaster="$i" + break + fi +done +if [ -z "$non_lmaster" ] ; then + ctdb_test_fail "Could not find non-lmaster node for key" +fi + +echo "............................................................" +echo "Delete key ${key} on node ${non_lmaster}" +echo "Lock on non-lmaster node ${non_lmaster} during 3rd vacuuming run" +echo "............................................................" + +echo + +echo "Create/wipe test database ${db}" +ctdb_onnode "$first" "attach ${db}" +ctdb_onnode "$first" "wipedb ${db}" + +echo "Create a record in ${db}" +ctdb_onnode "$first" "writekey ${db} ${key} value1" + +echo "Migrate record to all nodes" +ctdb_onnode all "readkey ${db} ${key}" + +echo "Confirm that all nodes have the record" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo + +echo "Delete key \"${key}\" from node ${non_lmaster}" +ctdb_onnode "$non_lmaster" "deletekey $db ${key}" + +echo +echo "Do a fast vacuuming run on node ${non_lmaster}" +testprog_onnode "$non_lmaster" "ctdb-db-test vacuum ${db}" + +echo +echo "Confirm that all nodes still have the record" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo +echo "Do a fast vacuuming run on lmaster node ${lmaster}" +testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + +echo +echo "Confirm that all nodes still have the record" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo +echo "Lock record on non-lmaster node ${non_lmaster}" +testprog_onnode "$non_lmaster" "ctdb-db-test local-lock ${db} ${key}" +pid="${out#OK }" +ctdb_test_cleanup_pid_set "$non_lmaster" "$pid" + +echo "Do a fast vacuuming run on node ${lmaster}" +testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + +echo "Kill lock process ${pid} on node ${non_lmaster}" +try_command_on_node "$non_lmaster" "kill ${pid}" +ctdb_test_cleanup_pid_clear + +echo +echo "Confirm that nodes ${lmaster} and ${non_lmaster} still have the record" +check_cattdb_num_records "$db" 1 "${lmaster} ${non_lmaster}" + +vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key" + +echo + +# Record has been dropped from the delete list so this will not pick it up +echo "Do a fast vacuuming run on lmaster node ${lmaster}" +testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + +echo +echo "Confirm that nodes ${lmaster} and ${non_lmaster} still have the record" +check_cattdb_num_records "$db" 1 "${lmaster} ${non_lmaster}" + +vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key" diff --git a/ctdb/tests/INTEGRATION/database/vacuum.034.locked.sh b/ctdb/tests/INTEGRATION/database/vacuum.034.locked.sh new file mode 100755 index 0000000..7f37ada --- /dev/null +++ b/ctdb/tests/INTEGRATION/database/vacuum.034.locked.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash + +# Confirm that a record is not vacuumed if it is locked on another +# (non-lmaster, non-deleting) node when the 3rd fast vacuuming run +# occurs, but is dropped from the lmaster delete tree + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +db="vacuum_test.tdb" +key="key" + +echo "Stall vacuuming on all nodes" +ctdb_onnode -p all "setvar VacuumInterval 99999" + +echo +echo "Getting list of nodes..." +ctdb_get_all_pnns + +# all_pnns is set above by ctdb_get_all_pnns() +# shellcheck disable=SC2154 +first=$(echo "$all_pnns" | sed -n -e '1p') + +echo +echo "Determine lmaster node for key" +testprog_onnode "$first" "ctdb-db-test get-lmaster key" +# $out is set above by testprog_onnode() +# shellcheck disable=SC2154 +lmaster="$out" +echo "lmaster=${lmaster}" + +non_lmaster="" +# Find a non-lmaster node +for i in $all_pnns ; do + if [ "$i" != "$lmaster" ] ; then + non_lmaster="$i" + break + fi +done +if [ -z "$non_lmaster" ] ; then + ctdb_test_fail "Could not find non-lmaster node for key" +fi + +another_node="" +# Find another node +for i in $all_pnns ; do + if [ "$i" != "$lmaster" ] && [ "$i" != "$non_lmaster" ] ; then + another_node="$i" + break + fi +done +if [ -z "$another_node" ] ; then + ctdb_test_fail "Could not find another non-lmaster node for key" +fi + +echo "............................................................" +echo "Delete key ${key} on node ${non_lmaster}" +echo "Lock on non-lmaster node ${non_lmaster} during 3rd vacuuming run" +echo "............................................................" + +echo + +echo "Create/wipe test database ${db}" +ctdb_onnode "$first" "attach ${db}" +ctdb_onnode "$first" "wipedb ${db}" + +echo "Create a record in ${db}" +ctdb_onnode "$first" "writekey ${db} ${key} value1" + +echo "Migrate record to all nodes" +ctdb_onnode all "readkey ${db} ${key}" + +echo "Confirm that all nodes have the record" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo + +echo "Delete key \"${key}\" from node ${non_lmaster}" +ctdb_onnode "$non_lmaster" "deletekey $db ${key}" + +echo +echo "Do a fast vacuuming run on node ${non_lmaster}" +testprog_onnode "$non_lmaster" "ctdb-db-test vacuum ${db}" + +echo +echo "Confirm that all nodes still have the record" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo +echo "Do a fast vacuuming run on lmaster node ${lmaster}" +testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + +echo +echo "Confirm that all nodes still have the record" +check_cattdb_num_records "$db" 1 "$all_pnns" + +echo +echo "Lock record on non-lmaster node ${another_node}" +testprog_onnode "$another_node" "ctdb-db-test local-lock ${db} ${key}" +pid="${out#OK }" +ctdb_test_cleanup_pid_set "$another_node" "$pid" + +echo "Do a fast vacuuming run on node ${lmaster}" +testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + +echo "Kill lock process ${pid} on node ${another_node}" +try_command_on_node "$another_node" "kill ${pid}" +ctdb_test_cleanup_pid_clear + +echo +echo "Confirm that nodes ${lmaster} and ${another_node} still have the record" +check_cattdb_num_records "$db" 1 "${lmaster} ${another_node}" + +vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key" + +echo + +# Record has been dropped from the delete list so this will not pick it up +echo "Do a fast vacuuming run on lmaster node ${lmaster}" +testprog_onnode "$lmaster" "ctdb-db-test vacuum ${db}" + +echo +echo "Confirm that nodes ${lmaster} and ${another_node} still have the record" +check_cattdb_num_records "$db" 1 "${lmaster} ${another_node}" + +vacuum_confirm_key_empty_dmaster "$lmaster" "$db" "$key" diff --git a/ctdb/tests/INTEGRATION/failover/pubips.001.list.sh b/ctdb/tests/INTEGRATION/failover/pubips.001.list.sh new file mode 100755 index 0000000..2fc75b7 --- /dev/null +++ b/ctdb/tests/INTEGRATION/failover/pubips.001.list.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb ip' shows the correct output + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +echo "Getting list of public IPs..." +try_command_on_node -v 1 "$CTDB ip all | tail -n +2" +ips=$(sed \ + -e 's@ node\[@ @' \ + -e 's@\].*$@@' \ + "$outfile") +machineout=$(sed -r \ + -e 's@^| |$@\|@g' \ + -e 's@[[:alpha:]]+\[@@g' \ + -e 's@\]@@g' \ + "$outfile") + +if ctdb_test_on_cluster ; then + while read ip pnn ; do + try_command_on_node $pnn "ip addr show to ${ip}" + if [ -n "$out" ] ; then + echo "GOOD: node $pnn appears to have $ip assigned" + else + die "BAD: node $pnn does not appear to have $ip assigned" + fi + done <<<"$ips" # bashism to avoid problem setting variable in pipeline. +fi + +echo "Looks good!" + +cmd="$CTDB -X ip all | tail -n +2" +echo "Checking that \"$cmd\" produces expected output..." + +try_command_on_node 1 "$cmd" +if [ "$out" = "$machineout" ] ; then + echo "Yep, looks good!" +else + echo "Nope, it looks like this:" + echo "$out" + echo "Should be like this:" + echo "$machineout" + exit 1 +fi diff --git a/ctdb/tests/INTEGRATION/failover/pubips.010.addip.sh b/ctdb/tests/INTEGRATION/failover/pubips.010.addip.sh new file mode 100755 index 0000000..aba85dd --- /dev/null +++ b/ctdb/tests/INTEGRATION/failover/pubips.010.addip.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +# Verify that an IP address can be added to a node using 'ctdb addip' + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips +get_test_ip_mask_and_iface + +echo "Deleting IP $test_ip from all nodes" +delete_ip_from_all_nodes $test_ip +try_command_on_node -v $test_node $CTDB ipreallocate +wait_until_ips_are_on_node '!' $test_node $test_ip + +# Debugging... +try_command_on_node -v all $CTDB ip + +echo "Adding IP ${test_ip}/${mask} on ${iface}, node ${test_node}" +try_command_on_node $test_node $CTDB addip ${test_ip}/${mask} $iface +try_command_on_node $test_node $CTDB ipreallocate +wait_until_ips_are_on_node $test_node $test_ip diff --git a/ctdb/tests/INTEGRATION/failover/pubips.011.delip.sh b/ctdb/tests/INTEGRATION/failover/pubips.011.delip.sh new file mode 100755 index 0000000..5235a9d --- /dev/null +++ b/ctdb/tests/INTEGRATION/failover/pubips.011.delip.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Verify that a node's public IP address can be deleted using 'ctdb deleteip' + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips + +echo "Deleting IP ${test_ip} from node ${test_node}" +try_command_on_node $test_node $CTDB delip $test_ip +try_command_on_node $test_node $CTDB ipreallocate +wait_until_ips_are_on_node '!' $test_node $test_ip diff --git a/ctdb/tests/INTEGRATION/failover/pubips.012.reloadips.sh b/ctdb/tests/INTEGRATION/failover/pubips.012.reloadips.sh new file mode 100755 index 0000000..a3bb3af --- /dev/null +++ b/ctdb/tests/INTEGRATION/failover/pubips.012.reloadips.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash + +# Verify that IPs can be reconfigured using 'ctdb reloadips' + +# Various sub-tests that remove addresses from the public_addresses file +# on a node or delete the entire contents of the public_addresses file. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips + +try_command_on_node $test_node $CTDB_TEST_WRAPPER ctdb_base_show +addresses="${out}/public_addresses" +echo "Public addresses file on node $test_node is \"$addresses\"" +backup="${addresses}.$$" + +restore_public_addresses () +{ + try_command_on_node $test_node "mv $backup $addresses >/dev/null 2>&1 || true" +} +ctdb_test_exit_hook_add restore_public_addresses + +# ctdb reloadips will fail if it can't disable takover runs. The most +# likely reason for this is that there is already a takeover run in +# progress. We can't predict when this will happen, so retry if this +# occurs. +do_ctdb_reloadips () +{ + local retry_max=10 + local retry_count=0 + while : ; do + if ctdb_onnode "$test_node" "reloadips all" ; then + return 0 + fi + + if [ "$out" != "Failed to disable takeover runs" ] ; then + return 1 + fi + + if [ $retry_count -ge $retry_max ] ; then + return 1 + fi + + retry_count=$((retry_count + 1)) + echo "Retrying..." + sleep_for 1 + done +} + + +echo "Removing IP $test_ip from node $test_node" + +try_command_on_node $test_node "mv $addresses $backup && grep -v '^${test_ip}/' $backup >$addresses" + +do_ctdb_reloadips + +try_command_on_node $test_node $CTDB ip + +if grep "^${test_ip} " <<<"$out" ; then + cat <<EOF +BAD: node $test_node can still host IP $test_ip: +$out +EOF + exit 1 +fi + +cat <<EOF +GOOD: node $test_node is no longer hosting IP $test_ip: +$out +EOF + +ctdb_onnode "$test_node" sync + + +echo "Restoring addresses" +restore_public_addresses + +do_ctdb_reloadips + +echo "Getting list of public IPs on node $test_node" +try_command_on_node $test_node "$CTDB ip | tail -n +2" + +if [ -z "$out" ] ; then + echo "BAD: node $test_node has no ips" + exit 1 +fi + +cat <<EOF +GOOD: node $test_node has these addresses: +$out +EOF + +ctdb_onnode "$test_node" sync + + +echo "Emptying public addresses file on $test_node" + +try_command_on_node $test_node "mv $addresses $backup && touch $addresses" + +do_ctdb_reloadips + +echo "Getting list of public IPs on node $test_node" +try_command_on_node $test_node "$CTDB ip | tail -n +2" + +if [ -n "$out" ] ; then + cat <<EOF +BAD: node $test_node still has ips: +$out +EOF + exit 1 +fi + +echo "GOOD: no IPs left on node $test_node" diff --git a/ctdb/tests/INTEGRATION/failover/pubips.013.failover_noop.sh b/ctdb/tests/INTEGRATION/failover/pubips.013.failover_noop.sh new file mode 100755 index 0000000..77f9a63 --- /dev/null +++ b/ctdb/tests/INTEGRATION/failover/pubips.013.failover_noop.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +# Check that CTDB operates correctly if: + +# * failover is disabled; or +# * there are 0 public IPs configured + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -n + +echo "Starting CTDB with failover disabled..." +ctdb_nodes_start_custom -F + +select_test_node + +echo "Getting IP allocation..." + +# $test_node set above by select_test_node() +# shellcheck disable=SC2154 +try_command_on_node -v "$test_node" "$CTDB ip all | tail -n +2" + +while read ip pnn ; do + if [ "$pnn" != "-1" ] ; then + die "BAD: IP address ${ip} is assigned to node ${pnn}" + fi +done <"$outfile" + +echo "GOOD: All IP addresses are unassigned" + +echo "----------------------------------------" + +echo "Starting CTDB with an empty public addresses configuration..." +ctdb_nodes_start_custom -P /dev/null + +echo "Trying explicit ipreallocate..." +ctdb_onnode "$test_node" ipreallocate + +echo "Good, that seems to work!" +echo diff --git a/ctdb/tests/INTEGRATION/failover/pubips.014.iface_gc.sh b/ctdb/tests/INTEGRATION/failover/pubips.014.iface_gc.sh new file mode 100755 index 0000000..845b4b5 --- /dev/null +++ b/ctdb/tests/INTEGRATION/failover/pubips.014.iface_gc.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +# Verify that an interface is deleted when all IPs on it are deleted + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips + +# Find interfaces on test node +try_command_on_node $test_node "$CTDB ifaces -X" +ifaces=$(awk -F'|' 'NR > 1 { print $2 }' "$outfile") +echo "Node ${test_node} has interfaces: ${ifaces}" + +# Delete all IPs on each interface... deleting IPs from one interface +# can cause other interfaces to disappear, so we need to be careful... +for i in $ifaces ; do + try_command_on_node $test_node "$CTDB ifaces -X" + info=$(awk -F'|' -v iface="$i" '$2 == iface { print $0 }' "$outfile") + + if [ -z "$info" ] ; then + echo "Interface ${i} missing... assuming already deleted!" + continue + fi + + echo "Deleting IPs on interface ${i}, with this information:" + echo " $info" + + try_command_on_node $test_node "$CTDB ip -v -X | tail -n +2" + awk -F'|' -v i="$i" \ + '$6 == i { print $2 }' "$outfile" | + while read ip ; do + echo " $ip" + try_command_on_node $test_node "$CTDB delip $ip" + done + try_command_on_node $test_node "$CTDB ipreallocate" + + try_command_on_node $test_node "$CTDB ifaces -X" + info=$(awk -F'|' -v iface="$i" '$2 == iface { print $0 }' "$outfile") + + if [ -z "$info" ] ; then + echo "GOOD: Interface ${i} has been garbage collected" + else + echo "BAD: Interface ${i} still exists" + echo "$out" + exit 1 + fi +done diff --git a/ctdb/tests/INTEGRATION/failover/pubips.020.moveip.sh b/ctdb/tests/INTEGRATION/failover/pubips.020.moveip.sh new file mode 100755 index 0000000..8daf3f5 --- /dev/null +++ b/ctdb/tests/INTEGRATION/failover/pubips.020.moveip.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb moveip' allows movement of public IPs between nodes + +# This test does not do any network level checks to make sure IP +# addresses are actually on interfaces. It just consults "ctdb ip". + +# To work, this test ensures that IPAllocAlgorithm is not set to 0 +# (Deterministic IPs) and sets NoIPFailback. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips + +sanity_check_ips () +{ + echo "Sanity checking IPs..." + + local x ipp prev + prev="" + while read x ipp ; do + [ "$ipp" = "-1" ] && break + if [ -n "$prev" -a "$ipp" != "$prev" ] ; then + echo "OK" + return 0 + fi + prev="$ipp" + done <"$outfile" + + echo "BAD: a node was -1 or IPs are only assigned to one node:" + cat "$outfile" + echo "Are you running an old version of CTDB?" + return 1 +} + +sanity_check_ips + +# Find a target node - it must be willing to host $test_ip + +# $test_node set above by select_test_node_and_ips() +# shellcheck disable=SC2154 +try_command_on_node "$test_node" "$CTDB listnodes | wc -l" +num_nodes="$out" +to_node="" +for i in $(seq 0 $(($num_nodes - 1)) ) ; do + [ $i -ne $test_node ] || continue + all_ips_on_node $i + while read ip x ; do + if [ "$ip" = "$test_ip" ] ; then + to_node="$i" + break 2 + fi + done <"$outfile" +done + +if [ -z "$to_node" ] ; then + echo "Unable to find target node" + exit 1 +fi + +echo "Target node is ${to_node}" + +echo "Setting IPAllocAlgorithm=2 to avoid Deterministic IPs..." +try_command_on_node -q all $CTDB setvar IPAllocAlgorithm 2 + +echo "Turning on NoIPFailback..." +try_command_on_node -q all $CTDB setvar NoIPFailback 1 + +echo "Attempting to move ${test_ip} from node ${test_node} to node ${to_node}" +try_command_on_node $test_node $CTDB moveip $test_ip $to_node +wait_until_ips_are_on_node '!' $test_node $test_ip +wait_until_ips_are_on_node $to_node $test_ip diff --git a/ctdb/tests/INTEGRATION/failover/pubips.030.disable_enable.sh b/ctdb/tests/INTEGRATION/failover/pubips.030.disable_enable.sh new file mode 100755 index 0000000..3f40097 --- /dev/null +++ b/ctdb/tests/INTEGRATION/failover/pubips.030.disable_enable.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +# Verify the operation of "ctdb disable" and "ctdb enable" + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +######################################## + +select_test_node_and_ips + +echo "Disabling node $test_node" +try_command_on_node 1 $CTDB disable -n $test_node +wait_until_node_has_status $test_node disabled 30 all +wait_until_node_has_no_ips "$test_node" + +echo "Re-enabling node $test_node" +try_command_on_node 1 $CTDB enable -n $test_node +wait_until_node_has_status $test_node enabled 30 all +wait_until_node_has_some_ips "$test_node" diff --git a/ctdb/tests/INTEGRATION/failover/pubips.032.stop_continue.sh b/ctdb/tests/INTEGRATION/failover/pubips.032.stop_continue.sh new file mode 100755 index 0000000..f5936b0 --- /dev/null +++ b/ctdb/tests/INTEGRATION/failover/pubips.032.stop_continue.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +# Verify the operation of "ctdb stop" and "ctdb continue" + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips + +echo "Stopping node ${test_node}..." +try_command_on_node 1 $CTDB stop -n $test_node +wait_until_node_has_status $test_node stopped +wait_until_node_has_no_ips "$test_node" + +echo "Continuing node $test_node" +try_command_on_node 1 $CTDB continue -n $test_node +wait_until_node_has_status $test_node notstopped +wait_until_node_has_some_ips "$test_node" diff --git a/ctdb/tests/INTEGRATION/failover/pubips.040.NoIPTakeover.sh b/ctdb/tests/INTEGRATION/failover/pubips.040.NoIPTakeover.sh new file mode 100755 index 0000000..e99a265 --- /dev/null +++ b/ctdb/tests/INTEGRATION/failover/pubips.040.NoIPTakeover.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb setvar NoIPTakeover 1' stops IP addresses being taken over + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +ctdb_get_all_pnns +# out is set above +# shellcheck disable=SC2154 +num_nodes=$(echo "$out" | wc -l | tr -d '[:space:]') +echo "There are $num_nodes nodes..." + +if [ "$num_nodes" -lt 2 ] ; then + echo "Less than 2 nodes!" + exit 1 +fi + +select_test_node_and_ips + + +# sets: num +count_ips_on_node () +{ + local node="$1" + + ctdb_onnode "$node" ip + # outfile is set by ctdb_onnode() above + # shellcheck disable=SC2154,SC2126 + # * || true is needed to avoid command failure when there are no matches + # * Using "wc -l | tr -d '[:space:]'" is our standard + # pattern... and "grep -c" requires handling of special case + # for no match + num=$(grep -v 'Public' "$outfile" | \ + grep " ${node}\$" | \ + wc -l | \ + tr -d '[:space:]') + echo "Number of addresses on node ${node}: ${num}" +} + + +# test_node is set by select_test_node_and_ips() above +# shellcheck disable=SC2154 +count_ips_on_node "$test_node" + +echo "Turning on NoIPTakeover on all nodes" +ctdb_onnode all "setvar NoIPTakeover 1" +ctdb_onnode "$test_node" ipreallocate + +echo "Disable node ${test_node}" +ctdb_onnode "$test_node" disable + +count_ips_on_node "$test_node" +if [ "$num" != "0" ] ; then + test_fail "BAD: node 1 still hosts IP addresses" +fi + + +echo "Enable node 1 again" +ctdb_onnode "$test_node" enable + +count_ips_on_node "$test_node" +if [ "$num" != "0" ] ; then + test_fail "BAD: node 1 took over IP addresses" +fi + + +echo "OK: IP addresses were not taken over" diff --git a/ctdb/tests/INTEGRATION/failover/pubips.050.missing_ip.sh b/ctdb/tests/INTEGRATION/failover/pubips.050.missing_ip.sh new file mode 100755 index 0000000..543f9a9 --- /dev/null +++ b/ctdb/tests/INTEGRATION/failover/pubips.050.missing_ip.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +# Verify that the recovery daemon handles unhosted IPs properly + +# This test does not do any network level checks to make sure the IP +# address is actually on an interface. It just consults "ctdb ip". + +# This is a variation of the "addip" test. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips + +echo "Running test against node $test_node and IP $test_ip" + +get_test_ip_mask_and_iface + +echo "Deleting IP $test_ip from all nodes" +delete_ip_from_all_nodes $test_ip +try_command_on_node -v $test_node $CTDB ipreallocate +wait_until_ips_are_on_node ! $test_node $test_ip + +try_command_on_node -v all $CTDB ip + +my_exit_hook () +{ + if ctdb_test_on_cluster ; then + onnode -q all $CTDB event script enable legacy "10.interface" + fi +} + +ctdb_test_exit_hook_add my_exit_hook + +# This forces us to wait until the ipreallocated associated with the +# delips is complete. +try_command_on_node $test_node $CTDB sync + +# Wait for a monitor event. Then the next steps are unlikely to occur +# in the middle of a monitor event and will have the expected effect. +wait_for_monitor_event $test_node + +if ctdb_test_on_cluster ; then + # Stop monitor events from bringing up the link status of an interface + try_command_on_node $test_node $CTDB event script disable legacy 10.interface +fi + +echo "Marking interface $iface down on node $test_node" +try_command_on_node $test_node $CTDB setifacelink $iface down + +echo "Adding IP $test_ip to node $test_node" +try_command_on_node $test_node $CTDB addip $test_ip/$mask $iface +try_command_on_node $test_node $CTDB ipreallocate + +echo "Wait long enough for IP verification to have taken place" +sleep_for 15 + +echo "Ensuring that IP ${test_ip} is not hosted on node ${test_node} when interface is down" +if ips_are_on_node '!' $test_node $test_ip; then + echo "GOOD: the IP has not been hosted while the interface is down" +else + echo "BAD: the IP is hosted but the interface is down" + exit 1 +fi + +echo "Marking interface $iface up on node $test_node" +try_command_on_node $test_node $CTDB setifacelink $iface up +wait_until_ips_are_on_node $test_node $test_ip diff --git a/ctdb/tests/INTEGRATION/simple/README b/ctdb/tests/INTEGRATION/simple/README new file mode 100644 index 0000000..3ac738d --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/README @@ -0,0 +1,2 @@ +Simple integration tests. These can be run against a pool of CTDB +daemons running on the local machine - aka "local daemons". diff --git a/ctdb/tests/INTEGRATION/simple/basics.000.onnode.sh b/ctdb/tests/INTEGRATION/simple/basics.000.onnode.sh new file mode 100755 index 0000000..4ca6e46 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/basics.000.onnode.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +# Use 'onnode' to confirm connectivity between all cluster nodes + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +echo "Checking connectivity between nodes..." +onnode all onnode -p all hostname diff --git a/ctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh b/ctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh new file mode 100755 index 0000000..aafe27e --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/basics.001.listnodes.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb listnodes' shows the list of nodes + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node -v 0 "$CTDB listnodes" + +num_nodes=$(wc -l <"$outfile") + +# Each line should look like an IP address. +ipv4_pat='[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+' +ipv6_pat='[[:xdigit:]]+:[[:xdigit:]:]+[[:xdigit:]]+' +sanity_check_output \ + 2 \ + "^${ipv4_pat}|${ipv6_pat}\$" + +out_0="$out" + +echo "Checking other nodes..." + +n=1 +while [ $n -lt $num_nodes ] ; do + echo -n "Node ${n}: " + try_command_on_node $n "$CTDB listnodes" + if [ "$out_0" = "$out" ] ; then + echo "OK" + else + echo "DIFFERs from node 0:" + echo "$out" + exit 1 + fi + n=$(($n + 1)) +done diff --git a/ctdb/tests/INTEGRATION/simple/basics.002.tunables.sh b/ctdb/tests/INTEGRATION/simple/basics.002.tunables.sh new file mode 100755 index 0000000..6f362c6 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/basics.002.tunables.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash + +# Verify the operation of "ctdb listvars", "ctdb getvar", "ctdb setvar" + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node -v 0 "$CTDB listvars" + +sanity_check_output \ + 5 \ + '^[[:alpha:]][[:alnum:]]+[[:space:]]*=[[:space:]]*[[:digit:]]+$' + +echo "Verifying all variable values using \"ctdb getvar\"..." + +while read var x val ; do + try_command_on_node 0 "$CTDB getvar $var" + + val2="${out#*= }" + + if [ "$val" != "$val2" ] ; then + echo "MISMATCH on $var: $val != $val2" + exit 1 + fi +done <"$outfile" + +echo "GOOD: all tunables match" + +var="RecoverTimeout" + +try_command_on_node -v 0 $CTDB getvar $var + +val="${out#*= }" + +echo "Going to try incrementing it..." + +incr=$(($val + 1)) + +try_command_on_node 0 $CTDB setvar $var $incr + +echo "That seemed to work, let's check the value..." + +try_command_on_node -v 0 $CTDB getvar $var + +newval="${out#*= }" + +if [ "$incr" != "$newval" ] ; then + echo "Nope, that didn't work..." + exit 1 +fi + +echo "Look's good! Now verifying with \"ctdb listvars\"" +try_command_on_node -v 0 "$CTDB listvars | grep '^$var'" + +check="${out#*= }" + +if [ "$incr" != "$check" ] ; then + echo "Nope, that didn't work..." + exit 1 +fi + +echo "Look's good! Putting the old value back..." +cmd="$CTDB setvar $var $val" +try_command_on_node 0 $cmd diff --git a/ctdb/tests/INTEGRATION/simple/basics.003.ping.sh b/ctdb/tests/INTEGRATION/simple/basics.003.ping.sh new file mode 100755 index 0000000..8071762 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/basics.003.ping.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# Verify the operation of the 'ctdb ping' command +# +# 1. Run the 'ctdb ping' command on one of the nodes and verify that it +# shows valid and expected output. +# 2. Shutdown one of the cluster nodes, using the 'ctdb shutdown' +# command. +# 3. Run the 'ctdb ping -n <node>' command from another node to this +# node. +# 4. Verify that the command is not successful since th ctdb daemon is +# not running on the node. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node -v 0 "$CTDB ping -n 1" + +sanity_check_output \ + 1 \ + '^response from 1 time=-?[.0-9]+ sec[[:space:]]+\([[:digit:]]+ clients\)$' + +ctdb_onnode -v 1 "shutdown" + +wait_until_node_has_status 1 disconnected 30 0 + +try_command_on_node -v 0 "! $CTDB ping -n 1" + +sanity_check_output \ + 1 \ + "(: ctdb_control error: ('ctdb_control to disconnected node'|'node is disconnected')|Unable to get ping response from node 1|Node 1 is DISCONNECTED|ctdb_control for getpnn failed|: Can not access node. Node is not operational\.|Node 1 has status DISCONNECTED\|UNHEALTHY\|INACTIVE$)" diff --git a/ctdb/tests/INTEGRATION/simple/basics.004.getpid.sh b/ctdb/tests/INTEGRATION/simple/basics.004.getpid.sh new file mode 100755 index 0000000..27025df --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/basics.004.getpid.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb getpid' works as expected + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" +echo "There are $num_nodes nodes..." + +# Call getpid a few different ways and make sure the answer is always the same. + +try_command_on_node -v 0 "onnode -q all $CTDB getpid" +pids_onnode="$out" + +cmd="" +n=0 +while [ $n -lt $num_nodes ] ; do + cmd="${cmd}${cmd:+; }$CTDB getpid -n $n" + n=$(($n + 1)) +done +try_command_on_node -v 0 "( $cmd )" +pids_getpid_n="$out" + +if [ "$pids_onnode" = "$pids_getpid_n" ] ; then + echo "They're the same... cool!" +else + die "Error: they differ." +fi + +echo "Checking each PID for validity" + +n=0 +while [ $n -lt $num_nodes ] ; do + read pid + try_command_on_node $n "ls -l /proc/${pid}/exe | sed -e 's@.*/@@'" + echo -n "Node ${n}, PID ${pid} looks to be running \"$out\" - " + case "$out" in + ctdbd) : ;; + memcheck*) + if [ -z "$VALGRIND" ] ; then + die "BAD" + fi + ;; + *) die "BAD" + esac + + echo "GOOD!" + + n=$(($n + 1)) +done <<<"$pids_onnode" diff --git a/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh b/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh new file mode 100755 index 0000000..c6212fd --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/basics.005.process_exists.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb process-exists' shows correct information + +# The implementation is creative about how it gets PIDs for existing and +# non-existing processes. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +test_node=1 +srvid=0xAE00000012345678 + +# Execute a ctdb client on $test_node that will last for 60 seconds. +# It should still be there when we check. +try_command_on_node -v $test_node \ + "$CTDB_TEST_WRAPPER exec dummy_client -n 10 -S ${srvid} >/dev/null 2>&1 & echo \$!" +client_pid="$out" + +cleanup () +{ + if [ -n "$client_pid" ] ; then + onnode $test_node kill -9 "$client_pid" + fi +} + +ctdb_test_exit_hook_add cleanup + +echo "Waiting until PID $client_pid is registered on node $test_node" +status=0 +wait_until 30 try_command_on_node $test_node \ + "$CTDB process-exists ${client_pid}" || status=$? +echo "$out" + +if [ $status -eq 0 ] ; then + echo "OK" +else + die "BAD" +fi + +echo "Checking for PID $client_pid with SRVID $srvid on node $test_node" +status=0 +try_command_on_node $test_node \ + "$CTDB process-exists ${client_pid} ${srvid}" || status=$? +echo "$out" + +if [ $status -eq 0 ] ; then + echo "OK" +else + die "BAD" +fi + +echo "Checking for PID $client_pid with SRVID $client_pid on node $test_node" +try_command_on_node -v $test_node \ + "! $CTDB process-exists ${client_pid} ${client_pid}" + +# Now just echo the PID of the ctdb daemon on test node. +# This is not a ctdb client and process-exists should return error. +try_command_on_node $test_node "ctdb getpid" +pid="$out" + +echo "Checking for PID $pid on node $test_node" +try_command_on_node -v $test_node "! $CTDB process-exists ${pid}" diff --git a/ctdb/tests/INTEGRATION/simple/basics.010.statistics.sh b/ctdb/tests/INTEGRATION/simple/basics.010.statistics.sh new file mode 100755 index 0000000..d97e035 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/basics.010.statistics.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb statistics' works as expected + +# This is pretty superficial and could do more validation. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +pattern='^(CTDB version 1|Current time of statistics[[:space:]]*:.*|Statistics collected since[[:space:]]*:.*|Gathered statistics for [[:digit:]]+ nodes|[[:space:]]+[[:alpha:]_]+[[:space:]]+[[:digit:]]+|[[:space:]]+(node|client|timeouts|locks)|[[:space:]]+([[:alpha:]_]+_latency|max_reclock_[[:alpha:]]+)[[:space:]]+[[:digit:]-]+\.[[:digit:]]+[[:space:]]sec|[[:space:]]*(locks_latency|reclock_ctdbd|reclock_recd|call_latency|lockwait_latency|childwrite_latency)[[:space:]]+MIN/AVG/MAX[[:space:]]+[-.[:digit:]]+/[-.[:digit:]]+/[-.[:digit:]]+ sec out of [[:digit:]]+|[[:space:]]+(hop_count_buckets|lock_buckets):[[:space:][:digit:]]+)$' + +try_command_on_node -v 1 "$CTDB statistics" + +sanity_check_output 40 "$pattern" diff --git a/ctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh b/ctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh new file mode 100755 index 0000000..51f34d9 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/basics.011.statistics_reset.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb statisticsreset' works as expected + +# This is pretty superficial. It just checks that a few particular +# items reduce. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +get_stat () +{ + local label="$1" + + cat "$outfile" | + sed -rn -e "s@^[[:space:]]+${label}[[:space:]]+([[:digit:]])@\1@p" | + head -1 +} + +check_reduced () +{ + local label="$1" + local before="$2" + local after="$3" + + if [ $after -lt $before ] ; then + echo "GOOD: ${label} reduced from ${before} to ${after}" + else + die "BAD: ${label} did not reduce from ${before} to ${after}" + fi +} + +n=0 +while [ $n -lt $num_nodes ] ; do + echo "Getting initial statistics for node ${n}..." + + try_command_on_node -v $n $CTDB statistics + + before_req_control=$(get_stat "req_control") + before_reply_control=$(get_stat "reply_control") + before_node_packets_recv=$(get_stat "node_packets_recv") + + try_command_on_node $n $CTDB statisticsreset + + try_command_on_node -v $n $CTDB statistics + + after_req_control=$(get_stat "req_control") + after_reply_control=$(get_stat "reply_control") + after_node_packets_recv=$(get_stat "node_packets_recv") + + check_reduced "req_control" "$before_req_control" "$after_req_control" + check_reduced "reply_control" "$before_reply_control" "$after_reply_control" + check_reduced "node_packets_recv" "$before_node_packets_recv" "$after_node_packets_recv" + + n=$(($n + 1)) +done diff --git a/ctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh b/ctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh new file mode 100755 index 0000000..180b4ae --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.001.stop_leader_yield.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb stop' causes a node to yield the leader role + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +# This is the node used to execute commands +select_test_node +echo + +# test_node set by select_test_node() +# shellcheck disable=SC2154 +leader_get "$test_node" + +# leader set by leader_get() +# shellcheck disable=SC2154 +echo "Stopping leader ${leader}..." +ctdb_onnode "$test_node" stop -n "$leader" + +wait_until_node_has_status "$leader" stopped + +wait_until_leader_has_changed "$test_node" diff --git a/ctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh b/ctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh new file mode 100755 index 0000000..234869c --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.002.ban_leader_yield.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb ban' causes a node to yield the leader role + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +# This is the node used to execute commands +select_test_node +echo + +# test_node set by select_test_node() +# shellcheck disable=SC2154 +leader_get "$test_node" + +# leader set by leader_get() +# shellcheck disable=SC2154 +echo "Banning leader ${leader}..." +ctdb_onnode "$test_node" ban 300 -n "$leader" + +wait_until_node_has_status "$leader" banned + +wait_until_leader_has_changed "$test_node" diff --git a/ctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh b/ctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh new file mode 100755 index 0000000..94bcf27 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.003.capability_leader_yield.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb ban' causes a node to yield the leader role + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +# This is the node used to execute commands +select_test_node +echo + +# test_node set by select_test_node() +# shellcheck disable=SC2154 +leader_get "$test_node" + +# leader set by leader_get() +# shellcheck disable=SC2154 +echo "Removing leader capability from leader ${leader}..." +ctdb_onnode "$test_node" setleaderrole off -n "$leader" + +wait_until_leader_has_changed "$test_node" diff --git a/ctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh new file mode 100755 index 0000000..95f522d --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.006.stop_leader_yield_no_lock.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb stop' causes a node to yield the leader role + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -n + +ctdb_nodes_start_custom -C "cluster lock" + +# This is the node used to execute commands +select_test_node +echo + +# test_node set by select_test_node() +# shellcheck disable=SC2154 +leader_get "$test_node" + +# leader set by leader_get() +# shellcheck disable=SC2154 +echo "Stopping leader ${leader}..." +ctdb_onnode "$test_node" stop -n "$leader" + +wait_until_node_has_status "$leader" stopped + +wait_until_leader_has_changed "$test_node" diff --git a/ctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh new file mode 100755 index 0000000..0ef4e2b --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.007.ban_leader_yield_no_lock.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb ban' causes a node to yield the leader role + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -n + +ctdb_nodes_start_custom -C "cluster lock" + +# This is the node used to execute commands +select_test_node +echo + +# test_node set by select_test_node() +# shellcheck disable=SC2154 +leader_get "$test_node" + +# leader set by leader_get() +# shellcheck disable=SC2154 +echo "Banning leader ${leader}..." +ctdb_onnode "$test_node" ban 300 -n "$leader" + +wait_until_node_has_status "$leader" banned + +wait_until_leader_has_changed "$test_node" diff --git a/ctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh new file mode 100755 index 0000000..4489bc5 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.008.capability_leader_yield_no_lock.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +# Verify that removing the the leader capability causes a node to +# yield the leader role + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -n + +ctdb_nodes_start_custom -C "cluster lock" + +# This is the node used to execute commands +select_test_node +echo + +# test_node set by select_test_node() +# shellcheck disable=SC2154 +leader_get "$test_node" + +# leader set by leader_get() +# shellcheck disable=SC2154 +echo "Removing leader capability from leader ${leader}..." +ctdb_onnode "$test_node" setleaderrole off -n "$leader" + +wait_until_leader_has_changed "$test_node" diff --git a/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh b/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh new file mode 100755 index 0000000..3a76654 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.010.getrelock.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# Verify that "ctdb getreclock" gets the recovery lock correctly + +# Make sure the recovery lock is consistent across all nodes. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +echo "Check that recovery lock is set the same on all nodes..." +ctdb_onnode all getreclock + +# outfile is set above by ctdb_onnode +# shellcheck disable=SC2154 +n=$(sort -u "$outfile" | wc -l | tr -d '[:space:]') + +case "$n" in +0) echo "GOOD: Recovery lock is unset on all nodes" ;; +1) echo "GOOD: All nodes have the same recovery lock setting" ;; +*) ctdb_test_fail "BAD: Recovery lock setting differs across nodes" ;; +esac diff --git a/ctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh b/ctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh new file mode 100755 index 0000000..d043c7e --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.012.reclock_command.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +# Check that CTDB operates correctly if the recovery lock is configured +# as a command. + +# This test works only with local daemons. On a real cluster it has +# no way of updating configuration. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -n + +echo "Starting CTDB with recovery lock command configured..." +ctdb_nodes_start_custom -R + +echo "Good, that seems to work!" diff --git a/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh new file mode 100755 index 0000000..9088a80 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash + +# Verify that the cluster recovers if the recovery lock is removed. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -n + +echo "Starting CTDB with cluster lock recheck interval set to 5s..." +ctdb_nodes_start_custom -r 5 + +generation_has_changed () +{ + local node="$1" + local generation_init="$2" + + # Leak this so it can be printed by test + generation_new="" + + ctdb_onnode "$node" status + # shellcheck disable=SC2154 + # $outfile set by ctdb_onnode() above + generation_new=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile") + + [ "$generation_new" != "$generation_init" ] +} + +select_test_node + +echo "Get recovery lock setting" +# shellcheck disable=SC2154 +# $test_node set by select_test_node() above +ctdb_onnode "$test_node" getreclock +# shellcheck disable=SC2154 +# $out set by ctdb_onnode() above +reclock_setting="$out" + +if [ -z "$reclock_setting" ] ; then + ctdb_test_skip "Recovery lock is not set" +fi + +t="${reclock_setting% 5}" +reclock="${t##* }" + +if [ ! -f "$reclock" ] ; then + ctdb_test_error "Recovery lock file \"${reclock}\" is missing" +fi + +echo "Recovery lock setting is \"${reclock_setting}\"" +echo "Recovery lock file is \"${reclock}\"" +echo + +leader_get "$test_node" + +generation_get + +echo "Remove recovery lock" +rm "$reclock" +echo + +# This will mean an election has taken place and a recovery has occured +wait_until_generation_has_changed "$test_node" + +# shellcheck disable=SC2154 +# $leader set by leader_get() above +leader_old="$leader" + +leader_get "$test_node" + +if [ "$leader" != "$leader_old" ] ; then + echo "OK: Leader has changed to node ${leader_new}" +fi +echo "GOOD: Leader is still node ${leader}" +echo + +cluster_is_healthy diff --git a/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh b/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh new file mode 100755 index 0000000..147547d --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +# Verify that if the directory containing the cluster lock is moved +# then the current cluster leader no longer claims to be leader, and +# no other node claims to be leader. Confirm that if the directory is +# moved back then a node will become leader. + +# This simulates the cluster filesystem containing the cluster lock +# being unmounted and remounted. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -n + +echo "Starting CTDB with cluster lock recheck interval set to 5s..." +ctdb_nodes_start_custom -r 5 + +select_test_node + +echo "Get cluster lock setting" +# shellcheck disable=SC2154 +# $test_node set by select_test_node() above +ctdb_onnode "$test_node" getreclock +# shellcheck disable=SC2154 +# $out set by ctdb_onnode() above +reclock_setting="$out" + +if [ -z "$reclock_setting" ] ; then + ctdb_test_skip "Cluster lock is not set" +fi + +t="${reclock_setting% 5}" +reclock="${t##* }" + +if [ ! -f "$reclock" ] ; then + ctdb_test_error "Cluster lock file \"${reclock}\" is missing" +fi + +echo "Cluster lock setting is \"${reclock_setting}\"" +echo "Cluster lock file is \"${reclock}\"" +echo + +leader_get "$test_node" + +dir=$(dirname "$reclock") + +echo "Rename cluster lock directory" +mv "$dir" "${dir}.$$" + +wait_until_leader_has_changed "$test_node" +echo + +# shellcheck disable=SC2154 +# $leader set by leader_get() & wait_until_leader_has_changed(), above +if [ "$leader" != "UNKNOWN" ]; then + test_fail "BAD: leader is ${leader}" +fi + +echo "OK: leader is UNKNOWN" +echo + +echo 'Get "leader timeout":' +conf_tool="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb-config" +# shellcheck disable=SC2154 +# $test_node set by select_test_node() above +try_command_on_node "$test_node" "${conf_tool} get cluster 'leader timeout'" +# shellcheck disable=SC2154 +# $out set by ctdb_onnode() above +leader_timeout="$out" +echo "Leader timeout is ${leader_timeout}s" +echo + +sleep_time=$((2 * leader_timeout)) +echo "Waiting for ${sleep_time}s to confirm leader stays UNKNOWN" +sleep_for $sleep_time + +leader_get "$test_node" +if [ "$leader" = "UNKNOWN" ]; then + echo "OK: leader is UNKNOWN" + echo +else + test_fail "BAD: leader is ${leader}" +fi + +echo "Restore cluster lock directory" +mv "${dir}.$$" "$dir" + +wait_until_leader_has_changed "$test_node" diff --git a/ctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh b/ctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh new file mode 100755 index 0000000..b841f5b --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.020.message_ring.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash + +# Run the message_ring test and sanity check the output + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +echo "Running message_ring on all $num_nodes nodes." +try_command_on_node -v -p all $CTDB_TEST_WRAPPER $VALGRIND message_ring -n $num_nodes + +# Get the last line of output. +last=$(tail -n 1 "$outfile") + +pat='^(Waiting for cluster|Ring\[[[:digit:]]+\]: [[:digit:]]+(\.[[:digit:]]+)? msgs/sec \(\+ve=[[:digit:]]+ -ve=[[:digit:]]+\))$' +sanity_check_output 1 "$pat" + +# $last should look like this: +# Ring[1]: 10670.93 msgs/sec (+ve=53391 -ve=53373) +stuff="${last##Ring\[*\]: }" +mps="${stuff% msgs/sec*}" + +if [ ${mps%.*} -ge 10 ] ; then + echo "OK: $mps msgs/sec >= 10 msgs/sec" +else + echo "BAD: $mps msgs/sec < 10 msgs/sec" + exit 1 +fi + +stuff="${stuff#*msgs/sec (+ve=}" +positive="${stuff%% *}" + +if [ $positive -ge 10 ] ; then + echo "OK: +ive ($positive) >= 10" +else + echo "BAD: +ive ($positive) < 10" + exit 1 +fi + +stuff="${stuff#*-ve=}" +negative="${stuff%)}" + +if [ $negative -ge 10 ] ; then + echo "OK: -ive ($negative) >= 10" +else + echo "BAD: -ive ($negative) < 10" + exit 1 +fi diff --git a/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh b/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh new file mode 100755 index 0000000..f86d080 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.021.tunnel_ring.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# Run tunnel_test and sanity check the output + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +echo "Running tunnel_test on all $num_nodes nodes." +try_command_on_node -v -p all $CTDB_TEST_WRAPPER $VALGRIND \ + tunnel_test -t 30 -n $num_nodes + +# Get the last line of output. +last=$(tail -n 1 "$outfile") + +pat='^(Waiting for cluster|pnn\[[[:digit:]]+\] [[:digit:]]+(\.[[:digit:]]+)? msgs/sec)$' +sanity_check_output 1 "$pat" + +# $last should look like this: +# pnn[2] count=85400 +stuff="${last##pnn\[*\] }" +mps="${stuff% msgs/sec}" + +if [ ${mps%.*} -ge 10 ] ; then + echo "OK: $mps msgs/sec >= 10 msgs/sec" +else + echo "BAD: $mps msgs/sec < 10 msgs/sec" + exit 1 +fi diff --git a/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh b/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh new file mode 100755 index 0000000..7bca58c --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +# Verify that nothing bad occurs if a node stalls and the leader +# broadcast timeout triggers + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +select_test_node +echo + +echo 'Get "leader timeout":' +conf_tool="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb-config" +# shellcheck disable=SC2154 +# $test_node set by select_test_node() above +try_command_on_node "$test_node" "${conf_tool} get cluster 'leader timeout'" +# shellcheck disable=SC2154 +# $out set by ctdb_onnode() above +leader_timeout="$out" +echo "Leader timeout is ${leader_timeout} seconds" +echo + +# Assume leader timeout is reasonable and doesn't cause node to be +# disconnected +stall_time=$((leader_timeout * 2)) + +generation_get "$test_node" + +echo "Get ctdbd PID on node ${test_node}..." +ctdb_onnode -v "$test_node" "getpid" +ctdbd_pid="$out" +echo + +echo "Sending SIGSTOP to ctdbd on ${test_node}" +try_command_on_node "$test_node" "kill -STOP ${ctdbd_pid}" + +sleep_for "$stall_time" + +echo "Sending SIGCONT to ctdbd on ${test_node}" +try_command_on_node "$test_node" "kill -CONT ${ctdbd_pid}" +echo + +wait_until_generation_has_changed "$test_node" + +cluster_is_healthy diff --git a/ctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh b/ctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh new file mode 100755 index 0000000..2835e55 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.090.unreachable.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# Verify an error occurs if a ctdb command is run against a node +# without a ctdbd + +# That is, check that an error message is printed if an attempt is made +# to execute a ctdb command against a node that is not running ctdbd. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +test_node=1 + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" +echo "There are $num_nodes nodes." + +echo "Shutting down node ${test_node}..." +try_command_on_node $test_node $CTDB shutdown + +wait_until_node_has_status $test_node disconnected 30 0 + +wait_until_node_has_status 0 recovered 30 0 + +pat="ctdb_control error: 'ctdb_control to disconnected node'|ctdb_control error: 'node is disconnected'|Node $test_node is DISCONNECTED|Node $test_node has status DISCONNECTED\|UNHEALTHY\|INACTIVE" + +for i in ip disable enable "ban 0" unban listvars ; do + try_command_on_node -v 0 ! $CTDB $i -n $test_node + + if egrep -q "$pat" "$outfile" ; then + echo "OK: \"ctdb ${i}\" fails with expected \"disconnected node\" message" + else + echo "BAD: \"ctdb ${i}\" does not fail with expected \"disconnected node\" message" + exit 1 + fi +done diff --git a/ctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh b/ctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh new file mode 100755 index 0000000..be71750 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.091.version_check.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +# Check that the CTDB version consistency checking operates correctly + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init + +select_test_node + +try_command_on_node -v "$test_node" ctdb version +version="$out" + +major="${version%%.*}" +rest="${version#*.}" +minor="${rest%%.*}" + +echo "Node ${test_node} has version ${major}.${minor}" + +# Unchanged version - this should work +export CTDB_TEST_SAMBA_VERSION=$(( (major << 16) | minor )) +printf '\nRestarting node %d with CTDB_TEST_SAMBA_VERSION=0x%08x\n' \ + "$test_node" \ + "$CTDB_TEST_SAMBA_VERSION" +ctdb_nodes_restart "$test_node" +wait_until_ready +echo "GOOD: ctdbd restarted successfully on node ${test_node}" + +d="$CTDB_SCRIPTS_HELPER_BINDIR" +try_command_on_node "$test_node" "${d}/ctdb-path" "pidfile" "ctdbd" +pidfile="$out" + +# Changed major version - this should fail +export CTDB_TEST_SAMBA_VERSION=$(( ((major + 1) << 16) | minor )) +printf '\nRestarting node %d with CTDB_TEST_SAMBA_VERSION=0x%08x\n' \ + "$test_node" \ + "$CTDB_TEST_SAMBA_VERSION" +ctdb_nodes_restart "$test_node" +echo "Will use PID file ${pidfile} to check for ctdbd exit" +wait_until 30 ! test -f "$pidfile" +echo "GOOD: ctdbd exited early on node ${test_node}" + +# Changed minor version - this should fail +export CTDB_TEST_SAMBA_VERSION=$(( (major << 16) | (minor + 1) )) +printf '\nRestarting node %d with CTDB_TEST_SAMBA_VERSION=0x%08x\n' \ + "$test_node" \ + "$CTDB_TEST_SAMBA_VERSION" +ctdb_nodes_start "$test_node" +echo "Will use PID file ${pidfile} to check for ctdbd exit" +wait_until 30 ! test -f "$pidfile" +echo "GOOD: ctdbd exited early on node ${test_node}" diff --git a/ctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh b/ctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh new file mode 100755 index 0000000..2220a20 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/debug.001.getdebug.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb getdebug' works as expected + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +try_command_on_node 0 "$CTDB listnodes | wc -l" +num_nodes="$out" + +try_command_on_node -v 1 "onnode -q all $CTDB getdebug" +getdebug_onnode="$out" + +sanity_check_output \ + $num_nodes \ + '^(ERROR|WARNING|NOTICE|INFO|DEBUG)$' + +cmd="" +n=0 +while [ $n -lt $num_nodes ] ; do + cmd="${cmd}${cmd:+; }$CTDB getdebug -n $n" + n=$(($n + 1)) +done +try_command_on_node -v 1 "$cmd" +getdebug_n="$out" + +if [ "$getdebug_onnode" = "$getdebug_n" ] ; then + echo "They're the same... cool!" +else + die "Error: they differ." +fi + +seps="" +nl=" +" +while read line ; do + t=$(echo "$line" | sed -r -e 's@Node [[:digit:]]+ is at debug level ([[:alpha:]]+) \((-?[[:digit:]]+)\)$@\|\1\|\2|@') + seps="${seps}${seps:+${nl}}|Name|Level|${nl}${t}" +done <<<"$getdebug_onnode" diff --git a/ctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh b/ctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh new file mode 100755 index 0000000..dd5949e --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/debug.002.setdebug.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb setdebug' works as expected. + +# This is a little superficial. It checks that CTDB thinks the debug +# level has been changed but doesn't actually check that logging occurs +# at the new level. + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +select_test_node + +get_debug () +{ + # Sets: check_debug + local node="$1" + + local out + + try_command_on_node -v $node "$CTDB getdebug" + check_debug="$out" +} + +set_and_check_debug () +{ + local node="$1" + local level="$2" + local levelstr="${3:-$level}" + + echo "Setting debug level on node ${node} to ${level}." + try_command_on_node $node "$CTDB setdebug ${level}" + + local check_debug + get_debug $node + + if [ "$levelstr" != "$check_debug" ] ; then + die "BAD: Debug level \"$levelstr\" != \"$check_debug\"." + fi +} + +get_debug $test_node +initial_debug="$check_debug" + +levels="ERROR WARNING NOTICE INFO DEBUG" + +for new_debug in $levels ; do + [ "$initial_debug" != "$new_debug" ] || continue + + echo + set_and_check_debug $test_node "$new_debug" +done + +while read new_debug i ; do + [ "$initial_debug" != "$i" ] || continue + + echo + set_and_check_debug $test_node "$i" "$new_debug" +done <<EOF +ERROR 0 +WARNING 1 +WARNING 2 +NOTICE 3 +NOTICE 4 +INFO 5 +INFO 6 +INFO 7 +INFO 8 +INFO 9 +DEBUG 10 +EOF diff --git a/ctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh b/ctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh new file mode 100755 index 0000000..6205c27 --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/debug.003.dumpmemory.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +# Verify that 'ctdb dumpmemory' shows expected output + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +pat='^([[:space:]].+[[:space:]]+contains[[:space:]]+[[:digit:]]+ bytes in[[:space:]]+[[:digit:]]+ blocks \(ref [[:digit:]]+\)[[:space:]]+0x[[:xdigit:]]+|[[:space:]]+reference to: .+|full talloc report on .+ \(total[[:space:]]+[[:digit:]]+ bytes in [[:digit:]]+ blocks\))$' + +try_command_on_node -v 0 "$CTDB dumpmemory" +sanity_check_output 10 "$pat" + +echo +try_command_on_node -v 0 "$CTDB rddumpmemory" +sanity_check_output 10 "$pat" diff --git a/ctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh b/ctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh new file mode 100755 index 0000000..4fdf61c --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/eventscripts.001.zero_scripts.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Check that CTDB operates correctly if there are 0 event scripts + + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init -n + +ctdb_nodes_start_custom --no-event-scripts + +echo "Good, that seems to work!" diff --git a/ctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh b/ctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh new file mode 100755 index 0000000..046989c --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/eventscripts.090.debug_hung.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash + +# Verify CTDB's debugging of timed out eventscripts + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_skip_on_cluster + +ctdb_test_init + +select_test_node + +#################### + +echo "Setting monitor events to time out..." +try_command_on_node $test_node 'echo $CTDB_BASE' +ctdb_base="$out" +script_options="${ctdb_base}/script.options" +ctdb_test_exit_hook_add "onnode $test_node rm -f $script_options" + +debug_output="${ctdb_base}/debug-hung-script.log" +ctdb_test_exit_hook_add "onnode $test_node rm -f $debug_output" + +try_command_on_node -i "$test_node" tee "$script_options" <<EOF +CTDB_RUN_TIMEOUT_MONITOR=yes +CTDB_DEBUG_HUNG_SCRIPT_LOGFILE='$debug_output' +CTDB_DEBUG_HUNG_SCRIPT_STACKPAT='exportfs|rpcinfo|sleep' +CTDB_SCRIPT_VARDIR='$ctdb_base' +EOF + +#################### + +wait_for_monitor_event $test_node + +echo "Waiting for debugging output to appear..." +# Use test -s because the file is created above using mktemp +wait_until 60 test -s "$debug_output" + +echo +echo "Debugging output:" +cat "$debug_output" +echo + +echo "Checking output of hung script debugging..." + +# Can we actually read kernel stacks +if try_command_on_node $test_node "cat /proc/$$/stack >/dev/null 2>&1" ; then + stackpat=' +---- Stack trace of interesting process [0-9]*\\[sleep\\] ---- +[<[0-9a-f]*>] .*sleep+.* +' +else + stackpat='' +fi + +while IFS="" read pattern ; do + [ -n "$pattern" ] || continue + if grep -q -- "^${pattern}\$" "$debug_output" ; then + printf 'GOOD: output contains "%s"\n' "$pattern" + else + printf 'BAD: output does not contain "%s"\n' "$pattern" + exit 1 + fi +done <<EOF +===== Start of hung script debug for PID=".*", event="monitor" ===== +===== End of hung script debug for PID=".*", event="monitor" ===== +pstree -p -a .*: +00\\\\.test\\\\.script,.* + *\`-sleep,.* +${stackpat} +---- ctdb scriptstatus monitor: ---- +00\\.test *TIMEDOUT.* + *OUTPUT: Sleeping for [0-9]* seconds\\\\.\\\\.\\\\. +EOF |