Diffstat (limited to 'ctdb/tests/CLUSTER')
18 files changed, 1546 insertions, 0 deletions
diff --git a/ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh b/ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh
new file mode 100755
index 0000000..dba6d07
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Verify that a node's public IP address can be deleted using 'ctdb delip'.
+
+# This is an extended version of simple/17_ctdb_config_delete_ip.sh
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+get_test_ip_mask_and_iface
+
+echo "Checking that node ${test_node} hosts ${test_ip}..."
+try_command_on_node $test_node "ip addr show to ${test_ip} | grep -q ."
+
+echo "Attempting to remove ${test_ip} from node ${test_node}."
+try_command_on_node $test_node $CTDB delip $test_ip
+try_command_on_node $test_node $CTDB ipreallocate
+wait_until_ips_are_on_node '!' $test_node $test_ip
+
+timeout=60
+increment=5
+count=0
+echo "Waiting for ${test_ip} to disappear from node ${test_node}..."
+while : ; do
+    try_command_on_node -v $test_node "ip addr show to ${test_ip}"
+    if [ -n "$out" ] ; then
+        echo "Still there..."
+        if [ $(($count * $increment)) -ge $timeout ] ; then
+            echo "BAD: Timed out waiting..."
+            exit 1
+        fi
+        sleep_for $increment
+        count=$(($count + 1))
+    else
+        break
+    fi
+done
+
+echo "GOOD: IP was successfully removed!"
diff --git a/ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh b/ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh
new file mode 100755
index 0000000..150aeea
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh
@@ -0,0 +1,257 @@
+#!/bin/bash
+
+# Verify that adding/deleting IPs using 'ctdb reloadips' works
+
+# Checks that when IPs are added to and deleted from a single node then
+# those IPs are actually assigned and unassigned from the specified
+# interface.
+
+# Prerequisites:
+
+# * An active CTDB cluster with public IP addresses configured
+
+# Expected results:
+
+# * When IPs are added to a single node then they are assigned to an
+#   interface.
+
+# * When IPs are deleted from a single node then they disappear from an
+#   interface.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+####################
+
+# Search for an unused 10.B.1.0/24 network on which to add public IP
+# addresses.
+
+# The initial search is for a 10.B.0.0/16 network since some
+# configurations may use a whole class B for the private network.
+# Check that there are no public IP addresses (as reported by "ctdb ip
+# all") or other IP addresses (as reported by "ip addr show") with
+# the provided prefix.  Note that this is an IPv4-specific test.
+
+echo "Getting public IP information from CTDB..."
+ctdb_onnode "$test_node" "ip -X -v all"
+ctdb_ip_info=$(awk -F'|' 'NR > 1 { print $2, $3, $5 }' "$outfile")
+
+echo "Getting IP information from interfaces..."
+try_command_on_node all "ip addr show"
+ip_addr_info=$(awk '$1 == "inet" { ip = $2; sub(/\/.*/, "", ip); print ip }' \
+    "$outfile")
+
+prefix=""
+for b in $(seq 0 255) ; do
+    prefix="10.${b}"
+
+    # Does the prefix match any IP address returned by "ip addr show"?
+    while read ip ; do
+        if [ "${ip#${prefix}.}" != "$ip" ] ; then
+            prefix=""
+            continue 2
+        fi
+    done <<<"$ip_addr_info"
+
+    # Does the prefix match any public IP address reported by "ctdb ip all"?
+    while read ip pnn iface ; do
+        if [ "${ip#${prefix}.}" != "$ip" ] ; then
+            prefix=""
+            continue 2
+        fi
+    done <<<"$ctdb_ip_info"
+
+    # Got through the IPs without matching prefix - done!
+ break +done + +[ -n "$prefix" ] || die "Unable to find a usable IP address prefix" + +# We really want a class C: 10.B.1.0/24 +prefix="${prefix}.1" + +#################### + +iface=$(echo "$ctdb_ip_info" | awk -v pnn=$test_node '$2 == pnn { print $3 ; exit }') + +#################### + +# This needs to be set only on the recmaster. All nodes should do the trick. +new_takeover_timeout=90 +echo "Setting TakeoverTimeout=${new_takeover_timeout} to avoid potential bans" +try_command_on_node all "$CTDB setvar TakeoverTimeout ${new_takeover_timeout}" + +#################### + +try_command_on_node $test_node $CTDB_TEST_WRAPPER ctdb_base_show +addresses="${out}/public_addresses" +echo "Public addresses file on node $test_node is \"$addresses\"" +backup="${addresses}.$$" + +backup_public_addresses () +{ + try_command_on_node $test_node "cp -a $addresses $backup" +} + +restore_public_addresses () +{ + try_command_on_node $test_node "mv $backup $addresses >/dev/null 2>&1 || true" +} +ctdb_test_exit_hook_add restore_public_addresses + +# Now create that backup +backup_public_addresses + +#################### + +add_ips_to_original_config () +{ + local test_node="$1" + local addresses="$2" + local iface="$3" + local prefix="$4" + local first="$5" + local last="$6" + + echo "Adding new public IPs to original config on node ${test_node}..." + echo "IPs will be ${prefix}.${first}/24..${prefix}.${last}/24" + + # Implement this by completely rebuilding the public_addresses + # file. This is easier than deleting entries on a remote node. + restore_public_addresses + backup_public_addresses + + # Note that tee is a safe way of creating a file on a remote node. + # This avoids potential fragility with quoting or redirection. + for i in $(seq $first $last) ; do + echo "${prefix}.${i}/24 ${iface}" + done | + try_command_on_node -i $test_node "tee -a $addresses" +} + +check_ips () +{ + local test_node="$1" + local iface="$2" + local prefix="$3" + local first="$4" + local last="$5" + + # If just 0 specified then this is an empty range + local public_ips_file=$(mktemp) + if [ "$first" = 0 -a -z "$last" ] ; then + echo "Checking that there are no IPs in ${prefix}.0/24" + else + local prefix_regexp="inet *${prefix//./\.}" + + echo "Checking IPs in range ${prefix}.${first}/24..${prefix}.${last}/24" + + local i + for i in $(seq $first $last) ; do + echo "${prefix}.${i}" + done | sort >"$public_ips_file" + fi + + try_command_on_node $test_node "ip addr show dev ${iface}" + local ip_addrs_file=$(mktemp) + cat "$outfile" | \ + sed -n -e "s@.*inet * \(${prefix//./\.}\.[0-9]*\)/.*@\1@p" | \ + sort >"$ip_addrs_file" + + local diffs=$(diff "$public_ips_file" "$ip_addrs_file") || true + rm -f "$ip_addrs_file" "$public_ips_file" + + if [ -z "$diffs" ] ; then + echo "GOOD: IP addresses are as expected" + else + echo "BAD: IP addresses are incorrect:" + echo "$diffs" + exit 1 + fi +} + +# ctdb reloadips will fail if it can't disable takover runs. The most +# likely reason for this is that there is already a takeover run in +# progress. We can't predict when this will happen, so retry if this +# occurs. +do_ctdb_reloadips () +{ + local retry_max=10 + local retry_count=0 + while : ; do + if try_command_on_node "$test_node" "$CTDB reloadips" ; then + return 0 + fi + + if [ "$out" != "Failed to disable takeover runs" ] ; then + return 1 + fi + + if [ $retry_count -ge $retry_max ] ; then + return 1 + fi + + retry_count=$((retry_count + 1)) + echo "Retrying..." 
+ sleep_for 1 + done +} + +#################### + +new_ip_max=100 + +#################### + +add_ips_to_original_config \ + $test_node "$addresses" "$iface" "$prefix" 1 $new_ip_max + +do_ctdb_reloadips + +check_ips $test_node "$iface" "$prefix" 1 $new_ip_max + +ctdb_onnode "$test_node" sync + +#################### + +# This should be the primary. Ensure that no other IPs are lost +echo "Using 'ctdb reloadips' to remove the 1st address just added..." + +add_ips_to_original_config \ + $test_node "$addresses" "$iface" "$prefix" 2 $new_ip_max + +do_ctdb_reloadips + +check_ips $test_node "$iface" "$prefix" 2 $new_ip_max + +ctdb_onnode "$test_node" sync + +#################### + +# Get rid of about 1/2 the IPs +start=$(($new_ip_max / 2 + 1)) +echo "Updating to include only about 1/2 of the new IPs..." + +add_ips_to_original_config \ + $test_node "$addresses" "$iface" "$prefix" $start $new_ip_max + +do_ctdb_reloadips + +check_ips $test_node "$iface" "$prefix" $start $new_ip_max + +ctdb_onnode "$test_node" sync + +#################### + +# Delete the rest +echo "Restoring original IP configuration..." +restore_public_addresses + +do_ctdb_reloadips + +check_ips $test_node "$iface" "$prefix" 0 diff --git a/ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh b/ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh new file mode 100755 index 0000000..4d8f617 --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# Verify that NFS connections are monitored and that NFS tickles are sent. + +# Create a connection to the NFS server on a node. Then disable the +# relevant NFS server node and ensure that it sends an appropriate reset +# packet. The packet must come from the releasing node. + +# Prerequisites: + +# * An active CTDB cluster with at least 2 nodes with public addresses. + +# * Test must be run on a real or virtual cluster rather than against +# local daemons. + +# * Test must not be run from a cluster node. + +# * Cluster nodes must be listening on the NFS TCP port (2049). + +# Expected results: + +# * CTDB on the releasing node should correctly send a reset packet when +# the node is disabled. + +. "${TEST_SCRIPTS_DIR}/cluster.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips + +test_port=2049 + +echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..." + +sleep 30 | nc $test_ip $test_port & +nc_pid=$! +ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1" + +wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc" +src_socket="$out" +echo "Source socket is $src_socket" + +echo "Getting MAC address associated with ${test_ip}..." +releasing_mac=$(ip neigh show $test_prefix | awk '$4 == "lladdr" {print $5}') +[ -n "$releasing_mac" ] || die "Couldn't get MAC address for ${test_prefix}" +echo "MAC address is: ${releasing_mac}" + +tcptickle_sniff_start $src_socket "${test_ip}:${test_port}" + +echo "Disabling node $test_node" +try_command_on_node 1 $CTDB disable -n $test_node +wait_until_node_has_status $test_node disabled + +# Only look for a reset from the releasing node +tcptickle_sniff_wait_show "$releasing_mac" diff --git a/ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh b/ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh new file mode 100755 index 0000000..e3f1540 --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh @@ -0,0 +1,77 @@ +#!/bin/bash + +# Verify that NFS connections are monitored and that NFS tickles are sent. 
+ +# We create a connection to the NFS server on a node and confirm that +# this connection is registered in the nfs-tickles/ subdirectory in +# shared storage. Then kill ctdbd on the relevant NFS server node and +# ensure that the takeover node sends an appropriate reset packet. + +# Prerequisites: + +# * An active CTDB cluster with at least 2 nodes with public addresses. + +# * Test must be run on a real or virtual cluster rather than against +# local daemons. + +# * Test must not be run from a cluster node. + +# * Cluster nodes must be listening on the NFS TCP port (2049). + +# Expected results: + +# * CTDB should correctly record the socket and on failover the takeover +# node should send a reset packet. + +. "${TEST_SCRIPTS_DIR}/cluster.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips +try_command_on_node $test_node "$CTDB listnodes | wc -l" +numnodes="$out" + +# We need this for later, so we know how long to run nc for. +ctdb_onnode "$test_node" "getvar MonitorInterval" +monitor_interval="${out#*= }" + +test_port=2049 + +echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..." + +sleep $((monitor_interval * 4)) | nc $test_ip $test_port & +nc_pid=$! +ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1" + +wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc" +src_socket="$out" +echo "Source socket is $src_socket" + +wait_for_monitor_event $test_node + +echo "Wait until NFS connection is tracked by CTDB on test node ..." +wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket + +echo "Getting TicklesUpdateInterval..." +try_command_on_node $test_node $CTDB getvar TickleUpdateInterval +update_interval="$out" + +echo "Wait until NFS connection is tracked by CTDB on all nodes..." +wait_until $(($update_interval * 2)) \ + check_tickles_all $numnodes $test_ip $test_port $src_socket + +tcptickle_sniff_start $src_socket "${test_ip}:${test_port}" + +# We need to be nasty to make that the node being failed out doesn't +# get a chance to send any tickles and confuse our sniff. IPs also +# need to be dropped because we're simulating a dead node rather than +# a CTDB failure. To properly handle a CTDB failure we would need a +# watchdog to drop the IPs when CTDB disappears. +echo "Killing ctdbd on ${test_node}..." +try_command_on_node -v $test_node "killall -9 ctdbd ; $CTDB_TEST_WRAPPER drop_ips ${test_node_ips}" + +wait_until_node_has_status $test_node disconnected + +tcptickle_sniff_wait_show diff --git a/ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh b/ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh new file mode 100755 index 0000000..c5b583d --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +# Verify that CIFS connections are monitored and that CIFS tickles are sent. + +# We create a connection to the CIFS server on a node and confirm that +# this connection is registered by CTDB. Then disable the relevant CIFS +# server node and ensure that the takeover node sends an appropriate +# reset packet. + +# Prerequisites: + +# * An active CTDB cluster with at least 2 nodes with public addresses. + +# * Test must be run on a real or virtual cluster rather than against +# local daemons. + +# * Test must not be run from a cluster node. + +# * Clustered Samba must be listening on TCP port 445. + +# Expected results: + +# * CTDB should correctly record the connection and the takeover node +# should send a reset packet. + +. 
"${TEST_SCRIPTS_DIR}/cluster.bash" + +set -e + +ctdb_test_init + +# We need this for later, so we know how long to sleep. +try_command_on_node 0 $CTDB getvar MonitorInterval +monitor_interval="${out#*= }" +#echo "Monitor interval on node $test_node is $monitor_interval seconds." + +select_test_node_and_ips + +test_port=445 + +echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..." + +sleep $((monitor_interval * 4)) | nc $test_ip $test_port & +nc_pid=$! +ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1" + +wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc" +src_socket="$out" +echo "Source socket is $src_socket" + +# This should happen as soon as connection is up... but unless we wait +# we sometimes beat the registration. +echo "Checking if CIFS connection is tracked by CTDB on test node..." +wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket + +# This is almost immediate. However, it is sent between nodes +# asynchonously, so it is worth checking... +echo "Wait until CIFS connection is tracked by CTDB on all nodes..." +try_command_on_node $test_node "$CTDB listnodes | wc -l" +numnodes="$out" +wait_until 5 \ + check_tickles_all $numnodes $test_ip $test_port $src_socket +tcptickle_sniff_start $src_socket "${test_ip}:${test_port}" + +echo "Disabling node $test_node" +try_command_on_node 1 $CTDB disable -n $test_node +wait_until_node_has_status $test_node disabled + +tcptickle_sniff_wait_show diff --git a/ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh b/ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh new file mode 100755 index 0000000..7a0944f --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh @@ -0,0 +1,74 @@ +#!/bin/bash + +# Verify that a gratuitous ARP is sent when a node is failed out. + +# We ping a public IP and lookup the MAC address in the ARP table. We +# then disable the node and check the ARP table again - the MAC address +# should have changed. This test does NOT test connectivity after the +# failover. + +# Prerequisites: + +# * An active CTDB cluster with at least 2 nodes with public addresses. + +# * Test must be run on a real or virtual cluster rather than against +# local daemons. + +# * Test must not be run from a cluster node. + +# Steps: + +# 1. Verify that the cluster is healthy. +# 2. Select a public address and its corresponding node. +# 3. Remove any entries for the chosen address from the ARP table. +# 4. Send a single ping request packet to the selected public address. +# 5. Determine the MAC address corresponding to the public address by +# checking the ARP table. +# 6. Disable the selected node. +# 7. Check the ARP table and check the MAC associated with the public +# address. + +# Expected results: + +# * When a node is disabled the MAC address associated with public +# addresses on that node should change. + +. "${TEST_SCRIPTS_DIR}/cluster.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips + +echo "Removing ${test_ip} from the local ARP table..." +ip neigh flush "$test_prefix" >/dev/null 2>&1 || true + +echo "Pinging ${test_ip}..." +ping_wrapper -q -n -c 1 $test_ip + +echo "Getting MAC address associated with ${test_ip}..." 
+original_mac=$(ip neigh show $test_prefix | awk '$4 == "lladdr" {print $5}') +[ -n "$original_mac" ] || die "Couldn't get MAC address for ${test_prefix}" + +echo "MAC address is: ${original_mac}" + +gratarp_sniff_start + +echo "Disabling node $test_node" +try_command_on_node 1 $CTDB disable -n $test_node +wait_until_node_has_status $test_node disabled + +gratarp_sniff_wait_show + +echo "Getting MAC address associated with ${test_ip} again..." +new_mac=$(ip neigh show $test_prefix | awk '$4 == "lladdr" {print $5}') +[ -n "$new_mac" ] || die "Couldn't get MAC address for ${test_prefix}" + +echo "MAC address is: ${new_mac}" + +if [ "$original_mac" != "$new_mac" ] ; then + echo "GOOD: MAC address changed" +else + die "BAD: MAC address did not change" +fi diff --git a/ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh b/ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh new file mode 100755 index 0000000..b81510d --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +# Verify that a newly started CTDB node gets updated tickle details + +# Prerequisites: + +# * An active CTDB cluster with at least 2 nodes with public addresses. + +# * Test must be run on a real or virtual cluster rather than against +# local daemons. + +# * Cluster nodes must be listening on the NFS TCP port (2049). + +# Steps: + +# As with 31_nfs_tickle.sh but restart a node after the tickle is +# registered. + +# Expected results: + +# * CTDB should correctly communicated tickles to new CTDB instances as +# they join the cluster. + +. "${TEST_SCRIPTS_DIR}/cluster.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips +try_command_on_node $test_node "$CTDB listnodes -X" +listnodes_output="$out" +numnodes=$(wc -l <<<"$listnodes_output") + +test_port=2049 + +echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..." + +sleep 600 | nc $test_ip $test_port & +nc_pid=$! +ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1" + +wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc" +src_socket="$out" +echo "Source socket is $src_socket" + +wait_for_monitor_event $test_node + +echo "Wait until NFS connection is tracked by CTDB on test node ..." +wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket + +echo "Select a node to restart ctdbd" +rn=$(awk -F'|' -v test_node=$test_node \ + '$2 != test_node { print $2 ; exit }' <<<"$listnodes_output") + +echo "Restarting CTDB on node ${rn}" +ctdb_nodes_restart "$rn" + +# In some theoretical world this is racy. In practice, the node will +# take quite a while to become healthy, so this will beat any +# assignment of IPs to the node. +echo "Setting NoIPTakeover on node ${rn}" +try_command_on_node $rn $CTDB setvar NoIPTakeover 1 + +wait_until_ready + +echo "Getting TickleUpdateInterval..." +try_command_on_node $test_node $CTDB getvar TickleUpdateInterval +update_interval="$out" + +echo "Wait until NFS connection is tracked by CTDB on all nodes..." +if ! wait_until $(($update_interval * 2)) \ + check_tickles_all $numnodes $test_ip $test_port $src_socket ; then + echo "BAD: connection not tracked on all nodes:" + echo "$out" + exit 1 +fi + +# We could go on to test whether the tickle ACK gets sent. However, +# this is tested in previous tests and the use of NoIPTakeover +# complicates things on a 2 node cluster. 
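The tickle checks in the NFS tests above use the check_tickles/check_tickles_all helpers defined in scripts/local.bash at the end of this series. For orientation, this is roughly what they do by hand; the PNN, address and socket below are illustrative values only, not output from a real cluster:

    # Ask node 2 (hypothetical PNN) which connections it has registered
    # as tickles for the test public address and the NFS port.
    onnode 2 ctdb gettickles 10.0.2.143 2049
    # One line is printed per tracked connection, e.g.:
    #   SRC: 10.0.2.45:49091 DST: 10.0.2.143:2049
    # check_tickles greps this output for "SRC: <client socket> " on one
    # node; check_tickles_all requires a match on every node.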
diff --git a/ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh b/ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh new file mode 100755 index 0000000..d0f3d08 --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +# Verify that the server end of an SMB connection is correctly reset + +# Prerequisites: + +# * An active CTDB cluster with at least 2 nodes with public addresses. + +# * Test must be run on a real or virtual cluster rather than against +# local daemons. + +# * Test must not be run from a cluster node. + +# * Clustered Samba must be listening on TCP port 445. + +# Expected results: + +# * CTDB should correctly record the connection and the releasing node +# should reset the server end of the connection. + +. "${TEST_SCRIPTS_DIR}/cluster.bash" + +set -e + +ctdb_test_init + +# We need this for later, so we know how long to sleep. +try_command_on_node 0 $CTDB getvar MonitorInterval +monitor_interval="${out#*= }" + +select_test_node_and_ips + +test_port=445 + +echo "Set NoIPTakeover=1 on all nodes" +try_command_on_node all $CTDB setvar NoIPTakeover 1 + +echo "Give the recovery daemon some time to reload tunables" +sleep_for 5 + +echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with nc..." + +sleep $((monitor_interval * 4)) | nc $test_ip $test_port & +nc_pid=$! +ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1" + +wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc" +src_socket="$out" +echo "Source socket is $src_socket" + +# This should happen as soon as connection is up... but unless we wait +# we sometimes beat the registration. +echo "Waiting until SMB connection is tracked by CTDB on test node..." +wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket + +# It would be nice if ss consistently used local/peer instead of src/dst +ss_filter="src ${test_ip}:${test_port} dst ${src_socket}" + +try_command_on_node $test_node \ + "ss -tn state established '${ss_filter}' | tail -n +2" +if [ -z "$out" ] ; then + echo "BAD: ss did not list the socket" + exit 1 +fi +echo "GOOD: ss lists the socket:" +cat "$outfile" + +echo "Disabling node $test_node" +try_command_on_node 1 $CTDB disable -n $test_node +wait_until_node_has_status $test_node disabled + +try_command_on_node $test_node \ + "ss -tn state established '${ss_filter}' | tail -n +2" +if [ -n "$out" ] ; then + echo "BAD: ss listed the socket after failover" + exit 1 +fi +echo "GOOD: ss no longer lists the socket" diff --git a/ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh b/ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh new file mode 100755 index 0000000..3e249f9 --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +# Verify that the server end of an NFS connection is correctly reset + +# Prerequisites: + +# * An active CTDB cluster with at least 2 nodes with public addresses. + +# * Test must be run on a real or virtual cluster rather than against +# local daemons. + +# * Test must not be run from a cluster node. + +# * Cluster nodes must be listening on the NFS TCP port (2049). + +# Expected results: + +# * CTDB should correctly record the connection and the releasing node +# should reset the server end of the connection. + +. "${TEST_SCRIPTS_DIR}/cluster.bash" + +set -e + +ctdb_test_init + +# We need this for later, so we know how long to sleep. 
+try_command_on_node 0 $CTDB getvar MonitorInterval +monitor_interval="${out#*= }" + +select_test_node_and_ips + +test_port=2049 + +echo "Set NoIPTakeover=1 on all nodes" +try_command_on_node all $CTDB setvar NoIPTakeover 1 + +echo "Give the recovery daemon some time to reload tunables" +sleep_for 5 + +echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with nc..." + +sleep $((monitor_interval * 4)) | nc $test_ip $test_port & +nc_pid=$! +ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1" + +wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc" +src_socket="$out" +echo "Source socket is $src_socket" + +echo "Wait until NFS connection is tracked by CTDB on test node ..." +wait_until $((monitor_interval * 2)) \ + check_tickles $test_node $test_ip $test_port $src_socket +cat "$outfile" + +# It would be nice if ss consistently used local/peer instead of src/dst +ss_filter="src ${test_ip}:${test_port} dst ${src_socket}" + +try_command_on_node $test_node \ + "ss -tn state established '${ss_filter}' | tail -n +2" +if [ -z "$out" ] ; then + echo "BAD: ss did not list the socket" + exit 1 +fi +echo "GOOD: ss lists the socket:" +cat "$outfile" + +echo "Disabling node $test_node" +try_command_on_node 1 $CTDB disable -n $test_node +wait_until_node_has_status $test_node disabled + +try_command_on_node $test_node \ + "ss -tn state established '${ss_filter}' | tail -n +2" +if [ -n "$out" ] ; then + echo "BAD: ss listed the socket after failover" + exit 1 +fi +echo "GOOD: ss no longer lists the socket" diff --git a/ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh b/ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh new file mode 100755 index 0000000..539d25e --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Verify that it is possible to ping a public address after disabling a node. + +# We ping a public IP, disable the node hosting it and then ping the +# public IP again. + +# Prerequisites: + +# * An active CTDB cluster with at least 2 nodes with public addresses. + +# * Test must be run on a real or virtual cluster rather than against +# local daemons. + +# * Test must not be run from a cluster node. + +# Steps: + +# 1. Verify that the cluster is healthy. +# 2. Select a public address and its corresponding node. +# 3. Send a single ping request packet to the selected public address. +# 4. Disable the selected node. +# 5. Send another single ping request packet to the selected public address. + +# Expected results: + +# * When a node is disabled the public address fails over and the +# address is still pingable. + +. "${TEST_SCRIPTS_DIR}/cluster.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips + +echo "Removing ${test_ip} from the local neighbor table..." +ip neigh flush "$test_prefix" >/dev/null 2>&1 || true + +echo "Pinging ${test_ip}..." +ping_wrapper -q -n -c 1 $test_ip + +gratarp_sniff_start + +echo "Disabling node $test_node" +try_command_on_node 1 $CTDB disable -n $test_node +wait_until_node_has_status $test_node disabled + +gratarp_sniff_wait_show + +echo "Removing ${test_ip} from the local neighbor table again..." +ip neigh flush "$test_prefix" >/dev/null 2>&1 || true + +echo "Pinging ${test_ip} again..." 
+ping_wrapper -q -n -c 1 $test_ip diff --git a/ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh b/ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh new file mode 100755 index 0000000..233819b --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +# Verify that it is possible to SSH to a public address after disabling a node. + +# We SSH to a public IP and check the hostname, disable the node hosting +# it and then SSH again to confirm that the hostname has changed. + +# Prerequisites: + +# * An active CTDB cluster with at least 2 nodes with public addresses. + +# * Test must be run on a real or virtual cluster rather than against +# local daemons. + +# * Test must not be run from a cluster node. + +# Steps: + +# 1. Verify that the cluster is healthy. +# 2. Select a public address and its corresponding node. +# 3. SSH to the selected public address and run hostname. +# 4. Disable the selected node. +# 5. SSH to the selected public address again and run hostname. + +# Expected results: + +# * When a node is disabled the public address fails over and it is +# still possible to SSH to the node. The hostname should change. + +. "${TEST_SCRIPTS_DIR}/cluster.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips + +echo "Removing ${test_ip} from the local neighbor table..." +ip neigh flush "$test_prefix" >/dev/null 2>&1 || true + +echo "SSHing to ${test_ip} and running hostname..." +if ! original_hostname=$(ssh -o "StrictHostKeyChecking no" $test_ip hostname) ; then + die "Failed to get original hostname via SSH..." +fi + +echo "Hostname is: ${original_hostname}" + +gratarp_sniff_start + +echo "Disabling node $test_node" +try_command_on_node 1 $CTDB disable -n $test_node +wait_until_node_has_status $test_node disabled + +gratarp_sniff_wait_show + +echo "SSHing to ${test_ip} and running hostname (again)..." +if ! new_hostname=$(ssh -o "StrictHostKeyChecking no" $test_ip hostname) ; then + echo "Failed to get new hostname via SSH..." + echo "DEBUG:" + ip neigh show + exit 1 +fi + +echo "Hostname is: ${new_hostname}" + +if [ "$original_hostname" != "$new_hostname" ] ; then + echo "GOOD: hostname changed" +else + die "BAD: hostname did not change" +fi diff --git a/ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh b/ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh new file mode 100755 index 0000000..ac2cafd --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +# Verify that a mounted NFS share is still operational after failover. + +# We mount an NFS share from a node, write a file via NFS and then +# confirm that we can correctly read the file after a failover. + +# Prerequisites: + +# * An active CTDB cluster with at least 2 nodes with public addresses. + +# * Test must be run on a real or virtual cluster rather than against +# local daemons. + +# * Test must not be run from a cluster node. + +# Steps: + +# 1. Verify that the cluster is healthy. +# 2. Select a public address and its corresponding node. +# 3. Select the 1st NFS share exported on the node. +# 4. Mount the selected NFS share. +# 5. Create a file in the NFS mount and calculate its checksum. +# 6. Disable the selected node. +# 7. Read the file and calculate its checksum. +# 8. Compare the checksums. + +# Expected results: + +# * When a node is disabled the public address fails over and it is +# possible to correctly read a file over NFS. The checksums should be +# the same before and after. + +. 
"${TEST_SCRIPTS_DIR}/cluster.bash" + +set -e + +ctdb_test_init + +nfs_test_setup + +echo "Create file containing random data..." +dd if=/dev/urandom of=$nfs_local_file bs=1k count=1 +original_sum=$(sum $nfs_local_file) +[ $? -eq 0 ] + +gratarp_sniff_start + +echo "Disabling node $test_node" +try_command_on_node 0 $CTDB disable -n $test_node +wait_until_node_has_status $test_node disabled + +gratarp_sniff_wait_show + +new_sum=$(sum $nfs_local_file) +[ $? -eq 0 ] + +if [ "$original_md5" = "$new_md5" ] ; then + echo "GOOD: file contents unchanged after failover" +else + die "BAD: file contents are different after failover" +fi diff --git a/ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh b/ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh new file mode 100755 index 0000000..5c8324c --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh @@ -0,0 +1,82 @@ +#!/bin/bash + +# Verify that a file created on a node is readable via NFS after a failover. + +# We write a file into an exported directory on a node, mount the NFS +# share from a node, verify that we can read the file via NFS and that +# we can still read it after a failover. + +# Prerequisites: + +# * An active CTDB cluster with at least 2 nodes with public addresses. + +# * Test must be run on a real or virtual cluster rather than against +# local daemons. + +# * Test must not be run from a cluster node. + +# Steps: + +# 1. Verify that the cluster is healthy. +# 2. Select a public address and its corresponding node. +# 3. Select the 1st NFS share exported on the node. +# 4. Write a file into exported directory on the node and calculate its +# checksum. +# 5. Mount the selected NFS share. +# 6. Read the file via the NFS mount and calculate its checksum. +# 7. Compare checksums. +# 8. Disable the selected node. +# 9. Read the file via NFS and calculate its checksum. +# 10. Compare the checksums. + +# Expected results: + +# * Checksums for the file on all 3 occasions should be the same. + +. "${TEST_SCRIPTS_DIR}/cluster.bash" + +set -e + +ctdb_test_init + +nfs_test_setup + +echo "Create file containing random data..." +local_f=$(mktemp) +ctdb_test_exit_hook_add rm -f "$local_f" +dd if=/dev/urandom of=$local_f bs=1k count=1 +local_sum=$(sum $local_f) + +scp -p "$local_f" "[${test_ip}]:${nfs_remote_file}" +try_command_on_node $test_node "chmod 644 $nfs_remote_file" + +nfs_sum=$(sum $nfs_local_file) + +if [ "$local_sum" = "$nfs_sum" ] ; then + echo "GOOD: file contents read correctly via NFS" +else + echo "BAD: file contents are different over NFS" + echo " original file: $local_sum" + echo " NFS file: $nfs_sum" + exit 1 +fi + +gratarp_sniff_start + +echo "Disabling node $test_node" +try_command_on_node 0 $CTDB disable -n $test_node +wait_until_node_has_status $test_node disabled + +gratarp_sniff_wait_show + +new_sum=$(sum $nfs_local_file) +[ $? -eq 0 ] + +if [ "$nfs_sum" = "$new_sum" ] ; then + echo "GOOD: file contents unchanged after failover" +else + echo "BAD: file contents are different after failover" + echo " original file: $nfs_sum" + echo " NFS file: $new_sum" + exit 1 +fi diff --git a/ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh b/ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh new file mode 100755 index 0000000..2d15748 --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +# Verify that a mounted NFS share is still operational after failover. 
+
+# We mount an NFS share from a node, write a file via NFS and then
+# confirm that we can correctly read the file after a failover.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+#   local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Select the 1st NFS share exported on the node.
+# 4. Mount the selected NFS share.
+# 5. Create a file in the NFS mount and calculate its checksum.
+# 6. Kill CTDB on the selected node.
+# 7. Read the file and calculate its checksum.
+# 8. Compare the checksums.
+
+# Expected results:
+
+# * When a node is disabled the public address fails over and it is
+#   possible to correctly read a file over NFS.  The checksums should be
+#   the same before and after.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+nfs_test_setup
+
+echo "Create file containing random data..."
+dd if=/dev/urandom of=$nfs_local_file bs=1k count=1
+original_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+gratarp_sniff_start
+
+echo "Killing node $test_node"
+try_command_on_node $test_node $CTDB getpid
+pid=${out#*:}
+# We need to be nasty to make sure that the node being failed out
+# doesn't get a chance to send any tickles or do anything else clever.
+# IPs also need to be dropped because we're simulating a dead node
+# rather than a CTDB failure.  To properly handle a CTDB failure we
+# would need a watchdog to drop the IPs when CTDB disappears.
+try_command_on_node -v $test_node "kill -9 $pid ; $CTDB_TEST_WRAPPER drop_ips ${test_node_ips}"
+wait_until_node_has_status $test_node disconnected
+
+gratarp_sniff_wait_show
+
+new_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+if [ "$original_sum" = "$new_sum" ] ; then
+    echo "GOOD: file contents unchanged after failover"
+else
+    die "BAD: file contents are different after failover"
+fi
diff --git a/ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh b/ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh
new file mode 100755
index 0000000..efa9ef2
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Verify that the recovery daemon correctly handles a rogue IP
+
+# It should be released...
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Using $test_ip, which is on node $test_node"
+
+# This test depends on being able to assign a duplicate address on a
+# 2nd node.  However, IPv6 guards against this and causes the test to
+# fail.
+case "$test_ip" in +*:*) ctdb_test_skip "This test is not supported for IPv6 addresses" ;; +esac + +get_test_ip_mask_and_iface + +echo "Finding another node that knows about $test_ip" +ctdb_get_all_pnns +other_node="" +for i in $all_pnns ; do + if [ "$i" = "$test_node" ] ; then + continue + fi + try_command_on_node $i "$CTDB ip" + n=$(awk -v ip="$test_ip" '$1 == ip { print }' "$outfile") + if [ -n "$n" ] ; then + other_node="$i" + break + fi +done +if [ -z "$other_node" ] ; then + die "Unable to find another node that knows about $test_ip" +fi + +echo "Adding $test_ip on node $other_node" +try_command_on_node $other_node "ip addr add ${test_ip}/${mask} dev ${iface}" + +rogue_ip_is_gone () +{ + local pnn="$1" + local test_ip="$2" + try_command_on_node $pnn $CTDB_TEST_WRAPPER ip_maskbits_iface $test_ip + [ -z "$out" ] +} + +echo "Waiting until rogue IP is no longer assigned..." +wait_until 30 rogue_ip_is_gone $other_node $test_ip diff --git a/ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh b/ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh new file mode 100755 index 0000000..5ee4e54 --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# Verify that TAKE_IP will work for an IP that is already on an interface + +# This is a variation of simple/60_recoverd_missing_ip.sh + +. "${TEST_SCRIPTS_DIR}/cluster.bash" + +set -e + +ctdb_test_init + +select_test_node_and_ips + +echo "Running test against node $test_node and IP $test_ip" + +# This test puts an address on an interface and then needs to quickly +# configure that address and cause an IP takeover. However, an IPv6 +# address will be tentative for a while so "quickly" is not possible". +# When ctdb_control_takeover_ip() calls ctdb_sys_have_ip() it will +# decide that the address is not present. It then attempts a takeip, +# which can fail if the address is suddenly present because it is no +# longer tentative. +case "$test_ip" in +*:*) ctdb_test_skip "This test is not supported for IPv6 addresses" ;; +esac + +get_test_ip_mask_and_iface + +echo "Deleting IP $test_ip from all nodes" +delete_ip_from_all_nodes $test_ip +try_command_on_node -v $test_node $CTDB ipreallocate +wait_until_ips_are_on_node ! $test_node $test_ip + +try_command_on_node -v all $CTDB ip + +# The window here needs to small, to try to avoid the address being +# released. The test will still pass either way but if the first IP +# takeover run does a release then this doesn't test the code path we +# expect it to... +echo "Adding IP $test_ip to $iface and CTDB on node $test_node" +ip_cmd="ip addr add $test_ip/$mask dev $iface" +ctdb_cmd="$CTDB addip $test_ip/$mask $iface && $CTDB ipreallocate" +try_command_on_node $test_node "$ip_cmd && $ctdb_cmd" + +wait_until_ips_are_on_node $test_node $test_ip diff --git a/ctdb/tests/CLUSTER/complex/README b/ctdb/tests/CLUSTER/complex/README new file mode 100644 index 0000000..72de396 --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/README @@ -0,0 +1,2 @@ +Complex integration tests. These need a real or virtual cluster. +That is, they can not be run against local daemons. diff --git a/ctdb/tests/CLUSTER/complex/scripts/local.bash b/ctdb/tests/CLUSTER/complex/scripts/local.bash new file mode 100644 index 0000000..0e2addd --- /dev/null +++ b/ctdb/tests/CLUSTER/complex/scripts/local.bash @@ -0,0 +1,289 @@ +# Hey Emacs, this is a -*- shell-script -*- !!! :-) + +# Thanks/blame to Stephen Rothwell for suggesting that this can be +# done in the shell. 
;-) +ipv6_to_hex () +{ + local addr="$1" + + # Replace "::" by something special. + local foo="${addr/::/:@:}" + + # Join the groups of digits together, 0-padding each group of + # digits out to 4 digits, and count the number of (non-@) groups + local out="" + local count=0 + local i + for i in $(IFS=":" ; echo $foo ) ; do + if [ "$i" = "@" ] ; then + out="${out}@" + else + out="${out}$(printf '%04x' 0x${i})" + count=$(($count + 4)) + fi + done + + # Replace '@' with correct number of zeroes + local zeroes=$(printf "%0$((32 - $count))x" 0) + echo "${out/@/${zeroes}}" +} + +####################################### + +get_src_socket () +{ + local proto="$1" + local dst_socket="$2" + local pid="$3" + local prog="$4" + + local pat="^${proto}6?[[:space:]]+[[:digit:]]+[[:space:]]+[[:digit:]]+[[:space:]]+[^[:space:]]+[[:space:]]+${dst_socket//./\\.}[[:space:]]+ESTABLISHED[[:space:]]+${pid}/${prog}[[:space:]]*\$" + out=$(netstat -tanp | + egrep "$pat" | + awk '{ print $4 }') + + [ -n "$out" ] +} + +wait_until_get_src_socket () +{ + local proto="$1" + local dst_socket="$2" + local pid="$3" + local prog="$4" + + echo "Waiting for ${prog} to establish connection to ${dst_socket}..." + + wait_until 5 get_src_socket "$@" +} + +####################################### + +check_tickles () +{ + local node="$1" + local test_ip="$2" + local test_port="$3" + local src_socket="$4" + try_command_on_node $node ctdb gettickles $test_ip $test_port + # SRC: 10.0.2.45:49091 DST: 10.0.2.143:445 + grep -Fq "SRC: ${src_socket} " "$outfile" +} + +check_tickles_all () +{ + local numnodes="$1" + local test_ip="$2" + local test_port="$3" + local src_socket="$4" + + try_command_on_node all ctdb gettickles $test_ip $test_port + # SRC: 10.0.2.45:49091 DST: 10.0.2.143:445 + local count=$(grep -Fc "SRC: ${src_socket} " "$outfile" || true) + [ $count -eq $numnodes ] +} + + + +####################################### + +# filename will be in $tcpdump_filename, pid in $tcpdump_pid +tcpdump_start () +{ + tcpdump_filter="$1" # global + + echo "Running tcpdump..." + tcpdump_filename=$(mktemp) + ctdb_test_exit_hook_add "rm -f $tcpdump_filename" + + # The only way of being sure that tcpdump is listening is to send + # some packets that it will see. So we use dummy pings - the -U + # option to tcpdump ensures that packets are flushed to the file + # as they are captured. + local dummy_addr="127.3.2.1" + local dummy="icmp and dst host ${dummy_addr} and icmp[icmptype] == icmp-echo" + tcpdump -n -p -s 0 -e -U -w $tcpdump_filename -i any "($tcpdump_filter) or ($dummy)" & + ctdb_test_exit_hook_add "kill $! >/dev/null 2>&1" + + echo "Waiting for tcpdump output file to be ready..." + ping -q "$dummy_addr" >/dev/null 2>&1 & + ctdb_test_exit_hook_add "kill $! >/dev/null 2>&1" + + tcpdump_listen_for_dummy () + { + tcpdump -n -r $tcpdump_filename -c 1 "$dummy" >/dev/null 2>&1 + } + + wait_until 10 tcpdump_listen_for_dummy +} + +# By default, wait for 1 matching packet. +tcpdump_wait () +{ + local count="${1:-1}" + local filter="${2:-${tcpdump_filter}}" + + tcpdump_check () + { + # It would be much nicer to add "ether src + # $releasing_mac" to the filter. However, tcpdump + # does not allow MAC filtering unless an ethernet + # interface is specified with -i. It doesn't work + # with "-i any" and it doesn't work when reading from + # a file. 
:-( + local found + if [ -n "$releasing_mac" ] ; then + found=$(tcpdump -n -e -r "$tcpdump_filename" \ + "$filter" 2>/dev/null | + grep -c "In ${releasing_mac}") + else + found=$(tcpdump -n -e -r "$tcpdump_filename" \ + "$filter" 2>/dev/null | + wc -l) + fi + + [ $found -ge $count ] + } + + echo "Waiting for tcpdump to capture some packets..." + if ! wait_until 30 tcpdump_check ; then + echo "DEBUG AT $(date '+%F %T'):" + local i + for i in "onnode -q 0 $CTDB status" \ + "netstat -tanp" \ + "tcpdump -n -e -r $tcpdump_filename" ; do + echo "$i" + $i || true + done + return 1 + fi +} + +tcpdump_show () +{ + local filter="${1:-${tcpdump_filter}}" + + tcpdump -n -e -vv -XX -r $tcpdump_filename "$filter" 2>/dev/null +} + +tcp4tickle_sniff_start () +{ + local src="$1" + local dst="$2" + + local in="src host ${dst%:*} and tcp src port ${dst##*:} and dst host ${src%:*} and tcp dst port ${src##*:}" + local out="src host ${src%:*} and tcp src port ${src##*:} and dst host ${dst%:*} and tcp dst port ${dst##*:}" + local tickle_ack="${in} and (tcp[tcpflags] & tcp-ack != 0) and (tcp[14:2] == 1234)" # win == 1234 + local ack_ack="${out} and (tcp[tcpflags] & tcp-ack != 0)" + tcptickle_reset="${in} and tcp[tcpflags] & tcp-rst != 0" + local filter="(${tickle_ack}) or (${ack_ack}) or (${tcptickle_reset})" + + tcpdump_start "$filter" +} + +# tcp[] does not work for IPv6 (in some versions of tcpdump) +tcp6tickle_sniff_start () +{ + local src="$1" + local dst="$2" + + local in="src host ${dst%:*} and tcp src port ${dst##*:} and dst host ${src%:*} and tcp dst port ${src##*:}" + local out="src host ${src%:*} and tcp src port ${src##*:} and dst host ${dst%:*} and tcp dst port ${dst##*:}" + local tickle_ack="${in} and (ip6[53] & tcp-ack != 0) and (ip6[54:2] == 1234)" # win == 1234 + local ack_ack="${out} and (ip6[53] & tcp-ack != 0)" + tcptickle_reset="${in} and ip6[53] & tcp-rst != 0" + local filter="(${tickle_ack}) or (${ack_ack}) or (${tcptickle_reset})" + + tcpdump_start "$filter" +} + +tcptickle_sniff_start () +{ + local src="$1" + local dst="$2" + + case "${dst%:*}" in + *:*) tcp6tickle_sniff_start "$src" "$dst" ;; + *) tcp4tickle_sniff_start "$src" "$dst" ;; + esac +} + +tcptickle_sniff_wait_show () +{ + local releasing_mac="$1" # optional, used by tcpdump_wait() + + tcpdump_wait 1 "$tcptickle_reset" + + echo "GOOD: here are some TCP tickle packets:" + tcpdump_show +} + +gratarp4_sniff_start () +{ + tcpdump_start "arp host ${test_ip}" +} + +gratarp6_sniff_start () +{ + local neighbor_advertisement="icmp6 and ip6[40] == 136" + local hex=$(ipv6_to_hex "$test_ip") + local match_target="ip6[48:4] == 0x${hex:0:8} and ip6[52:4] == 0x${hex:8:8} and ip6[56:4] == 0x${hex:16:8} and ip6[60:4] == 0x${hex:24:8}" + + tcpdump_start "${neighbor_advertisement} and ${match_target}" +} + +gratarp_sniff_start () +{ + case "$test_ip" in + *:*) gratarp6_sniff_start ;; + *) gratarp4_sniff_start ;; + esac +} + +gratarp_sniff_wait_show () +{ + tcpdump_wait 2 + + echo "GOOD: this should be the some gratuitous ARPs:" + tcpdump_show +} + +ping_wrapper () +{ + case "$*" in + *:*) ping6 "$@" ;; + *) ping "$@" ;; + esac +} + +####################################### + +nfs_test_setup () +{ + select_test_node_and_ips + + nfs_first_export=$(showmount -e $test_ip | sed -n -e '2s/ .*//p') + + echo "Creating test subdirectory..." 
+ try_command_on_node $test_node "TMPDIR=$nfs_first_export mktemp -d" + nfs_test_dir="$out" + try_command_on_node $test_node "chmod 777 $nfs_test_dir" + + nfs_mnt_d=$(mktemp -d) + nfs_local_file="${nfs_mnt_d}/${nfs_test_dir##*/}/TEST_FILE" + nfs_remote_file="${nfs_test_dir}/TEST_FILE" + + ctdb_test_exit_hook_add nfs_test_cleanup + + echo "Mounting ${test_ip}:${nfs_first_export} on ${nfs_mnt_d} ..." + mount -o timeo=1,hard,intr,vers=3 \ + "[${test_ip}]:${nfs_first_export}" ${nfs_mnt_d} +} + +nfs_test_cleanup () +{ + rm -f "$nfs_local_file" + umount -f "$nfs_mnt_d" + rmdir "$nfs_mnt_d" + onnode -q $test_node rmdir "$nfs_test_dir" +} |
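A short worked example of the ipv6_to_hex helper may make the gratarp6_sniff_start filter easier to read. The address below is illustrative and the expansion is derived from the function itself rather than captured on a cluster:

    # Each group is zero-padded to 4 hex digits and the "::" gap is
    # filled with zeroes until the result is 32 digits long:
    #
    #   ipv6_to_hex "fd00::5:1"
    #   => fd000000 00000000 00000000 00050001   (spaces added for readability)
    #
    # gratarp6_sniff_start slices this into four 32-bit words and matches
    # them against ip6[48:4]..ip6[60:4], the target address field of an
    # IPv6 neighbour advertisement.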