Diffstat (limited to 'ctdb/tests/CLUSTER')
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh | 43
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh | 257
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh | 57
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh | 77
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh | 69
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh | 74
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh | 81
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh | 78
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh | 78
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh | 56
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh | 70
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh | 62
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh | 82
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh | 69
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh | 56
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh | 46
-rw-r--r--  ctdb/tests/CLUSTER/complex/README | 2
-rw-r--r--  ctdb/tests/CLUSTER/complex/scripts/local.bash | 289
18 files changed, 1546 insertions, 0 deletions
diff --git a/ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh b/ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh
new file mode 100755
index 0000000..dba6d07
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Verify that a node's public IP address can be deleted using 'ctdb delip'.
+
+# This is an extended version of simple/17_ctdb_config_delete_ip.sh
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+get_test_ip_mask_and_iface
+
+echo "Checking that node ${test_node} hosts ${test_ip}..."
+try_command_on_node $test_node "ip addr show to ${test_ip} | grep -q ."
+
+echo "Attempting to remove ${test_ip} from node ${test_node}."
+try_command_on_node $test_node $CTDB delip $test_ip
+try_command_on_node $test_node $CTDB ipreallocate
+wait_until_ips_are_on_node '!' $test_node $test_ip
+
+timeout=60
+increment=5
+count=0
+echo "Waiting for ${test_ip} to disappear from node ${test_node}..."
+while : ; do
+	try_command_on_node -v $test_node "ip addr show to ${test_ip}"
+	if [ -n "$out" ] ; then
+ echo "Still there..."
+ if [ $(($count * $increment)) -ge $timeout ] ; then
+ echo "BAD: Timed out waiting..."
+ exit 1
+ fi
+ sleep_for $increment
+ count=$(($count + 1))
+ else
+ break
+ fi
+done
+
+echo "GOOD: IP was successfully removed!"
diff --git a/ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh b/ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh
new file mode 100755
index 0000000..150aeea
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh
@@ -0,0 +1,257 @@
+#!/bin/bash
+
+# Verify that adding/deleting IPs using 'ctdb reloadips' works
+
+# Checks that when IPs are added to and deleted from a single node then
+# those IPs are actually assigned and unassigned from the specified
+# interface.
+
+# Prerequisites:
+
+# * An active CTDB cluster with public IP addresses configured
+
+# Expected results:
+
+# * When IPs are added to a single node then they are assigned to an
+# interface.
+
+# * When IPs are deleted from a single node then they disappear from an
+# interface.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+####################
+
+# Search for an unused 10.B.1.0/24 network on which to add public IP
+# addresses.
+
+# The initial search is for a 10.B.0.0/16 network since some
+# configurations may use a whole class B for the private network.
+# Check that there are no public IP addresses (as reported by "ctdb ip
+# all") or other IP addresses (as reported by "ip addr show") with
+# the provided prefix. Note that this is an IPv4-specific test.
+
+echo "Getting public IP information from CTDB..."
+ctdb_onnode "$test_node" "ip -X -v all"
+ctdb_ip_info=$(awk -F'|' 'NR > 1 { print $2, $3, $5 }' "$outfile")
+
+echo "Getting IP information from interfaces..."
+try_command_on_node all "ip addr show"
+ip_addr_info=$(awk '$1 == "inet" { ip = $2; sub(/\/.*/, "", ip); print ip }' \
+ "$outfile")
+
+prefix=""
+for b in $(seq 0 255) ; do
+ prefix="10.${b}"
+
+	# Does the prefix match any IP address returned by "ip addr show"?
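+	# (Illustration with hypothetical values, not part of the test:
+	#  for prefix=10.42 and ip=10.42.3.7, "${ip#${prefix}.}" strips the
+	#  leading "10.42." giving "3.7", which differs from $ip, so the
+	#  prefix is already in use and the search moves on.)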
+ while read ip ; do
+ if [ "${ip#${prefix}.}" != "$ip" ] ; then
+ prefix=""
+ continue 2
+ fi
+ done <<<"$ip_addr_info"
+
+	# Does the prefix match any public IP address reported by "ctdb ip all"?
+ while read ip pnn iface ; do
+ if [ "${ip#${prefix}.}" != "$ip" ] ; then
+ prefix=""
+ continue 2
+ fi
+ done <<<"$ctdb_ip_info"
+
+ # Got through the IPs without matching prefix - done!
+ break
+done
+
+[ -n "$prefix" ] || die "Unable to find a usable IP address prefix"
+
+# We really want a class C: 10.B.1.0/24
+prefix="${prefix}.1"
+
+####################
+
+iface=$(echo "$ctdb_ip_info" | awk -v pnn=$test_node '$2 == pnn { print $3 ; exit }')
+
+####################
+
+# This only needs to be set on the recmaster, but setting it on all nodes does the trick.
+new_takeover_timeout=90
+echo "Setting TakeoverTimeout=${new_takeover_timeout} to avoid potential bans"
+try_command_on_node all "$CTDB setvar TakeoverTimeout ${new_takeover_timeout}"
+
+####################
+
+try_command_on_node $test_node $CTDB_TEST_WRAPPER ctdb_base_show
+addresses="${out}/public_addresses"
+echo "Public addresses file on node $test_node is \"$addresses\""
+backup="${addresses}.$$"
+
+backup_public_addresses ()
+{
+ try_command_on_node $test_node "cp -a $addresses $backup"
+}
+
+restore_public_addresses ()
+{
+ try_command_on_node $test_node "mv $backup $addresses >/dev/null 2>&1 || true"
+}
+ctdb_test_exit_hook_add restore_public_addresses
+
+# Now create that backup
+backup_public_addresses
+
+####################
+
+add_ips_to_original_config ()
+{
+ local test_node="$1"
+ local addresses="$2"
+ local iface="$3"
+ local prefix="$4"
+ local first="$5"
+ local last="$6"
+
+ echo "Adding new public IPs to original config on node ${test_node}..."
+ echo "IPs will be ${prefix}.${first}/24..${prefix}.${last}/24"
+
+ # Implement this by completely rebuilding the public_addresses
+ # file. This is easier than deleting entries on a remote node.
+ restore_public_addresses
+ backup_public_addresses
+
+ # Note that tee is a safe way of creating a file on a remote node.
+ # This avoids potential fragility with quoting or redirection.
+ for i in $(seq $first $last) ; do
+ echo "${prefix}.${i}/24 ${iface}"
+ done |
+ try_command_on_node -i $test_node "tee -a $addresses"
+}
+
+check_ips ()
+{
+ local test_node="$1"
+ local iface="$2"
+ local prefix="$3"
+ local first="$4"
+ local last="$5"
+
+	# If just 0 is specified then this is an empty range
+ local public_ips_file=$(mktemp)
+ if [ "$first" = 0 -a -z "$last" ] ; then
+ echo "Checking that there are no IPs in ${prefix}.0/24"
+ else
+ local prefix_regexp="inet *${prefix//./\.}"
+
+ echo "Checking IPs in range ${prefix}.${first}/24..${prefix}.${last}/24"
+
+ local i
+ for i in $(seq $first $last) ; do
+ echo "${prefix}.${i}"
+ done | sort >"$public_ips_file"
+ fi
+
+ try_command_on_node $test_node "ip addr show dev ${iface}"
+ local ip_addrs_file=$(mktemp)
+ cat "$outfile" | \
+ sed -n -e "s@.*inet * \(${prefix//./\.}\.[0-9]*\)/.*@\1@p" | \
+ sort >"$ip_addrs_file"
+
+ local diffs=$(diff "$public_ips_file" "$ip_addrs_file") || true
+ rm -f "$ip_addrs_file" "$public_ips_file"
+
+ if [ -z "$diffs" ] ; then
+ echo "GOOD: IP addresses are as expected"
+ else
+ echo "BAD: IP addresses are incorrect:"
+ echo "$diffs"
+ exit 1
+ fi
+}
+
+# ctdb reloadips will fail if it can't disable takeover runs. The most
+# likely reason for this is that there is already a takeover run in
+# progress. We can't predict when this will happen, so retry if this
+# occurs.
+do_ctdb_reloadips ()
+{
+ local retry_max=10
+ local retry_count=0
+ while : ; do
+ if try_command_on_node "$test_node" "$CTDB reloadips" ; then
+ return 0
+ fi
+
+ if [ "$out" != "Failed to disable takeover runs" ] ; then
+ return 1
+ fi
+
+ if [ $retry_count -ge $retry_max ] ; then
+ return 1
+ fi
+
+ retry_count=$((retry_count + 1))
+ echo "Retrying..."
+ sleep_for 1
+ done
+}
+
+####################
+
+new_ip_max=100
+
+####################
+
+add_ips_to_original_config \
+ $test_node "$addresses" "$iface" "$prefix" 1 $new_ip_max
+
+do_ctdb_reloadips
+
+check_ips $test_node "$iface" "$prefix" 1 $new_ip_max
+
+ctdb_onnode "$test_node" sync
+
+####################
+
+# This should be the primary. Ensure that no other IPs are lost
+echo "Using 'ctdb reloadips' to remove the 1st address just added..."
+
+add_ips_to_original_config \
+ $test_node "$addresses" "$iface" "$prefix" 2 $new_ip_max
+
+do_ctdb_reloadips
+
+check_ips $test_node "$iface" "$prefix" 2 $new_ip_max
+
+ctdb_onnode "$test_node" sync
+
+####################
+
+# Get rid of about 1/2 the IPs
+start=$(($new_ip_max / 2 + 1))
+echo "Updating to include only about 1/2 of the new IPs..."
+
+add_ips_to_original_config \
+ $test_node "$addresses" "$iface" "$prefix" $start $new_ip_max
+
+do_ctdb_reloadips
+
+check_ips $test_node "$iface" "$prefix" $start $new_ip_max
+
+ctdb_onnode "$test_node" sync
+
+####################
+
+# Delete the rest
+echo "Restoring original IP configuration..."
+restore_public_addresses
+
+do_ctdb_reloadips
+
+check_ips $test_node "$iface" "$prefix" 0
diff --git a/ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh b/ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh
new file mode 100755
index 0000000..4d8f617
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Verify that NFS connections are monitored and that NFS tickles are sent.
+
+# Create a connection to the NFS server on a node. Then disable the
+# relevant NFS server node and ensure that it sends an appropriate reset
+# packet. The packet must come from the releasing node.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Cluster nodes must be listening on the NFS TCP port (2049).
+
+# Expected results:
+
+# * CTDB on the releasing node should correctly send a reset packet when
+# the node is disabled.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+test_port=2049
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..."
+
+sleep 30 | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+echo "Getting MAC address associated with ${test_ip}..."
+releasing_mac=$(ip neigh show $test_prefix | awk '$4 == "lladdr" {print $5}')
+[ -n "$releasing_mac" ] || die "Couldn't get MAC address for ${test_prefix}"
+echo "MAC address is: ${releasing_mac}"
+
+tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+# Only look for a reset from the releasing node
+tcptickle_sniff_wait_show "$releasing_mac"
diff --git a/ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh b/ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh
new file mode 100755
index 0000000..e3f1540
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Verify that NFS connections are monitored and that NFS tickles are sent.
+
+# We create a connection to the NFS server on a node and confirm that
+# this connection is registered in the nfs-tickles/ subdirectory in
+# shared storage. Then kill ctdbd on the relevant NFS server node and
+# ensure that the takeover node sends an appropriate reset packet.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Cluster nodes must be listening on the NFS TCP port (2049).
+
+# Expected results:
+
+# * CTDB should correctly record the socket and on failover the takeover
+# node should send a reset packet.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+try_command_on_node $test_node "$CTDB listnodes | wc -l"
+numnodes="$out"
+
+# We need this for later, so we know how long to run nc for.
+ctdb_onnode "$test_node" "getvar MonitorInterval"
+monitor_interval="${out#*= }"
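+# ("${out#*= }" strips everything up to and including "= " from the
+# getvar output, leaving just the interval in seconds.)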
+
+test_port=2049
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..."
+
+sleep $((monitor_interval * 4)) | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+wait_for_monitor_event $test_node
+
+echo "Wait until NFS connection is tracked by CTDB on test node ..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
+
+echo "Getting TicklesUpdateInterval..."
+try_command_on_node $test_node $CTDB getvar TickleUpdateInterval
+update_interval="$out"
+
+echo "Wait until NFS connection is tracked by CTDB on all nodes..."
+wait_until $(($update_interval * 2)) \
+ check_tickles_all $numnodes $test_ip $test_port $src_socket
+
+tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
+
+# We need to be nasty to make sure that the node being failed out doesn't
+# get a chance to send any tickles and confuse our sniff. IPs also
+# need to be dropped because we're simulating a dead node rather than
+# a CTDB failure. To properly handle a CTDB failure we would need a
+# watchdog to drop the IPs when CTDB disappears.
+echo "Killing ctdbd on ${test_node}..."
+try_command_on_node -v $test_node "killall -9 ctdbd ; $CTDB_TEST_WRAPPER drop_ips ${test_node_ips}"
+
+wait_until_node_has_status $test_node disconnected
+
+tcptickle_sniff_wait_show
diff --git a/ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh b/ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh
new file mode 100755
index 0000000..c5b583d
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Verify that CIFS connections are monitored and that CIFS tickles are sent.
+
+# We create a connection to the CIFS server on a node and confirm that
+# this connection is registered by CTDB. Then disable the relevant CIFS
+# server node and ensure that the takeover node sends an appropriate
+# reset packet.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Clustered Samba must be listening on TCP port 445.
+
+# Expected results:
+
+# * CTDB should correctly record the connection and the takeover node
+# should send a reset packet.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+# We need this for later, so we know how long to sleep.
+try_command_on_node 0 $CTDB getvar MonitorInterval
+monitor_interval="${out#*= }"
+#echo "Monitor interval on node $test_node is $monitor_interval seconds."
+
+select_test_node_and_ips
+
+test_port=445
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..."
+
+sleep $((monitor_interval * 4)) | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+# This should happen as soon as the connection is up... but unless we wait
+# we sometimes beat the registration.
+echo "Checking if CIFS connection is tracked by CTDB on test node..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
+
+# This is almost immediate. However, it is sent between nodes
+# asynchronously, so it is worth checking...
+echo "Wait until CIFS connection is tracked by CTDB on all nodes..."
+try_command_on_node $test_node "$CTDB listnodes | wc -l"
+numnodes="$out"
+wait_until 5 \
+ check_tickles_all $numnodes $test_ip $test_port $src_socket
+tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+tcptickle_sniff_wait_show
diff --git a/ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh b/ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh
new file mode 100755
index 0000000..7a0944f
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+# Verify that a gratuitous ARP is sent when a node is failed out.
+
+# We ping a public IP and lookup the MAC address in the ARP table. We
+# then disable the node and check the ARP table again - the MAC address
+# should have changed. This test does NOT test connectivity after the
+# failover.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Remove any entries for the chosen address from the ARP table.
+# 4. Send a single ping request packet to the selected public address.
+# 5. Determine the MAC address corresponding to the public address by
+# checking the ARP table.
+# 6. Disable the selected node.
+# 7. Check the ARP table and check the MAC associated with the public
+# address.
+
+# Expected results:
+
+# * When a node is disabled the MAC address associated with public
+# addresses on that node should change.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Removing ${test_ip} from the local ARP table..."
+ip neigh flush "$test_prefix" >/dev/null 2>&1 || true
+
+echo "Pinging ${test_ip}..."
+ping_wrapper -q -n -c 1 $test_ip
+
+echo "Getting MAC address associated with ${test_ip}..."
+original_mac=$(ip neigh show $test_prefix | awk '$4 == "lladdr" {print $5}')
+[ -n "$original_mac" ] || die "Couldn't get MAC address for ${test_prefix}"
+
+echo "MAC address is: ${original_mac}"
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+echo "Getting MAC address associated with ${test_ip} again..."
+new_mac=$(ip neigh show $test_prefix | awk '$4 == "lladdr" {print $5}')
+[ -n "$new_mac" ] || die "Couldn't get MAC address for ${test_prefix}"
+
+echo "MAC address is: ${new_mac}"
+
+if [ "$original_mac" != "$new_mac" ] ; then
+ echo "GOOD: MAC address changed"
+else
+ die "BAD: MAC address did not change"
+fi
diff --git a/ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh b/ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh
new file mode 100755
index 0000000..b81510d
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+# Verify that a newly started CTDB node gets updated tickle details
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Cluster nodes must be listening on the NFS TCP port (2049).
+
+# Steps:
+
+# As with 31_nfs_tickle.sh but restart a node after the tickle is
+# registered.
+
+# Expected results:
+
+# * CTDB should correctly communicate tickles to new CTDB instances as
+# they join the cluster.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+try_command_on_node $test_node "$CTDB listnodes -X"
+listnodes_output="$out"
+numnodes=$(wc -l <<<"$listnodes_output")
+
+test_port=2049
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..."
+
+sleep 600 | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+wait_for_monitor_event $test_node
+
+echo "Wait until NFS connection is tracked by CTDB on test node ..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
+
+echo "Select a node to restart ctdbd"
+rn=$(awk -F'|' -v test_node=$test_node \
+ '$2 != test_node { print $2 ; exit }' <<<"$listnodes_output")
+
+echo "Restarting CTDB on node ${rn}"
+ctdb_nodes_restart "$rn"
+
+# In some theoretical world this is racy. In practice, the node will
+# take quite a while to become healthy, so this will beat any
+# assignment of IPs to the node.
+echo "Setting NoIPTakeover on node ${rn}"
+try_command_on_node $rn $CTDB setvar NoIPTakeover 1
+
+wait_until_ready
+
+echo "Getting TickleUpdateInterval..."
+try_command_on_node $test_node $CTDB getvar TickleUpdateInterval
+update_interval="$out"
+
+echo "Wait until NFS connection is tracked by CTDB on all nodes..."
+if ! wait_until $(($update_interval * 2)) \
+ check_tickles_all $numnodes $test_ip $test_port $src_socket ; then
+ echo "BAD: connection not tracked on all nodes:"
+ echo "$out"
+ exit 1
+fi
+
+# We could go on to test whether the tickle ACK gets sent. However,
+# this is tested in previous tests and the use of NoIPTakeover
+# complicates things on a 2 node cluster.
diff --git a/ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh b/ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh
new file mode 100755
index 0000000..d0f3d08
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Verify that the server end of an SMB connection is correctly reset
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Clustered Samba must be listening on TCP port 445.
+
+# Expected results:
+
+# * CTDB should correctly record the connection and the releasing node
+# should reset the server end of the connection.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+# We need this for later, so we know how long to sleep.
+try_command_on_node 0 $CTDB getvar MonitorInterval
+monitor_interval="${out#*= }"
+
+select_test_node_and_ips
+
+test_port=445
+
+echo "Set NoIPTakeover=1 on all nodes"
+try_command_on_node all $CTDB setvar NoIPTakeover 1
+
+echo "Give the recovery daemon some time to reload tunables"
+sleep_for 5
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with nc..."
+
+sleep $((monitor_interval * 4)) | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+# This should happen as soon as the connection is up... but unless we wait
+# we sometimes beat the registration.
+echo "Waiting until SMB connection is tracked by CTDB on test node..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
+
+# It would be nice if ss consistently used local/peer instead of src/dst
+ss_filter="src ${test_ip}:${test_port} dst ${src_socket}"
+
+try_command_on_node $test_node \
+ "ss -tn state established '${ss_filter}' | tail -n +2"
+if [ -z "$out" ] ; then
+ echo "BAD: ss did not list the socket"
+ exit 1
+fi
+echo "GOOD: ss lists the socket:"
+cat "$outfile"
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+try_command_on_node $test_node \
+ "ss -tn state established '${ss_filter}' | tail -n +2"
+if [ -n "$out" ] ; then
+ echo "BAD: ss listed the socket after failover"
+ exit 1
+fi
+echo "GOOD: ss no longer lists the socket"
diff --git a/ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh b/ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh
new file mode 100755
index 0000000..3e249f9
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Verify that the server end of an NFS connection is correctly reset
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Cluster nodes must be listening on the NFS TCP port (2049).
+
+# Expected results:
+
+# * CTDB should correctly record the connection and the releasing node
+# should reset the server end of the connection.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+# We need this for later, so we know how long to sleep.
+try_command_on_node 0 $CTDB getvar MonitorInterval
+monitor_interval="${out#*= }"
+
+select_test_node_and_ips
+
+test_port=2049
+
+echo "Set NoIPTakeover=1 on all nodes"
+try_command_on_node all $CTDB setvar NoIPTakeover 1
+
+echo "Give the recovery daemon some time to reload tunables"
+sleep_for 5
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with nc..."
+
+sleep $((monitor_interval * 4)) | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+echo "Wait until NFS connection is tracked by CTDB on test node ..."
+wait_until $((monitor_interval * 2)) \
+ check_tickles $test_node $test_ip $test_port $src_socket
+cat "$outfile"
+
+# It would be nice if ss consistently used local/peer instead of src/dst
+ss_filter="src ${test_ip}:${test_port} dst ${src_socket}"
+
+try_command_on_node $test_node \
+ "ss -tn state established '${ss_filter}' | tail -n +2"
+if [ -z "$out" ] ; then
+ echo "BAD: ss did not list the socket"
+ exit 1
+fi
+echo "GOOD: ss lists the socket:"
+cat "$outfile"
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+try_command_on_node $test_node \
+ "ss -tn state established '${ss_filter}' | tail -n +2"
+if [ -n "$out" ] ; then
+ echo "BAD: ss listed the socket after failover"
+ exit 1
+fi
+echo "GOOD: ss no longer lists the socket"
diff --git a/ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh b/ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh
new file mode 100755
index 0000000..539d25e
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Verify that it is possible to ping a public address after disabling a node.
+
+# We ping a public IP, disable the node hosting it and then ping the
+# public IP again.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Send a single ping request packet to the selected public address.
+# 4. Disable the selected node.
+# 5. Send another single ping request packet to the selected public address.
+
+# Expected results:
+
+# * When a node is disabled the public address fails over and the
+# address is still pingable.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Removing ${test_ip} from the local neighbor table..."
+ip neigh flush "$test_prefix" >/dev/null 2>&1 || true
+
+echo "Pinging ${test_ip}..."
+ping_wrapper -q -n -c 1 $test_ip
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+echo "Removing ${test_ip} from the local neighbor table again..."
+ip neigh flush "$test_prefix" >/dev/null 2>&1 || true
+
+echo "Pinging ${test_ip} again..."
+ping_wrapper -q -n -c 1 $test_ip
diff --git a/ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh b/ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh
new file mode 100755
index 0000000..233819b
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Verify that it is possible to SSH to a public address after disabling a node.
+
+# We SSH to a public IP and check the hostname, disable the node hosting
+# it and then SSH again to confirm that the hostname has changed.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. SSH to the selected public address and run hostname.
+# 4. Disable the selected node.
+# 5. SSH to the selected public address again and run hostname.
+
+# Expected results:
+
+# * When a node is disabled the public address fails over and it is
+# still possible to SSH to the node. The hostname should change.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Removing ${test_ip} from the local neighbor table..."
+ip neigh flush "$test_prefix" >/dev/null 2>&1 || true
+
+echo "SSHing to ${test_ip} and running hostname..."
+if ! original_hostname=$(ssh -o "StrictHostKeyChecking no" $test_ip hostname) ; then
+ die "Failed to get original hostname via SSH..."
+fi
+
+echo "Hostname is: ${original_hostname}"
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+echo "SSHing to ${test_ip} and running hostname (again)..."
+if ! new_hostname=$(ssh -o "StrictHostKeyChecking no" $test_ip hostname) ; then
+ echo "Failed to get new hostname via SSH..."
+ echo "DEBUG:"
+ ip neigh show
+ exit 1
+fi
+
+echo "Hostname is: ${new_hostname}"
+
+if [ "$original_hostname" != "$new_hostname" ] ; then
+ echo "GOOD: hostname changed"
+else
+ die "BAD: hostname did not change"
+fi
diff --git a/ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh b/ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh
new file mode 100755
index 0000000..ac2cafd
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# Verify that a mounted NFS share is still operational after failover.
+
+# We mount an NFS share from a node, write a file via NFS and then
+# confirm that we can correctly read the file after a failover.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Select the 1st NFS share exported on the node.
+# 4. Mount the selected NFS share.
+# 5. Create a file in the NFS mount and calculate its checksum.
+# 6. Disable the selected node.
+# 7. Read the file and calculate its checksum.
+# 8. Compare the checksums.
+
+# Expected results:
+
+# * When a node is disabled the public address fails over and it is
+# possible to correctly read a file over NFS. The checksums should be
+# the same before and after.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+nfs_test_setup
+
+echo "Create file containing random data..."
+dd if=/dev/urandom of=$nfs_local_file bs=1k count=1
+original_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 0 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+new_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+if [ "$original_md5" = "$new_md5" ] ; then
+ echo "GOOD: file contents unchanged after failover"
+else
+ die "BAD: file contents are different after failover"
+fi
diff --git a/ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh b/ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh
new file mode 100755
index 0000000..5c8324c
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# Verify that a file created on a node is readable via NFS after a failover.
+
+# We write a file into an exported directory on a node, mount the NFS
+# share from a node, verify that we can read the file via NFS and that
+# we can still read it after a failover.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Select the 1st NFS share exported on the node.
+# 4. Write a file into exported directory on the node and calculate its
+# checksum.
+# 5. Mount the selected NFS share.
+# 6. Read the file via the NFS mount and calculate its checksum.
+# 7. Compare checksums.
+# 8. Disable the selected node.
+# 9. Read the file via NFS and calculate its checksum.
+# 10. Compare the checksums.
+
+# Expected results:
+
+# * Checksums for the file on all 3 occasions should be the same.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+nfs_test_setup
+
+echo "Create file containing random data..."
+local_f=$(mktemp)
+ctdb_test_exit_hook_add rm -f "$local_f"
+dd if=/dev/urandom of=$local_f bs=1k count=1
+local_sum=$(sum $local_f)
+
+scp -p "$local_f" "[${test_ip}]:${nfs_remote_file}"
+try_command_on_node $test_node "chmod 644 $nfs_remote_file"
+
+nfs_sum=$(sum $nfs_local_file)
+
+if [ "$local_sum" = "$nfs_sum" ] ; then
+ echo "GOOD: file contents read correctly via NFS"
+else
+ echo "BAD: file contents are different over NFS"
+ echo " original file: $local_sum"
+ echo " NFS file: $nfs_sum"
+ exit 1
+fi
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 0 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+new_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+if [ "$nfs_sum" = "$new_sum" ] ; then
+ echo "GOOD: file contents unchanged after failover"
+else
+ echo "BAD: file contents are different after failover"
+ echo " original file: $nfs_sum"
+ echo " NFS file: $new_sum"
+ exit 1
+fi
diff --git a/ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh b/ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh
new file mode 100755
index 0000000..2d15748
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Verify that a mounted NFS share is still operational after failover.
+
+# We mount an NFS share from a node, write a file via NFS and then
+# confirm that we can correctly read the file after a failover.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Select the 1st NFS share exported on the node.
+# 4. Mount the selected NFS share.
+# 5. Create a file in the NFS mount and calculate its checksum.
+# 6. Kill CTDB on the selected node.
+# 7. Read the file and calculate its checksum.
+# 8. Compare the checksums.
+
+# Expected results:
+
+# * When a node is disabled the public address fails over and it is
+# possible to correctly read a file over NFS. The checksums should be
+# the same before and after.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+nfs_test_setup
+
+echo "Create file containing random data..."
+dd if=/dev/urandom of=$nfs_local_file bs=1k count=1
+original_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+gratarp_sniff_start
+
+echo "Killing node $test_node"
+try_command_on_node $test_node $CTDB getpid
+pid=${out#*:}
+# We need to be nasty to make sure that the node being failed out doesn't
+# get a chance to send any tickles or do anything else clever. IPs
+# also need to be dropped because we're simulating a dead node rather
+# than a CTDB failure. To properly handle a CTDB failure we would
+# need a watchdog to drop the IPs when CTDB disappears.
+try_command_on_node -v $test_node "kill -9 $pid ; $CTDB_TEST_WRAPPER drop_ips ${test_node_ips}"
+wait_until_node_has_status $test_node disconnected
+
+gratarp_sniff_wait_show
+
+new_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+if [ "$original_md5" = "$new_md5" ] ; then
+ echo "GOOD: file contents unchanged after failover"
+else
+ die "BAD: file contents are different after failover"
+fi
diff --git a/ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh b/ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh
new file mode 100755
index 0000000..efa9ef2
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Verify that the recovery daemon correctly handles a rogue IP
+
+# It should be released...
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Using $test_ip, which is onnode $test_node"
+
+# This test depends on being able to assign a duplicate address on a
+# 2nd node. However, IPv6 guards against this and causes the test to
+# fail.
+case "$test_ip" in
+*:*) ctdb_test_skip "This test is not supported for IPv6 addresses" ;;
+esac
+
+get_test_ip_mask_and_iface
+
+echo "Finding another node that knows about $test_ip"
+ctdb_get_all_pnns
+other_node=""
+for i in $all_pnns ; do
+ if [ "$i" = "$test_node" ] ; then
+ continue
+ fi
+ try_command_on_node $i "$CTDB ip"
+ n=$(awk -v ip="$test_ip" '$1 == ip { print }' "$outfile")
+ if [ -n "$n" ] ; then
+ other_node="$i"
+ break
+ fi
+done
+if [ -z "$other_node" ] ; then
+ die "Unable to find another node that knows about $test_ip"
+fi
+
+echo "Adding $test_ip on node $other_node"
+try_command_on_node $other_node "ip addr add ${test_ip}/${mask} dev ${iface}"
+
+rogue_ip_is_gone ()
+{
+ local pnn="$1"
+ local test_ip="$2"
+ try_command_on_node $pnn $CTDB_TEST_WRAPPER ip_maskbits_iface $test_ip
+ [ -z "$out" ]
+}
+
+echo "Waiting until rogue IP is no longer assigned..."
+wait_until 30 rogue_ip_is_gone $other_node $test_ip
diff --git a/ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh b/ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh
new file mode 100755
index 0000000..5ee4e54
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Verify that TAKE_IP will work for an IP that is already on an interface
+
+# This is a variation of simple/60_recoverd_missing_ip.sh
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Running test against node $test_node and IP $test_ip"
+
+# This test puts an address on an interface and then needs to quickly
+# configure that address and cause an IP takeover. However, an IPv6
+# address will be tentative for a while, so "quickly" is not possible.
+# When ctdb_control_takeover_ip() calls ctdb_sys_have_ip() it will
+# decide that the address is not present. It then attempts a takeip,
+# which can fail if the address is suddenly present because it is no
+# longer tentative.
+case "$test_ip" in
+*:*) ctdb_test_skip "This test is not supported for IPv6 addresses" ;;
+esac
+
+get_test_ip_mask_and_iface
+
+echo "Deleting IP $test_ip from all nodes"
+delete_ip_from_all_nodes $test_ip
+try_command_on_node -v $test_node $CTDB ipreallocate
+wait_until_ips_are_on_node '!' $test_node $test_ip
+
+try_command_on_node -v all $CTDB ip
+
+# The window here needs to be small, to try to avoid the address being
+# released. The test will still pass either way but if the first IP
+# takeover run does a release then this doesn't test the code path we
+# expect it to...
+echo "Adding IP $test_ip to $iface and CTDB on node $test_node"
+ip_cmd="ip addr add $test_ip/$mask dev $iface"
+ctdb_cmd="$CTDB addip $test_ip/$mask $iface && $CTDB ipreallocate"
+try_command_on_node $test_node "$ip_cmd && $ctdb_cmd"
+
+wait_until_ips_are_on_node $test_node $test_ip
diff --git a/ctdb/tests/CLUSTER/complex/README b/ctdb/tests/CLUSTER/complex/README
new file mode 100644
index 0000000..72de396
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/README
@@ -0,0 +1,2 @@
+Complex integration tests. These need a real or virtual cluster.
+That is, they can not be run against local daemons.
diff --git a/ctdb/tests/CLUSTER/complex/scripts/local.bash b/ctdb/tests/CLUSTER/complex/scripts/local.bash
new file mode 100644
index 0000000..0e2addd
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/scripts/local.bash
@@ -0,0 +1,289 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+# Thanks/blame to Stephen Rothwell for suggesting that this can be
+# done in the shell. ;-)
+ipv6_to_hex ()
+{
+ local addr="$1"
+
+ # Replace "::" by something special.
+ local foo="${addr/::/:@:}"
+
+ # Join the groups of digits together, 0-padding each group of
+ # digits out to 4 digits, and count the number of (non-@) groups
+ local out=""
+ local count=0
+ local i
+ for i in $(IFS=":" ; echo $foo ) ; do
+ if [ "$i" = "@" ] ; then
+ out="${out}@"
+ else
+ out="${out}$(printf '%04x' 0x${i})"
+ count=$(($count + 4))
+ fi
+ done
+
+ # Replace '@' with correct number of zeroes
+ local zeroes=$(printf "%0$((32 - $count))x" 0)
+ echo "${out/@/${zeroes}}"
+}
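+
+# Example of the conversion above (hypothetical address, not used by any
+# test): ipv6_to_hex "fd00::5:1" prints "fd000000000000000000000000050001".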
+
+#######################################
+
+get_src_socket ()
+{
+ local proto="$1"
+ local dst_socket="$2"
+ local pid="$3"
+ local prog="$4"
+
+ local pat="^${proto}6?[[:space:]]+[[:digit:]]+[[:space:]]+[[:digit:]]+[[:space:]]+[^[:space:]]+[[:space:]]+${dst_socket//./\\.}[[:space:]]+ESTABLISHED[[:space:]]+${pid}/${prog}[[:space:]]*\$"
+ out=$(netstat -tanp |
+ egrep "$pat" |
+ awk '{ print $4 }')
+
+ [ -n "$out" ]
+}
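+
+# (Hypothetical netstat line matched by the pattern above:
+#  "tcp 0 0 10.0.2.45:49091 10.0.2.143:2049 ESTABLISHED 1234/nc"
+#  -- awk's $4 is the local/source socket, which ends up in $out.)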
+
+wait_until_get_src_socket ()
+{
+ local proto="$1"
+ local dst_socket="$2"
+ local pid="$3"
+ local prog="$4"
+
+ echo "Waiting for ${prog} to establish connection to ${dst_socket}..."
+
+ wait_until 5 get_src_socket "$@"
+}
+
+#######################################
+
+check_tickles ()
+{
+ local node="$1"
+ local test_ip="$2"
+ local test_port="$3"
+ local src_socket="$4"
+ try_command_on_node $node ctdb gettickles $test_ip $test_port
+ # SRC: 10.0.2.45:49091 DST: 10.0.2.143:445
+ grep -Fq "SRC: ${src_socket} " "$outfile"
+}
+
+check_tickles_all ()
+{
+ local numnodes="$1"
+ local test_ip="$2"
+ local test_port="$3"
+ local src_socket="$4"
+
+ try_command_on_node all ctdb gettickles $test_ip $test_port
+ # SRC: 10.0.2.45:49091 DST: 10.0.2.143:445
+ local count=$(grep -Fc "SRC: ${src_socket} " "$outfile" || true)
+ [ $count -eq $numnodes ]
+}
+
+
+
+#######################################
+
+# filename will be in $tcpdump_filename, pid in $tcpdump_pid
+tcpdump_start ()
+{
+ tcpdump_filter="$1" # global
+
+ echo "Running tcpdump..."
+ tcpdump_filename=$(mktemp)
+ ctdb_test_exit_hook_add "rm -f $tcpdump_filename"
+
+ # The only way of being sure that tcpdump is listening is to send
+ # some packets that it will see. So we use dummy pings - the -U
+ # option to tcpdump ensures that packets are flushed to the file
+ # as they are captured.
+ local dummy_addr="127.3.2.1"
+ local dummy="icmp and dst host ${dummy_addr} and icmp[icmptype] == icmp-echo"
+ tcpdump -n -p -s 0 -e -U -w $tcpdump_filename -i any "($tcpdump_filter) or ($dummy)" &
+ ctdb_test_exit_hook_add "kill $! >/dev/null 2>&1"
+
+ echo "Waiting for tcpdump output file to be ready..."
+ ping -q "$dummy_addr" >/dev/null 2>&1 &
+ ctdb_test_exit_hook_add "kill $! >/dev/null 2>&1"
+
+ tcpdump_listen_for_dummy ()
+ {
+ tcpdump -n -r $tcpdump_filename -c 1 "$dummy" >/dev/null 2>&1
+ }
+
+ wait_until 10 tcpdump_listen_for_dummy
+}
+
+# By default, wait for 1 matching packet.
+tcpdump_wait ()
+{
+ local count="${1:-1}"
+ local filter="${2:-${tcpdump_filter}}"
+
+ tcpdump_check ()
+ {
+ # It would be much nicer to add "ether src
+ # $releasing_mac" to the filter. However, tcpdump
+ # does not allow MAC filtering unless an ethernet
+ # interface is specified with -i. It doesn't work
+ # with "-i any" and it doesn't work when reading from
+ # a file. :-(
+ local found
+ if [ -n "$releasing_mac" ] ; then
+ found=$(tcpdump -n -e -r "$tcpdump_filename" \
+ "$filter" 2>/dev/null |
+ grep -c "In ${releasing_mac}")
+ else
+ found=$(tcpdump -n -e -r "$tcpdump_filename" \
+ "$filter" 2>/dev/null |
+ wc -l)
+ fi
+
+ [ $found -ge $count ]
+ }
+
+ echo "Waiting for tcpdump to capture some packets..."
+ if ! wait_until 30 tcpdump_check ; then
+ echo "DEBUG AT $(date '+%F %T'):"
+ local i
+ for i in "onnode -q 0 $CTDB status" \
+ "netstat -tanp" \
+ "tcpdump -n -e -r $tcpdump_filename" ; do
+ echo "$i"
+ $i || true
+ done
+ return 1
+ fi
+}
+
+tcpdump_show ()
+{
+ local filter="${1:-${tcpdump_filter}}"
+
+ tcpdump -n -e -vv -XX -r $tcpdump_filename "$filter" 2>/dev/null
+}
+
+tcp4tickle_sniff_start ()
+{
+ local src="$1"
+ local dst="$2"
+
+ local in="src host ${dst%:*} and tcp src port ${dst##*:} and dst host ${src%:*} and tcp dst port ${src##*:}"
+ local out="src host ${src%:*} and tcp src port ${src##*:} and dst host ${dst%:*} and tcp dst port ${dst##*:}"
+ local tickle_ack="${in} and (tcp[tcpflags] & tcp-ack != 0) and (tcp[14:2] == 1234)" # win == 1234
+ local ack_ack="${out} and (tcp[tcpflags] & tcp-ack != 0)"
+ tcptickle_reset="${in} and tcp[tcpflags] & tcp-rst != 0"
+ local filter="(${tickle_ack}) or (${ack_ack}) or (${tcptickle_reset})"
+
+ tcpdump_start "$filter"
+}
+
+# tcp[] does not work for IPv6 (in some versions of tcpdump)
+tcp6tickle_sniff_start ()
+{
+ local src="$1"
+ local dst="$2"
+
+ local in="src host ${dst%:*} and tcp src port ${dst##*:} and dst host ${src%:*} and tcp dst port ${src##*:}"
+ local out="src host ${src%:*} and tcp src port ${src##*:} and dst host ${dst%:*} and tcp dst port ${dst##*:}"
+ local tickle_ack="${in} and (ip6[53] & tcp-ack != 0) and (ip6[54:2] == 1234)" # win == 1234
+ local ack_ack="${out} and (ip6[53] & tcp-ack != 0)"
+ tcptickle_reset="${in} and ip6[53] & tcp-rst != 0"
+ local filter="(${tickle_ack}) or (${ack_ack}) or (${tcptickle_reset})"
+
+ tcpdump_start "$filter"
+}
+
+tcptickle_sniff_start ()
+{
+ local src="$1"
+ local dst="$2"
+
+ case "${dst%:*}" in
+ *:*) tcp6tickle_sniff_start "$src" "$dst" ;;
+ *) tcp4tickle_sniff_start "$src" "$dst" ;;
+ esac
+}
+
+tcptickle_sniff_wait_show ()
+{
+ local releasing_mac="$1" # optional, used by tcpdump_wait()
+
+ tcpdump_wait 1 "$tcptickle_reset"
+
+ echo "GOOD: here are some TCP tickle packets:"
+ tcpdump_show
+}
+
+gratarp4_sniff_start ()
+{
+ tcpdump_start "arp host ${test_ip}"
+}
+
+gratarp6_sniff_start ()
+{
+ local neighbor_advertisement="icmp6 and ip6[40] == 136"
+ local hex=$(ipv6_to_hex "$test_ip")
+ local match_target="ip6[48:4] == 0x${hex:0:8} and ip6[52:4] == 0x${hex:8:8} and ip6[56:4] == 0x${hex:16:8} and ip6[60:4] == 0x${hex:24:8}"
+
+ tcpdump_start "${neighbor_advertisement} and ${match_target}"
+}
+
+gratarp_sniff_start ()
+{
+ case "$test_ip" in
+ *:*) gratarp6_sniff_start ;;
+ *) gratarp4_sniff_start ;;
+ esac
+}
+
+gratarp_sniff_wait_show ()
+{
+ tcpdump_wait 2
+
+	echo "GOOD: these should be some gratuitous ARPs:"
+ tcpdump_show
+}
+
+ping_wrapper ()
+{
+ case "$*" in
+ *:*) ping6 "$@" ;;
+ *) ping "$@" ;;
+ esac
+}
+
+#######################################
+
+nfs_test_setup ()
+{
+ select_test_node_and_ips
+
+ nfs_first_export=$(showmount -e $test_ip | sed -n -e '2s/ .*//p')
+
+ echo "Creating test subdirectory..."
+ try_command_on_node $test_node "TMPDIR=$nfs_first_export mktemp -d"
+ nfs_test_dir="$out"
+ try_command_on_node $test_node "chmod 777 $nfs_test_dir"
+
+ nfs_mnt_d=$(mktemp -d)
+ nfs_local_file="${nfs_mnt_d}/${nfs_test_dir##*/}/TEST_FILE"
+ nfs_remote_file="${nfs_test_dir}/TEST_FILE"
+
+ ctdb_test_exit_hook_add nfs_test_cleanup
+
+ echo "Mounting ${test_ip}:${nfs_first_export} on ${nfs_mnt_d} ..."
+ mount -o timeo=1,hard,intr,vers=3 \
+ "[${test_ip}]:${nfs_first_export}" ${nfs_mnt_d}
+}
+
+nfs_test_cleanup ()
+{
+ rm -f "$nfs_local_file"
+ umount -f "$nfs_mnt_d"
+ rmdir "$nfs_mnt_d"
+ onnode -q $test_node rmdir "$nfs_test_dir"
+}