Diffstat (limited to 'ctdb/tests/CLUSTER')
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh | 43
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh | 257
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh | 57
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh | 77
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh | 69
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh | 74
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh | 81
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh | 78
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh | 78
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh | 56
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh | 70
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh | 62
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh | 82
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh | 69
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh | 56
-rwxr-xr-x  ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh | 46
-rw-r--r--  ctdb/tests/CLUSTER/complex/README | 2
-rw-r--r--  ctdb/tests/CLUSTER/complex/scripts/local.bash | 289
18 files changed, 1546 insertions, 0 deletions
diff --git a/ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh b/ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh
new file mode 100755
index 0000000..dba6d07
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/11_ctdb_delip_removes_ip.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Verify that a node's public IP address can be deleted using 'ctdb delip'.
+
+# This is an extended version of simple/17_ctdb_config_delete_ip.sh
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+get_test_ip_mask_and_iface
+
+echo "Checking that node ${test_node} hosts ${test_ip}..."
+try_command_on_node $test_node "ip addr show to ${test_ip} | grep -q ."
+
+echo "Attempting to remove ${test_ip} from node ${test_node}."
+try_command_on_node $test_node $CTDB delip $test_ip
+try_command_on_node $test_node $CTDB ipreallocate
+wait_until_ips_are_on_node '!' $test_node $test_ip
+
+timeout=60
+increment=5
+count=0
+echo "Waiting for ${test_ip} to disappear from node ${test_node}..."
+while : ; do
+	try_command_on_node -v $test_node "ip addr show to ${test_ip}"
+	if [ -n "$out" ] ; then
+ echo "Still there..."
+ if [ $(($count * $increment)) -ge $timeout ] ; then
+ echo "BAD: Timed out waiting..."
+ exit 1
+ fi
+ sleep_for $increment
+ count=$(($count + 1))
+ else
+ break
+ fi
+done
+
+echo "GOOD: IP was successfully removed!"
diff --git a/ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh b/ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh
new file mode 100755
index 0000000..150aeea
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/18_ctdb_reloadips.sh
@@ -0,0 +1,257 @@
+#!/bin/bash
+
+# Verify that adding/deleting IPs using 'ctdb reloadips' works
+
+# Checks that when IPs are added to and deleted from a single node then
+# those IPs are actually assigned and unassigned from the specified
+# interface.
+
+# Prerequisites:
+
+# * An active CTDB cluster with public IP addresses configured
+
+# Expected results:
+
+# * When IPs are added to a single node then they are assigned to an
+# interface.
+
+# * When IPs are deleted from a single node then they disappear from an
+# interface.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+####################
+
+# Search for an unused 10.B.1.0/24 network on which to add public IP
+# addresses.
+
+# The initial search is for a 10.B.0.0/16 network since some
+# configurations may use a whole class B for the private network.
+# Check that there are no public IP addresses (as reported by "ctdb ip
+# all") or other IP addresses (as reported by "ip addr show") with
+# the provided prefix. Note that this is an IPv4-specific test.
+
+echo "Getting public IP information from CTDB..."
+ctdb_onnode "$test_node" "ip -X -v all"
+ctdb_ip_info=$(awk -F'|' 'NR > 1 { print $2, $3, $5 }' "$outfile")
+
+echo "Getting IP information from interfaces..."
+try_command_on_node all "ip addr show"
+ip_addr_info=$(awk '$1 == "inet" { ip = $2; sub(/\/.*/, "", ip); print ip }' \
+ "$outfile")
+
+prefix=""
+for b in $(seq 0 255) ; do
+ prefix="10.${b}"
+
+	# Does the prefix match any IP address returned by "ip addr show"?
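+	# (Illustration with hypothetical values, not part of the test:
+	#  for prefix=10.42 and ip=10.42.3.7, "${ip#${prefix}.}" strips the
+	#  leading "10.42." giving "3.7", which differs from $ip, so the
+	#  prefix is already in use and the search moves on.)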
+ while read ip ; do
+ if [ "${ip#${prefix}.}" != "$ip" ] ; then
+ prefix=""
+ continue 2
+ fi
+ done <<<"$ip_addr_info"
+
+	# Does the prefix match any public IP address reported by "ctdb ip all"?
+ while read ip pnn iface ; do
+ if [ "${ip#${prefix}.}" != "$ip" ] ; then
+ prefix=""
+ continue 2
+ fi
+ done <<<"$ctdb_ip_info"
+
+ # Got through the IPs without matching prefix - done!
+ break
+done
+
+[ -n "$prefix" ] || die "Unable to find a usable IP address prefix"
+
+# We really want a class C: 10.B.1.0/24
+prefix="${prefix}.1"
+
+####################
+
+iface=$(echo "$ctdb_ip_info" | awk -v pnn=$test_node '$2 == pnn { print $3 ; exit }')
+
+####################
+
+# This only needs to be set on the recmaster, but setting it on all nodes does the trick.
+new_takeover_timeout=90
+echo "Setting TakeoverTimeout=${new_takeover_timeout} to avoid potential bans"
+try_command_on_node all "$CTDB setvar TakeoverTimeout ${new_takeover_timeout}"
+
+####################
+
+try_command_on_node $test_node $CTDB_TEST_WRAPPER ctdb_base_show
+addresses="${out}/public_addresses"
+echo "Public addresses file on node $test_node is \"$addresses\""
+backup="${addresses}.$$"
+
+backup_public_addresses ()
+{
+ try_command_on_node $test_node "cp -a $addresses $backup"
+}
+
+restore_public_addresses ()
+{
+ try_command_on_node $test_node "mv $backup $addresses >/dev/null 2>&1 || true"
+}
+ctdb_test_exit_hook_add restore_public_addresses
+
+# Now create that backup
+backup_public_addresses
+
+####################
+
+add_ips_to_original_config ()
+{
+ local test_node="$1"
+ local addresses="$2"
+ local iface="$3"
+ local prefix="$4"
+ local first="$5"
+ local last="$6"
+
+ echo "Adding new public IPs to original config on node ${test_node}..."
+ echo "IPs will be ${prefix}.${first}/24..${prefix}.${last}/24"
+
+ # Implement this by completely rebuilding the public_addresses
+ # file. This is easier than deleting entries on a remote node.
+ restore_public_addresses
+ backup_public_addresses
+
+ # Note that tee is a safe way of creating a file on a remote node.
+ # This avoids potential fragility with quoting or redirection.
+ for i in $(seq $first $last) ; do
+ echo "${prefix}.${i}/24 ${iface}"
+ done |
+ try_command_on_node -i $test_node "tee -a $addresses"
+}
+
+check_ips ()
+{
+ local test_node="$1"
+ local iface="$2"
+ local prefix="$3"
+ local first="$4"
+ local last="$5"
+
+	# If just 0 is specified then this is an empty range
+ local public_ips_file=$(mktemp)
+ if [ "$first" = 0 -a -z "$last" ] ; then
+ echo "Checking that there are no IPs in ${prefix}.0/24"
+ else
+ local prefix_regexp="inet *${prefix//./\.}"
+
+ echo "Checking IPs in range ${prefix}.${first}/24..${prefix}.${last}/24"
+
+ local i
+ for i in $(seq $first $last) ; do
+ echo "${prefix}.${i}"
+ done | sort >"$public_ips_file"
+ fi
+
+ try_command_on_node $test_node "ip addr show dev ${iface}"
+ local ip_addrs_file=$(mktemp)
+ cat "$outfile" | \
+ sed -n -e "s@.*inet * \(${prefix//./\.}\.[0-9]*\)/.*@\1@p" | \
+ sort >"$ip_addrs_file"
+
+ local diffs=$(diff "$public_ips_file" "$ip_addrs_file") || true
+ rm -f "$ip_addrs_file" "$public_ips_file"
+
+ if [ -z "$diffs" ] ; then
+ echo "GOOD: IP addresses are as expected"
+ else
+ echo "BAD: IP addresses are incorrect:"
+ echo "$diffs"
+ exit 1
+ fi
+}
+
+# ctdb reloadips will fail if it can't disable takeover runs. The most
+# likely reason for this is that there is already a takeover run in
+# progress. We can't predict when this will happen, so retry if this
+# occurs.
+do_ctdb_reloadips ()
+{
+ local retry_max=10
+ local retry_count=0
+ while : ; do
+ if try_command_on_node "$test_node" "$CTDB reloadips" ; then
+ return 0
+ fi
+
+ if [ "$out" != "Failed to disable takeover runs" ] ; then
+ return 1
+ fi
+
+ if [ $retry_count -ge $retry_max ] ; then
+ return 1
+ fi
+
+ retry_count=$((retry_count + 1))
+ echo "Retrying..."
+ sleep_for 1
+ done
+}
+
+####################
+
+new_ip_max=100
+
+####################
+
+add_ips_to_original_config \
+ $test_node "$addresses" "$iface" "$prefix" 1 $new_ip_max
+
+do_ctdb_reloadips
+
+check_ips $test_node "$iface" "$prefix" 1 $new_ip_max
+
+ctdb_onnode "$test_node" sync
+
+####################
+
+# This should be the primary. Ensure that no other IPs are lost
+echo "Using 'ctdb reloadips' to remove the 1st address just added..."
+
+add_ips_to_original_config \
+ $test_node "$addresses" "$iface" "$prefix" 2 $new_ip_max
+
+do_ctdb_reloadips
+
+check_ips $test_node "$iface" "$prefix" 2 $new_ip_max
+
+ctdb_onnode "$test_node" sync
+
+####################
+
+# Get rid of about 1/2 the IPs
+start=$(($new_ip_max / 2 + 1))
+echo "Updating to include only about 1/2 of the new IPs..."
+
+add_ips_to_original_config \
+ $test_node "$addresses" "$iface" "$prefix" $start $new_ip_max
+
+do_ctdb_reloadips
+
+check_ips $test_node "$iface" "$prefix" $start $new_ip_max
+
+ctdb_onnode "$test_node" sync
+
+####################
+
+# Delete the rest
+echo "Restoring original IP configuration..."
+restore_public_addresses
+
+do_ctdb_reloadips
+
+check_ips $test_node "$iface" "$prefix" 0
diff --git a/ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh b/ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh
new file mode 100755
index 0000000..4d8f617
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/30_nfs_tickle_killtcp.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Verify that NFS connections are monitored and that NFS tickles are sent.
+
+# Create a connection to the NFS server on a node. Then disable the
+# relevant NFS server node and ensure that it sends an appropriate reset
+# packet. The packet must come from the releasing node.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Cluster nodes must be listening on the NFS TCP port (2049).
+
+# Expected results:
+
+# * CTDB on the releasing node should correctly send a reset packet when
+# the node is disabled.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+test_port=2049
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..."
+
+sleep 30 | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+echo "Getting MAC address associated with ${test_ip}..."
+releasing_mac=$(ip neigh show $test_prefix | awk '$4 == "lladdr" {print $5}')
+[ -n "$releasing_mac" ] || die "Couldn't get MAC address for ${test_prefix}"
+echo "MAC address is: ${releasing_mac}"
+
+tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+# Only look for a reset from the releasing node
+tcptickle_sniff_wait_show "$releasing_mac"
diff --git a/ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh b/ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh
new file mode 100755
index 0000000..e3f1540
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/31_nfs_tickle.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Verify that NFS connections are monitored and that NFS tickles are sent.
+
+# We create a connection to the NFS server on a node and confirm that
+# this connection is registered in the nfs-tickles/ subdirectory in
+# shared storage. Then kill ctdbd on the relevant NFS server node and
+# ensure that the takeover node sends an appropriate reset packet.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Cluster nodes must be listening on the NFS TCP port (2049).
+
+# Expected results:
+
+# * CTDB should correctly record the socket and on failover the takeover
+# node should send a reset packet.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+try_command_on_node $test_node "$CTDB listnodes | wc -l"
+numnodes="$out"
+
+# We need this for later, so we know how long to run nc for.
+ctdb_onnode "$test_node" "getvar MonitorInterval"
+monitor_interval="${out#*= }"
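+# ("${out#*= }" strips everything up to and including "= " from the
+# getvar output, leaving just the interval in seconds.)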
+
+test_port=2049
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..."
+
+sleep $((monitor_interval * 4)) | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+wait_for_monitor_event $test_node
+
+echo "Wait until NFS connection is tracked by CTDB on test node ..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
+
+echo "Getting TicklesUpdateInterval..."
+try_command_on_node $test_node $CTDB getvar TickleUpdateInterval
+update_interval="$out"
+
+echo "Wait until NFS connection is tracked by CTDB on all nodes..."
+wait_until $(($update_interval * 2)) \
+ check_tickles_all $numnodes $test_ip $test_port $src_socket
+
+tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
+
+# We need to be nasty to make sure that the node being failed out doesn't
+# get a chance to send any tickles and confuse our sniff. IPs also
+# need to be dropped because we're simulating a dead node rather than
+# a CTDB failure. To properly handle a CTDB failure we would need a
+# watchdog to drop the IPs when CTDB disappears.
+echo "Killing ctdbd on ${test_node}..."
+try_command_on_node -v $test_node "killall -9 ctdbd ; $CTDB_TEST_WRAPPER drop_ips ${test_node_ips}"
+
+wait_until_node_has_status $test_node disconnected
+
+tcptickle_sniff_wait_show
diff --git a/ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh b/ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh
new file mode 100755
index 0000000..c5b583d
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/32_cifs_tickle.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Verify that CIFS connections are monitored and that CIFS tickles are sent.
+
+# We create a connection to the CIFS server on a node and confirm that
+# this connection is registered by CTDB. Then disable the relevant CIFS
+# server node and ensure that the takeover node sends an appropriate
+# reset packet.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Clustered Samba must be listening on TCP port 445.
+
+# Expected results:
+
+# * CTDB should correctly record the connection and the takeover node
+# should send a reset packet.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+# We need this for later, so we know how long to sleep.
+try_command_on_node 0 $CTDB getvar MonitorInterval
+monitor_interval="${out#*= }"
+#echo "Monitor interval on node $test_node is $monitor_interval seconds."
+
+select_test_node_and_ips
+
+test_port=445
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..."
+
+sleep $((monitor_interval * 4)) | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+# This should happen as soon as the connection is up... but unless we wait
+# we sometimes beat the registration.
+echo "Checking if CIFS connection is tracked by CTDB on test node..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
+
+# This is almost immediate. However, it is sent between nodes
+# asynchronously, so it is worth checking...
+echo "Wait until CIFS connection is tracked by CTDB on all nodes..."
+try_command_on_node $test_node "$CTDB listnodes | wc -l"
+numnodes="$out"
+wait_until 5 \
+ check_tickles_all $numnodes $test_ip $test_port $src_socket
+tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+tcptickle_sniff_wait_show
diff --git a/ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh b/ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh
new file mode 100755
index 0000000..7a0944f
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/33_gratuitous_arp.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+# Verify that a gratuitous ARP is sent when a node is failed out.
+
+# We ping a public IP and lookup the MAC address in the ARP table. We
+# then disable the node and check the ARP table again - the MAC address
+# should have changed. This test does NOT test connectivity after the
+# failover.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Remove any entries for the chosen address from the ARP table.
+# 4. Send a single ping request packet to the selected public address.
+# 5. Determine the MAC address corresponding to the public address by
+# checking the ARP table.
+# 6. Disable the selected node.
+# 7. Check the ARP table and check the MAC associated with the public
+# address.
+
+# Expected results:
+
+# * When a node is disabled the MAC address associated with public
+# addresses on that node should change.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Removing ${test_ip} from the local ARP table..."
+ip neigh flush "$test_prefix" >/dev/null 2>&1 || true
+
+echo "Pinging ${test_ip}..."
+ping_wrapper -q -n -c 1 $test_ip
+
+echo "Getting MAC address associated with ${test_ip}..."
+original_mac=$(ip neigh show $test_prefix | awk '$4 == "lladdr" {print $5}')
+[ -n "$original_mac" ] || die "Couldn't get MAC address for ${test_prefix}"
+
+echo "MAC address is: ${original_mac}"
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+echo "Getting MAC address associated with ${test_ip} again..."
+new_mac=$(ip neigh show $test_prefix | awk '$4 == "lladdr" {print $5}')
+[ -n "$new_mac" ] || die "Couldn't get MAC address for ${test_prefix}"
+
+echo "MAC address is: ${new_mac}"
+
+if [ "$original_mac" != "$new_mac" ] ; then
+ echo "GOOD: MAC address changed"
+else
+ die "BAD: MAC address did not change"
+fi
diff --git a/ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh b/ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh
new file mode 100755
index 0000000..b81510d
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/34_nfs_tickle_restart.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+# Verify that a newly started CTDB node gets updated tickle details
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Cluster nodes must be listening on the NFS TCP port (2049).
+
+# Steps:
+
+# As with 31_nfs_tickle.sh but restart a node after the tickle is
+# registered.
+
+# Expected results:
+
+# * CTDB should correctly communicate tickles to new CTDB instances as
+# they join the cluster.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+try_command_on_node $test_node "$CTDB listnodes -X"
+listnodes_output="$out"
+numnodes=$(wc -l <<<"$listnodes_output")
+
+test_port=2049
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with netcat..."
+
+sleep 600 | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+wait_for_monitor_event $test_node
+
+echo "Wait until NFS connection is tracked by CTDB on test node ..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
+
+echo "Select a node to restart ctdbd"
+rn=$(awk -F'|' -v test_node=$test_node \
+ '$2 != test_node { print $2 ; exit }' <<<"$listnodes_output")
+
+echo "Restarting CTDB on node ${rn}"
+ctdb_nodes_restart "$rn"
+
+# In some theoretical world this is racy. In practice, the node will
+# take quite a while to become healthy, so this will beat any
+# assignment of IPs to the node.
+echo "Setting NoIPTakeover on node ${rn}"
+try_command_on_node $rn $CTDB setvar NoIPTakeover 1
+
+wait_until_ready
+
+echo "Getting TickleUpdateInterval..."
+try_command_on_node $test_node $CTDB getvar TickleUpdateInterval
+update_interval="$out"
+
+echo "Wait until NFS connection is tracked by CTDB on all nodes..."
+if ! wait_until $(($update_interval * 2)) \
+ check_tickles_all $numnodes $test_ip $test_port $src_socket ; then
+ echo "BAD: connection not tracked on all nodes:"
+ echo "$out"
+ exit 1
+fi
+
+# We could go on to test whether the tickle ACK gets sent. However,
+# this is tested in previous tests and the use of NoIPTakeover
+# complicates things on a 2 node cluster.
diff --git a/ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh b/ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh
new file mode 100755
index 0000000..d0f3d08
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/36_smb_reset_server.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Verify that the server end of an SMB connection is correctly reset
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Clustered Samba must be listening on TCP port 445.
+
+# Expected results:
+
+# * CTDB should correctly record the connection and the releasing node
+# should reset the server end of the connection.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+# We need this for later, so we know how long to sleep.
+try_command_on_node 0 $CTDB getvar MonitorInterval
+monitor_interval="${out#*= }"
+
+select_test_node_and_ips
+
+test_port=445
+
+echo "Set NoIPTakeover=1 on all nodes"
+try_command_on_node all $CTDB setvar NoIPTakeover 1
+
+echo "Give the recovery daemon some time to reload tunables"
+sleep_for 5
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with nc..."
+
+sleep $((monitor_interval * 4)) | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+# This should happen as soon as the connection is up... but unless we wait
+# we sometimes beat the registration.
+echo "Waiting until SMB connection is tracked by CTDB on test node..."
+wait_until 10 check_tickles $test_node $test_ip $test_port $src_socket
+
+# It would be nice if ss consistently used local/peer instead of src/dst
+ss_filter="src ${test_ip}:${test_port} dst ${src_socket}"
+
+try_command_on_node $test_node \
+ "ss -tn state established '${ss_filter}' | tail -n +2"
+if [ -z "$out" ] ; then
+ echo "BAD: ss did not list the socket"
+ exit 1
+fi
+echo "GOOD: ss lists the socket:"
+cat "$outfile"
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+try_command_on_node $test_node \
+ "ss -tn state established '${ss_filter}' | tail -n +2"
+if [ -n "$out" ] ; then
+ echo "BAD: ss listed the socket after failover"
+ exit 1
+fi
+echo "GOOD: ss no longer lists the socket"
diff --git a/ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh b/ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh
new file mode 100755
index 0000000..3e249f9
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/37_nfs_reset_server.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Verify that the server end of an NFS connection is correctly reset
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# * Cluster nodes must be listening on the NFS TCP port (2049).
+
+# Expected results:
+
+# * CTDB should correctly record the connection and the releasing node
+# should reset the server end of the connection.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+# We need this for later, so we know how long to sleep.
+try_command_on_node 0 $CTDB getvar MonitorInterval
+monitor_interval="${out#*= }"
+
+select_test_node_and_ips
+
+test_port=2049
+
+echo "Set NoIPTakeover=1 on all nodes"
+try_command_on_node all $CTDB setvar NoIPTakeover 1
+
+echo "Give the recovery daemon some time to reload tunables"
+sleep_for 5
+
+echo "Connecting to node ${test_node} on IP ${test_ip}:${test_port} with nc..."
+
+sleep $((monitor_interval * 4)) | nc $test_ip $test_port &
+nc_pid=$!
+ctdb_test_exit_hook_add "kill $nc_pid >/dev/null 2>&1"
+
+wait_until_get_src_socket "tcp" "${test_ip}:${test_port}" $nc_pid "nc"
+src_socket="$out"
+echo "Source socket is $src_socket"
+
+echo "Wait until NFS connection is tracked by CTDB on test node ..."
+wait_until $((monitor_interval * 2)) \
+ check_tickles $test_node $test_ip $test_port $src_socket
+cat "$outfile"
+
+# It would be nice if ss consistently used local/peer instead of src/dst
+ss_filter="src ${test_ip}:${test_port} dst ${src_socket}"
+
+try_command_on_node $test_node \
+ "ss -tn state established '${ss_filter}' | tail -n +2"
+if [ -z "$out" ] ; then
+ echo "BAD: ss did not list the socket"
+ exit 1
+fi
+echo "GOOD: ss lists the socket:"
+cat "$outfile"
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+try_command_on_node $test_node \
+ "ss -tn state established '${ss_filter}' | tail -n +2"
+if [ -n "$out" ] ; then
+ echo "BAD: ss listed the socket after failover"
+ exit 1
+fi
+echo "GOOD: ss no longer lists the socket"
diff --git a/ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh b/ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh
new file mode 100755
index 0000000..539d25e
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/41_failover_ping_discrete.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Verify that it is possible to ping a public address after disabling a node.
+
+# We ping a public IP, disable the node hosting it and then ping the
+# public IP again.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Send a single ping request packet to the selected public address.
+# 4. Disable the selected node.
+# 5. Send another single ping request packet to the selected public address.
+
+# Expected results:
+
+# * When a node is disabled the public address fails over and the
+# address is still pingable.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Removing ${test_ip} from the local neighbor table..."
+ip neigh flush "$test_prefix" >/dev/null 2>&1 || true
+
+echo "Pinging ${test_ip}..."
+ping_wrapper -q -n -c 1 $test_ip
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+echo "Removing ${test_ip} from the local neighbor table again..."
+ip neigh flush "$test_prefix" >/dev/null 2>&1 || true
+
+echo "Pinging ${test_ip} again..."
+ping_wrapper -q -n -c 1 $test_ip
diff --git a/ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh b/ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh
new file mode 100755
index 0000000..233819b
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/42_failover_ssh_hostname.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Verify that it is possible to SSH to a public address after disabling a node.
+
+# We SSH to a public IP and check the hostname, disable the node hosting
+# it and then SSH again to confirm that the hostname has changed.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. SSH to the selected public address and run hostname.
+# 4. Disable the selected node.
+# 5. SSH to the selected public address again and run hostname.
+
+# Expected results:
+
+# * When a node is disabled the public address fails over and it is
+# still possible to SSH to the node. The hostname should change.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Removing ${test_ip} from the local neighbor table..."
+ip neigh flush "$test_prefix" >/dev/null 2>&1 || true
+
+echo "SSHing to ${test_ip} and running hostname..."
+if ! original_hostname=$(ssh -o "StrictHostKeyChecking no" $test_ip hostname) ; then
+ die "Failed to get original hostname via SSH..."
+fi
+
+echo "Hostname is: ${original_hostname}"
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 1 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+echo "SSHing to ${test_ip} and running hostname (again)..."
+if ! new_hostname=$(ssh -o "StrictHostKeyChecking no" $test_ip hostname) ; then
+ echo "Failed to get new hostname via SSH..."
+ echo "DEBUG:"
+ ip neigh show
+ exit 1
+fi
+
+echo "Hostname is: ${new_hostname}"
+
+if [ "$original_hostname" != "$new_hostname" ] ; then
+ echo "GOOD: hostname changed"
+else
+ die "BAD: hostname did not change"
+fi
diff --git a/ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh b/ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh
new file mode 100755
index 0000000..ac2cafd
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/43_failover_nfs_basic.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# Verify that a mounted NFS share is still operational after failover.
+
+# We mount an NFS share from a node, write a file via NFS and then
+# confirm that we can correctly read the file after a failover.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Select the 1st NFS share exported on the node.
+# 4. Mount the selected NFS share.
+# 5. Create a file in the NFS mount and calculate its checksum.
+# 6. Disable the selected node.
+# 7. Read the file and calculate its checksum.
+# 8. Compare the checksums.
+
+# Expected results:
+
+# * When a node is disabled the public address fails over and it is
+# possible to correctly read a file over NFS. The checksums should be
+# the same before and after.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+nfs_test_setup
+
+echo "Create file containing random data..."
+dd if=/dev/urandom of=$nfs_local_file bs=1k count=1
+original_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 0 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+new_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+if [ "$original_md5" = "$new_md5" ] ; then
+ echo "GOOD: file contents unchanged after failover"
+else
+ die "BAD: file contents are different after failover"
+fi
diff --git a/ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh b/ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh
new file mode 100755
index 0000000..5c8324c
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/44_failover_nfs_oneway.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# Verify that a file created on a node is readable via NFS after a failover.
+
+# We write a file into an exported directory on a node, mount the NFS
+# share from a node, verify that we can read the file via NFS and that
+# we can still read it after a failover.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Select the 1st NFS share exported on the node.
+# 4. Write a file into exported directory on the node and calculate its
+# checksum.
+# 5. Mount the selected NFS share.
+# 6. Read the file via the NFS mount and calculate its checksum.
+# 7. Compare checksums.
+# 8. Disable the selected node.
+# 9. Read the file via NFS and calculate its checksum.
+# 10. Compare the checksums.
+
+# Expected results:
+
+# * Checksums for the file on all 3 occasions should be the same.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+nfs_test_setup
+
+echo "Create file containing random data..."
+local_f=$(mktemp)
+ctdb_test_exit_hook_add rm -f "$local_f"
+dd if=/dev/urandom of=$local_f bs=1k count=1
+local_sum=$(sum $local_f)
+
+scp -p "$local_f" "[${test_ip}]:${nfs_remote_file}"
+try_command_on_node $test_node "chmod 644 $nfs_remote_file"
+
+nfs_sum=$(sum $nfs_local_file)
+
+if [ "$local_sum" = "$nfs_sum" ] ; then
+ echo "GOOD: file contents read correctly via NFS"
+else
+ echo "BAD: file contents are different over NFS"
+ echo " original file: $local_sum"
+ echo " NFS file: $nfs_sum"
+ exit 1
+fi
+
+gratarp_sniff_start
+
+echo "Disabling node $test_node"
+try_command_on_node 0 $CTDB disable -n $test_node
+wait_until_node_has_status $test_node disabled
+
+gratarp_sniff_wait_show
+
+new_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+if [ "$nfs_sum" = "$new_sum" ] ; then
+ echo "GOOD: file contents unchanged after failover"
+else
+ echo "BAD: file contents are different after failover"
+ echo " original file: $nfs_sum"
+ echo " NFS file: $new_sum"
+ exit 1
+fi
diff --git a/ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh b/ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh
new file mode 100755
index 0000000..2d15748
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/45_failover_nfs_kill.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Verify that a mounted NFS share is still operational after failover.
+
+# We mount an NFS share from a node, write a file via NFS and then
+# confirm that we can correctly read the file after a failover.
+
+# Prerequisites:
+
+# * An active CTDB cluster with at least 2 nodes with public addresses.
+
+# * Test must be run on a real or virtual cluster rather than against
+# local daemons.
+
+# * Test must not be run from a cluster node.
+
+# Steps:
+
+# 1. Verify that the cluster is healthy.
+# 2. Select a public address and its corresponding node.
+# 3. Select the 1st NFS share exported on the node.
+# 4. Mount the selected NFS share.
+# 5. Create a file in the NFS mount and calculate its checksum.
+# 6. Kill CTDB on the selected node.
+# 7. Read the file and calculate its checksum.
+# 8. Compare the checksums.
+
+# Expected results:
+
+# * When a node is disabled the public address fails over and it is
+# possible to correctly read a file over NFS. The checksums should be
+# the same before and after.
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+nfs_test_setup
+
+echo "Create file containing random data..."
+dd if=/dev/urandom of=$nfs_local_file bs=1k count=1
+original_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+gratarp_sniff_start
+
+echo "Killing node $test_node"
+try_command_on_node $test_node $CTDB getpid
+pid=${out#*:}
+# We need to be nasty to make sure that the node being failed out doesn't
+# get a chance to send any tickles or do anything else clever. IPs
+# also need to be dropped because we're simulating a dead node rather
+# than a CTDB failure. To properly handle a CTDB failure we would
+# need a watchdog to drop the IPs when CTDB disappears.
+try_command_on_node -v $test_node "kill -9 $pid ; $CTDB_TEST_WRAPPER drop_ips ${test_node_ips}"
+wait_until_node_has_status $test_node disconnected
+
+gratarp_sniff_wait_show
+
+new_sum=$(sum $nfs_local_file)
+[ $? -eq 0 ]
+
+if [ "$original_md5" = "$new_md5" ] ; then
+ echo "GOOD: file contents unchanged after failover"
+else
+ die "BAD: file contents are different after failover"
+fi
diff --git a/ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh b/ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh
new file mode 100755
index 0000000..efa9ef2
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/60_rogueip_releaseip.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Verify that the recovery daemon correctly handles a rogue IP
+
+# It should be released...
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Using $test_ip, which is onnode $test_node"
+
+# This test depends on being able to assign a duplicate address on a
+# 2nd node. However, IPv6 guards against this and causes the test to
+# fail.
+case "$test_ip" in
+*:*) ctdb_test_skip "This test is not supported for IPv6 addresses" ;;
+esac
+
+get_test_ip_mask_and_iface
+
+echo "Finding another node that knows about $test_ip"
+ctdb_get_all_pnns
+other_node=""
+for i in $all_pnns ; do
+ if [ "$i" = "$test_node" ] ; then
+ continue
+ fi
+ try_command_on_node $i "$CTDB ip"
+ n=$(awk -v ip="$test_ip" '$1 == ip { print }' "$outfile")
+ if [ -n "$n" ] ; then
+ other_node="$i"
+ break
+ fi
+done
+if [ -z "$other_node" ] ; then
+ die "Unable to find another node that knows about $test_ip"
+fi
+
+echo "Adding $test_ip on node $other_node"
+try_command_on_node $other_node "ip addr add ${test_ip}/${mask} dev ${iface}"
+
+rogue_ip_is_gone ()
+{
+ local pnn="$1"
+ local test_ip="$2"
+ try_command_on_node $pnn $CTDB_TEST_WRAPPER ip_maskbits_iface $test_ip
+ [ -z "$out" ]
+}
+
+echo "Waiting until rogue IP is no longer assigned..."
+wait_until 30 rogue_ip_is_gone $other_node $test_ip
diff --git a/ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh b/ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh
new file mode 100755
index 0000000..5ee4e54
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/61_rogueip_takeip.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Verify that TAKE_IP will work for an IP that is already on an interface
+
+# This is a variation of simple/60_recoverd_missing_ip.sh
+
+. "${TEST_SCRIPTS_DIR}/cluster.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node_and_ips
+
+echo "Running test against node $test_node and IP $test_ip"
+
+# This test puts an address on an interface and then needs to quickly
+# configure that address and cause an IP takeover. However, an IPv6
+# address will be tentative for a while, so "quickly" is not possible.
+# When ctdb_control_takeover_ip() calls ctdb_sys_have_ip() it will
+# decide that the address is not present. It then attempts a takeip,
+# which can fail if the address is suddenly present because it is no
+# longer tentative.
+case "$test_ip" in
+*:*) ctdb_test_skip "This test is not supported for IPv6 addresses" ;;
+esac
+
+get_test_ip_mask_and_iface
+
+echo "Deleting IP $test_ip from all nodes"
+delete_ip_from_all_nodes $test_ip
+try_command_on_node -v $test_node $CTDB ipreallocate
+wait_until_ips_are_on_node '!' $test_node $test_ip
+
+try_command_on_node -v all $CTDB ip
+
+# The window here needs to be small, to try to avoid the address being
+# released. The test will still pass either way but if the first IP
+# takeover run does a release then this doesn't test the code path we
+# expect it to...
+echo "Adding IP $test_ip to $iface and CTDB on node $test_node"
+ip_cmd="ip addr add $test_ip/$mask dev $iface"
+ctdb_cmd="$CTDB addip $test_ip/$mask $iface && $CTDB ipreallocate"
+try_command_on_node $test_node "$ip_cmd && $ctdb_cmd"
+
+wait_until_ips_are_on_node $test_node $test_ip
diff --git a/ctdb/tests/CLUSTER/complex/README b/ctdb/tests/CLUSTER/complex/README
new file mode 100644
index 0000000..72de396
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/README
@@ -0,0 +1,2 @@
+Complex integration tests. These need a real or virtual cluster.
+That is, they can not be run against local daemons.
diff --git a/ctdb/tests/CLUSTER/complex/scripts/local.bash b/ctdb/tests/CLUSTER/complex/scripts/local.bash
new file mode 100644
index 0000000..0e2addd
--- /dev/null
+++ b/ctdb/tests/CLUSTER/complex/scripts/local.bash
@@ -0,0 +1,289 @@
+# Hey Emacs, this is a -*- shell-script -*- !!! :-)
+
+# Thanks/blame to Stephen Rothwell for suggesting that this can be
+# done in the shell. ;-)
+ipv6_to_hex ()
+{
+ local addr="$1"
+
+ # Replace "::" by something special.
+ local foo="${addr/::/:@:}"
+
+ # Join the groups of digits together, 0-padding each group of
+ # digits out to 4 digits, and count the number of (non-@) groups
+ local out=""
+ local count=0
+ local i
+ for i in $(IFS=":" ; echo $foo ) ; do
+ if [ "$i" = "@" ] ; then
+ out="${out}@"
+ else
+ out="${out}$(printf '%04x' 0x${i})"
+ count=$(($count + 4))
+ fi
+ done
+
+ # Replace '@' with correct number of zeroes
+ local zeroes=$(printf "%0$((32 - $count))x" 0)
+ echo "${out/@/${zeroes}}"
+}
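+
+# Example of the conversion above (hypothetical address, not used by any
+# test): ipv6_to_hex "fd00::5:1" prints "fd000000000000000000000000050001".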
+
+#######################################
+
+get_src_socket ()
+{
+ local proto="$1"
+ local dst_socket="$2"
+ local pid="$3"
+ local prog="$4"
+
+ local pat="^${proto}6?[[:space:]]+[[:digit:]]+[[:space:]]+[[:digit:]]+[[:space:]]+[^[:space:]]+[[:space:]]+${dst_socket//./\\.}[[:space:]]+ESTABLISHED[[:space:]]+${pid}/${prog}[[:space:]]*\$"
+ out=$(netstat -tanp |
+ egrep "$pat" |
+ awk '{ print $4 }')
+
+ [ -n "$out" ]
+}
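+
+# (Hypothetical netstat line matched by the pattern above:
+#  "tcp 0 0 10.0.2.45:49091 10.0.2.143:2049 ESTABLISHED 1234/nc"
+#  -- awk's $4 is the local/source socket, which ends up in $out.)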
+
+wait_until_get_src_socket ()
+{
+ local proto="$1"
+ local dst_socket="$2"
+ local pid="$3"
+ local prog="$4"
+
+ echo "Waiting for ${prog} to establish connection to ${dst_socket}..."
+
+ wait_until 5 get_src_socket "$@"
+}
+
+#######################################
+
+check_tickles ()
+{
+ local node="$1"
+ local test_ip="$2"
+ local test_port="$3"
+ local src_socket="$4"
+ try_command_on_node $node ctdb gettickles $test_ip $test_port
+ # SRC: 10.0.2.45:49091 DST: 10.0.2.143:445
+ grep -Fq "SRC: ${src_socket} " "$outfile"
+}
+
+check_tickles_all ()
+{
+ local numnodes="$1"
+ local test_ip="$2"
+ local test_port="$3"
+ local src_socket="$4"
+
+ try_command_on_node all ctdb gettickles $test_ip $test_port
+ # SRC: 10.0.2.45:49091 DST: 10.0.2.143:445
+ local count=$(grep -Fc "SRC: ${src_socket} " "$outfile" || true)
+ [ $count -eq $numnodes ]
+}
+
+
+
+#######################################
+
+# filename will be in $tcpdump_filename, pid in $tcpdump_pid
+tcpdump_start ()
+{
+ tcpdump_filter="$1" # global
+
+ echo "Running tcpdump..."
+ tcpdump_filename=$(mktemp)
+ ctdb_test_exit_hook_add "rm -f $tcpdump_filename"
+
+ # The only way of being sure that tcpdump is listening is to send
+ # some packets that it will see. So we use dummy pings - the -U
+ # option to tcpdump ensures that packets are flushed to the file
+ # as they are captured.
+ local dummy_addr="127.3.2.1"
+ local dummy="icmp and dst host ${dummy_addr} and icmp[icmptype] == icmp-echo"
+ tcpdump -n -p -s 0 -e -U -w $tcpdump_filename -i any "($tcpdump_filter) or ($dummy)" &
+ ctdb_test_exit_hook_add "kill $! >/dev/null 2>&1"
+
+ echo "Waiting for tcpdump output file to be ready..."
+ ping -q "$dummy_addr" >/dev/null 2>&1 &
+ ctdb_test_exit_hook_add "kill $! >/dev/null 2>&1"
+
+ tcpdump_listen_for_dummy ()
+ {
+ tcpdump -n -r $tcpdump_filename -c 1 "$dummy" >/dev/null 2>&1
+ }
+
+ wait_until 10 tcpdump_listen_for_dummy
+}
+
+# By default, wait for 1 matching packet.
+tcpdump_wait ()
+{
+ local count="${1:-1}"
+ local filter="${2:-${tcpdump_filter}}"
+
+ tcpdump_check ()
+ {
+ # It would be much nicer to add "ether src
+ # $releasing_mac" to the filter. However, tcpdump
+ # does not allow MAC filtering unless an ethernet
+ # interface is specified with -i. It doesn't work
+ # with "-i any" and it doesn't work when reading from
+ # a file. :-(
+ local found
+ if [ -n "$releasing_mac" ] ; then
+ found=$(tcpdump -n -e -r "$tcpdump_filename" \
+ "$filter" 2>/dev/null |
+ grep -c "In ${releasing_mac}")
+ else
+ found=$(tcpdump -n -e -r "$tcpdump_filename" \
+ "$filter" 2>/dev/null |
+ wc -l)
+ fi
+
+ [ $found -ge $count ]
+ }
+
+ echo "Waiting for tcpdump to capture some packets..."
+ if ! wait_until 30 tcpdump_check ; then
+ echo "DEBUG AT $(date '+%F %T'):"
+ local i
+ for i in "onnode -q 0 $CTDB status" \
+ "netstat -tanp" \
+ "tcpdump -n -e -r $tcpdump_filename" ; do
+ echo "$i"
+ $i || true
+ done
+ return 1
+ fi
+}
+
+tcpdump_show ()
+{
+ local filter="${1:-${tcpdump_filter}}"
+
+ tcpdump -n -e -vv -XX -r $tcpdump_filename "$filter" 2>/dev/null
+}
+
+tcp4tickle_sniff_start ()
+{
+ local src="$1"
+ local dst="$2"
+
+ local in="src host ${dst%:*} and tcp src port ${dst##*:} and dst host ${src%:*} and tcp dst port ${src##*:}"
+ local out="src host ${src%:*} and tcp src port ${src##*:} and dst host ${dst%:*} and tcp dst port ${dst##*:}"
+ local tickle_ack="${in} and (tcp[tcpflags] & tcp-ack != 0) and (tcp[14:2] == 1234)" # win == 1234
+ local ack_ack="${out} and (tcp[tcpflags] & tcp-ack != 0)"
+ tcptickle_reset="${in} and tcp[tcpflags] & tcp-rst != 0"
+ local filter="(${tickle_ack}) or (${ack_ack}) or (${tcptickle_reset})"
+
+ tcpdump_start "$filter"
+}
+
+# tcp[] does not work for IPv6 (in some versions of tcpdump)
+tcp6tickle_sniff_start ()
+{
+ local src="$1"
+ local dst="$2"
+
+ local in="src host ${dst%:*} and tcp src port ${dst##*:} and dst host ${src%:*} and tcp dst port ${src##*:}"
+ local out="src host ${src%:*} and tcp src port ${src##*:} and dst host ${dst%:*} and tcp dst port ${dst##*:}"
+ local tickle_ack="${in} and (ip6[53] & tcp-ack != 0) and (ip6[54:2] == 1234)" # win == 1234
+ local ack_ack="${out} and (ip6[53] & tcp-ack != 0)"
+ tcptickle_reset="${in} and ip6[53] & tcp-rst != 0"
+ local filter="(${tickle_ack}) or (${ack_ack}) or (${tcptickle_reset})"
+
+ tcpdump_start "$filter"
+}
+
+tcptickle_sniff_start ()
+{
+ local src="$1"
+ local dst="$2"
+
+ case "${dst%:*}" in
+ *:*) tcp6tickle_sniff_start "$src" "$dst" ;;
+ *) tcp4tickle_sniff_start "$src" "$dst" ;;
+ esac
+}
+
+tcptickle_sniff_wait_show ()
+{
+ local releasing_mac="$1" # optional, used by tcpdump_wait()
+
+ tcpdump_wait 1 "$tcptickle_reset"
+
+ echo "GOOD: here are some TCP tickle packets:"
+ tcpdump_show
+}
+
+gratarp4_sniff_start ()
+{
+ tcpdump_start "arp host ${test_ip}"
+}
+
+gratarp6_sniff_start ()
+{
+ local neighbor_advertisement="icmp6 and ip6[40] == 136"
+ local hex=$(ipv6_to_hex "$test_ip")
+ local match_target="ip6[48:4] == 0x${hex:0:8} and ip6[52:4] == 0x${hex:8:8} and ip6[56:4] == 0x${hex:16:8} and ip6[60:4] == 0x${hex:24:8}"
+
+ tcpdump_start "${neighbor_advertisement} and ${match_target}"
+}
+
+gratarp_sniff_start ()
+{
+ case "$test_ip" in
+ *:*) gratarp6_sniff_start ;;
+ *) gratarp4_sniff_start ;;
+ esac
+}
+
+gratarp_sniff_wait_show ()
+{
+ tcpdump_wait 2
+
+	echo "GOOD: these should be some gratuitous ARPs:"
+ tcpdump_show
+}
+
+ping_wrapper ()
+{
+ case "$*" in
+ *:*) ping6 "$@" ;;
+ *) ping "$@" ;;
+ esac
+}
+
+#######################################
+
+nfs_test_setup ()
+{
+ select_test_node_and_ips
+
+ nfs_first_export=$(showmount -e $test_ip | sed -n -e '2s/ .*//p')
+
+ echo "Creating test subdirectory..."
+ try_command_on_node $test_node "TMPDIR=$nfs_first_export mktemp -d"
+ nfs_test_dir="$out"
+ try_command_on_node $test_node "chmod 777 $nfs_test_dir"
+
+ nfs_mnt_d=$(mktemp -d)
+ nfs_local_file="${nfs_mnt_d}/${nfs_test_dir##*/}/TEST_FILE"
+ nfs_remote_file="${nfs_test_dir}/TEST_FILE"
+
+ ctdb_test_exit_hook_add nfs_test_cleanup
+
+ echo "Mounting ${test_ip}:${nfs_first_export} on ${nfs_mnt_d} ..."
+ mount -o timeo=1,hard,intr,vers=3 \
+ "[${test_ip}]:${nfs_first_export}" ${nfs_mnt_d}
+}
+
+nfs_test_cleanup ()
+{
+ rm -f "$nfs_local_file"
+ umount -f "$nfs_mnt_d"
+ rmdir "$nfs_mnt_d"
+ onnode -q $test_node rmdir "$nfs_test_dir"
+}