diff options
Diffstat (limited to 'ctdb/config/statd-callout')
-rwxr-xr-x | ctdb/config/statd-callout | 254 |
1 files changed, 254 insertions, 0 deletions
diff --git a/ctdb/config/statd-callout b/ctdb/config/statd-callout new file mode 100755 index 0000000..38c155e --- /dev/null +++ b/ctdb/config/statd-callout @@ -0,0 +1,254 @@ +#!/bin/sh + +# This must run as root as CTDB tool commands need to access CTDB socket +[ "$(id -u)" -eq 0 ] || exec sudo "$0" "$@" + +# statd must be configured to use this script as its high availability call-out. +# +# In most Linux versions this can be done using something like the following... +# +# /etc/sysconfig/nfs (Red Hat) or /etc/default/nfs-common (Debian): +# NFS_HOSTNAME=myhostname +# STATD_HOSTNAME="${NFS_HOSTNAME} -H /etc/ctdb/statd-callout" +# +# Newer Red Hat Linux variants instead use /etc/nfs.conf: +# [statd] +# name = myhostname +# ha-callout = /etc/ctdb/statd-callout + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && echo "$PWD") + +. "${CTDB_BASE}/functions" + +# Overwrite this so we get some logging +die () +{ + script_log "statd-callout" "$@" + exit 1 +} + +# Try different variables to find config file for NFS_HOSTNAME +load_system_config "nfs" "nfs-common" + +# If NFS_HOSTNAME not set then try to pull it out of /etc/nfs.conf +if [ -z "$NFS_HOSTNAME" ]; then + if type nfsconf >/dev/null 2>&1; then + NFS_HOSTNAME=$(nfsconf --get statd name) + elif type git >/dev/null 2>&1; then + # git to the rescue! + NFS_HOSTNAME=$(git config --file=/etc/nfs.conf statd.name) + fi +fi + +[ -n "$NFS_HOSTNAME" ] || \ + die "NFS_HOSTNAME is not configured. statd-callout failed" + +############################################################ + +ctdb_setup_state_dir "service" "nfs" + +# script_state_dir set by ctdb_setup_state_dir() +# shellcheck disable=SC2154 +d="${script_state_dir}/statd-callout" + +mkdir -p "$d" || die "Failed to create directory \"${d}\"" +cd "$d" || die "Failed to change directory to \"${d}\"" + +pnn=$(ctdb_get_pnn) + +############################################################ + +send_notifies () +{ + _smnotify="${CTDB_HELPER_BINDIR}/smnotify" + + # State must monotonically increase, across the entire + # cluster. Use seconds since epoch and hope the time is in + # sync across nodes. Even numbers mean service is shut down, + # odd numbers mean service is started. + + # Intentionally round to an even number + # shellcheck disable=SC2017 + _state_even=$(( $(date '+%s') / 2 * 2)) + + _prev="" + while read _sip _cip ; do + # NOTE: Consider optimising smnotify to read all the + # data from stdin and then run it in the background. + + # Reset stateval for each serverip + if [ "$_sip" != "$_prev" ] ; then + _stateval="$_state_even" + fi + + # Send notifies for server shutdown + "$_smnotify" --client="$_cip" --ip="$_sip" \ + --server="$_sip" --stateval="$_stateval" + "$_smnotify" --client="$_cip" --ip="$_sip" \ + --server="$NFS_HOSTNAME" --stateval="$_stateval" + + # Send notifies for server startup + _stateval=$((_stateval + 1)) + "$_smnotify" --client="$_cip" --ip="$_sip" \ + --server="$_sip" --stateval="$_stateval" + "$_smnotify" --client="$_cip" --ip="$_sip" \ + --server="$NFS_HOSTNAME" --stateval="$_stateval" + done +} + +delete_records () +{ + while read _sip _cip ; do + _key="statd-state@${_sip}@${_cip}" + echo "\"${_key}\" \"\"" + done | $CTDB ptrans "ctdb.tdb" +} + +############################################################ + +case "$1" in + # Keep a single file to keep track of the last "add-client" or + # "del-client'. These get pushed to ctdb.tdb during "update", + # which will generally be run once each "monitor" cycle. In this + # way we avoid scalability problems with flood of persistent + # transactions after a "notify" when all the clients re-take their + # locks. + + add-client) + # statd does not tell us to which IP the client connected so + # we must add it to all the IPs that we serve + cip="$2" + date=$(date '+%s') + # x is intentionally ignored + # shellcheck disable=SC2034 + $CTDB ip -X | + tail -n +2 | + while IFS="|" read x sip node x ; do + [ "$node" = "$pnn" ] || continue # not us + key="statd-state@${sip}@${cip}" + echo "\"${key}\" \"${date}\"" >"$key" + done + ;; + + del-client) + # statd does not tell us from which IP the client disconnected + # so we must add it to all the IPs that we serve + cip="$2" + # x is intentionally ignored + # shellcheck disable=SC2034 + $CTDB ip -X | + tail -n +2 | + while IFS="|" read x sip node x ; do + [ "$node" = "$pnn" ] || continue # not us + key="statd-state@${sip}@${cip}" + echo "\"${key}\" \"\"" >"$key" + done + ;; + + update) + files=$(echo statd-state@*) + if [ "$files" = "statd-state@*" ] ; then + # No files! + exit 0 + fi + # Filter out lines for any IP addresses that are not currently + # hosted public IP addresses. + ctdb_ips=$($CTDB ip | tail -n +2) + sed_expr=$(echo "$ctdb_ips" | + awk -v pnn="$pnn" 'pnn == $2 { + ip = $1; gsub(/\./, "\\.", ip); + printf "/statd-state@%s@/p\n", ip }') + # Intentional multi-word expansion for multiple files + # shellcheck disable=SC2086 + items=$(sed -n "$sed_expr" $files) + if [ -n "$items" ] ; then + if echo "$items" | $CTDB ptrans "ctdb.tdb" ; then + # shellcheck disable=SC2086 + rm $files + fi + fi + ;; + + notify) + # we must restart the lockmanager (on all nodes) so that we get + # a clusterwide grace period (so other clients don't take out + # conflicting locks through other nodes before all locks have been + # reclaimed) + + # we need these settings to make sure that no tcp connections survive + # across a very fast failover/failback + #echo 10 > /proc/sys/net/ipv4/tcp_fin_timeout + #echo 0 > /proc/sys/net/ipv4/tcp_max_tw_buckets + #echo 0 > /proc/sys/net/ipv4/tcp_max_orphans + + # Delete the notification list for statd, we don't want it to + # ping any clients + rm -f /var/lib/nfs/statd/sm/* + rm -f /var/lib/nfs/statd/sm.bak/* + + # We must also let some time pass between stopping and + # restarting the lock manager. Otherwise there is a window + # where the lock manager will respond "strangely" immediately + # after restarting it, which causes clients to fail to reclaim + # their locks. + nfs_callout_init + "$CTDB_NFS_CALLOUT" "stop" "nlockmgr" >/dev/null 2>&1 + sleep 2 + "$CTDB_NFS_CALLOUT" "start" "nlockmgr" >/dev/null 2>&1 + + # we now need to send out additional statd notifications to ensure + # that clients understand that the lockmanager has restarted. + # we have three cases: + # 1, clients that ignore the ip address the stat notification came from + # and ONLY care about the 'name' in the notify packet. + # these clients ONLY work with lock failover IFF that name + # can be resolved into an ipaddress that matches the one used + # to mount the share. (==linux clients) + # This is handled when starting lockmanager above, but those + # packets are sent from the "wrong" ip address, something linux + # clients are ok with, buth other clients will barf at. + # 2, Some clients only accept statd packets IFF they come from the + # 'correct' ip address. + # 2a,Send out the notification using the 'correct' ip address and also + # specify the 'correct' hostname in the statd packet. + # Some clients require both the correct source address and also the + # correct name. (these clients also ONLY work if the ip addresses + # used to map the share can be resolved into the name returned in + # the notify packet.) + # 2b,Other clients require that the source ip address of the notify + # packet matches the ip address used to take out the lock. + # I.e. that the correct source address is used. + # These clients also require that the statd notify packet contains + # the name as the ip address used when the lock was taken out. + # + # Both 2a and 2b are commonly used in lockmanagers since they maximize + # probability that the client will accept the statd notify packet and + # not just ignore it. + # For all IPs we serve, collect info and push to the config database + + # Construct a sed expression to take catdb output and produce pairs of: + # server-IP client-IP + # but only for the server-IPs that are hosted on this node. + ctdb_all_ips=$($CTDB ip all | tail -n +2) + sed_expr=$(echo "$ctdb_all_ips" | + awk -v pnn="$pnn" 'pnn == $2 { + ip = $1; gsub(/\./, "\\.", ip); + printf "s/^key.*=.*statd-state@\\(%s\\)@\\([^\"]*\\).*/\\1 \\2/p\n", ip }') + + statd_state=$($CTDB catdb ctdb.tdb | sed -n "$sed_expr" | sort) + [ -n "$statd_state" ] || exit 0 + + echo "$statd_state" | send_notifies + echo "$statd_state" | delete_records + + # Remove any stale touch files (i.e. for IPs not currently + # hosted on this node and created since the last "update"). + # There's nothing else we can do with them at this stage. + echo "$ctdb_all_ips" | + awk -v pnn="$pnn" 'pnn != $2 { print $1 }' | + while read sip ; do + rm -f "statd-state@${sip}@"* + done + ;; +esac |