diff options
Diffstat (limited to '')
43 files changed, 5077 insertions, 0 deletions
diff --git a/ctdb/config/README b/ctdb/config/README new file mode 100644 index 0000000..d28f4f0 --- /dev/null +++ b/ctdb/config/README @@ -0,0 +1,31 @@ +This directory contains run-time support scripts for CTDB. + +Selected highlights: + + ctdb.init + + An initscript for starting ctdbd at boot time. + + events/ + + Eventscripts. See events/README for more details. + + functions + + Support functions, sourced by eventscripts and other scripts. + + statd-callout + + rpc.statd high-availability callout to support lock migration on + failover. + +Notes: + +* All of these scripts are written in POSIX Bourne shell. Please + avoid bash-isms, including the use of "local" variables (which are + not available in POSIX shell). + +* Do not use absolute paths for commands. Unit tests attempt to + replace many commands with stubs and can not do this if commands are + specified with absolute paths. The functions file controls $PATH so + absolute paths should not be required. diff --git a/ctdb/config/ctdb-crash-cleanup.sh b/ctdb/config/ctdb-crash-cleanup.sh new file mode 100755 index 0000000..95cfd75 --- /dev/null +++ b/ctdb/config/ctdb-crash-cleanup.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This script can be called from a cronjob to automatically drop/release +# all public ip addresses if CTDBD has crashed or stopped running. +# + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && echo "$PWD") + +. "${CTDB_BASE}/functions" + +# If ctdb is running, just exit +if service ctdb status >/dev/null 2>&1 ; then + exit 0 +fi + +load_script_options "failover" "11.natgw" + +if [ ! -f "$CTDB_BASE/public_addresses" ] ; then + die "No public addresses file found. Can't clean up." +fi + +drop_all_public_ips 2>&1 | script_log "ctdb-crash-cleanup.sh" + +if [ -n "$CTDB_NATGW_PUBLIC_IP" ] ; then + drop_ip "$CTDB_NATGW_PUBLIC_IP" "ctdb-crash-cleanup.sh" +fi diff --git a/ctdb/config/ctdb.conf b/ctdb/config/ctdb.conf new file mode 100644 index 0000000..8e1b376 --- /dev/null +++ b/ctdb/config/ctdb.conf @@ -0,0 +1,22 @@ +# See ctdb.conf(5) for documentation +# +# See ctdb-script.options(5) for documentation about event script +# options + +[logging] + # Enable logging to syslog + # location = syslog + + # Default log level + # log level = NOTICE + +[cluster] + # Shared cluster lock file to avoid split brain. Daemon + # default is no cluster lock. Do NOT run CTDB without a + # cluster lock file unless you know exactly what you are + # doing. + # + # Please see the CLUSTER LOCK section in ctdb(7) for more + # details. + # + # cluster lock = !/bin/false CLUSTER LOCK NOT CONFIGURED diff --git a/ctdb/config/ctdb.init b/ctdb/config/ctdb.init new file mode 100755 index 0000000..6a7f781 --- /dev/null +++ b/ctdb/config/ctdb.init @@ -0,0 +1,161 @@ +#!/bin/sh + +# Start and stop CTDB (Clustered TDB daemon) +# +# chkconfig: - 90 01 +# +# description: Starts and stops CTDB +# pidfile: /var/run/ctdb/ctdbd.pid +# config: /etc/sysconfig/ctdb + +### BEGIN INIT INFO +# Provides: ctdb +# Required-Start: $local_fs $syslog $network $remote_fs +# Required-Stop: $local_fs $syslog $network $remote_fs +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: start and stop ctdb service +# Description: Start and stop CTDB (Clustered TDB daemon) +### END INIT INFO + +# Source function library. +if [ -f /etc/init.d/functions ] ; then + # Red Hat + . /etc/init.d/functions +elif [ -f /etc/rc.d/init.d/functions ] ; then + # Red Hat + . /etc/rc.d/init.d/functions +elif [ -f /etc/rc.status ] ; then + # SUSE + . /etc/rc.status + rc_reset + LC_ALL=en_US.UTF-8 +elif [ -f /lib/lsb/init-functions ] ; then + # Debian + . /lib/lsb/init-functions +fi + +# Avoid using root's TMPDIR +unset TMPDIR + +[ -n "$CTDB_BASE" ] || export CTDB_BASE="/etc/ctdb" + +. "${CTDB_BASE}/functions" + +load_system_config "network" + +# check networking is up (for redhat) +if [ "$NETWORKING" = "no" ] ; then + exit 0 +fi + +load_system_config "ctdb" + +detect_init_style +export CTDB_INIT_STYLE + +ctdbd="${CTDBD:-/usr/sbin/ctdbd}" +ctdb="${CTDB:-/usr/bin/ctdb}" +pidfile="/var/run/ctdb/ctdbd.pid" + +############################################################ + +start() +{ + printf "Starting ctdbd service: " + + case "$CTDB_INIT_STYLE" in + suse) + startproc "$ctdbd" + rc_status -v + ;; + redhat) + daemon --pidfile "$pidfile" "$ctdbd" + RETVAL=$? + echo + [ $RETVAL -eq 0 ] && touch /var/lock/subsys/ctdb || RETVAL=1 + return $RETVAL + ;; + debian) + eval start-stop-daemon --start --quiet --background --exec "$ctdbd" + ;; + esac +} + +stop() +{ + printf "Shutting down ctdbd service: " + + case "$CTDB_INIT_STYLE" in + suse) + "$ctdb" "shutdown" + rc_status -v + ;; + redhat) + "$ctdb" "shutdown" + RETVAL=$? + # Common idiom in Red Hat init scripts - success() always + # succeeds so this does behave like if-then-else + # shellcheck disable=SC2015 + [ $RETVAL -eq 0 ] && success || failure + echo "" + [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/ctdb + return $RETVAL + ;; + debian) + "$ctdb" "shutdown" + log_end_msg $? + ;; + esac +} + +restart() +{ + stop + start +} + +check_status () +{ + case "$CTDB_INIT_STYLE" in + suse) + checkproc -p "$pidfile" "$ctdbd" + rc_status -v + ;; + redhat) + status -p "$pidfile" -l "ctdb" "$ctdbd" + ;; + debian) + status_of_proc -p "$pidfile" "$ctdbd" "ctdb" + ;; + esac +} + +############################################################ + +case "$1" in + start) + start + ;; + stop) + stop + ;; + restart|reload|force-reload) + restart + ;; + status) + check_status + ;; + condrestart|try-restart) + if check_status >/dev/null ; then + restart + fi + ;; + cron) + # used from cron to auto-restart ctdb + check_status >/dev/null 2>&1 || restart + ;; + *) + echo "Usage: $0 {start|stop|restart|reload|force-reload|status|cron|condrestart|try-restart}" + exit 1 +esac diff --git a/ctdb/config/ctdb.sudoers b/ctdb/config/ctdb.sudoers new file mode 100644 index 0000000..1c23818 --- /dev/null +++ b/ctdb/config/ctdb.sudoers @@ -0,0 +1,3 @@ +Defaults!/usr/local/etc/ctdb/statd-callout !requiretty + +rpcuser ALL=(ALL) NOPASSWD: /usr/local/etc/ctdb/statd-callout diff --git a/ctdb/config/ctdb.sysconfig b/ctdb/config/ctdb.sysconfig new file mode 100644 index 0000000..fc57929 --- /dev/null +++ b/ctdb/config/ctdb.sysconfig @@ -0,0 +1,11 @@ +# If using SYSV init, install this as /etc/sysconfig/ctdb, +# /etc/default/ctdb or similar + +# Allow 1M open files +ulimit -n 1048576 + +# Allow core files to be created +ulimit -c unlimited + +# Useful if default detection doesn't work +# CTDB_INIT_STYLE=debian diff --git a/ctdb/config/ctdb.tunables b/ctdb/config/ctdb.tunables new file mode 100644 index 0000000..b99e5cd --- /dev/null +++ b/ctdb/config/ctdb.tunables @@ -0,0 +1,2 @@ +# Set some CTDB tunable variables during CTDB startup? +# MutexEnabled=0 diff --git a/ctdb/config/debug-hung-script.sh b/ctdb/config/debug-hung-script.sh new file mode 100755 index 0000000..c1ac0f1 --- /dev/null +++ b/ctdb/config/debug-hung-script.sh @@ -0,0 +1,61 @@ +#!/bin/sh + +# This script only works on Linux. Please modify (and submit patches) +# for other operating systems. + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && echo "$PWD") + +. "${CTDB_BASE}/functions" + +load_script_options + +# Testing hook +if [ -n "$CTDB_DEBUG_HUNG_SCRIPT_LOGFILE" ] ; then + tmp="${CTDB_DEBUG_HUNG_SCRIPT_LOGFILE}.part" + exec >>"$tmp" 2>&1 +fi + +( + # No use running several of these in parallel if, say, "releaseip" + # event hangs for multiple IPs. In that case the output would be + # interleaved in the log and would just be confusing. + flock --wait 2 9 || exit 1 + + echo "===== Start of hung script debug for PID=\"$1\", event=\"$2\" =====" + + echo "pstree -p -a ${1}:" + out=$(pstree -p -a "$1") + echo "$out" + + # Check for processes matching a regular expression and print + # stack staces. This could help confirm that certain processes + # are stuck in certain places such as the cluster filesystem. The + # regexp must separate items with "|" and must not contain + # parentheses. The default pattern can be replaced for testing. + default_pat='exportfs|rpcinfo' + pat="${CTDB_DEBUG_HUNG_SCRIPT_STACKPAT:-${default_pat}}" + echo "$out" | + sed -r -n "s@.*-(.*(${pat}).*),([0-9]*).*@\\3 \\1@p" | + while read pid name ; do + trace=$(cat "/proc/${pid}/stack" 2>/dev/null) + # No! Checking the exit code afterwards is actually clearer... + # shellcheck disable=SC2181 + if [ $? -eq 0 ] ; then + echo "---- Stack trace of interesting process ${pid}[${name}] ----" + echo "$trace" + fi + done + + if [ "$2" != "init" ] ; then + echo "---- ctdb scriptstatus ${2}: ----" + $CTDB scriptstatus "$2" + fi + + echo "===== End of hung script debug for PID=\"$1\", event=\"$2\" =====" + + if [ -n "$CTDB_DEBUG_HUNG_SCRIPT_LOGFILE" ] ; then + mv "$tmp" "$CTDB_DEBUG_HUNG_SCRIPT_LOGFILE" + fi + +) 9>"${CTDB_SCRIPT_VARDIR}/debug-hung-script.lock" diff --git a/ctdb/config/debug_locks.sh b/ctdb/config/debug_locks.sh new file mode 100755 index 0000000..6c730ee --- /dev/null +++ b/ctdb/config/debug_locks.sh @@ -0,0 +1,218 @@ +#!/bin/sh + +# This script attempts to find processes holding locks on a particular +# CTDB database and dumps a stack trace for each such processe. +# +# There are 2 cases: +# +# * Samba is configured to use fcntl locks +# +# In this case /proc/locks is parsed to find potential lock holders +# +# * Samba is configured to use POSIX robust mutexes +# +# In this case the helper program tdb_mutex_check is used to find +# potential lock holders. +# +# This helper program uses a private glibc struct field, so is +# neither portable nor supported. If this field is not available +# then the helper is not built. Unexpected changes in internal +# glibc structures may cause unexpected results, including crashes. +# Bug reports for this helper program are not accepted without an +# accompanying patch. + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && echo "$PWD") + +. "${CTDB_BASE}/functions" + +if [ $# -ne 4 ] ; then + die "usage: $0 <pid> { DB | RECORD } <tdb_path> { FCNTL | MUTEX }" +fi + +lock_helper_pid="$1" +# lock_scope is unused for now +# shellcheck disable=SC2034 +lock_scope="$2" +tdb_path="$3" +lock_type="$4" + +# type is at least mentioned in POSIX and more is portable than which(1) +# shellcheck disable=SC2039 +if ! type gstack >/dev/null 2>&1 ; then + gstack () + { + _pid="$1" + + gdb -batch --quiet -nx "/proc/${_pid}/exe" "$_pid" \ + -ex "thread apply all bt" 2>/dev/null | + grep '^\(#\|Thread \)' + } +fi + +# Load/cache database options from configuration file +ctdb_get_db_options + +dump_stack () +{ + _pid="$1" + + echo "----- Stack trace for PID=${_pid} -----" + _state=$(ps -p "$_pid" -o state= | cut -c 1) + if [ "$_state" = "D" ] ; then + # Don't run gstack on a process in D state since + # gstack will hang until the process exits D state. + # Although it is possible for a process to transition + # to D state after this check, it is unlikely because + # if a process is stuck in D state then it is probably + # the reason why this script was called. Note that a + # kernel stack almost certainly won't help diagnose a + # deadlock... but it will probably give us someone to + # blame! + echo "----- Process in D state, printing kernel stack only" + get_proc "${_pid}/stack" + else + gstack "$_pid" + fi +} + +dump_stacks () +{ + _pids="$1" + + # Use word splitting to squash whitespace + # shellcheck disable=SC2086 + _pids=$(echo $_pids | tr ' ' '\n' | sort -u) + + for _pid in $_pids; do + dump_stack "$_pid" + done +} + +get_tdb_file_id () +{ + if ! _device_inode=$(stat -c "%d:%i" "$tdb_path" 2>/dev/null) ; then + die "Unable to stat \"${tdb_path}\"" + fi + _device="${_device_inode%%:*}" + _device_major=$((_device >> 8)) + _device_minor=$((_device & 0xff)) + _inode="${_device_inode#*:}" + printf '%02x:%02x:%u\n' "$_device_major" "$_device_minor" "$_inode" +} + +debug_via_proc_locks () +{ + # Get file ID to match relevant column in /proc/locks + _file_id=$(get_tdb_file_id) + + # Log information from /proc/locks about the waiting process + _tdb=$(basename "$tdb_path") + _comm=$(ps -p "$lock_helper_pid" -o comm=) + _out=$(get_proc "locks" | + awk -v pid="$lock_helper_pid" \ + -v file_id="$_file_id" \ + -v file="$_tdb" \ + -v comm="$_comm" \ + '$2 == "->" && + $3 == "POSIX" && + $4 == "ADVISORY" && + $5 == "WRITE" && + $6 == pid && + $7 == file_id { print $6, comm, file, $8, $9 }') + if [ -n "$_out" ] ; then + echo "Waiter:" + echo "$_out" + fi + + # Parse /proc/locks and find process holding locks on $tdb_path + # extract following information + # pid process_name tdb_name offsets + _out=$(get_proc "locks" | + awk -v pid="$lock_helper_pid" \ + -v file_id="$_file_id" \ + -v file="$_tdb" \ + '$2 == "POSIX" && + $3 == "ADVISORY" && + $4 == "WRITE" && + $5 != pid && + $6 == file_id { print $5, file, $7, $8 }' | + while read -r _pid _rest ; do + _pname=$(ps -p "$_pid" -o comm=) + echo "$_pid $_pname $_rest" + done) + + if [ -z "$_out" ]; then + return + fi + + # Log information about locks + echo "Lock holders:" + echo "$_out" + + _pids=$(echo "$_out" | awk '{ print $1 }') + + lock_holder_pids="${lock_holder_pids:+${lock_holder_pids} }${_pids}" +} + +debug_via_tdb_mutex () +{ + _helper="${CTDB_HELPER_BINDIR}/tdb_mutex_check" + if [ ! -x "$_helper" ] ; then + # Mutex helper not available - not supported? + # Avoid not found error... + return + fi + + # Helper should always succeed + if ! _t=$("$_helper" "$tdb_path") ; then + return + fi + + _out=$(echo "$_t" | sed -n -e 's#^\[\(.*\)\] pid=\(.*\)#\2 \1#p') + + if [ -z "$_out" ]; then + if [ -n "$_t" ] ; then + echo "$_t" | grep -F 'trylock failed' + fi + return + fi + + # Get process names, append $tdb_path + _out=$(echo "$_out" | + while read -r _pid _rest ; do + _pname=$(ps -p "$_pid" -o comm=) + _tdb=$(basename "$tdb_path") + echo "${_pid} ${_pname} ${_tdb} ${_rest}" + done) + + # Log information about locks + echo "Lock holders:" + echo "$_out" + + # Get PIDs of processes that are holding locks + _pids=$(echo "$_out" | + awk -v pid="$lock_helper_pid" '$1 != pid {print $1}') + + lock_holder_pids="${lock_holder_pids:+${lock_holder_pids} }${_pids}" +} + +( + flock -n 9 || exit 1 + + echo "===== Start of debug locks PID=$$ =====" + + lock_holder_pids="" + + debug_via_proc_locks + + if [ "$lock_type" = "MUTEX" ] ; then + debug_via_tdb_mutex + fi + + dump_stacks "$lock_holder_pids" + + echo "===== End of debug locks PID=$$ =====" +)9>"${CTDB_SCRIPT_VARDIR}/debug_locks.lock" | script_log "ctdbd-lock" + +exit 0 diff --git a/ctdb/config/events/README b/ctdb/config/events/README new file mode 100644 index 0000000..766cb4f --- /dev/null +++ b/ctdb/config/events/README @@ -0,0 +1,193 @@ +The events/ directory contains event scripts used by CTDB. Event +scripts are triggered on certain events, such as startup, monitoring +or public IP allocation. Scripts may be specific to services, +networking or internal CTDB operations. + +Scripts are divided into subdirectories for different CTDB components. +Right now the only component is "legacy". + +All event scripts start with the prefix 'NN.' where N is a digit. The +event scripts are run in sequence based on NN. Thus 10.interface will +be run before 60.nfs. It is recommended to keep each NN unique. +However, scripts with the same NN prefix will be executed in +alphanumeric sort order. + +As a special case, any eventscript that ends with a '~' character will be +ignored since this is a common postfix that some editors will append to +older versions of a file. Similarly, any eventscript with multiple '.'s +will be ignored as package managers can create copies with additional +suffix starting with '.' (e.g. .rpmnew, .dpkg-dist). + +Only executable event scripts are run by CTDB. Any event script that +does not have execute permission is ignored. + +The eventscripts are called with varying number of arguments. The +first argument is the event name and the rest of the arguments depend +on the event name. + +Event scripts must return 0 for success and non-zero for failure. + +Output of event scripts is logged. On failure the output of the +failing event script is included in the output of "ctdb scriptstatus". + +The following events are supported (with arguments shown): + +init + + This event is triggered once when CTDB is starting up. This + event is used to do some basic cleanup and initialisation. + + During the "init" event CTDB is not listening on its Unix + domain socket, so the "ctdb" CLI will not work. + + Failure of this event will cause CTDB to terminate. + + Example: 00.ctdb creates $CTDB_SCRIPT_VARDIR + +setup + + This event is triggered once, after the "init" event has + completed. + + For this and any subsequent events the CTDB Unix domain socket + is available, so the "ctdb" CLI will work. + + Failure of this event will cause CTDB to terminate. + + Example: 00.ctdb processes tunables defined in ctdb.tunables. + +startup + + This event is triggered after the "setup" event has completed + and CTDB has finished its initial database recovery. + + This event starts all services that are managed by CTDB. Each + service that is managed by CTDB should implement this event + and use it to (re)start the service. + + If the "startup" event fails then CTDB will retry it until it + succeeds. There is no limit on the number of retries. + + Example: 50.samba uses this event to start the Samba daemon. + +shutdown + + This event is triggered when CTDB is shutting down. + + This event shuts down all services that are managed by CTDB. + Each service that is managed by CTDB should implement this + event and use it to stop the service. + + Example: 50.samba uses this event to shut down the Samba + daemon. + +monitor + + This event is run periodically. The interval between + successive "monitor" events is configured using the + MonitorInterval tunable, which defaults to 15 seconds. + + This event is triggered by CTDB to continuously monitor that + all managed services are healthy. If all event scripts + complete then the monitor event successfully then the node is + marked HEALTHY. If any event script fails then no subsequent + scripts will be run for that event and the node is marked + UNHEALTHY. + + Each service that is managed by CTDB should implement this + event and use it to monitor the service. + + Example: 10.interface checks that each configured interface + for public IP addresses has a physical link established. + +startrecovery + + This event is triggered every time a database recovery process + is started. + + This is rarely used. + +recovered + + This event is triggered every time a database recovery process + is completed. + + This is rarely used. + +takeip <interface> <ip-address> <netmask-bits> + + This event is triggered for each public IP address taken by a + node during IP address (re)assignment. Multiple "takeip" + events can be run in parallel if multiple IP addresses are + being assigned. + + Example: In 10.interface the "ip" command (from the Linux + iproute2 package) is used to add the specified public IP + address to the specified interface. The "ip" command can + safely be run concurrently. However, the "iptables" command + cannot be run concurrently so a wrapper is used to serialise + runs using exclusive locking. + + If substantial work is required to reconfigure a service when + a public IP address is taken over it can be better to defer + service reconfiguration to the "ipreallocated" event, after + all IP addresses have been assigned. + + Example: 60.nfs uses ctdb_service_set_reconfigure() to flag + that public IP addresses have changed so that service + reconfiguration will occur in the "ipreallocated" event. + +releaseip <interface> <ip-address> <netmask-bits> + + This event is triggered for each public IP address released by + a node during IP address (re)assignment. Multiple "releaseip" + events can be run in parallel if multiple IP addresses are + being unassigned. + + In all other regards, this event is analogous to the "takeip" + event above. + +updateip <old-interface> <new-interface> <ip-address> <netmask-bits> + + This event is triggered for each public IP address moved + between interfaces on a node during IP address (re)assignment. + Multiple "updateip" events can be run in parallel if multiple + IP addresses are being moved. + + This event is only used if multiple interfaces are capable of + hosting an IP address, as specified in the public addresses + configuration file. + + This event is similar to the "takeip" event above. + +ipreallocated + + This event is triggered on all nodes as the last step of + public IP address (re)assignment. It is unconditionally + triggered after any "releaseip", "takeip" and "updateip" + events, even though these events may not run on some nodes if + there are no relevant changes. That is, the "ipreallocated" + event is triggered unconditionally, even on nodes where public + IP addresses assignements have not changed. + + This event is used to reconfigure services. + + Since "ipreallocated" is always run, this allows + reconfiguration to depend on the states of other nodes rather + that just IP addresses. + + Example: 11.natgw recalculates the NAT gateway master and + updates the relevant network configuration on each node if the + NAT gateway master has changed. + +Additional notes for "takeip", "releaseip", "updateip", +"ipreallocated": + +* Failure of any of these events causes IP allocation to be retried. + +* An event script can use ctdb_service_set_reconfigure() in "takeip", + "releaseip" or "updateip" events to flag that its service needs to + be reconfigured. The "ipreallocated" event can then use + ctdb_service_needs_reconfigure() to test if there were public IPs + changes to determine what type of reconfiguration (if any) is + needed. diff --git a/ctdb/config/events/legacy/00.ctdb.script b/ctdb/config/events/legacy/00.ctdb.script new file mode 100755 index 0000000..81c16af --- /dev/null +++ b/ctdb/config/events/legacy/00.ctdb.script @@ -0,0 +1,130 @@ +#!/bin/sh + +# Event script for ctdb-specific setup and other things that don't fit +# elsewhere. + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +load_script_options + +############################################################ + +# type is commonly supported and more portable than which(1) +# shellcheck disable=SC2039 +select_tdb_checker () +{ + # Find the best TDB consistency check available. + use_tdb_tool_check=false + type tdbtool >/dev/null 2>&1 && found_tdbtool=true + type tdbdump >/dev/null 2>&1 && found_tdbdump=true + + if $found_tdbtool && echo "help" | tdbtool | grep -q check ; then + use_tdb_tool_check=true + elif $found_tdbtool && $found_tdbdump ; then + cat <<EOF +WARNING: The installed 'tdbtool' does not offer the 'check' subcommand. + Using 'tdbdump' for database checks. + Consider updating 'tdbtool' for better checks! +EOF + elif $found_tdbdump ; then + cat <<EOF +WARNING: 'tdbtool' is not available. + Using 'tdbdump' to check the databases. + Consider installing a recent 'tdbtool' for better checks! +EOF + else + cat <<EOF +WARNING: Cannot check databases since neither + 'tdbdump' nor 'tdbtool check' is available. + Consider installing tdbtool or at least tdbdump! +EOF + return 1 + fi +} + +check_tdb () +{ + _db="$1" + + if $use_tdb_tool_check ; then + # tdbtool always exits with 0 :-( + if timeout 10 tdbtool "$_db" check 2>/dev/null | + grep -q "Database integrity is OK" ; then + return 0 + else + return 1 + fi + else + timeout 10 tdbdump "$_db" >/dev/null 2>/dev/null + return $? + fi +} + +check_persistent_databases () +{ + _dir="${CTDB_DBDIR_PERSISTENT:-${CTDB_VARDIR}/persistent}" + [ -d "$_dir" ] || return 0 + + for _db in "$_dir/"*.tdb.*[0-9] ; do + [ -r "$_db" ] || continue + check_tdb "$_db" || \ + die "Persistent database $_db is corrupted! CTDB will not start." + done +} + +check_non_persistent_databases () +{ + _dir="${CTDB_DBDIR:-${CTDB_VARDIR}}" + [ -d "$_dir" ] || return 0 + + for _db in "${_dir}/"*.tdb.*[0-9] ; do + [ -r "$_db" ] || continue + check_tdb "$_db" || { + _backup="${_db}.$(date +'%Y%m%d.%H%M%S').corrupt" + cat <<EOF +WARNING: database ${_db} is corrupted. + Moving to backup ${_backup} for later analysis. +EOF + mv "$_db" "$_backup" + + # Now remove excess backups + _max="${CTDB_MAX_CORRUPT_DB_BACKUPS:-10}" + _bdb="${_db##*/}" # basename + find "$_dir" -name "${_bdb}.*.corrupt" | + sort -r | + tail -n +$((_max + 1)) | + xargs rm -f + } + done +} + +############################################################ + +ctdb_check_args "$@" + +case "$1" in +init) + # make sure we have a blank state directory for the scripts to work with + rm -rf "$CTDB_SCRIPT_VARDIR" + mkdir -p "$CTDB_SCRIPT_VARDIR" || \ + die "mkdir -p ${CTDB_SCRIPT_VARDIR} - failed - $?" $? + + # Load/cache database options from configuration file + ctdb_get_db_options + + if select_tdb_checker ; then + check_persistent_databases || exit $? + check_non_persistent_databases + fi + ;; + +startup) + $CTDB attach ctdb.tdb persistent + ;; +esac + +# all OK +exit 0 diff --git a/ctdb/config/events/legacy/01.reclock.script b/ctdb/config/events/legacy/01.reclock.script new file mode 100755 index 0000000..0406875 --- /dev/null +++ b/ctdb/config/events/legacy/01.reclock.script @@ -0,0 +1,34 @@ +#!/bin/sh +# script to check accessibility to the reclock file on a node + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +case "$1" in +init) + recovery_lock=$("${CTDB_HELPER_BINDIR}/ctdb-config" \ + get cluster "recovery lock") + # xshellcheck disable=SC2181 + # Above is already complicated enough without embedding into "if" + case $? in + 0) : ;; + 2) exit 0 ;; # ENOENT: not configured + *) die "Unexpected error getting recovery lock configuration" + esac + + if [ -z "$recovery_lock" ] ; then + exit 0 + fi + + # If a helper is specified then exit because this script can't + # do anything useful + case "$recovery_lock" in + !*) exit 0 ;; + esac + + d=$(dirname "$recovery_lock") + mkdir -p "$d" + ;; +esac diff --git a/ctdb/config/events/legacy/05.system.script b/ctdb/config/events/legacy/05.system.script new file mode 100755 index 0000000..56a07c7 --- /dev/null +++ b/ctdb/config/events/legacy/05.system.script @@ -0,0 +1,196 @@ +#!/bin/sh +# ctdb event script for checking local file system utilization + +[ -n "$CTDB_BASE" ] || + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +load_script_options + +ctdb_setup_state_dir "service" "system-monitoring" + +validate_percentage() +{ + case "$1" in + "") return 1 ;; # A failure that doesn't need a warning + [0-9] | [0-9][0-9] | 100) return 0 ;; + *) + echo "WARNING: ${1} is an invalid percentage in \"${2}\" check" + return 1 + ;; + esac +} + +check_thresholds() +{ + _thing="$1" + _thresholds="$2" + _usage="$3" + _unhealthy_callout="$4" + + case "$_thresholds" in + *:*) + _warn_threshold="${_thresholds%:*}" + _unhealthy_threshold="${_thresholds#*:}" + ;; + *) + _warn_threshold="$_thresholds" + _unhealthy_threshold="" + ;; + esac + + _t=$(echo "$_thing" | sed -e 's@/@SLASH_@g' -e 's@ @_@g') + # script_state_dir set by ctdb_setup_state_dir() + # shellcheck disable=SC2154 + _cache="${script_state_dir}/cache_${_t}" + if [ -r "$_cache" ]; then + read -r _prev <"$_cache" + else + _prev=0 + fi + if validate_percentage "$_unhealthy_threshold" "$_thing"; then + if [ "$_usage" -ge "$_unhealthy_threshold" ]; then + printf 'ERROR: %s utilization %d%% >= threshold %d%%\n' \ + "$_thing" \ + "$_usage" \ + "$_unhealthy_threshold" + # Only run unhealthy callout if passing the + # unhealthy threshold. That is, if the + # previous usage was below the threshold. + if [ "$_prev" -lt "$_unhealthy_threshold" ]; then + eval "$_unhealthy_callout" + fi + echo "$_usage" >"$_cache" + exit 1 + fi + fi + + if validate_percentage "$_warn_threshold" "$_thing"; then + if [ "$_usage" -ge "$_warn_threshold" ]; then + if [ "$_usage" = "$_prev" ]; then + return + fi + printf 'WARNING: %s utilization %d%% >= threshold %d%%\n' \ + "$_thing" \ + "$_usage" \ + "$_warn_threshold" + echo "$_usage" >"$_cache" + else + if [ ! -r "$_cache" ]; then + return + fi + printf 'NOTICE: %s utilization %d%% < threshold %d%%\n' \ + "$_thing" \ + "$_usage" \ + "$_warn_threshold" + rm -f "$_cache" + fi + fi +} + +set_monitor_filsystem_usage_defaults() +{ + _fs_defaults_cache="${script_state_dir}/cache_filsystem_usage_defaults" + + if [ ! -r "$_fs_defaults_cache" ]; then + # Determine filesystem for each database directory, generate + # an entry to warn at 90%, de-duplicate entries, put all items + # on 1 line (so the read below gets everything) + for _t in "${CTDB_DBDIR:-${CTDB_VARDIR}}" \ + "${CTDB_DBDIR_PERSISTENT:-${CTDB_VARDIR}/persistent}" \ + "${CTDB_DBDIR_STATE:-${CTDB_VARDIR}/state}"; do + df -kP "$_t" | awk 'NR == 2 { printf "%s:90\n", $6 }' + done | sort -u | xargs >"$_fs_defaults_cache" + fi + + read -r CTDB_MONITOR_FILESYSTEM_USAGE <"$_fs_defaults_cache" +} + +monitor_filesystem_usage() +{ + if [ -z "$CTDB_MONITOR_FILESYSTEM_USAGE" ]; then + set_monitor_filsystem_usage_defaults + fi + + # Check each specified filesystem, specified in format + # <fs_mount>:<fs_warn_threshold>[:fs_unhealthy_threshold] + for _fs in $CTDB_MONITOR_FILESYSTEM_USAGE; do + _fs_mount="${_fs%%:*}" + _fs_thresholds="${_fs#*:}" + + if [ ! -d "$_fs_mount" ]; then + echo "WARNING: Directory ${_fs_mount} does not exist" + continue + fi + + # Get current utilization + _fs_usage=$(df -kP "$_fs_mount" | + sed -n -e 's@.*[[:space:]]\([[:digit:]]*\)%.*@\1@p') + if [ -z "$_fs_usage" ]; then + printf 'WARNING: Unable to get FS utilization for %s\n' \ + "$_fs_mount" + continue + fi + + check_thresholds "Filesystem ${_fs_mount}" \ + "$_fs_thresholds" \ + "$_fs_usage" + done +} + +dump_memory_info() +{ + get_proc "meminfo" + ps auxfww + set_proc "sysrq-trigger" "m" +} + +monitor_memory_usage() +{ + # Defaults + if [ -z "$CTDB_MONITOR_MEMORY_USAGE" ]; then + CTDB_MONITOR_MEMORY_USAGE=80 + fi + + _meminfo=$(get_proc "meminfo") + # Intentional word splitting here + # shellcheck disable=SC2046 + set -- $(echo "$_meminfo" | awk ' +$1 == "MemAvailable:" { memavail += $2 } +$1 == "MemFree:" { memfree += $2 } +$1 == "Cached:" { memfree += $2 } +$1 == "Buffers:" { memfree += $2 } +$1 == "MemTotal:" { memtotal = $2 } +$1 == "SwapFree:" { swapfree = $2 } +$1 == "SwapTotal:" { swaptotal = $2 } +END { + if (memavail != 0) { memfree = memavail ; } + if (memtotal + swaptotal != 0) { + usedtotal = memtotal - memfree + swaptotal - swapfree + print int(usedtotal / (memtotal + swaptotal) * 100) + } else { + print 0 + } +}') + _mem_usage="$1" + + check_thresholds "System memory" \ + "$CTDB_MONITOR_MEMORY_USAGE" \ + "$_mem_usage" \ + dump_memory_info +} + +case "$1" in +monitor) + # Load/cache database options from configuration file + ctdb_get_db_options + + rc=0 + monitor_filesystem_usage || rc=$? + monitor_memory_usage || rc=$? + exit $rc + ;; +esac + +exit 0 diff --git a/ctdb/config/events/legacy/06.nfs.script b/ctdb/config/events/legacy/06.nfs.script new file mode 100755 index 0000000..b937d43 --- /dev/null +++ b/ctdb/config/events/legacy/06.nfs.script @@ -0,0 +1,39 @@ +#!/bin/sh +# script to manage nfs in a clustered environment + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +service_name="nfs" + +load_script_options "service" "60.nfs" + +ctdb_setup_state_dir "service" "$service_name" + +###################################################################### + +nfs_callout_pre () +{ + _event="$1" + shift + + nfs_callout "${_event}-pre" "$@" +} + +###################################################################### + +# script_state_dir set by ctdb_setup_state_dir() +# shellcheck disable=SC2154 +nfs_callout_init "$script_state_dir" + +case "$1" in +takeip) + nfs_callout_pre "$@" + ;; + +releaseip) + nfs_callout_pre "$@" + ;; +esac diff --git a/ctdb/config/events/legacy/10.interface.script b/ctdb/config/events/legacy/10.interface.script new file mode 100755 index 0000000..7dfacd3 --- /dev/null +++ b/ctdb/config/events/legacy/10.interface.script @@ -0,0 +1,262 @@ +#!/bin/sh + +################################# +# interface event script for ctdb +# this adds/removes IPs from your +# public interface + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +load_script_options + +ctdb_public_addresses="${CTDB_BASE}/public_addresses" + +if [ ! -f "$ctdb_public_addresses" ]; then + if [ "$1" = "init" ] ; then + echo "No public addresses file found" + fi + exit 0 +fi + +# This sets $all_interfaces as a side-effect. +get_all_interfaces () +{ + # Get all the interfaces listed in the public_addresses file + all_interfaces=$(sed -e '/^#.*/d' \ + -e 's/^[^\t ]*[\t ]*//' \ + -e 's/,/ /g' \ + -e 's/[\t ]*$//' "$ctdb_public_addresses") + + # Get the interfaces for which CTDB has public IPs configured. + # That is, for all but the 1st line, get the 1st field. + ctdb_ifaces=$($CTDB -X ifaces | sed -e '1d' -e 's@^|@@' -e 's@|.*@@') + + # Add $ctdb_ifaces and make $all_interfaces unique + # Use word splitting to squash whitespace + # shellcheck disable=SC2086 + all_interfaces=$(echo $all_interfaces $ctdb_ifaces | tr ' ' '\n' | sort -u) +} + +monitor_interfaces() +{ + get_all_interfaces + + down_interfaces_found=false + up_interfaces_found=false + + # Note that this loop must not exit early. It must process + # all interfaces so that the correct state for each interface + # is set in CTDB using setifacelink. + for _iface in $all_interfaces ; do + if interface_monitor "$_iface" ; then + up_interfaces_found=true + $CTDB setifacelink "$_iface" up >/dev/null 2>&1 + else + down_interfaces_found=true + $CTDB setifacelink "$_iface" down >/dev/null 2>&1 + fi + done + + if ! $down_interfaces_found ; then + return 0 + fi + + if ! $up_interfaces_found ; then + return 1 + fi + + if [ "$CTDB_PARTIALLY_ONLINE_INTERFACES" != "yes" ]; then + return 1 + fi + + return 0 +} + +# Sets: iface, ip, maskbits +get_iface_ip_maskbits () +{ + _iface_in="$1" + ip="$2" + _maskbits_in="$3" + + # Intentional word splitting here + # shellcheck disable=SC2046 + set -- $(ip_maskbits_iface "$ip") + if [ -n "$1" ] ; then + maskbits="$1" + iface="$2" + + if [ "$iface" != "$_iface_in" ] ; then + printf \ + 'WARNING: Public IP %s hosted on interface %s but VNN says %s\n' \ + "$ip" "$iface" "$_iface_in" + fi + if [ "$maskbits" != "$_maskbits_in" ] ; then + printf \ + 'WARNING: Public IP %s has %s bit netmask but VNN says %s\n' \ + "$ip" "$maskbits" "$_maskbits_in" + fi + else + die "ERROR: Unable to determine interface for IP ${ip}" + fi +} + +ip_block () +{ + _ip="$1" + _iface="$2" + + case "$_ip" in + *:*) _family="inet6" ;; + *) _family="inet" ;; + esac + + # Extra delete copes with previously killed script + iptables_wrapper "$_family" \ + -D INPUT -i "$_iface" -d "$_ip" -j DROP 2>/dev/null + iptables_wrapper "$_family" \ + -I INPUT -i "$_iface" -d "$_ip" -j DROP +} + +ip_unblock () +{ + _ip="$1" + _iface="$2" + + case "$_ip" in + *:*) _family="inet6" ;; + *) _family="inet" ;; + esac + + iptables_wrapper "$_family" \ + -D INPUT -i "$_iface" -d "$_ip" -j DROP 2>/dev/null +} + +ctdb_check_args "$@" + +case "$1" in +init) + # make sure that we only respond to ARP messages from the NIC where + # a particular ip address is associated. + get_proc sys/net/ipv4/conf/all/arp_filter >/dev/null 2>&1 && { + set_proc sys/net/ipv4/conf/all/arp_filter 1 + } + + _promote="sys/net/ipv4/conf/all/promote_secondaries" + get_proc "$_promote" >/dev/null 2>&1 || \ + die "Public IPs only supported if promote_secondaries is available" + + # make sure we drop any ips that might still be held if + # previous instance of ctdb got killed with -9 or similar + drop_all_public_ips + ;; + +startup) + monitor_interfaces + ;; + +shutdown) + drop_all_public_ips + ;; + +takeip) + iface=$2 + ip=$3 + maskbits=$4 + + add_ip_to_iface "$iface" "$ip" "$maskbits" || { + exit 1; + } + + # In case a previous "releaseip" for this IP was killed... + ip_unblock "$ip" "$iface" + + flush_route_cache + ;; + +releaseip) + # releasing an IP is a bit more complex than it seems. Once the IP + # is released, any open tcp connections to that IP on this host will end + # up being stuck. Some of them (such as NFS connections) will be unkillable + # so we need to use the killtcp ctdb function to kill them off. We also + # need to make sure that no new connections get established while we are + # doing this! So what we do is this: + # 1) firewall this IP, so no new external packets arrive for it + # 2) find existing connections, and kill them + # 3) remove the IP from the interface + # 4) remove the firewall rule + shift + get_iface_ip_maskbits "$@" + + ip_block "$ip" "$iface" + + kill_tcp_connections "$iface" "$ip" + + delete_ip_from_iface "$iface" "$ip" "$maskbits" || { + ip_unblock "$ip" "$iface" + exit 1 + } + + ip_unblock "$ip" "$iface" + + flush_route_cache + ;; + +updateip) + # moving an IP is a bit more complex than it seems. + # First we drop all traffic on the old interface. + # Then we try to add the ip to the new interface and before + # we finally remove it from the old interface. + # + # 1) firewall this IP, so no new external packets arrive for it + # 2) remove the IP from the old interface (and new interface, to be sure) + # 3) add the IP to the new interface + # 4) remove the firewall rule + # 5) use ctdb gratarp to propagate the new mac address + # 6) use netstat -tn to find existing connections, and tickle them + _oiface=$2 + niface=$3 + _ip=$4 + _maskbits=$5 + + get_iface_ip_maskbits "$_oiface" "$_ip" "$_maskbits" + oiface="$iface" + + # Could check maskbits too. However, that should never change + # so we want to notice if it does. + if [ "$oiface" = "$niface" ] ; then + echo "Redundant \"updateip\" - ${ip} already on ${niface}" + exit 0 + fi + + ip_block "$ip" "$oiface" + + delete_ip_from_iface "$oiface" "$ip" "$maskbits" 2>/dev/null + delete_ip_from_iface "$niface" "$ip" "$maskbits" 2>/dev/null + + add_ip_to_iface "$niface" "$ip" "$maskbits" || { + ip_unblock "$ip" "$oiface" + exit 1 + } + + ip_unblock "$ip" "$oiface" + + flush_route_cache + + # propagate the new mac address + $CTDB gratarp "$ip" "$niface" + + # tickle all existing connections, so that dropped packets + # are retransmited and the tcp streams work + tickle_tcp_connections "$ip" + ;; + +monitor) + monitor_interfaces || exit 1 + ;; +esac + +exit 0 diff --git a/ctdb/config/events/legacy/11.natgw.script b/ctdb/config/events/legacy/11.natgw.script new file mode 100755 index 0000000..fb93dea --- /dev/null +++ b/ctdb/config/events/legacy/11.natgw.script @@ -0,0 +1,242 @@ +#!/bin/sh +# Script to set up one of the nodes as a NAT gateway for all other nodes. +# This is used to ensure that all nodes in the cluster can still originate +# traffic to the external network even if there are no public addresses +# available. +# + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +service_name="natgw" + +load_script_options + +[ -n "$CTDB_NATGW_NODES" ] || exit 0 +export CTDB_NATGW_NODES + +ctdb_setup_state_dir "failover" "$service_name" + +# script_state_dir set by ctdb_setup_state_dir() +# shellcheck disable=SC2154 +natgw_cfg_new="${script_state_dir}/cfg_new" +natgw_cfg_old="${script_state_dir}/cfg_old" +natgw_leader_old="${script_state_dir}/leader_old" + +ctdb_natgw_follower_only () +{ + _ip_address=$(ctdb_get_ip_address) + + awk -v my_ip="$_ip_address" \ + '$1 == my_ip { if ($2 ~ "follower-only") { exit 0 } else { exit 1 } }' \ + "$CTDB_NATGW_NODES" +} + +natgw_check_config () +{ + [ -r "$CTDB_NATGW_NODES" ] || \ + die "error: CTDB_NATGW_NODES=${CTDB_NATGW_NODES} unreadable" + if ! ctdb_natgw_follower_only ; then + [ -n "$CTDB_NATGW_PUBLIC_IP" ] || \ + die "Invalid configuration: CTDB_NATGW_PUBLIC_IP not set" + [ -n "$CTDB_NATGW_PUBLIC_IFACE" ] || \ + die "Invalid configuration: CTDB_NATGW_PUBLIC_IFACE not set" + fi + [ -n "$CTDB_NATGW_PRIVATE_NETWORK" ] || \ + die "Invalid configuration: CTDB_NATGW_PRIVATE_NETWORK not set" + + # The default is to create a single default route + [ -n "$CTDB_NATGW_STATIC_ROUTES" ] || CTDB_NATGW_STATIC_ROUTES="0.0.0.0/0" +} + +natgw_write_config () +{ + _f="$1" + + cat >"$_f" <<EOF +CTDB_NATGW_NODES="$CTDB_NATGW_NODES" +CTDB_NATGW_PUBLIC_IP="$CTDB_NATGW_PUBLIC_IP" +CTDB_NATGW_PUBLIC_IFACE="$CTDB_NATGW_PUBLIC_IFACE" +CTDB_NATGW_DEFAULT_GATEWAY="$CTDB_NATGW_DEFAULT_GATEWAY" +CTDB_NATGW_PRIVATE_NETWORK="$CTDB_NATGW_PRIVATE_NETWORK" +CTDB_NATGW_STATIC_ROUTES="$CTDB_NATGW_STATIC_ROUTES" +EOF +} + +natgw_config_has_changed () +{ + natgw_write_config "$natgw_cfg_new" + + # Non-existent old returns true, no log message + if [ ! -f "$natgw_cfg_old" ] ; then + return 0 + fi + + # Handle no change + if cmp "$natgw_cfg_old" "$natgw_cfg_new" >/dev/null 2>&1 ; then + return 1 + fi + + echo "NAT gateway configuration has changed" + return 0 +} + +_natgw_clear () +{ + _ip="${CTDB_NATGW_PUBLIC_IP%/*}" + _maskbits="${CTDB_NATGW_PUBLIC_IP#*/}" + + delete_ip_from_iface \ + "$CTDB_NATGW_PUBLIC_IFACE" "$_ip" "$_maskbits" >/dev/null 2>&1 + for _net_gw in $CTDB_NATGW_STATIC_ROUTES ; do + _net="${_net_gw%@*}" + ip route del "$_net" metric 10 >/dev/null 2>/dev/null + done + + # Delete the masquerading setup from a previous iteration where we + # were the NAT-GW + iptables -D POSTROUTING -t nat \ + -s "$CTDB_NATGW_PRIVATE_NETWORK" ! -d "$CTDB_NATGW_PRIVATE_NETWORK" \ + -j MASQUERADE >/dev/null 2>/dev/null + + iptables -D INPUT -p tcp --syn -d "${_ip}/32" -j REJECT 2>/dev/null +} + +natgw_clear () +{ + if [ -r "$natgw_cfg_old" ] ; then + (. "$natgw_cfg_old" ; _natgw_clear) + else + _natgw_clear + fi +} + +natgw_set_leader () +{ + set_proc sys/net/ipv4/ip_forward 1 + iptables -A POSTROUTING -t nat \ + -s "$CTDB_NATGW_PRIVATE_NETWORK" ! -d "$CTDB_NATGW_PRIVATE_NETWORK" \ + -j MASQUERADE + + # block all incoming connections to the NATGW IP address + ctdb_natgw_public_ip_host="${CTDB_NATGW_PUBLIC_IP%/*}/32" + iptables -D INPUT -p tcp --syn \ + -d "$ctdb_natgw_public_ip_host" -j REJECT 2>/dev/null + iptables -I INPUT -p tcp --syn \ + -d "$ctdb_natgw_public_ip_host" -j REJECT 2>/dev/null + + ip addr add "$CTDB_NATGW_PUBLIC_IP" dev "$CTDB_NATGW_PUBLIC_IFACE" + for _net_gw in $CTDB_NATGW_STATIC_ROUTES ; do + _net="${_net_gw%@*}" + if [ "$_net" != "$_net_gw" ] ; then + _gw="${_net_gw#*@}" + else + _gw="$CTDB_NATGW_DEFAULT_GATEWAY" + fi + + [ -n "$_gw" ] || continue + ip route add "$_net" metric 10 via "$_gw" + done +} + +natgw_set_follower () +{ + _natgwip="$1" + + for _net_gw in $CTDB_NATGW_STATIC_ROUTES ; do + _net="${_net_gw%@*}" + ip route add "$_net" via "$_natgwip" metric 10 + done +} + +natgw_ensure_leader () +{ + # Intentional word splitting here + # shellcheck disable=SC2046 + set -- $("${CTDB_HELPER_BINDIR}/ctdb_natgw" leader) + natgwleader="${1:--1}" # Default is -1, for failure above + natgwip="$2" + + if [ "$natgwleader" = "-1" ]; then + # Fail... + die "There is no NATGW leader node" + fi +} + +natgw_leader_has_changed () +{ + if [ -r "$natgw_leader_old" ] ; then + read _old_natgwleader <"$natgw_leader_old" + else + _old_natgwleader="" + fi + [ "$_old_natgwleader" != "$natgwleader" ] +} + +natgw_save_state () +{ + echo "$natgwleader" >"$natgw_leader_old" + # Created by natgw_config_has_changed() + mv "$natgw_cfg_new" "$natgw_cfg_old" +} + + +case "$1" in +setup) + natgw_check_config + ;; + +startup) + natgw_check_config + + # Error if CTDB_NATGW_PUBLIC_IP is listed in public addresses + ip_pat=$(echo "$CTDB_NATGW_PUBLIC_IP" | sed -e 's@\.@\\.@g') + ctdb_public_addresses="${CTDB_BASE}/public_addresses" + if grep -q "^${ip_pat}[[:space:]]" "$ctdb_public_addresses" ; then + die "ERROR: CTDB_NATGW_PUBLIC_IP same as a public address" + fi + + # do not send out arp requests from loopback addresses + set_proc sys/net/ipv4/conf/all/arp_announce 2 + ;; + +updatenatgw|ipreallocated) + natgw_check_config + + natgw_ensure_leader + + natgw_config_has_changed || natgw_leader_has_changed || exit 0 + + natgw_clear + + pnn=$(ctdb_get_pnn) + if [ "$pnn" = "$natgwleader" ]; then + natgw_set_leader + else + natgw_set_follower "$natgwip" + fi + + # flush our route cache + set_proc sys/net/ipv4/route/flush 1 + + # Only update saved state when NATGW successfully updated + natgw_save_state + ;; + +shutdown|removenatgw) + natgw_check_config + natgw_clear + ;; + +monitor) + natgw_check_config + + if [ -n "$CTDB_NATGW_PUBLIC_IFACE" ] ; then + interface_monitor "$CTDB_NATGW_PUBLIC_IFACE" || exit 1 + fi + ;; +esac + +exit 0 diff --git a/ctdb/config/events/legacy/11.routing.script b/ctdb/config/events/legacy/11.routing.script new file mode 100755 index 0000000..7ba7f3b --- /dev/null +++ b/ctdb/config/events/legacy/11.routing.script @@ -0,0 +1,49 @@ +#!/bin/sh + +# Attempt to add a set of static routes. +# +# Do this in "ipreallocated" rather than just "startup" because some +# of the routes might be missing because the corresponding interface +# has not previously had any IPs assigned or IPs were previously +# released and corresponding routes were dropped. +# +# Addition of some routes might fail, errors go to /dev/null. +# +# Routes to add are defined in $CTDB_BASE/static-routes. Syntax is: +# +# IFACE NET/MASK GATEWAY +# +# Example: +# +# bond1 10.3.3.0/24 10.0.0.1 + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +load_script_options + +[ -f "${CTDB_BASE}/static-routes" ] || { + exit 0 +} + +case "$1" in +ipreallocated) + while read iface dest gw; do + ip route add "$dest" via "$gw" dev "$iface" >/dev/null 2>&1 + done <"${CTDB_BASE}/static-routes" + ;; + +updateip) + oiface=$2 + niface=$3 + while read iface dest gw; do + if [ "$niface" = "$iface" ] || [ "$oiface" = "$iface" ] ; then + ip route add "$dest" via "$gw" dev "$iface" >/dev/null 2>&1 + fi + done <"${CTDB_BASE}/static-routes" + ;; +esac + +exit 0 diff --git a/ctdb/config/events/legacy/13.per_ip_routing.script b/ctdb/config/events/legacy/13.per_ip_routing.script new file mode 100755 index 0000000..d7949c6 --- /dev/null +++ b/ctdb/config/events/legacy/13.per_ip_routing.script @@ -0,0 +1,438 @@ +#!/bin/sh + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +load_script_options + +service_name="per_ip_routing" + +# Do nothing if unconfigured +[ -n "$CTDB_PER_IP_ROUTING_CONF" ] || exit 0 + +table_id_prefix="ctdb." + +[ -n "$CTDB_PER_IP_ROUTING_RULE_PREF" ] || \ + die "error: CTDB_PER_IP_ROUTING_RULE_PREF not configured" + +[ "$CTDB_PER_IP_ROUTING_TABLE_ID_LOW" -lt "$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" ] 2>/dev/null || \ + die "error: CTDB_PER_IP_ROUTING_TABLE_ID_LOW[$CTDB_PER_IP_ROUTING_TABLE_ID_LOW] and/or CTDB_PER_IP_ROUTING_TABLE_ID_HIGH[$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH] improperly configured" + +if [ "$CTDB_PER_IP_ROUTING_TABLE_ID_LOW" -le 253 ] && \ + [ 255 -le "$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" ] ; then + die "error: range CTDB_PER_IP_ROUTING_TABLE_ID_LOW[$CTDB_PER_IP_ROUTING_TABLE_ID_LOW]..CTDB_PER_IP_ROUTING_TABLE_ID_HIGH[$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH] must not include 253-255" +fi + +have_link_local_config () +{ + [ "$CTDB_PER_IP_ROUTING_CONF" = "__auto_link_local__" ] +} + +if ! have_link_local_config && [ ! -r "$CTDB_PER_IP_ROUTING_CONF" ] ; then + die "error: CTDB_PER_IP_ROUTING_CONF=$CTDB_PER_IP_ROUTING_CONF file not found" +fi + +ctdb_setup_state_dir "failover" "$service_name" + +###################################################################### + +ipv4_is_valid_addr() +{ + _ip="$1" + + _count=0 + # Get the shell to break up the address into 1 word per octet + # Intentional word splitting here + # shellcheck disable=SC2086 + for _o in $(export IFS="." ; echo $_ip) ; do + # The 2>/dev/null stops output from failures where an "octet" + # is not numeric. The test will still fail. + if ! [ 0 -le $_o ] && [ $_o -le 255 ] 2>/dev/null ; then + return 1 + fi + _count=$((_count + 1)) + done + + # A valid IPv4 address has 4 octets + [ $_count -eq 4 ] +} + +ensure_ipv4_is_valid_addr () +{ + _event="$1" + _ip="$2" + + ipv4_is_valid_addr "$_ip" || { + echo "$0: $_event not an ipv4 address skipping IP:$_ip" + exit 0 + } +} + +ipv4_host_addr_to_net () +{ + _host="$1" + _maskbits="$2" + + # Convert the host address to an unsigned long by splitting out + # the octets and doing the math. + _host_ul=0 + # Intentional word splitting here + # shellcheck disable=SC2086 + for _o in $(export IFS="." ; echo $_host) ; do + _host_ul=$(( (_host_ul << 8) + _o)) # work around Emacs color bug + done + + # Calculate the mask and apply it. + _mask_ul=$(( 0xffffffff << (32 - _maskbits) )) + _net_ul=$(( _host_ul & _mask_ul )) + + # Now convert to a network address one byte at a time. + _net="" + for _o in $(seq 1 4) ; do + _net="$((_net_ul & 255))${_net:+.}${_net}" + _net_ul=$((_net_ul >> 8)) + done + + echo "${_net}/${_maskbits}" +} + +###################################################################### + +ensure_rt_tables () +{ + rt_tables="$CTDB_SYS_ETCDIR/iproute2/rt_tables" + # script_state_dir set by ctdb_setup_state_dir() + # shellcheck disable=SC2154 + rt_tables_lock="${script_state_dir}/rt_tables_lock" + + # This file should always exist. Even if this didn't exist on the + # system, adding a route will have created it. What if we startup + # and immediately shutdown? Let's be sure. + if [ ! -f "$rt_tables" ] ; then + mkdir -p "${rt_tables%/*}" # dirname + touch "$rt_tables" + fi +} + +# Setup a table id to use for the given IP. We don't need to know it, +# it just needs to exist in /etc/iproute2/rt_tables. Fail if no free +# table id could be found in the configured range. +ensure_table_id_for_ip () +{ + _ip=$1 + + ensure_rt_tables + + # Maintain a table id for each IP address we've ever seen in + # rt_tables. We use a "ctdb." prefix on the label. + _label="${table_id_prefix}${_ip}" + + # This finds either the table id corresponding to the label or a + # new unused one (that is greater than all the used ones in the + # range). + ( + # Note that die() just gets us out of the subshell... + flock --timeout 30 9 || \ + die "ensure_table_id_for_ip: failed to lock file $rt_tables" + + _new="$CTDB_PER_IP_ROUTING_TABLE_ID_LOW" + while read _t _l ; do + # Skip comments + case "$_t" in + \#*) continue ;; + esac + # Found existing: done + if [ "$_l" = "$_label" ] ; then + return 0 + fi + # Potentially update the new table id to be used. The + # redirect stops error spam for a non-numeric value. + if [ "$_new" -le "$_t" ] && \ + [ "$_t" -le "$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" ] \ + 2>/dev/null ; then + _new=$((_t + 1)) + fi + done <"$rt_tables" + + # If the new table id is legal then add it to the file and + # print it. + if [ "$_new" -le "$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" ] ; then + printf '%d\t%s\n' "$_new" "$_label" >>"$rt_tables" + return 0 + else + return 1 + fi + ) 9>"$rt_tables_lock" +} + +# Clean up all the table ids that we might own. +clean_up_table_ids () +{ + ensure_rt_tables + + ( + # Note that die() just gets us out of the subshell... + flock --timeout 30 9 || \ + die "clean_up_table_ids: failed to lock file $rt_tables" + + # Delete any items from the file that have a table id in our + # range or a label matching our label. Preserve comments. + _tmp="${rt_tables}.$$.ctdb" + awk -v min="$CTDB_PER_IP_ROUTING_TABLE_ID_LOW" \ + -v max="$CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" \ + -v pre="$table_id_prefix" \ + '/^#/ || + !(min <= $1 && $1 <= max) && + !(index($2, pre) == 1) { + print $0 }' "$rt_tables" >"$_tmp" + + mv "$_tmp" "$rt_tables" + ) 9>"$rt_tables_lock" +} + +###################################################################### + +# This prints the config for an IP, which is either relevant entries +# from the config file or, if set to the magic link local value, some +# link local routing config for the IP. +get_config_for_ip () +{ + _ip="$1" + + if have_link_local_config ; then + # When parsing public_addresses also split on '/'. This means + # that we get the maskbits as item #2 without further parsing. + while IFS="/$IFS" read _i _maskbits _x ; do + if [ "$_ip" = "$_i" ] ; then + printf "%s" "$_ip "; ipv4_host_addr_to_net "$_ip" "$_maskbits" + fi + done <"${CTDB_BASE}/public_addresses" + else + while read _i _rest ; do + if [ "$_ip" = "$_i" ] ; then + printf '%s\t%s\n' "$_ip" "$_rest" + fi + done <"$CTDB_PER_IP_ROUTING_CONF" + fi +} + +ip_has_configuration () +{ + _ip="$1" + + _conf=$(get_config_for_ip "$_ip") + [ -n "$_conf" ] +} + +add_routing_for_ip () +{ + _iface="$1" + _ip="$2" + + # Do nothing if no config for this IP. + ip_has_configuration "$_ip" || return 0 + + ensure_table_id_for_ip "$_ip" || \ + die "add_routing_for_ip: out of table ids in range $CTDB_PER_IP_ROUTING_TABLE_ID_LOW - $CTDB_PER_IP_ROUTING_TABLE_ID_HIGH" + + _pref="$CTDB_PER_IP_ROUTING_RULE_PREF" + _table_id="${table_id_prefix}${_ip}" + + del_routing_for_ip "$_ip" >/dev/null 2>&1 + + ip rule add from "$_ip" pref "$_pref" table "$_table_id" || \ + die "add_routing_for_ip: failed to add rule for $_ip" + + # Add routes to table for any lines matching the IP. + get_config_for_ip "$_ip" | + while read _i _dest _gw ; do + _r="$_dest ${_gw:+via} $_gw dev $_iface table $_table_id" + # Intentionally unquoted multi-word value here + # shellcheck disable=SC2086 + ip route add $_r || \ + die "add_routing_for_ip: failed to add route: $_r" + done +} + +del_routing_for_ip () +{ + _ip="$1" + + _pref="$CTDB_PER_IP_ROUTING_RULE_PREF" + _table_id="${table_id_prefix}${_ip}" + + # Do this unconditionally since we own any matching table ids. + # However, print a meaningful message if something goes wrong. + _cmd="ip rule del from $_ip pref $_pref table $_table_id" + _out=$($_cmd 2>&1) || \ + cat <<EOF +WARNING: Failed to delete policy routing rule + Command "$_cmd" failed: + $_out +EOF + # This should never usually fail, so don't redirect output. + # However, it can fail when deleting a rogue IP, since there will + # be no routes for that IP. In this case it should only fail when + # the rule deletion above has already failed because the table id + # is invalid. Therefore, go to a little bit of trouble to indent + # the failure message so that it is associated with the above + # warning message and doesn't look too nasty. + ip route flush table "$_table_id" 2>&1 | sed -e 's@^.@ &@' +} + +###################################################################### + +flush_rules_and_routes () +{ + ip rule show | + while read _p _x _i _x _t ; do + # Remove trailing colon after priority/preference. + _p="${_p%:}" + # Only remove rules that match our priority/preference. + [ "$CTDB_PER_IP_ROUTING_RULE_PREF" = "$_p" ] || continue + + echo "Removing ip rule for public address $_i for routing table $_t" + ip rule del from "$_i" table "$_t" pref "$_p" + ip route flush table "$_t" 2>/dev/null + done +} + +# Add any missing routes. Some might have gone missing if, for +# example, all IPs on the network were removed (possibly if the +# primary was removed). If $1 is "force" then (re-)add all the +# routes. +add_missing_routes () +{ + $CTDB ip -v -X | { + read _x # skip header line + + # Read the rest of the lines. We're only interested in the + # "IP" and "ActiveInterface" columns. The latter is only set + # for addresses local to this node, making it easy to skip + # non-local addresses. For each IP local address we check if + # the relevant routing table is populated and populate it if + # not. + while IFS="|" read _x _ip _x _iface _x ; do + [ -n "$_iface" ] || continue + + _table_id="${table_id_prefix}${_ip}" + if [ -z "$(ip route show table "$_table_id" 2>/dev/null)" ] || \ + [ "$1" = "force" ] ; then + add_routing_for_ip "$_iface" "$_ip" + fi + done + } || exit $? +} + +# Remove rules/routes for addresses that we're not hosting. If a +# releaseip event failed in an earlier script then we might not have +# had a chance to remove the corresponding rules/routes. +remove_bogus_routes () +{ + # Get a IPs current hosted by this node, each anchored with '@'. + _ips=$($CTDB ip -v -X | awk -F'|' 'NR > 1 && $4 != "" {printf "@%s@\n", $2}') + + # x is intentionally ignored + # shellcheck disable=SC2034 + ip rule show | + while read _p _x _i _x _t ; do + # Remove trailing colon after priority/preference. + _p="${_p%:}" + # Only remove rules that match our priority/preference. + [ "$CTDB_PER_IP_ROUTING_RULE_PREF" = "$_p" ] || continue + # Only remove rules for which we don't have an IP. This could + # be done with grep, but let's do it with shell prefix removal + # to avoid unnecessary processes. This falls through if + # "@${_i}@" isn't present in $_ips. + [ "$_ips" = "${_ips#*@"${_i}"@}" ] || continue + + echo "Removing ip rule/routes for unhosted public address $_i" + del_routing_for_ip "$_i" + done +} + +###################################################################### + +ctdb_check_args "$@" + +case "$1" in +startup) + flush_rules_and_routes + + # make sure that we only respond to ARP messages from the NIC + # where a particular ip address is associated. + get_proc sys/net/ipv4/conf/all/arp_filter >/dev/null 2>&1 && { + set_proc sys/net/ipv4/conf/all/arp_filter 1 + } + ;; + +shutdown) + flush_rules_and_routes + clean_up_table_ids + ;; + +takeip) + iface=$2 + ip=$3 + # maskbits included here so argument order is obvious + # shellcheck disable=SC2034 + maskbits=$4 + + ensure_ipv4_is_valid_addr "$1" "$ip" + add_routing_for_ip "$iface" "$ip" + + # flush our route cache + set_proc sys/net/ipv4/route/flush 1 + + $CTDB gratarp "$ip" "$iface" + ;; + +updateip) + # oiface, maskbits included here so argument order is obvious + # shellcheck disable=SC2034 + oiface=$2 + niface=$3 + ip=$4 + # shellcheck disable=SC2034 + maskbits=$5 + + ensure_ipv4_is_valid_addr "$1" "$ip" + add_routing_for_ip "$niface" "$ip" + + # flush our route cache + set_proc sys/net/ipv4/route/flush 1 + + $CTDB gratarp "$ip" "$niface" + tickle_tcp_connections "$ip" + ;; + +releaseip) + iface=$2 + ip=$3 + # maskbits included here so argument order is obvious + # shellcheck disable=SC2034 + maskbits=$4 + + ensure_ipv4_is_valid_addr "$1" "$ip" + del_routing_for_ip "$ip" + ;; + +ipreallocated) + add_missing_routes + remove_bogus_routes + ;; + +reconfigure) + echo "Reconfiguring service \"${service_name}\"..." + + add_missing_routes "force" + remove_bogus_routes + + # flush our route cache + set_proc sys/net/ipv4/route/flush 1 + ;; +esac + +exit 0 diff --git a/ctdb/config/events/legacy/20.multipathd.script b/ctdb/config/events/legacy/20.multipathd.script new file mode 100755 index 0000000..a420251 --- /dev/null +++ b/ctdb/config/events/legacy/20.multipathd.script @@ -0,0 +1,83 @@ +#!/bin/sh +# ctdb event script for monitoring the multipath daemon +# +# Configure monitporing of multipath devices by listing the device serials +# in /etc/ctdb/multipathd : +# CTDB_MONITOR_MPDEVICES="device1 device2 ..." +# + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +service_name="multipathd" + +load_script_options + +[ -n "$CTDB_MONITOR_MPDEVICES" ] || exit 0 + +ctdb_setup_state_dir "service" "$service_name" + +# script_state_dir set by ctdb_setup_state_dir() +# shellcheck disable=SC2154 +multipath_fail="${script_state_dir}/fail" + +multipathd_check_background() +{ + for _device in $CTDB_MONITOR_MPDEVICES; do + # Check multipath knows about the device + _out=$(multipath -ll "$_device") + if [ -z "$_out" ] ; then + echo "ERROR: device \"${_device}\" not known to multipathd" \ + >"$multipath_fail" + exit 1 + fi + + # Check for at least 1 active path + if ! echo "$_out" | grep 'prio=.* status=active' >/dev/null 2>&1 ; then + echo "ERROR: multipath device \"${_device}\" has no active paths" \ + >"$multipath_fail" + exit 1 + fi + done + exit 0 +} + +multipathd_check() +{ + # Run the actual check in the background since the call to + # multipath may block + multipathd_check_background </dev/null >/dev/null 2>&1 & + _pid="$!" + _timeleft=10 + + while [ $_timeleft -gt 0 ]; do + _timeleft=$((_timeleft - 1)) + + # see if the process still exists + kill -0 $_pid >/dev/null 2>&1 || { + if wait $_pid ; then + return 0 + else + cat "$multipath_fail" + rm -f "$multipath_fail" + return 1 + fi + } + sleep 1 + done + + echo "ERROR: callout to multipath checks hung" + # If hung then this probably won't work, but worth trying... + kill -9 $_pid >/dev/null 2>&1 + return 1 +} + +case "$1" in +monitor) + multipathd_check || die "multipath monitoring failed" + ;; +esac + +exit 0 diff --git a/ctdb/config/events/legacy/31.clamd.script b/ctdb/config/events/legacy/31.clamd.script new file mode 100755 index 0000000..5d60fe3 --- /dev/null +++ b/ctdb/config/events/legacy/31.clamd.script @@ -0,0 +1,37 @@ +#!/bin/sh +# event script to manage clamd in a cluster environment + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +detect_init_style + +case $CTDB_INIT_STYLE in +redhat) + service_name="clamd" + ;; +*) + service_name="clamav" + ;; +esac + +load_script_options + +case "$1" in +startup) + service "$service_name" stop > /dev/null 2>&1 + service "$service_name" start || exit $? + ;; + +shutdown) + service "$service_name"_stop + ;; + +monitor) + ctdb_check_unix_socket "$CTDB_CLAMD_SOCKET" || exit $? + ;; +esac + +exit 0 diff --git a/ctdb/config/events/legacy/40.vsftpd.script b/ctdb/config/events/legacy/40.vsftpd.script new file mode 100755 index 0000000..19d4007 --- /dev/null +++ b/ctdb/config/events/legacy/40.vsftpd.script @@ -0,0 +1,56 @@ +#!/bin/sh +# event strict to manage vsftpd in a cluster environment + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +service_name="vsftpd" + +service_reconfigure () +{ + service $service_name restart +} + +load_script_options + +ctdb_setup_state_dir "service" "$service_name" + +case "$1" in +startup) + service "$service_name" stop > /dev/null 2>&1 + service "$service_name" start + ctdb_counter_init + ;; + +shutdown) + service "$service_name" stop + ;; + +takeip|releaseip) + ctdb_service_set_reconfigure + ;; + +ipreallocated) + if ctdb_service_needs_reconfigure ; then + ctdb_service_reconfigure + fi + ;; + +monitor) + if ctdb_check_tcp_ports 21 ; then + ctdb_counter_init + else + ctdb_counter_incr + num_fails=$(ctdb_counter_get) + if [ "$num_fails" -ge 2 ] ; then + die "ERROR: ${num_fails} consecutive failures for vsftpd, marking node unhealthy" + elif [ "$num_fails" -eq 1 ] ; then + echo "WARNING: vsftpd not listening but less than 2 consecutive failures, not unhealthy yet" + fi + fi + ;; +esac + +exit 0 diff --git a/ctdb/config/events/legacy/41.httpd.script b/ctdb/config/events/legacy/41.httpd.script new file mode 100755 index 0000000..dd90aed --- /dev/null +++ b/ctdb/config/events/legacy/41.httpd.script @@ -0,0 +1,78 @@ +#!/bin/sh +# event script to manage httpd in a cluster environment + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +detect_init_style + +case $CTDB_INIT_STYLE in +redhat) + service_name="httpd" + ;; +suse|debian|*) + service_name="apache2" + ;; +esac + +load_script_options + +ctdb_setup_state_dir "service" "$service_name" + +# RHEL5 sometimes use a SIGKILL to terminate httpd, which then leaks +# semaphores. This is a hack to clean them up. +cleanup_httpd_semaphore_leak() { + killall -q -0 "$service_name" || + for i in $(ipcs -s | awk '$3 == "apache" { print $2 }') ; do + ipcrm -s "$i" + done +} + +########## + +service_start () +{ + cleanup_httpd_semaphore_leak + service $service_name start +} +service_stop () +{ + service $service_name stop + killall -q -9 $service_name || true +} + +case "$1" in +startup) + service_start + ctdb_counter_init + ;; + +shutdown) + service_stop + ;; + +monitor) + if ctdb_check_tcp_ports 80 >/dev/null 2>/dev/null ; then + ctdb_counter_init + else + ctdb_counter_incr + num_fails=$(ctdb_counter_get) + if [ "$num_fails" -eq 2 ] ; then + echo "HTTPD is not running. Trying to restart HTTPD." + service_stop + service_start + exit 0 + elif [ "$num_fails" -ge 5 ] ; then + echo "HTTPD is not running. Trying to restart HTTPD." + service_stop + service_start + exit 1 + fi + fi + ;; +esac + +exit 0 + diff --git a/ctdb/config/events/legacy/48.netbios.script b/ctdb/config/events/legacy/48.netbios.script new file mode 100755 index 0000000..4320447 --- /dev/null +++ b/ctdb/config/events/legacy/48.netbios.script @@ -0,0 +1,64 @@ +#!/bin/sh +# ctdb event script for Netbios Name Services + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +detect_init_style + +case $CTDB_INIT_STYLE in + suse) + CTDB_SERVICE_NMB=${CTDB_SERVICE_NMB:-nmb} + ;; + debian) + CTDB_SERVICE_NMB=${CTDB_SERVICE_NMB:-nmbd} + ;; + *) + # Use redhat style as default: + CTDB_SERVICE_NMB=${CTDB_SERVICE_NMB:-nmb} + ;; +esac + +service_name="netbios" + +load_script_options + +ctdb_setup_state_dir "service" "$service_name" + +service_start () +{ + # make sure nmbd is not already started + service "$CTDB_SERVICE_NMB" stop > /dev/null 2>&1 + killall -0 -q nmbd && { + sleep 1 + # make absolutely sure nmbd is dead + killall -q -9 nmbd + } + + # start Samba nmbd service. Start it reniced, as under very heavy load + # the number of smbd processes will mean that it leaves few cycles + # for anything else + nice_service "$CTDB_SERVICE_NMB" start || die "Failed to start nmbd" +} + +service_stop () +{ + service "$CTDB_SERVICE_NMB" stop +} + +########################### + +case "$1" in +startup) + service_start + ;; + +shutdown) + service_stop + ;; + +esac + +exit 0 diff --git a/ctdb/config/events/legacy/49.winbind.script b/ctdb/config/events/legacy/49.winbind.script new file mode 100755 index 0000000..852b541 --- /dev/null +++ b/ctdb/config/events/legacy/49.winbind.script @@ -0,0 +1,55 @@ +#!/bin/sh +# ctdb event script for winbind + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +CTDB_SERVICE_WINBIND=${CTDB_SERVICE_WINBIND:-winbind} + +# service_name is used by various functions +# shellcheck disable=SC2034 +service_name="winbind" + +load_script_options + +service_start () +{ + service "$CTDB_SERVICE_WINBIND" stop >/dev/null 2>&1 + killall -0 -q winbindd && { + sleep 1 + # make absolutely sure winbindd is dead + killall -q -9 winbindd + } + + service "$CTDB_SERVICE_WINBIND" start || \ + die "Failed to start winbind" +} + +service_stop () +{ + service "$CTDB_SERVICE_WINBIND" stop +} + +########################### + +case "$1" in +startup) + service_start + ;; + +shutdown) + service_stop + ;; + +monitor) + if ! out=$(wbinfo -p 2>&1) ; then + echo "ERROR: wbinfo -p returned error" + echo "$out" + exit 1 + fi + ;; +esac + +exit 0 diff --git a/ctdb/config/events/legacy/50.samba.script b/ctdb/config/events/legacy/50.samba.script new file mode 100755 index 0000000..81c6e7f --- /dev/null +++ b/ctdb/config/events/legacy/50.samba.script @@ -0,0 +1,163 @@ +#!/bin/sh +# ctdb event script for Samba + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +detect_init_style + +case $CTDB_INIT_STYLE in + suse) + CTDB_SERVICE_SMB=${CTDB_SERVICE_SMB:-smb} + ;; + debian) + CTDB_SERVICE_SMB=${CTDB_SERVICE_SMB:-smbd} + ;; + *) + # Use redhat style as default: + CTDB_SERVICE_SMB=${CTDB_SERVICE_SMB:-smb} + ;; +esac + +service_name="samba" + +load_script_options + +ctdb_setup_state_dir "service" "$service_name" + +service_start () +{ + # make sure samba is not already started + service "$CTDB_SERVICE_SMB" stop > /dev/null 2>&1 + killall -0 -q smbd && { + sleep 1 + # make absolutely sure samba is dead + killall -q -9 smbd + } + # start Samba service. Start it reniced, as under very heavy load + # the number of smbd processes will mean that it leaves few cycles + # for anything else + nice_service "$CTDB_SERVICE_SMB" start || die "Failed to start samba" +} + +service_stop () +{ + service "$CTDB_SERVICE_SMB" stop + program_stack_traces "smbd" 5 +} + +###################################################################### +# Show the testparm output using a cached smb.conf to avoid delays due +# to registry access. + +# script_state_dir set by ctdb_setup_state_dir() +# shellcheck disable=SC2154 +smbconf_cache="$script_state_dir/smb.conf.cache" + +testparm_foreground_update () +{ + _timeout="$1" + + # No need to remove these temporary files, since there are only 2 + # of them. + _out="${smbconf_cache}.out" + _err="${smbconf_cache}.err" + + timeout "$_timeout" testparm -v -s >"$_out" 2>"$_err" + case $? in + 0) : ;; + 124) + if [ -f "$smbconf_cache" ] ; then + echo "WARNING: smb.conf cache update timed out - using old cache file" + return 1 + else + echo "ERROR: smb.conf cache create failed - testparm command timed out" + exit 1 + fi + ;; + *) + if [ -f "$smbconf_cache" ] ; then + echo "WARNING: smb.conf cache update failed - using old cache file" + cat "$_err" + return 1 + else + echo "ERROR: smb.conf cache create failed - testparm failed with:" + cat "$_err" + exit 1 + fi + esac + + # Only using $$ here to avoid a collision. This is written into + # CTDB's own state directory so there is no real need for a secure + # temporary file. + _tmpfile="${smbconf_cache}.$$" + # Patterns to exclude... + _pat='^[[:space:]]+(registry[[:space:]]+shares|include|copy|winbind[[:space:]]+separator)[[:space:]]+=' + grep -Ev "$_pat" <"$_out" >"$_tmpfile" + mv "$_tmpfile" "$smbconf_cache" # atomic + + return 0 +} + +testparm_background_update () +{ + _timeout="$1" + + testparm_foreground_update "$_timeout" >/dev/null 2>&1 </dev/null & +} + +testparm_cat () +{ + testparm -s "$smbconf_cache" "$@" 2>/dev/null +} + +list_samba_shares () +{ + testparm_cat | + sed -n -e 's@^[[:space:]]*path[[:space:]]*=[[:space:]]@@p' | + sed -e 's/"//g' +} + +list_samba_ports () +{ + testparm_cat --parameter-name="smb ports" | + sed -e 's@,@ @g' +} + +########################### + +case "$1" in +startup) + service_start + ;; + +shutdown) + service_stop + ;; + +monitor) + testparm_foreground_update 10 + ret=$? + + smb_ports="$CTDB_SAMBA_CHECK_PORTS" + if [ -z "$smb_ports" ] ; then + smb_ports=$(list_samba_ports) + [ -n "$smb_ports" ] || die "Failed to set smb ports" + fi + # Intentionally unquoted multi-word value here + # shellcheck disable=SC2086 + ctdb_check_tcp_ports $smb_ports || exit $? + + if [ "$CTDB_SAMBA_SKIP_SHARE_CHECK" != "yes" ] ; then + list_samba_shares | ctdb_check_directories || exit $? + fi + + if [ $ret -ne 0 ] ; then + testparm_background_update 10 + fi + ;; +esac + +exit 0 diff --git a/ctdb/config/events/legacy/60.nfs.script b/ctdb/config/events/legacy/60.nfs.script new file mode 100755 index 0000000..8e496f7 --- /dev/null +++ b/ctdb/config/events/legacy/60.nfs.script @@ -0,0 +1,299 @@ +#!/bin/sh +# script to manage nfs in a clustered environment + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +service_name="nfs" + +load_system_config "nfs" + +load_script_options + +ctdb_setup_state_dir "service" "$service_name" + +###################################################################### + +service_reconfigure () +{ + # Restart lock manager, notify clients + if [ -x "${CTDB_BASE}/statd-callout" ] ; then + "${CTDB_BASE}/statd-callout" notify & + fi >/dev/null 2>&1 +} + +###################################################################### + +###################################################### +# Check the health of NFS services +# +# Use .check files in $CTDB_NFS_CHECKS_DIR. +# Default is "${CTDB_BASE}/nfs-checks.d/" +###################################################### +nfs_check_services () +{ + _dir="${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}" + + # Files must end with .check - avoids editor backups, RPM fu, ... + for _f in "$_dir"/[0-9][0-9].*.check ; do + [ -r "$_f" ] || continue + + _t="${_f%.check}" + _progname="${_t##*/[0-9][0-9].}" + + nfs_check_service "$_progname" <"$_f" + done +} + +###################################################### +# Check the health of an NFS service +# +# $1 - progname, passed to rpcinfo (looked up in /etc/rpc) +# +# Reads variables from stdin +# +# Variables are: +# +# * family - "tcp" or "udp" or space separated list +# default: tcp, not used with "service_check_cmd" +# * version - optional, RPC service version number +# default is to omit to check for any version, +# not used with "service_check_cmd" +# * unhealthy_after - number of check fails before unhealthy +# default: 1 +# * restart_every - number of check fails before restart +# default: 0, meaning no restart +# * service_stop_cmd - command to stop service +# default: no default, must be provided if +# restart_every > 0 +# * service_start_cmd - command to start service +# default: no default, must be provided if +# restart_every > 0 +# * service_check_cmd - command to check health of service +# default is to check RPC service using rpcinfo +# * service_debug_cmd - command to debug a service after trying to stop it; +# for example, it can be useful to print stack +# traces of threads that have not exited, since +# they may be stuck doing I/O; +# no default, see also function program_stack_traces() +# +# Quoting in values is not preserved +# +###################################################### +nfs_check_service () +{ + _progname="$1" + + # This sub-shell is created to intentionally limit the scope of + # variable values read from the .check files. + # shellcheck disable=SC2030 + ( + # Subshell to restrict scope variables... + + # Defaults + family="tcp" + version="" + unhealthy_after=1 + restart_every=0 + service_stop_cmd="" + service_start_cmd="" + service_check_cmd="" + service_debug_cmd="" + + # Eval line-by-line. Expands variable references in values. + # Also allows variable name checking, which seems useful. + while read _line ; do + case "$_line" in + \#*|"") : ;; # Ignore comments, blank lines + + family=*|version=*|\ + unhealthy_after=*|restart_every=*|\ + service_stop_cmd=*|service_start_cmd=*|\ + service_check_cmd=*|service_debug_cmd=*) + + eval "$_line" + ;; + *) + echo "ERROR: Unknown variable for ${_progname}: ${_line}" + exit 1 + esac + done + + _ok=false + if [ -n "$service_check_cmd" ] ; then + # Using eval means variables can contain semicolon separated commands + if eval "$service_check_cmd" ; then + _ok=true + else + _err="monitoring service \"${_progname}\" failed" + fi + else + if nfs_check_rpcinfo \ + "$_progname" "$version" "$family" >/dev/null ; then + _ok=true + else + _err="$ctdb_check_rpc_out" + fi + fi + + if $_ok ; then + if [ $unhealthy_after -ne 1 ] || [ $restart_every -ne 0 ] ; then + ctdb_counter_init "$_progname" + fi + exit 0 + fi + + ctdb_counter_incr "$_progname" + _failcount=$(ctdb_counter_get "$_progname") + + _unhealthy=false + if [ "$unhealthy_after" -gt 0 ] ; then + if [ "$_failcount" -ge "$unhealthy_after" ] ; then + _unhealthy=true + echo "ERROR: $_err" + fi + fi + + if [ "$restart_every" -gt 0 ] ; then + if [ $((_failcount % restart_every)) -eq 0 ] ; then + if ! $_unhealthy ; then + echo "WARNING: $_err" + fi + nfs_restart_service + fi + fi + + if $_unhealthy ; then + exit 1 + fi + + return 0 + ) || exit 1 +} + +# Uses: service_stop_cmd, service_start_cmd, service_debug_cmd +# This function is called within the sub-shell that shellcheck thinks +# loses the above variable values. +# shellcheck disable=SC2031 +nfs_restart_service () +{ + if [ -z "$service_stop_cmd" ] || [ -z "$service_start_cmd" ] ; then + die "ERROR: Can not restart service \"${_progname}\" without corresponding service_start_cmd/service_stop_cmd settings" + fi + + echo "Trying to restart service \"${_progname}\"..." + # Using eval means variables can contain semicolon separated commands + eval "$service_stop_cmd" + if [ -n "$service_debug_cmd" ] ; then + eval "$service_debug_cmd" + fi + background_with_logging eval "$service_start_cmd" +} + +###################################################### +# Check an RPC service with rpcinfo +###################################################### +ctdb_check_rpc () +{ + _progname="$1" # passed to rpcinfo (looked up in /etc/rpc) + _version="$2" # optional, not passed if empty/unset + _family="${3:-tcp}" # optional, default is "tcp" + + case "$_family" in + tcp6|udp6) + _localhost="${CTDB_RPCINFO_LOCALHOST6:-::1}" + ;; + *) + _localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}" + esac + + # $_version is not quoted because it is optional + # shellcheck disable=SC2086 + if ! ctdb_check_rpc_out=$(rpcinfo -T "$_family" "$_localhost" \ + "$_progname" $_version 2>&1) ; then + ctdb_check_rpc_out="$_progname failed RPC check: +$ctdb_check_rpc_out" + echo "$ctdb_check_rpc_out" + return 1 + fi +} + +nfs_check_rpcinfo () +{ + _progname="$1" # passed to rpcinfo (looked up in /etc/rpc) + _versions="$2" # optional, space separated, not passed if empty/unset + _families="${3:-tcp}" # optional, space separated, default is "tcp" + + for _family in $_families ; do + if [ -n "$_versions" ] ; then + for _version in $_versions ; do + ctdb_check_rpc "$_progname" "$_version" "$_family" || return $? + done + else + ctdb_check_rpc "$_progname" "" "$_family" || return $? + fi + done +} + +################################################################## +# use statd-callout to update NFS lock info +################################################################## +nfs_update_lock_info () +{ + if [ -x "$CTDB_BASE/statd-callout" ] ; then + "$CTDB_BASE/statd-callout" update + fi +} + +###################################################################### + +# script_state_dir set by ctdb_setup_state_dir() +# shellcheck disable=SC2154 +nfs_callout_init "$script_state_dir" + +case "$1" in +startup) + nfs_callout "$@" || exit $? + ;; + +shutdown) + nfs_callout "$@" || exit $? + ;; + +takeip) + nfs_callout "$@" || exit $? + ctdb_service_set_reconfigure + ;; + +releaseip) + nfs_callout "$@" || exit $? + ctdb_service_set_reconfigure + ;; + +ipreallocated) + if ctdb_service_needs_reconfigure ; then + ctdb_service_reconfigure + fi + ;; + +monitor) + nfs_callout "monitor-pre" || exit $? + + # Check that directories for shares actually exist + if [ "$CTDB_NFS_SKIP_SHARE_CHECK" != "yes" ] ; then + nfs_callout "monitor-list-shares" | ctdb_check_directories || \ + exit $? + fi + + update_tickles 2049 + nfs_update_lock_info + + nfs_check_services + + nfs_callout "monitor-post" || exit $? + ;; +esac + +exit 0 diff --git a/ctdb/config/events/legacy/70.iscsi.script b/ctdb/config/events/legacy/70.iscsi.script new file mode 100755 index 0000000..e74651d --- /dev/null +++ b/ctdb/config/events/legacy/70.iscsi.script @@ -0,0 +1,87 @@ +#!/bin/sh + +# CTDB event script for TGTD based iSCSI + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +# service_name is used by various functions +# shellcheck disable=SC2034 +service_name="iscsi" + +load_script_options + +[ -z "$CTDB_START_ISCSI_SCRIPTS" ] && { + echo "No iscsi start script directory found" + exit 0 +} + +case "$1" in +ipreallocated) + all_ips=$($CTDB -X ip | tail -n +2) + + # Block the iSCSI port. Only block for the address families + # we have configured. This copes with, for example, ip6tables + # being unavailable on an IPv4-only system. + have_ipv4=false + have_ipv6=false + # x is intentionally ignored + # shellcheck disable=SC2034 + while IFS='|' read x ip pnn x ; do + case "$ip" in + *:*) have_ipv6=true ;; + *) have_ipv4=true ;; + esac + done <<EOF +$all_ips +EOF + if $have_ipv4 ; then + iptables -I INPUT 1 -p tcp --dport 3260 -j DROP + fi + if $have_ipv6 ; then + ip6tables -I INPUT 1 -p tcp --dport 3260 -j DROP + fi + + # Stop iSCSI daemon + killall -9 tgtd >/dev/null 2>/dev/null + + pnn=$(ctdb_get_pnn) + [ -n "$pnn" ] || die "Failed to get node pnn" + + # Start iSCSI daemon + tgtd >/dev/null 2>&1 + + # Run a script for each currently hosted public IP address + ips=$(echo "$all_ips" | awk -F'|' -v pnn="$pnn" '$3 == pnn {print $2}') + for ip in $ips ; do + script="${CTDB_START_ISCSI_SCRIPTS}/${ip}.sh" + if [ -x "$script" ] ; then + echo "Starting iSCSI service for public address ${ip}" + "$script" + fi + done + + # Unblock iSCSI port. These can be unconditional (compared to + # blocking above), since errors are redirected. + while iptables -D INPUT -p tcp --dport 3260 -j DROP >/dev/null 2>&1 ; do + : + done + while ip6tables -D INPUT -p tcp --dport 3260 -j DROP >/dev/null 2>&1 ; do + : + done + + ;; + +shutdown) + # Shutdown iSCSI daemon when ctdb goes down + killall -9 tgtd >/dev/null 2>&1 + ;; + +monitor) + ctdb_check_tcp_ports 3260 || exit $? + ;; +esac + +exit 0 diff --git a/ctdb/config/events/legacy/91.lvs.script b/ctdb/config/events/legacy/91.lvs.script new file mode 100755 index 0000000..8855068 --- /dev/null +++ b/ctdb/config/events/legacy/91.lvs.script @@ -0,0 +1,124 @@ +#!/bin/sh +# script to manage the lvs ip multiplexer for a single public address cluster + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && dirname "$PWD") + +. "${CTDB_BASE}/functions" + +load_script_options + +[ -n "$CTDB_LVS_NODES" ] || exit 0 +export CTDB_LVS_NODES + +# type is commonly supported and more portable than which(1) +# shellcheck disable=SC2039 +if ! type ipvsadm >/dev/null 2>&1 ; then + echo "LVS configured but ipvsadm not found" + exit 0 +fi + + +lvs_follower_only () +{ + _ip_address=$(ctdb_get_ip_address) + awk -v my_ip="$_ip_address" \ + '$1 == my_ip { if ($2 ~ "follower-only") { exit 0 } else { exit 1 } }' \ + "$CTDB_LVS_NODES" +} + +lvs_check_config () +{ + [ -r "$CTDB_LVS_NODES" ] || \ + die "error: CTDB_LVS_NODES=${CTDB_LVS_NODES} unreadable" + [ -n "$CTDB_LVS_PUBLIC_IP" ] || \ + die "Invalid configuration: CTDB_LVS_PUBLIC_IP not set" + if ! lvs_follower_only ; then + [ -n "$CTDB_LVS_PUBLIC_IFACE" ] || \ + die "Invalid configuration: CTDB_LVS_PUBLIC_IFACE not set" + fi +} + +case "$1" in +setup) + lvs_check_config + ;; +startup) + lvs_check_config + + ipvsadm -D -t "$CTDB_LVS_PUBLIC_IP" >/dev/null 2>&1 + ipvsadm -D -u "$CTDB_LVS_PUBLIC_IP" >/dev/null 2>&1 + + ip addr add "${CTDB_LVS_PUBLIC_IP}/32" dev lo scope host + + # do not respond to ARPs that are for ip addresses with scope 'host' + set_proc_maybe sys/net/ipv4/conf/all/arp_ignore 3 + # do not send out arp requests from loopback addresses + set_proc_maybe sys/net/ipv4/conf/all/arp_announce 2 + ;; + +shutdown) + lvs_check_config + + ipvsadm -D -t "$CTDB_LVS_PUBLIC_IP" + ipvsadm -D -u "$CTDB_LVS_PUBLIC_IP" + + ip addr del "${CTDB_LVS_PUBLIC_IP}/32" dev lo >/dev/null 2>&1 + + flush_route_cache + ;; + +ipreallocated) + lvs_check_config + + # Kill connections + ipvsadm -D -t "$CTDB_LVS_PUBLIC_IP" >/dev/null 2>&1 + ipvsadm -D -u "$CTDB_LVS_PUBLIC_IP" >/dev/null 2>&1 + kill_tcp_connections_local_only \ + "$CTDB_LVS_PUBLIC_IFACE" "$CTDB_LVS_PUBLIC_IP" + + pnn=$(ctdb_get_pnn) + lvsleader=$("${CTDB_HELPER_BINDIR}/ctdb_lvs" leader) + if [ "$pnn" != "$lvsleader" ] ; then + # This node is not the LVS leader so change the IP address + # to have scope "host" so this node won't respond to ARPs + ip addr del "${CTDB_LVS_PUBLIC_IP}/32" dev lo >/dev/null 2>&1 + ip addr add "${CTDB_LVS_PUBLIC_IP}/32" dev lo scope host + exit 0 + fi + + # Change the scope so this node starts responding to ARPs + ip addr del "${CTDB_LVS_PUBLIC_IP}/32" dev lo >/dev/null 2>&1 + ip addr add "${CTDB_LVS_PUBLIC_IP}/32" dev lo >/dev/null 2>&1 + + ipvsadm -A -t "$CTDB_LVS_PUBLIC_IP" -p 1999999 -s lc + ipvsadm -A -u "$CTDB_LVS_PUBLIC_IP" -p 1999999 -s lc + + # Add all nodes (except this node) as LVS servers + "${CTDB_HELPER_BINDIR}/ctdb_lvs" list | + awk -v pnn="$pnn" '$1 != pnn { print $2 }' | + while read ip ; do + ipvsadm -a -t "$CTDB_LVS_PUBLIC_IP" -r "$ip" -g + ipvsadm -a -u "$CTDB_LVS_PUBLIC_IP" -r "$ip" -g + done + + # Add localhost too... + ipvsadm -a -t "$CTDB_LVS_PUBLIC_IP" -r 127.0.0.1 + ipvsadm -a -u "$CTDB_LVS_PUBLIC_IP" -r 127.0.0.1 + + $CTDB gratarp \ + "$CTDB_LVS_PUBLIC_IP" "$CTDB_LVS_PUBLIC_IFACE" >/dev/null 2>&1 + + flush_route_cache + ;; + +monitor) + lvs_check_config + + if [ -n "$CTDB_LVS_PUBLIC_IFACE" ] ; then + interface_monitor "$CTDB_LVS_PUBLIC_IFACE" || exit 1 + fi + ;; +esac + +exit 0 diff --git a/ctdb/config/functions b/ctdb/config/functions new file mode 100755 index 0000000..725993c --- /dev/null +++ b/ctdb/config/functions @@ -0,0 +1,1037 @@ +# Hey Emacs, this is a -*- shell-script -*- !!! + +# utility functions for ctdb event scripts + +if [ -z "$CTDB_BASE" ] ; then + echo 'CTDB_BASE unset in CTDB functions file' + exit 1 +fi +export CTDB_BASE + +# CTDB_VARDIR is used elsewhere +# shellcheck disable=SC2034 +CTDB_VARDIR="/usr/local/var/lib/ctdb" + +CTDB="${CTDB:-/usr/local/bin/ctdb}" + +# Only (and always) override these variables in test code + +if [ -z "$CTDB_SCRIPT_VARDIR" ] ; then + CTDB_SCRIPT_VARDIR="/usr/local/var/lib/ctdb/scripts" +fi + +if [ -z "$CTDB_SYS_ETCDIR" ] ; then + CTDB_SYS_ETCDIR="/etc" +fi + +if [ -z "$CTDB_HELPER_BINDIR" ] ; then + CTDB_HELPER_BINDIR="/usr/local/libexec/ctdb" +fi + +####################################### +# pull in a system config file, if any + +load_system_config () +{ + for _i ; do + + if [ -f "${CTDB_SYS_ETCDIR}/sysconfig/${_i}" ]; then + . "${CTDB_SYS_ETCDIR}/sysconfig/${_i}" + return + elif [ -f "${CTDB_SYS_ETCDIR}/default/${_i}" ]; then + . "${CTDB_SYS_ETCDIR}/default/${_i}" + return + fi + done +} + +# load_script_options [ component script ] +# script is an event script name relative to a component +# component is currently ignored +load_script_options () +{ + if [ $# -eq 2 ] ; then + _script="$2" + elif [ $# -eq 0 ] ; then + _script="" + else + die "usage: load_script_options [ component script ]" + fi + + _options="${CTDB_BASE}/script.options" + + if [ -r "$_options" ] ; then + . "$_options" + fi + + if [ -n "$_script" ] ; then + _s="${CTDB_BASE}/events/legacy/${_script}" + else + _s="${0%.script}" + fi + _options="${_s}.options" + + if [ -r "$_options" ] ; then + . "$_options" + fi +} + +############################################################## + +die () +{ + _msg="$1" + _rc="${2:-1}" + + echo "$_msg" >&2 + exit "$_rc" +} + +# Log given message or stdin to either syslog or a CTDB log file +# $1 is the tag passed to logger if syslog is in use. +script_log () +{ + _tag="$1" ; shift + + case "$CTDB_LOGGING" in + file:*|"") + if [ -n "$CTDB_LOGGING" ] ; then + _file="${CTDB_LOGGING#file:}" + else + _file="/usr/local/var/log/log.ctdb" + fi + { + if [ -n "$*" ] ; then + echo "$*" + else + cat + fi + } >>"$_file" + ;; + *) + # Handle all syslog:* variants here too. There's no tool to do + # the lossy things, so just use logger. + logger -t "ctdbd: ${_tag}" "$@" + ;; + esac +} + +# When things are run in the background in an eventscript then logging +# output might get lost. This is the "solution". :-) +background_with_logging () +{ + ( + "$@" 2>&1 </dev/null | + script_log "${script_name}&" + )& + + return 0 +} + +############################################################## +# check number of args for different events +ctdb_check_args () +{ + case "$1" in + takeip|releaseip) + if [ $# != 4 ]; then + echo "ERROR: must supply interface, IP and maskbits" + exit 1 + fi + ;; + updateip) + if [ $# != 5 ]; then + echo "ERROR: must supply old interface, new interface, IP and maskbits" + exit 1 + fi + ;; + esac +} + +############################################################## +# determine on what type of system (init style) we are running +detect_init_style() +{ + # only do detection if not already set: + if [ -n "$CTDB_INIT_STYLE" ] ; then + return + fi + + if [ -x /sbin/startproc ]; then + CTDB_INIT_STYLE="suse" + elif [ -x /sbin/start-stop-daemon ]; then + CTDB_INIT_STYLE="debian" + else + CTDB_INIT_STYLE="redhat" + fi +} + +###################################################### +# simulate /sbin/service on platforms that don't have it +# _service() makes it easier to hook the service() function for +# testing. +_service () +{ + _service_name="$1" + _op="$2" + + # do nothing, when no service was specified + [ -z "$_service_name" ] && return + + if [ -x /sbin/service ]; then + $_nice /sbin/service "$_service_name" "$_op" + elif [ -x /usr/sbin/service ]; then + $_nice /usr/sbin/service "$_service_name" "$_op" + elif [ -x /bin/systemctl ]; then + $_nice /bin/systemctl "$_op" "$_service_name" + elif [ -x "${CTDB_SYS_ETCDIR}/init.d/${_service_name}" ]; then + $_nice "${CTDB_SYS_ETCDIR}/init.d/${_service_name}" "$_op" + elif [ -x "${CTDB_SYS_ETCDIR}/rc.d/init.d/${_service_name}" ]; then + $_nice "${CTDB_SYS_ETCDIR}/rc.d/init.d/${_service_name}" "$_op" + fi +} + +service() +{ + _nice="" + _service "$@" +} + +###################################################### +# simulate /sbin/service (niced) on platforms that don't have it +nice_service() +{ + _nice="nice" + _service "$@" +} + +###################################################### +# Cached retrieval of PNN from local node. This never changes so why +# open a client connection to the server each time this is needed? +ctdb_get_pnn () +{ + _pnn_file="${CTDB_SCRIPT_VARDIR}/my-pnn" + if [ ! -f "$_pnn_file" ] ; then + $CTDB pnn >"$_pnn_file" + fi + + cat "$_pnn_file" +} + +# Cached retrieval of private IP address from local node. This never +# changes. +ctdb_get_ip_address () +{ + _ip_addr_file="${CTDB_SCRIPT_VARDIR}/my-ip-address" + if [ ! -f "$_ip_addr_file" ] ; then + $CTDB -X nodestatus | + awk -F '|' 'NR == 2 { print $3 }' >"$_ip_addr_file" + fi + + cat "$_ip_addr_file" +} + +# Cached retrieval of database options for use by event scripts. +# +# If the variables are already set then they should not be overwritten +# - this should only happen during event script testing. +ctdb_get_db_options () +{ + _db_opts_file="${CTDB_SCRIPT_VARDIR}/db_options.cache" + + if [ ! -f "$_db_opts_file" ] ; then + { + ctdb_translate_option "database" \ + "volatile database directory" \ + "CTDB_DBDIR" + ctdb_translate_option "database" \ + "persistent database directory" \ + "CTDB_DBDIR_PERSISTENT" + ctdb_translate_option "database" \ + "state database directory" \ + "CTDB_DBDIR_STATE" + } >"$_db_opts_file" + fi + + . "$_db_opts_file" +} + +ctdb_translate_option () +{ + _section="$1" + _opt="$2" + _variable="$3" + + # ctdb-config already prints an error if something goes wrong + _t=$("${CTDB_HELPER_BINDIR}/ctdb-config" get "$_section" "$_opt") || \ + exit $? + echo "${_variable}=\"${_t}\"" +} + +###################################################### +# wrapper around /proc/ settings to allow them to be hooked +# for testing +# 1st arg is relative path under /proc/, 2nd arg is value to set +set_proc () +{ + echo "$2" >"/proc/$1" +} + +set_proc_maybe () +{ + if [ -w "/proc/$1" ] ; then + set_proc "$1" "$2" + fi +} + +###################################################### +# wrapper around getting file contents from /proc/ to allow +# this to be hooked for testing +# 1st arg is relative path under /proc/ +get_proc () +{ + cat "/proc/$1" +} + +###################################################### +# Print up to $_max kernel stack traces for processes named $_program +program_stack_traces () +{ + _prog="$1" + _max="${2:-1}" + + _count=1 + for _pid in $(pidof "$_prog") ; do + [ "$_count" -le "$_max" ] || break + + # Do this first to avoid racing with process exit + _stack=$(get_proc "${_pid}/stack" 2>/dev/null) + if [ -n "$_stack" ] ; then + echo "Stack trace for ${_prog}[${_pid}]:" + echo "$_stack" + _count=$((_count + 1)) + fi + done +} + +###################################################### +# Ensure $service_name is set +assert_service_name () +{ + # service_name is set by the event script + # shellcheck disable=SC2154 + [ -n "$service_name" ] || die "INTERNAL ERROR: \$service_name not set" +} + +###################################################### +# check a set of directories is available +# return 1 on a missing directory +# directories are read from stdin +###################################################### +ctdb_check_directories_probe() +{ + while IFS="" read d ; do + case "$d" in + *%*) + continue + ;; + *) + [ -d "${d}/." ] || return 1 + esac + done +} + +###################################################### +# check a set of directories is available +# directories are read from stdin +###################################################### +ctdb_check_directories() +{ + ctdb_check_directories_probe || { + echo "ERROR: $service_name directory \"$d\" not available" + exit 1 + } +} + +###################################################### +# check a set of tcp ports +# usage: ctdb_check_tcp_ports <ports...> +###################################################### + +# Check whether something is listening on all of the given TCP ports +# using the "ctdb checktcpport" command. +ctdb_check_tcp_ports() +{ + if [ -z "$1" ] ; then + echo "INTERNAL ERROR: ctdb_check_tcp_ports - no ports specified" + exit 1 + fi + + for _p ; do # process each function argument (port) + _cmd="$CTDB checktcpport $_p" + _out=$($_cmd 2>&1) + _ret=$? + case "$_ret" in + 0) + echo "$service_name not listening on TCP port $_p" + return 1 + ;; + 98) + # Couldn't bind, something already listening, next port + continue + ;; + *) + echo "unexpected error (${_ret}) running \"${_cmd}\"" + if [ -n "$_out" ] ; then + echo "$_out" + fi + return $_ret + ;; + esac + done + + # All ports listening + return 0 +} + +###################################################### +# check a unix socket +# usage: ctdb_check_unix_socket SOCKPATH +###################################################### +ctdb_check_unix_socket() +{ + _sockpath="$1" + + if [ -z "$_sockpath" ] ; then + echo "ERROR: ctdb_check_unix_socket() requires socket path" + return 1 + fi + + _out=$(ss -l -x "src ${_sockpath}" | tail -n +2) + if [ -z "$_out" ] ; then + echo "ERROR: ${service_name} not listening on ${_sockpath}" + return 1 + fi +} + +################################################ +# kill off any TCP connections with the given IP +################################################ +kill_tcp_connections () +{ + _iface="$1" + _ip="$2" + + _oneway=false + if [ "$3" = "oneway" ] ; then + _oneway=true + fi + + get_tcp_connections_for_ip "$_ip" | { + _killcount=0 + _connections="" + _nl=" +" + while read _dst _src; do + _destport="${_dst##*:}" + __oneway=$_oneway + case $_destport in + # we only do one-way killtcp for CIFS + 139|445) __oneway=true ;; + esac + + _connections="${_connections}${_nl}${_src} ${_dst}" + if ! $__oneway ; then + _connections="${_connections}${_nl}${_dst} ${_src}" + fi + + _killcount=$((_killcount + 1)) + done + + if [ $_killcount -eq 0 ] ; then + return + fi + + if [ -n "$CTDB_KILLTCP_DEBUGLEVEL" ]; then + _debuglevel="$CTDB_KILLTCP_DEBUGLEVEL" + else + _debuglevel="$CTDB_DEBUGLEVEL" + fi + echo "$_connections" | \ + CTDB_DEBUGLEVEL="$_debuglevel" \ + "${CTDB_HELPER_BINDIR}/ctdb_killtcp" "$_iface" || { + echo "Failed to kill TCP connections" + return + } + + _connections=$(get_tcp_connections_for_ip "$_ip") + if [ -z "$_connections" ] ; then + _remaining=0 + else + _remaining=$(echo "$_connections" | wc -l) + fi + + _actually_killed=$((_killcount - _remaining)) + + _t="${_actually_killed}/${_killcount}" + echo "Killed ${_t} TCP connections to released IP $_ip" + + if [ -n "$_connections" ] ; then + echo "Remaining connections:" + echo "$_connections" | sed -e 's|^| |' + fi + } +} + +################################################################## +# kill off the local end for any TCP connections with the given IP +################################################################## +kill_tcp_connections_local_only () +{ + kill_tcp_connections "$@" "oneway" +} + +################################################################## +# tickle any TCP connections with the given IP +################################################################## +tickle_tcp_connections () +{ + _ip="$1" + + # Get connections, both directions + _conns=$(get_tcp_connections_for_ip "$_ip" | \ + awk '{ print $1, $2 ; print $2, $1 }') + + echo "$_conns" | awk '{ print "Tickle TCP connection", $1, $2 }' + echo "$_conns" | ctdb tickle +} + +get_tcp_connections_for_ip () +{ + _ip="$1" + + ss -tn state established "src [$_ip]" | awk 'NR > 1 {print $3, $4}' +} + +######################################################## + +add_ip_to_iface () +{ + _iface=$1 + _ip=$2 + _maskbits=$3 + + # Ensure interface is up + ip link set "$_iface" up || \ + die "Failed to bringup interface $_iface" + + # Only need to define broadcast for IPv4 + case "$_ip" in + *:*) _bcast="" ;; + *) _bcast="brd +" ;; + esac + + # Intentionally unquoted multi-word value here + # shellcheck disable=SC2086 + ip addr add "$_ip/$_maskbits" $_bcast dev "$_iface" || { + echo "Failed to add $_ip/$_maskbits on dev $_iface" + return 1 + } + + # Wait 5 seconds for IPv6 addresses to stop being tentative... + if [ -z "$_bcast" ] ; then + for _x in $(seq 1 10) ; do + ip addr show to "${_ip}/128" | grep -q "tentative" || break + sleep 0.5 + done + + # If the address was a duplicate then it won't be on the + # interface so flag an error. + _t=$(ip addr show to "${_ip}/128") + case "$_t" in + "") + echo "Failed to add $_ip/$_maskbits on dev $_iface" + return 1 + ;; + *tentative*|*dadfailed*) + echo "Failed to add $_ip/$_maskbits on dev $_iface" + ip addr del "$_ip/$_maskbits" dev "$_iface" + return 1 + ;; + esac + fi +} + +delete_ip_from_iface() +{ + _iface=$1 + _ip=$2 + _maskbits=$3 + + # This could be set globally for all interfaces but it is probably + # better to avoid surprises, so limit it the interfaces where CTDB + # has public IP addresses. There isn't anywhere else convenient + # to do this so just set it each time. This is much cheaper than + # remembering and re-adding secondaries. + set_proc "sys/net/ipv4/conf/${_iface}/promote_secondaries" 1 + + ip addr del "$_ip/$_maskbits" dev "$_iface" || { + echo "Failed to del $_ip on dev $_iface" + return 1 + } +} + +# If the given IP is hosted then print 2 items: maskbits and iface +ip_maskbits_iface () +{ + _addr="$1" + + case "$_addr" in + *:*) _bits=128 ;; + *) _bits=32 ;; + esac + ip addr show to "${_addr}/${_bits}" 2>/dev/null | \ + awk 'NR == 1 { iface = $2; sub(":$", "", iface) ; + sub("@.*", "", iface) } + $1 ~ /inet/ { mask = $2; sub(".*/", "", mask); + print mask, iface }' +} + +drop_ip () +{ + _addr="${1%/*}" # Remove optional maskbits + + # Intentional word splitting here + # shellcheck disable=SC2046 + set -- $(ip_maskbits_iface "$_addr") + if [ -n "$1" ] ; then + _maskbits="$1" + _iface="$2" + echo "Removing public address $_addr/$_maskbits from device $_iface" + delete_ip_from_iface "$_iface" "$_addr" "$_maskbits" >/dev/null 2>&1 + fi +} + +drop_all_public_ips () +{ + # _x is intentionally ignored + # shellcheck disable=SC2034 + while read _ip _x ; do + case "$_ip" in + \#*) continue ;; + esac + drop_ip "$_ip" + done <"${CTDB_BASE}/public_addresses" +} + +flush_route_cache () +{ + set_proc_maybe sys/net/ipv4/route/flush 1 + set_proc_maybe sys/net/ipv6/route/flush 1 +} + +######################################################## +# Interface monitoring + +# If the interface is a virtual one (e.g. VLAN) then get the +# underlying interface +interface_get_real () +{ + # Output of "ip link show <iface>" + _iface_info="$1" + + # Extract the full interface description to see if it is a VLAN + _t=$(echo "$_iface_info" | + awk 'NR == 1 { iface = $2; sub(":$", "", iface) ; + print iface }') + case "$_t" in + *@*) + # VLAN: use the underlying interface, after the '@' + echo "${_t##*@}" + ;; + *) + # Not a regular VLAN. For backward compatibility, assume + # there is some other sort of VLAN that doesn't have the + # '@' in the output and only use what is before a '.'. If + # there is no '.' then this will be the whole interface + # name. + echo "${_t%%.*}" + esac +} + +# Check whether an interface is operational +interface_monitor () +{ + _iface="$1" + + _iface_info=$(ip link show "$_iface" 2>&1) || { + echo "ERROR: Monitored interface ${_iface} does not exist" + return 1 + } + + + # If the interface is a virtual one (e.g. VLAN) then get the + # underlying interface. + _realiface=$(interface_get_real "$_iface_info") + + if _bi=$(get_proc "net/bonding/${_realiface}" 2>/dev/null) ; then + # This is a bond: various monitoring strategies + echo "$_bi" | grep -q 'Currently Active Slave: None' && { + echo "ERROR: No active slaves for bond device ${_realiface}" + return 1 + } + echo "$_bi" | grep -q '^MII Status: up' || { + echo "ERROR: public network interface ${_realiface} is down" + return 1 + } + echo "$_bi" | grep -q '^Bonding Mode: IEEE 802.3ad Dynamic link aggregation' && { + # This works around a bug in the driver where the + # overall bond status can be up but none of the actual + # physical interfaces have a link. + echo "$_bi" | grep 'MII Status:' | tail -n +2 | grep -q '^MII Status: up' || { + echo "ERROR: No active slaves for 802.ad bond device ${_realiface}" + return 1 + } + } + + return 0 + else + # Not a bond + case "$_iface" in + lo*) + # loopback is always working + return 0 + ;; + ib*) + # we don't know how to test ib links + return 0 + ;; + *) + ethtool "$_iface" | grep -q 'Link detected: yes' || { + # On some systems, this is not successful when a + # cable is plugged but the interface has not been + # brought up previously. Bring the interface up + # and try again... + ip link set "$_iface" up + ethtool "$_iface" | grep -q 'Link detected: yes' || { + echo "ERROR: No link on the public network interface ${_iface}" + return 1 + } + } + return 0 + ;; + esac + fi +} + +######################################################## +# Simple counters +_ctdb_counter_common () +{ + [ $# -le 1 ] || die "usage: _ctdb_counter_common [name]" + + if [ $# -eq 1 ] ; then + _counter_name="${1}.failcount" + else + _counter_name="failcount" + fi + + if [ -z "$script_state_dir" ] ; then + die "ctdb_counter_* functions need ctdb_setup_state_dir()" + fi + + _counter_file="${script_state_dir}/${_counter_name}" +} +# Some code passes an argument +# shellcheck disable=SC2120 +ctdb_counter_init () { + _ctdb_counter_common "$1" + + : >"$_counter_file" +} +ctdb_counter_incr () { + _ctdb_counter_common "$1" + + # unary counting using newlines! + echo >>"$_counter_file" +} +ctdb_counter_get () { + _ctdb_counter_common "$1" + # unary counting! + _val=$(wc -c < "$_counter_file" 2>/dev/null || echo 0) + # Strip leading spaces from ouput of wc (on freebsd) + # shellcheck disable=SC2086 + echo $_val +} + +######################################################## + +# ctdb_setup_state_dir <type> <name> +# Sets/creates script_state_dir) +ctdb_setup_state_dir () +{ + [ $# -eq 2 ] || die "usage: ctdb_setup_state_dir <type> <name>" + + _type="$1" + _name="$2" + + script_state_dir="${CTDB_SCRIPT_VARDIR}/${_type}/${_name}" + + mkdir -p "$script_state_dir" || \ + die "Error creating script state dir \"${script_state_dir}\"" +} + +################################################################## +# Reconfigure a service on demand + +_ctdb_service_reconfigure_common () +{ + if [ -z "$script_state_dir" ] ; then + die "ctdb_service_*_reconfigure() needs ctdb_setup_state_dir()" + fi + + _ctdb_service_reconfigure_flag="${script_state_dir}/need_reconfigure" +} + +ctdb_service_needs_reconfigure () +{ + _ctdb_service_reconfigure_common + [ -e "$_ctdb_service_reconfigure_flag" ] +} + +ctdb_service_set_reconfigure () +{ + _ctdb_service_reconfigure_common + : >"$_ctdb_service_reconfigure_flag" +} + +ctdb_service_unset_reconfigure () +{ + _ctdb_service_reconfigure_common + rm -f "$_ctdb_service_reconfigure_flag" +} + +ctdb_service_reconfigure () +{ + echo "Reconfiguring service \"${service_name}\"..." + ctdb_service_unset_reconfigure + service_reconfigure || return $? + # Intentionally have this use $service_name as default + # shellcheck disable=SC2119 + ctdb_counter_init +} + +# Default service_reconfigure() function does nothing. +service_reconfigure () +{ + : +} + +# Default service_start() and service_stop() functions. + +# These may be overridden in an eventscript. +service_start () +{ + service "$service_name" start +} + +service_stop () +{ + service "$service_name" stop +} + +################################################################## + +# This exists only for backward compatibility with 3rd party scripts +# that call it +ctdb_standard_event_handler () +{ + : +} + +iptables_wrapper () +{ + _family="$1" ; shift + if [ "$_family" = "inet6" ] ; then + _iptables_cmd="ip6tables" + else + _iptables_cmd="iptables" + fi + + # iptables doesn't like being re-entered, so flock-wrap it. + flock -w 30 "${CTDB_SCRIPT_VARDIR}/iptables.flock" "$_iptables_cmd" "$@" +} + +# AIX (and perhaps others?) doesn't have mktemp +# type is commonly supported and more portable than which(1) +# shellcheck disable=SC2039 +if ! type mktemp >/dev/null 2>&1 ; then + mktemp () + { + _dir=false + if [ "$1" = "-d" ] ; then + _dir=true + shift + fi + _d="${TMPDIR:-/tmp}" + _hex10=$(dd if=/dev/urandom count=20 2>/dev/null | \ + cksum | \ + awk '{print $1}') + _t="${_d}/tmp.${_hex10}" + ( + umask 077 + if $_dir ; then + mkdir "$_t" + else + : >"$_t" + fi + ) + echo "$_t" + } +fi + +###################################################################### +# NFS callout handling + +nfs_callout_init () +{ + _state_dir="$1" + + if [ -z "$CTDB_NFS_CALLOUT" ] ; then + CTDB_NFS_CALLOUT="${CTDB_BASE}/nfs-linux-kernel-callout" + fi + # Always export, for statd callout + export CTDB_NFS_CALLOUT + + # If the callout wants to use this then it must create it + export CTDB_NFS_CALLOUT_STATE_DIR="${_state_dir}/callout-state" + + # Export, if set, for use by clustered NFS callouts + if [ -n "$CTDB_NFS_STATE_FS_TYPE" ] ; then + export CTDB_NFS_STATE_FS_TYPE + fi + if [ -n "$CTDB_NFS_STATE_MNT" ] ; then + export CTDB_NFS_STATE_MNT + fi + + nfs_callout_cache="${_state_dir}/nfs_callout_cache" + nfs_callout_cache_callout="${nfs_callout_cache}/CTDB_NFS_CALLOUT" + nfs_callout_cache_ops="${nfs_callout_cache}/ops" +} + +nfs_callout_register () +{ + mkdir -p "$nfs_callout_cache_ops" + rm -f "$nfs_callout_cache_ops"/* + + echo "$CTDB_NFS_CALLOUT" >"$nfs_callout_cache_callout" + + _t=$("$CTDB_NFS_CALLOUT" "register") + if [ -n "$_t" ] ; then + echo "$_t" | + while IFS="" read _op ; do + touch "${nfs_callout_cache_ops}/${_op}" + done + else + touch "${nfs_callout_cache_ops}/ALL" + fi +} + +nfs_callout () +{ + # Re-run registration if $CTDB_NFS_CALLOUT has changed + _prev="" + if [ -r "$nfs_callout_cache_callout" ] ; then + read _prev <"$nfs_callout_cache_callout" + fi + if [ "$CTDB_NFS_CALLOUT" != "$_prev" ] ; then + nfs_callout_register + fi + + # Run the operation if it is registered... + if [ -e "${nfs_callout_cache_ops}/${1}" ] || \ + [ -e "${nfs_callout_cache_ops}/ALL" ]; then + "$CTDB_NFS_CALLOUT" "$@" + fi +} + +######################################################## +# tickle handling +######################################################## + +update_tickles () +{ + _port="$1" + + tickledir="${CTDB_SCRIPT_VARDIR}/tickles" + mkdir -p "$tickledir" + + # What public IPs do I hold? + _pnn=$(ctdb_get_pnn) + _ips=$($CTDB -X ip | awk -F'|' -v pnn="$_pnn" '$3 == pnn {print $2}') + + # IPs and port as ss filters + _ip_filter="" + for _ip in $_ips ; do + _ip_filter="${_ip_filter}${_ip_filter:+ || }src [${_ip}]" + done + _port_filter="sport == :${_port}" + + # Record connections to our public IPs in a temporary file. + # This temporary file is in CTDB's private state directory and + # $$ is used to avoid a very rare race involving CTDB's script + # debugging. No security issue, nothing to see here... + _my_connections="${tickledir}/${_port}.connections.$$" + # Parentheses are needed around the filters for precedence but + # the parentheses can't be empty! + # + # Recent versions of ss print square brackets around IPv6 + # addresses. While it is desirable to update CTDB's address + # parsing and printing code, something needs to be done here + # for backward compatibility, so just delete the brackets. + ss -tn state established \ + "${_ip_filter:+( ${_ip_filter} )}" \ + "${_port_filter:+( ${_port_filter} )}" | + awk 'NR > 1 {print $4, $3}' | + tr -d '][' | + sort >"$_my_connections" + + # Record our current tickles in a temporary file + _my_tickles="${tickledir}/${_port}.tickles.$$" + for _i in $_ips ; do + $CTDB -X gettickles "$_i" "$_port" | + awk -F'|' 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }' + done | + sort >"$_my_tickles" + + # Add tickles for connections that we haven't already got tickles for + comm -23 "$_my_connections" "$_my_tickles" | \ + $CTDB addtickle + + # Remove tickles for connections that are no longer there + comm -13 "$_my_connections" "$_my_tickles" | \ + $CTDB deltickle + + rm -f "$_my_connections" "$_my_tickles" + + # Remove stale files from killed scripts + # Files can't have spaces in name, more portable than -print0/-0 + # shellcheck disable=SC2038 + (cd "$tickledir" && find . -type f -mmin +10 | xargs -r rm) +} + +######################################################## +# load a site local config file +######################################################## + +[ -x "${CTDB_BASE}/rc.local" ] && { + . "${CTDB_BASE}/rc.local" +} + +[ -d "${CTDB_BASE}/rc.local.d" ] && { + for i in "${CTDB_BASE}/rc.local.d"/* ; do + [ -x "$i" ] && . "$i" + done +} + +script_name="${0##*/}" # basename diff --git a/ctdb/config/nfs-checks.d/00.portmapper.check b/ctdb/config/nfs-checks.d/00.portmapper.check new file mode 100644 index 0000000..24def35 --- /dev/null +++ b/ctdb/config/nfs-checks.d/00.portmapper.check @@ -0,0 +1,2 @@ +# portmapper +unhealthy_after=1 diff --git a/ctdb/config/nfs-checks.d/10.status.check b/ctdb/config/nfs-checks.d/10.status.check new file mode 100644 index 0000000..b8ce1e0 --- /dev/null +++ b/ctdb/config/nfs-checks.d/10.status.check @@ -0,0 +1,7 @@ +# status +version="1" +restart_every=2 +unhealthy_after=6 +service_stop_cmd="$CTDB_NFS_CALLOUT stop status" +service_start_cmd="$CTDB_NFS_CALLOUT start status" +service_debug_cmd="program_stack_traces rpc.statd 5" diff --git a/ctdb/config/nfs-checks.d/20.nfs.check b/ctdb/config/nfs-checks.d/20.nfs.check new file mode 100644 index 0000000..dad1cdc --- /dev/null +++ b/ctdb/config/nfs-checks.d/20.nfs.check @@ -0,0 +1,7 @@ +# nfs +version="3" +restart_every=10 +unhealthy_after=2 +service_stop_cmd="$CTDB_NFS_CALLOUT stop nfs" +service_start_cmd="$CTDB_NFS_CALLOUT start nfs" +service_debug_cmd="program_stack_traces nfsd 5" diff --git a/ctdb/config/nfs-checks.d/30.nlockmgr.check b/ctdb/config/nfs-checks.d/30.nlockmgr.check new file mode 100644 index 0000000..6660ca0 --- /dev/null +++ b/ctdb/config/nfs-checks.d/30.nlockmgr.check @@ -0,0 +1,6 @@ +# nlockmgr +version="4" +restart_every=2 +unhealthy_after=6 +service_stop_cmd="$CTDB_NFS_CALLOUT stop nlockmgr" +service_start_cmd="$CTDB_NFS_CALLOUT start nlockmgr" diff --git a/ctdb/config/nfs-checks.d/40.mountd.check b/ctdb/config/nfs-checks.d/40.mountd.check new file mode 100644 index 0000000..bfe4c27 --- /dev/null +++ b/ctdb/config/nfs-checks.d/40.mountd.check @@ -0,0 +1,7 @@ +# mountd +version="1" +restart_every=2 +unhealthy_after=6 +service_stop_cmd="$CTDB_NFS_CALLOUT stop mountd" +service_start_cmd="$CTDB_NFS_CALLOUT start mountd" +service_debug_cmd="program_stack_traces rpc.mountd 5" diff --git a/ctdb/config/nfs-checks.d/50.rquotad.check b/ctdb/config/nfs-checks.d/50.rquotad.check new file mode 100644 index 0000000..98bd8d9 --- /dev/null +++ b/ctdb/config/nfs-checks.d/50.rquotad.check @@ -0,0 +1,7 @@ +# rquotad +version="1" +restart_every=2 +unhealthy_after=6 +service_stop_cmd="$CTDB_NFS_CALLOUT stop rquotad" +service_start_cmd="$CTDB_NFS_CALLOUT start rquotad" +service_debug_cmd="program_stack_traces rpc.rquotad 5" diff --git a/ctdb/config/nfs-checks.d/README b/ctdb/config/nfs-checks.d/README new file mode 100644 index 0000000..044067a --- /dev/null +++ b/ctdb/config/nfs-checks.d/README @@ -0,0 +1,31 @@ +NFS check configuration files. + +Files are named NN.RPCSERVICE.check. Files without a .check suffix +are ignored. + +Supported variables are: + +* family - "tcp" or "udp" or space separated list + default: tcp, not used with "service_check_cmd" +* version - optional, RPC service version number + default is to omit to check for any version, + not used with "service_check_cmd" +* unhealthy_after - number of check fails before unhealthy + default: 1 +* restart_every - number of check fails before restart + default: 0, meaning no restart +* service_stop_cmd - command to stop service + default: no default, must be provided if + restart_every > 0 +* service_start_cmd - command to start service + default: no default, must be provided if + restart_every > 0 +* service_check_cmd - command to check health of service + default is to check RPC service using rpcinfo +* service_debug_cmd - command to debug a service after trying to stop it; + for example, it can be useful to print stack + traces of threads that have not exited, since + they may be stuck doing I/O; + no default, see also function program_stack_traces() + +Quoting inside values is not preserved. diff --git a/ctdb/config/nfs-linux-kernel-callout b/ctdb/config/nfs-linux-kernel-callout new file mode 100755 index 0000000..f2f3e38 --- /dev/null +++ b/ctdb/config/nfs-linux-kernel-callout @@ -0,0 +1,441 @@ +#!/bin/sh + +# Exit on 1st error +set -e + +# NFS exports file. Some code below keeps a cache of output derived +# from exportfs(8). When this file is updated the cache is invalid +# and needs to be regenerated. +# +# To change the file, edit the default value below. Do not set +# CTDB_NFS_EXPORTS_FILE - it isn't a configuration variable, just a +# hook for testing. +nfs_exports_file="${CTDB_NFS_EXPORTS_FILE:-/var/lib/nfs/etab}" + +# As above, edit the default value below. CTDB_NFS_DISTRO_STYLE is a +# test variable only. +nfs_distro_style="${CTDB_NFS_DISTRO_STYLE:-systemd-redhat}" + +# As above, edit the default value below. CTDB_SYS_ETCDIR is a +# test variable only. +etc_dir="${CTDB_SYS_ETCDIR:-/etc}" + +# A value of "AUTO" for any service means that service is usually +# automatically started and stopped by one of the other services. +# Such services will still be restarted by hand on failure, if +# configured to do so. This allows services that should not be +# running to be set to "". + +case "$nfs_distro_style" in +systemd-*) + # Defaults + nfs_service="nfs-server" + nfs_lock_service="rpc-statd" + nfs_mountd_service="nfs-mountd" + nfs_status_service="rpc-statd" + nfs_rquotad_service="rpc-rquotad" + nfs_config="${etc_dir}/sysconfig/nfs" + nfs_rquotad_config="" # Not use with systemd, restart via service + + case "$nfs_distro_style" in + *-redhat | *-suse) + : # Defaults only + ;; + *-debian) + nfs_rquotad_service="quotarpc" + ;; + *) + echo "Internal error" + exit 1 + ;; + esac + ;; + +sysvinit-*) + # Defaults + nfs_service="nfs" + nfs_lock_service="AUTO" + nfs_mountd_service="AUTO" + nfs_status_service="AUTO" + nfs_rquotad_service="AUTO" + nfs_config="${etc_dir}/sysconfig/nfs" + nfs_rquotad_config="$nfs_config" + + case "$nfs_distro_style" in + *-redhat) + nfs_lock_service="nfslock" + ;; + *-suse) + nfs_service="nfsserver" + ;; + *-debian) + nfs_service="nfs-kernel-server" + nfs_config="${etc_dir}/default/nfs-kernel-server" + nfs_rquotad_config="${etc_dir}/default/quota" + ;; + *) + echo "Internal error" + exit 1 + ;; + esac + ;; + +*) + echo "Internal error" + exit 1 + ;; +esac + +# Override for unit testing +if [ -z "$PROCFS_PATH" ]; then + PROCFS_PATH="/proc" +fi + +################################################## + +usage() +{ + _c=$(basename "$0") + cat <<EOF +usage: $_c { shutdown | startup } + $_c { stop | start } { nfs | nlockmgr } + $_c { monitor-list-shares | monitor-post } + $_c { register } +EOF + exit 1 +} + +################################################## + +nfs_load_config() +{ + _config="${1:-${nfs_config}}" + + if [ -r "$_config" ]; then + . "$_config" + fi +} + +################################################## + +service_is_auto_started() +{ + [ "$1" = "AUTO" ] +} + +service_is_defined() +{ + _service="$1" + + [ -n "$_service" ] && ! service_is_auto_started "$_service" +} + +service_if_defined() +{ + _service="$1" + _action="$2" + + if service_is_defined "$_service"; then + service "$_service" "$_action" + fi +} + +################################################## +# Overall NFS service stop and start + +nfs_service_stop() +{ + service_if_defined "$nfs_rquotad_service" stop + + service "$nfs_service" stop + + service_if_defined "$nfs_lock_service" stop +} + +nfs_service_start() +{ + service_if_defined "$nfs_lock_service" start + + service "$nfs_service" start + + service_if_defined "$nfs_rquotad_service" start +} + +################################################## +# service "stop" and "start" options for restarting + +manual_stop() +{ + case "$1" in + mountd) + killall -q -9 rpc.mountd + ;; + rquotad) + killall -q -9 rpc.rquotad + ;; + status) + killall -q -9 rpc.statd + ;; + *) + echo "$0: Internal error - invalid call to manual_stop()" + exit 1 + ;; + esac +} + +service_or_manual_stop() +{ + _rpc_service="$1" + _system_service="$2" + + if service_is_defined "$_system_service"; then + service "$_system_service" stop + elif service_is_auto_started "$_system_service"; then + manual_stop "$_rpc_service" + fi +} + +service_stop() +{ + _rpc_service="$1" + + case "$_rpc_service" in + nfs) + echo 0 >"${PROCFS_PATH}/fs/nfsd/threads" + nfs_service_stop >/dev/null 2>&1 || true + pkill -9 nfsd + ;; + nlockmgr) + if service_is_defined "$nfs_lock_service" ; then + service "$nfs_lock_service" stop >/dev/null 2>&1 || true + else + service "$nfs_service" stop >/dev/null 2>&1 || true + fi + ;; + mountd) + service_or_manual_stop "$_rpc_service" "$nfs_mountd_service" + ;; + rquotad) + service_or_manual_stop "$_rpc_service" "$nfs_rquotad_service" + ;; + status) + service_or_manual_stop "$_rpc_service" "$nfs_status_service" + ;; + *) + usage + ;; + esac +} + +manual_start() +{ + case "$1" in + mountd) + nfs_load_config + if [ -z "$RPCMOUNTDOPTS" ]; then + RPCMOUNTDOPTS="${MOUNTD_PORT:+-p }$MOUNTD_PORT" + fi + # shellcheck disable=SC2086 + rpc.mountd $RPCMOUNTDOPTS + ;; + rquotad) + nfs_load_config "$nfs_rquotad_config" + if [ -z "$RPCRQUOTADOPTS" ]; then + RPCRQUOTADOPTS="${RQUOTAD_PORT:+-p }$RQUOTAD_PORT" + fi + # shellcheck disable=SC2086 + rpc.rquotad $RPCRQUOTADOPTS + ;; + status) + nfs_load_config + # Red Hat uses STATDARG, Debian uses STATDOPTS + opts="${STATDARG:-${STATDOPTS:-''}}" + if [ -z "$opts" ]; then + # shellcheck disable=SC2086 + set -- \ + ${STATD_HA_CALLOUT:+-H} $STATD_HA_CALLOUT \ + ${STATD_HOSTNAME:+-n} $STATD_HOSTNAME \ + ${STATD_PORT:+-p} $STATD_PORT \ + ${STATD_OUTGOING_PORT:+-o} $STATD_OUTGOING_PORT + opts="$*" + fi + # shellcheck disable=SC2086 + rpc.statd $opts + ;; + *) + echo "$0: Internal error - invalid call to manual_start()" + exit 1 + ;; + esac +} + +service_or_manual_start() +{ + _rpc_service="$1" + _system_service="$2" + + if service_is_defined "$_system_service"; then + service "$_system_service" start + elif service_is_auto_started "$_system_service"; then + manual_start "$_rpc_service" + fi +} + +service_start() +{ + _rpc_service="$1" + + case "$_rpc_service" in + nfs) + nfs_service_start + ;; + nlockmgr) + if service_is_defined "$nfs_lock_service" ; then + service "$nfs_lock_service" start + else + service "$nfs_service" start + fi + ;; + mountd) + service_or_manual_start "$_rpc_service" "$nfs_mountd_service" + ;; + rquotad) + service_or_manual_start "$_rpc_service" "$nfs_rquotad_service" + ;; + status) + service_or_manual_start "$_rpc_service" "$nfs_status_service" + ;; + *) + usage + ;; + esac +} + +################################################## +# service init startup and final shutdown + +nfs_shutdown() +{ + nfs_service_stop +} + +nfs_startup() +{ + nfs_service_stop || true + nfs_service_start + _f="${PROCFS_PATH}/sys/net/ipv4/tcp_tw_recycle" + if [ -f "$_f" ]; then + echo 1 >"$_f" + fi +} + +################################################## +# monitor-post support + +nfs_check_thread_count() +{ + # Load NFS configuration to get desired number of threads. + nfs_load_config + + # If $RPCNFSDCOUNT/$USE_KERNEL_NFSD_NUMBER isn't set then we could + # guess the default from the initscript. However, let's just + # assume that those using the default don't care about the number + # of threads and that they have switched on this feature in error. + _configured_threads="${RPCNFSDCOUNT:-${USE_KERNEL_NFSD_NUMBER}}" + if [ -z "$_configured_threads" ] && type nfsconf >/dev/null 2>&1; then + _configured_threads=$(nfsconf --get nfsd threads) || true + fi + [ -n "$_configured_threads" ] || return 0 + + _threads_file="${PROCFS_PATH}/fs/nfsd/threads" + + # nfsd should be running the configured number of threads. If + # there are a different number of threads then tell nfsd the + # correct number. + read -r _running_threads <"$_threads_file" || { + echo "WARNING: Reading \"${_threads_file}\" unexpectedly failed" + exit 0 + } + + # Intentionally not arithmetic comparison - avoids extra errors + # when above read fails in an unexpected way... + if [ "$_running_threads" != "$_configured_threads" ]; then + echo "Attempting to correct number of nfsd threads from ${_running_threads} to ${_configured_threads}" + echo "$_configured_threads" >"$_threads_file" + fi +} + +################################################## +# list share directories + +nfs_monitor_list_shares() +{ + _cache_file="${CTDB_NFS_CALLOUT_STATE_DIR}/list_shares_cache" + # -nt operator is well supported in Linux: dash, bash, ksh, ... + # shellcheck disable=SC2039,SC3013 + if [ ! -r "$nfs_exports_file" ] || [ ! -r "$_cache_file" ] || + [ "$nfs_exports_file" -nt "$_cache_file" ]; then + mkdir -p "$CTDB_NFS_CALLOUT_STATE_DIR" + # We could just use the contents of $nfs_exports_file. + # However, let's regard that file as internal to NFS and use + # exportfs, which is the public API. + if ! _exports=$(exportfs -v); then + echo "WARNING: failed to run exportfs to list NFS shares" >&2 + return + fi + + echo "$_exports" | + grep '^/' | + sed -e 's@[[:space:]][[:space:]]*[^[:space:]()][^[:space:]()]*([^[:space:]()][^[:space:]()]*)$@@' | + sort -u >"$_cache_file" + fi + + cat "$_cache_file" +} + +################################################## + +nfs_register() +{ + cat <<EOF +shutdown +startup +stop +start +monitor-list-shares +monitor-post +EOF +} + +################################################## + +case "$1" in +shutdown) + nfs_shutdown + ;; +startup) + nfs_startup + ;; +stop) + service_stop "$2" + ;; +start) + service_start "$2" + ;; +monitor-list-shares) + nfs_monitor_list_shares + ;; +monitor-post) + nfs_check_thread_count + ;; +register) + nfs_register + ;; +monitor-pre | releaseip | takeip | releaseip-pre | takeip-pre) + # Not required/implemented + : + ;; +*) + usage + ;; +esac diff --git a/ctdb/config/notification.README b/ctdb/config/notification.README new file mode 100755 index 0000000..16b632f --- /dev/null +++ b/ctdb/config/notification.README @@ -0,0 +1,36 @@ +This directory should contain executable programs ending in ".script" +to handle CTDB event notifications. The first and only argument +passed to each program is the event, which is one of: + + init, setup, startup, unhealthy, healthy + +An example script that sends SNMP traps for unhealthy/healthy might +look like this: + + #!/bin/sh + + case "$1" in + unhealthy) + # Send an SNMP trap saying that the node is unhealthy: + snmptrap -m ALL -v 1 -c public 10.1.1.105 ctdb \ + $(hostname) 0 0 $(date +"%s") ctdb.nodeHealth.0 i 1 + ;; + healthy) + # Send an SNMP trap saying that the node is healthy again: + snmptrap -m ALL -v 1 -c public 10.1.1.105 ctdb \ + $(hostname) 0 0 $(date +"%s") ctdb.nodeHealth.0 i 0 + ;; + esac + +Alternatively, email could be sent: + + #!/bin/sh + + case "$1" in + unhealthy) + mail -s "$(hostname) is UNHEALTHY" foo@example.com </dev/null >/dev/null 2>&1 + ;; + healthy) + mail -s "$(hostname) is HEALTHY" foo@example.com </dev/null >/dev/null 2>&1 + ;; + esac diff --git a/ctdb/config/notify.sh b/ctdb/config/notify.sh new file mode 100755 index 0000000..db69afc --- /dev/null +++ b/ctdb/config/notify.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +# This is script is invoked from ctdb when certain events happen. See +# /etc/ctdb/events/notification/README for more details. + +d=$(dirname "$0") +nd="${d}/events/notification" + +ok=true + +for i in "${nd}/"*.script ; do + # Files must be executable + [ -x "$i" ] || continue + + # Flag failures + "$i" "$1" || ok=false +done + +$ok diff --git a/ctdb/config/script.options b/ctdb/config/script.options new file mode 100644 index 0000000..79e82af --- /dev/null +++ b/ctdb/config/script.options @@ -0,0 +1,16 @@ +# For now, use script.options to demonstrate these options. See +# *.options examples for more specific examples. + +# +# Samba configuration +# + +# 50.samba.options +# CTDB_SAMBA_SKIP_SHARE_CHECK=yes + +# +# NFS configuration +# + +# 60.nfs.options +CTDB_RPCINFO_LOCALHOST="127.0.0.1" diff --git a/ctdb/config/statd-callout b/ctdb/config/statd-callout new file mode 100755 index 0000000..38c155e --- /dev/null +++ b/ctdb/config/statd-callout @@ -0,0 +1,254 @@ +#!/bin/sh + +# This must run as root as CTDB tool commands need to access CTDB socket +[ "$(id -u)" -eq 0 ] || exec sudo "$0" "$@" + +# statd must be configured to use this script as its high availability call-out. +# +# In most Linux versions this can be done using something like the following... +# +# /etc/sysconfig/nfs (Red Hat) or /etc/default/nfs-common (Debian): +# NFS_HOSTNAME=myhostname +# STATD_HOSTNAME="${NFS_HOSTNAME} -H /etc/ctdb/statd-callout" +# +# Newer Red Hat Linux variants instead use /etc/nfs.conf: +# [statd] +# name = myhostname +# ha-callout = /etc/ctdb/statd-callout + +[ -n "$CTDB_BASE" ] || \ + CTDB_BASE=$(d=$(dirname "$0") && cd -P "$d" && echo "$PWD") + +. "${CTDB_BASE}/functions" + +# Overwrite this so we get some logging +die () +{ + script_log "statd-callout" "$@" + exit 1 +} + +# Try different variables to find config file for NFS_HOSTNAME +load_system_config "nfs" "nfs-common" + +# If NFS_HOSTNAME not set then try to pull it out of /etc/nfs.conf +if [ -z "$NFS_HOSTNAME" ]; then + if type nfsconf >/dev/null 2>&1; then + NFS_HOSTNAME=$(nfsconf --get statd name) + elif type git >/dev/null 2>&1; then + # git to the rescue! + NFS_HOSTNAME=$(git config --file=/etc/nfs.conf statd.name) + fi +fi + +[ -n "$NFS_HOSTNAME" ] || \ + die "NFS_HOSTNAME is not configured. statd-callout failed" + +############################################################ + +ctdb_setup_state_dir "service" "nfs" + +# script_state_dir set by ctdb_setup_state_dir() +# shellcheck disable=SC2154 +d="${script_state_dir}/statd-callout" + +mkdir -p "$d" || die "Failed to create directory \"${d}\"" +cd "$d" || die "Failed to change directory to \"${d}\"" + +pnn=$(ctdb_get_pnn) + +############################################################ + +send_notifies () +{ + _smnotify="${CTDB_HELPER_BINDIR}/smnotify" + + # State must monotonically increase, across the entire + # cluster. Use seconds since epoch and hope the time is in + # sync across nodes. Even numbers mean service is shut down, + # odd numbers mean service is started. + + # Intentionally round to an even number + # shellcheck disable=SC2017 + _state_even=$(( $(date '+%s') / 2 * 2)) + + _prev="" + while read _sip _cip ; do + # NOTE: Consider optimising smnotify to read all the + # data from stdin and then run it in the background. + + # Reset stateval for each serverip + if [ "$_sip" != "$_prev" ] ; then + _stateval="$_state_even" + fi + + # Send notifies for server shutdown + "$_smnotify" --client="$_cip" --ip="$_sip" \ + --server="$_sip" --stateval="$_stateval" + "$_smnotify" --client="$_cip" --ip="$_sip" \ + --server="$NFS_HOSTNAME" --stateval="$_stateval" + + # Send notifies for server startup + _stateval=$((_stateval + 1)) + "$_smnotify" --client="$_cip" --ip="$_sip" \ + --server="$_sip" --stateval="$_stateval" + "$_smnotify" --client="$_cip" --ip="$_sip" \ + --server="$NFS_HOSTNAME" --stateval="$_stateval" + done +} + +delete_records () +{ + while read _sip _cip ; do + _key="statd-state@${_sip}@${_cip}" + echo "\"${_key}\" \"\"" + done | $CTDB ptrans "ctdb.tdb" +} + +############################################################ + +case "$1" in + # Keep a single file to keep track of the last "add-client" or + # "del-client'. These get pushed to ctdb.tdb during "update", + # which will generally be run once each "monitor" cycle. In this + # way we avoid scalability problems with flood of persistent + # transactions after a "notify" when all the clients re-take their + # locks. + + add-client) + # statd does not tell us to which IP the client connected so + # we must add it to all the IPs that we serve + cip="$2" + date=$(date '+%s') + # x is intentionally ignored + # shellcheck disable=SC2034 + $CTDB ip -X | + tail -n +2 | + while IFS="|" read x sip node x ; do + [ "$node" = "$pnn" ] || continue # not us + key="statd-state@${sip}@${cip}" + echo "\"${key}\" \"${date}\"" >"$key" + done + ;; + + del-client) + # statd does not tell us from which IP the client disconnected + # so we must add it to all the IPs that we serve + cip="$2" + # x is intentionally ignored + # shellcheck disable=SC2034 + $CTDB ip -X | + tail -n +2 | + while IFS="|" read x sip node x ; do + [ "$node" = "$pnn" ] || continue # not us + key="statd-state@${sip}@${cip}" + echo "\"${key}\" \"\"" >"$key" + done + ;; + + update) + files=$(echo statd-state@*) + if [ "$files" = "statd-state@*" ] ; then + # No files! + exit 0 + fi + # Filter out lines for any IP addresses that are not currently + # hosted public IP addresses. + ctdb_ips=$($CTDB ip | tail -n +2) + sed_expr=$(echo "$ctdb_ips" | + awk -v pnn="$pnn" 'pnn == $2 { + ip = $1; gsub(/\./, "\\.", ip); + printf "/statd-state@%s@/p\n", ip }') + # Intentional multi-word expansion for multiple files + # shellcheck disable=SC2086 + items=$(sed -n "$sed_expr" $files) + if [ -n "$items" ] ; then + if echo "$items" | $CTDB ptrans "ctdb.tdb" ; then + # shellcheck disable=SC2086 + rm $files + fi + fi + ;; + + notify) + # we must restart the lockmanager (on all nodes) so that we get + # a clusterwide grace period (so other clients don't take out + # conflicting locks through other nodes before all locks have been + # reclaimed) + + # we need these settings to make sure that no tcp connections survive + # across a very fast failover/failback + #echo 10 > /proc/sys/net/ipv4/tcp_fin_timeout + #echo 0 > /proc/sys/net/ipv4/tcp_max_tw_buckets + #echo 0 > /proc/sys/net/ipv4/tcp_max_orphans + + # Delete the notification list for statd, we don't want it to + # ping any clients + rm -f /var/lib/nfs/statd/sm/* + rm -f /var/lib/nfs/statd/sm.bak/* + + # We must also let some time pass between stopping and + # restarting the lock manager. Otherwise there is a window + # where the lock manager will respond "strangely" immediately + # after restarting it, which causes clients to fail to reclaim + # their locks. + nfs_callout_init + "$CTDB_NFS_CALLOUT" "stop" "nlockmgr" >/dev/null 2>&1 + sleep 2 + "$CTDB_NFS_CALLOUT" "start" "nlockmgr" >/dev/null 2>&1 + + # we now need to send out additional statd notifications to ensure + # that clients understand that the lockmanager has restarted. + # we have three cases: + # 1, clients that ignore the ip address the stat notification came from + # and ONLY care about the 'name' in the notify packet. + # these clients ONLY work with lock failover IFF that name + # can be resolved into an ipaddress that matches the one used + # to mount the share. (==linux clients) + # This is handled when starting lockmanager above, but those + # packets are sent from the "wrong" ip address, something linux + # clients are ok with, buth other clients will barf at. + # 2, Some clients only accept statd packets IFF they come from the + # 'correct' ip address. + # 2a,Send out the notification using the 'correct' ip address and also + # specify the 'correct' hostname in the statd packet. + # Some clients require both the correct source address and also the + # correct name. (these clients also ONLY work if the ip addresses + # used to map the share can be resolved into the name returned in + # the notify packet.) + # 2b,Other clients require that the source ip address of the notify + # packet matches the ip address used to take out the lock. + # I.e. that the correct source address is used. + # These clients also require that the statd notify packet contains + # the name as the ip address used when the lock was taken out. + # + # Both 2a and 2b are commonly used in lockmanagers since they maximize + # probability that the client will accept the statd notify packet and + # not just ignore it. + # For all IPs we serve, collect info and push to the config database + + # Construct a sed expression to take catdb output and produce pairs of: + # server-IP client-IP + # but only for the server-IPs that are hosted on this node. + ctdb_all_ips=$($CTDB ip all | tail -n +2) + sed_expr=$(echo "$ctdb_all_ips" | + awk -v pnn="$pnn" 'pnn == $2 { + ip = $1; gsub(/\./, "\\.", ip); + printf "s/^key.*=.*statd-state@\\(%s\\)@\\([^\"]*\\).*/\\1 \\2/p\n", ip }') + + statd_state=$($CTDB catdb ctdb.tdb | sed -n "$sed_expr" | sort) + [ -n "$statd_state" ] || exit 0 + + echo "$statd_state" | send_notifies + echo "$statd_state" | delete_records + + # Remove any stale touch files (i.e. for IPs not currently + # hosted on this node and created since the last "update"). + # There's nothing else we can do with them at this stage. + echo "$ctdb_all_ips" | + awk -v pnn="$pnn" 'pnn != $2 { print $1 }' | + while read sip ; do + rm -f "statd-state@${sip}@"* + done + ;; +esac diff --git a/ctdb/configure b/ctdb/configure new file mode 100755 index 0000000..48b786b --- /dev/null +++ b/ctdb/configure @@ -0,0 +1,22 @@ +#!/bin/sh + +PREVPATH=`dirname $0` + +WAF=buildtools/bin/waf +[ -x "$WAF" ] || WAF=../buildtools/bin/waf + +# using JOBS=1 gives maximum compatibility with +# systems like AIX which have broken threading in python +JOBS=1 +export JOBS + +# Make sure we don't have any library preloaded. +unset LD_PRELOAD + +# Make sure we get stable hashes +PYTHONHASHSEED=1 +export PYTHONHASHSEED + +cd . || exit 1 +$PYTHON $WAF configure "$@" || exit 1 +cd $PREVPATH diff --git a/ctdb/configure.rpm b/ctdb/configure.rpm new file mode 100755 index 0000000..e8ec3b2 --- /dev/null +++ b/ctdb/configure.rpm @@ -0,0 +1,20 @@ +#!/bin/sh + +if gcc -dM -E - </dev/null | grep -Eq '__(x86_64|powerpc64)__' ; then + _libdir=/usr/lib64 +else + _libdir=/usr/lib +fi + +CFLAGS="-Wall -g -D_GNU_SOURCE" ./configure \ + --builtin-libraries=replace,popt \ + --bundled-libraries=!talloc,!tevent,!tdb \ + --minimum-library-version=talloc:2.1.14,tdb:1.3.17,tevent:0.9.37 \ + --prefix=/usr \ + --includedir=/usr/include/ctdb \ + --libdir=${_libdir} \ + --libexecdir=/usr/libexec \ + --sysconfdir=/etc \ + --mandir=/usr/man \ + --localstatedir=/var \ + $* |