diff options
Diffstat (limited to 'ctdb/tools/ctdb_diagnostics')
-rwxr-xr-x | ctdb/tools/ctdb_diagnostics | 346 |
1 files changed, 346 insertions, 0 deletions
diff --git a/ctdb/tools/ctdb_diagnostics b/ctdb/tools/ctdb_diagnostics new file mode 100755 index 0000000..d16a71c --- /dev/null +++ b/ctdb/tools/ctdb_diagnostics @@ -0,0 +1,346 @@ +#!/bin/sh +# a script to test the basic setup of a CTDB/Samba install +# tridge@samba.org September 2007 +# martin@meltin.net August 2010 + +usage () +{ + cat >&2 <<EOF +Usage: ctdb_diagnostics [OPTION] ... + options: + -n <nodes> Comma separated list of nodes to operate on + -c Ignore comment lines (starting with '#') in file comparisons + -w Ignore whitespace in file comparisons + --no-ads Do not use commands that assume an Active Directory Server +EOF + exit 1 + +} + +nodes=$(ctdb listnodes -X | cut -d'|' -f2) +bad_nodes="" +diff_opts= +no_ads=false + +parse_options () +{ + temp=$(getopt -n "ctdb_diagnostics" -o "n:cwh" -l no-ads,help -- "$@") + + # No! Checking the exit code afterwards is actually clearer... + # shellcheck disable=SC2181 + [ $? -eq 0 ] || usage + + eval set -- "$temp" + + while true ; do + case "$1" in + -n) nodes=$(echo "$2" | sed -e 's@,@ @g') ; shift 2 ;; + -c) diff_opts="${diff_opts} -I ^#.*" ; shift ;; + -w) diff_opts="${diff_opts} -w" ; shift ;; + --no-ads) no_ads=true ; shift ;; + --) shift ; break ;; + -h|--help|*) usage ;; + esac + done + + [ $# -ne 0 ] && usage +} + +parse_options "$@" + +# Use 5s ssh timeout if EXTRA_SSH_OPTS doesn't set a timeout. +case "$EXTRA_SSH_OPTS" in + *ConnectTimeout=*) : ;; + *) + export EXTRA_SSH_OPTS="${EXTRA_SSH_OPTS} -o ConnectTimeout=5" +esac + +# Filter nodes. Remove any nodes we can't contact from $node and add +# them to $bad_nodes. +_nodes="" +for _i in $nodes ; do + if onnode "$_i" true >/dev/null 2>&1 ; then + _nodes="${_nodes}${_nodes:+ }${_i}" + else + bad_nodes="${bad_nodes}${bad_nodes:+,}${_i}" + fi +done +nodes="$_nodes" + +nodes_comma=$(echo "$nodes" | sed -e 's@[[:space:]]@,@g') + +PATH="$PATH:/sbin:/usr/sbin:/usr/lpp/mmfs/bin" + +# list of config files that must exist and that we check are the same +# on the nodes +if [ -d /etc/sysconfig ] ; then + CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /usr/local/etc/ctdb/nodes /etc/sysconfig/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/sysconfig/nfs /etc/exports /etc/vsftpd/vsftpd.conf" +else + CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /usr/local/etc/ctdb/nodes /etc/default/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/default/nfs /etc/exports /etc/vsftpd/vsftpd.conf" +fi + +# list of config files that may exist and should be checked that they +# are the same on the nodes +CONFIG_FILES_MAY="/usr/local/etc/ctdb/public_addresses /usr/local/etc/ctdb/static-routes" + +exec 2>&1 + +cat <<EOF +-------------------------------------------------------------------- +ctdb_diagnostics starting. This script will gather information about +your ctdb cluster. You should send the output of this script along +with any ctdb or clustered Samba bug reports. +-------------------------------------------------------------------- +EOF + +date + +error() { + msg="$1" + echo "ERROR: $msg" + NUM_ERRORS=$((NUM_ERRORS + 1)) + echo " ERROR[$NUM_ERRORS]: $msg" >> "$ERRORS" +} + +show_file() { + fname="$1" + _fdetails=$(ls -l "$fname" 2>&1) + echo " ================================" + echo " File: $fname" + echo " $_fdetails" + sed 's/^/ /' "$fname" 2>&1 + echo " ================================" +} + +show_all() { + echo "running $1 on nodes $nodes_comma" + onnode "$nodes_comma" "hostname; date; $1 2>&1 | sed 's/^/ /'" 2>&1 +} + +show_and_compare_files () { + + fmt="$1" ; shift + + for f ; do + _bf=$(basename "$f") + first=true + + for n in $nodes ; do + + if $first ; then + onnode "$n" [ -r "$f" ] || { + # This function takes a format string + # shellcheck disable=SC2059 + msg=$(printf "$fmt" "$f" "$n") + error "$msg" + continue 2; + } + + fstf="${tmpdir}/${_bf}.node${n}" + onnode "$n" cat "$f" >"$fstf" 2>&1 + + _fdetails=$(onnode "$n" ls -l "$f" 2>&1) + echo " ================================" + echo " File (on node $n): $f" + echo " $_fdetails" + sed 's/^/ /' "$fstf" + echo " ================================" + first=false + else + echo "Testing for same config file $f on node $n" + tmpf="${tmpdir}/${_bf}.node${n}" + onnode "$n" cat "$f" >"$tmpf" 2>&1 + # Intentional multi-word splitting on diff_opts + # shellcheck disable=SC2086 + diff $diff_opts "$fstf" "$tmpf" >/dev/null 2>&1 || { + error "File $f is different on node $n" + diff -u $diff_opts "$fstf" "$tmpf" + } + rm -f "$tmpf" + fi + done + + rm -f "$fstf" + done +} + +if ! tmpdir=$(mktemp -d) ; then + echo "Unable to create a temporary directory" + exit 1 +fi +ERRORS="${tmpdir}/diag_err" +NUM_ERRORS=0 + +cat <<EOF +Diagnosis started on these nodes: +$nodes_comma +EOF + +if [ -n "$bad_nodes" ] ; then + cat <<EOF + +NOT RUNNING DIAGNOSTICS on these uncontactable nodes: +$bad_nodes +EOF + +fi + +cat <<EOF + +For reference, here is the nodes file on the current node... +EOF + +show_file /usr/local/etc/ctdb/nodes + +cat <<EOF +-------------------------------------------------------------------- +Comping critical config files on nodes $nodes_comma +EOF + +# Intentional multi-word splitting on CONFIG_FILES_MUST +# shellcheck disable=SC2086 +show_and_compare_files \ + "%s is missing on node %d" \ + $CONFIG_FILES_MUST + +# Intentional multi-word splitting on CONFIG_FILES_MAY +# shellcheck disable=SC2086 +show_and_compare_files \ + "Optional file %s is not present on node %d" \ + $CONFIG_FILES_MAY + +cat <<EOF +-------------------------------------------------------------------- +Checking for clock drift +EOF +t=$(date +%s) +for i in $nodes; do + t2=$(onnode "$i" date +%s) + d=$((t2 - t)) + if [ "$d" -gt 30 ] || [ "$d" -lt -30 ]; then + error "time on node $i differs by $d seconds" + fi +done + +cat <<EOF +-------------------------------------------------------------------- +Showing software versions +EOF +show_all "uname -a" +[ -x /bin/rpm ] && { + show_all "rpm -qa | grep -E 'samba|ctdb|gpfs'" +} +[ -x /usr/bin/dpkg-query ] && { + show_all "/usr/bin/dpkg-query --show 'ctdb'" + show_all "/usr/bin/dpkg-query --show 'samba'" + #show_all "/usr/bin/dpkg-query --show 'gpfs'" +} + + +cat <<EOF +-------------------------------------------------------------------- +Showing ctdb status and recent log entries +EOF +show_all "ctdb status; ctdb ip" +show_all "ctdb statistics" +show_all "ctdb uptime" +show_all "ctdb listvars" +show_all "ctdb getdbmap" +show_all "ctdb -X getdbmap | awk -F'|' 'NR > 1 {print \$3}' | sort | xargs -n 1 ctdb dbstatistics" + +echo "Showing log.ctdb" +show_all "test -f /usr/local/var/log/log.ctdb && tail -100 /usr/local/var/log/log.ctdb" + +show_all "tail -200 /var/log/messages" +show_all "ls -lRs /usr/local/var/lib/ctdb" +show_all "ls -lRs /usr/local/etc/ctdb" + + +cat <<EOF +-------------------------------------------------------------------- +Showing system and process status +EOF +show_all "df" +show_all "df -i" +show_all "mount" +show_all "w" +show_all "ps axfwu" +show_all "dmesg" +show_all "/sbin/lspci" +show_all "dmidecode" +show_all "cat /proc/partitions" +show_all "cat /proc/cpuinfo" +show_all "cat /proc/scsi/scsi" +show_all "/sbin/ifconfig -a" +show_all "/sbin/ifconfig -a" +show_all "cat /proc/net/dev" +show_all "/sbin/ip addr list" +show_all "/sbin/route -n" +show_all "ss -s" +show_all "free" +show_all "crontab -l" +show_all "sysctl -a" +show_all "iptables -L -n" +show_all "iptables -L -n -t nat" +show_all "/usr/sbin/rpcinfo -p" +show_all "/usr/sbin/showmount -a" +show_all "/usr/sbin/showmount -e" +show_all "/usr/sbin/nfsstat -v" +[ -x /sbin/multipath ] && { + show_all "/sbin/multipath -ll" +} +[ -x /sbin/chkconfig ] && { + show_all "/sbin/chkconfig --list" +} +[ -x /usr/sbin/getenforce ] && { + show_all "/usr/sbin/getenforce" +} +[ -d /proc/net/bonding ] && { + for f in /proc/net/bonding/*; do + show_all "cat $f" + done +} + +cat <<EOF +-------------------------------------------------------------------- +Showing Samba status +EOF +show_all "smbstatus -n -B" +if $no_ads ; then + echo + echo "Skipping \"net ads testjoin\" as requested" + echo +else + show_all "net ads testjoin" +fi +show_all "net conf list" +show_all "lsof -n | grep smbd" +show_all "lsof -n | grep ctdbd" +show_all "netstat -tan" +if $no_ads ; then + echo + echo "Skipping \"net ads info\" as requested" + echo +else + show_all "net ads info" +fi +show_all "date" +show_all "smbclient -U% -L 127.0.0.1" +WORKGROUP=$(testparm -s --parameter-name=WORKGROUP 2> /dev/null) +show_all id "$WORKGROUP/Administrator" +show_all "wbinfo -p" +show_all "wbinfo --online-status" +show_all "smbd -b" + +date +echo "Diagnostics finished with $NUM_ERRORS errors" + +[ -r "$ERRORS" ] && { + cat "$ERRORS" + rm -f "$ERRORS" +} + +rm -rf "$tmpdir" + +exit $NUM_ERRORS + |