summaryrefslogtreecommitdiffstats
path: root/ctdb/tools/ctdb_diagnostics
diff options
context:
space:
mode:
Diffstat (limited to 'ctdb/tools/ctdb_diagnostics')
-rwxr-xr-xctdb/tools/ctdb_diagnostics346
1 files changed, 346 insertions, 0 deletions
diff --git a/ctdb/tools/ctdb_diagnostics b/ctdb/tools/ctdb_diagnostics
new file mode 100755
index 0000000..80a5657
--- /dev/null
+++ b/ctdb/tools/ctdb_diagnostics
@@ -0,0 +1,346 @@
+#!/bin/sh
+# a script to test the basic setup of a CTDB/Samba install
+# tridge@samba.org September 2007
+# martin@meltin.net August 2010
+
+usage ()
+{
+ cat >&2 <<EOF
+Usage: ctdb_diagnostics [OPTION] ...
+ options:
+ -n <nodes> Comma separated list of nodes to operate on
+ -c Ignore comment lines (starting with '#') in file comparisons
+ -w Ignore whitespace in file comparisons
+ --no-ads Do not use commands that assume an Active Directory Server
+EOF
+ exit 1
+
+}
+
+nodes=$(ctdb listnodes -X | cut -d'|' -f2)
+bad_nodes=""
+diff_opts=
+no_ads=false
+
+parse_options ()
+{
+ temp=$(getopt -n "ctdb_diagnostics" -o "n:cwh" -l no-ads,help -- "$@")
+
+ # No! Checking the exit code afterwards is actually clearer...
+ # shellcheck disable=SC2181
+ [ $? -eq 0 ] || usage
+
+ eval set -- "$temp"
+
+ while true ; do
+ case "$1" in
+ -n) nodes=$(echo "$2" | sed -e 's@,@ @g') ; shift 2 ;;
+ -c) diff_opts="${diff_opts} -I ^#.*" ; shift ;;
+ -w) diff_opts="${diff_opts} -w" ; shift ;;
+ --no-ads) no_ads=true ; shift ;;
+ --) shift ; break ;;
+ -h|--help|*) usage ;;
+ esac
+ done
+
+ [ $# -ne 0 ] && usage
+}
+
+parse_options "$@"
+
+# Use 5s ssh timeout if EXTRA_SSH_OPTS doesn't set a timeout.
+case "$EXTRA_SSH_OPTS" in
+ *ConnectTimeout=*) : ;;
+ *)
+ export EXTRA_SSH_OPTS="${EXTRA_SSH_OPTS} -o ConnectTimeout=5"
+esac
+
+# Filter nodes. Remove any nodes we can't contact from $node and add
+# them to $bad_nodes.
+_nodes=""
+for _i in $nodes ; do
+ if onnode "$_i" true >/dev/null 2>&1 ; then
+ _nodes="${_nodes}${_nodes:+ }${_i}"
+ else
+ bad_nodes="${bad_nodes}${bad_nodes:+,}${_i}"
+ fi
+done
+nodes="$_nodes"
+
+nodes_comma=$(echo "$nodes" | sed -e 's@[[:space:]]@,@g')
+
+PATH="$PATH:/sbin:/usr/sbin:/usr/lpp/mmfs/bin"
+
+# list of config files that must exist and that we check are the same
+# on the nodes
+if [ -d /etc/sysconfig ] ; then
+ CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /usr/local/etc/ctdb/nodes /etc/sysconfig/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/sysconfig/nfs /etc/exports /etc/vsftpd/vsftpd.conf"
+else
+ CONFIG_FILES_MUST="/etc/krb5.conf /etc/hosts /usr/local/etc/ctdb/nodes /etc/default/ctdb /etc/resolv.conf /etc/nsswitch.conf /etc/sysctl.conf /etc/samba/smb.conf /etc/fstab /etc/multipath.conf /etc/pam.d/system-auth /etc/default/nfs /etc/exports /etc/vsftpd/vsftpd.conf"
+fi
+
+# list of config files that may exist and should be checked that they
+# are the same on the nodes
+CONFIG_FILES_MAY="/usr/local/etc/ctdb/public_addresses /usr/local/etc/ctdb/static-routes"
+
+exec 2>&1
+
+cat <<EOF
+--------------------------------------------------------------------
+ctdb_diagnostics starting. This script will gather information about
+your ctdb cluster. You should send the output of this script along
+with any ctdb or clustered Samba bug reports.
+--------------------------------------------------------------------
+EOF
+
+date
+
+error() {
+ msg="$1"
+ echo "ERROR: $msg"
+ NUM_ERRORS=$((NUM_ERRORS + 1))
+ echo " ERROR[$NUM_ERRORS]: $msg" >> "$ERRORS"
+}
+
+show_file() {
+ fname="$1"
+ _fdetails=$(ls -l "$fname" 2>&1)
+ echo " ================================"
+ echo " File: $fname"
+ echo " $_fdetails"
+ sed 's/^/ /' "$fname" 2>&1
+ echo " ================================"
+}
+
+show_all() {
+ echo "running $1 on nodes $nodes_comma"
+ onnode "$nodes_comma" "hostname; date; $1 2>&1 | sed 's/^/ /'" 2>&1
+}
+
+show_and_compare_files () {
+
+ fmt="$1" ; shift
+
+ for f ; do
+ _bf=$(basename "$f")
+ first=true
+
+ for n in $nodes ; do
+
+ if $first ; then
+ onnode "$n" [ -r "$f" ] || {
+ # This function takes a format string
+ # shellcheck disable=SC2059
+ msg=$(printf "$fmt" "$f" "$n")
+ error "$msg"
+ continue 2;
+ }
+
+ fstf="${tmpdir}/${_bf}.node${n}"
+ onnode "$n" cat "$f" >"$fstf" 2>&1
+
+ _fdetails=$(onnode "$n" ls -l "$f" 2>&1)
+ echo " ================================"
+ echo " File (on node $n): $f"
+ echo " $_fdetails"
+ sed 's/^/ /' "$fstf"
+ echo " ================================"
+ first=false
+ else
+ echo "Testing for same config file $f on node $n"
+ tmpf="${tmpdir}/${_bf}.node${n}"
+ onnode "$n" cat "$f" >"$tmpf" 2>&1
+ # Intentional multi-word splitting on diff_opts
+ # shellcheck disable=SC2086
+ diff $diff_opts "$fstf" "$tmpf" >/dev/null 2>&1 || {
+ error "File $f is different on node $n"
+ diff -u $diff_opts "$fstf" "$tmpf"
+ }
+ rm -f "$tmpf"
+ fi
+ done
+
+ rm -f "$fstf"
+ done
+}
+
+if ! tmpdir=$(mktemp -d) ; then
+ echo "Unable to create a temporary directory"
+ exit 1
+fi
+ERRORS="${tmpdir}/diag_err"
+NUM_ERRORS=0
+
+cat <<EOF
+Diagnosis started on these nodes:
+$nodes_comma
+EOF
+
+if [ -n "$bad_nodes" ] ; then
+ cat <<EOF
+
+NOT RUNNING DIAGNOSTICS on these uncontactable nodes:
+$bad_nodes
+EOF
+
+fi
+
+cat <<EOF
+
+For reference, here is the nodes file on the current node...
+EOF
+
+show_file /usr/local/etc/ctdb/nodes
+
+cat <<EOF
+--------------------------------------------------------------------
+Comping critical config files on nodes $nodes_comma
+EOF
+
+# Intentional multi-word splitting on CONFIG_FILES_MUST
+# shellcheck disable=SC2086
+show_and_compare_files \
+ "%s is missing on node %d" \
+ $CONFIG_FILES_MUST
+
+# Intentional multi-word splitting on CONFIG_FILES_MAY
+# shellcheck disable=SC2086
+show_and_compare_files \
+ "Optional file %s is not present on node %d" \
+ $CONFIG_FILES_MAY
+
+cat <<EOF
+--------------------------------------------------------------------
+Checking for clock drift
+EOF
+t=$(date +%s)
+for i in $nodes; do
+ t2=$(onnode "$i" date +%s)
+ d=$((t2 - t))
+ if [ "$d" -gt 30 ] || [ "$d" -lt -30 ]; then
+ error "time on node $i differs by $d seconds"
+ fi
+done
+
+cat <<EOF
+--------------------------------------------------------------------
+Showing software versions
+EOF
+show_all "uname -a"
+[ -x /bin/rpm ] && {
+ show_all "rpm -qa | egrep 'samba|ctdb|gpfs'"
+}
+[ -x /usr/bin/dpkg-query ] && {
+ show_all "/usr/bin/dpkg-query --show 'ctdb'"
+ show_all "/usr/bin/dpkg-query --show 'samba'"
+ #show_all "/usr/bin/dpkg-query --show 'gpfs'"
+}
+
+
+cat <<EOF
+--------------------------------------------------------------------
+Showing ctdb status and recent log entries
+EOF
+show_all "ctdb status; ctdb ip"
+show_all "ctdb statistics"
+show_all "ctdb uptime"
+show_all "ctdb listvars"
+show_all "ctdb getdbmap"
+show_all "ctdb -X getdbmap | awk -F'|' 'NR > 1 {print \$3}' | sort | xargs -n 1 ctdb dbstatistics"
+
+echo "Showing log.ctdb"
+show_all "test -f /usr/local/var/log/log.ctdb && tail -100 /usr/local/var/log/log.ctdb"
+
+show_all "tail -200 /var/log/messages"
+show_all "ls -lRs /usr/local/var/lib/ctdb"
+show_all "ls -lRs /usr/local/etc/ctdb"
+
+
+cat <<EOF
+--------------------------------------------------------------------
+Showing system and process status
+EOF
+show_all "df"
+show_all "df -i"
+show_all "mount"
+show_all "w"
+show_all "ps axfwu"
+show_all "dmesg"
+show_all "/sbin/lspci"
+show_all "dmidecode"
+show_all "cat /proc/partitions"
+show_all "cat /proc/cpuinfo"
+show_all "cat /proc/scsi/scsi"
+show_all "/sbin/ifconfig -a"
+show_all "/sbin/ifconfig -a"
+show_all "cat /proc/net/dev"
+show_all "/sbin/ip addr list"
+show_all "/sbin/route -n"
+show_all "ss -s"
+show_all "free"
+show_all "crontab -l"
+show_all "sysctl -a"
+show_all "iptables -L -n"
+show_all "iptables -L -n -t nat"
+show_all "/usr/sbin/rpcinfo -p"
+show_all "/usr/sbin/showmount -a"
+show_all "/usr/sbin/showmount -e"
+show_all "/usr/sbin/nfsstat -v"
+[ -x /sbin/multipath ] && {
+ show_all "/sbin/multipath -ll"
+}
+[ -x /sbin/chkconfig ] && {
+ show_all "/sbin/chkconfig --list"
+}
+[ -x /usr/sbin/getenforce ] && {
+ show_all "/usr/sbin/getenforce"
+}
+[ -d /proc/net/bonding ] && {
+ for f in /proc/net/bonding/*; do
+ show_all "cat $f"
+ done
+}
+
+cat <<EOF
+--------------------------------------------------------------------
+Showing Samba status
+EOF
+show_all "smbstatus -n -B"
+if $no_ads ; then
+ echo
+ echo "Skipping \"net ads testjoin\" as requested"
+ echo
+else
+ show_all "net ads testjoin"
+fi
+show_all "net conf list"
+show_all "lsof -n | grep smbd"
+show_all "lsof -n | grep ctdbd"
+show_all "netstat -tan"
+if $no_ads ; then
+ echo
+ echo "Skipping \"net ads info\" as requested"
+ echo
+else
+ show_all "net ads info"
+fi
+show_all "date"
+show_all "smbclient -U% -L 127.0.0.1"
+WORKGROUP=$(testparm -s --parameter-name=WORKGROUP 2> /dev/null)
+show_all id "$WORKGROUP/Administrator"
+show_all "wbinfo -p"
+show_all "wbinfo --online-status"
+show_all "smbd -b"
+
+date
+echo "Diagnostics finished with $NUM_ERRORS errors"
+
+[ -r "$ERRORS" ] && {
+ cat "$ERRORS"
+ rm -f "$ERRORS"
+}
+
+rm -rf "$tmpdir"
+
+exit $NUM_ERRORS
+