author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 06:40:13 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 06:40:13 +0000
commit    e9be59e1502a41bab9891d96d753102a7dafef0b (patch)
tree      c3b2da87c414881f4b53d0964f407c83492d813e /hb_report
parent    Initial commit. (diff)
Adding upstream version 1.0.12. (upstream/1.0.12, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'hb_report')
-rw-r--r--  hb_report/Makefile.am               26
-rw-r--r--  hb_report/ha_cf_support.sh          83
-rwxr-xr-x  hb_report/hb_report.in            1445
-rw-r--r--  hb_report/openais_conf_support.sh   97
-rw-r--r--  hb_report/utillib.sh               752
5 files changed, 2403 insertions, 0 deletions
diff --git a/hb_report/Makefile.am b/hb_report/Makefile.am
new file mode 100644
index 0000000..cd4ad65
--- /dev/null
+++ b/hb_report/Makefile.am
@@ -0,0 +1,26 @@
+#
+# heartbeat: Linux-HA heartbeat code
+#
+# Copyright (C) 2001 Michael Moerz
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+MAINTAINERCLEANFILES = Makefile.in
+
+hanoarchdir = $(datadir)/$(PACKAGE_NAME)
+
+hanoarch_DATA = utillib.sh ha_cf_support.sh openais_conf_support.sh
+sbin_SCRIPTS = hb_report
+
diff --git a/hb_report/ha_cf_support.sh b/hb_report/ha_cf_support.sh
new file mode 100644
index 0000000..7b35c98
--- /dev/null
+++ b/hb_report/ha_cf_support.sh
@@ -0,0 +1,83 @@
+ # Copyright (C) 2007 Dejan Muhamedagic <dmuhamedagic@suse.de>
+ #
+ # This program is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU General Public
+ # License as published by the Free Software Foundation; either
+ # version 2.1 of the License, or (at your option) any later version.
+ #
+ # This software is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public
+ # License along with this library; if not, write to the Free Software
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ #
+
+#
+# Stack specific part (heartbeat)
+# ha.cf/logd.cf parsing
+#
+getcfvar() {
+ [ -f "$CONF" ] || return
+ sed 's/#.*//' < $CONF |
+ grep -w "^$1" |
+ sed 's/^[^[:space:]]*[[:space:]]*//'
+}
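+# For example, with a line "logfacility local7" in ha.cf, `getcfvar
+# logfacility` prints "local7" (a sketch; the output naturally depends
+# on the ha.cf at hand):
+#
+#	CONF=/etc/ha.d/ha.cf
+#	getcfvar logfacility	# -> local7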
+iscfvarset() {
+ test "`getcfvar $1`"
+}
+iscfvartrue() {
+ getcfvar "$1" |
+ egrep -qsi "^(true|y|yes|on|1)"
+}
+uselogd() {
+ iscfvartrue use_logd &&
+ return 0 # if use_logd true
+ iscfvarset logfacility ||
+ iscfvarset logfile ||
+ iscfvarset debugfile ||
+ return 0 # or none of the log options set
+ false
+}
+get_hb_logvars() {
+ # unless logfacility is set to none, heartbeat/ha_logd are
+ # going to log through syslog
+ HA_LOGFACILITY=`getcfvar logfacility`
+ [ "" = "$HA_LOGFACILITY" ] && HA_LOGFACILITY=$DEFAULT_HA_LOGFACILITY
+ [ none = "$HA_LOGFACILITY" ] && HA_LOGFACILITY=""
+ HA_LOGFILE=`getcfvar logfile`
+ HA_DEBUGFILE=`getcfvar debugfile`
+}
+getlogvars() {
+ HA_LOGFACILITY=${HA_LOGFACILITY:-$DEFAULT_HA_LOGFACILITY}
+ HA_LOGLEVEL="info"
+ cfdebug=`getcfvar debug` # prefer debug level if set
+ isnumber $cfdebug || cfdebug=""
+ [ "$cfdebug" ] && [ $cfdebug -gt 0 ] &&
+ HA_LOGLEVEL="debug"
+ if uselogd; then
+ [ -f "$LOGD_CF" ] || {
+ debug "logd used but logd.cf not found: using defaults"
+ return # no configuration: use defaults
+ }
+ debug "reading log settings from $LOGD_CF"
+ get_logd_logvars
+ else
+ debug "reading log settings from $CONF"
+ get_hb_logvars
+ fi
+}
+cluster_info() {
+ echo "heartbeat version: `$HA_BIN/heartbeat -V`"
+}
+essential_files() {
+ cat<<EOF
+d $HA_VARLIB 0755 root root
+d $HA_VARLIB/ccm 0750 hacluster haclient
+d $PCMK_LIB 0750 hacluster haclient
+d $PE_STATE_DIR 0750 hacluster haclient
+d $CIB_DIR 0750 hacluster haclient
+EOF
+}
diff --git a/hb_report/hb_report.in b/hb_report/hb_report.in
new file mode 100755
index 0000000..a3f9c66
--- /dev/null
+++ b/hb_report/hb_report.in
@@ -0,0 +1,1445 @@
+#!/bin/sh
+
+ # Copyright (C) 2007 Dejan Muhamedagic <dmuhamedagic@suse.de>
+ #
+ # This program is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU General Public
+ # License as published by the Free Software Foundation; either
+ # version 2.1 of the License, or (at your option) any later version.
+ #
+ # This software is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public
+ # License along with this library; if not, write to the Free Software
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ #
+
+. @OCF_ROOT_DIR@/lib/heartbeat/ocf-shellfuncs
+
+HA_NOARCHBIN=@datadir@/@PACKAGE_NAME@
+
+. $HA_NOARCHBIN/utillib.sh
+
+unset LANG
+export LC_ALL=POSIX
+
+PROG=`basename $0`
+
+# the default syslog facility is not (yet) exported by heartbeat
+# to shell scripts
+#
+DEFAULT_HA_LOGFACILITY="daemon"
+export DEFAULT_HA_LOGFACILITY
+LOGD_CF=`findlogdcf @sysconfdir@ $HA_DIR`
+export LOGD_CF
+
+SSH_PASSWORD_NODES=""
+: ${SSH_OPTS="-o StrictHostKeyChecking=no -o EscapeChar=none"}
+LOG_PATTERNS="CRIT: ERROR:"
+# PEINPUTS_PATT="peng.*PEngine Input stored"
+
+# Important events
+#
+# Patterns format:
+# title extended_regexp
+# NB: don't use spaces in titles or regular expressions!
+EVENT_PATTERNS="
+membership crmd.*ccm_event.*(NEW|LOST)|pcmk_peer_update.*(lost|memb):
+quorum crmd.*crm_update_quorum:.Updating.quorum.status|crmd.*ais.disp.*quorum.(lost|ac?quir)
+pause Process.pause.detected
+resources lrmd.*rsc:(start|stop)
+stonith crmd.*te_fence_node.*Exec|stonith-ng.*log_oper.*reboot|stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=)
+start_stop Configuration.validated..Starting.heartbeat|Corosync.Cluster.Engine|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete
+"
+
+init_tmpfiles
+
+#
+# the instance where user runs hb_report is the master
+# the others are slaves
+#
+if [ x"$1" = x__slave ]; then
+ SLAVE=1
+fi
+
+usage() {
+ cat<<EOF
+usage: hb_report -f {time|"cts:"testnum} [-t time]
+ [-u user] [-X ssh-options] [-l file] [-n nodes] [-E files]
+ [-p patt] [-L patt] [-e prog] [-MSDZAQVsvhd] [dest]
+
+ -f time: time to start from or a CTS test number
+ -t time: time to finish at (dflt: now)
+ -d : don't compress, but leave result in a directory
+ -n nodes: node names for this cluster; this option is additive
+ (use either -n "a b" or -n a -n b)
+ if you run $PROG on the loghost or use autojoin,
+ it is highly recommended to set this option
+ -u user: ssh user to access other nodes (dflt: empty, root, hacluster)
+ -X ssh-options: extra ssh(1) options
+ -l file: log file
+ -E file: extra logs to collect; this option is additive
+ (dflt: /var/log/messages)
+ -s : sanitize the PE and CIB files
+ -p patt: regular expression to match variables containing sensitive data;
+ this option is additive (dflt: "passw.*")
+ -L patt: regular expression to match in log files for analysis;
+ this option is additive (dflt: $LOG_PATTERNS)
+ -e prog: your favourite editor
+ -Q : don't run resource intensive operations (speed up)
+ -M : don't collect extra logs (/var/log/messages)
+ -D : don't invoke editor to write description
+ -Z : if destination directories exist, remove them instead of exiting
+ (this is default for CTS)
+ -A : this is an OpenAIS cluster
+ -S : single node operation; don't try to start report
+ collectors on other nodes
+ -v : increase verbosity
+ -V : print version
+ dest : report name (may include path where to store the report)
+EOF
+
+[ "$1" != short ] &&
+ cat<<EOF
+
+ . the multifile output is stored in a tarball {dest}.tar.bz2
+ . the time specification is as in either Date::Parse or
+ Date::Manip, whatever you have installed; Date::Parse is
+ preferred
+  . we try to figure out where the logfile is; if we can't, please
+    clue us in ('-l')
+  . we collect only one logfile and /var/log/messages; if you
+    have more than one logfile, use the '-E' option to supply
+    as many as you want ('-M' empties the list)
+
+ Examples
+
+ hb_report -f 2pm report_1
+ hb_report -f "2007/9/5 12:30" -t "2007/9/5 14:00" report_2
+ hb_report -f 1:00 -t 3:00 -l /var/log/cluster/ha-debug report_3
+ hb_report -f "09sep07 2:00" -u hbadmin report_4
+ hb_report -f 18:00 -p "usern.*" -p "admin.*" report_5
+ hb_report -f cts:133 ctstest_133
+
+ . WARNING . WARNING . WARNING . WARNING . WARNING . WARNING .
+
+ We won't sanitize the CIB and the peinputs files, because
+ that would make them useless when trying to reproduce the
+ PE behaviour. You may still choose to obliterate sensitive
+ information if you use the -s and -p options, but in that
+ case the support may be lacking as well. The logs and the
+ crm_mon, ccm_tool, and crm_verify output are *not* sanitized.
+
+ Additional system logs (/var/log/messages) are collected in
+  order to have a more complete report. If you don't want that,
+  specify -M.
+
+ IT IS YOUR RESPONSIBILITY TO PROTECT THE DATA FROM EXPOSURE!
+EOF
+ exit
+}
+version() {
+ echo "@PACKAGE_NAME@: @PACKAGE_VERSION@ (@GLUE_BUILD_VERSION@)"
+ exit
+}
+#
+# these are "global" variables
+#
+setvarsanddefaults() {
+ local now=`perl -e 'print time()'`
+ # used by all
+ DEST=""
+ FROM_TIME=""
+ CTS=""
+ TO_TIME=0
+ HA_LOG=""
+ UNIQUE_MSG="Mark:HB_REPORT:$now"
+ SANITIZE="passw.*"
+ DO_SANITIZE=""
+ FORCE_REMOVE_DEST=""
+ COMPRESS="1"
+ # logs to collect in addition
+ # NB: they all have to be in syslog format
+ #
+ EXTRA_LOGS="/var/log/messages"
+ # used only by the master
+ NO_SSH=""
+ SSH_USER=""
+ TRY_SSH="root hacluster"
+ SLAVEPIDS=""
+ NO_DESCRIPTION="1"
+ SKIP_LVL=0
+ VERBOSITY=0
+}
+#
+# caller may skip collecting information if the skip level
+# exceeds the given value
+#
+skip_lvl() {
+ [ $SKIP_LVL -ge $1 ]
+}
+chkname() {
+ [ "$1" ] || usage short
+ echo $1 | grep -qs '[^a-zA-Z0-9@_+=:.-]' &&
+ fatal "$1 contains illegal characters"
+}
+set_dest() {
+ # default DEST has already been set earlier (if the
+ # argument is missing)
+ if [ $# -eq 1 ]; then
+ DEST=`basename $1`
+ DESTDIR=`dirname $1`
+ fi
+ chkname $DEST
+ if [ -z "$COMPRESS" -a -e "$DESTDIR/$DEST" ]; then
+ if [ "$FORCE_REMOVE_DEST" -o "$CTS" ]; then
+ rm -rf $DESTDIR/$DEST
+ else
+ fatal "destination directory $DESTDIR/$DEST exists, please cleanup or use -Z"
+ fi
+ fi
+}
+chktime() {
+	[ "$1" ] || fatal "bad time specification: $2 (try 'YYYY-M-D H:M:S')"
+}
+no_dir() {
+ fatal "could not create the working directory $WORKDIR"
+}
+time2str() {
+ perl -e "use POSIX; print strftime('%x %X',localtime($1));"
+}
+# try to figure out where pacemaker keeps its state and daemon files
+get_pe_state_dir() {
+ PE_STATE_DIR=`strings $CRM_DAEMON_DIR/pengine |
+ awk 'NF==1&&/var\/lib\/.*pengine$/'`
+ test -d "$PE_STATE_DIR"
+}
+get_cib_dir() {
+ CIB_DIR=`strings $CRM_DAEMON_DIR/crmd |
+ awk 'NF==1&&/var\/lib\/.*(cib|crm)$/'`
+ test -d "$CIB_DIR"
+}
+get_pe_state_dir2() {
+ # PE_STATE_DIR
+ local localstatedir lastf
+ localstatedir=`dirname $HA_VARLIB`
+ lastf=$(2>/dev/null ls -rt `2>/dev/null find /var/lib -name pengine -type d |
+ sed 's,$,/*.last,'` | tail -1)
+ if [ -f "$lastf" ]; then
+ PE_STATE_DIR=`dirname $lastf`
+ else
+ for p in pacemaker/pengine pengine heartbeat/pengine; do
+ if [ -d $localstatedir/$p ]; then
+ debug "setting PE_STATE_DIR to $localstatedir/$p"
+ PE_STATE_DIR=$localstatedir/$p
+ break
+ fi
+ done
+ fi
+}
+get_cib_dir2() {
+ # CIB
+ # HA_VARLIB is normally set to {localstatedir}/heartbeat
+ local localstatedir
+ localstatedir=`dirname $HA_VARLIB`
+ for p in pacemaker/cib heartbeat/crm; do
+ if [ -f $localstatedir/$p/cib.xml ]; then
+ debug "setting CIB_DIR to $localstatedir/$p"
+ CIB_DIR=$localstatedir/$p
+ break
+ fi
+ done
+}
+get_crm_daemon_dir() {
+ # CRM_DAEMON_DIR
+ local libdir p
+ libdir=`dirname $HA_BIN`
+ for p in pacemaker heartbeat; do
+ if [ -x $libdir/$p/crmd ]; then
+ debug "setting CRM_DAEMON_DIR to $libdir/$p"
+ CRM_DAEMON_DIR=$libdir/$p
+ return 0
+ fi
+ done
+ return 1
+}
+get_crm_daemon_dir2() {
+ # CRM_DAEMON_DIR again (brute force)
+ local p d d2
+ for p in /usr /usr/local /opt; do
+ for d in libexec lib64 lib; do
+ for d2 in pacemaker heartbeat; do
+ if [ -x $p/$d/$d2/crmd ]; then
+ debug "setting CRM_DAEMON_DIR to $p/$d/$d2"
+ CRM_DAEMON_DIR=$p/$d/$d2
+ break
+ fi
+ done
+ done
+ done
+}
+compatibility_pcmk() {
+ get_crm_daemon_dir || get_crm_daemon_dir2
+ if [ ! -d "$CRM_DAEMON_DIR" ]; then
+ fatal "cannot find pacemaker daemon directory!"
+ fi
+ get_pe_state_dir || get_pe_state_dir2
+ get_cib_dir || get_cib_dir2
+ debug "setting PCMK_LIB to `dirname $CIB_DIR`"
+ PCMK_LIB=`dirname $CIB_DIR`
+ # PTEST
+ PTEST=`echo_ptest_tool`
+ export PE_STATE_DIR CIB_DIR CRM_DAEMON_DIR PCMK_LIB PTEST
+}
+
+#
+# find log files
+#
+logmark() {
+ logger -p $*
+ debug "run: logger -p $*"
+}
+#
+# first try syslog files, if none found then use the
+# logfile/debugfile settings
+#
+findlog() {
+ local logf=""
+ if [ "$HA_LOGFACILITY" ]; then
+ logf=`findmsg $UNIQUE_MSG | awk '{print $1}'`
+ fi
+ if [ -f "$logf" ]; then
+ echo $logf
+ else
+ echo ${HA_DEBUGFILE:-$HA_LOGFILE}
+ [ "${HA_DEBUGFILE:-$HA_LOGFILE}" ] &&
+ debug "will try with ${HA_DEBUGFILE:-$HA_LOGFILE}"
+ fi
+}
+
+#
+# find log slices
+#
+
+find_decompressor() {
+ if echo $1 | grep -qs 'xz$'; then
+ echo "xz -dc"
+ elif echo $1 | grep -qs 'bz2$'; then
+ echo "bzip2 -dc"
+ elif echo $1 | grep -qs 'gz$'; then
+ echo "gzip -dc"
+ else
+ echo "cat"
+ fi
+}
+#
+# check if the log contains a piece of our segment
+#
+is_our_log() {
+ local logf=$1
+ local from_time=$2
+ local to_time=$3
+
+ local cat=`find_decompressor $logf`
+ local first_time="`$cat $logf | head -10 | find_first_ts`"
+ local last_time="`$cat $logf | tail -10 | tac | find_first_ts`"
+ if [ x = "x$first_time" -o x = "x$last_time" ]; then
+ return 0 # skip (empty log?)
+ fi
+ if [ $from_time -gt $last_time ]; then
+ # we shouldn't get here anyway if the logs are in order
+ return 2 # we're past good logs; exit
+ fi
+ if [ $from_time -ge $first_time ]; then
+ return 3 # this is the last good log
+ fi
+ # have to go further back
+ if [ $to_time -eq 0 -o $to_time -ge $first_time ]; then
+ return 1 # include this log
+ else
+ return 0 # don't include this log
+ fi
+}
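+# A worked example (times are epoch seconds): for a log spanning
+# 1000..2000, is_our_log returns 3 when from_time=1500 (the last log
+# we need), 1 when from_time=500 and to_time=0 (include it and keep
+# going back), and 2 when from_time=2500 (we are already past it).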
+#
+# go through archived logs (timewise backwards) and see if there
+# are lines belonging to us
+# (we rely on untouched log files, i.e. that modify time
+# hasn't been changed)
+#
+arch_logs() {
+ local next_log
+ local logf=$1
+ local from_time=$2
+ local to_time=$3
+
+ # look for files such as: ha-log-20090308 or
+ # ha-log-20090308.gz (.bz2) or ha-log.0, etc
+ ls -t $logf $logf*[0-9z] 2>/dev/null |
+ while read next_log; do
+ is_our_log $next_log $from_time $to_time
+ case $? in
+ 0) ;; # noop, continue
+ 1) echo $next_log # include log and continue
+ debug "found log $next_log"
+ ;;
+ 2) break;; # don't go through older logs!
+ 3) echo $next_log # include log and continue
+ debug "found log $next_log"
+ break
+ ;; # don't go through older logs!
+ esac
+ done
+}
+#
+# print part of the log
+#
+print_log() {
+ local cat=`find_decompressor $1`
+ $cat $1
+}
+print_logseg() {
+ if test -x $HA_NOARCHBIN/print_logseg; then
+ $HA_NOARCHBIN/print_logseg "$1" "$2" "$3"
+ return
+ fi
+
+ local logf=$1
+ local from_time=$2
+ local to_time=$3
+ local tmp sourcef
+
+ # uncompress to a temp file (if necessary)
+ local cat=`find_decompressor $logf`
+ if [ "$cat" != "cat" ]; then
+ tmp=`mktemp` ||
+ fatal "disk full"
+ add_tmpfiles $tmp
+ $cat $logf > $tmp ||
+ fatal "disk full"
+ sourcef=$tmp
+ else
+ sourcef=$logf
+ tmp=""
+ fi
+
+ if [ "$from_time" = 0 ]; then
+ FROM_LINE=1
+ else
+ FROM_LINE=`findln_by_time $sourcef $from_time`
+ fi
+ if [ -z "$FROM_LINE" ]; then
+ warning "couldn't find line for time $from_time; corrupt log file?"
+ return
+ fi
+
+ TO_LINE=""
+ if [ "$to_time" != 0 ]; then
+ TO_LINE=`findln_by_time $sourcef $to_time`
+ if [ -z "$TO_LINE" ]; then
+ warning "couldn't find line for time $to_time; corrupt log file?"
+ return
+ fi
+ fi
+ dumplog $sourcef $FROM_LINE $TO_LINE
+ debug "including segment [$FROM_LINE-$TO_LINE] from $logf"
+}
+#
+# print some log info (important for crm history)
+#
+loginfo() {
+ local logf=$1
+ local fake=$2
+ local nextpos=`python -c "f=open('$logf');f.seek(0,2);print f.tell()+1"`
+ if [ "$fake" ]; then
+ echo "synthetic:$logf $nextpos"
+ else
+ echo "$logf $nextpos"
+ fi
+}
+#
+# find log/set of logs which are interesting for us
+#
+dumplogset() {
+ local logf=$1
+ local from_time=$2
+ local to_time=$3
+
+ local logf_set=`arch_logs $logf $from_time $to_time`
+ if [ x = "x$logf_set" ]; then
+ return
+ fi
+
+ local num_logs=`echo "$logf_set" | wc -l`
+ local oldest=`echo $logf_set | awk '{print $NF}'`
+ local newest=`echo $logf_set | awk '{print $1}'`
+ local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'`
+
+ # the first logfile: from $from_time to $to_time (or end)
+ # logfiles in the middle: all
+ # the last logfile: from beginning to $to_time (or end)
+ case $num_logs in
+ 1) print_logseg $newest $from_time $to_time;;
+ *)
+ print_logseg $oldest $from_time 0
+ for f in $mid_logfiles; do
+ print_log $f
+ debug "including complete $f logfile"
+ done
+ print_logseg $newest 0 $to_time
+ ;;
+ esac
+}
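+# A usage sketch (epoch times from str2time in utillib.sh, assuming
+# the log format was already detected, i.e. getstampproc is exported):
+#
+#	dumplogset /var/log/ha-log `str2time "2007/9/5 12:30"` 0
+#
+# prints everything logged since that time, spliced across rotated
+# (possibly compressed) files.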
+
+#
+# cts_findlogseg finds lines for the CTS test run (FROM_LINE and
+# TO_LINE) and then extracts the timestamps to get FROM_TIME and
+# TO_TIME
+#
+cts_findlogseg() {
+ local testnum=$1
+ local logf=$2
+ if [ "x$logf" = "x" ]; then
+ logf=`findmsg "Running test.*\[ *$testnum\]" | awk '{print $1}'`
+ fi
+ getstampproc=`find_getstampproc < $logf`
+ export getstampproc # used by linetime
+
+ FROM_LINE=`grep -n "Running test.*\[ *$testnum\]" $logf | tail -1 | sed 's/:.*//'`
+ if [ -z "$FROM_LINE" ]; then
+ warning "couldn't find line for CTS test $testnum; corrupt log file?"
+ exit 1
+ else
+ FROM_TIME=`linetime $logf $FROM_LINE`
+ fi
+ TO_LINE=`grep -n "Running test.*\[ *$(($testnum+1))\]" $logf | tail -1 | sed 's/:.*//'`
+ [ "$TO_LINE" -a $FROM_LINE -lt $TO_LINE ] ||
+ TO_LINE=`wc -l < $logf`
+ TO_TIME=`linetime $logf $TO_LINE`
+ debug "including segment [$FROM_LINE-$TO_LINE] from $logf"
+ dumplog $logf $FROM_LINE $TO_LINE
+}
+
+#
+# this is how we pass environment to other hosts
+#
+dumpenv() {
+ cat<<EOF
+DEST=$DEST
+FROM_TIME=$FROM_TIME
+TO_TIME=$TO_TIME
+USER_NODES="$USER_NODES"
+NODES="$NODES"
+MASTER_NODE="$MASTER_NODE"
+HA_LOG=$HA_LOG
+MASTER_IS_HOSTLOG=$MASTER_IS_HOSTLOG
+UNIQUE_MSG=$UNIQUE_MSG
+SANITIZE="$SANITIZE"
+DO_SANITIZE="$DO_SANITIZE"
+SKIP_LVL="$SKIP_LVL"
+EXTRA_LOGS="$EXTRA_LOGS"
+USER_CLUSTER_TYPE="$USER_CLUSTER_TYPE"
+CONF="$CONF"
+B_CONF="$B_CONF"
+PACKAGES="$PACKAGES"
+CORES_DIRS="$CORES_DIRS"
+VERBOSITY="$VERBOSITY"
+EOF
+}
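+# The master pipes this into "hb_report __slave", locally or over ssh;
+# the collector saves its stdin to $WORKDIR/.env and sources it (see
+# the is_collector branch below), so both ends share one configuration.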
+is_collector() {
+ test "$SLAVE"
+}
+is_node() {
+ test "$THIS_IS_NODE"
+}
+is_master() {
+ ! is_collector && test "$WE" = "$MASTER_NODE"
+}
+start_slave_collector() {
+ local node=$1
+
+ dumpenv |
+ if [ "$node" = "$WE" ]; then
+ debug "running: $LOCAL_SUDO hb_report __slave"
+ $LOCAL_SUDO hb_report __slave
+ else
+		debug "running: ssh $SSH_OPTS $node \"$SUDO hb_report __slave\""
+ ssh $SSH_OPTS $node \
+ "$SUDO hb_report __slave"
+ fi | (cd $WORKDIR && tar xf -)
+}
+
+#
+# does ssh work?
+# and how
+# test the provided ssh user
+# or try to find a ssh user which works without password
+# if ssh works without password, we can run the collector in the
+# background and save some time
+#
+testsshconn() {
+ ssh $SSH_OPTS -T -o Batchmode=yes $1 true 2>/dev/null
+}
+findsshuser() {
+ local n u rc
+ local ssh_s ssh_user="__undef" try_user_list
+ if [ -z "$SSH_USER" ]; then
+ try_user_list="__default $TRY_SSH"
+ else
+ try_user_list="$SSH_USER"
+ fi
+ for n in $NODES; do
+ rc=1
+ [ "$n" = "$WE" ] && continue
+ for u in $try_user_list; do
+ if [ "$u" != '__default' ]; then
+ ssh_s=$u@$n
+ else
+ ssh_s=$n
+ fi
+ if testsshconn $ssh_s; then
+ debug "ssh $ssh_s OK"
+ ssh_user="$u"
+ try_user_list="$u" # we support just one user
+ rc=0
+ break
+ else
+ debug "ssh $ssh_s failed"
+ fi
+ done
+ [ $rc = 1 ] &&
+ SSH_PASSWORD_NODES="$SSH_PASSWORD_NODES $n"
+ done
+ if [ -n "$SSH_PASSWORD_NODES" ]; then
+ warning "passwordless ssh to node(s) $SSH_PASSWORD_NODES does not work"
+ fi
+
+ if [ "$ssh_user" = "__undef" ]; then
+ return 1
+ fi
+ if [ "$ssh_user" != "__default" ]; then
+ SSH_USER=$ssh_user
+ fi
+ return 0
+}
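+# e.g. with the default TRY_SSH="root hacluster" this tries, per node:
+#	ssh node true; ssh root@node true; ssh hacluster@node true
+# and keeps the first user that gets in without a password.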
+node_needs_pwd() {
+ local n
+ for n in $SSH_PASSWORD_NODES; do
+ [ "$n" = "$1" ] && return 0
+ done
+ return 1
+}
+say_ssh_user() {
+ if [ -n "$SSH_USER" ]; then
+ echo $SSH_USER
+ else
+ echo your user
+ fi
+}
+
+#
+# the usual stuff
+#
+getbacktraces() {
+ local f bf flist
+ flist=$(
+ for f in `find_files "$CORES_DIRS" $1 $2`; do
+ bf=`basename $f`
+ test `expr match $bf core` -gt 0 &&
+ echo $f
+ done)
+ [ "$flist" ] && {
+ getbt $flist > $3
+ debug "found backtraces: $flist"
+ }
+}
+pe2dot() {
+ local pef=`basename $1`
+ local dotf=`basename $pef .bz2`.dot
+ test -z "$PTEST" && return
+ (
+ cd `dirname $1`
+ $PTEST -D $dotf -x $pef >/dev/null 2>&1
+ )
+}
+getpeinputs() {
+ local pe_dir flist
+ local f
+ pe_dir=$PE_STATE_DIR
+ debug "looking for PE files in $pe_dir"
+ flist=$(
+ find_files $pe_dir $1 $2 | grep -v '[.]last$'
+ )
+ [ "$flist" ] && {
+ mkdir $3/`basename $pe_dir`
+ (
+ cd $3/`basename $pe_dir`
+ for f in $flist; do
+ ln -s $f
+ done
+ )
+ debug "found `echo $flist | wc -w` pengine input files in $pe_dir"
+ }
+ if [ `echo $flist | wc -w` -le 20 ]; then
+ for f in $flist; do
+ skip_lvl 1 || pe2dot $3/`basename $pe_dir`/`basename $f`
+ done
+ else
+ debug "too many PE inputs to create dot files"
+ fi
+}
+getratraces() {
+ local trace_dir flist
+ local f
+ trace_dir=$HA_VARLIB/trace_ra
+ test -d "$trace_dir" || return 0
+ debug "looking for RA trace files in $trace_dir"
+ flist=$(find_files $trace_dir $1 $2 | sed "s,`dirname $trace_dir`/,,g")
+ [ "$flist" ] && {
+ tar -cf - -C `dirname $trace_dir` $flist | tar -xf - -C $3
+ debug "found `echo $flist | wc -w` RA trace files in $trace_dir"
+ }
+}
+touch_DC_if_dc() {
+ local dc
+ dc=`crmadmin -D 2>/dev/null | awk '{print $NF}'`
+ if [ "$WE" = "$dc" ]; then
+ touch $1/DC
+ fi
+}
+corosync_blackbox() {
+ local from_time=$1
+ local to_time=$2
+ local outf=$3
+ local inpf
+ inpf=`find_files /var/lib/corosync $from_time $to_time | grep -w fdata`
+ if [ -f "$inpf" ]; then
+ corosync-blackbox > $outf
+ touch -r $inpf $outf
+ fi
+}
+getconfigurations() {
+ local conf
+ local dest=$1
+ for conf in $CONFIGURATIONS; do
+ if [ -f $conf ]; then
+ cp -p $conf $dest
+ elif [ -d $conf ]; then
+ (
+ cd `dirname $conf` &&
+ tar cf - `basename $conf` | (cd $dest && tar xf -)
+ )
+ fi
+ done
+}
+
+
+#
+# some basic system info and stats
+#
+sys_info() {
+ cluster_info
+ hb_report -V # our info
+ echo "resource-agents: `grep 'Build version:' @OCF_ROOT_DIR@/lib/heartbeat/ocf-shellfuncs`"
+ crm_info
+ pkg_versions $PACKAGES
+ skip_lvl 1 || verify_packages $PACKAGES
+ echo "Platform: `uname`"
+ echo "Kernel release: `uname -r`"
+ echo "Architecture: `uname -m`"
+ [ `uname` = Linux ] &&
+ echo "Distribution: `distro`"
+}
+sys_stats() {
+ set -x
+ uname -n
+ uptime
+ ps axf
+ ps auxw
+ top -b -n 1
+ ifconfig -a
+ ip addr list
+ netstat -i
+ arp -an
+ test -d /proc && {
+ cat /proc/cpuinfo
+ }
+ lsscsi
+ lspci
+ mount
+ # df can block, run in background, allow for 5 seconds (!)
+ local maxcnt=5
+ df &
+ while kill -0 $! >/dev/null 2>&1; do
+ sleep 1
+ if [ $maxcnt -le 0 ]; then
+ warning "df appears to be hanging, continuing without it"
+ break
+ fi
+ maxcnt=$((maxcnt-1))
+ done
+ set +x
+}
+time_status() {
+ date
+ ntpdc -pn
+}
+dlm_dump() {
+ if which dlm_tool >/dev/null 2>&1 ; then
+ echo NOTICE - Lockspace overview:
+ dlm_tool ls
+ dlm_tool ls | grep name |
+ while read X N ; do
+ echo NOTICE - Lockspace $N:
+ dlm_tool lockdump $N
+ done
+ echo NOTICE - Lockspace history:
+ dlm_tool dump
+ fi
+}
+
+
+#
+# replace sensitive info with '****'
+#
+sanitize() {
+ local f rc
+ for f in $1/$B_CONF; do
+ [ -f "$f" ] && sanitize_one $f
+ done
+ rc=0
+ for f in $1/$CIB_F $1/pengine/*; do
+ if [ -f "$f" ]; then
+ if [ "$DO_SANITIZE" ]; then
+ sanitize_one $f
+ else
+ test_sensitive_one $f && rc=1
+ fi
+ fi
+ done
+ [ $rc -ne 0 ] && {
+ warning "some PE or CIB files contain possibly sensitive data"
+ warning "you may not want to send this report to a public mailing list"
+ }
+}
+
+#
+# remove duplicates if files are same, make links instead
+#
+consolidate() {
+ for n in $NODES; do
+ if [ -f $1/$2 ]; then
+ rm $1/$n/$2
+ else
+ mv $1/$n/$2 $1
+ fi
+ ln -s ../$2 $1/$n
+ done
+}
+
+#
+# some basic analysis of the report
+#
+checkcrmvfy() {
+ for n in $NODES; do
+ if [ -s $1/$n/$CRM_VERIFY_F ]; then
+ echo "WARN: crm_verify reported warnings at $n:"
+ cat $1/$n/$CRM_VERIFY_F
+ fi
+ done
+}
+checkbacktraces() {
+ for n in $NODES; do
+ [ -s $1/$n/$BT_F ] && {
+ echo "WARN: coredumps found at $n:"
+ egrep 'Core was generated|Program terminated' \
+ $1/$n/$BT_F |
+ sed 's/^/ /'
+ }
+ done
+}
+checkpermissions() {
+ for n in $NODES; do
+ if [ -s $1/$n/$PERMISSIONS_F ]; then
+ echo "WARN: problem with permissions/ownership at $n:"
+ cat $1/$n/$PERMISSIONS_F
+ fi
+ done
+}
+checklogs() {
+ local logs pattfile l n
+ logs=$(find $1 -name $HALOG_F;
+ for l in $EXTRA_LOGS; do find $1/* -name `basename $l`; done)
+ [ "$logs" ] || return
+ pattfile=`mktemp` ||
+ fatal "cannot create temporary files"
+ add_tmpfiles $pattfile
+ for p in $LOG_PATTERNS; do
+ echo "$p"
+ done > $pattfile
+ echo ""
+ echo "Log patterns:"
+ for n in $NODES; do
+ cat $logs | grep -f $pattfile
+ done
+}
+
+#
+# check if files have same content in the cluster
+#
+cibdiff() {
+ local d1 d2
+ d1=`dirname $1`
+ d2=`dirname $2`
+ if [ -f $d1/RUNNING -a -f $d2/RUNNING ] ||
+ [ -f $d1/STOPPED -a -f $d2/STOPPED ]; then
+ if which crm_diff > /dev/null 2>&1; then
+ crm_diff -c -n $1 -o $2
+ else
+ info "crm_diff(8) not found, cannot diff CIBs"
+ fi
+ else
+ echo "can't compare cibs from running and stopped systems"
+ fi
+}
+txtdiff() {
+ diff -bBu $1 $2
+}
+diffcheck() {
+ [ -f "$1" ] || {
+ echo "$1 does not exist"
+ return 1
+ }
+ [ -f "$2" ] || {
+ echo "$2 does not exist"
+ return 1
+ }
+ case `basename $1` in
+ $CIB_F)
+ cibdiff $1 $2;;
+ $B_CONF)
+ txtdiff $1 $2;; # confdiff?
+ *)
+ txtdiff $1 $2;;
+ esac
+}
+analyze_one() {
+ local rc node0 n
+ rc=0
+ node0=""
+ for n in $NODES; do
+ if [ "$node0" ]; then
+ diffcheck $1/$node0/$2 $1/$n/$2
+ rc=$(($rc+$?))
+ else
+ node0=$n
+ fi
+ done
+ return $rc
+}
+analyze() {
+ local f flist
+ flist="$HOSTCACHE $MEMBERSHIP_F $CIB_F $CRM_MON_F $B_CONF logd.cf $SYSINFO_F"
+ for f in $flist; do
+ printf "Diff $f... "
+ ls $1/*/$f >/dev/null 2>&1 || {
+ echo "no $1/*/$f :/"
+ continue
+ }
+ if analyze_one $1 $f; then
+ echo "OK"
+ [ "$f" != $CIB_F ] &&
+ consolidate $1 $f
+ else
+ echo ""
+ fi
+ done
+ checkcrmvfy $1
+ checkbacktraces $1
+ checkpermissions $1
+ checklogs $1
+}
+events_all() {
+ local Epatt title p
+ Epatt=`echo "$EVENT_PATTERNS" |
+ while read title p; do [ -n "$p" ] && echo -n "|$p"; done |
+ sed 's/.//'
+ `
+ grep -E "$Epatt" $1
+}
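+# e.g. the EVENT_PATTERNS entries "membership A" and "quorum B" fold
+# into the single extended regexp "A|B" (the sed strips the leading
+# "|").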
+events() {
+ local destdir n
+ destdir=$1
+ if [ -f $destdir/$HALOG_F ]; then
+ events_all $destdir/$HALOG_F > $destdir/events.txt
+ for n in $NODES; do
+ awk "\$4==\"$n\"" $destdir/events.txt > $destdir/$n/events.txt
+ done
+ else
+ for n in $NODES; do
+ [ -f $destdir/$n/$HALOG_F ] ||
+ continue
+ events_all $destdir/$n/$HALOG_F > $destdir/$n/events.txt
+ done
+ fi
+}
+
+#
+# description template, editing, and other notes
+#
+mktemplate() {
+ cat<<EOF
+Please edit this template and describe the issue/problem you
+encountered. Then, post to
+ Linux-HA@lists.linux-ha.org
+or file a bug at
+ http://developerbugs.linux-foundation.org/
+
+See http://linux-ha.org/wiki/ReportingProblems for detailed
+description on how to report problems.
+
+Thank you.
+
+Date: `date`
+By: $PROG $userargs
+Subject: [short problem description]
+Severity: [choose one] enhancement minor normal major critical blocking
+Component: [choose one] CRM LRM CCM RA fencing $CLUSTER_TYPE comm GUI tools other
+
+Detailed description:
+---
+[...]
+---
+
+EOF
+
+ if [ -f $WORKDIR/$SYSINFO_F ]; then
+ echo "Common system info found:"
+ cat $WORKDIR/$SYSINFO_F
+ else
+ for n in $NODES; do
+ if [ -f $WORKDIR/$n/$SYSINFO_F ]; then
+ echo "System info $n:"
+ sed 's/^/ /' $WORKDIR/$n/$SYSINFO_F
+ fi
+ done
+ fi
+}
+edittemplate() {
+ local ec
+ if ec=`pickfirst $EDITOR vim vi emacs nano`; then
+ $ec $1
+ else
+ warning "could not find a text editor"
+ fi
+}
+pickcompress() {
+ if COMPRESS_PROG=`pickfirst bzip2 gzip xz`; then
+ if [ "$COMPRESS_PROG" = xz ]; then
+ COMPRESS_EXT=.xz
+ elif [ "$COMPRESS_PROG" = bzip2 ]; then
+ COMPRESS_EXT=.bz2
+ else
+ COMPRESS_EXT=.gz
+ fi
+ else
+ warning "could not find a compression program; the resulting tarball may be huge"
+ COMPRESS_PROG=cat
+ COMPRESS_EXT=
+ fi
+}
+# get the right part of the log
+getlog() {
+ local cnt
+ local outf
+ outf=$WORKDIR/$HALOG_F
+
+ if [ "$HA_LOG" ]; then # log provided by the user?
+ [ -f "$HA_LOG" ] || { # not present
+ is_collector || # warning if not on slave
+ warning "$HA_LOG not found; we will try to find log ourselves"
+ HA_LOG=""
+ }
+ fi
+ if [ "$HA_LOG" = "" ]; then
+ HA_LOG=`findlog`
+ [ "$HA_LOG" ] &&
+ cnt=`fgrep -c $UNIQUE_MSG < $HA_LOG`
+ fi
+ if [ "$HA_LOG" = "" -o ! -f "$HA_LOG" ]; then
+ if [ "$CTS" ]; then
+ cts_findlogseg $CTS > $outf
+ else
+ warning "no log at $WE"
+ fi
+ return
+ fi
+ if [ "$cnt" ] && [ $cnt -gt 1 -a $cnt -eq $NODECNT ]; then
+ MASTER_IS_HOSTLOG=1
+ info "found the central log!"
+ fi
+
+ if [ "$NO_str2time" ]; then
+		warning "found a log, but we cannot slice it"
+ warning "please install the perl Date::Parse module"
+ elif [ "$CTS" ]; then
+ cts_findlogseg $CTS $HA_LOG > $outf
+ else
+ getstampproc=`find_getstampproc < $HA_LOG`
+ if [ "$getstampproc" ]; then
+ export getstampproc # used by linetime
+ dumplogset $HA_LOG $FROM_TIME $TO_TIME > $outf &&
+ loginfo $HA_LOG > $outf.info ||
+ fatal "disk full"
+ else
+ warning "could not figure out the log format of $HA_LOG"
+ fi
+ fi
+}
+#
+# get all other info (config, stats, etc)
+#
+collect_info() {
+ local l
+ sys_info > $WORKDIR/$SYSINFO_F 2>&1 &
+ sys_stats > $WORKDIR/$SYSSTATS_F 2>&1 &
+ getconfig $WORKDIR
+ getpeinputs $FROM_TIME $TO_TIME $WORKDIR &
+ crmconfig $WORKDIR &
+ skip_lvl 1 || touch_DC_if_dc $WORKDIR &
+ getbacktraces $FROM_TIME $TO_TIME $WORKDIR/$BT_F
+ getconfigurations $WORKDIR
+ check_perms > $WORKDIR/$PERMISSIONS_F 2>&1
+ dlm_dump > $WORKDIR/$DLM_DUMP_F 2>&1
+ time_status > $WORKDIR/$TIME_F 2>&1
+ corosync_blackbox $FROM_TIME $TO_TIME $WORKDIR/$COROSYNC_RECORDER_F
+ getratraces $FROM_TIME $TO_TIME $WORKDIR
+ wait
+ skip_lvl 1 || sanitize $WORKDIR
+
+ for l in $EXTRA_LOGS; do
+ [ "$NO_str2time" ] && break
+ [ ! -f "$l" ] && continue
+ if [ "$l" = "$HA_LOG" -a "$l" != "$HALOG_F" ]; then
+ ln -s $HALOG_F $WORKDIR/`basename $l`
+ continue
+ fi
+ getstampproc=`find_getstampproc < $l`
+ if [ "$getstampproc" ]; then
+ export getstampproc # used by linetime
+ dumplogset $l $FROM_TIME $TO_TIME > $WORKDIR/`basename $l` &&
+ loginfo $l > $WORKDIR/`basename $l`.info ||
+ fatal "disk full"
+ else
+ warning "could not figure out the log format of $l"
+ fi
+ done
+}
+finalword() {
+ if [ "$COMPRESS" = "1" ]; then
+ echo "The report is saved in $DESTDIR/$DEST.tar$COMPRESS_EXT"
+ else
+ echo "The report is saved in $DESTDIR/$DEST"
+ fi
+ echo " "
+ echo "Thank you for taking time to create this report."
+}
+
+[ $# -eq 0 ] && usage
+
+# check for the major prereq for a) parameter parsing and b)
+# parsing logs
+#
+NO_str2time=""
+t=`str2time "12:00"`
+if [ "$t" = "" ]; then
+ NO_str2time=1
+ is_collector ||
+ fatal "please install the perl Date::Parse module"
+fi
+
+WE=`uname -n` # who am i?
+tmpdir=`mktemp -t -d .hb_report.workdir.XXXXXX` ||
+ fatal "disk full"
+add_tmpfiles $tmpdir
+WORKDIR=$tmpdir
+
+#
+# part 1: get and check options; and the destination
+#
+if ! is_collector; then
+ setvarsanddefaults
+ userargs="$@"
+ DESTDIR=.
+ DEST="hb_report-"`date +"%a-%d-%b-%Y"`
+ while getopts f:t:l:u:X:p:L:e:E:n:MSDCZAVsvhdQ o; do
+ case "$o" in
+ h) usage;;
+ V) version;;
+ f)
+ if echo "$OPTARG" | grep -qs '^cts:'; then
+ FROM_TIME=0 # to be calculated later
+ CTS=`echo "$OPTARG" | sed 's/cts://'`
+ DEST="cts-$CTS-"`date +"%a-%d-%b-%Y"`
+ else
+ FROM_TIME=`str2time "$OPTARG"`
+ chktime "$FROM_TIME" "$OPTARG"
+ fi
+ ;;
+ t) TO_TIME=`str2time "$OPTARG"`
+ chktime "$TO_TIME" "$OPTARG"
+ ;;
+ n) NODES_SOURCE=user
+ USER_NODES="$USER_NODES $OPTARG"
+ ;;
+ u) SSH_USER="$OPTARG";;
+ X) SSH_OPTS="$SSH_OPTS $OPTARG";;
+ l) HA_LOG="$OPTARG";;
+ e) EDITOR="$OPTARG";;
+ p) SANITIZE="$SANITIZE $OPTARG";;
+ s) DO_SANITIZE="1";;
+ Q) SKIP_LVL=$((SKIP_LVL + 1));;
+ L) LOG_PATTERNS="$LOG_PATTERNS $OPTARG";;
+ S) NO_SSH=1;;
+ D) NO_DESCRIPTION=1;;
+ C) : ;;
+ Z) FORCE_REMOVE_DEST=1;;
+ M) EXTRA_LOGS="";;
+ E) EXTRA_LOGS="$EXTRA_LOGS $OPTARG";;
+ A) USER_CLUSTER_TYPE="openais";;
+ v) VERBOSITY=$((VERBOSITY + 1));;
+ d) COMPRESS="";;
+ [?]) usage short;;
+ esac
+ done
+ shift $(($OPTIND-1))
+ [ $# -gt 1 ] && usage short
+ set_dest $*
+ [ "$FROM_TIME" ] || usage short
+ WORKDIR=$tmpdir/$DEST
+else
+ WORKDIR=$tmpdir/$DEST/$WE
+fi
+
+mkdir -p $WORKDIR
+[ -d $WORKDIR ] || no_dir
+
+if is_collector; then
+ cat > $WORKDIR/.env
+ . $WORKDIR/.env
+fi
+
+[ $VERBOSITY -gt 1 ] && {
+ is_collector || {
+ info "high debug level, please read debug.out"
+ }
+ PS4='+ `date +"%T"`: ${FUNCNAME[0]:+${FUNCNAME[0]}:}${LINENO}: '
+ if echo "$SHELL" | grep bash > /dev/null &&
+ [ ${BASH_VERSINFO[0]} = "4" ]; then
+ exec 3>>$WORKDIR/debug.out
+ BASH_XTRACEFD=3
+ else
+ exec 2>>$WORKDIR/debug.out
+ fi
+ set -x
+}
+
+compatibility_pcmk
+
+# allow user to enforce the cluster type
+# if not, then it is found out on _all_ nodes
+if [ -z "$USER_CLUSTER_TYPE" ]; then
+ CLUSTER_TYPE=`get_cluster_type`
+else
+ CLUSTER_TYPE=$USER_CLUSTER_TYPE
+fi
+
+# which core directories and packages we look at depends on the
+# cluster stack in use
+CORES_DIRS="`2>/dev/null ls -d $HA_VARLIB/cores $PCMK_LIB/cores | uniq`"
+PACKAGES="pacemaker libpacemaker3
+pacemaker-pygui pacemaker-pymgmt pymgmt-client
+openais libopenais2 libopenais3 corosync libcorosync4
+resource-agents cluster-glue libglue2 ldirectord
+heartbeat heartbeat-common heartbeat-resources libheartbeat2
+ocfs2-tools ocfs2-tools-o2cb ocfs2console
+ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace
+drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace
+drbd-heartbeat drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen
+lvm2 lvm2-clvm cmirrord
+libdlm libdlm2 libdlm3
+hawk ruby lighttpd
+kernel-default kernel-pae kernel-xen
+glibc
+"
+case "$CLUSTER_TYPE" in
+openais)
+ CONF=/etc/corosync/corosync.conf # corosync?
+ if test -f $CONF; then
+ CORES_DIRS="$CORES_DIRS /var/lib/corosync"
+ else
+ CONF=/etc/ais/openais.conf
+ CORES_DIRS="$CORES_DIRS /var/lib/openais"
+ fi
+ CF_SUPPORT=$HA_NOARCHBIN/openais_conf_support.sh
+ MEMBERSHIP_TOOL_OPTS=""
+ unset HOSTCACHE HB_UUID_F
+ ;;
+heartbeat)
+ CONF=$HA_CF
+ CF_SUPPORT=$HA_NOARCHBIN/ha_cf_support.sh
+ MEMBERSHIP_TOOL_OPTS="-H"
+ ;;
+esac
+B_CONF=`basename $CONF`
+
+if test -f "$CF_SUPPORT"; then
+ . $CF_SUPPORT
+else
+ fatal "no stack specific support: $CF_SUPPORT"
+fi
+
+if [ "x$CTS" = "x" ] || is_collector; then
+ getlogvars
+ debug "log settings: facility=$HA_LOGFACILITY logfile=$HA_LOGFILE debugfile=$HA_DEBUGFILE"
+elif ! is_collector; then
+ ctslog=`findmsg "CTS: Stack:" | awk '{print $1}'`
+ debug "Using CTS control file: $ctslog"
+ USER_NODES=`grep CTS: $ctslog | grep -v debug: | grep " \* " | sed s:.*\\\*::g | sort -u | tr '\\n' ' '`
+ HA_LOGFACILITY=`findmsg "CTS:.*Environment.SyslogFacility" | awk '{print $NF}'`
+ NODES_SOURCE=user
+fi
+
+# the goods
+ANALYSIS_F=analysis.txt
+DESCRIPTION_F=description.txt
+HALOG_F=ha-log.txt
+BT_F=backtraces.txt
+SYSINFO_F=sysinfo.txt
+SYSSTATS_F=sysstats.txt
+DLM_DUMP_F=dlm_dump.txt
+TIME_F=time.txt
+export ANALYSIS_F DESCRIPTION_F HALOG_F BT_F SYSINFO_F SYSSTATS_F DLM_DUMP_F TIME_F
+CRM_MON_F=crm_mon.txt
+MEMBERSHIP_F=members.txt
+HB_UUID_F=hb_uuid.txt
+HOSTCACHE=hostcache
+CRM_VERIFY_F=crm_verify.txt
+PERMISSIONS_F=permissions.txt
+CIB_F=cib.xml
+CIB_TXT_F=cib.txt
+COROSYNC_RECORDER_F=fdata.txt
+export CRM_MON_F MEMBERSHIP_F CRM_VERIFY_F CIB_F CIB_TXT_F HB_UUID_F PERMISSIONS_F
+export COROSYNC_RECORDER_F
+CONFIGURATIONS="/etc/drbd.conf /etc/drbd.d /etc/booth/booth.conf"
+export CONFIGURATIONS
+
+THIS_IS_NODE=""
+if ! is_collector; then
+ MASTER_NODE=$WE
+ NODES=`getnodes`
+ debug "nodes: `echo $NODES`"
+fi
+NODECNT=`echo $NODES | wc -w`
+if [ "$NODECNT" = 0 ]; then
+ fatal "could not figure out a list of nodes; is this a cluster node?"
+fi
+if echo $NODES | grep -wqs $WE; then # are we a node?
+ THIS_IS_NODE=1
+fi
+
+# this only on master
+if ! is_collector; then
+
+ # if this is not a node, then some things afterwards might
+ # make no sense (not work)
+ if ! is_node && [ "$NODES_SOURCE" != user ]; then
+ warning "this is not a node and you didn't specify a list of nodes using -n"
+ fi
+#
+# part 2: ssh business
+#
+ # find out if ssh works
+ if [ -z "$NO_SSH" ]; then
+ # if the ssh user was supplied, consider that it
+ # works; helps reduce the number of ssh invocations
+ findsshuser
+ if [ -n "$SSH_USER" ]; then
+ SSH_OPTS="$SSH_OPTS -o User=$SSH_USER"
+ fi
+ fi
+ # assume that only root can collect data
+ SUDO=""
+ if [ -z "$SSH_USER" -a `id -u` != 0 ] ||
+ [ -n "$SSH_USER" -a "$SSH_USER" != root ]; then
+ debug "ssh user other than root, use sudo"
+ SUDO="sudo -u root"
+ fi
+ LOCAL_SUDO=""
+ if [ `id -u` != 0 ]; then
+ debug "local user other than root, use sudo"
+ LOCAL_SUDO="sudo -u root"
+ fi
+fi
+
+if is_collector && [ "$HA_LOGFACILITY" ]; then
+ logmark $HA_LOGFACILITY.$HA_LOGLEVEL $UNIQUE_MSG
+ # allow for the log message to get (hopefully) written to the
+ # log file
+ sleep 1
+fi
+
+#
+# part 4: find the logs and cut out the segment for the period
+#
+
+# if the master is also a node, getlog is going to be invoked
+# from the collector
+(is_master && is_node) ||
+ getlog
+
+if ! is_collector; then
+ for node in $NODES; do
+ if node_needs_pwd $node; then
+ info "Please provide password for `say_ssh_user` at $node"
+ info "Note that collecting data will take a while."
+ start_slave_collector $node
+ else
+ start_slave_collector $node &
+ SLAVEPIDS="$SLAVEPIDS $!"
+ fi
+ done
+fi
+
+#
+# part 5: endgame:
+# slaves tar their results to stdout, the master waits
+# for them, analyses results, asks the user to edit the
+# problem description template, and prints final notes
+#
+if is_collector; then
+ collect_info
+ (cd $WORKDIR/.. && tar -h -cf - $WE)
+else
+ if [ -n "$SLAVEPIDS" ]; then
+ wait $SLAVEPIDS
+ fi
+ analyze $WORKDIR > $WORKDIR/$ANALYSIS_F &
+ events $WORKDIR &
+ mktemplate > $WORKDIR/$DESCRIPTION_F
+ [ "$NO_DESCRIPTION" ] || {
+ echo press enter to edit the problem description...
+ read junk
+ edittemplate $WORKDIR/$DESCRIPTION_F
+ }
+ wait
+ if [ "$COMPRESS" = "1" ]; then
+ pickcompress
+ (cd $WORKDIR/.. && tar cf - $DEST) | $COMPRESS_PROG > $DESTDIR/$DEST.tar$COMPRESS_EXT
+ else
+ mv $WORKDIR $DESTDIR
+ fi
+ finalword
+fi
diff --git a/hb_report/openais_conf_support.sh b/hb_report/openais_conf_support.sh
new file mode 100644
index 0000000..b96d1aa
--- /dev/null
+++ b/hb_report/openais_conf_support.sh
@@ -0,0 +1,97 @@
+ # Copyright (C) 2007 Dejan Muhamedagic <dmuhamedagic@suse.de>
+ #
+ # This program is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU General Public
+ # License as published by the Free Software Foundation; either
+ # version 2.1 of the License, or (at your option) any later version.
+ #
+ # This software is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public
+ # License along with this library; if not, write to the Free Software
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ #
+
+#
+# Stack specific part (openais)
+# openais.conf/logd.cf parsing
+#
+# cut out a stanza
+getstanza() {
+ awk -v name="$1" '
+ !in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ { # stanza start
+ if ($1 == name)
+ in_stanza = 1
+ }
+ in_stanza { print }
+ in_stanza && NF==1 && $1 == "}" { exit }
+ '
+}
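+# For example, feeding a corosync.conf containing
+#	logging {
+#		to_syslog: yes
+#	}
+# through `getstanza logging` prints just that block.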
+# supply stanza in $1 and variable name in $2
+# (stanza is optional)
+getcfvar() {
+ [ -f "$CONF" ] || return
+ sed 's/#.*//' < $CONF |
+ if [ $# -eq 2 ]; then
+ getstanza "$1"
+ shift 1
+ else
+ cat
+ fi |
+ awk -v varname="$1" '
+ NF==2 && match($1,varname":$")==1 { print $2; exit; }
+ '
+}
+iscfvarset() {
+ test "`getcfvar $1`"
+}
+iscfvartrue() {
+ getcfvar $1 $2 |
+ egrep -qsi "^(true|y|yes|on|1)"
+}
+uselogd() {
+ iscfvartrue use_logd
+}
+get_ais_logvars() {
+ if iscfvartrue to_file; then
+ HA_LOGFILE=`getcfvar logfile`
+ HA_LOGFILE=${HA_LOGFILE:-"syslog"}
+ HA_DEBUGFILE=$HA_LOGFILE
+ elif iscfvartrue to_syslog; then
+ HA_LOGFACILITY=`getcfvar syslog_facility`
+ HA_LOGFACILITY=${HA_LOGFACILITY:-"daemon"}
+ fi
+}
+getlogvars() {
+ HA_LOGFACILITY=${HA_LOGFACILITY:-$DEFAULT_HA_LOGFACILITY}
+ HA_LOGLEVEL="info"
+ iscfvartrue debug && # prefer debug level if set
+ HA_LOGLEVEL="debug"
+ if uselogd; then
+ [ -f "$LOGD_CF" ] || {
+ debug "logd used but logd.cf not found: using defaults"
+ return # no configuration: use defaults
+ }
+ debug "reading log settings from $LOGD_CF"
+ get_logd_logvars
+ else
+ debug "reading log settings from $CONF"
+ get_ais_logvars
+ fi
+}
+cluster_info() {
+ : echo "openais version: how?"
+ if [ "$CONF" = /etc/corosync/corosync.conf ]; then
+ /usr/sbin/corosync -v
+ fi
+}
+essential_files() {
+ cat<<EOF
+d $PCMK_LIB 0750 hacluster haclient
+d $PE_STATE_DIR 0750 hacluster haclient
+d $CIB_DIR 0750 hacluster haclient
+EOF
+}
diff --git a/hb_report/utillib.sh b/hb_report/utillib.sh
new file mode 100644
index 0000000..0fcab80
--- /dev/null
+++ b/hb_report/utillib.sh
@@ -0,0 +1,752 @@
+ # Copyright (C) 2007 Dejan Muhamedagic <dmuhamedagic@suse.de>
+ #
+ # This program is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU General Public
+ # License as published by the Free Software Foundation; either
+ # version 2.1 of the License, or (at your option) any later version.
+ #
+ # This software is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public
+ # License along with this library; if not, write to the Free Software
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ #
+
+#
+# figure out the cluster type, depending on the process list
+# and existence of configuration files
+#
+get_cluster_type() {
+ if ps -ef | egrep -qs '[a]isexec|[c]orosync' ||
+ [ -f /etc/ais/openais.conf -a ! -f "$HA_CF" ] ||
+ [ -f /etc/corosync/corosync.conf -a ! -f "$HA_CF" ]
+ then
+ debug "this is OpenAIS cluster stack"
+ echo "openais"
+ else
+ debug "this is Heartbeat cluster stack"
+ echo "heartbeat"
+ fi
+}
+#
+# find out which membership tool is installed
+#
+echo_membership_tool() {
+ local f membership_tools
+ membership_tools="ccm_tool crm_node"
+ for f in $membership_tools; do
+ which $f 2>/dev/null && break
+ done
+}
+#
+# find out whether ptest or crm_simulate is installed
+#
+echo_ptest_tool() {
+ local f ptest_progs
+ ptest_progs="crm_simulate ptest"
+ for f in $ptest_progs; do
+ which $f 2>/dev/null && break
+ done
+}
+#
+# find nodes for this cluster
+#
+getnodes() {
+ # 1. set by user?
+ if [ "$USER_NODES" ]; then
+ echo $USER_NODES
+ # 2. running crm
+ elif iscrmrunning; then
+ debug "querying CRM for nodes"
+ get_crm_nodes
+ # 3. hostcache
+ elif [ -f $HA_VARLIB/hostcache ]; then
+ debug "reading nodes from $HA_VARLIB/hostcache"
+ awk '{print $1}' $HA_VARLIB/hostcache
+ # 4. ha.cf
+ elif [ "$CLUSTER_TYPE" = heartbeat ]; then
+ debug "reading nodes from ha.cf"
+ getcfvar node
+ # 5. if the cluster's stopped, try the CIB
+ elif [ -f $CIB_DIR/$CIB_F ]; then
+ debug "reading nodes from the archived $CIB_DIR/$CIB_F"
+ (CIB_file=$CIB_DIR/$CIB_F get_crm_nodes)
+ fi
+}
+
+logd_getcfvar() {
+ sed 's/#.*//' < $LOGD_CF |
+ grep -w "^$1" |
+ sed 's/^[^[:space:]]*[[:space:]]*//'
+}
+get_logd_logvars() {
+ # unless logfacility is set to none, heartbeat/ha_logd are
+ # going to log through syslog
+ HA_LOGFACILITY=`logd_getcfvar logfacility`
+ [ "" = "$HA_LOGFACILITY" ] && HA_LOGFACILITY=$DEFAULT_HA_LOGFACILITY
+ [ none = "$HA_LOGFACILITY" ] && HA_LOGFACILITY=""
+ HA_LOGFILE=`logd_getcfvar logfile`
+ HA_DEBUGFILE=`logd_getcfvar debugfile`
+}
+findlogdcf() {
+ local f
+ for f in \
+ `test -x $HA_BIN/ha_logd &&
+ which strings > /dev/null 2>&1 &&
+ strings $HA_BIN/ha_logd | grep 'logd\.cf'` \
+ `for d; do echo $d/logd.cf $d/ha_logd.cf; done`
+ do
+ if [ -f "$f" ]; then
+ echo $f
+ debug "found logd.cf at $f"
+ return 0
+ fi
+ done
+ debug "no logd.cf"
+ return 1
+}
+#
+# logging
+#
+syslogmsg() {
+ local severity logtag
+ severity=$1
+ shift 1
+ logtag=""
+ [ "$HA_LOGTAG" ] && logtag="-t $HA_LOGTAG"
+ logger -p ${HA_LOGFACILITY:-$DEFAULT_HA_LOGFACILITY}.$severity $logtag $*
+}
+
+#
+# find log destination
+#
+findmsg() {
+ local d syslogdirs favourites mark log
+ # this is tricky, we try a few directories
+ syslogdirs="/var/log /var/logs /var/syslog /var/adm
+ /var/log/ha /var/log/cluster /var/log/pacemaker
+ /var/log/heartbeat /var/log/crm /var/log/corosync /var/log/openais"
+ favourites="ha-*"
+ mark=$1
+ log=""
+ for d in $syslogdirs; do
+ [ -d $d ] || continue
+ log=`grep -l -e "$mark" $d/$favourites` && break
+ test "$log" && break
+ log=`grep -l -e "$mark" $d/*` && break
+ test "$log" && break
+ done 2>/dev/null
+ [ "$log" ] &&
+ ls -t $log | tr '\n' ' '
+ [ "$log" ] &&
+ debug "found HA log at `ls -t $log | tr '\n' ' '`" ||
+ debug "no HA log found in $syslogdirs"
+}
+
+#
+# print a segment of a log file
+#
+str2time() {
+ perl -e "\$time='$*';" -e '
+ $unix_tm = 0;
+ eval "use Date::Parse";
+ if (!$@) {
+ $unix_tm = str2time($time);
+ } else {
+ eval "use Date::Manip";
+ if (!$@) {
+			$unix_tm = UnixDate(ParseDateString($time), "%s");
+ }
+ }
+ if ($unix_tm != "") {
+ $unix_tm = int($unix_tm);
+ }
+ print $unix_tm;
+ '
+}
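+# e.g. `str2time "2007/9/5 12:30"` prints the epoch seconds for that
+# local time, via Date::Parse when installed, else Date::Manip.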
+getstamp_syslog() {
+ awk '{print $1,$2,$3}'
+}
+getstamp_legacy() {
+ awk '{print $2}' | sed 's/_/ /'
+}
+getstamp_rfc5424() {
+ awk '{print $1}'
+}
+get_ts() {
+ local l="$1" ts
+ ts=$(str2time `echo "$l" | $getstampproc`)
+ if [ -z "$ts" ]; then
+ local fmt
+ for fmt in rfc5424 syslog legacy; do
+ [ "getstamp_$fmt" = "$getstampproc" ] && continue
+ ts=$(str2time `echo "$l" | getstamp_$fmt`)
+ [ -n "$ts" ] && break
+ done
+ fi
+ echo $ts
+}
+linetime() {
+ get_ts "`tail -n +$2 $1 | head -1`"
+}
+find_getstampproc() {
+ local t l func trycnt
+ t=0 l="" func=""
+ trycnt=10
+ while [ $trycnt -gt 0 ] && read l; do
+ t=$(str2time `echo $l | getstamp_syslog`)
+ if [ "$t" ]; then
+ func="getstamp_syslog"
+ debug "the log file is in the syslog format"
+ break
+ fi
+ t=$(str2time `echo $l | getstamp_rfc5424`)
+ if [ "$t" ]; then
+ func="getstamp_rfc5424"
+ debug "the log file is in the rfc5424 format"
+ break
+ fi
+ t=$(str2time `echo $l | getstamp_legacy`)
+ if [ "$t" ]; then
+ func="getstamp_legacy"
+ debug "the log file is in the legacy format (please consider switching to syslog format)"
+ break
+ fi
+ trycnt=$(($trycnt-1))
+ done
+ echo $func
+}
+find_first_ts() {
+ local l ts
+ while read l; do
+ ts=`get_ts "$l"`
+ [ "$ts" ] && break
+ warning "cannot extract time: |$l|; will try the next one"
+ done
+ echo $ts
+}
+findln_by_time() {
+ local logf=$1
+ local tm=$2
+ local first=1
+ local last=`wc -l < $logf`
+ local tmid mid trycnt
+ while [ $first -le $last ]; do
+ mid=$((($last+$first)/2))
+ trycnt=10
+ while [ $trycnt -gt 0 ]; do
+ tmid=`linetime $logf $mid`
+ [ "$tmid" ] && break
+ warning "cannot extract time: $logf:$mid; will try the next one"
+ trycnt=$(($trycnt-1))
+ # shift the whole first-last segment
+ first=$(($first-1))
+ last=$(($last-1))
+ mid=$((($last+$first)/2))
+ done
+ if [ -z "$tmid" ]; then
+ warning "giving up on log..."
+ return
+ fi
+ if [ $tmid -gt $tm ]; then
+ last=$(($mid-1))
+ elif [ $tmid -lt $tm ]; then
+ first=$(($mid+1))
+ else
+ break
+ fi
+ done
+ echo $mid
+}
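+# A sketch of the search: for tm=1500 in a log whose lines 1..100 span
+# 1000..2000, the bisection probes line 50, then 25 or 75, and so on
+# until linetime() brackets tm; lines with no parsable time shift the
+# window left, at most 10 times, before we give up.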
+
+dumplog() {
+ local logf=$1
+ local from_line=$2
+ local to_line=$3
+ [ "$from_line" ] ||
+ return
+ tail -n +$from_line $logf |
+ if [ "$to_line" ]; then
+ head -$(($to_line-$from_line+1))
+ else
+ cat
+ fi
+}
+
+#
+# find files newer than a and older than b
+#
+isnumber() {
+ echo "$*" | grep -qs '^[0-9][0-9]*$'
+}
+touchfile() {
+ local t
+ t=`mktemp` &&
+ perl -e "\$file=\"$t\"; \$tm=$1;" -e 'utime $tm, $tm, $file;' &&
+ echo $t
+}
+find_files() {
+ local dirs from_time to_time
+ local from_stamp to_stamp findexp
+ dirs=$1
+ from_time=$2
+ to_time=$3
+ isnumber "$from_time" && [ "$from_time" -gt 0 ] || {
+ warning "sorry, can't find files based on time if you don't supply time"
+ return
+ }
+ if ! from_stamp=`touchfile $from_time`; then
+ warning "can't create temporary files"
+ return
+ fi
+ add_tmpfiles $from_stamp
+ findexp="-newer $from_stamp"
+ if isnumber "$to_time" && [ "$to_time" -gt 0 ]; then
+ if ! to_stamp=`touchfile $to_time`; then
+ warning "can't create temporary files"
+ return
+ fi
+ add_tmpfiles $to_stamp
+ findexp="$findexp ! -newer $to_stamp"
+ fi
+ find $dirs -type f $findexp
+}
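+# a sketch, with made-up epoch times: find_files /var/lib/pengine
+# 1188988200 1188993600 creates two stamp files with those mtimes and
+# effectively runs
+#	find /var/lib/pengine -type f -newer $from_stamp ! -newer $to_stamp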
+
+#
+# check permissions of files/dirs
+#
+pl_checkperms() {
+perl -e '
+# check permissions and ownership
+# uid and gid are numeric
+# everything must match exactly
+# no error checking! (file should exist, etc)
+($filename, $perms, $in_uid, $in_gid) = @ARGV;
+($mode,$uid,$gid) = (stat($filename))[2,4,5];
+$p=sprintf("%04o", $mode & 07777);
+$p ne $perms and exit(1);
+$uid ne $in_uid and exit(1);
+$gid ne $in_gid and exit(1);
+' $*
+}
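+# e.g. `pl_checkperms /var/lib/heartbeat 0755 0 0` exits 0 only if the
+# path is mode 0755 and owned by uid 0, gid 0.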
+num_id() {
+ getent $1 $2 | awk -F: '{print $3}'
+}
+chk_id() {
+ [ "$2" ] && return 0
+ echo "$1: id not found"
+ return 1
+}
+check_perms() {
+ local f p uid gid n_uid n_gid
+ essential_files |
+ while read type f p uid gid; do
+ [ -$type $f ] || {
+ echo "$f wrong type or doesn't exist"
+ continue
+ }
+ n_uid=`num_id passwd $uid`
+ chk_id "$uid" "$n_uid" || continue
+ n_gid=`num_id group $gid`
+ chk_id "$gid" "$n_gid" || continue
+ pl_checkperms $f $p $n_uid $n_gid || {
+ echo "wrong permissions or ownership for $f:"
+ ls -ld $f
+ }
+ done
+}
+
+#
+# coredumps
+#
+pkg_mgr_list() {
+# list of:
+# regex pkg_mgr
+# no spaces allowed in regex
+ cat<<EOF
+zypper.install zypper
+EOF
+}
+listpkg_zypper() {
+ local bins
+ local binary=$1 core=$2
+ gdb $binary $core </dev/null 2>&1 |
+ awk '
+ # this zypper version dumps all packages on a single line
+ /Missing separate debuginfos.*zypper.install/ {
+ sub(".*zypper.install ",""); print
+ exit}
+ n>0 && /^Try: zypper install/ {gsub("\"",""); print $NF}
+ n>0 {n=0}
+ /Missing separate debuginfo/ {n=1}
+ ' | sort -u
+}
+fetchpkg_zypper() {
+ local pkg
+ debug "get debuginfo packages using zypper: $@"
+ zypper -qn ref > /dev/null
+ for pkg in $@; do
+ zypper -qn install -C $pkg >/dev/null
+ done
+}
+find_pkgmgr() {
+ local binary=$1 core=$2
+ local regex pkg_mgr
+ pkg_mgr_list |
+ while read regex pkg_mgr; do
+ if gdb $binary $core </dev/null 2>&1 |
+ grep "$regex" > /dev/null; then
+ echo $pkg_mgr
+ break
+ fi
+ done
+}
+get_debuginfo() {
+ local binary=$1 core=$2
+ local pkg_mgr pkgs
+ gdb $binary $core </dev/null 2>/dev/null |
+ egrep 'Missing.*debuginfo|no debugging symbols found' > /dev/null ||
+ return # no missing debuginfo
+ pkg_mgr=`find_pkgmgr $binary $core`
+ if [ -z "$pkg_mgr" ]; then
+ warning "found core for $binary but there is no debuginfo and we don't know how to get it on this platform"
+ return
+ fi
+ pkgs=`listpkg_$pkg_mgr $binary $core`
+ [ -n "$pkgs" ] &&
+ fetchpkg_$pkg_mgr $pkgs
+}
+findbinary() {
+ local random_binary binary fullpath
+ random_binary=`which cat 2>/dev/null` # suppose we are lucky
+ binary=`gdb $random_binary $1 < /dev/null 2>/dev/null |
+ grep 'Core was generated' | awk '{print $5}' |
+ sed "s/^.//;s/[.':]*$//"`
+ if [ x = x"$binary" ]; then
+ debug "could not detect the program name for core $1 from the gdb output; will try with file(1)"
+ binary=$(file $1 | awk '/from/{
+ for( i=1; i<=NF; i++ )
+ if( $i == "from" ) {
+ print $(i+1)
+ break
+ }
+ }')
+ binary=`echo $binary | tr -d "'"`
+ binary=$(echo $binary | tr -d '`')
+ if [ "$binary" ]; then
+ binary=`which $binary 2>/dev/null`
+ fi
+ fi
+ if [ x = x"$binary" ]; then
+ warning "could not find the program path for core $1"
+ return
+ fi
+ fullpath=`which $binary 2>/dev/null`
+ if [ x = x"$fullpath" ]; then
+ for d in $HA_BIN $CRM_DAEMON_DIR; do
+ if [ -x $d/$binary ]; then
+ echo $d/$binary
+ debug "found the program at $d/$binary for core $1"
+ else
+ warning "could not find the program path for core $1"
+ fi
+ done
+ else
+ echo $fullpath
+ debug "found the program at $fullpath for core $1"
+ fi
+}
+getbt() {
+ local corefile absbinpath
+ which gdb > /dev/null 2>&1 || {
+ warning "please install gdb to get backtraces"
+ return
+ }
+ for corefile; do
+ absbinpath=`findbinary $corefile`
+ [ x = x"$absbinpath" ] && continue
+ get_debuginfo $absbinpath $corefile
+ echo "====================== start backtrace ======================"
+ ls -l $corefile
+ gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt full"} -ex quit \
+ $absbinpath $corefile 2>/dev/null
+ echo "======================= end backtrace ======================="
+ done
+}
+
+#
+# heartbeat configuration/status
+#
+iscrmrunning() {
+ local pid maxwait
+	ps -ef | grep -qs '[c]rmd' || return 1
+ crmadmin -D >/dev/null 2>&1 &
+ pid=$!
+ maxwait=100
+ while kill -0 $pid 2>/dev/null && [ $maxwait -gt 0 ]; do
+ sleep 0.1
+ maxwait=$(($maxwait-1))
+ done
+ if kill -0 $pid 2>/dev/null; then
+ kill $pid
+ false
+ else
+ wait $pid
+ fi
+}
+dumpstate() {
+ crm_mon -1 | grep -v '^Last upd' > $1/$CRM_MON_F
+ cibadmin -Ql > $1/$CIB_F
+ `echo_membership_tool` $MEMBERSHIP_TOOL_OPTS -p > $1/$MEMBERSHIP_F 2>&1
+}
+getconfig() {
+ [ -f "$CONF" ] &&
+ cp -p $CONF $1/
+ [ -f "$LOGD_CF" ] &&
+ cp -p $LOGD_CF $1/
+ if iscrmrunning; then
+ dumpstate $1
+ touch $1/RUNNING
+ else
+ cp -p $CIB_DIR/$CIB_F $1/ 2>/dev/null
+ touch $1/STOPPED
+ fi
+ [ "$HOSTCACHE" ] &&
+ cp -p $HA_VARLIB/hostcache $1/$HOSTCACHE 2>/dev/null
+ [ "$HB_UUID_F" ] &&
+ crm_uuid -r > $1/$HB_UUID_F 2>&1
+ [ -f "$1/$CIB_F" ] &&
+ crm_verify -V -x $1/$CIB_F >$1/$CRM_VERIFY_F 2>&1
+}
+crmconfig() {
+ [ -f "$1/$CIB_F" ] && which crm >/dev/null 2>&1 &&
+ CIB_file=$1/$CIB_F crm configure show >$1/$CIB_TXT_F 2>&1
+}
+get_crm_nodes() {
+ cibadmin -Ql -o nodes |
+ awk '
+ /<node / {
+ for( i=1; i<=NF; i++ )
+ if( $i~/^uname=/ ) {
+ sub("uname=.","",$i);
+ sub("\".*","",$i);
+ print $i;
+ next;
+ }
+ }
+ '
+}
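+# e.g. a CIB entry <node id="1" uname="node-a" type="normal"/> yields
+# "node-a".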
+get_live_nodes() {
+ if [ `id -u` = 0 ] && which fping >/dev/null 2>&1; then
+ fping -a $@ 2>/dev/null
+ else
+ local h
+ for h; do ping -c 2 -q $h >/dev/null 2>&1 && echo $h; done
+ fi
+}
+
+#
+# remove values of sensitive attributes
+#
+# this is not proper xml parsing, but it will work under the
+# circumstances
+is_sensitive_xml() {
+ local patt epatt
+ epatt=""
+ for patt in $SANITIZE; do
+ epatt="$epatt|$patt"
+ done
+ epatt="`echo $epatt|sed 's/.//'`"
+ egrep -qs "name=\"$epatt\""
+}
+test_sensitive_one() {
+ local file compress decompress
+ file=$1
+ compress=""
+ echo $file | grep -qs 'gz$' && compress=gzip
+ echo $file | grep -qs 'bz2$' && compress=bzip2
+ if [ "$compress" ]; then
+ decompress="$compress -dc"
+ else
+ compress=cat
+ decompress=cat
+ fi
+ $decompress < $file | is_sensitive_xml
+}
+sanitize_xml_attrs() {
+ local patt
+ sed $(
+ for patt in $SANITIZE; do
+ echo "-e /name=\"$patt\"/s/value=\"[^\"]*\"/value=\"****\"/"
+ done
+ )
+}
+sanitize_hacf() {
+ awk '
+ $1=="stonith_host"{ for( i=5; i<=NF; i++ ) $i="****"; }
+ {print}
+ '
+}
+sanitize_one() {
+ local file compress decompress tmp ref
+ file=$1
+ compress=""
+ echo $file | grep -qs 'gz$' && compress=gzip
+ echo $file | grep -qs 'bz2$' && compress=bzip2
+ if [ "$compress" ]; then
+ decompress="$compress -dc"
+ else
+ compress=cat
+ decompress=cat
+ fi
+ tmp=`mktemp`
+ ref=`mktemp`
+ add_tmpfiles $tmp $ref
+ if [ -z "$tmp" -o -z "$ref" ]; then
+ fatal "cannot create temporary files"
+ fi
+ touch -r $file $ref # save the mtime
+ if [ "`basename $file`" = ha.cf ]; then
+ sanitize_hacf
+ else
+ $decompress | sanitize_xml_attrs | $compress
+ fi < $file > $tmp
+ mv $tmp $file
+ touch -r $ref $file
+}
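+# e.g. with the default SANITIZE="passw.*", an attribute
+#	<nvpair name="password" value="secret"/>
+# comes out with value="****"; in ha.cf, stonith_host lines are masked
+# from the fifth field on.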
+
+#
+# keep the user posted
+#
+fatal() {
+ echo "`uname -n`: ERROR: $*" >&2
+ exit 1
+}
+warning() {
+ echo "`uname -n`: WARN: $*" >&2
+}
+info() {
+ echo "`uname -n`: INFO: $*" >&2
+}
+debug() {
+ [ "$VERBOSITY" ] && [ $VERBOSITY -gt 0 ] &&
+ echo "`uname -n`: DEBUG: $*" >&2
+ return 0
+}
+pickfirst() {
+ for x; do
+ which $x >/dev/null 2>&1 && {
+ echo $x
+ return 0
+ }
+ done
+ return 1
+}
+
+# tmp files business
+drop_tmpfiles() {
+ trap 'rm -rf `cat $__TMPFLIST`; rm $__TMPFLIST' EXIT
+}
+init_tmpfiles() {
+ if __TMPFLIST=`mktemp`; then
+ drop_tmpfiles
+ else
+ # this is really bad, let's just leave
+ fatal "eek, mktemp cannot create temporary files"
+ fi
+}
+add_tmpfiles() {
+ test -f "$__TMPFLIST" || return
+ echo $* >> $__TMPFLIST
+}
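+# lifecycle sketch: init_tmpfiles runs once at startup, every mktemp
+# result is registered with add_tmpfiles, and the EXIT trap removes
+# them all (work directory included).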
+
+#
+# get some system info
+#
+distro() {
+ local relf f
+ which lsb_release >/dev/null 2>&1 && {
+ lsb_release -d
+ debug "using lsb_release for distribution info"
+ return
+ }
+ relf=`ls /etc/debian_version 2>/dev/null` ||
+ relf=`ls /etc/slackware-version 2>/dev/null` ||
+ relf=`ls -d /etc/*-release 2>/dev/null` && {
+ for f in $relf; do
+ test -f $f && {
+ echo "`ls $f` `cat $f`"
+ debug "found $relf distribution release file"
+ return
+ }
+ done
+ }
+ warning "no lsb_release, no /etc/*-release, no /etc/debian_version: no distro information"
+}
+
+pkg_ver_deb() {
+	dpkg-query -f '${Package} ${Version}\n' -W $* 2>/dev/null
+}
+pkg_ver_rpm() {
+ rpm -q --qf '%{name} %{version}-%{release} - %{distribution} %{arch}\n' $* 2>&1 |
+ grep -v 'not installed'
+}
+pkg_ver_pkg_info() {
+ for pkg; do
+ pkg_info | grep $pkg
+ done
+}
+pkg_ver_pkginfo() {
+ for pkg; do
+ pkginfo $pkg | awk '{print $3}' # format?
+ done
+}
+verify_deb() {
+ debsums -s $* 2>/dev/null
+}
+verify_rpm() {
+ rpm --verify $* 2>&1 | grep -v 'not installed'
+}
+verify_pkg_info() {
+ :
+}
+verify_pkginfo() {
+ :
+}
+
+get_pkg_mgr() {
+ local pkg_mgr
+ if which dpkg >/dev/null 2>&1 ; then
+ pkg_mgr="deb"
+ elif which rpm >/dev/null 2>&1 ; then
+ pkg_mgr="rpm"
+ elif which pkg_info >/dev/null 2>&1 ; then
+ pkg_mgr="pkg_info"
+ elif which pkginfo >/dev/null 2>&1 ; then
+ pkg_mgr="pkginfo"
+ else
+ warning "Unknown package manager!"
+ return
+ fi
+ echo $pkg_mgr
+}
+
+pkg_versions() {
+ local pkg_mgr=`get_pkg_mgr`
+ [ -z "$pkg_mgr" ] &&
+ return
+ debug "the package manager is $pkg_mgr"
+ pkg_ver_$pkg_mgr $*
+}
+verify_packages() {
+ local pkg_mgr=`get_pkg_mgr`
+ [ -z "$pkg_mgr" ] &&
+ return
+ verify_$pkg_mgr $*
+}
+
+crm_info() {
+ $CRM_DAEMON_DIR/crmd version 2>&1
+}