diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 06:53:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 06:53:20 +0000 |
commit | e5a812082ae033afb1eed82c0f2df3d0f6bdc93f (patch) | |
tree | a6716c9275b4b413f6c9194798b34b91affb3cc7 /tools/crm_report.in | |
parent | Initial commit. (diff) | |
download | pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.tar.xz pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.zip |
Adding upstream version 2.1.6.upstream/2.1.6
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/crm_report.in')
-rw-r--r-- | tools/crm_report.in | 481 |
1 files changed, 481 insertions, 0 deletions
diff --git a/tools/crm_report.in b/tools/crm_report.in new file mode 100644 index 0000000..1818879 --- /dev/null +++ b/tools/crm_report.in @@ -0,0 +1,481 @@ +#!/bin/sh +# +# Copyright 2010-2019 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. +# + +TEMP=`@GETOPT_PATH@ \ + -o hv?xl:f:t:n:T:L:p:c:dSCu:D:MVse: \ + --long help,corosync,cts:,cts-log:,dest:,node:,nodes:,from:,to:,sos-mode,logfile:,as-directory,single-node,cluster:,user:,max-depth:,version,features,rsh: \ + -n 'crm_report' -- "$@"` +# The quotes around $TEMP are essential +eval set -- "$TEMP" + +progname=$(basename "$0") +rsh="ssh -T" +tests="" +nodes="" +compress=1 +cluster="any" +ssh_user="root" +search_logs=1 +sos_mode=0 +report_data=`dirname $0` +maxdepth=5 + +extra_logs="" +sanitize_patterns="passw.*" +log_patterns="CRIT: ERROR:" + +usage() { +cat<<EOF +$progname - Create archive of everything needed when reporting cluster problems + + +Usage: $progname [options] [DEST] + +Required option: + -f, --from TIME time prior to problems beginning + (as "YYYY-M-D H:M:S" including the quotes) + +Options: + -V increase verbosity (may be specified multiple times) + -h, --help display this message + -v, --version display software version + --features display software features + -t, --to TIME time at which all problems were resolved + (as "YYYY-M-D H:M:S" including the quotes; default "now") + -T, --cts TEST CTS test or set of tests to extract + --cts-log CTS master logfile + -n, --nodes NODES node names for this cluster (only needed if cluster is + not active on this host; accepts -n "a b" or -n a -n b) + -M do not search for cluster logs + -l, --logfile FILE log file to collect (in addition to detected logs if -M + is not specified; may be specified multiple times) + -p PATT additional regular expression to match variables to be + masked in output (default: "passw.*") + -L PATT additional regular expression to match in log files for + analysis (default: $log_patterns) + -S, --single-node don't attempt to collect data from other nodes + -c, --cluster TYPE force the cluster type instead of detecting + (currently only corosync is supported) + -C, --corosync force the cluster type to be corosync + -u, --user USER username to use when collecting data from other nodes + (default root) + -D, --max-depth search depth to use when attempting to locate files + -e, --rsh command to use to run commands on other nodes + (default ssh -T) + -d, --as-directory leave result as a directory tree instead of archiving + --sos-mode use defaults suitable for being called by sosreport tool + (behavior subject to change and not useful to end users) + DEST, --dest DEST custom destination directory or file name + +$progname works best when run from a cluster node on a running cluster, +but can be run from a stopped cluster node or a Pacemaker Remote node. + +If neither --nodes nor --single-node is given, $progname will guess the +node list, but may have trouble detecting Pacemaker Remote nodes. +Unless --single-node is given, the node names (whether specified by --nodes +or detected automatically) must be resolvable and reachable via the command +specified by -e/--rsh using the user specified by -u/--user. + +Examples: + $progname -f "2011-12-14 13:05:00" unexplained-apache-failure + $progname -f 2011-12-14 -t 2011-12-15 something-that-took-multiple-days + $progname -f 13:05:00 -t 13:12:00 brief-outage +EOF +} + +case "$1" in + -v|--version) echo "$progname @VERSION@-@BUILD_VERSION@"; exit 0;; + --features) echo "@VERSION@-@BUILD_VERSION@: @PCMK_FEATURES@"; exit 0;; + --|-h|--help) usage; exit 0;; +esac + +# Prefer helpers in the same directory if they exist, to simplify development +if [ ! -f $report_data/report.common ]; then + report_data=@datadir@/@PACKAGE@ +else + echo "Using local helpers" +fi + +. $report_data/report.common + +while true; do + case "$1" in + -x) set -x; shift;; + -V) verbose=`expr $verbose + 1`; shift;; + -T|--cts) tests="$tests $2"; shift; shift;; + --cts-log) ctslog="$2"; shift; shift;; + -f|--from) start_time=`get_time "$2"`; shift; shift;; + -t|--to) end_time=`get_time "$2"`; shift; shift;; + -n|--node|--nodes) nodes="$nodes $2"; shift; shift;; + -S|--single-node) nodes="$host"; shift;; + -l|--logfile) extra_logs="$extra_logs $2"; shift; shift;; + -p) sanitize_patterns="$sanitize_patterns $2"; shift; shift;; + -L) log_patterns="$log_patterns `echo $2 | sed 's/ /\\\W/g'`"; shift; shift;; + -d|--as-directory) compress=0; shift;; + -C|--corosync) cluster="corosync"; shift;; + -c|--cluster) cluster="$2"; shift; shift;; + -e|--rsh) rsh="$2"; shift; shift;; + -u|--user) ssh_user="$2"; shift; shift;; + -D|--max-depth) maxdepth="$2"; shift; shift;; + -M) search_logs=0; shift;; + --sos-mode) sos_mode=1; nodes="$host"; shift;; + --dest) DESTDIR=$2; shift; shift;; + --) if [ ! -z $2 ]; then DESTDIR=$2; fi; break;; + -h|--help) usage; exit 0;; + # Options for compatibility with hb_report + -s) shift;; + + *) echo "Unknown argument: $1"; usage; exit 1;; + esac +done + + +collect_data() { + label="$1" + start=`expr $2 - 10` + end=`expr $3 + 10` + masterlog=$4 + + if [ "x$DESTDIR" != x ]; then + echo $DESTDIR | grep -e "^/" -qs + if [ $? = 0 ]; then + l_base=$DESTDIR + else + l_base="`pwd`/$DESTDIR" + fi + debug "Using custom scratch dir: $l_base" + r_base=`basename $l_base` + else + l_base=$HOME/$label + r_base=$label + fi + + if [ -e $l_base ]; then + fatal "Output directory $l_base already exists, specify an alternate name with --dest" + fi + mkdir -p $l_base + + if [ "x$masterlog" != "x" ]; then + dumplogset "$masterlog" $start $end > "$l_base/$HALOG_F" + fi + + for node in $nodes; do + cat <<EOF >$l_base/.env +LABEL="$label" +REPORT_HOME="$r_base" +REPORT_MASTER="$host" +REPORT_TARGET="$node" +LOG_START=$start +LOG_END=$end +REMOVE=1 +SANITIZE="$sanitize_patterns" +CLUSTER=$cluster +LOG_PATTERNS="$log_patterns" +EXTRA_LOGS="$extra_logs" +SEARCH_LOGS=$search_logs +SOS_MODE=$sos_mode +verbose=$verbose +maxdepth=$maxdepth +EOF + + if [ $host = $node ]; then + cat <<EOF >>$l_base/.env +REPORT_HOME="$l_base" +EOF + cat $l_base/.env $report_data/report.common $report_data/report.collector > $l_base/collector + bash $l_base/collector + else + cat $l_base/.env $report_data/report.common $report_data/report.collector \ + | $rsh -l $ssh_user $node -- "mkdir -p $r_base; cat > $r_base/collector; bash $r_base/collector" | (cd $l_base && tar mxf -) + fi + done + + analyze $l_base > $l_base/$ANALYSIS_F + if [ -f $l_base/$HALOG_F ]; then + node_events $l_base/$HALOG_F > $l_base/$EVENTS_F + fi + + for node in $nodes; do + cat $l_base/$node/$ANALYSIS_F >> $l_base/$ANALYSIS_F + if [ -s $l_base/$node/$EVENTS_F ]; then + cat $l_base/$node/$EVENTS_F >> $l_base/$EVENTS_F + elif [ -s $l_base/$HALOG_F ]; then + awk "\$4==\"$nodes\"" $l_base/$EVENTS_F >> $l_base/$n/$EVENTS_F + fi + done + + log " " + if [ $compress = 1 ]; then + fname=`shrink $l_base` + rm -rf $l_base + log "Collected results are available in $fname" + log " " + log "Please create a bug entry at" + log " @BUG_URL@" + log "Include a description of your problem and attach this tarball" + log " " + log "Thank you for taking time to create this report." + else + log "Collected results are available in $l_base" + fi + log " " +} + +# +# check if files have same content in the cluster +# +cibdiff() { + d1=$(dirname $1) + d2=$(dirname $2) + + if [ -f "$d1/RUNNING" ] && [ ! -f "$d2/RUNNING" ]; then + DIFF_OK=0 + elif [ -f "$d1/STOPPED" ] && [ ! -f "$d2/STOPPED" ]; then + DIFF_OK=0 + else + DIFF_OK=1 + fi + + if [ $DIFF_OK -eq 1 ]; then + if which crm_diff > /dev/null 2>&1; then + crm_diff -c -n $1 -o $2 + else + info "crm_diff(8) not found, cannot diff CIBs" + fi + else + echo "can't compare cibs from running and stopped systems" + fi +} + +diffcheck() { + [ -f "$1" ] || { + echo "$1 does not exist" + return 1 + } + [ -f "$2" ] || { + echo "$2 does not exist" + return 1 + } + case $(basename "$1") in + $CIB_F) cibdiff $1 $2 ;; + *) diff -u $1 $2 ;; + esac +} + +# +# remove duplicates if files are same, make links instead +# +consolidate() { + for n in $nodes; do + if [ -f $1/$2 ]; then + rm $1/$n/$2 + else + mv $1/$n/$2 $1 + fi + ln -s ../$2 $1/$n + done +} + +analyze_one() { + rc=0 + node0="" + for n in $nodes; do + if [ "$node0" ]; then + diffcheck $1/$node0/$2 $1/$n/$2 + rc=$(($rc+$?)) + else + node0=$n + fi + done + return $rc +} + +analyze() { + flist="$MEMBERSHIP_F $CIB_F $CRM_MON_F $SYSINFO_F" + for f in $flist; do + printf "Diff $f... " + ls $1/*/$f >/dev/null 2>&1 || { + echo "no $1/*/$f :/" + continue + } + if analyze_one $1 $f; then + echo "OK" + [ "$f" != $CIB_F ] && consolidate $1 $f + else + echo "" + fi + done +} + +do_cts() { + test_sets=`echo $tests | tr ',' ' '` + for test_set in $test_sets; do + + start_time=0 + start_test=`echo $test_set | tr '-' ' ' | awk '{print $1}'` + + end_time=0 + end_test=`echo $test_set | tr '-' ' ' | awk '{print $2}'` + + if [ x$end_test = x ]; then + msg="Extracting test $start_test" + label="CTS-$start_test-`date +"%b-%d-%Y"`" + end_test=`expr $start_test + 1` + else + msg="Extracting tests $start_test to $end_test" + label="CTS-$start_test-$end_test-`date +"%b-%d-%Y"`" + end_test=`expr $end_test + 1` + fi + + if [ $start_test = 0 ]; then + start_pat="BEGINNING [0-9].* TESTS" + else + start_pat="Running test.*\[ *$start_test\]" + fi + + if [ x$ctslog = x ]; then + ctslog=`findmsg 1 "$start_pat"` + + if [ x$ctslog = x ]; then + fatal "No CTS control file detected" + else + log "Using CTS control file: $ctslog" + fi + fi + + line=`grep -n "$start_pat" $ctslog | tail -1 | sed 's/:.*//'` + if [ ! -z "$line" ]; then + start_time=`linetime $ctslog $line` + fi + + line=`grep -n "Running test.*\[ *$end_test\]" $ctslog | tail -1 | sed 's/:.*//'` + if [ ! -z "$line" ]; then + end_time=`linetime $ctslog $line` + fi + + if [ -z "$nodes" ]; then + nodes=`grep CTS: $ctslog | grep -v debug: | grep " \* " | sed s:.*\\\*::g | sort -u | tr '\\n' ' '` + log "Calculated node list: $nodes" + fi + + if [ $end_time -lt $start_time ]; then + debug "Test didn't complete, grabbing everything up to now" + end_time=`date +%s` + fi + + if [ $start_time != 0 ];then + log "$msg (`time2str $start_time` to `time2str $end_time`)" + collect_data $label $start_time $end_time $ctslog + else + fatal "$msg failed: not found" + fi + done +} + +node_names_from_xml() { + awk ' + /uname/ { + for( i=1; i<=NF; i++ ) + if( $i~/^uname=/ ) { + sub("uname=.","",$i); + sub("\".*","",$i); + print $i; + next; + } + } + ' | tr '\n' ' ' +} + +getnodes() { + cluster="$1" + + # 1. Live (cluster nodes or Pacemaker Remote nodes) + # TODO: This will not detect Pacemaker Remote nodes unless they + # have ever had a permanent node attribute set, because it only + # searches the nodes section. It should also search the config + # for resources that create Pacemaker Remote nodes. + cib_nodes=$(cibadmin -Ql -o nodes 2>/dev/null) + if [ $? -eq 0 ]; then + debug "Querying CIB for nodes" + echo "$cib_nodes" | node_names_from_xml + return + fi + + # 2. Saved + if [ -f "@CRM_CONFIG_DIR@/cib.xml" ]; then + debug "Querying on-disk CIB for nodes" + grep "node " "@CRM_CONFIG_DIR@/cib.xml" | node_names_from_xml + return + fi + + # 3. logs + # TODO: Look for something like crm_update_peer +} + +if [ $compress -eq 1 ]; then + require_tar +fi + +if [ "x$tests" != "x" ]; then + do_cts + +elif [ "x$start_time" != "x" ]; then + masterlog="" + + if [ -z "$sanitize_patterns" ]; then + log "WARNING: The tarball produced by this program may contain" + log " sensitive information such as passwords." + log "" + log "We will attempt to remove such information if you use the" + log "-p option. For example: -p \"pass.*\" -p \"user.*\"" + log "" + log "However, doing this may reduce the ability for the recipients" + log "to diagnose issues and generally provide assistance." + log "" + log "IT IS YOUR RESPONSIBILITY TO PROTECT SENSITIVE DATA FROM EXPOSURE" + log "" + fi + + # If user didn't specify a cluster stack, make a best guess if possible. + if [ -z "$cluster" ] || [ "$cluster" = "any" ]; then + cluster=$(get_cluster_type) + fi + + # If user didn't specify node(s), make a best guess if possible. + if [ -z "$nodes" ]; then + nodes=`getnodes $cluster` + if [ -n "$nodes" ]; then + log "Calculated node list: $nodes" + else + fatal "Cannot determine nodes; specify --nodes or --single-node" + fi + fi + + if + echo $nodes | grep -qs $host + then + debug "We are a cluster node" + else + debug "We are a log master" + masterlog=`findmsg 1 "pacemaker-controld\\|CTS"` + fi + + + if [ -z $end_time ]; then + end_time=`perl -e 'print time()'` + fi + label="pcmk-`date +"%a-%d-%b-%Y"`" + log "Collecting data from $nodes (`time2str $start_time` to `time2str $end_time`)" + collect_data $label $start_time $end_time $masterlog +else + fatal "Not sure what to do, no tests or time ranges to extract" +fi + +# vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80: |