| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
# Normalize the command line with getopt(1) so that the option loop further
# down only ever sees one option (and its argument) per position, followed by
# "--" and any positional arguments.
#
# getopt exits non-zero (after printing its own diagnostic, prefixed with
# 'crm_report') when it meets an unknown option or a missing argument.  Stop
# here in that case instead of continuing with a partial argument list.
TEMP=$(@GETOPT_PATH@ \
    -o 'hv?xl:f:t:n:T:L:p:c:dSCu:D:MVse:' \
    --long help,corosync,cts:,cts-log:,dest:,node:,nodes:,from:,to:,sos-mode,logfile:,as-directory,single-node,cluster:,user:,max-depth:,version,features,rsh: \
    -n 'crm_report' -- "$@") || {
    echo "crm_report: invalid option(s) supplied, use --help for valid options" >&2
    exit 1
}

# The quotes around $TEMP are essential: getopt emits shell-quoted words.
eval set -- "$TEMP"

# Defaults; most can be overridden by the command-line options parsed below.
progname=$(basename "$0")
rsh="ssh -T"                # command used to reach other nodes (-e/--rsh)
tests=""                    # CTS test sets to extract (-T/--cts)
nodes=""                    # whitespace-separated node list (-n/--nodes)
compress=1                  # 1 = archive the result, 0 = leave a directory (-d)
cluster="any"               # cluster type; "any" means detect it
ssh_user="root"             # remote user (-u/--user)
search_logs=1               # 0 = do not search for cluster logs (-M)
sos_mode=0                  # 1 when --sos-mode is given
report_data=$(dirname "$0") # where to look first for report.common
maxdepth=5                  # file search depth (-D/--max-depth)

extra_logs=""                   # additional log files (-l/--logfile)
sanitize_patterns="passw.*"     # variable-name patterns to mask (-p)
log_patterns="CRIT: ERROR:"     # patterns to look for in logs (-L)
| |
# Print the help text on stdout.
# Globals read: progname (script name shown in the text),
#               log_patterns (shown as the default for -L).
# Every option that consumes a value is shown with a placeholder for it.
usage() {
    cat <<EOF
$progname - Create archive of everything needed when reporting cluster problems


Usage: $progname [options] [DEST]

Required option:
  -f, --from TIME       time prior to problems beginning
                        (as "YYYY-M-D H:M:S" including the quotes)

Options:
  -V                    increase verbosity (may be specified multiple times)
  -h, --help            display this message
  -v, --version         display software version
  --features            display software features
  -t, --to TIME         time at which all problems were resolved
                        (as "YYYY-M-D H:M:S" including the quotes; default "now")
  -T, --cts TEST        CTS test or set of tests to extract
  --cts-log FILE        CTS master logfile
  -n, --nodes NODES     node names for this cluster (only needed if cluster is
                        not active on this host; accepts -n "a b" or -n a -n b)
  -M                    do not search for cluster logs
  -l, --logfile FILE    log file to collect (in addition to detected logs if -M
                        is not specified; may be specified multiple times)
  -p PATT               additional regular expression to match variables to be
                        masked in output (default: "passw.*")
  -L PATT               additional regular expression to match in log files for
                        analysis (default: $log_patterns)
  -S, --single-node     don't attempt to collect data from other nodes
  -c, --cluster TYPE    force the cluster type instead of detecting
                        (currently only corosync is supported)
  -C, --corosync        force the cluster type to be corosync
  -u, --user USER       username to use when collecting data from other nodes
                        (default root)
  -D, --max-depth DEPTH search depth to use when attempting to locate files
  -e, --rsh CMD         command to use to run commands on other nodes
                        (default ssh -T)
  -d, --as-directory    leave result as a directory tree instead of archiving
  --sos-mode            use defaults suitable for being called by sosreport tool
                        (behavior subject to change and not useful to end users)
  DEST, --dest DEST     custom destination directory or file name

$progname works best when run from a cluster node on a running cluster,
but can be run from a stopped cluster node or a Pacemaker Remote node.

If neither --nodes nor --single-node is given, $progname will guess the
node list, but may have trouble detecting Pacemaker Remote nodes.
Unless --single-node is given, the node names (whether specified by --nodes
or detected automatically) must be resolvable and reachable via the command
specified by -e/--rsh using the user specified by -u/--user.

Examples:
  $progname -f "2011-12-14 13:05:00" unexplained-apache-failure
  $progname -f 2011-12-14 -t 2011-12-15 something-that-took-multiple-days
  $progname -f 13:05:00 -t 13:12:00 brief-outage
EOF
}
| |
# Handle the purely informational options before any helper code is loaded.
# After getopt normalization a leading "--" means no option was given at all,
# which is treated like a request for help.
case "$1" in
    -v|--version)
        echo "$progname @VERSION@-@BUILD_VERSION@"
        exit 0
        ;;
    --features)
        echo "@VERSION@-@BUILD_VERSION@: @PCMK_FEATURES@"
        exit 0
        ;;
    --|-h|--help)
        usage
        exit 0
        ;;
esac

# Prefer a report.common sitting next to this script; otherwise use the
# installed copy.  Paths are quoted so a location containing whitespace works.
if [ -f "$report_data/report.common" ]; then
    echo "Using local helpers"
else
    report_data=@datadir@/@PACKAGE@
fi

. "$report_data/report.common"
| |
# Walk the getopt-normalized argument list.  Options that take a value consume
# two positions; flags consume one.  The loop ends at "--", after which an
# optional positional DEST may follow.
while true; do
    case "$1" in
        -x) set -x; shift ;;
        -V) verbose=$((verbose + 1)); shift ;;
        -T|--cts) tests="$tests $2"; shift 2 ;;
        --cts-log) ctslog="$2"; shift 2 ;;
        -f|--from) start_time=$(get_time "$2"); shift 2 ;;
        -t|--to) end_time=$(get_time "$2"); shift 2 ;;
        -n|--node|--nodes) nodes="$nodes $2"; shift 2 ;;
        -S|--single-node) nodes="$host"; shift ;;
        -l|--logfile) extra_logs="$extra_logs $2"; shift 2 ;;
        -p) sanitize_patterns="$sanitize_patterns $2"; shift 2 ;;
        -L)
            # Patterns are kept in a space-separated list, so spaces inside
            # one pattern are rewritten to \W.  The argument is quoted so a
            # regex such as "foo.*" is not glob-expanded by the shell first.
            log_patterns="$log_patterns $(printf '%s\n' "$2" | sed 's/ /\\W/g')"
            shift 2
            ;;
        -d|--as-directory) compress=0; shift ;;
        -C|--corosync) cluster="corosync"; shift ;;
        -c|--cluster) cluster="$2"; shift 2 ;;
        -e|--rsh) rsh="$2"; shift 2 ;;
        -u|--user) ssh_user="$2"; shift 2 ;;
        -D|--max-depth) maxdepth="$2"; shift 2 ;;
        -M) search_logs=0; shift ;;
        --sos-mode) sos_mode=1; nodes="$host"; shift ;;
        --dest) DESTDIR=$2; shift 2 ;;
        --)
            # A positional DEST, when present, takes the place of --dest.
            if [ -n "$2" ]; then
                DESTDIR=$2
            fi
            break
            ;;
        -h|--help) usage; exit 0 ;;

        # -s is accepted and ignored.
        -s) shift ;;

        *) echo "Unknown argument: $1"; usage; exit 1 ;;
    esac
done
| |
| |
# Collect report data from every node in $nodes into a scratch directory,
# run the cross-node analysis, and optionally archive the result.
#
# Arguments:
#   $1  label used for the report (and for the default directory name)
#   $2  numeric start time; 10 is subtracted before it is passed on
#   $3  numeric end time; 10 is added before it is passed on
#   $4  optional master log file; when given, the matching span is extracted
#       with dumplogset into $HALOG_F
# Globals read: DESTDIR, HOME, nodes, host, rsh, ssh_user, report_data,
#   sanitize_patterns, cluster, log_patterns, extra_logs, search_logs,
#   sos_mode, verbose, maxdepth, compress, HALOG_F, ANALYSIS_F, EVENTS_F.
# Helpers (debug, fatal, log, dumplogset, node_events, shrink) are not defined
# in this part of the file; presumably they come from report.common.
collect_data() {
    label="$1"
    start=$(expr "$2" - 10)
    end=$(expr "$3" + 10)
    masterlog=$4

    if [ -n "$DESTDIR" ]; then
        # A relative destination is resolved against the current directory.
        case "$DESTDIR" in
            /*) l_base=$DESTDIR ;;
            *)  l_base="$(pwd)/$DESTDIR" ;;
        esac
        debug "Using custom scratch dir: $l_base"
        r_base=$(basename "$l_base")
    else
        l_base=$HOME/$label
        r_base=$label
    fi

    if [ -e "$l_base" ]; then
        fatal "Output directory $l_base already exists, specify an alternate name with --dest"
    fi
    mkdir -p "$l_base" || fatal "Could not create output directory $l_base"

    if [ -n "$masterlog" ]; then
        dumplogset "$masterlog" "$start" "$end" > "$l_base/$HALOG_F"
    fi

    for node in $nodes; do
        # Settings prepended to the collector script that runs on each node.
        cat <<EOF >"$l_base/.env"
LABEL="$label"
REPORT_HOME="$r_base"
REPORT_MASTER="$host"
REPORT_TARGET="$node"
LOG_START=$start
LOG_END=$end
REMOVE=1
SANITIZE="$sanitize_patterns"
CLUSTER=$cluster
LOG_PATTERNS="$log_patterns"
EXTRA_LOGS="$extra_logs"
SEARCH_LOGS=$search_logs
SOS_MODE=$sos_mode
verbose=$verbose
maxdepth=$maxdepth
EOF

        if [ "$host" = "$node" ]; then
            # Local node: the later assignment overrides REPORT_HOME so the
            # collector works directly in the scratch directory.
            cat <<EOF >>"$l_base/.env"
REPORT_HOME="$l_base"
EOF
            cat "$l_base/.env" "$report_data/report.common" "$report_data/report.collector" \
                > "$l_base/collector"
            bash "$l_base/collector"
        else
            # Remote node: ship the collector over $rsh and unpack the tar
            # stream it sends back.  $rsh is deliberately unquoted because it
            # holds a command plus options (default "ssh -T").
            cat "$l_base/.env" "$report_data/report.common" "$report_data/report.collector" \
                | $rsh -l "$ssh_user" "$node" -- "mkdir -p $r_base; cat > $r_base/collector; bash $r_base/collector" \
                | (cd "$l_base" && tar mxf -)
        fi
    done

    analyze "$l_base" > "$l_base/$ANALYSIS_F"
    if [ -f "$l_base/$HALOG_F" ]; then
        node_events "$l_base/$HALOG_F" > "$l_base/$EVENTS_F"
    fi

    for node in $nodes; do
        cat "$l_base/$node/$ANALYSIS_F" >> "$l_base/$ANALYSIS_F"
        if [ -s "$l_base/$node/$EVENTS_F" ]; then
            cat "$l_base/$node/$EVENTS_F" >> "$l_base/$EVENTS_F"
        elif [ -s "$l_base/$HALOG_F" ]; then
            # No per-node events: take this node's lines (4th field equals the
            # node name) from the events extracted from the master log.  The
            # name is passed with -v so it is compared as data, not spliced
            # into the awk program.
            awk -v n="$node" '$4 == n' "$l_base/$EVENTS_F" >> "$l_base/$node/$EVENTS_F"
        fi
    done

    log " "
    if [ "$compress" = 1 ]; then
        fname=$(shrink "$l_base")
        # ${l_base:?} aborts rather than running rm -rf on an empty path.
        rm -rf -- "${l_base:?}"
        log "Collected results are available in $fname"
        log " "
        log "Please create a bug entry at"
        log " @BUG_URL@"
        log "Include a description of your problem and attach this tarball"
        log " "
        log "Thank you for taking time to create this report."
    else
        log "Collected results are available in $l_base"
    fi
    log " "
}
| |
| |
| |
| |
# Compare two CIB files with crm_diff.
#
# Arguments:
#   $1  first CIB file
#   $2  second CIB file
# The comparison is refused when the first file's directory carries a RUNNING
# (or STOPPED) marker file and the second file's directory does not.
# Sets the global DIFF_OK (1 = comparable, 0 = not comparable).
# Returns the status of the last command run (crm_diff, info or echo).
cibdiff() {
    d1=$(dirname "$1")
    d2=$(dirname "$2")

    if [ -f "$d1/RUNNING" ] && [ ! -f "$d2/RUNNING" ]; then
        DIFF_OK=0
    elif [ -f "$d1/STOPPED" ] && [ ! -f "$d2/STOPPED" ]; then
        DIFF_OK=0
    else
        DIFF_OK=1
    fi

    if [ "$DIFF_OK" -eq 1 ]; then
        # command -v is the portable way to probe for a command.
        if command -v crm_diff > /dev/null 2>&1; then
            crm_diff -c -n "$1" -o "$2"
        else
            info "crm_diff(8) not found, cannot diff CIBs"
        fi
    else
        echo "can't compare cibs from running and stopped systems"
    fi
}
| |
# Compare two files, using cibdiff for CIB files and diff -u for the rest.
#
# Arguments:
#   $1, $2  files to compare; both must be regular files
# Globals read: CIB_F (basename that identifies a CIB file).
# Returns 1 with a message on stdout when either file is missing, otherwise
# the status of cibdiff or diff.
diffcheck() {
    if [ ! -f "$1" ]; then
        echo "$1 does not exist"
        return 1
    fi
    if [ ! -f "$2" ]; then
        echo "$2 does not exist"
        return 1
    fi
    # "$CIB_F" is quoted so it is matched literally rather than as a glob.
    case $(basename "$1") in
        "$CIB_F") cibdiff "$1" "$2" ;;
        *)        diff -u "$1" "$2" ;;
    esac
}
| |
| |
| |
| |
# Keep a single copy of a per-node file in the report's top directory and
# replace each node's copy with a symlink to it.
#
# Arguments:
#   $1  report base directory (contains one sub-directory per node)
#   $2  file name, relative to each node directory
# Globals read: nodes (whitespace-separated node names).
consolidate() {
    for n in $nodes; do
        if [ -f "$1/$2" ]; then
            # Shared copy already in place: drop this node's duplicate.
            rm "$1/$n/$2"
        else
            # First node seen: its copy becomes the shared one.
            mv "$1/$n/$2" "$1"
        fi
        ln -s "../$2" "$1/$n"
    done
}
| |
# Compare one per-node file across all nodes, using the first node in $nodes
# as the reference for every other node.
#
# Arguments:
#   $1  report base directory
#   $2  file name, relative to each node directory
# Globals read: nodes.  Sets globals rc and node0.
# Returns the sum of the diffcheck statuses (0 when every comparison
# succeeded), capped at 255 so a large sum cannot wrap around to 0 and be
# mistaken for success.
analyze_one() {
    rc=0
    node0=""
    for n in $nodes; do
        if [ -n "$node0" ]; then
            diffcheck "$1/$node0/$2" "$1/$n/$2"
            rc=$((rc + $?))
        else
            node0=$n
        fi
    done
    if [ "$rc" -gt 255 ]; then
        rc=255
    fi
    return "$rc"
}
| |
# Diff the membership, CIB, crm_mon and sysinfo files across nodes and print
# one status line per file.  A file that is identical on all nodes, other
# than the CIB, is consolidated into a single shared copy.
#
# Arguments:
#   $1  report base directory
# Globals read: MEMBERSHIP_F, CIB_F, CRM_MON_F, SYSINFO_F.
analyze() {
    flist="$MEMBERSHIP_F $CIB_F $CRM_MON_F $SYSINFO_F"
    for f in $flist; do
        # The name is passed as data, never as the printf format string.
        printf 'Diff %s... ' "$f"
        if ! ls "$1"/*/"$f" >/dev/null 2>&1; then
            echo "no $1/*/$f :/"
            continue
        fi
        if analyze_one "$1" "$f"; then
            echo "OK"
            if [ "$f" != "$CIB_F" ]; then
                consolidate "$1" "$f"
            fi
        else
            echo ""
        fi
    done
}
| |
# Extract report data for each CTS test set named in $tests.
#
# $tests is a comma- or space-separated list; each entry is either a single
# test number or a START-END range.  For each entry the CTS control log is
# searched for the lines that mark the start of the first test and of the
# test following the last one, and the time span between them is handed to
# collect_data.
#
# Globals read/written: tests, ctslog, nodes, start_time, end_time, label.
# Helpers (findmsg, linetime, time2str, log, debug, fatal) are not defined in
# this part of the file; presumably they come from report.common.
do_cts() {
    test_sets=$(echo "$tests" | tr ',' ' ')
    for test_set in $test_sets; do

        start_time=0
        start_test=$(echo "$test_set" | tr '-' ' ' | awk '{print $1}')

        end_time=0
        end_test=$(echo "$test_set" | tr '-' ' ' | awk '{print $2}')

        if [ -z "$end_test" ]; then
            msg="Extracting test $start_test"
            label="CTS-$start_test-$(date +"%b-%d-%Y")"
            end_test=$(expr "$start_test" + 1)
        else
            msg="Extracting tests $start_test to $end_test"
            label="CTS-$start_test-$end_test-$(date +"%b-%d-%Y")"
            end_test=$(expr "$end_test" + 1)
        fi

        # Test 0 has no "Running test" line; use the run's banner instead.
        if [ "$start_test" = 0 ]; then
            start_pat="BEGINNING [0-9].* TESTS"
        else
            start_pat="Running test.*\[ *$start_test\]"
        fi

        if [ -z "$ctslog" ]; then
            ctslog=$(findmsg 1 "$start_pat")

            if [ -z "$ctslog" ]; then
                fatal "No CTS control file detected"
            else
                log "Using CTS control file: $ctslog"
            fi
        fi

        # The last matching line wins when a test number occurs repeatedly.
        line=$(grep -n "$start_pat" "$ctslog" | tail -1 | sed 's/:.*//')
        if [ -n "$line" ]; then
            start_time=$(linetime "$ctslog" "$line")
        fi

        line=$(grep -n "Running test.*\[ *$end_test\]" "$ctslog" | tail -1 | sed 's/:.*//')
        if [ -n "$line" ]; then
            end_time=$(linetime "$ctslog" "$line")
        fi

        if [ -z "$nodes" ]; then
            # Node names are taken from the text after the last "*" on the
            # CTS lines that contain " * ".
            nodes=$(grep CTS: "$ctslog" | grep -v debug: | grep ' \* ' \
                | sed 's:.*\*::g' | sort -u | tr '\n' ' ')
            log "Calculated node list: $nodes"
        fi

        if [ "$end_time" -lt "$start_time" ]; then
            debug "Test didn't complete, grabbing everything up to now"
            end_time=$(date +%s)
        fi

        if [ "$start_time" != 0 ]; then
            log "$msg ($(time2str "$start_time") to $(time2str "$end_time"))"
            collect_data "$label" "$start_time" "$end_time" "$ctslog"
        else
            fatal "$msg failed: not found"
        fi
    done
}
| |
# Read XML on stdin and write, space-separated on one line, the value of the
# first uname="..." attribute found on each input line that mentions uname.
node_names_from_xml() {
    awk '
        /uname/ {
            for (i = 1; i <= NF; i++) {
                if ($i !~ /^uname=/)
                    continue
                # Strip the attribute name plus opening quote, then everything
                # from the closing quote onward.
                name = $i
                sub(/^uname=./, "", name)
                sub(/".*/, "", name)
                print name
                next
            }
        }
    ' | tr "\n" " "
}
| |
# Print the cluster's node names (space-separated) on stdout.
#
# Arguments:
#   $1  cluster type; stored in the global "cluster" but not otherwise used
#       inside this function
# Sources, in order of preference:
#   1. the live CIB, queried through cibadmin
#   2. the on-disk CIB at @CRM_CONFIG_DIR@/cib.xml
# Prints nothing when neither source is available.
getnodes() {
    cluster="$1"

    # 1. Live CIB
    if cib_nodes=$(cibadmin -Ql -o nodes 2>/dev/null); then
        debug "Querying CIB for nodes"
        echo "$cib_nodes" | node_names_from_xml
        return
    fi

    # 2. On-disk CIB
    [ -f "@CRM_CONFIG_DIR@/cib.xml" ] || return 0
    debug "Querying on-disk CIB for nodes"
    grep "node " "@CRM_CONFIG_DIR@/cib.xml" | node_names_from_xml
}
| |
# Main flow: either extract CTS test data (-T/--cts) or collect a report for
# the time range given with -f/--from (and optionally -t/--to).

if [ "$compress" -eq 1 ]; then
    require_tar
fi

if [ -n "$tests" ]; then
    do_cts

elif [ -n "$start_time" ]; then
    masterlog=""

    if [ -z "$sanitize_patterns" ]; then
        log "WARNING: The tarball produced by this program may contain"
        log " sensitive information such as passwords."
        log ""
        log "We will attempt to remove such information if you use the"
        log "-p option. For example: -p \"pass.*\" -p \"user.*\""
        log ""
        log "However, doing this may reduce the ability for the recipients"
        log "to diagnose issues and generally provide assistance."
        log ""
        log "IT IS YOUR RESPONSIBILITY TO PROTECT SENSITIVE DATA FROM EXPOSURE"
        log ""
    fi

    # Detect the cluster type unless one was forced on the command line.
    if [ -z "$cluster" ] || [ "$cluster" = "any" ]; then
        cluster=$(get_cluster_type)
    fi

    # Work out the node list unless one was given.
    if [ -z "$nodes" ]; then
        nodes=$(getnodes "$cluster")
        if [ -n "$nodes" ]; then
            log "Calculated node list: $nodes"
        else
            fatal "Cannot determine nodes; specify --nodes or --single-node"
        fi
    fi

    # Decide whether this host is one of the nodes.  The name is compared
    # exactly, as collect_data does when it picks local collection, so that
    # "node1" is not mistaken for a member because "node10" is in the list.
    is_cluster_node=0
    for n in $nodes; do
        if [ "$n" = "$host" ]; then
            is_cluster_node=1
            break
        fi
    done

    if [ "$is_cluster_node" -eq 1 ]; then
        debug "We are a cluster node"
    else
        debug "We are a log master"
        masterlog=$(findmsg 1 'pacemaker-controld\|CTS')
    fi

    # Default the end of the range to "now" (seconds since the epoch), using
    # the same date(1) call do_cts uses.
    if [ -z "$end_time" ]; then
        end_time=$(date +%s)
    fi
    label="pcmk-$(date +"%a-%d-%b-%Y")"
    log "Collecting data from $nodes ($(time2str "$start_time") to $(time2str "$end_time"))"
    collect_data "$label" "$start_time" "$end_time" "$masterlog"
else
    fatal "Not sure what to do, no tests or time ranges to extract"
fi
| |
| |