| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| host=`uname -n` |
| shorthost=`echo $host | sed s:\\\\..*::` |
| if [ -z "$verbose" ]; then |
| verbose=0 |
| fi |
| |
| |
| EVENTS_F=events.txt |
| ANALYSIS_F=analysis.txt |
| HALOG_F=cluster-log.txt |
| BT_F=backtraces.txt |
| SYSINFO_F=sysinfo.txt |
| SYSSTATS_F=sysstats.txt |
| DLM_DUMP_F=dlm_dump.txt |
| CRM_MON_F=crm_mon.txt |
| MEMBERSHIP_F=members.txt |
| CRM_VERIFY_F=crm_verify.txt |
| PERMISSIONS_F=permissions.txt |
| CIB_F=cib.xml |
| CIB_TXT_F=cib.txt |
| DRBD_INFO_F=drbd_info.txt |
| |
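| # Log patterns of interest, one "<label> <extended regexp>" pair per line |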
| EVENT_PATTERNS=" |
| state do_state_transition |
| membership pcmk_peer_update.*(lost|memb): |
| quorum (crmd|pacemaker-controld).*crm_update_quorum |
| pause Process.pause.detected |
| resources (lrmd|pacemaker-execd).*rsc:(start|stop) |
| stonith te_fence_node|fenced.*(requests|(Succeeded|Failed).to.|result=) |
| start_stop shutdown.decision|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete |
| " |
| |
| |
| |
| |
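| # Packages whose installed versions should be recorded in the report |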
| PACKAGES="pacemaker pacemaker-libs pacemaker-cluster-libs libpacemaker3 |
| pacemaker-remote pacemaker-pygui pacemaker-pymgmt pymgmt-client |
| corosync corosynclib libcorosync4 |
| resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord |
| ocfs2-tools ocfs2-tools-o2cb ocfs2console |
| ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace |
| drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace |
| drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen |
| lvm2 lvm2-clvm cmirrord |
| libdlm libdlm2 libdlm3 |
| hawk ruby lighttpd |
| kernel-default kernel-pae kernel-xen |
| glibc |
| " |
| |
| |
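| # Candidate locations of system and cluster logs |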
| SYSLOGS=" |
| /var/log/* |
| /var/logs/* |
| /var/syslog/* |
| /var/adm/* |
| /var/log/ha/* |
| /var/log/cluster/* |
| /var/log/pacemaker/* |
| " |
| |
| |
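| # Cached result of has_remoted(): -1 = not checked yet, 0 = found, 1 = not found |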
| REMOTED_STATUS=-1 |
| |
| |
| |
| |
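| # Append a message to this host's report output file (or the local summary), if one exists |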
| record() { |
| if [ x != x"$REPORT_HOME" -a -d "${REPORT_HOME}/$shorthost" ]; then |
| rec="${REPORT_HOME}/$shorthost/report.out" |
| |
| elif [ x != x"${l_base}" -a -d "${l_base}" ]; then |
| rec="${l_base}/report.summary" |
| |
| else |
| rec="/dev/null" |
| fi |
| printf "%-10s $*\n" "$shorthost:" 2>&1 >> "${rec}" |
| } |
| |
| log() { |
| printf "%-10s $*\n" "$shorthost:" 1>&2 |
| record "$*" |
| } |
| |
| debug() { |
| if [ $verbose -gt 0 ]; then |
| log "Debug: $*" |
| else |
| record "Debug: $*" |
| fi |
| } |
| |
| info() { |
| log "$*" |
| } |
| |
| warning() { |
| log "WARN: $*" |
| } |
| |
| fatal() { |
| log "ERROR: $*" |
| exit 1 |
| } |
| |
| require_tar() { |
| which tar >/dev/null 2>&1 |
| if [ $? -ne 0 ]; then |
| fatal "Required program 'tar' not found, please install and re-run" |
| fi |
| } |
| |
| |
| |
| |
| |
| |
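| # Check whether a process matching $1 is running as root, the cluster daemon |
| # user/group, or the current user. The first character of the pattern is |
| # bracketed so the grep does not match its own process. |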
| is_running() { |
| ps -G "0 $(getent group '@CRM_DAEMON_GROUP@' 2>/dev/null | cut -d: -f3) $(id -G)" \ |
| -u "0 @CRM_DAEMON_USER@ $(id -u)" -f \ |
| | grep -Eqs $(echo "$1" | sed -e 's/^\(.\)/[\1]/') |
| } |
| |
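| # Check whether pacemaker-remoted (or the older pacemaker_remoted) is installed, |
| # caching the result in REMOTED_STATUS |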
| has_remoted() { |
| if [ $REMOTED_STATUS -eq -1 ]; then |
| REMOTED_STATUS=1 |
| if which pacemaker-remoted >/dev/null 2>&1; then |
| REMOTED_STATUS=0 |
| |
| elif which pacemaker_remoted >/dev/null 2>&1; then |
| REMOTED_STATUS=0 |
| elif [ -x "@sbindir@/pacemaker-remoted" ]; then |
| REMOTED_STATUS=0 |
| elif [ -x "@sbindir@/pacemaker_remoted" ]; then |
| REMOTED_STATUS=0 |
| else |
| |
| |
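| # Not in the PATH or the configured sbindir, so try other common install locations |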
| for d in /{usr,opt}/{local/,}{s,}bin; do |
| if [ -x "${d}/pacemaker-remoted" ]; then |
| REMOTED_STATUS=0 |
| elif [ -x "${d}/pacemaker_remoted" ]; then |
| REMOTED_STATUS=0 |
| fi |
| done |
| fi |
| fi |
| return $REMOTED_STATUS |
| } |
| |
| |
| found_dir() { |
| echo "$2" |
| info "Pacemaker $1 found in: $2" |
| } |
| |
| detect_daemon_dir() { |
| info "Searching for where Pacemaker daemons live... this may take a while" |
| |
| for d in \ |
| {/usr,/usr/local,/opt/local,@exec_prefix@}/{libexec,lib64,lib}/pacemaker |
| do |
| |
| |
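| # Different packages install different files here, so check for more than one |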
| if [ -e $d/pacemaker-schedulerd ] || [ -e $d/cts-exec-helper ]; then |
| found_dir "daemons" "$d" |
| return |
| fi |
| done |
| |
| |
| if has_remoted; then |
| info "Pacemaker daemons not found (this appears to be a Pacemaker Remote node)" |
| return |
| fi |
| |
| for f in $(find / -maxdepth $maxdepth -type f \( -name pacemaker-schedulerd -o -name cts-exec-helper \)); do |
| d=$(dirname "$f") |
| found_dir "daemons" "$d" |
| return |
| done |
| |
| fatal "Pacemaker daemons not found (nonstandard installation?)" |
| } |
| |
| detect_cib_dir() { |
| d="${local_state_dir}/lib/pacemaker/cib" |
| if [ -f "$d/cib.xml" ]; then |
| found_dir "config files" "$d" |
| return |
| fi |
| |
| |
| if has_remoted; then |
| info "Pacemaker config not found (this appears to be a Pacemaker Remote node)" |
| return |
| fi |
| |
| info "Searching for where Pacemaker keeps config information... this may take a while" |
| |
| for f in $(find / -maxdepth $maxdepth -type f -name cib.xml); do |
| d=$(dirname $f) |
| found_dir "config files" "$d" |
| return |
| done |
| |
| warning "Pacemaker config not found (nonstandard installation?)" |
| } |
| |
| detect_state_dir() { |
| if [ -n "$CRM_CONFIG_DIR" ]; then |
| |
| |
| dirname "$CRM_CONFIG_DIR" |
| |
| |
| elif [ -d "$local_state_dir/lib/pacemaker" ]; then |
| echo $local_state_dir/lib/pacemaker |
| fi |
| } |
| |
| detect_pe_dir() { |
| config_root="$1" |
| |
| d="$config_root/pengine" |
| if [ -d "$d" ]; then |
| found_dir "scheduler inputs" "$d" |
| return |
| fi |
| |
| if has_remoted; then |
| info "Pacemaker scheduler inputs not found (this appears to be a Pacemaker Remote node)" |
| return |
| fi |
| |
| info "Searching for where Pacemaker keeps scheduler inputs... this may take a while" |
| for d in $(find / -maxdepth $maxdepth -type d -name pengine); do |
| found_dir "scheduler inputs" "$d" |
| return |
| done |
| |
| fatal "Pacemaker scheduler inputs not found (nonstandard installation?)" |
| } |
| |
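| # Locate the Pacemaker directories on this host and set the corresponding variables |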
| detect_host() { |
| local_state_dir=@localstatedir@ |
| |
| if [ -d $local_state_dir/run ]; then |
| CRM_STATE_DIR=$local_state_dir/run/crm |
| else |
| info "Searching for where Pacemaker keeps runtime data... this may take a while" |
| for d in `find / -maxdepth $maxdepth -type d -name run`; do |
| local_state_dir=`dirname $d` |
| CRM_STATE_DIR=$d/crm |
| break |
| done |
| info "Found: $CRM_STATE_DIR" |
| fi |
| debug "Machine runtime directory: $local_state_dir" |
| debug "Pacemaker runtime data located in: $CRM_STATE_DIR" |
| |
| CRM_DAEMON_DIR=$(detect_daemon_dir) |
| CRM_CONFIG_DIR=$(detect_cib_dir) |
| config_root=$(detect_state_dir) |
| |
| |
| BLACKBOX_DIR=$config_root/blackbox |
| debug "Pacemaker blackboxes (if any) located in: $BLACKBOX_DIR" |
| |
| PE_STATE_DIR=$(detect_pe_dir "$config_root") |
| |
| CRM_CORE_DIRS="" |
| for d in $config_root/cores $local_state_dir/lib/corosync; do |
| if [ -d $d ]; then |
| CRM_CORE_DIRS="$CRM_CORE_DIRS $d" |
| fi |
| done |
| debug "Core files located under: $CRM_CORE_DIRS" |
| } |
| |
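| # Convert a Unix timestamp to a human-readable local date and time |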
| time2str() { |
| perl -e "use POSIX; print strftime('%x %X',localtime($1));" |
| } |
| |
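| # Parse a date/time string into a Unix timestamp, using Date::Parse if available |
| # and falling back to Date::Manip; prints nothing if the string cannot be parsed |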
| get_time() { |
| perl -e "\$time=\"$*\";" -e ' |
| $unix_tm = 0; |
| eval "use Date::Parse"; |
| if (index($time, ":") < 0) { |
| } elsif (!$@) { |
| $unix_tm = str2time($time); |
| } else { |
| eval "use Date::Manip"; |
| if (!$@) { |
| $unix_tm = UnixDate(ParseDateString($time), "%s"); |
| } |
| } |
| if ($unix_tm != "") { |
| print int($unix_tm); |
| } else { |
| print ""; |
| } |
| ' |
| } |
| |
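| # The next three helpers extract the timestamp portion of a log line in syslog |
| # (first three fields), legacy (second field, with "_" between date and time), |
| # and ISO 8601 (first field) formats respectively |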
| get_time_syslog() { |
| awk '{print $1,$2,$3}' |
| } |
| |
| get_time_legacy() { |
| awk '{print $2}' | sed 's/_/ /' |
| } |
| |
| get_time_iso8601() { |
| awk '{print $1}' |
| } |
| |
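| # Print which timestamp format (syslog, iso8601, or legacy) the given line uses, if any |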
| get_time_format_for_string() { |
| l="$*" |
| t=$(get_time `echo $l | get_time_syslog`) |
| if [ "x$t" != x ]; then |
| echo syslog |
| return |
| fi |
| |
| t=$(get_time `echo $l | get_time_iso8601`) |
| if [ "x$t" != x ]; then |
| echo iso8601 |
| return |
| fi |
| |
| t=$(get_time `echo $l | get_time_legacy`) |
| if [ "x$t" != x ]; then |
| echo legacy |
| return |
| fi |
| } |
| |
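| # Read up to 10 lines from stdin and print the timestamp format of the first parsable line |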
| get_time_format() { |
| t=0 l="" func="" |
| trycnt=10 |
| while [ $trycnt -gt 0 ] && read l; do |
| func=$(get_time_format_for_string $l) |
| if [ "x$func" != x ]; then |
| break |
| fi |
| trycnt=$(($trycnt-1)) |
| done |
| |
| echo $func |
| } |
| |
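| # Print the Unix time of a log line; $1 is the timestamp format, or empty to auto-detect |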
| get_time_from_line() { |
| GTFL_FORMAT="$1" |
| shift |
| if [ "$GTFL_FORMAT" = "" ]; then |
| GTFL_FORMAT=$(get_time_format_for_string "$@") |
| fi |
| case $GTFL_FORMAT in |
| syslog|legacy|iso8601) |
| get_time $(echo "$@" | get_time_${GTFL_FORMAT}) |
| ;; |
| *) |
| warning "Unknown time format in: $@" |
| ;; |
| esac |
| } |
| |
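| # Print the Unix time of the first line on stdin with a parsable timestamp |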
| get_first_time() { |
| l="" |
| format=$1 |
| while read l; do |
| ts=$(get_time_from_line "$format" "$l") |
| if [ "x$ts" != x ]; then |
| echo "$ts" |
| return |
| fi |
| done |
| } |
| |
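| # Print the Unix time of the last line on stdin with a parsable timestamp (defaulting to now) |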
| get_last_time() { |
| l="" |
| best=`date +%s` |
| format=$1 |
| while read l; do |
| ts=$(get_time_from_line "$format" "$l") |
| if [ "x$ts" != x ]; then |
| best=$ts |
| fi |
| done |
| echo $best |
| } |
| |
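| # Print the Unix time of the first time-stamped line at or after line $2 of file $1 |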
| linetime() { |
| get_time_from_line "" $(tail -n +$2 $1 | grep -a ":[0-5][0-9]:" | head -n 1) |
| } |
| |
| |
| |
| |
| |
| |
| |
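| # Print the names of up to $1 readable system logs (newest first) that contain |
| # pattern $2, skipping files that look like binary data |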
| findmsg() { |
| max=$1 |
| pattern="$2" |
| found=0 |
| |
| |
| candidates=$(ls -1td $SYSLOGS 2>/dev/null) |
| if [ -z "$candidates" ]; then |
| debug "No system logs found to search for pattern \'$pattern\'" |
| return |
| fi |
| |
| |
| SAVE_IFS=$IFS |
| IFS=" |
| " |
| |
| |
| logfiles="" |
| for f in $candidates; do |
| local cat="" |
| |
| |
| if [ ! -f "$f" ] || [ ! -r "$f" ] || [ ! -s "$f" ] ; then |
| continue |
| fi |
| |
| cat=$(find_decompressor "$f") |
| |
| |
| |
| |
| |
| if [ $($cat "$f" 2>/dev/null | head -c 1024 | tr -d '[:print:][:space:]' | wc -c) -gt 256 ] |
| then |
| continue |
| fi |
| |
| |
| $cat "$f" 2>/dev/null | LC_ALL="C" grep -q -e "$pattern" |
| if [ $? -eq 0 ]; then |
| |
| |
| |
| if [ -z "$logfiles" ]; then |
| logfiles="$f" |
| else |
| logfiles="$logfiles |
| $f" |
| fi |
| |
| |
| found=$(($found+1)) |
| if [ $found -ge $max ]; then |
| break |
| fi |
| fi |
| done 2>/dev/null |
| IFS=$SAVE_IFS |
| if [ -z "$logfiles" ]; then |
| debug "Pattern \'$pattern\' not found in any system logs" |
| else |
| debug "Pattern \'$pattern\' found in: [ $logfiles ]" |
| echo "$logfiles" |
| fi |
| } |
| |
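| # Print the lines of log file $1 that match any of the EVENT_PATTERNS |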
| node_events() { |
| if [ -e $1 ]; then |
| Epatt=`echo "$EVENT_PATTERNS" | |
| while read title p; do [ -n "$p" ] && echo -n "|$p"; done | |
| sed 's/.//' |
| ` |
| grep -E "$Epatt" $1 |
| fi |
| } |
| |
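| # Print the first of the given commands that is available in the PATH |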
| pickfirst() { |
| for x; do |
| which $x >/dev/null 2>&1 && { |
| echo $x |
| return 0 |
| } |
| done |
| return 1 |
| } |
| |
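| # Create a tarball of $1, compressed with the best available tool, and print the archive name |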
| shrink() { |
| olddir=$PWD |
| dir=`dirname $1` |
| base=`basename $1` |
| |
| target=$1.tar |
| tar_options="cf" |
| |
| variant=`pickfirst bzip2 gzip xz false` |
| case $variant in |
| bz*) |
| tar_options="jcf" |
| target="$target.bz2" |
| ;; |
| gz*) |
| tar_options="zcf" |
| target="$target.gz" |
| ;; |
| xz*) |
| tar_options="Jcf" |
| target="$target.xz" |
| ;; |
| *) |
| warning "Could not find a compression program, the resulting tarball may be huge" |
| ;; |
| esac |
| |
| if [ -e $target ]; then |
| fatal "Destination $target already exists, specify an alternate name with --dest" |
| fi |
| |
| cd $dir >/dev/null 2>&1 |
| tar $tar_options $target $base >/dev/null 2>&1 |
| if [ $? -ne 0 ]; then |
| fatal "Could not archive $base, please investigate and collect manually" |
| fi |
| cd $olddir >/dev/null 2>&1 |
| |
| echo $target |
| } |
| |
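| # Binary-search log file $1 for the line closest to Unix time $2 and print its |
| # line number; files a gigabyte or larger are skipped as too slow to process |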
| findln_by_time() { |
| local logf=$1 |
| local tm=$2 |
| local first=1 |
| |
| |
| |
| |
| local fileSize=`ls -lh "$logf" | awk '{ print $5 }' | grep -ie G` |
| if [ x$fileSize != x ]; then |
| warning "$logf is ${fileSize} in size and could take many hours to process. Skipping." |
| return |
| fi |
| |
| local last=`wc -l < $logf` |
| while [ $first -le $last ]; do |
| mid=$((($last+$first)/2)) |
| trycnt=10 |
| while [ $trycnt -gt 0 ]; do |
| tmid=`linetime $logf $mid` |
| [ "$tmid" ] && break |
| warning "cannot extract time: $logf:$mid; will try the next one" |
| trycnt=$(($trycnt-1)) |
| |
| first=$(($first-1)) |
| last=$(($last-1)) |
| mid=$((($last+$first)/2)) |
| done |
| if [ -z "$tmid" ]; then |
| warning "giving up on log..." |
| return |
| fi |
| if [ $tmid -gt $tm ]; then |
| last=$(($mid-1)) |
| elif [ $tmid -lt $tm ]; then |
| first=$(($mid+1)) |
| else |
| break |
| fi |
| done |
| echo $mid |
| } |
| |
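| # Print lines $2 through $3 of log file $1 (or through the end of file if $3 is empty) |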
| dumplog() { |
| local logf=$1 |
| local from_line=$2 |
| local to_line=$3 |
| [ "$from_line" ] || |
| return |
| tail -n +$from_line $logf | |
| if [ "$to_line" ]; then |
| head -$(($to_line-$from_line+1)) |
| else |
| cat |
| fi |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
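| # Print the command that decompresses the given file to stdout ("cat" if uncompressed) |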
| find_decompressor() { |
| case $1 in |
| *bz2) echo "bzip2 -dc" ;; |
| *gz) echo "gzip -dc" ;; |
| *xz) echo "xz -dc" ;; |
| *) echo "cat" ;; |
| esac |
| } |
| |
| |
| |
| |
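| # Check whether the given log overlaps the requested time range. Returns: |
| # 0 = unusable or entirely newer than the range, 1 = overlaps the range, |
| # 2 = entirely older than the range (stop searching), 3 = contains the start of the range |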
| is_our_log() { |
| local logf=$1 |
| local from_time=$2 |
| local to_time=$3 |
| |
| local cat=`find_decompressor $logf` |
| local format=`$cat $logf | get_time_format` |
| local first_time=`$cat $logf | head -10 | get_first_time $format` |
| local last_time=`$cat $logf | tail -10 | get_last_time $format` |
| |
| if [ x = "x$first_time" -o x = "x$last_time" ]; then |
| warning "Skipping bad logfile '$1': Could not determine log dates" |
| return 0 |
| fi |
| if [ $from_time -gt $last_time ]; then |
| |
| return 2 |
| fi |
| if [ $from_time -ge $first_time ]; then |
| return 3 |
| fi |
| |
| if [ x = "x$to_time" -o $to_time -ge $first_time ]; then |
| return 1 |
| else |
| return 0 |
| fi |
| } |
| |
| |
| |
| |
| |
| |
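| # Print the given log and any rotated/compressed ancestors that fall within the |
| # requested time range, newest first |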
| arch_logs() { |
| local logf=$1 |
| local from_time=$2 |
| local to_time=$3 |
| |
| |
| |
| ls -t $logf $logf*[0-9z] 2>/dev/null | |
| while read next_log; do |
| is_our_log $next_log $from_time $to_time |
| case $? in |
| 0) ;; |
| 1) echo $next_log |
| debug "Found log $next_log" |
| ;; |
| 2) break;; |
| 3) echo $next_log |
| debug "Found log $next_log" |
| break |
| ;; |
| esac |
| done |
| } |
| |
| |
| |
| |
| drop_tmp_file() { |
| [ -z "$tmp" ] || rm -f "$tmp" |
| } |
| |
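| # Print the portion of (possibly compressed) log $1 between Unix times $2 and $3 |
| # (0 means from the beginning or to the end, respectively) |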
| print_logseg() { |
| local logf=$1 |
| local from_time=$2 |
| local to_time=$3 |
| |
| |
| local cat=`find_decompressor $logf` |
| if [ "$cat" != "cat" ]; then |
| tmp=`mktemp` |
| $cat $logf > $tmp |
| trap drop_tmp_file 0 |
| sourcef=$tmp |
| else |
| sourcef=$logf |
| tmp="" |
| fi |
| |
| if [ "$from_time" = 0 ]; then |
| FROM_LINE=1 |
| else |
| FROM_LINE=`findln_by_time $sourcef $from_time` |
| fi |
| if [ -z "$FROM_LINE" ]; then |
| warning "couldn't find line for time $from_time; corrupt log file?" |
| return |
| fi |
| |
| TO_LINE="" |
| if [ "$to_time" != 0 ]; then |
| TO_LINE=`findln_by_time $sourcef $to_time` |
| if [ -z "$TO_LINE" ]; then |
| warning "couldn't find line for time $to_time; corrupt log file?" |
| return |
| fi |
| if [ $FROM_LINE -lt $TO_LINE ]; then |
| dumplog $sourcef $FROM_LINE $TO_LINE |
| log "Including segment [$FROM_LINE-$TO_LINE] from $logf" |
| else |
| debug "Empty segment [$FROM_LINE-$TO_LINE] from $logf" |
| fi |
| else |
| dumplog $sourcef $FROM_LINE $TO_LINE |
| log "Including all logs after line $FROM_LINE from $logf" |
| fi |
| drop_tmp_file |
| trap "" 0 |
| } |
| |
| |
| |
| |
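| # Print the interesting portions of a rotated log set between two Unix times |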
| dumplogset() { |
| local logf=$1 |
| local from_time=$2 |
| local to_time=$3 |
| |
| local logf_set=`arch_logs $logf $from_time $to_time` |
| if [ x = "x$logf_set" ]; then |
| return |
| fi |
| |
| local num_logs=`echo "$logf_set" | wc -l` |
| local oldest=`echo $logf_set | awk '{print $NF}'` |
| local newest=`echo $logf_set | awk '{print $1}'` |
| local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'` |
| |
| |
| |
| |
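| # The oldest log is printed from $from_time onward, the newest only up to |
| # $to_time, and any logs in between are included in full |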
| case $num_logs in |
| 1) print_logseg $newest $from_time $to_time;; |
| *) |
| print_logseg $oldest $from_time 0 |
| for f in $mid_logfiles; do |
| `find_decompressor $f` $f |
| debug "including complete $f logfile" |
| done |
| print_logseg $newest 0 $to_time |
| ;; |
| esac |
| } |
| |
| |
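| # Print the named stanza ("name { ... }") from a corosync.conf-style file on stdin |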
| getstanza() { |
| awk -v name="$1" ' |
| !in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ { # stanza start |
| if ($1 == name) |
| in_stanza = 1 |
| } |
| in_stanza { print } |
| in_stanza && NF==1 && $1 == "}" { exit } |
| ' |
| } |
| |
| |
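| # Print the value of a variable from the given cluster configuration file |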
| getcfvar() { |
| cf_type=$1; shift; |
| cf_var=$1; shift; |
| cf_file=$* |
| |
| [ -f "$cf_file" ] || return |
| case $cf_type in |
| corosync) |
| sed 's/#.*//' < $cf_file | |
| if [ $# -eq 2 ]; then |
| getstanza "$cf_var" |
| shift 1 |
| else |
| cat |
| fi | |
| awk -v varname="$cf_var" ' |
| NF==2 && match($1,varname":$")==1 { print $2; exit; } |
| ' |
| ;; |
| esac |
| } |
| |
| |
| |
| |
| |
| |
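| # Print which cluster layer is in use: "corosync", or "any" if it cannot be determined |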
| get_cluster_type() { |
| if is_running corosync; then |
| tool=`pickfirst corosync-objctl corosync-cmapctl` |
| case $tool in |
| *objctl) quorum=`$tool -a | grep quorum.provider | sed 's/.*=\s*//'`;; |
| *cmapctl) quorum=`$tool | grep quorum.provider | sed 's/.*=\s*//'`;; |
| esac |
| stack="corosync" |
| |
| |
| |
| |
| elif [ -f /etc/corosync/corosync.conf ]; then |
| stack="corosync" |
| |
| else |
| |
| |
| stack="any" |
| fi |
| |
| debug "Detected the '$stack' cluster stack" |
| echo $stack |
| } |
| |
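| # Print the location of the cluster configuration file for the given stack |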
| find_cluster_cf() { |
| case $1 in |
| corosync) |
| best_size=0 |
| best_file="" |
| |
| |
| for cf in /etc/corosync/corosync.conf; do |
| if [ -f $cf ]; then |
| size=`wc -l $cf | awk '{print $1}'` |
| if [ $size -gt $best_size ]; then |
| best_size=$size |
| best_file=$cf |
| fi |
| fi |
| done |
| if [ -z "$best_file" ]; then |
| debug "Looking for corosync configuration file. This may take a while..." |
| for f in `find / -maxdepth $maxdepth -type f -name corosync.conf`; do |
| best_file=$f |
| break |
| done |
| fi |
| debug "Located corosync config file: $best_file" |
| echo "$best_file" |
| ;; |
| any) |
| |
| |
| ;; |
| *) |
| warning "Unknown cluster type: $1" |
| ;; |
| esac |
| } |
| |
| |
| |
| |
| |
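| # Bail out now if date strings cannot be parsed, since the rest of the report relies on it |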
| t=`get_time "12:00"` |
| if [ "$t" = "" ]; then |
| fatal "please install the perl Date::Parse module (perl-DateTime-Format-DateParse on Fedora/Red Hat)" |
| fi |
| |
| |