#!/bin/sh
#
# ocf:pacemaker:SystemHealth resource agent
#
# Copyright 2009-2019 the Pacemaker project contributors
#
# The version control history for this file may have further details.
#
# This source code is licensed under the GNU General Public License version 2
# (GPLv2) WITHOUT ANY WARRANTY.
#
#######################################################################
# Initialization:
# Locate and source the OCF shell function library. OCF_FUNCTIONS may be
# preset by the caller; otherwise it is derived from OCF_ROOT (expected to be
# supplied by the environment that invokes this agent).
# The default assignments are double-quoted so the expanded value is never
# word-split or glob-expanded by the ':' no-op command.
: "${OCF_FUNCTIONS:=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}"
. "${OCF_FUNCTIONS}"
# The requested action defaults to the first command-line argument.
: "${__OCF_ACTION:=$1}"
#######################################################################
#######################################
# Print this agent's OCF metadata (XML) on stdout.
#
# The "program" parameter is declared because the agent reads
# OCF_RESKEY_program; its default mirrors the fallback value the agent
# assigns when the parameter is not configured.
# The heredoc delimiter is quoted: the document contains nothing that should
# be expanded by the shell.
#######################################
meta_data() {
    cat <<'END'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SystemHealth" version="0.1">
<version>1.0</version>
<longdesc lang="en">
This is a SystemHealth Resource Agent. It is used to monitor
the health of a system via IPMI.
</longdesc>
<shortdesc lang="en">SystemHealth resource agent</shortdesc>
<parameters>
<parameter name="program" unique="0" required="0">
<longdesc lang="en">
Full path of the executable registered with servicelog_notify as the
command to run for matching service log events. It must exist and be
executable.
</longdesc>
<shortdesc lang="en">Event handler program</shortdesc>
<content type="string" default="/usr/sbin/notifyServicelogEvent" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="monitor" timeout="20s" />
<action name="reload" timeout="20s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
}
#######################################################################
#######################################
# Print a short usage message on stdout.
# The action list includes "reload", which the agent both advertises in its
# metadata and handles in its action dispatcher.
#######################################
SystemHealth_usage() {
    cat <<END
usage: $0 {start|stop|monitor|reload|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
#######################################
# Verify that everything the agent needs is installed.
# Globals:
#   OCF_RESKEY_program (read) - handler program path
#   OCF_ERR_INSTALLED, OCF_SUCCESS (read)
# Returns:
#   OCF_ERR_INSTALLED if servicelog_notify or ipmiservicelogd cannot be
#   resolved as a command, or if OCF_RESKEY_program is not an executable
#   file; OCF_SUCCESS otherwise.
#######################################
SystemHealth_check_tools() {
    # 'command -v' is the POSIX builtin for resolving a command name;
    # which(1) is a separate, optional program and its absence would make
    # every check here fail spuriously.
    if ! command -v servicelog_notify >/dev/null 2>&1; then
        ocf_log err "servicelog_notify not found!"
        return "$OCF_ERR_INSTALLED"
    fi

    if ! command -v ipmiservicelogd >/dev/null 2>&1; then
        ocf_log err "ipmiservicelogd not found!"
        return "$OCF_ERR_INSTALLED"
    fi

    if [ ! -x "$OCF_RESKEY_program" ]; then
        ocf_log err "$OCF_RESKEY_program not found!"
        return "$OCF_ERR_INSTALLED"
    fi

    return "$OCF_SUCCESS"
}
#######################################
# Start the resource: bring up the IPMI service, launch ipmiservicelogd and
# register the handler program with servicelog_notify.
# Globals:
#   OCF_RESKEY_program (read) - handler program to register
#   RC (written)
# Returns:
#   OCF_SUCCESS if already running or if all start steps succeeded;
#   OCF_ERR_GENERIC if the monitor reports an error, the IPMI service cannot
#   be started, or the handler cannot be registered.
#######################################
SystemHealth_start() {
    SystemHealth_monitor
    RC=$?
    if [ "$RC" -eq "$OCF_ERR_GENERIC" ]; then
        return "$OCF_ERR_GENERIC"
    elif [ "$RC" -eq "$OCF_SUCCESS" ]; then
        ocf_log warn "starting an already started SystemHealth"
        return "$OCF_SUCCESS"
    fi

    if ! service ipmi start >/dev/null 2>&1; then
        ocf_log err "Could not start service IPMI!"
        return "$OCF_ERR_GENERIC"
    fi

    # The daemon is launched asynchronously. The exit status of an
    # asynchronous command is always 0 at this point, so it cannot be used
    # to detect a failed launch; a daemon that fails to come up is reported
    # by a later monitor instead.
    ipmiservicelogd smi 0 >/dev/null 2>&1 &

    if ! servicelog_notify --add --type=EVENT --command="$OCF_RESKEY_program" \
            --method=num_arg --match='type=4' >/dev/null 2>&1; then
        ocf_log err "servicelog_notify register handler failed!"
        return "$OCF_ERR_GENERIC"
    fi

    return "$OCF_SUCCESS"
}
#######################################
# Stop the resource: terminate ipmiservicelogd and unregister the handler
# program from servicelog_notify.
# Globals:
#   OCF_RESKEY_program (read) - handler program to unregister
#   RC, RC1, RC2 (written)
# Returns:
#   OCF_SUCCESS when the resource is stopped afterwards (including when it
#   was already stopped, or when only a stale pid file was left behind);
#   OCF_ERR_GENERIC when the daemon could not be killed, the handler could
#   not be removed, or the monitor returned an unexpected code.
#######################################
SystemHealth_stop() {
    SystemHealth_monitor
    RC=$?

    if [ "$RC" -eq "$OCF_NOT_RUNNING" ]; then
        ocf_log warn "stopping an already stopped SystemHealth"
        return "$OCF_SUCCESS"
    fi

    if [ "$RC" -eq "$OCF_SUCCESS" ]; then
        killall ipmiservicelogd
        RC1=$?
        if [ "$RC1" -ne 0 ]; then
            ocf_log err "Could not stop ipmiservicelogd!"
        fi

        # Attempt the handler removal even if the kill failed, so that as
        # much as possible is cleaned up; both results decide the outcome.
        servicelog_notify --remove --command="$OCF_RESKEY_program" >/dev/null 2>&1
        RC2=$?
        if [ "$RC2" -ne 0 ]; then
            ocf_log err "servicelog_notify remove handler failed!"
        fi

        if [ "$RC1" -eq 0 ] && [ "$RC2" -eq 0 ]; then
            return "$OCF_SUCCESS"
        fi
        return "$OCF_ERR_GENERIC"
    fi

    if [ "$RC" -eq "$OCF_ERR_GENERIC" ]; then
        # The monitor reports a generic error when the pid file names a
        # process that no longer exists. The daemon is therefore already
        # gone; failing the stop here would report a stopped resource as
        # unstoppable. Only a registered handler may still need removing.
        if servicelog_notify --list --command="$OCF_RESKEY_program" >/dev/null 2>&1; then
            if ! servicelog_notify --remove --command="$OCF_RESKEY_program" >/dev/null 2>&1; then
                ocf_log err "servicelog_notify remove handler failed!"
                return "$OCF_ERR_GENERIC"
            fi
        fi
        return "$OCF_SUCCESS"
    fi

    ocf_log err "SystemHealth_stop: should not be here!"
    return "$OCF_ERR_GENERIC"
}
#######################################
# Report the resource state. A monitor must distinguish three states:
# running (success), failed (error) and cleanly stopped (not running).
# Globals:
#   OCF_RESKEY_program (read) - handler program expected to be registered
#   SystemHealth_pidfile, SystemHealth_pid (written; plain variables, as
#   this script does not rely on 'local')
# Arguments:
#   $1 - optional pid file path; defaults to /var/run/ipmiservicelogd.pid0
# Returns:
#   OCF_NOT_RUNNING if the pid file is missing, or the daemon is alive but
#     the handler is not listed by servicelog_notify;
#   OCF_ERR_GENERIC if the pid file names a process that is not running
#     (the stale pid file is removed as a side effect);
#   OCF_SUCCESS if the daemon is alive and the handler is listed.
#######################################
SystemHealth_monitor() {
    SystemHealth_pidfile=${1:-/var/run/ipmiservicelogd.pid0}

    if [ ! -f "$SystemHealth_pidfile" ]; then
        ocf_log debug "ipmiservicelogd is not running!"
        return "$OCF_NOT_RUNNING"
    fi

    # Read the pid once so the value that is tested is the value logged.
    SystemHealth_pid=$(cat "$SystemHealth_pidfile")
    if ! ps -p "$SystemHealth_pid" >/dev/null 2>&1; then
        ocf_log debug "ipmiservicelogd's pid $SystemHealth_pid is not running!"
        # -f: the file may already have been removed by someone else.
        rm -f -- "$SystemHealth_pidfile"
        return "$OCF_ERR_GENERIC"
    fi

    if servicelog_notify --list --command="$OCF_RESKEY_program" >/dev/null 2>&1; then
        return "$OCF_SUCCESS"
    fi
    return "$OCF_NOT_RUNNING"
}
#######################################
# Validate the installation by delegating to SystemHealth_check_tools.
# Globals:
#   RC (written) - status of the tool check
# Returns:
#   The tool check's status when it is non-zero, OCF_SUCCESS otherwise.
#######################################
SystemHealth_validate() {
    SystemHealth_check_tools
    RC=$?
    case $RC in
        0) return $OCF_SUCCESS ;;
        *) return $RC ;;
    esac
}
# Default handler program when the "program" parameter is not configured.
# Quoted so the expanded value is not word-split or glob-expanded.
: "${OCF_RESKEY_program:=/usr/sbin/notifyServicelogEvent}"

# Informational actions are answered before the tool check so they work even
# when the required tools are not installed.
case "$__OCF_ACTION" in
    meta-data)
        meta_data
        exit "$OCF_SUCCESS"
        ;;
    usage|help)
        SystemHealth_usage
        exit "$OCF_SUCCESS"
        ;;
esac

SystemHealth_check_tools
RC=$?
if [ "$RC" -ne 0 ]; then
    case "$__OCF_ACTION" in
        # With the tools missing, a stop is reported as successful rather
        # than as an installation error.
        stop) exit "$OCF_SUCCESS" ;;
        *)    exit "$RC" ;;
    esac
fi

case "$__OCF_ACTION" in
    start)
        SystemHealth_start
        ;;
    stop)
        SystemHealth_stop
        ;;
    monitor)
        SystemHealth_monitor
        ;;
    reload)
        ocf_log info "Reloading..."
        SystemHealth_start
        ;;
    validate-all)
        # Run the dedicated validation function instead of relying on the
        # implicit exit status of an empty case arm.
        SystemHealth_validate
        ;;
    *)
        SystemHealth_usage
        exit "$OCF_ERR_UNIMPLEMENTED"
        ;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit "$rc"
# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: