dhodovsk / source-git / pacemaker

Forked from source-git/pacemaker 3 years ago
Clone
Blob Blame History Raw
#!/bin/sh
#
# ocf:pacemaker:SystemHealth resource agent
#
# Copyright 2009-2019 the Pacemaker project contributors
#
# The version control history for this file may have further details.
#
# This source code is licensed under the GNU General Public License version 2
# (GPLv2) WITHOUT ANY WARRANTY.
#

#######################################################################
# Initialization:

# Use the caller-supplied OCF_FUNCTIONS path when one is set; otherwise fall
# back to the library location below OCF_ROOT, then source it.
if [ -z "${OCF_FUNCTIONS}" ]; then
    OCF_FUNCTIONS="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"
fi
. "${OCF_FUNCTIONS}"

# The requested action defaults to the first command-line argument.
if [ -z "${__OCF_ACTION}" ]; then
    __OCF_ACTION="$1"
fi

#######################################################################

# Print the resource agent metadata (XML) on stdout.
#
# The "program" parameter is declared here because the agent reads
# OCF_RESKEY_program: it is the command registered with servicelog_notify
# and it defaults to /usr/sbin/notifyServicelogEvent.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SystemHealth" version="0.1">
<version>1.0</version>

<longdesc lang="en">
This is a SystemHealth Resource Agent.  It is used to monitor
the health of a system via IPMI.
</longdesc>
<shortdesc lang="en">SystemHealth resource agent</shortdesc>

<parameters>
<parameter name="program" unique="0" required="0">
<longdesc lang="en">
Path to the executable that is registered with servicelog_notify as the
handler command for service log events.
</longdesc>
<shortdesc lang="en">Service log event handler program</shortdesc>
<content type="string" default="/usr/sbin/notifyServicelogEvent" />
</parameter>
</parameters>

<actions>
<action name="start"        timeout="20s" />
<action name="stop"         timeout="20s" />
<action name="monitor"      timeout="20s" />
<action name="reload"       timeout="20s" />
<action name="meta-data"    timeout="5s" />
<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
}

#######################################################################

# Print a short usage message on stdout.
# The action list includes "reload", which the dispatcher at the bottom of
# this file accepts alongside the other actions.
SystemHealth_usage() {
    cat <<END
usage: $0 {start|stop|monitor|reload|validate-all|meta-data}

Expects to have a fully populated OCF RA-compliant environment set.
END
}

# Verify that the external programs this agent depends on are available.
#
# Globals:  OCF_RESKEY_program (read), OCF_ERR_INSTALLED, OCF_SUCCESS (read)
# Returns:  $OCF_SUCCESS when servicelog_notify and ipmiservicelogd can be
#           resolved and OCF_RESKEY_program is an executable file;
#           $OCF_ERR_INSTALLED (after logging an error) otherwise.
SystemHealth_check_tools() {
    # "command -v" is a POSIX shell builtin; "which" is a separate utility
    # that is not guaranteed to be installed.
    for tool in servicelog_notify ipmiservicelogd; do
        if ! command -v "$tool" > /dev/null 2>&1; then
            ocf_log err "$tool not found!"
            return $OCF_ERR_INSTALLED
        fi
    done

    if [ ! -x "$OCF_RESKEY_program" ]; then
        ocf_log err "$OCF_RESKEY_program not found!"
        return $OCF_ERR_INSTALLED
    fi

    return $OCF_SUCCESS
}

# Start the resource: start the "ipmi" service, launch ipmiservicelogd in
# the background and register OCF_RESKEY_program with servicelog_notify.
#
# Globals:  OCF_RESKEY_program, OCF_SUCCESS, OCF_ERR_GENERIC (read)
# Returns:  $OCF_SUCCESS when the resource was started or was already
#           running; $OCF_ERR_GENERIC when monitor reports a failure, the
#           ipmi service cannot be started, or the handler registration
#           fails.
SystemHealth_start() {
    SystemHealth_monitor
    RC=$?

    if [ "$RC" -eq "$OCF_ERR_GENERIC" ]; then
        return $OCF_ERR_GENERIC
    elif [ "$RC" -eq "$OCF_SUCCESS" ]; then
        ocf_log warn "starting an already started SystemHealth"
        return $OCF_SUCCESS
    fi

    if ! service ipmi start > /dev/null 2>&1; then
        ocf_log err "Could not start service IPMI!"
        return $OCF_ERR_GENERIC
    fi

    # The daemon is launched asynchronously.  "$?" after "&" is always 0
    # (it only reflects that the job was spawned), so it cannot be used to
    # detect a launch failure; the former check on it was unreachable.
    # NOTE(review): a failed launch is presumably picked up later by the
    # pid file test in SystemHealth_monitor -- confirm.
    ipmiservicelogd smi 0 > /dev/null 2>&1 &

    if ! servicelog_notify --add --type=EVENT --command="$OCF_RESKEY_program" --method=num_arg --match='type=4' > /dev/null 2>&1; then
        ocf_log err "servicelog_notify register handler failed!"
        return $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}

# Stop the resource: kill ipmiservicelogd and remove the servicelog_notify
# handler registered for OCF_RESKEY_program.
#
# Returns $OCF_SUCCESS when both steps succeed or the resource was already
# stopped, $OCF_ERR_GENERIC in every other case.
SystemHealth_stop() {
    SystemHealth_monitor
    monitor_rc=$?

    # Monitor reported a failure: pass it on unchanged.
    if [ $monitor_rc -eq $OCF_ERR_GENERIC ]; then
        return $OCF_ERR_GENERIC
    fi

    if [ $monitor_rc -eq $OCF_SUCCESS ]; then
        stop_failed=0

        if ! killall ipmiservicelogd; then
            ocf_log err "Could not stop ipmiservicelogd!"
            stop_failed=1
        fi

        # The handler is removed even when killing the daemon failed.
        if ! servicelog_notify --remove --command="$OCF_RESKEY_program" > /dev/null 2>&1; then
            ocf_log err "servicelog_notify remove handler failed!"
            stop_failed=1
        fi

        if [ $stop_failed -ne 0 ]; then
            return $OCF_ERR_GENERIC
        fi
        return $OCF_SUCCESS
    fi

    if [ $monitor_rc -eq $OCF_NOT_RUNNING ]; then
        ocf_log warn "stopping an already stopped SystemHealth"
        return $OCF_SUCCESS
    fi

    # Any other monitor result is unexpected.
    ocf_log err "SystemHealth_stop: should not be here!"
    return $OCF_ERR_GENERIC
}

# Report the resource state as one of three outcomes:
#   $OCF_SUCCESS      pid file present, its pid alive, handler listed
#   $OCF_NOT_RUNNING  no pid file, or the handler is not listed
#   $OCF_ERR_GENERIC  pid file present but its pid is not alive (the stale
#                     pid file is removed)
SystemHealth_monitor() {
    ipmi_pidfile=/var/run/ipmiservicelogd.pid0

    if [ ! -f "$ipmi_pidfile" ]; then
        ocf_log debug "ipmiservicelogd is not running!"
        return $OCF_NOT_RUNNING
    fi

    if ! ps -p "$(cat "$ipmi_pidfile")" >/dev/null 2>&1; then
        ocf_log debug "ipmiservicelogd's pid $(cat "$ipmi_pidfile") is not running!"

        # Drop the stale pid file so the next check starts from a clean state.
        rm "$ipmi_pidfile"

        return $OCF_ERR_GENERIC
    fi

    # The daemon is alive; the resource only counts as running when the
    # handler for OCF_RESKEY_program is listed as well.
    if servicelog_notify --list --command="$OCF_RESKEY_program" > /dev/null 2>&1; then
        return $OCF_SUCCESS
    fi

    return $OCF_NOT_RUNNING
}

# Validate the installation by running the tool check.
# Returns the tool check's non-zero status unchanged, or $OCF_SUCCESS when
# the check passes.
SystemHealth_validate() {
    SystemHealth_check_tools || return $?

    return $OCF_SUCCESS
}

# Fall back to the default event handler when no program is configured.
if [ -z "${OCF_RESKEY_program}" ]; then
    OCF_RESKEY_program=/usr/sbin/notifyServicelogEvent
fi

# Informational actions are answered before the tool check is run.
case "$__OCF_ACTION" in
meta-data)
    meta_data
    exit $OCF_SUCCESS
    ;;
usage|help)
    SystemHealth_usage
    exit $OCF_SUCCESS
    ;;
esac

# Every remaining action needs the external tools.  When they are missing,
# "stop" is still reported as successful; any other action exits with the
# tool check's status.
SystemHealth_check_tools || {
    tools_rc=$?
    if [ "$__OCF_ACTION" = "stop" ]; then
        exit $OCF_SUCCESS
    fi
    exit $tools_rc
}

# Run the requested action; its exit status becomes the agent's.
case "$__OCF_ACTION" in
start)
    SystemHealth_start
    ;;
stop)
    SystemHealth_stop
    ;;
monitor)
    SystemHealth_monitor
    ;;
reload)
    ocf_log info "Reloading..."
    SystemHealth_start
    ;;
validate-all)
    # The tool check above has already passed; nothing more to do.
    :
    ;;
*)
    SystemHealth_usage
    exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc

# vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: