Blob Blame History Raw
#!/bin/sh
#
# glusterd
#
# Description:  Manages a glusterd server as a (typically cloned)
#               HA resource
#
# Authors:      Florian Haas (hastexo Professional Services GmbH)
#
# License:      GNU General Public License (GPL)

#######################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

# Convenience variables
# When sysconfdir and localstatedir aren't passed in as
# configure flags, they're defined in terms of prefix
prefix=@prefix@
SHORTHOSTNAME=`hostname -s`
#######################################################################

OCF_RESKEY_binary_default="gluster"

: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}

volume_meta_data() {
    cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="volume" version="0.1">
  <version>0.1</version>
  <longdesc lang="en">
Manages a GlusterFS volume and monitors its bricks. When a resource of
this type is configured as a clone (as is commonly the case), then it
must have clone ordering enabled.
  </longdesc>
  <shortdesc lang="en">Manages a GlusterFS volume</shortdesc>
  <parameters>
    <parameter name="volname" required="1">
      <longdesc lang="en">
      The name of the volume to manage.
      </longdesc>
      <shortdesc lang="en">volume name</shortdesc>
      <content type="string"/>
    </parameter>
    <parameter name="binary">
      <longdesc lang="en">
      Name of the gluster executable. Specify a full absolute
      path if the binary is not in your \$PATH.
      </longdesc>
      <shortdesc lang="en">gluster executable</shortdesc>
      <content type="string" default="$OCF_RESKEY_binary_default"/>
    </parameter>
  </parameters>
  <actions>
    <action name="start"        timeout="20" />
    <action name="stop"         timeout="20" />
    <action name="monitor"      timeout="20" interval="10" />
    <action name="reload"       timeout="20" />
    <action name="meta-data"    timeout="5" />
    <action name="validate-all"   timeout="20" />
  </actions>
</resource-agent>
EOF

}

volume_getdir() {
    local voldir
    voldir="@GLUSTERD_WORKDIR@/vols/${OCF_RESKEY_volname}"

    [ -d ${voldir} ] || return 1

    echo "${voldir}"
    return 0
}

volume_getpid_dir() {
    local volpid_dir
    volpid_dir="/var/run/gluster/vols/${OCF_RESKEY_volname}"

    [ -d ${volpid_dir} ] || return 1

    echo "${volpid_dir}"
    return 0
}

volume_getbricks() {
    local infofile
    local voldir
    voldir=`volume_getdir`
    infofile="${voldir}/info"

    [ -e ${infofile} ] || return 1

    echo "`sed -n -e "s/^brick-.\+=${SHORTHOSTNAME}://p" < ${infofile}`"
    return 0
}

volume_getpids() {
    local bricks
    local pidfile
    local infofile
    local volpid_dir

    volpid_dir=`volume_getpid_dir`
    bricks=`volume_getbricks`

    for brick in ${bricks}; do
	pidfile="${volpid_dir}/${SHORTHOSTNAME}${brick}.pid"
	[ -e $pidfile ] || return 1
	cat $pidfile
    done

    return 0
}

volume_start() {
    local volume_options

    # exit immediately if configuration is not valid
    volume_validate_all || exit $?

    # if resource is already running, bail out early
    if volume_monitor; then
        ocf_log info "Resource is already running"
        return $OCF_SUCCESS
    fi

    # actually start up the resource here
    ocf_run "$OCF_RESKEY_binary" \
	volume start "$OCF_RESKEY_volname" force || exit $OCF_ERR_GENERIC

    # After the resource has been started, check whether it started up
    # correctly. If the resource starts asynchronously, the agent may
    # spin on the monitor function here -- if the resource does not
    # start up within the defined timeout, the cluster manager will
    # consider the start action failed
    while ! volume_monitor; do
        ocf_log debug "Resource has not started yet, waiting"
        sleep 1
    done

    # only return $OCF_SUCCESS if _everything_ succeeded as expected
    return $OCF_SUCCESS
}

volume_stop() {
    local rc
    local pid

    # exit immediately if configuration is not valid
    volume_validate_all || exit $?

    volume_monitor
    rc=$?
    case "$rc" in
        "$OCF_SUCCESS")
            # Currently running. Normal, expected behavior.
            ocf_log debug "Resource is currently running"
            ;;
        "$OCF_NOT_RUNNING")
            # Currently not running. Nothing to do.
            ocf_log info "Resource is already stopped"
            return $OCF_SUCCESS
            ;;
    esac

    # actually shut down the resource here (make sure to immediately
    # exit with an $OCF_ERR_ error code if anything goes seriously
    # wrong)
    pids=`volume_getpids`
    for pid in $pids; do
	ocf_run kill -s TERM $pid
    done

    # After the resource has been stopped, check whether it shut down
    # correctly. If the resource stops asynchronously, the agent may
    # spin on the monitor function here -- if the resource does not
    # shut down within the defined timeout, the cluster manager will
    # consider the stop action failed
    while volume_monitor; do
        ocf_log debug "Resource has not stopped yet, waiting"
        sleep 1
    done

    # only return $OCF_SUCCESS if _everything_ succeeded as expected
    return $OCF_SUCCESS

}

volume_monitor() {
    local pid

    pids=`volume_getpids` || return $OCF_NOT_RUNNING

    for pid in $pids; do
	ocf_run kill -s 0 $pid || return $OCF_NOT_RUNNING
    done

    ocf_log debug "Local bricks for volume ${OCF_RESKEY_volname} running with PIDs $pids"
    return $OCF_SUCCESS
}

volume_validate_all() {
    # Test for configuration errors first
    if [ -z "${OCF_RESKEY_volname}" ]; then
	ocf_log err 'Missing required parameter "volname"'
	return $OCF_ERR_CONFIGURED
    fi

    # Test for required binaries
    check_binary $OCF_RESKEY_binary

    return $OCF_SUCCESS
}



# Make sure meta-data and usage always succeed
case $__OCF_ACTION in
meta-data)      volume_meta_data
                exit $OCF_SUCCESS
                ;;
usage|help)     volume_usage
                exit $OCF_SUCCESS
                ;;
esac

# Anything other than meta-data and usage must pass validation
volume_validate_all || exit $?

# Translate each action into the appropriate function call
case $__OCF_ACTION in
start)          volume_start;;
stop)           volume_stop;;
status|monitor) volume_monitor;;
reload)         ocf_log info "Reloading..."
                volume_start
                ;;
validate-all)   ;;
notify)		exit $OCF_SUCCESS;;
*)              volume_usage
                exit $OCF_ERR_UNIMPLEMENTED
                ;;
esac
rc=$?

# The resource agent may optionally log a debug message
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc"
exit $rc