#!/bin/sh
#
# volume
#
# Description: Manages a GlusterFS volume (the brick processes of the
# volume on the local node) as a (typically cloned) HA resource
#
# Authors: Florian Haas (hastexo Professional Services GmbH)
#
# License: GNU General Public License (GPL)
#######################################################################
# Initialization:
# Load the OCF shell function library.  The helpers used further down
# (ocf_log, ocf_run, check_binary) and the $OCF_* exit codes are not defined
# in this file; they are expected to come from this library.
# OCF_FUNCTIONS_DIR is only given a default when the caller left it unset.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
# The path is quoted so that an installation directory containing
# whitespace is still sourced as a single file name.
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
# Convenience variables
# When sysconfdir and localstatedir aren't passed in as
# configure flags, they're defined in terms of prefix
prefix=@prefix@
# Short host name of this node; used to pick this node's bricks out of the
# volume info file and to build the names of the brick pid files.
SHORTHOSTNAME=$(hostname -s)
#######################################################################
# Default for the "binary" resource parameter, also advertised in the
# meta-data.  The parameter itself is only defaulted when it is unset.
OCF_RESKEY_binary_default="gluster"
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
# Emit the resource agent meta-data (XML) on stdout.
# Globals:  OCF_RESKEY_binary_default (read) - interpolated as the default
#           value of the "binary" parameter
# Outputs:  the XML document; "\$PATH" is escaped so it is printed literally
volume_meta_data() {
    cat <<END_OF_METADATA
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="volume" version="0.1">
<version>0.1</version>
<longdesc lang="en">
Manages a GlusterFS volume and monitors its bricks. When a resource of
this type is configured as a clone (as is commonly the case), then it
must have clone ordering enabled.
</longdesc>
<shortdesc lang="en">Manages a GlusterFS volume</shortdesc>
<parameters>
<parameter name="volname" required="1">
<longdesc lang="en">
The name of the volume to manage.
</longdesc>
<shortdesc lang="en">volume name</shortdesc>
<content type="string"/>
</parameter>
<parameter name="binary">
<longdesc lang="en">
Name of the gluster executable. Specify a full absolute
path if the binary is not in your \$PATH.
</longdesc>
<shortdesc lang="en">gluster executable</shortdesc>
<content type="string" default="$OCF_RESKEY_binary_default"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" timeout="20" interval="10" />
<action name="reload" timeout="20" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
END_OF_METADATA
}
# Print glusterd's configuration directory for the configured volume.
# Globals:  OCF_RESKEY_volname (read)
# Outputs:  the directory path on stdout (nothing when it does not exist)
# Returns:  0 if the directory exists, 1 otherwise
volume_getdir() {
    local voldir

    voldir="@GLUSTERD_WORKDIR@/vols/${OCF_RESKEY_volname}"

    # Quoted: an unquoted path is word-split, which turns the test into a
    # syntax error (and a bogus "missing") for names containing whitespace.
    [ -d "${voldir}" ] || return 1

    printf '%s\n' "${voldir}"
    return 0
}
# Print the run-time directory in which the volume's brick pid files are
# looked up.
# Globals:  OCF_RESKEY_volname (read)
# Outputs:  the directory path on stdout (nothing when it does not exist)
# Returns:  0 if the directory exists, 1 otherwise
volume_getpid_dir() {
    local volpid_dir

    volpid_dir="/var/run/gluster/vols/${OCF_RESKEY_volname}"

    # Quoted so that a name containing whitespace is tested as one path
    # instead of making `[` fail with a syntax error on stderr.
    [ -d "${volpid_dir}" ] || return 1

    printf '%s\n' "${volpid_dir}"
    return 0
}
# Print the bricks of the volume that are hosted on this node.
# The volume's "info" file is scanned for lines of the form
# "brick-<something>=<SHORTHOSTNAME>:<brick>"; only the <brick> part of
# matching lines is printed, one per line.
# Globals:  SHORTHOSTNAME (read)
# Outputs:  the matching brick identifiers on stdout
# Returns:  0 on success (also when no line matches),
#           1 if the volume directory or its info file does not exist
volume_getbricks() {
    local infofile
    local voldir
    local bricks

    # Without a volume directory there is no info file to read; do not
    # fall through to probing "/info".
    voldir=$(volume_getdir) || return 1
    infofile="${voldir}/info"

    [ -e "${infofile}" ] || return 1

    # "..*" is the portable BRE spelling of "one or more characters".
    bricks=$(sed -n -e "s/^brick-..*=${SHORTHOSTNAME}://p" < "${infofile}")

    # printf rather than echo: the extracted text starts with whatever
    # follows the colon and must never be taken for an echo option.
    printf '%s\n' "${bricks}"
    return 0
}
# Print the PIDs recorded for every local brick of the volume.
# For each brick reported by volume_getbricks the file
# "<pid dir>/<SHORTHOSTNAME><brick>.pid" is printed verbatim.
# Globals:  SHORTHOSTNAME (read)
# Outputs:  contents of the pid files on stdout, in brick order
# Returns:  0 if every local brick has a pid file (or there are no local
#           bricks at all),
#           1 if the brick list cannot be determined, the pid directory is
#           missing, or a pid file is missing
volume_getpids() {
    local bricks
    local brick
    local pidfile
    local volpid_dir

    # A volume whose brick list cannot be read must not be mistaken for
    # "no local bricks": propagate the failure instead of returning 0.
    bricks=$(volume_getbricks) || return 1

    # No bricks on this node: nothing to report, which is not an error.
    [ -n "${bricks}" ] || return 0

    # With bricks to look up, a missing pid directory means none of them
    # can have a pid file.
    volpid_dir=$(volume_getpid_dir) || return 1

    # Deliberately unquoted: the list is whitespace separated.
    for brick in ${bricks}; do
        pidfile="${volpid_dir}/${SHORTHOSTNAME}${brick}.pid"
        [ -e "${pidfile}" ] || return 1
        cat "${pidfile}"
    done

    return 0
}
# Start the volume and wait until the local bricks are reported running.
# Globals:  OCF_RESKEY_binary, OCF_RESKEY_volname (read)
# Returns:  $OCF_SUCCESS once volume_monitor succeeds.
# Exits:    with volume_validate_all's status on a configuration error,
#           with $OCF_ERR_GENERIC if the start command fails.
volume_start() {
    # An invalid configuration ends the agent run right here.
    volume_validate_all || exit $?

    # Already up: report success without touching anything.
    if volume_monitor; then
        ocf_log info "Resource is already running"
        return $OCF_SUCCESS
    fi

    # Issue the actual start request.
    if ! ocf_run "$OCF_RESKEY_binary" volume start "$OCF_RESKEY_volname" force; then
        exit $OCF_ERR_GENERIC
    fi

    # Poll once per second until the monitor is satisfied.  There is no
    # local timeout; the loop relies on the caller to abort an action that
    # takes too long.
    until volume_monitor; do
        ocf_log debug "Resource has not started yet, waiting"
        sleep 1
    done

    # Reached only when every step above succeeded.
    return $OCF_SUCCESS
}
# Stop the local bricks of the volume by sending them SIGTERM, then wait
# until every signalled process is gone.
# Only processes on this node are signalled; no gluster command is run.
# Returns:  $OCF_SUCCESS when the resource was already stopped or all
#           signalled processes have exited.
# Exits:    with volume_validate_all's status on a configuration error.
volume_stop() {
    local rc
    local pid
    local pids
    local alive

    # exit immediately if configuration is not valid
    volume_validate_all || exit $?

    volume_monitor
    rc=$?
    case "$rc" in
        "$OCF_SUCCESS")
            # Currently running. Normal, expected behavior.
            ocf_log debug "Resource is currently running"
            ;;
        "$OCF_NOT_RUNNING")
            # Currently not running. Nothing to do.
            ocf_log info "Resource is already stopped"
            return $OCF_SUCCESS
            ;;
    esac

    # Ask every local brick process to terminate.
    pids=$(volume_getpids)
    for pid in $pids; do
        ocf_run kill -s TERM "$pid"
    done

    # Wait for ALL signalled processes to disappear.  Polling
    # volume_monitor instead would end the wait as soon as a single brick
    # is gone (it fails on the first dead PID) and would never end when
    # there was nothing to signal.  There is no local timeout; the loop
    # relies on the caller to abort a stop that takes too long.
    while :; do
        alive=""
        for pid in $pids; do
            # Signal 0 only probes for existence.
            if kill -s 0 "$pid" 2>/dev/null; then
                alive="${alive} ${pid}"
            fi
        done
        [ -n "$alive" ] || break
        ocf_log debug "Resource has not stopped yet, waiting"
        sleep 1
    done

    # only return $OCF_SUCCESS if _everything_ succeeded as expected
    return $OCF_SUCCESS
}
# Check whether all local bricks of the volume are running.
# Every PID printed by volume_getpids is probed with signal 0.
# Globals:  OCF_RESKEY_volname (read, for the log message)
# Returns:  $OCF_SUCCESS if volume_getpids succeeds and every PID exists,
#           $OCF_NOT_RUNNING if volume_getpids fails or any probe fails
volume_monitor() {
    local pid
    # Kept local so the PID list does not leak into the caller's scope.
    local pids

    pids=$(volume_getpids) || return $OCF_NOT_RUNNING

    # Deliberately unquoted list: one PID per word.
    for pid in $pids; do
        ocf_run kill -s 0 "$pid" || return $OCF_NOT_RUNNING
    done

    ocf_log debug "Local bricks for volume ${OCF_RESKEY_volname} running with PIDs $pids"
    return $OCF_SUCCESS
}
# Validate the resource configuration.
# Globals:  OCF_RESKEY_volname, OCF_RESKEY_binary (read)
# Returns:  $OCF_ERR_CONFIGURED if "volname" is empty, else $OCF_SUCCESS.
# NOTE(review): the status of check_binary is not inspected here;
# presumably it terminates the agent itself when the binary is missing --
# confirm against ocf-shellfuncs.
volume_validate_all() {
    # Test for configuration errors first
    if [ -z "${OCF_RESKEY_volname}" ]; then
        ocf_log err 'Missing required parameter "volname"'
        return $OCF_ERR_CONFIGURED
    fi

    # Test for required binaries.  Quoted, as in volume_start, so that a
    # path containing whitespace is passed as a single argument.
    check_binary "$OCF_RESKEY_binary"

    return $OCF_SUCCESS
}
# Print a one-line synopsis of the actions handled by the dispatch below.
# Defined here because the dispatch calls it for "usage"/"help" and for
# unknown actions, and no other definition exists in this file.
volume_usage() {
    cat <<EOF
usage: $0 {start|stop|status|monitor|reload|validate-all|notify|meta-data|usage|help}
EOF
}

# Make sure meta-data and usage always succeed
case $__OCF_ACTION in
    meta-data)
        volume_meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        volume_usage
        exit $OCF_SUCCESS
        ;;
esac

# Anything other than meta-data and usage must pass validation
volume_validate_all || exit $?

# Translate each action into the appropriate function call
case $__OCF_ACTION in
    start)
        volume_start
        ;;
    stop)
        volume_stop
        ;;
    status|monitor)
        volume_monitor
        ;;
    reload)
        # A reload is implemented as a (re)start of the volume.
        ocf_log info "Reloading..."
        volume_start
        ;;
    validate-all)
        # Validation already ran above; nothing else to do.
        ;;
    notify)
        exit $OCF_SUCCESS
        ;;
    *)
        volume_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
# Status of the action function selected above (0 for validate-all).
rc=$?

# The resource agent may optionally log a debug message
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc"
exit $rc