|
Packit |
13e616 |
#!/bin/bash
|
|
Packit |
13e616 |
#
|
|
Packit |
13e616 |
# Copyright (c) 2008 Voltaire, Inc. All rights reserved.
|
|
Packit |
13e616 |
# Copyright (c) 2006 Mellanox Technologies. All rights reserved.
|
|
Packit |
13e616 |
#
|
|
Packit |
13e616 |
# This Software is licensed under one of the following licenses:
|
|
Packit |
13e616 |
#
|
|
Packit |
13e616 |
# 1) under the terms of the "Common Public License 1.0" a copy of which is
|
|
Packit |
13e616 |
# available from the Open Source Initiative, see
|
|
Packit |
13e616 |
# http://www.opensource.org/licenses/cpl.php.
|
|
Packit |
13e616 |
#
|
|
Packit |
13e616 |
# 2) under the terms of the "The BSD License" a copy of which is
|
|
Packit |
13e616 |
# available from the Open Source Initiative, see
|
|
Packit |
13e616 |
# http://www.opensource.org/licenses/bsd-license.php.
|
|
Packit |
13e616 |
#
|
|
Packit |
13e616 |
# 3) under the terms of the "GNU General Public License (GPL) Version 2" a
|
|
Packit |
13e616 |
# copy of which is available from the Open Source Initiative, see
|
|
Packit |
13e616 |
# http://www.opensource.org/licenses/gpl-license.php.
|
|
Packit |
13e616 |
#
|
|
Packit |
13e616 |
# Licensee has the right to choose one of the above licenses.
|
|
Packit |
13e616 |
#
|
|
Packit |
13e616 |
# Redistributions of source code must retain the above copyright
|
|
Packit |
13e616 |
# notice and one of the license notices.
|
|
Packit |
13e616 |
#
|
|
Packit |
13e616 |
# Redistributions in binary form must reproduce both the above copyright
|
|
Packit |
13e616 |
# notice, one of the license notices in the documentation
|
|
Packit |
13e616 |
# and/or other materials provided with the distribution.
|
|
Packit |
13e616 |
#
|
|
Packit |
13e616 |
#
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# OpenSM was found to have the following problem
|
|
Packit |
13e616 |
# when handover is performed:
|
|
Packit |
13e616 |
# If some of the cluster nodes are rebooted during the handover, they lose their LID assignment.
|
|
Packit |
13e616 |
# The reason for it is that the standby SM does not obey its own Guid to LID table
|
|
Packit |
13e616 |
# and simply uses the discovered LIDs. If some nodes are not available for it
|
|
Packit |
13e616 |
# their previous LID assignment is lost forever.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# The idea is to use an external daemon that will distribute
|
|
Packit |
13e616 |
# the semi-static LID assignment table from the master SM to all standby SMs.
|
|
Packit |
13e616 |
# A standby SM, on becoming master, needs to obey the copied semi-static LID assignment table.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# Installation prefixes; the @...@ placeholders are template tokens that are
# expected to be substituted before the script is installed.
prefix=@prefix@
exec_prefix=@exec_prefix@

# Optional site configuration. When present it is sourced, so it may preset
# any of the variables that receive a default below.
CONFIG=@sysconfdir@/sysconfig/opensm
if [ -f "$CONFIG" ]; then
    . "$CONFIG"
fi

# Setting SLDD_DEBUG to 1 enables progress messages on stdout.
SLDD_DEBUG=${SLDD_DEBUG:-0}

# Colon-separated list of the cache files that are kept in sync.
CACHE_FILE=${CACHE_FILE:-/var/cache/opensm/guid2lid:/var/cache/opensm/guid2mkey:/var/cache/opensm/neighbors}

# Split the list into a sorted array without duplicates. Colons and
# whitespace both act as separators and empty entries are dropped. The
# entries are read line by line so that the shell never glob-expands them.
declare -a arr_CACHE_FILES
arr_CACHE_FILES=()
while IFS= read -r cache_path
do
    [ -n "$cache_path" ] || continue
    arr_CACHE_FILES[${#arr_CACHE_FILES[@]}]=$cache_path
done < <(printf '%s\n' "$CACHE_FILE" | tr -s ':[:space:]' '\n' | sort -u)
unset cache_path

# Reachability probe: a single echo request with a one second deadline.
PING='ping -w 1 -c 1'

# Remote copy / remote shell commands and the interface listing command.
RCP=${RCP:-/usr/bin/scp}
RSH=${RSH:-/usr/bin/ssh}
IFCONFIG=${IFCONFIG:-'/sbin/ifconfig -a'}

# Give SLDD_DEBUG the integer attribute so it is always usable with -eq.
declare -i SLDD_DEBUG

# Seconds to sleep between two scans of the cache files.
RESCAN_TIME=${RESCAN_TIME:-60}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# OSM_HOSTS is a whitespace-separated list of the hosts running OpenSM
# (it is not assigned in this script; presumably the sourced configuration
# file provides it). Word splitting is intentional here.
declare -a arr_OSM_HOSTS
arr_OSM_HOSTS=(${OSM_HOSTS})

num_of_osm_hosts=${#arr_OSM_HOSTS[@]}

# Nothing to do when no host is configured. Counting the entries instead of
# testing the raw string also catches a value that contains only whitespace.
if [ "${num_of_osm_hosts}" -eq 0 ]; then
    [ "$SLDD_DEBUG" -eq 1 ] &&
        echo "No OpenSM servers (OSM_HOSTS) configured for the IB subnet."
    exit 0
fi

# A single OpenSM host has no peer to exchange cache files with.
if [ "${num_of_osm_hosts}" -eq 1 ]; then
    [ "$SLDD_DEBUG" -eq 1 ] &&
        echo "One OpenSM server configured in the IB subnet." &&
        echo "Nothing to be done for SLDD"

    exit 0
fi
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# Termination handler: record the shutdown in syslog and exit successfully.
trap_handler()
{
    logger -i "SLDD: Exiting."
    exit 0
}

# Run the handler when the daemon receives SIGTERM (signal 15).
trap 'trap_handler' TERM
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# Check whether a host answers the $PING probe.
# Arguments: $1 - host name or address
# Returns:   the exit status of the $PING command (0 when the host answered);
#            1 without probing when $1 is empty
is_alive()
{
    # An empty host can never be reached; do not run the probe at all.
    [ -n "$1" ] || return 1

    # $PING is deliberately unquoted: it holds a command plus its options.
    $PING "$1" > /dev/null 2>&1
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# Check whether a host name/address appears in the output of $IFCONFIG.
# Arguments: $1 - host name or address to look for
# Returns:   0 when $1 occurs as a whole word in the $IFCONFIG output,
#            non-zero otherwise (including when $1 is empty)
is_local()
{
    # An empty pattern would match every line; treat it as "not local".
    [ -n "$1" ] || return 1

    # -F: match the host literally. As a regular expression the dots of an
    # address match any character, so 192.168.1.1 would also match
    # 192.168.101.x. $IFCONFIG is deliberately unquoted (command + options).
    $IFCONFIG | grep -F -w -- "$1" > /dev/null 2>&1
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# Push a local cache file to every other host listed in OSM_HOSTS.
# Globals:   OSM_HOSTS, local_host, RSH, RCP, SLDD_DEBUG (all read)
# Arguments: $1 - path of the local cache file
# Effects:   creates "$1.upd" (snapshot of $1); on each reachable host the
#            snapshot is stored as "$1.<local_host>"; for each host that
#            received the file a local copy "$1.<host>" is written
# Exits:     the whole script exits with status 5 when the remote shell
#            command on a reachable host does not report success
update_remote_cache()
{
    # Keep the loop variable and scratch values out of the global scope.
    local host cache_dir stat remote_cmd

    # Work on a snapshot so the file cannot change between the transfers.
    /bin/rm -f "$1.upd"
    /bin/cp -a "$1" "$1.upd"

    [ "$SLDD_DEBUG" -eq 1 ] &&
        echo "Updating remote cache file"

    # Command run on the peer: make sure the cache directory exists, remove
    # the previous copy and print the status of that removal. %q quotes the
    # paths for the remote shell.
    cache_dir=$(dirname "$1")
    remote_cmd=$(printf '/bin/mkdir -p %q > /dev/null 2>&1; /bin/rm -f %q > /dev/null 2>&1; echo $?' "${cache_dir}" "$1.${local_host}")

    for host in ${OSM_HOSTS}
    do
        # Skip local host update
        if [ "${host}" == "${local_host}" ]; then
            continue
        fi

        if ! is_alive "$host"; then
            [ "$SLDD_DEBUG" -eq 1 ] &&
                echo "$host is down."
            continue
        fi

        stat=$($RSH "$host" "${remote_cmd}" | tr -d '[:space:]')
        if [ "X${stat}" != "X0" ]; then
            [ "$SLDD_DEBUG" -eq 1 ] &&
                echo "$RSH to $host failed."
            logger -i "SLDD: Failed to update $host with $1. $RSH without password should be enabled"
            exit 5
        fi

        [ "$SLDD_DEBUG" -eq 1 ] &&
            echo "Updating $host"
        logger -i "SLDD: updating $host with $1"

        # Record the per-host copy only when the transfer really succeeded;
        # otherwise a file would be recorded that the peer never received.
        if $RCP "$1.upd" "${host}:$1.${local_host}"; then
            /bin/cp "$1.upd" "$1.${host}"
        else
            logger -i "SLDD: Failed to copy $1 to $host"
        fi
    done
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# Print the most recently modified file matching "$1.*.*".
# The two-part suffix is meant to select copies named after a peer (for
# example an IP address) while skipping single-suffix files such as
# "$1.old" or "$1.tmp".
# Arguments: $1 - path of the local cache file
# Outputs:   the matching path without a trailing newline; nothing when no
#            file matches
get_latest_remote_cache()
{
    local candidate newest=""

    # Glob directly instead of parsing ls, so paths with whitespace work.
    for candidate in "$1".*.*
    do
        # An unmatched pattern stays literal; skip what does not exist.
        [ -e "$candidate" ] || continue
        if [ -z "$newest" ] || [ "$candidate" -nt "$newest" ]; then
            newest=$candidate
        fi
    done

    printf '%s' "$newest"
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# Print the largest (by byte count) file matching "$1.*.*".
# The two-part suffix is meant to select copies named after a peer (for
# example an IP address) while skipping single-suffix files such as
# "$1.old" or "$1.tmp".
# Arguments: $1 - path of the local cache file
# Outputs:   the matching path without a trailing newline; nothing when no
#            file matches
get_largest_remote_cache()
{
    local candidate largest=""
    local -i size largest_size=-1

    # Glob directly instead of parsing ls, so paths with whitespace work.
    for candidate in "$1".*.*
    do
        # An unmatched pattern stays literal; skip what does not exist.
        [ -e "$candidate" ] || continue
        # The integer attribute turns an empty result (unreadable file) into 0.
        size=$(wc -c 2> /dev/null < "$candidate")
        # Strict comparison: among equally large files the first one in
        # glob (name) order is kept.
        if [ "$size" -gt "$largest_size" ]; then
            largest=$candidate
            largest_size=$size
        fi
    done

    printf '%s' "$largest"
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# Replace a local cache file with another file, keeping a backup.
# Arguments: $1 - path of the local cache file
#            $2 - path of the file whose content becomes the new cache
# Effects:   the previous "$1" is kept as "$1.old"; "$1.tmp" is touched
#            after the replacement
# Returns:   0 on success; non-zero when the new content could not be
#            staged or moved into place (in which case "$1" is left as is)
swap_cache_files()
{
    # Stage the new content first. If the copy fails, the current cache
    # file must stay in place instead of being moved away.
    if ! /bin/cp "$2" "$1.swp"; then
        /bin/rm -f "$1.swp"
        return 1
    fi

    /bin/rm -f "$1.old"
    if [ -e "$1" ]; then
        /bin/mv "$1" "$1.old"
    fi

    /bin/mv -f "$1.swp" "$1" || return 1
    touch "$1.tmp"
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# Find local host in the osm hosts list.
# local_host stays empty when no entry is local; when several entries are
# local the last one in OSM_HOSTS wins.
local_host=""
for host in ${OSM_HOSTS}
do
    if is_local "$host"; then
        local_host=${host}
    fi
done
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# Get cache file info
# new_size                  - size of the cache file currently examined
# arr_last_size[i]          - size recorded for arr_CACHE_FILES[i]
# largest_remote_cache_size - size of the largest received copy
declare -i new_size=0
declare -ai arr_last_size
declare -i largest_remote_cache_size=0

for i in "${!arr_CACHE_FILES[@]}"
do
    cache_file=${arr_CACHE_FILES[$i]}
    arr_last_size[$i]=0

    if [ -e "${cache_file}" ]; then
        arr_last_size[$i]=$(du -b "${cache_file}" | awk '{print $1}' | tr -d '[:space:]')
    else
        # Create an empty cache file plus its ".tmp" companion; make sure
        # the directory exists first, otherwise touch would fail.
        /bin/mkdir -p "$(dirname "${cache_file}")"
        touch "${cache_file}" "${cache_file}.tmp"
    fi

    # Disabled: initial push of an existing cache file.
    # if [ ${arr_last_size[$i]} -gt 0 ]; then
    #     # First time update
    #     update_remote_cache ${cache_file}
    # fi
done
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# Main loop: every RESCAN_TIME seconds compare each local cache file with
# the largest copy received from another host and either push the local
# file to the other hosts or replace it with the received copy.
while true
do
    for i in "${!arr_CACHE_FILES[@]}"
    do
        cache_file=${arr_CACHE_FILES[$i]}

        if [ -s "${cache_file}" ]; then
            new_size=$(du -b "${cache_file}" | awk '{print $1}' | tr -d '[:space:]')

            # Size of the largest received copy (0 when there is none).
            largest_remote_cache=$(get_largest_remote_cache "${cache_file}")
            if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then
                largest_remote_cache_size=$(du -b "${largest_remote_cache}" 2> /dev/null | awk '{print $1}' | tr -d '[:space:]')
            else
                largest_remote_cache_size=0
            fi

            # Check if local cache file grew from its last version or the
            # time stamp changed, i.e. the ".tmp" companion is not newer
            # than the cache file. Both tests are joined with "||"; without
            # it the result of the size test would be discarded.
            if [ "${new_size}" -gt "${arr_last_size[$i]}" ] ||
               [ ! "${cache_file}.tmp" -nt "${cache_file}" ]; then
                # Check if local cache file larger than remote cache file
                if [ "${new_size}" -gt "${largest_remote_cache_size}" ]; then
                    [ "$SLDD_DEBUG" -eq 1 ] &&
                        echo "Local cache file larger then remote. Update remote cache files"
                    arr_last_size[$i]=${new_size}
                    update_remote_cache "${cache_file}"
                    # Done with this cache file for the current scan.
                    continue
                fi
            fi

            # Update local cache file from remote
            if [ "${largest_remote_cache_size}" -gt "${new_size}" ]; then
                [ "$SLDD_DEBUG" -eq 1 ] &&
                    echo "Local cache file shorter then remote. Use ${largest_remote_cache}"
                logger -i "SLDD: updating local cache file with ${largest_remote_cache}"
                swap_cache_files "${cache_file}" "${largest_remote_cache}"
                arr_last_size[$i]=${largest_remote_cache_size}
            fi

        else # The local cache file is empty
            [ "$SLDD_DEBUG" -eq 1 ] &&
                echo "${cache_file} is empty"

            largest_remote_cache=$(get_largest_remote_cache "${cache_file}")
            if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then
                # Copy it to the current cache
                [ "$SLDD_DEBUG" -eq 1 ] &&
                    echo "Local cache file is empty. Use ${largest_remote_cache}"
                logger -i "SLDD: updating local cache file with ${largest_remote_cache}"
                swap_cache_files "${cache_file}" "${largest_remote_cache}"
            fi

        fi
    done

    [ "$SLDD_DEBUG" -eq 1 ] &&
        echo "Sleeping ${RESCAN_TIME} seconds."
    sleep "${RESCAN_TIME}"

done
|