#!/bin/sh
# BEGIN_ICS_COPYRIGHT8 ****************************************
#
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
#   may be used to endorse or promote products derived from this software
#   without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# END_ICS_COPYRIGHT8 ****************************************

# [ICS VERSION STRING: unknown]


#------------------------------------------------------------------
# Initialization:
# - specify the absolute path of mpirun if it is not in $PATH
# - EXIT_VALUE should not be set to 0
#------------------------------------------------------------------
|
|
Packit |
857059 |
# cleanup_openmpi
#   Remove the temporary files created by this wrapper.
#   Globals:  APP_FILE (read)  - generated mpirun appfile
#             HOST_FILE (read) - generated host list file
#   Returns:  0; failures of rm are deliberately ignored (best effort).
cleanup_openmpi()
{
    # Ignore HUP, INT, QUIT and TERM from here on so that the cleanup
    # itself cannot be interrupted half-way.
    trap "" 1 2 3 15

    # Quote the names: JOB_SHAREDIR (and therefore both paths) may contain
    # white space, which would otherwise split into several bogus names.
    for afile in "$APP_FILE" "$HOST_FILE"
    do
        if [ -f "$afile" ]; then
            # Silence both stdout and stderr of rm.
            rm -f -- "$afile" >/dev/null 2>&1
        fi
    done
}
|
|
Packit |
857059 |
|
|
Packit |
857059 |
# sig_handler_opempi
#   Signal handler for HUP, INT, QUIT and TERM: report the signal on stdout
#   and in the log file, remove the temporary files and terminate.
#   Globals:  LOGFILE (read) - file the message is appended to
#   Exits:    always with status 1
sig_handler_opempi()
{
    # Block further signals while shutting down.
    trap "" 1 2 3 15
    echo "Signal received. Exiting ..." | tee -a "$LOGFILE"
    cleanup_openmpi
    exit 1
}
|
|
Packit |
857059 |
|
|
Packit |
857059 |
# -----------------------------------------------------
# Source the LSF environment. Optional.
# -----------------------------------------------------
# Only source lsf.conf when it really exists: "." on a missing file aborts
# a non-interactive POSIX shell, which would make this step mandatory.
if [ -n "${LSF_ENVDIR}" ] && [ -f "${LSF_ENVDIR}/lsf.conf" ]; then
    . "${LSF_ENVDIR}/lsf.conf"
fi
|
|
Packit |
857059 |
|
|
Packit |
857059 |
# -----------------------------------------------------
# Set up the variable LSF_TS representing the TaskStarter.
# -----------------------------------------------------
LSF_TS="$LSF_BINDIR/TaskStarter"
USR_APP_FILE=""

# mpirun of OPENMPI is taken from $MPICH_PREFIX/bin when MPICH_PREFIX is
# set; otherwise this script assumes it can be found through the PATH
# environment variable (an empty prefix must not silently turn into
# /bin/mpirun).
MPIHOME=/opt/openmpi/gnu
#MPIRUN_CMD="$MPIHOME/bin/mpirun"
if [ -n "$MPICH_PREFIX" ]; then
    MPIRUN_CMD="$MPICH_PREFIX/bin/mpirun"
else
    MPIRUN_CMD="mpirun"
fi

LOGFILE="/dev/null"
# Pessimistic default; overwritten with the real status of mpirun later on.
EXIT_VALUE="66"
TASK_GEOM_OK="0"
# Directory that receives the temporary appfile and host file.
JOB_SHAREDIR="$HOME"
OPENMPI_OPTS=" "
if [ "$JOB_SHAREDIR" = "" ]; then
    # HOME is empty or unset: fall back to the current directory.
    JOB_SHAREDIR=$(pwd)
fi
|
|
Packit |
857059 |
|
|
Packit |
857059 |
#
# If task geometry is not used
# Reverse order of the host list to gain better performance
#
# REVERSE_ORDER is "n" when LSB_PJL_TASK_GEOMETRY is set and "y" otherwise.
if [ -n "$LSB_PJL_TASK_GEOMETRY" ]; then
    REVERSE_ORDER="n"
else
    REVERSE_ORDER="y"
fi
|
|
Packit |
857059 |
|
|
Packit |
857059 |
#------------------------------------------------------------------
# Create files with a unique name based on the LSF job ID:
# - log file
# - machine file
# - temp file
#------------------------------------------------------------------
# Outside of an LSF batch job there is no job ID, so fall back to
# "<hostname>_<pid of this shell>".
if [ "$LSB_BATCH_JID" != "" ]; then
    UNIQUE_ID="$LSB_BATCH_JID"
else
    UNIQUE_ID="$(hostname)_$$"
fi

APP_FILE="$JOB_SHAREDIR/.openmpi_appfile_${UNIQUE_ID}"
HOST_FILE="$JOB_SHAREDIR/.host_file_${UNIQUE_ID}"

# On HUP, INT, QUIT or TERM: log, remove the temporary files and exit.
trap "sig_handler_opempi" 1 2 3 15
|
|
Packit |
857059 |
|
|
Packit |
857059 |
#
# Reverse LSB_MCPU_HOSTS
#

# _reverse_mcpu_hosts LIST
#   LIST is a white-space separated sequence of "host ncpus" pairs.
#   Prints the pairs in reverse order (each pair keeps its internal order),
#   every pair followed by a single space. A trailing host name without a
#   count is dropped.
_reverse_mcpu_hosts()
{
    _rev_host=""
    _rev_list=""
    # Intentionally unquoted: the list is split on white space.
    for _rev_tok in $1
    do
        if [ -z "$_rev_host" ]; then
            # First token of a pair: remember the host name.
            _rev_host="$_rev_tok"
        else
            # Second token of a pair: prepend "host ncpus" to the result.
            _rev_list="$_rev_host $_rev_tok $_rev_list"
            _rev_host=""
        fi
    done
    printf '%s\n' "$_rev_list"
}

if [ "$REVERSE_ORDER" = "y" ]; then
    NEW_LSB_MCPU_HOSTS=$(_reverse_mcpu_hosts "$LSB_MCPU_HOSTS")
    LSB_MCPU_HOSTS=$NEW_LSB_MCPU_HOSTS
fi
|
|
Packit |
857059 |
|
|
Packit |
857059 |
# -----------------------------------------------------
# Process the command line:
# - extract [mpiopts] from the command line
# - extract jobname [jobopts] from the command line
# -----------------------------------------------------

# _mpiopt_missing_value OPTION
#   Report on stderr that OPTION was given without its value(s).
_mpiopt_missing_value()
{
    echo "mpirun option $1 requires an argument. Exiting..." 1>&2
}

# parse_mpirun_opts ARG...
#   Consume the leading mpirun options of the command line.
#   Globals:  OPENMPI_OPTS (appended) - options forwarded to mpirun
#             USR_APP_FILE (written)  - value of a user supplied -app
#             JOB_CMDLN (written)     - everything after the mpirun options
#             LSB_PJL_TASK_GEOMETRY, LOGFILE (read)
#   Returns:  0 on success, 1 when an option lacks its value(s). The
#             unguarded "shift; shift" used previously is a fatal error in
#             POSIX sh and silently ignored in bash.
parse_mpirun_opts()
{
    while [ $# -gt 0 ]
    do
        case "$1" in
        -aborted|--aborted|-path|--path|--tmpdir|--universe|-x|-wdir|--wdir|--prefix|-debugger|--debugger)
            # Option with one value: forwarded to mpirun.
            [ $# -ge 2 ] || { _mpiopt_missing_value "$1"; return 1; }
            OPENMPI_OPTS="$OPENMPI_OPTS $1 $2 "
            shift 2
            ;;
        -app|--app)
            # --app <arg0> Provide an appfile; ignore all other command line options
            [ $# -ge 2 ] || { _mpiopt_missing_value "$1"; return 1; }
            echo "User defined -app option found" >> "${LOGFILE:-/dev/null}"
            USR_APP_FILE="$2"
            shift 2
            ;;
        -bynode|--bynode|-byslot|--byslot)
            # Placement options conflict with an explicit task geometry.
            if [ -z "$LSB_PJL_TASK_GEOMETRY" ]; then
                OPENMPI_OPTS="$OPENMPI_OPTS $1 "
            else
                echo "LSB_PJL_TASK_GEOMETRY is defined, mpirun option $1 will be ignored" >> "${LOGFILE:-/dev/null}"
            fi
            shift
            ;;
        -c|-np|--np|-n|--n|-tv|--tv)
            # <arg0> Number of processes to run
            # Dropped: the process count comes from the LSF allocation.
            [ $# -ge 2 ] || { _mpiopt_missing_value "$1"; return 1; }
            echo "mpirun option $1 $2 will be ignored" >> "${LOGFILE:-/dev/null}"
            shift 2
            ;;
        -d|-debug|--debug|--debug-daemons|--debug-daemons-file|--no-daemonize|--debug-devel|-q|--quiet|-V|--version|-nooversubscribe|--nooversubscribe)
            OPENMPI_OPTS="$OPENMPI_OPTS $1 "
            shift
            ;;
        -gmca|--gmca|-mca|--mca)
            #<arg0> <arg1>
            [ $# -ge 3 ] || { _mpiopt_missing_value "$1"; return 1; }
            OPENMPI_OPTS="$OPENMPI_OPTS $1 $2 $3 "
            shift 3
            ;;
        -h|--help|-nw|--nw|-v|--verbose)
            OPENMPI_OPTS="$OPENMPI_OPTS $1 "
            shift
            ;;
        -H|-host|--host|-hostfile|--hostfile|-machinefile|--machinefile)
            # <arg0> List of hosts to invoke processes on
            # Dropped: the host list comes from the LSF allocation.
            [ $# -ge 2 ] || { _mpiopt_missing_value "$1"; return 1; }
            echo "mpirun option $1 $2 will be ignored" >> "${LOGFILE:-/dev/null}"
            shift 2
            ;;
        -nolocal|--nolocal)
            echo "mpirun option $1 will be ignored" >> "${LOGFILE:-/dev/null}"
            shift
            ;;
        *)
            # First word that is not a known mpirun option: the job starts here.
            break
            ;;
        esac
    done

    JOB_CMDLN="$*"
    return 0
}

parse_mpirun_opts "$@" || exit 1
|
|
Packit |
857059 |
|
|
Packit |
857059 |
# -----------------------------------------------------
# Set up the CMD_LINE variable representing the integrated section of the command line:
# - LSF_TS, script variable representing the TaskStarter binary.
#   TaskStarter must start each and every job task process.
# - LSF_TS_OPTIONS, LSF environment variable containing all necessary information
#   for TaskStarter to callback to LSF's Parallel Application Manager.
# - JOB_CMDLN, script variable containing the job and job options
#--------------------------------------------------------------------------------
# Without LSF_TS_OPTIONS the job is started directly; otherwise it is
# wrapped by the TaskStarter. CMD_LINE always ends with a single space.
if [ -z "$LSF_TS_OPTIONS" ]
then
    echo CMD_LINE="$JOB_CMDLN" >> "${LOGFILE:-/dev/null}"
    CMD_LINE="$JOB_CMDLN "
else
    echo CMD_LINE="$LSF_TS $LSF_TS_OPTIONS $JOB_CMDLN" >> "${LOGFILE:-/dev/null}"
    CMD_LINE="$LSF_TS $LSF_TS_OPTIONS $JOB_CMDLN "
fi
|
|
Packit |
857059 |
|
|
Packit |
857059 |
#------------------------------------------------------------------
# Construct LSF Job -app file
#------------------------------------------------------------------

# _appfile_entry HOST NPROCS
#   Print one appfile line: "-host HOST -n NPROCS <mpirun opts> <command>".
#   Globals: OPENMPI_OPTS, CMD_LINE (read); an empty value is replaced by a
#   single space. The text is emitted with printf so that backslashes in
#   the command line reach the appfile unchanged.
_appfile_entry()
{
    printf '%s\n' "-host $1 -n $2 ${OPENMPI_OPTS:- } ${CMD_LINE:- }"
}

# _expand_mcpu_hosts HOSTFILE LIST
#   LIST is a sequence of "host ncpus" pairs. Writes every host name ncpus
#   times, one per line, to HOSTFILE and prints the sum of all ncpus.
_expand_mcpu_hosts()
{
    # $2 is intentionally unquoted: the pairs are joined onto a single line.
    echo $2 | awk -v hfile="$1" '
    BEGIN { counter = 0 }
    {
        size = split($0, a, " ");
        for (i = 1; i <= size; i += 2) {
            counter = counter + a[i + 1];
            for (j = 0; j < a[i + 1]; ++j) {
                print a[i] > hfile;
            }
        }
    }
    END { print counter }'
}

# _write_appfile_one_per_host HOSTFILE
#   Write to APP_FILE one single-process entry for every white-space
#   separated host name found in HOSTFILE, in file order.
_write_appfile_one_per_host()
{
    # The command substitution is intentionally unquoted (word splitting).
    for _af_host in $(cat "$1")
    do
        _appfile_entry "$_af_host" 1
    done > "$APP_FILE"
}

# _write_appfile_from_pairs LIST
#   Write to APP_FILE one entry per "host ncpus" pair of LIST. A trailing
#   host name without a count is ignored.
_write_appfile_from_pairs()
{
    # Intentionally unquoted: split the list into positional parameters
    # (local to this function).
    set -- $1
    while [ $# -ge 2 ]
    do
        _appfile_entry "$1" "$2"
        shift 2
    done > "$APP_FILE"
}

if [ "$USR_APP_FILE" != "" ]; then
    # User defined --app <appfile>
    echo "User defined -app $USR_APP_FILE is not allowed. Exiting..." | tee -a "$LOGFILE"
    cleanup_openmpi
    exit 1
elif [ "$LSB_PJL_TASK_GEOMETRY" != "" ]; then
    #------------------------------------------------------------------
    # handle $LSB_PJL_TASK_GEOMETRY
    # It will shuffle the order the appfile
    # based on the order of task geometry
    #------------------------------------------------------------------
    # Check for the library before sourcing it: "." on a missing file
    # aborts a POSIX sh before the error below could be reported.
    if [ ! -f "${LSF_BINDIR}/pjllib.sh" ]; then
        echo "Cannot find pjllib.sh in $LSF_BINDIR. Exiting..." 1>&2
        cleanup_openmpi
        exit 1
    fi
    . "${LSF_BINDIR}/pjllib.sh"

    # One line per allocated slot in HOST_FILE; TOTAL_CPUS is the slot count.
    # NOTE(review): TOTAL_CPUS is not read in this script; presumably
    # pjllib.sh uses it - confirm before removing.
    TOTAL_CPUS=$(_expand_mcpu_hosts "$HOST_FILE" "$LSB_MCPU_HOSTS")

    # get a host list each host per line that satisfies the task geometry
    # then construct the appfile based on new host list
    reorder_file_based_on_task_geom "$HOST_FILE" "sort"
    EXIT_VALUE=$?
    if [ "$EXIT_VALUE" != "0" ]; then
        echo "Error in reorder_file_based_on_task_geom \"$HOST_FILE\" \"sort\", Exit ..." 1>&2
        cleanup_openmpi
        exit ${EXIT_VALUE}
    fi
    _write_appfile_one_per_host "$HOST_FILE"
else
    _write_appfile_from_pairs "$LSB_MCPU_HOSTS"
fi
|
|
Packit |
857059 |
|
|
Packit |
857059 |
# Record the generated appfile and the launch command in the log, then
# start the job through mpirun.
echo "appfile $APP_FILE reads" >> "$LOGFILE"
cat "$APP_FILE" >> "$LOGFILE"
echo "command used to launch the job : " >> "$LOGFILE"
echo " $MPIRUN_CMD --app $APP_FILE " >> "$LOGFILE"
"$MPIRUN_CMD" --app "$APP_FILE"
# Keep the status of mpirun; cleanup_openmpi below would overwrite $?.
EXIT_VALUE=$?

#------------------------------------------------------------------
# Clean up:
# - remove temporary files
# - exit with the exit value of the mpirun command
#------------------------------------------------------------------
cleanup_openmpi
exit $EXIT_VALUE

#-------------------------------------------------------------------------
# End the script.
#-------------------------------------------------------------------------
|