Blame Lsf/openmpi_wrapper

Packit 857059
#!/bin/sh
Packit 857059
# BEGIN_ICS_COPYRIGHT8 ****************************************
Packit 857059
# 
Packit 857059
# Copyright (c) 2015, Intel Corporation
Packit 857059
# 
Packit 857059
# Redistribution and use in source and binary forms, with or without
Packit 857059
# modification, are permitted provided that the following conditions are met:
Packit 857059
# 
Packit 857059
#     * Redistributions of source code must retain the above copyright notice,
Packit 857059
#       this list of conditions and the following disclaimer.
Packit 857059
#     * Redistributions in binary form must reproduce the above copyright
Packit 857059
#       notice, this list of conditions and the following disclaimer in the
Packit 857059
#       documentation and/or other materials provided with the distribution.
Packit 857059
#     * Neither the name of Intel Corporation nor the names of its contributors
Packit 857059
#       may be used to endorse or promote products derived from this software
Packit 857059
#       without specific prior written permission.
Packit 857059
# 
Packit 857059
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
Packit 857059
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Packit 857059
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
Packit 857059
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
Packit 857059
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
Packit 857059
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
Packit 857059
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
Packit 857059
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
Packit 857059
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
Packit 857059
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Packit 857059
# 
Packit 857059
# END_ICS_COPYRIGHT8   ****************************************
Packit 857059
Packit 857059
# [ICS VERSION STRING: unknown]
Packit 857059
Packit 857059
Packit 857059
#------------------------------------------------------------------
Packit 857059
# Initialization:
Packit 857059
# - specify the absolute path to mpirun if it is not in $PATH
Packit 857059
# - EXIT_VALUE should not be set to 0
Packit 857059
#------------------------------------------------------------------
Packit 857059
#------------------------------------------------------------------
# cleanup_openmpi
#   Remove the temporary files named by $APP_FILE and $HOST_FILE.
#   Globals read: APP_FILE, HOST_FILE
#   Signals 1, 2, 3 and 15 are set to be ignored first, so a signal
#   arriving during the removal does not re-enter a handler.
#------------------------------------------------------------------
cleanup_openmpi()
{
    trap "" 1 2 3 15

    # The names are quoted so that a path containing blanks is treated as
    # one file instead of being split into several unrelated names.
    for afile in "$APP_FILE" "$HOST_FILE"
    do
        if [ -f "$afile" ]; then
            rm -f -- "$afile" >/dev/null 2>&1
        fi
    done
}
Packit 857059
Packit 857059
#------------------------------------------------------------------
# sig_handler_opempi
#   Signal handler: reports the signal on stdout and in the log file,
#   removes the temporary files through cleanup_openmpi and terminates
#   the script with exit status 1.
#   Globals read: LOGFILE (falls back to /dev/null when unset or empty)
#------------------------------------------------------------------
sig_handler_opempi()
{
    # Ignore further signals while shutting down.
    trap "" 1 2 3 15
    echo "Signal received. Exiting ..." | tee -a "${LOGFILE:-/dev/null}"
    cleanup_openmpi
    exit 1
}
Packit 857059
Packit 857059
# -----------------------------------------------------
# Source the LSF environment. Optional.
# The file is only sourced when it is readable: in a non-interactive
# POSIX shell a "." on a missing file aborts the whole script.
# -----------------------------------------------------
if [ -r "${LSF_ENVDIR}/lsf.conf" ]; then
    . "${LSF_ENVDIR}/lsf.conf"
fi
Packit 857059
Packit 857059
# -----------------------------------------------------
# Script-wide defaults.
# LSF_TS is the TaskStarter binary located in $LSF_BINDIR.
# -----------------------------------------------------
USR_APP_FILE=""
LSF_TS="$LSF_BINDIR/TaskStarter"

# mpirun is taken from $MPICH_PREFIX/bin. MPIHOME is only referenced by
# the commented-out alternative below.
MPIHOME=/opt/openmpi/gnu
#MPIRUN_CMD="$MPIHOME/bin/mpirun"
MPIRUN_CMD="$MPICH_PREFIX/bin/mpirun"

LOGFILE="/dev/null"
# Non-zero on purpose: it is replaced by the real status further down.
EXIT_VALUE="66"
TASK_GEOM_OK="0"
OPENMPI_OPTS=" "

# Directory that receives the temporary files: $HOME, or the current
# directory when HOME is unset or empty.
JOB_SHAREDIR="${HOME:-$(pwd)}"
Packit 857059
Packit 857059
#
# The host list is reversed (to gain better performance) unless a
# task geometry has been requested through LSB_PJL_TASK_GEOMETRY.
#
REVERSE_ORDER="y"
if [ "$LSB_PJL_TASK_GEOMETRY" != "" ]; then
    REVERSE_ORDER="n"
fi
Packit 857059
Packit 857059
#------------------------------------------------------------------
# Names of the per-job temporary files. They are made unique with the
# LSF job ID when LSB_BATCH_JID is set, otherwise with the host name
# and the PID of this shell:
# - APP_FILE   application file handed to mpirun
# - HOST_FILE  host list file
#------------------------------------------------------------------
UNIQUE_ID="${LSB_BATCH_JID:-$(hostname)_$$}"

HOST_FILE="$JOB_SHAREDIR/.host_file_${UNIQUE_ID}"
APP_FILE="$JOB_SHAREDIR/.openmpi_appfile_${UNIQUE_ID}"

# From here on HUP, INT, QUIT and TERM run the signal handler.
trap sig_handler_opempi HUP INT QUIT TERM
Packit 857059
Packit 857059
#
# Reverse LSB_MCPU_HOSTS
# The value is a list of "host count" pairs; the pairs are emitted in
# reverse order while each pair keeps its own host/count order.
#
if [ "$REVERSE_ORDER" = "y" ]; then
    NEW_LSB_MCPU_HOSTS=""
    HOST=""
    for i in $LSB_MCPU_HOSTS; do
        case "$HOST" in
            "")
                # first word of a pair: remember the host name
                HOST="$i"
                ;;
            *)
                # second word of a pair: prepend "host count" to the result
                NEW_LSB_MCPU_HOSTS="$HOST $i $NEW_LSB_MCPU_HOSTS"
                HOST=""
                ;;
        esac
    done
    LSB_MCPU_HOSTS=$NEW_LSB_MCPU_HOSTS
fi
Packit 857059
Packit 857059
#  -----------------------------------------------------
#  Process the command line:
# - extract [mpiopts] from the command line
# - extract jobname [jobopts] from the command line
#
# parse_openmpi_opts [mpiopts] jobname [jobopts]
#   Scans the leading mpirun options and stops at the first word that is
#   not a recognised option (the job name).
#   Globals read:    LOGFILE, LSB_PJL_TASK_GEOMETRY
#   Globals written: OPENMPI_OPTS          options forwarded to mpirun are appended
#                    USR_APP_FILE          argument of -app/--app, when given
#                    OPENMPI_ARGS_CONSUMED number of leading words consumed
#   A function cannot shift the caller's positional parameters, so the
#   caller shifts by OPENMPI_ARGS_CONSUMED afterwards.
#  -----------------------------------------------------
parse_openmpi_opts()
{
    OPENMPI_ARGS_CONSUMED=0
    while [ $# -gt 0 ]
    do
        case "$1" in
            -aborted|--aborted|-path|--path|--tmpdir|--universe|-x|-wdir|--wdir|--prefix|-debugger|--debugger)
                # <arg0>: forwarded together with its argument
                OPENMPI_OPTS="$OPENMPI_OPTS $1 $2 "
                _opt_words=2
                ;;
            -app|--app)
                # --app <arg0>  Provide an appfile; ignore all other command line options
                echo "User defined -app option found" >> "${LOGFILE:-/dev/null}"
                USR_APP_FILE="$2"
                _opt_words=2
                ;;
            -bynode|--bynode|-byslot|--byslot)
                if [ -z "$LSB_PJL_TASK_GEOMETRY" ]; then
                    OPENMPI_OPTS="$OPENMPI_OPTS $1 "
                else
                    echo "LSB_PJL_TASK_GEOMETRY is defined, mpirun option $1 will be ignored" >> "${LOGFILE:-/dev/null}"
                fi
                _opt_words=1
                ;;
            -c|-np|--np|-n|--n)
                # <arg0>       Number of processes to run
                echo "mpirun option $1 $2 will be ignored" >> "${LOGFILE:-/dev/null}"
                _opt_words=2
                ;;
            -tv|--tv|-nolocal|--nolocal)
                # Flags without an argument. -tv/--tv must not consume the
                # following word, otherwise the job name would be dropped.
                echo "mpirun option $1 will be ignored" >> "${LOGFILE:-/dev/null}"
                _opt_words=1
                ;;
            -d|-debug|--debug|--debug-daemons|--debug-daemons-file|--no-daemonize|--debug-devel|-q|--quiet|-V|--version|-nooversubscribe|--nooversubscribe|-h|--help|-nw|--nw|-v|--verbose)
                # plain flags, forwarded unchanged
                OPENMPI_OPTS="$OPENMPI_OPTS $1 "
                _opt_words=1
                ;;
            -gmca|--gmca|-mca|--mca)
                #<arg0> <arg1>
                OPENMPI_OPTS="$OPENMPI_OPTS $1 $2 $3 "
                _opt_words=3
                ;;
            -H|-host|--host|-hostfile|--hostfile|-machinefile|--machinefile)
                # <arg0>   List of hosts to invoke processes on
                echo "mpirun option $1 $2 will be ignored" >> "${LOGFILE:-/dev/null}"
                _opt_words=2
                ;;
            *)
                break
                ;;
        esac

        # An option given without its argument at the very end of the
        # command line must not shift past the last word: that is a fatal
        # error in POSIX shells.
        if [ "$_opt_words" -gt "$#" ]; then
            _opt_words=$#
        fi
        shift "$_opt_words"
        OPENMPI_ARGS_CONSUMED=$((OPENMPI_ARGS_CONSUMED + _opt_words))
    done
}

parse_openmpi_opts "$@"
shift "$OPENMPI_ARGS_CONSUMED"
Packit 857059
Packit 857059
# Everything left on the command line is the job and its options.
JOB_CMDLN="$*"

# -----------------------------------------------------
#  Build CMD_LINE, the command placed on every appfile line:
# - without LSF_TS_OPTIONS it is just the job command line (JOB_CMDLN)
# - with LSF_TS_OPTIONS the job is started through the TaskStarter:
#   LSF_TS, then LSF_TS_OPTIONS, then JOB_CMDLN
#  The value is logged first and then gets one trailing blank.
#--------------------------------------------------------------------------------
if [ -n "$LSF_TS_OPTIONS" ]; then
    CMD_LINE="$LSF_TS $LSF_TS_OPTIONS $JOB_CMDLN"
else
    CMD_LINE="$JOB_CMDLN"
fi
echo CMD_LINE="$CMD_LINE" >> $LOGFILE
CMD_LINE="$CMD_LINE "
Packit 857059
Packit 857059
#------------------------------------------------------------------
# Construct LSF Job -app file
#
# Three cases:
# - the user passed -app/--app: not allowed, exit with status 1
# - LSB_PJL_TASK_GEOMETRY is set: one appfile line per task, hosts
#   ordered by reorder_file_based_on_task_geom from pjllib.sh
# - otherwise: one appfile line per "host slots" pair of LSB_MCPU_HOSTS
#------------------------------------------------------------------

# Keep using /bin/awk where it exists, otherwise take awk from PATH.
if [ -x /bin/awk ]; then
    AWK_CMD=/bin/awk
else
    AWK_CMD=awk
fi

# expand_mcpu_hosts
#   Reads the "host slots host slots ..." pairs of $LSB_MCPU_HOSTS, writes
#   every host name to $HOST_FILE once per slot (one name per line) and
#   prints the total number of slots on stdout.
expand_mcpu_hosts()
{
    echo $LSB_MCPU_HOSTS | $AWK_CMD '
BEGIN {counter=0}
{
    size = split($0, a, " ");
    for (i = 1; i <= size; i += 2) {
        counter = counter + a[i + 1];
        for (j = 0; j < a[i + 1]; ++j) {
            print a[i] > hfile;
        }
    }
}
END { print counter}' hfile="$HOST_FILE"
}

# write_geometry_appfile
#   Writes one "-host <host> -n 1 <mpirun opts> <command>" line to
#   $APP_FILE for every host name found in $HOST_FILE, in file order.
write_geometry_appfile()
{
    _new_host_list=`cat "$HOST_FILE"`
    echo $_new_host_list | $AWK_CMD ' {
        size = split($0, a, " ");
        for (i = 1; i <= size; i++) {
            newln = sprintf("-host %s -n 1 %s %s",a[i], (openmpi_opt)?openmpi_opt:" ", (cmdln)?cmdln:" ");
            print newln > appfile
        }
    }' appfile="$APP_FILE" cmdln="$CMD_LINE" openmpi_opt="$OPENMPI_OPTS"
}

# write_default_appfile
#   Writes one "-host <host> -n <slots> <mpirun opts> <command>" line to
#   $APP_FILE for every "host slots" pair of $LSB_MCPU_HOSTS.
write_default_appfile()
{
    echo $LSB_MCPU_HOSTS | $AWK_CMD ' {
        size = split($0, a, " ");
        for (i = 1; i < size; i += 2) {
            # host and slot count are picked explicitly instead of relying
            # on the evaluation order of a[i], a[++i] in one argument list
            host = a[i];
            slots = a[i + 1];
            newln = sprintf("-host %s -n %s %s %s", host, slots, (openmpi_opt)?openmpi_opt:" ", (cmdln)?cmdln:" ");
            print newln > appfile
        }
    }' appfile="$APP_FILE" cmdln="$CMD_LINE" openmpi_opt="$OPENMPI_OPTS"
}

if [ "$USR_APP_FILE" != "" ]; then
    # User defined --app <appfile>
    echo "User defined -app $USR_APP_FILE is not allowed. Exiting..." | tee -a "${LOGFILE:-/dev/null}"
    cleanup_openmpi
    exit 1
elif [ "$LSB_PJL_TASK_GEOMETRY" != "" ]; then
    #------------------------------------------------------------------
    # handle $LSB_PJL_TASK_GEOMETRY
    # It will shuffle the order of the appfile
    # based on the order of task geometry
    #------------------------------------------------------------------
    # Check for the library before sourcing it: a "." on a missing file
    # aborts a non-interactive POSIX shell without this message.
    if [ ! -f "${LSF_BINDIR}/pjllib.sh" ]; then
        echo "Cannot find pjllib.sh in $LSF_BINDIR. Exiting..." 1>&2
        cleanup_openmpi
        exit 1
    fi
    . "${LSF_BINDIR}/pjllib.sh"

    # Writes $HOST_FILE as a side effect. TOTAL_CPUS is not read again in
    # this script; it is kept in case the sourced library uses it
    # (NOTE(review): not verifiable from here).
    TOTAL_CPUS=`expand_mcpu_hosts`

    # get a host list each host per line that satisfies the task geometry
    # then construct the appfile based on new host list
    reorder_file_based_on_task_geom "$HOST_FILE" "sort"
    EXIT_VALUE=$?
    if [ "$EXIT_VALUE" != "0" ]; then
        echo "Error in reorder_file_based_on_task_geom \"$HOST_FILE\" \"sort\", Exit ..." 1>&2
        cleanup_openmpi
        exit ${EXIT_VALUE}
    fi
    write_geometry_appfile
else
    write_default_appfile
fi
Packit 857059
Packit 857059
# Record the generated appfile and the launch command in the log file.
{
    echo "appfile $APP_FILE reads"
    cat "$APP_FILE"
    echo "command used to launch the job : "
    echo "   $MPIRUN_CMD -app $APP_FILE "
} >> "${LOGFILE:-/dev/null}"

# MPIRUN_CMD is deliberately left unquoted so that it may carry extra
# words; the appfile path is quoted because it may contain blanks.
$MPIRUN_CMD --app "$APP_FILE"
EXIT_VALUE=$?

#------------------------------------------------------------------
# Clean up:
# - remove temporary files
# - exit with the exit value of the mpirun command
#------------------------------------------------------------------
cleanup_openmpi
exit $EXIT_VALUE
Packit 857059
Packit 857059
#-------------------------------------------------------------------------
Packit 857059
# End the script.
Packit 857059
#-------------------------------------------------------------------------