#!@BOURNE_SHELL@
#   $Source: runtime/gasnet/sci-conduit/contrib/gasnetrun_sci.in $
#     $Date: Tue, 19 Apr 2005 02:00:38 -0700 $
# $Revision: 1.7 $
# Terms of use are as specified in license.txt
#
# Burt Gordon, High-performance Computing & Simulation (HCS) Lab
# June 2003
#
# Description:
#   Reads host names and SCI ids from a machines file (or a jobs file),
#   writes the selected SCI ids to gasnet_nodes.sci in the program's
#   directory, and then uses rsh (or ssh) to start the program on each
#   selected machine.  The first listed host is started last and in the
#   foreground so that the script blocks until it finishes.
#
# Portability:
#   The interpreter is whatever configure substitutes for the Bourne shell
#   placeholder, so only POSIX sh constructs are used below (no "(( ))",
#   no "==" inside "[ ]", no "[ -a file ]"), and only POSIX awk features
#   (no gensub, which is a GNU awk extension).

# Print the usage screen on stdout.
# NOTE(review): several entries below read as if a "<...>" placeholder was
# lost (e.g. the program name in the usage line); the text is kept as found.
# NOTE(review): the text says a missing -n starts one copy, but the script
# rejects a missing -n with an error; confirm which behaviour is intended.
PrintHelp() {
  echo "Usage:"
  echo " gasnetrun_sci [options] [program_args]"
  echo ""
  echo " : Program to be run on each node. Assumed to be "
  echo " a globally accessible location. If not fully "
  echo " qualified, assumed to be in the current directory."
  echo ""
  echo "Options:"
  echo " -n : number of nodes to use. (Also accept -np)"
  echo " If number exceeds the total number of nodes listed in the machines file,"
  echo " then program_name is started once on each of the nodes in the file. If "
  echo " -n option is not used, only one copy of program_name will be started."
  echo ""
  echo " -machines : absolute location of machines file. If not specified, equal to "
  echo " machines.sci in current directory"
  echo ""
  echo " -jobs : list of nodes to use, can be used in place of machines.sci, supercedes"
  echo " -machines and machines.sci if they are defined."
  echo ""
  echo " -spawn <[rsh, ssh]> : choose either rsh or ssh as the spawner, default is rsh"
  echo ""
  echo " -E VAR1[,VAR2...] : indicate environment variables to propagate"
  echo ""
  echo " -v : verbose operation"
  echo ""
  echo " -help : this message"
  echo ""
}

# Abort with a usage message unless the option in $1 is followed by a value.
# Call as: NeedValue "$@".  Without this guard a trailing option made
# "shift 2" fail without shifting, so the parsing loop never terminated.
NeedValue() {
  if [ $# -lt 2 ]; then
    echo "ERROR: option '$1' requires an argument. Exiting..."
    echo ""
    PrintHelp
    exit 1
  fi
}

# Print $1 wrapped in single quotes, with embedded single quotes escaped,
# so that the remote shell sees it as exactly one word.
QuoteArg() {
  printf "'%s'" "$(printf '%s' "$1" | sed "s/'/'\\\\''/g")"
}

# defaults
# Everything is initialised explicitly so that same-named variables in the
# caller's environment cannot leak into the script.
RSH=rsh
USERENV=
NUMBER_NODES=
MFILE=
JOB_FILE=
PROG=
ARGS=
FILE=
HOSTS=
SCI_IDS=
# VERBOSE was never initialised by earlier revisions, so a value inherited
# from the environment is still honoured.
VERBOSE=${VERBOSE:-}

# go through each of the arguments to the script
while [ $# -gt 0 ]; do
  case "$1" in
    -n | -np)
      NeedValue "$@"
      NUMBER_NODES=$2
      shift 2
      ;;
    -machines)
      NeedValue "$@"
      MFILE=$2
      shift 2
      ;;
    -jobs)
      NeedValue "$@"
      JOB_FILE=$2
      shift 2
      ;;
    -spawn)
      NeedValue "$@"
      RSH=$2
      shift 2
      ;;
    -E)
      NeedValue "$@"
      USERENV=$2
      shift 2
      ;;
    -v)
      VERBOSE=1
      shift
      ;;
    -help | -h)
      PrintHelp
      exit 1
      ;;
    # everything else is a program name followed by its arguments
    *)
      PROG=$1
      shift
      # Count-based loop: an empty-string argument must not end the list.
      while [ $# -gt 0 ]; do
        ARGS="$ARGS $(QuoteArg "$1")"
        shift
      done
      ;;
  esac
done

if [ -n "$VERBOSE" ]; then
  GASNET_VERBOSEENV=1
  export GASNET_VERBOSEENV
fi

# Need to get the directory info from the program name
if [ -z "$PROG" ]; then
  echo "ERROR: Program name is not specified. Exiting..."
  echo ""
  PrintHelp
  exit 1
fi

# Split the program into directory and base name.  A name without any "/"
# lives in the current working directory.  Testing for the slash directly
# (instead of comparing base name and directory) keeps "foo/foo" working,
# and a program directly under "/" gets "/" rather than an empty directory.
case "$PROG" in
  */*)
    DIR=${PROG%/*}
    PROG=${PROG##*/}
    if [ -z "$DIR" ]; then
      DIR=/
    fi
    ;;
  *)
    DIR=$PWD
    ;;
esac

# ensure we have a canonical absolute path (this also resolves "." and
# other relative directories).  CDPATH is cleared for the lookup so cd
# cannot jump elsewhere or print the directory it picked.
ABS_DIR=$(CDPATH= cd "$DIR" && pwd) || {
  echo "ERROR: cannot access program directory '$DIR'. Exiting..."
  exit 1
}
DIR=$ABS_DIR

# check and see if there is a machine file
# if not, there had better be a jobs file
if [ -n "$MFILE" ]; then
  if [ ! -e "$MFILE" ]; then
    echo "ERROR: machine file '$MFILE' does not exist! Exiting..."
    echo ""
    PrintHelp
    exit 1
  fi
elif [ -e machines.sci ]; then
  MFILE=machines.sci
elif [ -z "$JOB_FILE" ]; then
  echo "Cannot find jobs or machines file"
  echo "Need to define either jobs file or machines file to continue"
  PrintHelp
  exit 1
fi

if [ -n "$JOB_FILE" ] && [ ! -e "$JOB_FILE" ]; then
  echo "ERROR: jobs file '$JOB_FILE' does not exist! Exiting..."
  echo ""
  PrintHelp
  exit 1
fi

# check if number of nodes has been specified
if [ -z "$NUMBER_NODES" ]; then
  echo "ERROR: Number of nodes is not specified. Exiting..."
  echo ""
  PrintHelp
  exit 1
fi
case "$NUMBER_NODES" in
  *[!0-9]*)
    echo "ERROR: Number of nodes '$NUMBER_NODES' is not a whole number. Exiting..."
    echo ""
    PrintHelp
    exit 1
    ;;
esac

# Now read the machines file and get information:
#   the first column should be all the hostnames to use
#   the second column should be the SCI ids of the hostnames in column 1
# Reading stops at the first blank line.
if [ -n "$MFILE" ]; then
  HOSTS=$(@AWK@ 'NF==0 {exit} {print $1}' < "$MFILE")
  SCI_IDS=$(@AWK@ 'NF==0 {exit} {print $2}' < "$MFILE")
fi

# note, if a jobs file is defined, it overwrites the variables set by the
# machines file
if [ -n "$JOB_FILE" ]; then
  HOSTS=$(@AWK@ 'NF==0 {exit} {print $1}' < "$JOB_FILE")
  SCI_IDS=$(@AWK@ 'NF==0 {exit} {print $2}' < "$JOB_FILE")
fi

# get rid of all the \n and \r characters and replace them with spaces
SCI_IDS=$(printf '%s\n' "$SCI_IDS" | tr '\r\n' '  ')

# Split the host list into words once.  The first host becomes GASNet
# node 0 and is started last; the rest are started in the background.
# $HOSTS is deliberately unquoted so it splits on whitespace.
set -- $HOSTS
HOST_COUNT=$#
if [ "$HOST_COUNT" -eq 0 ]; then
  echo "ERROR: no hosts found in jobs or machines file. Exiting..."
  exit 1
fi
FIRST=$1
shift
OTHER_HOSTS="$*"

# Collect the SCI ids of the nodes that will be used, each followed by ";".
# GASNet ids are mapped from 0 to n starting with the top of the file list.
# Only ids that have a matching host line and fall below the requested node
# count are kept.  A single pass replaces the former host x id double loop.
j=0
for k in $SCI_IDS; do
  if [ "$j" -ge "$HOST_COUNT" ] || [ "$j" -ge "$NUMBER_NODES" ]; then
    break
  fi
  # just place SCI ids, don't need node names
  FILE="$FILE $k;"
  j=$((j + 1))
done

# The node count written to the file is the smaller of the number of hosts
# listed and the number of nodes requested.
if [ "$HOST_COUNT" -lt "$NUMBER_NODES" ]; then
  NODE_COUNT=$HOST_COUNT
else
  NODE_COUNT=$NUMBER_NODES
fi

# write the file information into a global location near the program
# we assume an NFS-like file structure so that this file will be available
# to all nodes
cd "$DIR" || {
  echo "ERROR: cannot change to program directory '$DIR'. Exiting..."
  exit 1
}
rm -f gasnet_nodes.sci
# NOTE(review): the node count and the id list are written on separate
# lines; confirm this layout against the code that reads gasnet_nodes.sci.
cat > gasnet_nodes.sci << EOF
$NODE_COUNT
$FILE
EOF

# The remote processes are started from the directory we are in now, which
# after the cd above is the program directory.
REMOTE_DIR=$PWD

# we need to gather any GASNET environment variables set on this machine
# and propagate them to all the executing machines
# use the env command as the default shell may be different on every machine
ENVCMD=@ENVCMD@
if ! test -x "$ENVCMD"; then
  ENVCMD=$(command -v env)
  if ! test -x "$ENVCMD"; then
    echo "ERROR: failed to find /usr/bin/env"
    exit 1
  fi
fi

# round-up user requested environment variables: turn "A,B" into
# "-e ^A= -e ^B=" for grep
USERENV=$(printf '%s\n' "$USERENV" |
  @AWK@ -F, 'BEGIN{ORS=" "}{for(i=1;i<=NF;i++) { print "-e ^"$i"="; } }')
SYSTEMENV="-e ^GASNET_"

# Build "NAME='value' ..." for every selected variable.  The line is split
# at the first "=" only, so values that themselves contain "=" survive, and
# single quotes inside a value are escaped for the remote shell.
# $USERENV and $SYSTEMENV are deliberately unquoted: each holds several
# grep arguments.
ENV_VARS=$("$ENVCMD" | grep $USERENV $SYSTEMENV |
  @AWK@ -v q="'" '
    BEGIN { ORS = "" }
    {
      eq = index($0, "=")
      if (eq == 0) next
      name = substr($0, 1, eq - 1)
      value = substr($0, eq + 1)
      gsub(q, q "\\\\" q q, value)
      print name "=" q value q " "
    }')

# if there are no variables to propagate, do not invoke env remotely at all
if [ -n "$ENV_VARS" ]; then
  EXPORT="$ENVCMD $ENV_VARS"
else
  EXPORT=""
fi

# now with that information, we need to log into every machine and start
# the program.  The first machine in the list is GASNet node 0; we log into
# it last and in the foreground, so the script waits until that program has
# completed before the script ends.  This way ctrl+c will kill the program.
#
# A check that the local hostname equals the first node in the list (i.e.
# that this script is started on GASNet node 0) used to live here and is
# disabled.

REMOTE_CMD="cd $(QuoteArg "$REMOTE_DIR"); $EXPORT $(QuoteArg "$DIR/$PROG") $ARGS ; exit;"

if [ -n "$VERBOSE" ]; then
  echo "Beginning program on host(s)"
fi

# Start the remaining hosts in the background (the & is needed, or else the
# script would wait for each program to terminate before moving on).
# Counting starts at 1 because the first host takes one of the requested
# node slots.  $RSH is left unquoted, as before, so it is word-split.
i=1
for h in $OTHER_HOSTS; do
  if [ "$i" -ge "$NUMBER_NODES" ]; then
    break
  fi
  $RSH "$h" "$REMOTE_CMD" &
  if [ -n "$VERBOSE" ]; then
    printf '%s\n' "$RSH $h -- $REMOTE_CMD"
  fi
  i=$((i + 1))
done

if [ -n "$VERBOSE" ]; then
  echo "Finished with rsh"
  echo "Now going to the first host"
fi

# start the program on the first host; this is the last command, so its
# exit status becomes the script's exit status
if [ -n "$VERBOSE" ]; then
  printf '%s\n' "$RSH $FIRST -- $REMOTE_CMD"
fi
$RSH "$FIRST" "$REMOTE_CMD"