#!/bin/bash
# Name of the pilot script that this wrapper downloads and runs at the end.
main="ProdPilot.py"
# Queue name taken from a --queue=<name> argument; stays empty when none is given.
export queuename=""

# Scan the given arguments for --queue=<name> and store <name> in the global
# variable queuename.
# Globals:   queuename (written)
# Arguments: the wrapper's command-line arguments
# Outputs:   "Queue <name>" on stdout for every matching argument (last one wins)
_set_queuename_from_args() {
  local arg
  # "$@" keeps every argument intact (no word splitting, no glob expansion).
  for arg in "$@"; do
    case "$arg" in
      --queue=*)
        # Only the exact --queue= prefix counts; look-alikes such as
        # --queuename=... are ignored instead of being stored verbatim.
        queuename=${arg#--queue=}
        echo "Queue $queuename"
        ;;
    esac
  done
}
_set_queuename_from_args "$@"
# If an OSG setup script exists, run it.
# OSG_GRID is read from the environment. It is quoted everywhere so that an
# empty/unset value and a path containing whitespace are both handled
# explicitly instead of relying on how `test` parses a missing operand.
if [[ -n "$OSG_GRID" ]]; then
  if [[ -f "$OSG_GRID/setup.sh" ]]; then
    echo "Running OSG setup from $OSG_GRID/setup.sh"
    # Sourced (not executed) so that the settings it makes apply to this shell.
    source "$OSG_GRID/setup.sh"
  else
    echo "!!WARNING!!2999!!OSG_GRID defined but setup file $OSG_GRID/setup.sh does not exist"
  fi
else
  echo "No OSG setup script found. OSG_GRID='$OSG_GRID'"
fi
# ---- Service endpoints ------------------------------------------------------
# URL of the transformation script, handed to $main through --transurl.
transurl="http://www.usatlas.bnl.gov/svn/panda/autopilot/trunk/trans-atlasprod.sh"

# Panda server, plain and SSL.
PANDA_URL=http://pandasrv.usatlas.bnl.gov:25080/server/panda
PANDA_URL_SSL=https://pandasrv.usatlas.bnl.gov:25443/server/panda
export PANDA_URL PANDA_URL_SSL
# Alternative ports, currently disabled:
#export PANDA_URL=http://pandasrv.usatlas.bnl.gov:26080/server/panda
#export PANDA_URL_SSL=https://pandasrv.usatlas.bnl.gov:26443/server/panda

# DQ2 catalog server, plain and SSL.
DQ2_URL_SERVER=http://atlddmcat.cern.ch/dq2/
DQ2_URL_SERVER_SSL=https://atlddmcat.cern.ch:443/dq2/
export DQ2_URL_SERVER DQ2_URL_SERVER_SSL

# ---- Parameters forwarded to pilot.py ---------------------------------------
# New param for pilot.py: '-f false' means pilot.py should not ask Panda for a
# job, we already have the job. pilot.py should pick up the Panda response from
# pandaJobData.out
pilot2pars="-f false -m false"
# Single-cache variant, currently disabled:
#tppars="--script=pilot.py --libcode=pilotcode.tar.gz --pilotsrcurl=http://pandamon.usatlas.bnl.gov:25880/cache"
tppars="--script=pilot.py --libcode=pilotcode.tar.gz --pilotsrcurl=http://pandamon.usatlas.bnl.gov:25880/cache,http://gridui05.usatlas.bnl.gov:25880/cache,http://gridui06.usatlas.bnl.gov:25880/cache,http://gridui07.usatlas.bnl.gov:25880/cache"

# ---- Start-up log -----------------------------------------------------------
# All command-line arguments flattened into one exported string.
export params=$@
echo "Pilot wrapper script start at $(date) on $(hostname -f)"
HOSTNAME=$(hostname -f)
export HOSTNAME
echo "Command line parameters: $params"
echo "Environment:"
printenv

# Base URL from which curl (if missing) and the AutoPilot scripts are fetched.
tpurl=http://www.usatlas.bnl.gov/svn/panda/autopilot/trunk/
# Overall health flag: flipped to "no" by any failing check further down.
ALLOK=yes
# Standard system directories first, the working directory last.
PATH="/usr/local/bin:/usr/bin:/bin:${PATH}:${PWD}"
export PATH
#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib:/usr/lib
## Work around curl errors 256 / 1792.
# According to a posting found on google, errors such as
#   'Curl error 256: at ./sophosmonthly.pl line 112.' or
#   'Curl error 1792: at ./sophosmonthly.pl line 138.'
# go away after running
#   echo "--disable-epsv" > ~/.curlrc
# which disables the FTP server's default enhanced passive mode for curl.
# Note that the redirection replaces whatever ~/.curlrc contained before.
printf '%s\n' '--disable-epsv' > ~/.curlrc
## curl present?
# Locate curl on PATH. When it is missing, try to download a curl binary and
# the shared libraries listed below from $tpurl with wget into the current
# directory, then look for curl again. ALLOK is set to "no" when curl is still
# unavailable afterwards.
# `command -v` is a shell builtin, so detection does not depend on an external
# `which` program being installed.
xcurl=$(command -v curl)
if test -x "$xcurl"; then
  echo "PATH=$PATH"
  echo "curl found: $xcurl"
  curl -V
else
  # try to install it if wget is there
  command -v wget
  # Fetches the www.bnl.gov front page; presumably a connectivity probe whose
  # output is only useful in the log.
  wget www.bnl.gov
  wget "${tpurl}curl"
  chmod +x curl
  for curl_lib in libcurl.so.3 libssl.so.4 libcrypto.so.4 libcom_err.so.3; do
    wget "${tpurl}${curl_lib}"
  done
  echo "pwd: $(pwd)"
  echo "PWD: $PWD"
  echo "Listing: $(ls -al)"
  export PATH="$PWD:$PATH"
  echo "PATH: $PATH"
  # Must be exported: a plain assignment only reaches child processes when the
  # variable was already in the environment, so the downloaded ./curl would
  # otherwise not find the libraries fetched above.
  export LD_LIBRARY_PATH="$PWD${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
  echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
  command -v curl
  ./curl -V
  xcurl=$(command -v curl)
  if test -x "$xcurl"; then
    echo "curl missing. Installed it."
  else
    echo "pilotWrapper ERROR: curl not found, and attempt to install it failed. Cannot continue."
    echo '!!FAILED!!2000!!curl not found'
    echo "Path: $PATH"
    command -v wget
    command -v curl
    ls -al /usr/bin/curl /usr/local/bin/curl
    ALLOK=no
  fi
fi
# Outbound connectivity checks, run only while no earlier check has failed.
# Two probes are made with curl: one over https to www.bnl.gov and one over
# http to $tpurl. Each probe's output is filtered with grep and counted with
# `wc -c`; a count of 0 is treated as failure, prints diagnostics and sets
# ALLOK=no. HTTPSOK records the https result so that the http failure branch
# can report either "both failed" (2006) or "http only" (2002).
if test $ALLOK = "yes"; then
## outbound http working?
# The probe commands are kept as strings so they can be echoed in the
# diagnostics below; they are run through unquoted expansion.
cmd1='curl --connect-timeout 20 --max-time 120 --insecure -s -S https://www.bnl.gov'
# NOTE(review): the grep pattern below spans two lines, i.e. it starts with a
# newline. grep treats newline as a pattern separator, so this presumably adds
# an empty pattern that matches every line, making the check "any output at
# all" rather than "contains 'Object moved'". Part of the pattern may have been
# lost -- confirm against the upstream file before changing it.
res1=`$cmd1 | grep -i '
Object moved' | wc -c`
cmd2="curl --connect-timeout 20 --max-time 120 -s -S $tpurl"
# The listing at $tpurl is expected to mention SimplePilot.py.
res2=`$cmd2 | grep -i 'SimplePilot.py' | wc -c`
if test $res1 = 0; then
echo "pilotWrapper ERROR: outbound https not working. Cannot continue."
echo '!!FAILED!!2001!!curl https not working'
echo "https to www.bnl.gov:"
# Repeat the request verbosely so the log shows where it fails.
curl --connect-timeout 20 --max-time 120 --verbose --insecure -s -S https://www.bnl.gov
echo "https_proxy=$https_proxy"
echo "curl test was: $cmd1"
echo " result: $res1"
curl -V
echo "Doing authenticated curl:"
# Same request again, presenting /tmp/x509up_u<uid> as both certificate and key.
curl --connect-timeout 20 --max-time 120 --verbose -Ss --cert /tmp/x509up_u`id -u` --key /tmp/x509up_u`id -u` https://www.bnl.gov
ALLOK=no
HTTPSOK=no
else
echo "Outbound https working"
HTTPSOK=yes
fi
if test $res2 = 0; then
echo "pilotWrapper ERROR: outbound http not working. Cannot continue."
echo "http to $tpurl:"
curl --connect-timeout 20 --max-time 120 --verbose -s -S $tpurl
echo "http to www.bnl.gov:"
curl --connect-timeout 20 --max-time 120 --verbose -s -S http://www.bnl.gov
echo "http_proxy=$http_proxy"
echo "curl test was: $cmd2"
echo " result: $res2"
curl -V
ALLOK=no
# Pick the failure code depending on whether https failed as well.
if test $HTTPSOK = "no"; then
echo '!!FAILED!!2006!!Outbound http(s) fails'
else
echo '!!FAILED!!2002!!curl http not working'
fi
else
echo "Outbound http working"
# Disabled: test exchange with the Panda server (one retry after 1 second,
# failure code 2005).
# resp=`curl -Ss --insecure --compressed 'http://gridui02.usatlas.bnl.gov:26080/server/panda/getJob?node=dummy&prodSourceLabel=test&siteName=FAKESITE&mem=-1&disk=-1&cpu=-1' | grep -i 'StatusCode=' | wc -c`
# if test $resp = 0; then
# sleep 1
# resp=`curl -Ss --insecure --compressed 'http://gridui02.usatlas.bnl.gov:26080/server/panda/getJob?node=dummy&prodSourceLabel=test&siteName=FAKESITE&mem=-1&disk=-1&cpu=-1' | grep -i 'StatusCode=' | wc -c`
# if test $resp = 0; then
# echo "pilotWrapper ERROR: Panda server exchange failed. Cannot continue."
# echo '!!FAILED!!2005!!Panda server exchange failed'
# echo
# echo `curl --verbose -Ss --insecure --compressed 'http://gridui02.usatlas.bnl.gov:26080/server/panda/getJob?node=dummy&prodSourceLabel=test&siteName=FAKESITE&mem=-1&disk=-1&cpu=-1'`
# ALLOK=no
# else
# echo "Panda server exchange OK"
# fi
# else
# echo "Panda server exchange OK"
# fi
fi
fi
## Python present?
# Resolve python through `which`; without an executable result the environment
# is flagged as unusable (ALLOK=no) and failure code 2003 is reported.
xpython=$(which python)
if [ ! -x "$xpython" ]; then
  echo "pilotWrapper ERROR: python not found. Cannot continue."
  echo '!!FAILED!!2003!!python not found'
  ALLOK=no
else
  echo "python found: $xpython"
  # Left unquoted as in the original: only python's stdout is captured here.
  echo $(python -V)
fi
## g++ setup
# When /usr/bin/g++32 can be resolved, alias g++ to g++32.
# NOTE(review): bash does not expand aliases in non-interactive shells unless
# expand_aliases is set, and aliases are not inherited by child processes --
# confirm that this alias has the intended effect.
if which /usr/bin/g++32; then
  alias g++=g++32
fi
# Stop here when one of the checks above marked the environment as unusable;
# the environment and the current time are dumped to the log before exiting
# with status 1.
if [[ $ALLOK == "no" ]]; then
  printf '%s\n' "ERROR - will abort due to unusable environment. Here's some environment info."
  #echo "=== ls -al /usr/bin"
  #ls -al /usr/bin
  #echo "=== ls -al /usr/local/bin"
  #ls -al /usr/local/bin
  printf '%s\n' "=== printenv"
  printenv
  date
  exit 1
fi
## Check WN environment
# $OSG_WN_TMP set? Else $LSF_SCRATCH on LSF systems
## All OK so far. Do site/vo specific setup.
### move to VO/site specific setup/integrity check, which is pulled down depending
### on site/vo
## Configure dq2_* commands
#echo "Set up grid, dq2_* commands"
#source /afs/cern.ch/project/gd/LCG-share/sl3/etc/profile.d/grid_env.sh
#source /afs/usatlas.bnl.gov/Grid/Don-Quijote/dq2_user_client/setup.sh.CERN
#export LFC_HOST=lfc-atlas.cern.ch
#export LCG_CATALOG_TYPE=lfc
#which dq2_get
## Anything went wrong in site/vo specific setup?
# Second bail-out point, meant to catch failures of the site/VO specific setup.
# Besides the environment it also lists the system binary directories before
# exiting with status 1.
if [[ $ALLOK == "no" ]]; then
  printf '%s\n' \
    "pilotWrapper ERROR - aborting due to unusable environment. Here's some environment info." \
    "=== printenv"
  printenv
  for bindir in /usr/bin /usr/local/bin; do
    echo "=== ls -al $bindir"
    ls -al $bindir
  done
  date
  exit 1
fi
## All OK. Fetch the AutoPilot scripts
# Each script is downloaded from $tpurl into the current directory and made
# executable. ALLOK is set to "no" (failure code 2004) when a script is not
# available afterwards.
#scripts="$main PilotUtils.py dq2_poolFCjobO"
scripts="$main PilotUtils.py"
echo "Fetching scripts: $scripts"
# $scripts is a space-separated list, so it is deliberately left unquoted here.
for file in $scripts; do
  # Download to a temporary name first: a failed or partial transfer then never
  # replaces (or poses as) the real script.
  download="$file.download"
  # --fail makes curl exit non-zero on HTTP errors (status >= 400) instead of
  # saving the server's error page as if it were the script.
  if curl --fail --connect-timeout 20 --max-time 120 "$tpurl$file" -s -S -o "$download" \
    && test -s "$download"; then
    mv -f -- "$download" "$file"
  else
    rm -f -- "$download"
  fi
  # As before, a copy that is already present in the working directory is
  # accepted; an empty file is not.
  if test -s "$file"; then
    chmod +x "$file"
  else
    echo "ERROR: Failed to retrieve script $file from $tpurl$file"
    echo '!!FAILED!!2004!!script retrieve failed'
    ALLOK=no
  fi
done
# Be sure job.out exists
touch job.out
## Is there a site setup to do?
# Ask the Panda monitor for the environment setup command configured for
# $queuename. Judging by the substitutions below, the answer is expected to be
# of the form "source <file>". An empty answer means there is nothing to do.
#
# --fail: on an HTTP error (status >= 400) curl prints nothing to stdout, so an
# error page is never mistaken for a setup command and executed.
#
# NOTE(review): SECURITY - the text returned by this plain-http request is
# executed in this shell without validation. Anyone able to alter the response
# can run commands as the pilot user. Consider https and/or checking that the
# answer has the expected "source <path>" shape.
envsetup=$(curl --fail --connect-timeout 20 --max-time 60 "http://pandamon.usatlas.bnl.gov:25880/server/pandamon/query?tpmes=pilotpars&getpar=envsetup&queue=$queuename" -s -S)
if [[ -n "$envsetup" ]]; then
  # Derive "cat <file>" and "ls -alL <file>" from the setup command by
  # replacing the first "source " (parameter expansion instead of echo | sed,
  # so the value is not re-split or glob-expanded while being rewritten).
  catsetup="${envsetup/source /cat }"
  lssetup="${envsetup/source /ls -alL }"
  echo "Setup command: '$envsetup'"
  echo "Listing: $lssetup"
  # The three expansions below are intentionally unquoted: each variable holds
  # a command followed by its argument and relies on word splitting to run.
  $lssetup
  echo "___________ setup content:"
  $catsetup
  echo "___________ running setup"
  $envsetup
  echo "Environment after setup:"
  printenv
else
  echo "No site setup script to execute. Proceeding without."
fi
# Log which proxy tool would be used and what it reports.
# Arguments: $1 - tool name; remaining arguments are passed to the tool.
_show_proxy_tool() {
  echo "$1:"
  which "$1"
  "$@"
}
_show_proxy_tool grid-proxy-info
_show_proxy_tool voms-proxy-info -all
# 64bit worker nodes with 32 bit python available:
# use python32 when `which` can resolve it, plain python otherwise.
pybin=python
if which python32; then
  pybin=python32
fi
echo "Will use pybin=$pybin"
## Run the pilot which will ask Panda for a job
# The wrapper's exit status is that of $main, or 1 when an earlier step failed
# and $main is not started at all.
if [[ $ALLOK != "yes" ]]; then
  echo "pilot wrapper aborting at $(date)"
  exit 1
fi

echo "==== Executing $main"
# $params is left unquoted so that the flattened argument string is split back
# into separate arguments; the --pilotpars value is passed as a single argument
# that carries literal double quotes.
$pybin $main $params --transurl=$transurl "--pilotpars=\"$pilot2pars $tppars\""
RETVAL=$?
case $RETVAL in
  0)
    ;;
  2)
    # Status 2 is reported as a regular finish with the note "site offline".
    echo "!!FINISHED!!0!!site offline"
    ;;
  *)
    echo "!!FAILED!!2007!!$main script failed"
    ;;
esac
echo "pilot wrapper exiting at $(date). Return value $RETVAL"
exit $RETVAL