#!/bin/bash
#
# Pilot wrapper: validates the worker-node environment, downloads the
# AutoPilot scripts and runs $main with the wrapper's own arguments.
#
# Usage: <this script> [--queue=NAME] [other args passed through to $main]
#
# Steps, in order:
#   1. Extract the queue name from a --queue=NAME argument.
#   2. Source $OSG_GRID/setup.sh when OSG_GRID is set and the file exists.
#   3. Make sure curl is available (tries a wget-based bootstrap otherwise).
#   4. Check outbound https and http connectivity with curl.
#   5. Check that python is available.
#   6. Download $main and PilotUtils.py from $tpurl.
#   7. Ask the monitor for a per-queue setup command and run it.
#   8. Run $main and translate its return code into a status marker.
#
# Status markers written to stdout (the text is kept exactly as-is):
#   !!WARNING!!2999  OSG_GRID defined but setup.sh missing
#   !!FAILED!!2000   curl not found
#   !!FAILED!!2001   curl https not working
#   !!FAILED!!2002   curl http not working
#   !!FAILED!!2003   python not found
#   !!FAILED!!2004   script retrieve failed
#   !!FAILED!!2006   outbound http and https both fail
#   !!FAILED!!2007   $main returned a non-zero code other than 2
#   !!FINISHED!!0    $main returned 2 (reported as "site offline")
#
# Exit status: 1 when the environment is unusable, otherwise $main's status.
#
# No 'set -e' on purpose: many probes below are expected to fail and are
# handled explicitly.

main="ProdPilot.py"

# Pick the queue name out of the arguments. Any argument containing
# "--queue" is considered; the first "--queue=" in it is stripped.
export queuename=""
for p in "$@"; do
  if [[ "$p" == *--queue* ]]; then
    queuename=${p/--queue=/}
    echo "Queue $queuename"
  fi
done

# If OSG setup script exists, run it
if [[ -n "$OSG_GRID" ]]; then
  if [[ -f "$OSG_GRID/setup.sh" ]]; then
    echo "Running OSG setup from $OSG_GRID/setup.sh"
    source "$OSG_GRID/setup.sh"
  else
    echo "!!WARNING!!2999!!OSG_GRID defined but setup file $OSG_GRID/setup.sh does not exist"
  fi
else
  echo "No OSG setup script found. OSG_GRID='$OSG_GRID'"
fi

transurl="http://www.usatlas.bnl.gov/svn/panda/autopilot/trunk/trans-atlasprod.sh"

export PANDA_URL=http://pandasrv.usatlas.bnl.gov:25080/server/panda
export PANDA_URL_SSL=https://pandasrv.usatlas.bnl.gov:25443/server/panda
#export PANDA_URL=http://pandasrv.usatlas.bnl.gov:26080/server/panda
#export PANDA_URL_SSL=https://pandasrv.usatlas.bnl.gov:26443/server/panda
export DQ2_URL_SERVER=http://atlddmcat.cern.ch/dq2/
export DQ2_URL_SERVER_SSL=https://atlddmcat.cern.ch:443/dq2/

# New param for pilot.py: '-f false' means pilot.py should not ask Panda for
# a job, we already have the job. pilot.py should pick up the Panda response
# from pandaJobData.out
pilot2pars="-f false -m false"
#tppars="--script=pilot.py --libcode=pilotcode.tar.gz --pilotsrcurl=http://pandamon.usatlas.bnl.gov:25880/cache"
tppars="--script=pilot.py --libcode=pilotcode.tar.gz --pilotsrcurl=http://pandamon.usatlas.bnl.gov:25880/cache,http://gridui05.usatlas.bnl.gov:25880/cache,http://gridui06.usatlas.bnl.gov:25880/cache,http://gridui07.usatlas.bnl.gov:25880/cache"

# 'params' stays exported as a flat string for anything reading it from the
# environment. The array keeps the arguments intact (spaces, glob characters)
# for the final $main invocation, and is taken now because scripts sourced
# later could alter the positional parameters.
export params="$*"
pilot_args=("$@")

HOSTNAME=$(hostname -f)
export HOSTNAME
echo "Pilot wrapper script start at $(date) on $HOSTNAME"
echo "Command line parameters: $params"
echo "Environment:"
printenv

tpurl=http://www.usatlas.bnl.gov/svn/panda/autopilot/trunk/

# Overall health flag; any failed check flips it to "no".
ALLOK=yes

export PATH=/usr/local/bin:/usr/bin:/bin:$PATH:$PWD
#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib:/usr/lib

# Timeouts shared by every connectivity probe and download below.
curl_opts=(--connect-timeout 20 --max-time 120)

## Try to avoid curl error 1792 as per this posting found on google:
# If you see errors such as:
# 'Curl error 256: at ./sophosmonthly.pl line 112.' or 'Curl error 1792: at ./sophosmonthly.pl line 138.'
# Run
#   echo "--disable-epsv" > ~/.curlrc
# and re-run the update scripts. This disables our FTP server's defaulted
# enhanced passive mode for 'curl'.
# NOTE(review): this overwrites any existing ~/.curlrc.
echo "--disable-epsv" > ~/.curlrc

## curl present?
xcurl=$(command -v curl)
if [[ -x "$xcurl" ]]; then
  echo "PATH=$PATH"
  echo "curl found: $xcurl"
  curl -V
else
  # try to install it if wget is there
  command -v wget
  wget www.bnl.gov
  wget "${tpurl}curl"
  chmod +x curl
  wget "${tpurl}libcurl.so.3"
  wget "${tpurl}libssl.so.4"
  wget "${tpurl}libcrypto.so.4"
  wget "${tpurl}libcom_err.so.3"
  echo "pwd: $(pwd)"
  echo "PWD: $PWD"
  echo "Listing: $(ls -al)"
  PATH=$PWD:$PATH
  echo "PATH: $PATH"
  # Must be exported: otherwise the downloaded ./curl cannot see the
  # shared libraries fetched above unless the variable was already in
  # the environment.
  export LD_LIBRARY_PATH="$PWD${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
  echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
  command -v curl
  ./curl -V
  xcurl=$(command -v curl)
  if [[ -x "$xcurl" ]]; then
    echo "curl missing. Installed it."
  else
    echo "pilotWrapper ERROR: curl not found, and attempt to install it failed. Cannot continue."
    echo '!!FAILED!!2000!!curl not found'
    echo "Path: $PATH"
    command -v wget
    command -v curl
    ls -al /usr/bin/curl /usr/local/bin/curl
    ALLOK=no
  fi
fi

if [[ "$ALLOK" == "yes" ]]; then
  ## outbound http working?
  # Each probe counts the bytes of response lines matching an expected
  # marker string; a count of 0 is treated as "not working".
  cmd1=(curl "${curl_opts[@]}" --insecure -s -S https://www.bnl.gov)
  res1=$("${cmd1[@]}" | grep -i 'Object moved' | wc -c)
  cmd2=(curl "${curl_opts[@]}" -s -S "$tpurl")
  res2=$("${cmd2[@]}" | grep -i 'SimplePilot.py' | wc -c)

  # Numeric comparison: some wc implementations pad the count with spaces.
  if [[ "$res1" -eq 0 ]]; then
    echo "pilotWrapper ERROR: outbound https not working. Cannot continue."
    echo '!!FAILED!!2001!!curl https not working'
    echo "https to www.bnl.gov:"
    curl "${curl_opts[@]}" --verbose --insecure -s -S https://www.bnl.gov
    echo "https_proxy=$https_proxy"
    echo "curl test was: ${cmd1[*]}"
    echo " result: $res1"
    curl -V
    echo "Doing authenticated curl:"
    proxy_file="/tmp/x509up_u$(id -u)"
    curl "${curl_opts[@]}" --verbose -Ss --cert "$proxy_file" --key "$proxy_file" https://www.bnl.gov
    ALLOK=no
    HTTPSOK=no
  else
    echo "Outbound https working"
    HTTPSOK=yes
  fi

  if [[ "$res2" -eq 0 ]]; then
    echo "pilotWrapper ERROR: outbound http not working. Cannot continue."
    echo "http to $tpurl:"
    curl "${curl_opts[@]}" --verbose -s -S "$tpurl"
    echo "http to www.bnl.gov:"
    curl "${curl_opts[@]}" --verbose -s -S http://www.bnl.gov
    echo "http_proxy=$http_proxy"
    echo "curl test was: ${cmd2[*]}"
    echo " result: $res2"
    curl -V
    ALLOK=no
    if [[ "$HTTPSOK" == "no" ]]; then
      echo '!!FAILED!!2006!!Outbound http(s) fails'
    else
      echo '!!FAILED!!2002!!curl http not working'
    fi
  else
    echo "Outbound http working"
    # Disabled Panda server exchange check, kept for reference:
    # resp=`curl -Ss --insecure --compressed 'http://gridui02.usatlas.bnl.gov:26080/server/panda/getJob?node=dummy&prodSourceLabel=test&siteName=FAKESITE&mem=-1&disk=-1&cpu=-1' | grep -i 'StatusCode=' | wc -c`
    # if test $resp = 0; then
    #   sleep 1
    #   resp=`curl -Ss --insecure --compressed 'http://gridui02.usatlas.bnl.gov:26080/server/panda/getJob?node=dummy&prodSourceLabel=test&siteName=FAKESITE&mem=-1&disk=-1&cpu=-1' | grep -i 'StatusCode=' | wc -c`
    #   if test $resp = 0; then
    #     echo "pilotWrapper ERROR: Panda server exchange failed. Cannot continue."
    #     echo '!!FAILED!!2005!!Panda server exchange failed'
    #     echo
    #     echo `curl --verbose -Ss --insecure --compressed 'http://gridui02.usatlas.bnl.gov:26080/server/panda/getJob?node=dummy&prodSourceLabel=test&siteName=FAKESITE&mem=-1&disk=-1&cpu=-1'`
    #     ALLOK=no
    #   else
    #     echo "Panda server exchange OK"
    #   fi
    # else
    #   echo "Panda server exchange OK"
    # fi
  fi
fi

## Python present?
xpython=$(command -v python)
if [[ -x "$xpython" ]]; then
  echo "python found: $xpython"
  # Python 2 prints its version on stderr, so merge it into the capture.
  echo "$(python -V 2>&1)"
else
  echo "pilotWrapper ERROR: python not found. Cannot continue."
  echo '!!FAILED!!2003!!python not found'
  ALLOK=no
fi

## g++ setup
# NOTE(review): bash does not expand aliases in non-interactive shells
# unless 'expand_aliases' is set, and aliases are not passed to child
# processes, so this alias probably has no effect on the pilot - confirm.
if command -v /usr/bin/g++32; then
  alias g++=g++32
fi

if [[ "$ALLOK" == "no" ]]; then
  echo "ERROR - will abort due to unusable environment. Here's some environment info."
  #echo "=== ls -al /usr/bin"
  #ls -al /usr/bin
  #echo "=== ls -al /usr/local/bin"
  #ls -al /usr/local/bin
  echo "=== printenv"
  printenv
  date
  exit 1
fi

## Check WN environment
# $OSG_WN_TMP set? Else $LSF_SCRATCH on LSF systems

## All OK so far. Do site/vo specific setup.

### move to VO/site specific setup/integrity check, which is pulled down depending
### on site/vo
## Configure dq2_* commands
#echo "Set up grid, dq2_* commands"
#source /afs/cern.ch/project/gd/LCG-share/sl3/etc/profile.d/grid_env.sh
#source /afs/usatlas.bnl.gov/Grid/Don-Quijote/dq2_user_client/setup.sh.CERN
#export LFC_HOST=lfc-atlas.cern.ch
#export LCG_CATALOG_TYPE=lfc
#which dq2_get

## Anything went wrong in site/vo specific setup?
# With the site/vo setup above commented out, ALLOK cannot have changed
# since the previous check; the guard stays for when that setup returns.
if [[ "$ALLOK" == "no" ]]; then
  echo "pilotWrapper ERROR - aborting due to unusable environment. Here's some environment info."
  echo "=== printenv"
  printenv
  echo "=== ls -al /usr/bin"
  ls -al /usr/bin
  echo "=== ls -al /usr/local/bin"
  ls -al /usr/local/bin
  date
  exit 1
fi

## All OK. Fetch the AutoPilot scripts
#scripts="$main PilotUtils.py dq2_poolFCjobO"
scripts=("$main" PilotUtils.py)
echo "Fetching scripts: ${scripts[*]}"
for file in "${scripts[@]}"; do
  # --fail plus the status check keeps an HTTP error page (or a stale or
  # empty file) from being accepted as the downloaded script.
  if curl "${curl_opts[@]}" --fail -s -S -o "$file" "$tpurl$file" && [[ -s "$file" ]]; then
    chmod +x "$file"
  else
    echo "ERROR: Failed to retrieve script $file from $tpurl$file"
    echo '!!FAILED!!2004!!script retrieve failed'
    ALLOK=no
  fi
done

# Be sure job.out exists
touch job.out

## Is there a site setup to do?
# --fail makes an HTTP error yield an empty result instead of an error
# page that would then be executed below.
envsetup=$(curl --connect-timeout 20 --max-time 60 --fail -s -S \
  "http://pandamon.usatlas.bnl.gov:25880/server/pandamon/query?tpmes=pilotpars&getpar=envsetup&queue=$queuename")
if [[ -n "$envsetup" ]]; then
  # Derive "cat <file>" and "ls -alL <file>" variants of the returned
  # "source <file>" command, to log the setup file before running it.
  catsetup=${envsetup/source /cat }
  lssetup=${envsetup/source /ls -alL }
  echo "Setup command: '$envsetup'"
  echo "Listing: $lssetup"
  # Unquoted on purpose: each string holds a command plus its arguments.
  $lssetup
  echo "___________ setup content:"
  $catsetup
  echo "___________ running setup"
  # SECURITY NOTE(review): this runs a command string received over plain
  # http from the monitor; it is only as trustworthy as that channel.
  $envsetup
  echo "Environment after setup:"
  printenv
else
  echo "No site setup script to execute. Proceeding without."
fi

echo "grid-proxy-info:"
command -v grid-proxy-info
grid-proxy-info
echo "voms-proxy-info:"
command -v voms-proxy-info
voms-proxy-info -all

# 64bit worker nodes with 32 bit python available
if command -v python32; then
  pybin=python32
else
  pybin=python
fi
echo "Will use pybin=$pybin"

## Run the pilot which will ask Panda for a job
if [[ "$ALLOK" == "yes" ]]; then
  echo "==== Executing $main"
  # The embedded \" quotes are part of the value handed to $main.
  "$pybin" "$main" "${pilot_args[@]}" --transurl="$transurl" "--pilotpars=\"$pilot2pars $tppars\""
  RETVAL=$?
  if [[ "$RETVAL" -eq 2 ]]; then
    echo "!!FINISHED!!0!!site offline"
  elif [[ "$RETVAL" -ne 0 ]]; then
    echo "!!FAILED!!2007!!$main script failed"
  fi
  echo "pilot wrapper exiting at $(date). Return value $RETVAL"
  exit "$RETVAL"
else
  echo "pilot wrapper aborting at $(date)"
  exit 1
fi