// CondorCodJob.cxx #include "dial_condor/CondorCodJob.h" #include #include #include #include #include #include #include "dataset_id/EventIdList.h" #include "dataset_util/FileStatus.h" #include "dataset_util/FileName.h" #include "dataset_util/Text.h" #include "dataset_util/ssystem.h" #include "dataset_util/get_hostname.h" #include "dataset_util/getcwd.h" #include "dataset_base/DatasetCreator.h" #include "dataset_xml/XmlParser.h" using std::string; using std::vector; using std::cerr; using std::endl; using std::ostream; using dset::Dataset; using dset::DatasetCreator; using dial::Job; using dial::CondorCodJob; using dial::CondorCodTable; //********************************************************************** // Local definitions. //********************************************************************** namespace { bool afsAclModified = false; string originalAfsAcl = "rl"; bool isAfsDirectory(string dir) { FileName fname(dir); if( fname.is_relative() ) dir = fname.fullpath().name(); if( dir.find("/afs") == 0 ) return true; return false; } string afsAcl(string dir) { string acl = ""; string tmpfname = tmpnam(NULL); string fslacmd = "fs la " + dir + " >& " + tmpfname; ssystem(fslacmd); Text tmpftxt(tmpfname); int numline = tmpftxt.size(); string linestr; do { linestr = tmpftxt.line(numline); if ( linestr.find("system:anyuser") != string::npos ) { vector tokens; Text::split(linestr, tokens, " "); acl = tokens[1]; break; } numline--; } while( numline > 0 ); remove(tmpfname.c_str()); return acl; } void setAfsAcl(string dir, string acl) { string fssacmd = "fs sa -dir " + dir + " -acl system:anyuser " + acl; ssystem(fssacmd); } CondorCodTable& ctable(string machinefile="") { static CondorCodTable table(machinefile); return table; } } // end unnamed namespace //********************************************************************** // Member functions. //********************************************************************** // Constructor. CondorCodJob:: CondorCodJob(JobId jid, const Application& app, const Task& tsk, const Dataset& dst, const JobPreferences& prf, string jobdir, string runfile, string machinefile) : Job("CondorCodJob", jid, app, tsk, dst, prf, jobdir, runfile), m_rundir(jobdir), m_machinefile(machinefile) { base_set_submit_host(get_hostname()); create_local_run_script_wrapper(); } //********************************************************************** // Conversion constructor. CondorCodJob::CondorCodJob(const Job& job, string descfile) : Job(job), m_descfile(descfile) { string hname = get_hostname(); if ( hname != submit_host() ) { cerr << "LsfJob: Change host name in copy" << endl; base_set_submit_host(hname); } } //********************************************************************** // Destructor. CondorCodJob::~CondorCodJob() { } //********************************************************************** int CondorCodJob::start() { // Check that the executable is present. { string runscript = job_directory() + "/dial_run_script"; FileStatus estat(runscript); if ( ! estat.is_executable() ) return 4; } int return_val; if ( (return_val = initialize()) != 0 ) { return return_val; } if ( (return_val = activate()) != 0 ) { return return_val; } return 0; } //********************************************************************** int CondorCodJob::initialize() { if ( ! is_initialized() ) return 1; // Check that the run directory is present. { FileStatus dstat(m_rundir); if ( ! dstat.is_directory() ) return 3; } // Define and check files. { FileStatus fstat(directory()); if ( ! fstat.is_directory() ) return base_set_failed(101); if ( ! fstat.is_readable() ) return base_set_failed(102); if ( ! fstat.is_writeable() ) return base_set_failed(103); } Text desctxt; desctxt.append("JobUniverse = 5"); desctxt.append("Cmd = \"/bin/sh\""); desctxt.append("IWD = \"" + job_directory() + "\"" ); desctxt.append("Args = \"" + job_directory() + "/dial_run_script" + "\"" ); desctxt.append("In = /dev/null"); desctxt.append("Out = \"" + job_directory() + "/stdout.log\""); desctxt.append("Err = \"" + job_directory() + "/stderr.log\""); desctxt.append("StarterUserLog = \"" + job_directory() + "/condorcod.log\""); string descfile = m_rundir + "/submit_cod_description"; desctxt.write(descfile); m_descfile = descfile; // creation of the description file is done // if it is an AFS directory, do ACL stuff if( isAfsDirectory(m_rundir) ) { originalAfsAcl = afsAcl(m_rundir); if ( ! ( originalAfsAcl.find("i") != string::npos && originalAfsAcl.find("w") != string::npos ) ) { setAfsAcl(m_rundir, "rliw"); afsAclModified = true; } } return 0; } //********************************************************************** int CondorCodJob::activate() { string runhost; m_claimId = ctable(m_machinefile).getfreeclaim(&runhost) ; // will contain the Claim id. // when no free claims are available if (m_claimId == "No Free Claim") return 56; ctable().updateClaimState(m_claimId, string("Activated")); base_set_run_host(runhost); // if no claim id is present if (m_claimId == "") return -1; string subcmd = "condor_cod activate -id " + m_claimId + " -jobad " + m_descfile + " 1>&/dev/null 2>&1" ; int sstat = ssystem("cd " + m_rundir + "; " + subcmd); if ( sstat == 0 ) { base_set_running(); } else { if ( afsAclModified ) setAfsAcl(job_directory(), originalAfsAcl); base_set_failed(109); } return sstat; } //********************************************************************** int CondorCodJob::deactivate() { if (m_claimId == "") return -1; if (!is_running()) return 1; string subcmd = "condor_cod deactivate -id " + m_claimId + " -fast 1>&/dev/null 2>&1"; int iRet = ssystem("cd " + m_rundir + "; " + subcmd); if (iRet == 0) base_set_killed(121); return iRet; } int CondorCodJob::update() { if ( is_initialized() ) { int stat = start(); if ( stat != 0 ) { return 1; } return 0; } if ( ! is_running() ) return 2; // get the current status of the job and set the state of the job accordingly string tmpfname = tmpnam(NULL); vector tokens; int status = 0; Text logtxt(job_directory() + "/condorcod.log"); Text::set_warn(); for (int i = 0; i < (signed int) logtxt.size(); i++) { string logline = logtxt.line(i); if ( logline.find("Failed") != string::npos ) { base_set_failed(402); return 402; } if ( logline.find("evicted") != string::npos ) { base_set_killed(122); return 403; } if (logline.find("Normal termination") != string::npos ) { // the job ended normally. have to capture the return status tokens.resize(0); Text::split(logline,tokens," "); // set the status of the job tokens[5].erase(tokens[5].size() - 1); // remove last ')' base_set_return_status(atoi(tokens[5].c_str())); status = 1; } } if (status == 0) { base_set_running(); } else if ( status == 1 ) { string resfile = job_directory() + "/result.xml"; if ( FileStatus(resfile).is_readable() ) { XmlParser parser; const XmlElement* pxres = parser.parse(resfile); if ( pxres == 0 ) { base_set_failed(143); return 0; } const Dataset* pres = DatasetCreator::create(*pxres); delete pxres; if ( pres == 0 ) { base_set_failed(144); return 0; } int stat = base_set_result(pres); if ( stat != 0 ) { delete pres; base_set_failed(145); return 0; } if ( pres == 0 ) { base_set_failed(146); } if ( dataset()!=0 && dataset()->is_event_dataset() ) { base_set_event_count(dataset()->event_count()); } else { base_set_event_count(0); } } base_set_done(); ctable().updateClaimState(m_claimId, string("Idle")); } base_set_update(); return 0; } //********************************************************************** int CondorCodJob::kill(int err) { int return_val; if (is_initialized()) { base_set_killed(err); return 0; } if ( (return_val = deactivate()) != 0 ) { return return_val; } return 0; } //********************************************************************** ostream& CondorCodJob::ostr(ostream& lhs) const { Job::ostr(lhs); if ( is_initialized() ) return lhs; lhs << endl; lhs << "Run script: " << run_script() << endl; lhs << "Run directory: " << directory() << endl; lhs << "Condor Claim ID: " << condor_claim_id(); if ( is_running() ) return lhs; lhs << endl; lhs << "Return status: " << return_status(); return lhs; } //**********************************************************************