# ProdSampleRequest.py
#
# David Adams
# April 2009
#
# Describe prodsys requests.
#

from TaskName import TaskName

################################################################################

# Exceptions.

class ProdSampleRequestException(Exception):
    """Error raised for problems with a single production sample request."""

    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return repr(self.msg)

################################################################################

# Class to describe a physics group name.

class PhysicsGroup:
    """Physics group identified by an integer index into parallel name tables.

    Index 0 is the invalid group.  A group may be constructed from an index or
    from any known short, long, production or alias name; anything that is not
    recognized yields the invalid group.
    """

    def __init__(self, val):
        """Resolve val (int index or str name) to a group index.

        The original value is kept in self.rawname for error reporting.
        """
        self.rawname = val
        # Parallel tables: the same position in each list describes one group.
        self.short_names = [
            "none", "phys", "sm", "bphy", "top", "higg", "susy", "exot",
            "egam", "muon", "tau", "btag", "jet",
            "hion",
        ]
        self.long_names = [
            "Invalid", "Physics", "StandardModel", "BPhysics", "Top", "Higgs",
            "SUSY", "Exotics",
            "EGamma", "Muon", "Tau", "BTag", "JetEtmiss",
            "HeavyIons",
        ]
        self.prod_names = [
            "", "physics", "sm", "bphysics", "top", "higgs", "susy", "exotics",
            "egamma", "muon", "tau", "btagging", "jetEtmiss",
            "heavyion",
        ]
        self.ngroup = len(self.short_names)
        short_index = {}
        for idx in range(self.ngroup):
            short_index[self.short_names[idx]] = idx
        # Initialize the alias map, starting with the extra spellings.
        self.alias = {
            "physics": short_index["phys"],
            "BPhys": short_index["bphy"],
            "bphys": short_index["bphy"],
            "Egamma": short_index["egam"],
            "FlavourTag": short_index["btag"],
        }
        # Every short, long and production name maps to its own index.
        for names in (self.short_names, self.long_names, self.prod_names):
            for idx in range(self.ngroup):
                self.alias[names[idx]] = idx
        self.__index = 0
        if isinstance(val, int):
            if 0 <= val < self.ngroup:
                self.__index = val
        elif isinstance(val, str):
            if val in self.alias:
                self.__index = self.alias[val]

    def add_alias(self, group, name):
        """Register name as an extra alias for group on this instance.

        group is either an integer index or another PhysicsGroup.  The call is
        silently ignored if name is not a str, if group cannot be resolved, or
        if it resolves to index 0 (the invalid group).
        """
        if not isinstance(name, str):
            return
        idx = None
        if isinstance(group, int):
            if 0 <= group < self.ngroup:
                idx = group
        elif isinstance(group, PhysicsGroup):
            idx = group.index()
        if idx:
            self.alias[name] = idx

    def index(self):
        """Return the integer index of this group (0 if invalid)."""
        return self.__index

    def is_valid(self):
        """Return True unless this is the invalid group (index 0)."""
        return bool(self.index())

    def nickname(self):
        """Return the short name, e.g. "higg"."""
        return self.short_names[self.__index]

    def fullname(self):
        """Return the long name, e.g. "Higgs"."""
        return self.long_names[self.__index]

    def prodname(self):
        """Return the production name, e.g. "higgs"."""
        return self.prod_names[self.__index]

    def __str__(self):
        return self.fullname()

################################################################################

class ProdSampleRequest:
    """One request: an input task, a tag and event counts per output format.

    groups is the list of PhysicsGroup objects that asked for the sample and
    events is a dict mapping output format name to number of events.
    """

    def __init__(self, project, group, intask, tag, events):
        self.project = project
        self.groups = [group]
        self.intask = intask
        self.tag = tag
        self.events = events

    # Merge another request into this one.
    # New request must have the same input task and tag.
    def merge(self, req):
        """Add the groups and event counts of req to this request.

        Returns 0 on success, 1 if the input task names differ and 2 if the
        tags differ.  Nothing is modified when a nonzero code is returned.
        """
        if req.intask.name != self.intask.name:
            return 1
        if req.tag != self.tag:
            return 2
        self.groups += req.groups
        # Only formats present in req can change a count, so looping over
        # req.events is enough.
        for fmt in req.events:
            if fmt in self.events:
                self.events[fmt] += req.events[fmt]
            else:
                self.events[fmt] = req.events[fmt]
        return 0

    # Return a string listing the groups in this request.
    def group_string(self):
        """Return the group nicknames joined with commas."""
        return ",".join([group.nickname() for group in self.groups])

    # Return the input task.
    def input_task(self):
        return self.intask

    # Return the output task for a given tag.
    def output_task(self):
        """Return the result of adding this request's tag to the input task."""
        return self.intask.add_tag(self.tag, self.project)

################################################################################

# ProdSampleRequestList constructs a list of ProdSampleRequest objects from
# a list in Borut's format. The specifier ten-percentESD is interpreted so as
# to guarantee the number of requested AOD.ESD events is divisible by 250,
# the number of events per job.

class ProdSampleRequestListException(Exception):
    """Error raised while reading or displaying a request list."""

    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return repr(self.msg)

class ProdSampleRequestList:
    """List of ProdSampleRequest objects read from text request files.

    Requests with the same input task name and tag are merged into a single
    entry.
    """

    # List of known output formats with AOD and ESD.
    aodesd_formats = [
        "AOD.ESD",
        "AOD.ESD.NTUP",
        "ESD",
        "TAG_COMM.AOD.ESD.HIST",
        "TAG_COMM.AOD.ESD.HIST.NTUP_PIXELCALIB.NTUP_MUONCALIB.NTUP_TRIG",
    ]
    # List of known output formats with AOD but no ESD.
    # Entry i is the AOD-only counterpart of aodesd_formats[i]; "ESD" contains
    # no "AOD.ESD" substring and therefore maps to itself.
    aod_formats = [name.replace("AOD.ESD", "AOD") for name in aodesd_formats]

    def __init__(self, project):
        """Create an empty list that keeps requests for the given project."""
        self.project = project
        self.reqs = []
        # Maps "<input task name>/<tag>" to the request already holding it.
        self.dupmap = {}
        # Format and header for text dumps.
        self.index_fmt = "%6i"
        self.group_fmt = " %-19s"
        self.intask_fmt = " %-80s"
        self.tag_fmt = "%6s"
        self.count_fmt = "%10.1f"
        self.text_header = " Index"
        self.text_header += self.group_fmt % "Groups"
        self.text_header += self.intask_fmt % "Task"
        self.text_header += self.tag_fmt % "Tag"
        # NOTE(review): these two labels are narrower than the 10-character
        # count columns produced by count_fmt -- confirm the intended padding.
        self.text_header += " AOD.ESD AOD"
        # Flag to display output task instead of input in text dumps
        self.show_output_task = False

    def _add_request(self, req):
        """Append req, or merge it into an entry with the same task and tag.

        Raises ProdSampleRequestListException if the merge is refused.
        """
        key = req.intask.name + "/" + req.tag
        if key in self.dupmap:
            oldreq = self.dupmap[key]
            err = oldreq.merge(req)
            if err:
                oldkey = oldreq.intask.name + "/" + oldreq.tag
                raise ProdSampleRequestListException(
                    "Duplicate merge failed with error " + str(err)
                    + "\n New key:" + key + "\n Old key: " + oldkey)
        # Otherwise add this request to the list.
        else:
            self.dupmap[key] = req
            self.reqs += [req]

    def _field_value(self, words, iw, name):
        """Return the value of the "<name>= <value>" pair starting at words[iw].

        Raises ProdSampleRequestListException if the label at words[iw] is not
        "<name>=" or if the line ends before the label or its value.
        """
        if iw >= len(words):
            raise ProdSampleRequestListException("Missing field: " + name)
        word = words[iw]
        if word != name + "=":
            raise ProdSampleRequestListException(
                "Field name is not " + name + ": " + word)
        if iw + 1 >= len(words):
            raise ProdSampleRequestListException(
                "Missing value for field: " + name)
        return words[iw + 1]

    def readfile(self, fname):
        """Read requests from a file with one whitespace-separated request per line.

        Expected line layout (PILEUP is optional):
          RECO PILEUP group= Tau input-task= <task> stats= 400000 store= ESD recotag= r621
        store is either ESD (all events as AOD.ESD) or ten-percentESD (about a
        tenth of the events as AOD.ESD, the rest as AOD).  Blank lines are
        skipped.

        Raises ProdSampleRequestListException for a malformed line, an invalid
        group or task name, or an unknown storage option.  A non-numeric stats
        value raises ValueError from int().
        """
        infile = open(fname, "r")
        try:
            for rawline in infile:
                words = rawline.rstrip("\r\n").split()
                if not words:
                    continue
                # Skip the header word(s)
                if words[0] != "RECO":
                    raise ProdSampleRequestListException("First word must be RECO")
                iw = 1
                if iw < len(words) and words[iw] == "PILEUP":
                    iw += 1
                # Read the physics group.
                gname = self._field_value(words, iw, "group")
                iw += 2
                group = PhysicsGroup(gname)
                if not group.is_valid():
                    raise ProdSampleRequestListException("Invalid group name: " + gname)
                # Read the input task.
                tname = self._field_value(words, iw, "input-task")
                iw += 2
                task = TaskName(tname, True)
                if not task.is_valid():
                    raise ProdSampleRequestListException("Invalid task name: " + tname)
                # Read the number of events.
                nevt = int(self._field_value(words, iw, "stats"))
                iw += 2
                # Read the data types to record.
                # Create the number of events of each data type.
                store = self._field_value(words, iw, "store")
                iw += 2
                events = {}
                if store == "ESD":
                    events["AOD.ESD"] = nevt
                    events["AOD"] = 0
                elif store == "ten-percentESD":
                    # Divide by 10 but keep a multiple of 250.
                    events["AOD.ESD"] = ((nevt // 10 - 51) // 250 + 1) * 250
                    events["AOD"] = nevt - events["AOD.ESD"]
                else:
                    raise ProdSampleRequestListException("Invalid storage option: " + store)
                # Read the tag.
                tag = self._field_value(words, iw, "recotag")
                # Merge with an existing request for the same task name and
                # tag, otherwise append.
                self._add_request(
                    ProdSampleRequest(self.project, group, task, tag, events))
        finally:
            infile.close()

    def readfile2(self, fname):
        """Read requests from a file made of name:value words.

        Settings (project, events_per_job, formats, group, num_events, tag)
        persist from one line to the next.  A line containing a ds:<task> word
        creates a request from the current settings; it is kept only if the
        current project equals self.project, otherwise a "Skipping" message is
        printed.  Empty lines and lines starting with "#" are ignored, as are
        words with an unrecognized name.

        Raises ProdSampleRequestListException for a word that is not exactly
        name:value, an unknown project, events_per_job or format, or an
        invalid group.  Non-numeric counts raise ValueError from int().
        """
        projects = ["mc08", "mc09_valid", "mc09_900GeV", "mc09_2TeV",
                    "mc09_7TeV", "mc09_10TeV"]
        events_per_job_allowed = [250, 1, 1000]
        fraction_prefix = "fraction_ESD="
        project = ""
        events_per_job = None
        aodesd_format = "AOD.ESD"
        aod_format = "AOD"
        group = PhysicsGroup("none")
        tag = "none"
        nevt = 0
        project_mode = "default"
        infile = open(fname, "r")
        try:
            for rawline in infile:
                line = rawline.rstrip("\r\n")
                if len(line) == 0:
                    continue
                if line[0] == "#":
                    continue
                # The dataset applies to one line only; everything else persists.
                ds = ""
                for word in line.split():
                    try:
                        name, value = word.split(":")
                    except ValueError:
                        raise ProdSampleRequestListException("Invalid word: " + word)
                    if name == "project":
                        if value not in projects:
                            raise ProdSampleRequestListException("Invalid " + name + ": " + value)
                        project = value
                    elif name == "priority":
                        # Accepted but not checked or used.
                        pass
                    elif name == "events_per_job":
                        events_per_job = int(value)
                        if events_per_job not in events_per_job_allowed:
                            raise ProdSampleRequestListException("Invalid " + name + ": " + value)
                    elif name == "formats":
                        match = None
                        for aodesdfmt, aodfmt in zip(ProdSampleRequestList.aodesd_formats,
                                                     ProdSampleRequestList.aod_formats):
                            if value == aodesdfmt:
                                # Reset the mode so an earlier AOD-only
                                # selection does not stick.
                                match = (aodesdfmt, aodfmt, "default")
                            elif value == aodfmt:
                                match = (aodesdfmt, aodfmt, "noesd")
                        if match is None:
                            raise ProdSampleRequestListException("Invalid " + name + ": " + value)
                        aodesd_format, aod_format, project_mode = match
                    elif name == "comment":
                        pass
                    elif name == "group":
                        group = PhysicsGroup(value)
                    elif name == "num_events":
                        nevt = int(value)
                    elif name == "tag":
                        tag = value
                    elif name == "ds":
                        ds = value
                if not len(ds):
                    continue
                if project != self.project:
                    print("Skipping " + project + " " + ds)
                    continue
                events = {}
                if project_mode == "default":
                    # Assign the AOD count first: for "ESD" both format names
                    # are the same key and the requested count must win.
                    events[aod_format] = 0
                    events[aodesd_format] = nevt
                elif project_mode.startswith(fraction_prefix):
                    # Nothing in this method selects this mode at present.
                    percent_esd = int(project_mode[len(fraction_prefix):])
                    if percent_esd < 0 or percent_esd > 100:
                        raise ProdSampleRequestListException("Invalid ESD percentage: " + str(percent_esd))
                    if events_per_job is None:
                        raise ProdSampleRequestListException(
                            "events_per_job is required for project_mode:" + project_mode)
                    raw_nesd = percent_esd * nevt // 100
                    # Round up to a whole number of jobs.
                    events[aodesd_format] = ((raw_nesd - 1) // events_per_job + 1) * events_per_job
                    events[aod_format] = nevt - events[aodesd_format]
                elif project_mode == "noesd":
                    events[aodesd_format] = 0
                    events[aod_format] = nevt
                else:
                    raise ProdSampleRequestListException("Invalid project_mode:" + project_mode)
                if not group.is_valid():
                    raise ProdSampleRequestListException("Invalid group name: " + group.rawname)
                # NOTE(review): unlike readfile, the task name is not checked
                # with is_valid() here -- confirm whether that is intended.
                task = TaskName(ds, True)
                self._add_request(
                    ProdSampleRequest(project, group, task, tag, events))
        finally:
            infile.close()

    # Return a text line describing entry ireq.
    def text_line(self, ireq):
        """Return the formatted dump line for request ireq.

        Returns "" if ireq is not a valid index.  Event counts are shown in
        thousands.  Raises ProdSampleRequestListException if the request holds
        a format that is in neither format list.
        """
        if ireq not in range(0, len(self.reqs)):
            return ""
        req = self.reqs[ireq]
        sout = ""
        sout += self.index_fmt % ireq
        sout += self.group_fmt % req.group_string()
        if self.show_output_task:
            sout += self.intask_fmt % req.output_task()
        else:
            sout += self.intask_fmt % req.input_task()
        sout += self.tag_fmt % req.tag
        naodesd = 0
        naod = 0
        for fmt in req.events:
            if fmt in ProdSampleRequestList.aodesd_formats:
                naodesd = req.events[fmt]
            elif fmt in ProdSampleRequestList.aod_formats:
                naod = req.events[fmt]
            else:
                raise ProdSampleRequestListException("Invalid output type: " + fmt)
        sout += self.count_fmt % (naodesd / 1000.)
        sout += self.count_fmt % (naod / 1000.)
        return sout

    # Print the list.
    def text(self):
        """Print the header followed by one line per request."""
        print(self.text_header)
        for ireq in range(0, len(self.reqs)):
            print(self.text_line(ireq))

#RECO PILEUP group= Tau input-task= mc08.105010.J1_pythia_jetjet.digit.e344_s479_d150 stats= 400000 store= ESD recotag= r621