Skip to content

Commit

Permalink
Merge pull request #3 from UMN-CMS/simplify_analyzer
Browse files Browse the repository at this point in the history
starting to incorporate multiple files
  • Loading branch information
jack1851 authored May 17, 2024
2 parents 9ec36aa + a058621 commit 451691b
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 10 deletions.
11 changes: 9 additions & 2 deletions analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
import dask
NanoAODSchema.warn_missing_crossrefs = False

# maximum number of input files per process, e.g. 10 (smaller number = faster); -1 uses all files
N_FILES_MAX_PER_SAMPLE = 1

class WrAnalysis(processor.ProcessorABC):
def __init__(self):
#Initialize histograms
Expand All @@ -34,7 +37,7 @@ def __init__(self):

def process(self, events):


# create copies of histogram objects
hist_dict = copy.deepcopy(self.hist_dict)

elecs = events.Electron
Expand Down Expand Up @@ -122,7 +125,11 @@ def postprocess(self, accumulator):

t0 = time.monotonic()

fileset = construct_fileset()
fileset = construct_fileset(N_FILES_MAX_PER_SAMPLE)

print(f"processes in fileset: {list(fileset.keys())}")
print(f"\nexample of information in fileset:\n{{\n 'files': [{fileset['ttbar__nominal']['files'][0]}, ...],")
print(f" 'metadata': {fileset['ttbar__nominal']['metadata']}\n}}")

fname = fileset['ttbar__nominal']['files'][0]
events = NanoEventsFactory.from_root(
Expand Down
10 changes: 7 additions & 3 deletions nanoaod_inputs.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
{
"ttbar": {
"nominal": {
"nevts_total": 120000,
"nevts_total": 270000,
"files": [
{
"path": "root://cmsxrootd.fnal.gov//store/mc/RunIIAutumn18NanoAOD/TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8/NANOAODSIM/102X_upgrade2018_realistic_v15-v1/120000/08359797-037D-AD42-A433-978BEEE70173.root",
"nevts": 120000
"path": "root://cmsxrootd.fnal.gov//store/mc/RunIIAutumn18NanoAOD/TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8/NANOAODSIM/102X_upgrade2018_realistic_v15-v1/110000/179C0011-B080-054B-A829-C1C276DD137E.root",
"nevts": 210000
},
{
"path": "root://cmsxrootd.fnal.gov//store/mc/RunIIAutumn18NanoAOD/TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8/NANOAODSIM/102X_upgrade2018_realistic_v15-v1/110000/1E2B86BB-4765-C745-93C6-ED1C7EC6680C.root",
"nevts": 60000
}
]
}
Expand Down
25 changes: 20 additions & 5 deletions utils/file_input.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
import json

def construct_fileset(n_files_max_per_sample=-1):
    """Build the fileset dictionary described by ``nanoaod_inputs.json``.

    The JSON file is read from the current working directory. It maps
    process -> variation -> {"files": [{"path": ..., "nevts": ...}, ...]}.

    Args:
        n_files_max_per_sample: Maximum number of files to keep for each
            process/variation pair. ``-1`` (the default) keeps every file.

    Returns:
        A dict keyed by ``"<process>__<variation>"``. Each value holds
        ``"files"`` (the list of selected file paths) and ``"metadata"``
        (process, variation, summed ``nevts`` of the selected files, and the
        process cross-section).

    Raises:
        KeyError: If a process in the JSON file has no entry in the
            cross-section table below.
    """
    # x-secs are in pb (taken from Table 8 (pg. 9) of AN)
    xsec_info = {
        "ttbar": 88.29,
    }

    # list of files
    with open("nanoaod_inputs.json") as f:
        file_info = json.load(f)

    # process into "fileset" summarizing all info
    fileset = {}
    for process, variations in file_info.items():  # e.g. "ttbar"
        for variation, sample in variations.items():  # e.g. "nominal"
            # list of dictionaries with the xrootd path and nevts of each
            # nanoAOD file
            file_list = sample["files"]
            if n_files_max_per_sample != -1:
                # use partial set of samples
                file_list = file_list[:n_files_max_per_sample]

            file_paths = [entry["path"] for entry in file_list]
            # Count only the events of the files actually selected, so the
            # metadata stays consistent with the truncated file list.
            nevts_total = sum(entry["nevts"] for entry in file_list)
            metadata = {
                "process": process,
                "variation": variation,
                "nevts": nevts_total,
                "xsec": xsec_info[process],
            }
            fileset[f"{process}__{variation}"] = {
                "files": file_paths,
                "metadata": metadata,
            }
    return fileset

0 comments on commit 451691b

Please sign in to comment.