Merge pull request #3 from UMN-CMS/simplify_analyzer

starting to incorporate multiple files
UMN-CMS · May 17, 2024 · 451691b · 451691b
2 parents 9ec36aa + a058621
commit 451691b
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 10 deletions.
diff --git a/analyzer.py b/analyzer.py
@@ -15,6 +15,9 @@
 import dask
 NanoAODSchema.warn_missing_crossrefs = False
 
+# maximum number of input files per process, e.g. 10 (fewer files = faster); -1 uses all files
+N_FILES_MAX_PER_SAMPLE = 1
+
 class WrAnalysis(processor.ProcessorABC):
     def __init__(self):
         #Initialize histograms
@@ -34,7 +37,7 @@ def __init__(self):
 
     def process(self, events):
 
-
+        # create copies of histogram objects
         hist_dict = copy.deepcopy(self.hist_dict)
 
         elecs = events.Electron
@@ -122,7 +125,11 @@ def postprocess(self, accumulator):
 
 t0 = time.monotonic()
 
-fileset = construct_fileset()
+fileset = construct_fileset(N_FILES_MAX_PER_SAMPLE)
+
+print(f"processes in fileset: {list(fileset.keys())}")
+print(f"\nexample of information in fileset:\n{{\n  'files': [{fileset['ttbar__nominal']['files'][0]}, ...],")
+print(f"  'metadata': {fileset['ttbar__nominal']['metadata']}\n}}")
 
 fname = fileset['ttbar__nominal']['files'][0]
 events = NanoEventsFactory.from_root(

diff --git a/nanoaod_inputs.json b/nanoaod_inputs.json
@@ -1,11 +1,15 @@
 {
     "ttbar": {
         "nominal": {
-            "nevts_total": 120000,
+            "nevts_total": 270000,
             "files": [
                 {
-                    "path": "root://cmsxrootd.fnal.gov//store/mc/RunIIAutumn18NanoAOD/TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8/NANOAODSIM/102X_upgrade2018_realistic_v15-v1/120000/08359797-037D-AD42-A433-978BEEE70173.root",
-                    "nevts": 120000
+                    "path": "root://cmsxrootd.fnal.gov//store/mc/RunIIAutumn18NanoAOD/TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8/NANOAODSIM/102X_upgrade2018_realistic_v15-v1/110000/179C0011-B080-054B-A829-C1C276DD137E.root",
+                    "nevts": 210000
+                },
+		{
+                    "path": "root://cmsxrootd.fnal.gov//store/mc/RunIIAutumn18NanoAOD/TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8/NANOAODSIM/102X_upgrade2018_realistic_v15-v1/110000/1E2B86BB-4765-C745-93C6-ED1C7EC6680C.root",
+                    "nevts": 60000
                 }
             ]
         }

diff --git a/utils/file_input.py b/utils/file_input.py
@@ -1,12 +1,27 @@
 import json
 
-def construct_fileset():
+def construct_fileset(n_files_max_per_sample):
+
+    # cross sections are in pb (taken from Table 8, p. 9 of the AN)
+    xsec_info = {
+        "ttbar": 88.29,
+    }
+
+    # list of files
     with open("nanoaod_inputs.json") as f:
         file_info = json.load(f)
+
+    # process into "fileset" summarizing all info
     fileset = {}
-    for process in file_info.keys():
-        for variation in file_info[process].keys():
-            file_list = file_info[process][variation]["files"]
+    for process in file_info.keys(): #ttbar
+        for variation in file_info[process].keys(): #nominal
+            file_list = file_info[process][variation]["files"]  # list of dictionaries with the xrootd path and nevts for each nanoAOD file
+            if n_files_max_per_sample != -1:
+                file_list = file_list[:n_files_max_per_sample]  # use partial set of samples
+
             file_paths = [f["path"] for f in file_list]
-            fileset.update({f"{process}__{variation}": {"files": file_paths}})
+
+            nevts_total = sum([f["nevts"] for f in file_list])
+            metadata = {"process": process, "variation": variation, "nevts": nevts_total, "xsec": xsec_info[process]}
+            fileset.update({f"{process}__{variation}": {"files": file_paths, "metadata": metadata}})
     return fileset