Skip to content

Commit

Permalink
Made an __init__.py file inside the utils directory
Browse files Browse the repository at this point in the history
  • Loading branch information
jack1851 committed May 22, 2024
1 parent 734035a commit 7c0b62c
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 30 deletions.
56 changes: 26 additions & 30 deletions analyzer.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,18 @@
import logging
import time
import warnings

import awkward as ak
import warnings
from coffea import processor
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from coffea.analysis_tools import PackedSelection
from coffea.dataset_tools import apply_to_fileset, max_chunks, preprocess
import copy
import hist
import hist.dask as hda
import dask
import numpy as np
import warnings
from distributed import Client

from utils.file_output import save_histograms
from utils.file_input import construct_fileset
from utils.histos import config
from utils.compute_variables import get_variables
from coffea.dataset_tools import (
apply_to_fileset,
max_chunks,
preprocess,
)
NanoAODSchema.warn_missing_crossrefs = False

import utils # contains code for bookkeeping and cosmetics, as well as some boilerplate

warnings.filterwarnings(
"ignore",
Expand All @@ -32,8 +22,6 @@
# 64M ttbar events takes about 45 minutes
N_FILES_MAX_PER_SAMPLE = 1

#client = Client()

class WrAnalysis(processor.ProcessorABC):
def __init__(self):

Expand All @@ -43,13 +31,13 @@ def __init__(self):
self.hist_dict[mll] = {}
for flavor in ["eejj", "mumujj", "emujj"]:
self.hist_dict[mll][flavor] = {}
for i in range(len(config["histos"]["HISTO_NAMES"])):
for i in range(len(utils.config["histos"]["HISTO_NAMES"])):
#Need to look at hist documentation to improve this
self.hist_dict[mll][flavor][config["histos"]["HISTO_NAMES"][i]] =(
hda.Hist.new.Reg(bins=config["histos"]["N_BINS"][i],
start=config["histos"]["BIN_LOW"][i],
stop=config["histos"]["BIN_HIGH"][i],
label=config["histos"]["HISTO_LABELS"][i])
self.hist_dict[mll][flavor][utils.config["histos"]["HISTO_NAMES"][i]] =(
hda.Hist.new.Reg(bins=utils.config["histos"]["N_BINS"][i],
start=utils.config["histos"]["BIN_LOW"][i],
stop=utils.config["histos"]["BIN_HIGH"][i],
label=utils.config["histos"]["HISTO_LABELS"][i])
.Weight()
)

Expand Down Expand Up @@ -113,7 +101,7 @@ def process(self, events): #Processes a single NanoEvents chunk

num_selected = ak.num(passing_elecs,axis=0).compute()

print(f"{num_selected} events passed the selection ({num_selected/nevts_total*100:.2f}% efficiency).\n")
print(f"{num_selected} events passed the selection ({num_selected/nevts_total*100:.2f}% efficiency).")

mll = (passing_leptons[:, 0] + passing_leptons[:, 1]).mass

Expand All @@ -137,14 +125,13 @@ def process(self, events): #Processes a single NanoEvents chunk
flavor_selection = selections.all(flavor)
selected_leptons = passing_leptons[mll_selection & flavor_selection]
selected_jets = passing_jets[mll_selection & flavor_selection]
print(f"Filling histograms for events with dilepton mass {mll} and flavor {flavor}.")
# Creates a list of dask arrays of all kinematic variables
variables = get_variables(selected_leptons, selected_jets)
variables = utils.compute_variables.get_variables(selected_leptons, selected_jets)
for i, variable in enumerate(variables):
# Fill histograms
hist_dict[mll][flavor][config["histos"]["HISTO_NAMES"][i]].fill(variable)
hist_dict[mll][flavor][utils.config["histos"]["HISTO_NAMES"][i]].fill(variable)

print("\nFinished processing events and filling histograms.\n")
print("Finished processing events and filling histograms.\n")

output = {"nevents": {events.metadata["dataset"]: len(events)}, "hist_dict": hist_dict}
return output
Expand All @@ -156,7 +143,16 @@ def postprocess(self, accumulator):

t0 = time.monotonic()

fileset = construct_fileset(N_FILES_MAX_PER_SAMPLE)
fileset = utils.file_input.construct_fileset(N_FILES_MAX_PER_SAMPLE)

# print(f"Fileset: {fileset}\n")  # uncomment for debugging

print(f"Processes in fileset: {list(fileset.keys())}")
file_name, file_branch = next(iter(fileset['ttbar__nominal']['files'].items()))
print(f"\nExample of information in fileset:\n{{\n 'files': {file_name}, ...,")
print(f" 'metadata': {fileset['ttbar__nominal']['metadata']}\n}}\n")

NanoAODSchema.warn_missing_crossrefs = False # silences warnings about branches we will not use here

filemeta, _=preprocess(fileset, step_size=100_000, skip_bad_files=True)

Expand All @@ -172,7 +168,7 @@ def postprocess(self, accumulator):
(out,) = dask.compute(all_histograms)

print("Histograms computed.\n")
save_histograms(out, "example_histos.root")
utils.file_output.save_histograms(out, "example_histos.root")

exec_time = time.monotonic() - t0
print(f"\nExecution took {exec_time:.2f} seconds")
4 changes: 4 additions & 0 deletions utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .config import config as config
from . import file_input as file_input
from . import file_output as file_output
from . import compute_variables as compute_variables
File renamed without changes.

0 comments on commit 7c0b62c

Please sign in to comment.