Skip to content

Commit

Permalink
Merge pull request #2 from UMN-CMS/dask_histograms
Browse files: browse the repository at this point in the history
Dask histograms
  • Loading branch information
jack1851 authored May 17, 2024
2 parents 1c9f958 + ca6ac15 commit 9ec36aa
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 26 deletions.
27 changes: 16 additions & 11 deletions analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
import copy
import numpy as np
import uproot
from hist import Hist
import hist
import time

from utils.file_output import save_histograms
from utils.file_input import construct_fileset
from utils.histos import config
from utils.compute_variables import get_variables

import hist.dask as hda
import dask
NanoAODSchema.warn_missing_crossrefs = False

class WrAnalysis(processor.ProcessorABC):
Expand All @@ -25,7 +25,7 @@ def __init__(self):
self.hist_dict[mll][flavor] = {}
for i in range(len(config["histos"]["HISTO_NAMES"])):
self.hist_dict[mll][flavor][config["histos"]["HISTO_NAMES"][i]] =(
Hist.new.Reg(bins=config["histos"]["N_BINS"][i],
hda.Hist.new.Reg(bins=config["histos"]["N_BINS"][i],
start=config["histos"]["BIN_LOW"][i],
stop=config["histos"]["BIN_HIGH"][i],
label=config["histos"]["HISTO_LABELS"][i])
Expand All @@ -42,7 +42,7 @@ def process(self, events):
jets = events.Jet

num_events = ak.num(elecs,axis=0).compute()
print(f"\nProcessing {num_events} events")
print(f"\nProcessing {num_events} events.")

# Mask jets and leptons with their individual requirements
good_elecs = elecs[(elecs.pt > 53) & (np.abs(elecs.eta) < 2.4) & (elecs.cutBased_HEEP)]
Expand Down Expand Up @@ -84,7 +84,7 @@ def process(self, events):

num_selected = ak.num(passing_elecs,axis=0).compute()

print(f"\n{num_selected} events passed the selection ({num_selected/num_events*100:.2f}% efficiency)")
print(f"{num_selected} events passed the selection ({num_selected/num_events*100:.2f}% efficiency).\n")

mll = (passing_leptons[:, 0] + passing_leptons[:, 1]).mass

Expand All @@ -108,15 +108,17 @@ def process(self, events):
flavor_selection = selections.all(flavor)
selected_leptons = passing_leptons[mll_selection & flavor_selection]
selected_jets = passing_jets[mll_selection & flavor_selection]
print(f"\nCalculating kinematic variables and filling histograms for events with dilepton mass {mll} and flavor {flavor}")
print(f"Filling histograms for events with dilepton mass {mll} and flavor {flavor}.")
variables = get_variables(selected_leptons, selected_jets) # Only builds lazy dask arrays; get_variables no longer calls .compute(), the histograms are materialized later by dask.compute().
for i, variable in enumerate(variables):
hist_dict[mll][flavor][config["histos"]["HISTO_NAMES"][i]].fill(variable)
hist_dict[mll][flavor][config["histos"]["HISTO_NAMES"][i]].fill(variable)

print("\nFinished processing events and filling histograms.\n")

return hist_dict

def postprocess(self, accumulator):
return accumulator
pass

t0 = time.monotonic()

Expand All @@ -132,8 +134,11 @@ def postprocess(self, accumulator):
p = WrAnalysis()
out = p.process(events)

print("\nSaving histograms")
save_histograms(out, "example_histos.root")
print("Computing histograms...")
(computed,)=dask.compute(out)
print("Histograms computed.\n")

save_histograms(computed, "example_histos.root")

exec_time = time.monotonic() - t0
print(f"\nExecution took {exec_time:.2f} seconds")
30 changes: 15 additions & 15 deletions utils/compute_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@

def get_variables(leptons, jets):

leadlepton_pt = leptons[:,0].pt.compute()
subleadlepton_pt = leptons[:,1].pt.compute()
leadjet_pt = jets[:,0].pt.compute()
subleadjet_pt = jets[:,1].pt.compute()
leadlepton_eta = leptons[:,0].eta.compute()
subleadlepton_eta = leptons[:,1].eta.compute()
leadjet_eta = jets[:,0].eta.compute()
subleadjet_eta = jets[:,1].eta.compute()
leadlepton_phi = leptons[:,0].phi.compute()
subleadlepton_phi = leptons[:,1].phi.compute()
leadjet_phi = jets[:,0].phi.compute()
subleadjet_phi = jets[:,1].phi.compute()
dilepton_mass = (leptons[:, 0] + leptons[:, 1]).mass.compute()
dijet_mass = (jets[:, 0] + jets[:, 1]).mass.compute()
fourobject_mass = (leptons[:, 0] + leptons[:, 1] + jets[:, 0] + jets[:, 1]).mass.compute()
leadlepton_pt = leptons[:,0].pt
subleadlepton_pt = leptons[:,1].pt
leadjet_pt = jets[:,0].pt
subleadjet_pt = jets[:,1].pt
leadlepton_eta = leptons[:,0].eta
subleadlepton_eta = leptons[:,1].eta
leadjet_eta = jets[:,0].eta
subleadjet_eta = jets[:,1].eta
leadlepton_phi = leptons[:,0].phi
subleadlepton_phi = leptons[:,1].phi
leadjet_phi = jets[:,0].phi
subleadjet_phi = jets[:,1].phi
dilepton_mass = (leptons[:, 0] + leptons[:, 1]).mass
dijet_mass = (jets[:, 0] + jets[:, 1]).mass
fourobject_mass = (leptons[:, 0] + leptons[:, 1] + jets[:, 0] + jets[:, 1]).mass

variables = [leadlepton_pt,
subleadlepton_pt,
Expand Down
1 change: 1 addition & 0 deletions utils/file_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ def save_histograms(hist_dict, filename):
for flavor in hist_dict[mll].keys():
for histogram_name, histogram_data in hist_dict[mll][flavor].items():
f[f"{mll}/{flavor}/{histogram_name}"] = histogram_data
print(f"Histograms saved to {histograms_directory}/{filename}")

0 comments on commit 9ec36aa

Please sign in to comment.