Fix CI and add basic code quality functionality #53

Merged 8 commits on Jan 17, 2024
4 changes: 3 additions & 1 deletion .flake8
@@ -1,3 +1,5 @@
 [flake8]
 ignore = W391, W503
-max-line-length = 79
+max-line-length = 88
+extend-ignore = E203, E704, E266
+exclude = menu_tools/object_performance/quality_obj.py
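Context for these settings: `max-line-length = 88` matches black's default line length, and E203/W503 are the checks that most often disagree with black's output, so ignoring them keeps flake8 and black from fighting each other. A minimal illustration of the E203 conflict (hypothetical values, not from this repo):

```python
# black formats non-trivial slices with spaces around the colon, which
# flake8 reports as E203 ("whitespace before ':'") unless it is ignored.
data = list(range(10))
offset = 2
chunk = data[offset + 1 : offset + 5]  # black's preferred slice style
print(chunk)  # [3, 4, 5, 6]
```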
25 changes: 0 additions & 25 deletions .github/workflows/action.yml

This file was deleted.

41 changes: 41 additions & 0 deletions .github/workflows/code_quality.yml
@@ -0,0 +1,41 @@
+name: Code quality
+on: [push, pull_request, workflow_dispatch]
+
+jobs:
+  black-lint:
+    runs-on: ubuntu-latest
+    name: black
+    steps:
+      - uses: actions/checkout@v3
+      - uses: psf/black@stable
+        with:
+          options: "--check --verbose"
+          src: "./menu_tools"
+          version: "~= 23.12"
+  flake8-lint:
+    runs-on: ubuntu-latest
+    name: flake8
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v3
+      - name: Set up Python environment
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+      - name: flake8 Lint
+        uses: py-actions/flake8@v2
+        with:
+          path: "./menu_tools"
+  mypy-type-check:
+    runs-on: ubuntu-latest
+    name: mypy
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+          architecture: 'x64'
+      - run: |
+          python -m pip install --upgrade pip poetry
+          poetry install
+          poetry run mypy
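For contributors who want to catch failures before CI does, the three jobs can be approximated locally. A sketch (assuming black, flake8, and poetry are installed; this script is illustrative and not part of the PR):

```python
# run_checks.py -- local approximation of the code_quality workflow.
import subprocess
import sys

COMMANDS = [
    ["black", "--check", "--verbose", "./menu_tools"],  # mirrors black-lint
    ["flake8", "./menu_tools"],                         # mirrors flake8-lint
    ["poetry", "run", "mypy"],                          # mirrors mypy-type-check
]

failed = False
for cmd in COMMANDS:
    print("->", " ".join(cmd))
    failed |= subprocess.run(cmd).returncode != 0

sys.exit(1 if failed else 0)
```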
109 changes: 50 additions & 59 deletions menu_tools/caching/cache_objects.py
@@ -14,22 +14,18 @@
 vector.register_awkward()


-class ObjectCacher():
-
-    def __init__(self, version, sample, obj, tree, branches, cfg_file,
-                 dryrun=False):
+class ObjectCacher:
+    def __init__(self, version, sample, obj, tree, branches, cfg_file, dryrun=False):
         self._version = version
         self._sample = sample
         self._cfg_file = cfg_file
-        self._object = obj.split('_')[0]
+        self._object = obj.split("_")[0]
         self._tree = tree
         self._final_ak_array = None
         self._ref_part_iso_dR_vals = [0.1, 0.15, 0.2, 0.3, 1.5]
-        self._ref_part_iso = {
-            f"dr_{dR}": [] for dR in self._ref_part_iso_dR_vals
-        }
+        self._ref_part_iso = {f"dr_{dR}": [] for dR in self._ref_part_iso_dR_vals}
         try:
-            self._part_type = obj.split('_')[1]
+            self._part_type = obj.split("_")[1]
         except IndexError:
             self._part_type = ""
         self._dryrun = dryrun
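As a side note, the `obj` argument is split on underscores: the first token becomes `self._object` and the optional second token `self._part_type`, with the `IndexError` branch covering names that have no underscore. A quick sketch with hypothetical object names:

```python
# Hypothetical object names, illustrating the split in __init__ above.
print("gen_ele".split("_")[0])  # "gen"  -> self._object
print("gen_ele".split("_")[1])  # "ele"  -> self._part_type
print("jet".split("_")[0])      # "jet"; [1] would raise IndexError -> ""
```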
@@ -47,11 +43,7 @@ def parquet_fname(self):
         Returns the name of the output file
         that the object will produce.
         """
-        fname = (
-            self._version
-            + '_' + self._sample
-            + "_" + self._object
-        )
+        fname = self._version + "_" + self._sample + "_" + self._object
         if self._part_type:
             fname += "_" + self._part_type
         return fname
@@ -65,7 +57,7 @@ def _ntuple_path(self):
         if glob.glob(local_ntuple_path):
             return local_ntuple_path

-        with open(self._cfg_file, 'r') as f:
+        with open(self._cfg_file, "r") as f:
             cfg = yaml.safe_load(f)[self._version][self._sample]
         return cfg["ntuple_path"]

@@ -86,10 +78,10 @@ def _p4_sum(self, array, axis=-1):
                 "E": ak.sum(array.E, axis=axis, keepdims=True),
             },
             with_name="Momentum4D",
-            behavior=array.behavior
+            behavior=array.behavior,
         )

-    def _get_visible_taus(self, all_parts, status_check = True):
+    def _get_visible_taus(self, all_parts, status_check=True):
         """
         Create a collection of gen-level taus.
         Leptonic taus are discarded.
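The `_p4_sum` hunk above leans on `vector.register_awkward()`: once components are zipped into a record with `with_name="Momentum4D"`, vector's four-momentum behavior computes derived quantities on demand. A self-contained sketch with made-up values (only the `E` component and `with_name` are visible in this hunk; the Cartesian fields mirror what `_p4_sum` presumably sums):

```python
import awkward as ak
import vector

vector.register_awkward()

# Two hypothetical taus in a single event, as pt/eta/phi/E components.
taus = ak.zip(
    {
        "pt": [[40.0, 35.0]],
        "eta": [[0.5, -1.2]],
        "phi": [[0.1, 2.8]],
        "E": [[60.0, 80.0]],
    },
    with_name="Momentum4D",
)

# Summing components and re-zipping, in the style of _p4_sum: the result
# is again a Momentum4D record, so .mass, .pt, etc. come for free.
summed = ak.zip(
    {
        "px": ak.sum(taus.px, axis=-1, keepdims=True),
        "py": ak.sum(taus.py, axis=-1, keepdims=True),
        "pz": ak.sum(taus.pz, axis=-1, keepdims=True),
        "E": ak.sum(taus.E, axis=-1, keepdims=True),
    },
    with_name="Momentum4D",
    behavior=taus.behavior,
)
print(summed.mass)  # invariant mass of the di-tau system
```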
@@ -99,15 +91,17 @@ def _get_visible_taus(self, all_parts, status_check = True):
         all_parts = ak.zip({k.lower(): all_parts[k] for k in all_parts.keys()})

         if status_check:
-
-            sel = ( (all_parts.id == 15) &
-                    (all_parts.stat == 2)
-                  )
+            sel = (all_parts.id == 15) & (all_parts.stat == 2)

             is_tau = ak.any(sel, axis=-1)
             all_parts = ak.where(is_tau, all_parts, ak.full_like(all_parts, -1000))

-        all_parts = {f: field for f, field in zip(['Id','Stat','Pt','Eta','Phi','Parent','E'],ak.unzip(all_parts))}
+        all_parts = {
+            f: field
+            for f, field in zip(
+                ["Id", "Stat", "Pt", "Eta", "Phi", "Parent", "E"], ak.unzip(all_parts)
+            )
+        }

         sel_no_nu_e = abs(all_parts["Id"]) != 12
         sel_no_nu_mu = abs(all_parts["Id"]) != 14
@@ -120,11 +114,11 @@ def _get_visible_taus(self, all_parts, status_check = True):
         all_tau_p = all_parts.copy()
         all_tau_m = all_parts.copy()

-        sel = all_tau_p['Parent'] == 15
+        sel = all_tau_p["Parent"] == 15
         for branch in all_tau_p:
             all_tau_p[branch] = all_tau_p[branch][sel]

-        sel = all_tau_m['Parent'] == -15
+        sel = all_tau_m["Parent"] == -15
         for branch in all_tau_m:
             all_tau_m[branch] = all_tau_m[branch][sel]

@@ -134,13 +128,13 @@ def _get_visible_taus(self, all_parts, status_check = True):
         all_tau_p = ak.zip({k.lower(): all_tau_p[k] for k in all_tau_p.keys()})
         all_tau_p = ak.with_name(all_tau_p, "Momentum4D")

-        sel_ele = ak.any(abs(all_tau_p['id']) == 11, axis=-1)
-        sel_mu = ak.any(abs(all_tau_p['id']) == 13, axis=-1)
+        sel_ele = ak.any(abs(all_tau_p["id"]) == 11, axis=-1)
+        sel_mu = ak.any(abs(all_tau_p["id"]) == 13, axis=-1)
         sel_lep = sel_ele | sel_mu
         all_tau_p = ak.mask(all_tau_p, sel_lep, valid_when=False)

-        sel_ele = ak.any(abs(all_tau_m['id']) == 11, axis=-1)
-        sel_mu = ak.any(abs(all_tau_m['id']) == 13, axis=-1)
+        sel_ele = ak.any(abs(all_tau_m["id"]) == 11, axis=-1)
+        sel_mu = ak.any(abs(all_tau_m["id"]) == 13, axis=-1)
         sel_lep = sel_ele | sel_mu
         all_tau_m = ak.mask(all_tau_m, sel_lep, valid_when=False)

@@ -150,13 +144,13 @@ def _get_visible_taus(self, all_parts, status_check = True):
         # Parent, Id and Stat are dummy branches, only needed
         # for technical consistency.
         final_taus = {
-            'Pt': ak.concatenate([fs_tau_p.pt, fs_tau_m.pt], axis=-1),
-            'Eta': ak.concatenate([fs_tau_p.eta, fs_tau_m.eta], axis=-1),
-            'Phi': ak.concatenate([fs_tau_p.phi, fs_tau_m.phi], axis=-1),
-            'E': ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1),
-            'Parent': ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1),
-            'Id': ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1),
-            'Stat': ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1)
+            "Pt": ak.concatenate([fs_tau_p.pt, fs_tau_m.pt], axis=-1),
+            "Eta": ak.concatenate([fs_tau_p.eta, fs_tau_m.eta], axis=-1),
+            "Phi": ak.concatenate([fs_tau_p.phi, fs_tau_m.phi], axis=-1),
+            "E": ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1),
+            "Parent": ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1),
+            "Id": ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1),
+            "Stat": ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1),
         }

         return final_taus
@@ -166,14 +160,14 @@ def _filter_genpart_branches(self, all_arrays):
         Filter genparticle branches by Id.
         """
         partId = abs(all_arrays["Id"])
-        sel_id = (partId == utils.get_pdg_id(self._part_type))
+        sel_id = partId == utils.get_pdg_id(self._part_type)
         for branch in all_arrays:
            all_arrays[branch] = all_arrays[branch][sel_id]
            all_arrays[branch] = ak.fill_none(all_arrays[branch], -999)

         return all_arrays

-    def _filter_fspart_branches(self, all_parts, status_check = True):
+    def _filter_fspart_branches(self, all_parts, status_check=True):
         """
         Select all the final state particles.
         This collection is used only for dR matching
@@ -183,15 +177,17 @@ def _filter_fspart_branches(self, all_parts, status_check = True):
         all_parts = ak.zip({k.lower(): all_parts[k] for k in all_parts.keys()})

         if status_check:
-
-            sel = ( (all_parts.id == 15) &
-                    (all_parts.stat == 2)
-                  )
+            sel = (all_parts.id == 15) & (all_parts.stat == 2)

             is_tau = ak.any(sel, axis=-1)
             all_parts = ak.where(is_tau, all_parts, ak.full_like(all_parts, -1000))

-        all_parts = {f: field for f, field in zip(['Id','Stat','Pt','Eta','Phi','Parent','E'],ak.unzip(all_parts))}
+        all_parts = {
+            f: field
+            for f, field in zip(
+                ["Id", "Stat", "Pt", "Eta", "Phi", "Parent", "E"], ak.unzip(all_parts)
+            )
+        }

         sel_no_nu_e = abs(all_parts["Id"]) != 12
         sel_no_nu_mu = abs(all_parts["Id"]) != 14
@@ -255,9 +251,7 @@ def _load_branches_from_ntuple(self, chunk_array, arr, branches):
     def _ak_array_in_chunk(self, arr, chunk_array, branches):
         for branch in branches:
             branch_key = branch.removeprefix("part")
-            arr[branch_key] = ak.concatenate(
-                [arr[branch_key], chunk_array[branch_key]]
-            )
+            arr[branch_key] = ak.concatenate([arr[branch_key], chunk_array[branch_key]])
         return arr

     @utils.timer("Loading objects files")
@@ -274,21 +268,19 @@ def _concat_array_from_ntuples(self):

             # Read files in chunks to avoid issues with large size files
             chunk_name = f"{fname}:{self._tree}"
-            for array in uproot.iterate(chunk_name, filter_name = branches, step_size="100 MB"):
+            for array in uproot.iterate(
+                chunk_name, filter_name=branches, step_size="100 MB"
+            ):
                 chunk_array = {x.removeprefix("part"): [] for x in branches}
                 chunk_array = self._load_branches_from_ntuple(
                     chunk_array, array, branches
                 )
                 chunk_array = self._postprocess_branches(chunk_array)

-                new_array = self._ak_array_in_chunk(
-                    new_array, chunk_array, branches
-                )
+                new_array = self._ak_array_in_chunk(new_array, chunk_array, branches)

             # Concatenate array from "fname file" to all_arrays
-            all_arrays = self._ak_array_in_chunk(
-                all_arrays, new_array, branches
-            )
+            all_arrays = self._ak_array_in_chunk(all_arrays, new_array, branches)

         bar.finish()

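The loop above is the standard uproot chunked-read pattern: `uproot.iterate` yields one awkward array per `step_size` of branch data, so files larger than memory can be cached incrementally. A standalone sketch with hypothetical file, tree, and branch names:

```python
import uproot

# Hypothetical file:tree and branch names, for illustration only.
for chunk in uproot.iterate(
    "ntuple.root:l1ObjTree",
    filter_name=["partPt", "partEta", "partPhi"],
    step_size="100 MB",
):
    print(len(chunk), "events in this chunk")
```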
@@ -314,11 +306,11 @@ def _save_array_to_parquet(self):
         """
         ak.to_parquet(
             self._final_ak_array,
-            destination=self.cache_out_path + f"{self.parquet_fname}.parquet"
+            destination=self.cache_out_path + f"{self.parquet_fname}.parquet",
         )

     def load(self):
-        #print(f"Process {self._object + self._part_type} object...")
+        # print(f"Process {self._object + self._part_type} object...")

         if self._cache_file_exists():
             return
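Once written, the cache can be loaded back into an awkward array in a single call. A sketch with a hypothetical path (the real one is assembled from `cache_out_path` and `parquet_fname`, i.e. `<version>_<sample>_<object>[_<part_type>].parquet`):

```python
import awkward as ak

arr = ak.from_parquet("cache/V29_MinBias_L1Tau.parquet")  # hypothetical file
print(arr.fields)  # cached branch names, with the "part" prefix stripped
```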
@@ -336,21 +328,20 @@ def load(self):


 if __name__ == "__main__":
-
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "cfg",
-        help="Path to the config file in yaml format. Defaults in `configs/caching`."
+        help="Path to the config file in yaml format. Defaults in `configs/caching`.",
     )
     parser.add_argument(
         "--dry-run",
         "-d",
         action="store_true",
-        help="Only do print-out of objects and branches to be loaded."
+        help="Only do print-out of objects and branches to be loaded.",
     )
     args = parser.parse_args()

-    with open(args.cfg, 'r') as f:
+    with open(args.cfg, "r") as f:
         cfg = yaml.safe_load(f)
     for version, samples in cfg.items():
         print("Processing: version", version)
@@ -368,6 +359,6 @@ def load(self):
                     obj=obj,
                     branches=branches,
                     cfg_file=args.cfg,
-                    dryrun=args.dry_run
+                    dryrun=args.dry_run,
                 )
                 loader.load()