From 4db64d02e71c4d59063d9ddfe938c95c247801fe Mon Sep 17 00:00:00 2001 From: Daniel Hundhausen Date: Tue, 16 Jan 2024 15:49:47 +0100 Subject: [PATCH 1/8] fix python version compatibility; add mypy config in pyproject.toml --- pyproject.toml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7d898768..acb80d28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ packages = [ ] [tool.poetry.dependencies] -python = "^3.11.0" +python = "~3.11.0" awkward = "2.5.2" fsspec = "2023.12.2" pyyaml = "6.0.1" @@ -54,3 +54,9 @@ pythonpath = [ testpaths = [ "tests", ] + +[tool.mypy] +disable_error_code = [ + "import-untyped" +] +mypy_path = "$MYPY_CONFIG_FILE_DIR/menu_tools" From 5c84cc38c9261879d86afbd4f6bf1d5ec7f0fa86 Mon Sep 17 00:00:00 2001 From: Daniel Hundhausen Date: Tue, 16 Jan 2024 16:17:16 +0100 Subject: [PATCH 2/8] make mypy ignore untyped imports --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index acb80d28..cecb59bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,4 +59,4 @@ testpaths = [ disable_error_code = [ "import-untyped" ] -mypy_path = "$MYPY_CONFIG_FILE_DIR/menu_tools" +explicit_package_bases = true From 3fc4cc72b6caa3d3d325fd26c2944b1bef4ecee4 Mon Sep 17 00:00:00 2001 From: Daniel Hundhausen Date: Tue, 16 Jan 2024 16:23:49 +0100 Subject: [PATCH 3/8] Fix Python version in workflow and add mypy and black checks; remove flake8 --- .github/workflows/action.yml | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/.github/workflows/action.yml b/.github/workflows/action.yml index cd6a33e8..af76bf0b 100644 --- a/.github/workflows/action.yml +++ b/.github/workflows/action.yml @@ -1,25 +1,13 @@ -name: Python Tests -on: [push] +name: Code Quality and Tests +on: [push, pull_request] jobs: - build: + lint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up 
Python 3.10 - uses: actions/setup-python@v4 + - uses: psf/black@stable with: - python-version: '3.10' - architecture: 'x64' - - name: Install dependencies - run: | - python -m pip install --upgrade pip wheel setuptools==65.5.1 - pip install flake8 pytest - if [ -f objectPerformance/requirements.txt ]; then pip install -r objectPerformance/requirements.txt; fi - - name: Run Flake8 - run: flake8 objectPerformance - - name: Run Tests - run: | - cd objectPerformance - python -m pytest - + options: "--check --verbose" + src: "./menu_tools" + version: "~= 23.12" From 5305bfe715c91f223ee011f614ac6f5c34abe325 Mon Sep 17 00:00:00 2001 From: Daniel Hundhausen Date: Wed, 17 Jan 2024 09:29:35 +0100 Subject: [PATCH 4/8] update actions --- .github/workflows/action.yml | 19 ++++++++++++------- .github/workflows/lint.yml | 25 +++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/lint.yml diff --git a/.github/workflows/action.yml b/.github/workflows/action.yml index af76bf0b..ca4d7910 100644 --- a/.github/workflows/action.yml +++ b/.github/workflows/action.yml @@ -1,13 +1,18 @@ -name: Code Quality and Tests -on: [push, pull_request] +name: Code quality +on: [push, pull_request, workflow_dispatch] jobs: - lint: + type-check: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: psf/black@stable + - uses: actions/setup-python@v4 with: - options: "--check --verbose" - src: "./menu_tools" - version: "~= 23.12" + python-version: '3.11' + architecture: 'x64' + - run: | + python -m pip install --upgrade pip poetry + poetry install + - run: | + pip list + mypy menu_tools diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..66056563 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,25 @@ +name: flake8, black Lint +on: [push, pull_request, workflow_dispatch] + +jobs: + black-lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: 
psf/black@stable + with: + options: "--check --verbose" + src: "./menu_tools" + version: "~= 23.12" + flake8-lint: + runs-on: ubuntu-latest + name: Lint + steps: + - name: Check out source repository + uses: actions/checkout@v3 + - name: Set up Python environment + uses: actions/setup-python@v4 + with: + python-version: "3.11" + - name: flake8 Lint + uses: py-actions/flake8@v2 From f0db7fe8d72aa47137049d5498243c702765e3ed Mon Sep 17 00:00:00 2001 From: Daniel Hundhausen Date: Wed, 17 Jan 2024 09:32:01 +0100 Subject: [PATCH 5/8] run black (code formatter) on menu_tools --- menu_tools/caching/cache_objects.py | 109 +++++---- .../object_performance/compare_plots.py | 44 ++-- .../object_performance/plotBTagEfficiency.py | 52 +++-- menu_tools/object_performance/plot_config.py | 4 +- menu_tools/object_performance/plotter.py | 212 ++++++++++-------- menu_tools/object_performance/quality_obj.py | 143 ++++++------ .../object_performance/scaling_collection.py | 118 ++++------ .../object_performance/tests/conftest.py | 103 +++++++-- .../tests/test_integration.py | 39 ++-- .../tests/test_turnon_collection.py | 13 +- .../object_performance/tests/test_utils.py | 20 +- .../object_performance/turnon_collection.py | 82 +++---- menu_tools/utils/utils.py | 40 ++-- 13 files changed, 531 insertions(+), 448 deletions(-) diff --git a/menu_tools/caching/cache_objects.py b/menu_tools/caching/cache_objects.py index be2d0bbf..fdddf135 100755 --- a/menu_tools/caching/cache_objects.py +++ b/menu_tools/caching/cache_objects.py @@ -14,22 +14,18 @@ vector.register_awkward() -class ObjectCacher(): - - def __init__(self, version, sample, obj, tree, branches, cfg_file, - dryrun=False): +class ObjectCacher: + def __init__(self, version, sample, obj, tree, branches, cfg_file, dryrun=False): self._version = version self._sample = sample self._cfg_file = cfg_file - self._object = obj.split('_')[0] + self._object = obj.split("_")[0] self._tree = tree self._final_ak_array = None self._ref_part_iso_dR_vals 
= [0.1, 0.15, 0.2, 0.3, 1.5] - self._ref_part_iso = { - f"dr_{dR}": [] for dR in self._ref_part_iso_dR_vals - } + self._ref_part_iso = {f"dr_{dR}": [] for dR in self._ref_part_iso_dR_vals} try: - self._part_type = obj.split('_')[1] + self._part_type = obj.split("_")[1] except IndexError: self._part_type = "" self._dryrun = dryrun @@ -47,11 +43,7 @@ def parquet_fname(self): Returns the name of the output file that the object will produce. """ - fname = ( - self._version - + '_' + self._sample - + "_" + self._object - ) + fname = self._version + "_" + self._sample + "_" + self._object if self._part_type: fname += "_" + self._part_type return fname @@ -65,7 +57,7 @@ def _ntuple_path(self): if glob.glob(local_ntuple_path): return local_ntuple_path - with open(self._cfg_file, 'r') as f: + with open(self._cfg_file, "r") as f: cfg = yaml.safe_load(f)[self._version][self._sample] return cfg["ntuple_path"] @@ -86,10 +78,10 @@ def _p4_sum(self, array, axis=-1): "E": ak.sum(array.E, axis=axis, keepdims=True), }, with_name="Momentum4D", - behavior=array.behavior + behavior=array.behavior, ) - def _get_visible_taus(self, all_parts, status_check = True): + def _get_visible_taus(self, all_parts, status_check=True): """ Create a collection of gen-level taus. Leptonic taus are discarded. 
@@ -99,15 +91,17 @@ def _get_visible_taus(self, all_parts, status_check = True): all_parts = ak.zip({k.lower(): all_parts[k] for k in all_parts.keys()}) if status_check: - - sel = ( (all_parts.id == 15) & - (all_parts.stat == 2) - ) + sel = (all_parts.id == 15) & (all_parts.stat == 2) is_tau = ak.any(sel, axis=-1) all_parts = ak.where(is_tau, all_parts, ak.full_like(all_parts, -1000)) - all_parts = {f: field for f, field in zip(['Id','Stat','Pt','Eta','Phi','Parent','E'],ak.unzip(all_parts))} + all_parts = { + f: field + for f, field in zip( + ["Id", "Stat", "Pt", "Eta", "Phi", "Parent", "E"], ak.unzip(all_parts) + ) + } sel_no_nu_e = abs(all_parts["Id"]) != 12 sel_no_nu_mu = abs(all_parts["Id"]) != 14 @@ -120,11 +114,11 @@ def _get_visible_taus(self, all_parts, status_check = True): all_tau_p = all_parts.copy() all_tau_m = all_parts.copy() - sel = all_tau_p['Parent'] == 15 + sel = all_tau_p["Parent"] == 15 for branch in all_tau_p: all_tau_p[branch] = all_tau_p[branch][sel] - sel = all_tau_m['Parent'] == -15 + sel = all_tau_m["Parent"] == -15 for branch in all_tau_m: all_tau_m[branch] = all_tau_m[branch][sel] @@ -134,13 +128,13 @@ def _get_visible_taus(self, all_parts, status_check = True): all_tau_p = ak.zip({k.lower(): all_tau_p[k] for k in all_tau_p.keys()}) all_tau_p = ak.with_name(all_tau_p, "Momentum4D") - sel_ele = ak.any(abs(all_tau_p['id']) == 11, axis=-1) - sel_mu = ak.any(abs(all_tau_p['id']) == 13, axis=-1) + sel_ele = ak.any(abs(all_tau_p["id"]) == 11, axis=-1) + sel_mu = ak.any(abs(all_tau_p["id"]) == 13, axis=-1) sel_lep = sel_ele | sel_mu all_tau_p = ak.mask(all_tau_p, sel_lep, valid_when=False) - sel_ele = ak.any(abs(all_tau_m['id']) == 11, axis=-1) - sel_mu = ak.any(abs(all_tau_m['id']) == 13, axis=-1) + sel_ele = ak.any(abs(all_tau_m["id"]) == 11, axis=-1) + sel_mu = ak.any(abs(all_tau_m["id"]) == 13, axis=-1) sel_lep = sel_ele | sel_mu all_tau_m = ak.mask(all_tau_m, sel_lep, valid_when=False) @@ -150,13 +144,13 @@ def _get_visible_taus(self, 
all_parts, status_check = True): # Parent, Id and Stat are dummy branches, only needed # for technical consistency. final_taus = { - 'Pt': ak.concatenate([fs_tau_p.pt, fs_tau_m.pt], axis=-1), - 'Eta': ak.concatenate([fs_tau_p.eta, fs_tau_m.eta], axis=-1), - 'Phi': ak.concatenate([fs_tau_p.phi, fs_tau_m.phi], axis=-1), - 'E': ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1), - 'Parent': ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1), - 'Id': ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1), - 'Stat': ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1) + "Pt": ak.concatenate([fs_tau_p.pt, fs_tau_m.pt], axis=-1), + "Eta": ak.concatenate([fs_tau_p.eta, fs_tau_m.eta], axis=-1), + "Phi": ak.concatenate([fs_tau_p.phi, fs_tau_m.phi], axis=-1), + "E": ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1), + "Parent": ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1), + "Id": ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1), + "Stat": ak.concatenate([fs_tau_p.E, fs_tau_m.E], axis=-1), } return final_taus @@ -166,14 +160,14 @@ def _filter_genpart_branches(self, all_arrays): Filter genparticle branches by Id. """ partId = abs(all_arrays["Id"]) - sel_id = (partId == utils.get_pdg_id(self._part_type)) + sel_id = partId == utils.get_pdg_id(self._part_type) for branch in all_arrays: all_arrays[branch] = all_arrays[branch][sel_id] all_arrays[branch] = ak.fill_none(all_arrays[branch], -999) return all_arrays - def _filter_fspart_branches(self, all_parts, status_check = True): + def _filter_fspart_branches(self, all_parts, status_check=True): """ Select all the final state particles. 
This collection is used only for dR matching @@ -183,15 +177,17 @@ def _filter_fspart_branches(self, all_parts, status_check = True): all_parts = ak.zip({k.lower(): all_parts[k] for k in all_parts.keys()}) if status_check: - - sel = ( (all_parts.id == 15) & - (all_parts.stat == 2) - ) + sel = (all_parts.id == 15) & (all_parts.stat == 2) is_tau = ak.any(sel, axis=-1) all_parts = ak.where(is_tau, all_parts, ak.full_like(all_parts, -1000)) - all_parts = {f: field for f, field in zip(['Id','Stat','Pt','Eta','Phi','Parent','E'],ak.unzip(all_parts))} + all_parts = { + f: field + for f, field in zip( + ["Id", "Stat", "Pt", "Eta", "Phi", "Parent", "E"], ak.unzip(all_parts) + ) + } sel_no_nu_e = abs(all_parts["Id"]) != 12 sel_no_nu_mu = abs(all_parts["Id"]) != 14 @@ -255,9 +251,7 @@ def _load_branches_from_ntuple(self, chunk_array, arr, branches): def _ak_array_in_chunk(self, arr, chunk_array, branches): for branch in branches: branch_key = branch.removeprefix("part") - arr[branch_key] = ak.concatenate( - [arr[branch_key], chunk_array[branch_key]] - ) + arr[branch_key] = ak.concatenate([arr[branch_key], chunk_array[branch_key]]) return arr @utils.timer("Loading objects files") @@ -274,21 +268,19 @@ def _concat_array_from_ntuples(self): # Read files in chunks to avoid issues with large size files chunk_name = f"{fname}:{self._tree}" - for array in uproot.iterate(chunk_name, filter_name = branches, step_size="100 MB"): + for array in uproot.iterate( + chunk_name, filter_name=branches, step_size="100 MB" + ): chunk_array = {x.removeprefix("part"): [] for x in branches} chunk_array = self._load_branches_from_ntuple( chunk_array, array, branches ) chunk_array = self._postprocess_branches(chunk_array) - new_array = self._ak_array_in_chunk( - new_array, chunk_array, branches - ) + new_array = self._ak_array_in_chunk(new_array, chunk_array, branches) # Concatenate array from "fname file" to all_arrays - all_arrays = self._ak_array_in_chunk( - all_arrays, new_array, branches - ) + 
all_arrays = self._ak_array_in_chunk(all_arrays, new_array, branches) bar.finish() @@ -314,11 +306,11 @@ def _save_array_to_parquet(self): """ ak.to_parquet( self._final_ak_array, - destination=self.cache_out_path + f"{self.parquet_fname}.parquet" + destination=self.cache_out_path + f"{self.parquet_fname}.parquet", ) def load(self): - #print(f"Process {self._object + self._part_type} object...") + # print(f"Process {self._object + self._part_type} object...") if self._cache_file_exists(): return @@ -336,21 +328,20 @@ def load(self): if __name__ == "__main__": - parser = argparse.ArgumentParser() parser.add_argument( "cfg", - help="Path to the config file in yaml format. Defaults in `configs/caching`." + help="Path to the config file in yaml format. Defaults in `configs/caching`.", ) parser.add_argument( "--dry-run", "-d", action="store_true", - help="Only do print-out of objects and branches to be loaded." + help="Only do print-out of objects and branches to be loaded.", ) args = parser.parse_args() - with open(args.cfg, 'r') as f: + with open(args.cfg, "r") as f: cfg = yaml.safe_load(f) for version, samples in cfg.items(): print("Processing: version", version) @@ -368,6 +359,6 @@ def load(self): obj=obj, branches=branches, cfg_file=args.cfg, - dryrun=args.dry_run + dryrun=args.dry_run, ) loader.load() diff --git a/menu_tools/object_performance/compare_plots.py b/menu_tools/object_performance/compare_plots.py index cadd39de..ca52f1fb 100755 --- a/menu_tools/object_performance/compare_plots.py +++ b/menu_tools/object_performance/compare_plots.py @@ -18,22 +18,22 @@ class ComparisonCentral(Plotter): - def __init__(self, cfg_plots_path): - with open(cfg_plots_path, 'r') as f: + with open(cfg_plots_path, "r") as f: self.cfg_plots = yaml.safe_load(f) for plot_name, cfg_plot in self.cfg_plots.items(): self.plot_name = plot_name self.cfg = self.cfg_plots[self.plot_name] self.save_dir = self.cfg["save_dir"] - if not os.path.exists(self.save_dir): os.makedirs(self.save_dir) 
+ if not os.path.exists(self.save_dir): + os.makedirs(self.save_dir) @property def _get_watermark(self): - try: - return self.cfg["watermark"] - except KeyError: - return " " + try: + return self.cfg["watermark"] + except KeyError: + return " " @property def _get_files(self): @@ -55,15 +55,20 @@ def _style_plot(self, fig, ax, legend_loc="lower right"): ax.set_ylabel(rf"{ylabel}") ax.set_ylim(0.0, 1) ax.tick_params(direction="in") - ax.text(0, -0.1, self._get_watermark, - color="grey", alpha=0.2, - fontsize=20, - transform=ax.transAxes) + ax.text( + 0, + -0.1, + self._get_watermark, + color="grey", + alpha=0.2, + fontsize=20, + transform=ax.transAxes, + ) fig.tight_layout() def run(self): files = self._get_files - fig, ax = self._create_new_plot() + fig, ax = self._create_new_plot() for file in files: fname = os.path.join(files[file]["dir"], file) test_object = files[file]["object"] @@ -75,24 +80,27 @@ def run(self): err_kwargs = dict_to_plot[test_object]["err_kwargs"] - ax.errorbar(dict_to_plot[test_object]["xbins"], - dict_to_plot[test_object]["efficiency"], - yerr = dict_to_plot[test_object]["efficiency_err"], - label = label, **err_kwargs) + ax.errorbar( + dict_to_plot[test_object]["xbins"], + dict_to_plot[test_object]["efficiency"], + yerr=dict_to_plot[test_object]["efficiency_err"], + label=label, + **err_kwargs, + ) self._style_plot(fig, ax) plt.savefig(f"{self.save_dir}/{self.plot_name}.png") plt.savefig(f"{self.save_dir}/{self.plot_name}.pdf") + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "cfg_plots", default="cfg_plots/version_comparison.yaml", - help="Path of YAML file specifying the desired plots." 
+ help="Path of YAML file specifying the desired plots.", ) args = parser.parse_args() plotter = ComparisonCentral(args.cfg_plots) plotter.run() - diff --git a/menu_tools/object_performance/plotBTagEfficiency.py b/menu_tools/object_performance/plotBTagEfficiency.py index 402d3ee6..a9bc0ae8 100755 --- a/menu_tools/object_performance/plotBTagEfficiency.py +++ b/menu_tools/object_performance/plotBTagEfficiency.py @@ -13,23 +13,23 @@ class ComparisonCentral(Plotter): - def __init__(self, cfg_plots_path): - with open(cfg_plots_path, 'r') as f: + with open(cfg_plots_path, "r") as f: self.cfg_plots = yaml.safe_load(f) self.cfgs = {} for plot_name, cfg_plot in self.cfg_plots.items(): - print (plot_name) + print(plot_name) self.cfgs[plot_name] = cfg_plot - if not os.path.exists(cfg_plot['save_dir']): os.makedirs(cfg_plot['save_dir']) + if not os.path.exists(cfg_plot["save_dir"]): + os.makedirs(cfg_plot["save_dir"]) - def _get_watermark(self,cfg): - try: - return cfg["watermark"] - except KeyError: - return " " + def _get_watermark(self, cfg): + try: + return cfg["watermark"] + except KeyError: + return " " - def _get_files(self,cfg): + def _get_files(self, cfg): try: return cfg["files"] except KeyError: @@ -48,16 +48,21 @@ def _style_plot(self, cfg, fig, ax, legend_loc="upper right"): ax.set_ylabel(rf"{ylabel}") ax.set_ylim(0.0, 1) ax.tick_params(direction="in") - ax.text(0, -0.1, self._get_watermark(cfg), - color="grey", alpha=0.2, - fontsize=20, - transform=ax.transAxes) + ax.text( + 0, + -0.1, + self._get_watermark(cfg), + color="grey", + alpha=0.2, + fontsize=20, + transform=ax.transAxes, + ) fig.tight_layout() def run(self): for plot_name, cfg in self.cfgs.items(): files = self._get_files(cfg) - fig, ax = self._create_new_plot() + fig, ax = self._create_new_plot() for file in files: fname = os.path.join(files[file]["dir"], file) test_object = files[file]["object"] @@ -69,25 +74,28 @@ def run(self): err_kwargs = dict_to_plot[test_object]["err_kwargs"] - 
ax.errorbar(dict_to_plot[test_object]["xbins"], - dict_to_plot[test_object]["efficiency"], - yerr = dict_to_plot[test_object]["efficiency_err"], - label = label, **err_kwargs) + ax.errorbar( + dict_to_plot[test_object]["xbins"], + dict_to_plot[test_object]["efficiency"], + yerr=dict_to_plot[test_object]["efficiency_err"], + label=label, + **err_kwargs, + ) self._style_plot(cfg, fig, ax) - print ("Saving plot to : ",plot_name) + print("Saving plot to : ", plot_name) plt.savefig(f"{cfg['save_dir']}/{plot_name}.png") plt.savefig(f"{cfg['save_dir']}/{plot_name}.pdf") + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "cfg_plots", default="cfg_plots/bJetEff.yaml", - help="Path of YAML file specifying the desired plots." + help="Path of YAML file specifying the desired plots.", ) args = parser.parse_args() plotter = ComparisonCentral(args.cfg_plots) plotter.run() - diff --git a/menu_tools/object_performance/plot_config.py b/menu_tools/object_performance/plot_config.py index 4f30ce3c..f1f7f417 100644 --- a/menu_tools/object_performance/plot_config.py +++ b/menu_tools/object_performance/plot_config.py @@ -1,5 +1,4 @@ -class PlotConfig(): - +class PlotConfig: def __init__(self, cfg: dict): self._cfg = cfg @@ -150,4 +149,3 @@ def get_l1_iso(self, obj): return self._cfg["test_objects"][obj]["iso_branch"] except KeyError: return None - diff --git a/menu_tools/object_performance/plotter.py b/menu_tools/object_performance/plotter.py index 2ba3bbf2..2ff8fbc5 100755 --- a/menu_tools/object_performance/plotter.py +++ b/menu_tools/object_performance/plotter.py @@ -16,8 +16,7 @@ plt.style.use(hep.style.CMS) -class Plotter(): - +class Plotter: outdir = "outputs/object_performance/" def _make_output_dirs(self, version: str): @@ -32,7 +31,6 @@ def _create_new_plot(self): class EfficiencyPlotter(Plotter): - def __init__(self, name, cfg, turnon_collection): self.plot_name = name self.cfg = cfg @@ -46,18 +44,23 @@ def _style_plot(self, fig, ax, 
legend_loc="lower right"): ax.legend(loc=legend_loc, frameon=False) ax.set_xlabel(rf"{self.cfg['xlabel']}") ylabel = self.cfg["ylabel"].replace( - "", - str(self.turnon_collection.threshold) + "", str(self.turnon_collection.threshold) ) ax.set_ylabel(rf"{ylabel}") ax.set_xlim(self.cfg["binning"]["min"], self.cfg["binning"]["max"]) ax.tick_params(direction="in") - watermark = f"{self.version}_{self.plot_name}_"\ - f"{self.turnon_collection.threshold}" - ax.text(0, -0.1, watermark, - color="grey", alpha=0.2, - fontsize=20, - transform=ax.transAxes) + watermark = ( + f"{self.version}_{self.plot_name}_" f"{self.turnon_collection.threshold}" + ) + ax.text( + 0, + -0.1, + watermark, + color="grey", + alpha=0.2, + fontsize=20, + transform=ax.transAxes, + ) fig.tight_layout() def _save_json(self, file_name): @@ -65,11 +68,11 @@ def _save_json(self, file_name): xlabel = self.cfg["xlabel"] ylabel = self.cfg["ylabel"].replace( - "", - str(self.turnon_collection.threshold) + "", str(self.turnon_collection.threshold) + ) + watermark = ( + f"{self.version}_{self.plot_name}_" f"{self.turnon_collection.threshold}" ) - watermark = f"{self.version}_{self.plot_name}_"\ - f"{self.turnon_collection.threshold}" plot["xlabel"] = xlabel plot["ylabel"] = ylabel @@ -92,8 +95,9 @@ def _save_json(self, file_name): efficiency, yerr = self.turnon_collection.get_efficiency(obj_key) xerr = self.turnon_collection.xerr(obj_key) - yerr = np.array([yerr[0][~np.isnan(efficiency)], - yerr[1][~np.isnan(efficiency)]]) + yerr = np.array( + [yerr[0][~np.isnan(efficiency)], yerr[1][~np.isnan(efficiency)]] + ) xerr = xerr[~np.isnan(efficiency)] xbins = xbins[~np.isnan(efficiency)] efficiency = efficiency[~np.isnan(efficiency)] @@ -105,8 +109,7 @@ def _save_json(self, file_name): label = self.cfg["test_objects"][obj_key]["label"] - err_kwargs = {"xerr": xerr, - "capsize": 3, "marker": 'o', "markersize": 8} + err_kwargs = {"xerr": xerr, "capsize": 3, "marker": "o", "markersize": 8} _object["label"] = label 
_object["efficiency"] = efficiency @@ -145,21 +148,24 @@ def _plot_efficiency_curve(self): label = self.cfg["test_objects"][obj_key]["label"] - err_kwargs = {"xerr": self.turnon_collection.xerr(obj_key), - "capsize": 3, "marker": 'o', "markersize": 8} - ax.errorbar(xbins, efficiency, yerr=yerr, label=label, - **err_kwargs) + err_kwargs = { + "xerr": self.turnon_collection.xerr(obj_key), + "capsize": 3, + "marker": "o", + "markersize": 8, + } + ax.errorbar(xbins, efficiency, yerr=yerr, label=label, **err_kwargs) self._style_plot(fig, ax) ax.set_ylim(0, 1.1) plot_fname = f"{self.outdir}/{self.version}/turnons/{self.plot_name}_{self.turnon_collection.threshold}_{self.version}" - for ext in [".png",".pdf"]: + for ext in [".png", ".pdf"]: plt.savefig(f"{plot_fname}{ext}") self._save_json(f"{plot_fname}.json") ## save config with open(f"{plot_fname}.yaml", "w") as outfile: - yaml.dump({self.plot_name:self.cfg}, outfile, default_flow_style=False) + yaml.dump({self.plot_name: self.cfg}, outfile, default_flow_style=False) plt.close() @@ -179,20 +185,19 @@ def _plot_iso_vs_efficiency_curve(self): # yerr = np.sqrt(iso_vs_eff_hist) # TODO: Possibly introduce errors label = self.cfg["test_objects"][obj_key]["label"] - err_kwargs = {"capsize": 3, "marker": 'o', "markersize": 8} + err_kwargs = {"capsize": 3, "marker": "o", "markersize": 8} ax.errorbar(xbins, iso_vs_eff_hist, label=label, **err_kwargs) self._style_plot(fig, ax) - plot_fname = f"{self.outdir}/{self.version}/turnons/{self.plot_name}_{self.turnon_collection.threshold}_{self.version}" - for ext in [".png",".pdf"]: + for ext in [".png", ".pdf"]: plt.savefig(f"{plot_fname}{ext}") self._save_json(f"{plot_fname}.json") ## save config with open(f"{plot_fname}.yaml", "w") as outfile: - yaml.dump({self.plot_name:self.cfg}, outfile, default_flow_style=False) + yaml.dump({self.plot_name: self.cfg}, outfile, default_flow_style=False) plt.close() @@ -206,11 +211,22 @@ def _plot_raw_counts(self): xbins = 0.5 * (xbins[1:] + 
xbins[:-1]) for obj_key, ref_hist in self.turnon_collection.hists["ref"].items(): - err_kwargs = {"xerr": self.turnon_collection.xerr(obj_key), - "capsize": 1, "marker": 'o', "markersize": 2, - "linestyle": "None"} - - ref_hist = ax.step(xbins, ref_hist[0], where="mid", label = "ref: " + obj_key , ls = "--", color = "k") + err_kwargs = { + "xerr": self.turnon_collection.xerr(obj_key), + "capsize": 1, + "marker": "o", + "markersize": 2, + "linestyle": "None", + } + + ref_hist = ax.step( + xbins, + ref_hist[0], + where="mid", + label="ref: " + obj_key, + ls="--", + color="k", + ) label = self.cfg["reference_object"]["label"] for obj_key, gen_hist_trig in self.turnon_collection.hists.items(): @@ -219,14 +235,20 @@ def _plot_raw_counts(self): yerr = np.sqrt(gen_hist_trig[0]) label = self.cfg["test_objects"][obj_key]["label"] test_hist = ax.step(xbins, gen_hist_trig[0], where="mid") - ax.errorbar(xbins, gen_hist_trig[0], yerr=yerr, label=label, - color=test_hist[0].get_color(), **err_kwargs) + ax.errorbar( + xbins, + gen_hist_trig[0], + yerr=yerr, + label=label, + color=test_hist[0].get_color(), + **err_kwargs, + ) self._style_plot(fig, ax) plot_fname = f"{self.outdir}/{self.version}/distributions/{self.plot_name}_{self.turnon_collection.threshold}_dist_{self.version}" - for ext in [".png",".pdf"]: + for ext in [".png", ".pdf"]: plt.savefig(f"{plot_fname}{ext}") - #self._save_json(f"{plot_fname}.json") + # self._save_json(f"{plot_fname}.json") plt.close() @@ -239,13 +261,13 @@ def plot(self): self._plot_raw_counts() -class EfficiencyCentral(): +class EfficiencyCentral: """ Class that orchestrates the plotting of """ def __init__(self, cfg_plots_path): - with open(cfg_plots_path, 'r') as f: + with open(cfg_plots_path, "r") as f: self.cfg_plots = yaml.safe_load(f) def get_thresholds(self, cfg_plot: dict): @@ -275,15 +297,20 @@ def run(self): turnon_collection = TurnOnCollection(cfg_plot, threshold) turnon_collection.create_hists() - plotter = EfficiencyPlotter(plot_name, 
cfg_plot, - turnon_collection) + plotter = EfficiencyPlotter(plot_name, cfg_plot, turnon_collection) plotter.plot() class ScalingPlotter(Plotter): - - def __init__(self, plot_name: str, cfg_plot: dict, scalings: dict, - scaling_pct: float, version: str, params: dict): + def __init__( + self, + plot_name: str, + cfg_plot: dict, + scalings: dict, + scaling_pct: float, + version: str, + params: dict, + ): self.plot_name = plot_name self.cfg_plot = cfg_plot self.scalings = scalings @@ -294,7 +321,7 @@ def __init__(self, plot_name: str, cfg_plot: dict, scalings: dict, def _params_to_func_str(self, obj): a = round(self.params[obj][0], 3) b = round(self.params[obj][1], 3) - pm = '+' if b > 0 else '-' + pm = "+" if b > 0 else "-" return f"y = {a} x {pm} {abs(b)}" def _set_plot_ranges(self, ax): @@ -320,9 +347,11 @@ def _save_json(self, file_name): x_points = list(points.keys()) y_points = list(points.values()) - label = (self.cfg_plot["test_objects"][obj]["label"] - + ", " - + self._params_to_func_str(obj)) + label = ( + self.cfg_plot["test_objects"][obj]["label"] + + ", " + + self._params_to_func_str(obj) + ) _object["xvals"] = x_points _object["yvals"] = y_points @@ -340,11 +369,13 @@ def plot(self): for obj, points in self.scalings.items(): x_points = np.array(list(points.keys())) y_points = np.array(list(points.values())) - pts = ax.plot(x_points, y_points, 'o') + pts = ax.plot(x_points, y_points, "o") - label = (self.cfg_plot["test_objects"][obj]["label"] - + ", " - + self._params_to_func_str(obj)) + label = ( + self.cfg_plot["test_objects"][obj]["label"] + + ", " + + self._params_to_func_str(obj) + ) a, b = self.params[obj] x = np.linspace(0, 2500, 20) y = utils.scaling_func(x, a, b) @@ -354,33 +385,41 @@ def plot(self): ax.set_xlabel("L1 threshold [GeV]") ax.set_ylabel(f"{int(self.scaling_pct*100)}% Location (gen, GeV)") watermark = f"{self.version}_{self.plot_name}" - ax.text(0, -0.1, watermark, - color="grey", alpha=0.2, - fontsize=20, - transform=ax.transAxes) + 
ax.text( + 0, + -0.1, + watermark, + color="grey", + alpha=0.2, + fontsize=20, + transform=ax.transAxes, + ) self._set_plot_ranges(ax) fig.tight_layout() - plot_fname = f"{self.outdir}/{self.version}/scalings/{self.plot_name}_{self.version}" - for ext in [".png",".pdf"]: + plot_fname = ( + f"{self.outdir}/{self.version}/scalings/{self.plot_name}_{self.version}" + ) + for ext in [".png", ".pdf"]: plt.savefig(f"{plot_fname}{ext}") self._save_json(f"{plot_fname}.json") ## save config with open(f"{plot_fname}.yaml", "w") as outfile: - yaml.dump({self.plot_name:self.cfg_plot}, outfile, default_flow_style=False) + yaml.dump( + {self.plot_name: self.cfg_plot}, outfile, default_flow_style=False + ) plt.close() class ScalingCentral: - outdir = "outputs/object_performance/" def __init__(self, cfg_plots_path): - with open(cfg_plots_path, 'r') as f: + with open(cfg_plots_path, "r") as f: self.cfg_plots = yaml.safe_load(f) - with open("./configs/scaling_thresholds.yaml", 'r') as f: + with open("./configs/scaling_thresholds.yaml", "r") as f: self.scaling_thresholds = yaml.safe_load(f) def _get_scaling_thresholds(self, cfg_plot, test_obj): @@ -400,24 +439,25 @@ def _get_scaling_thresholds(self, cfg_plot, test_obj): return self.scaling_thresholds["Tau"] if any("Jet" in x for x in cfg_plot["test_objects"]): return self.scaling_thresholds["Jet"] - raise RuntimeError( - "Failed to find thresholds in cfg_scaling_thresholds!" 
- ) + raise RuntimeError("Failed to find thresholds in cfg_scaling_thresholds!") def _rate_config_function(self, name: str, a: float, b: float): - pm = '+' if b < 0 else '' - f_string = (f"function :: {name}OfflineEtCut :: " - f"args:=(offline); lambda:=(offline{pm}{-b:.3f})/{a:.3f}") + pm = "+" if b < 0 else "" + f_string = ( + f"function :: {name}OfflineEtCut :: " + f"args:=(offline); lambda:=(offline{pm}{-b:.3f})/{a:.3f}" + ) return f_string - def _write_scalings_to_file(self, - plot_name: str, - version: str, - params: dict): - with open(f"{self.outdir}/{version}/scalings/{plot_name}_scalings_{version}.txt", 'w+') as f: - f.write('') + def _write_scalings_to_file(self, plot_name: str, version: str, params: dict): + with open( + f"{self.outdir}/{version}/scalings/{plot_name}_scalings_{version}.txt", "w+" + ) as f: + f.write("") - with open(f"{self.outdir}/{version}/scalings/{plot_name}_scalings_{version}.txt", 'a') as f: + with open( + f"{self.outdir}/{version}/scalings/{plot_name}_scalings_{version}.txt", "a" + ) as f: for obj, obj_params in params.items(): a, b = obj_params f.write(self._rate_config_function(obj, a, b) + "\n") @@ -440,22 +480,19 @@ def run(self): turnon_collection.create_hists() scaling_pct = turnon_collection.cfg_plot.scaling_pct method = turnon_collection.cfg_plot.scaling_method - scaling_collect = ScalingCollection(cfg_plot, - method, - scaling_pct) + scaling_collect = ScalingCollection(cfg_plot, method, scaling_pct) version = turnon_collection.version - scal = scaling_collect._compute_scalings(turnon_collection, - test_obj, - scal, - scaling_pct, - method) + scal = scaling_collect._compute_scalings( + turnon_collection, test_obj, scal, scaling_pct, method + ) bar.finish() scalings[test_obj] = scal[test_obj] params = scaling_collect._fit_linear_functions(scalings) if params: - plotter = ScalingPlotter(plot_name, cfg_plot, scalings, - scaling_pct, version, params) + plotter = ScalingPlotter( + plot_name, cfg_plot, scalings, scaling_pct, 
version, params + ) plotter.plot() self._write_scalings_to_file(plot_name, version, params) @@ -465,7 +502,7 @@ def run(self): parser.add_argument( "cfg_plots", default="cfg_plots/muons.yaml", - help="Path of YAML file specifying the desired plots." + help="Path of YAML file specifying the desired plots.", ) args = parser.parse_args() @@ -474,4 +511,3 @@ def run(self): scalings = ScalingCentral(args.cfg_plots) scalings.run() - diff --git a/menu_tools/object_performance/quality_obj.py b/menu_tools/object_performance/quality_obj.py index 8d256b68..66cbab36 100644 --- a/menu_tools/object_performance/quality_obj.py +++ b/menu_tools/object_performance/quality_obj.py @@ -1,27 +1,28 @@ -class L1IsoCut(): - +class L1IsoCut: def __init__(self, ak_arrays, obj: str, IsoBB=-1, IsoEE=-1, l1_iso="iso"): ak_arrays = ak_arrays[obj] # TODO: remove obj arg self.IsoBB = IsoBB self.IsoEE = IsoEE self.l1_iso = l1_iso - self.sel_iso_BB = ak_arrays['eta'] > -100 - self.sel_iso_EE = ak_arrays['eta'] > -100 + self.sel_iso_BB = ak_arrays["eta"] > -100 + self.sel_iso_EE = ak_arrays["eta"] > -100 if self.IsoBB >= 0: - self.sel_iso_BB = ((abs(ak_arrays['eta']) < 1.479) - & (ak_arrays[self.l1_iso] > self.IsoBB)) + self.sel_iso_BB = (abs(ak_arrays["eta"]) < 1.479) & ( + ak_arrays[self.l1_iso] > self.IsoBB + ) if self.IsoEE >= 0: - self.sel_iso_EE = ((abs(ak_arrays['eta']) > 1.479) - & (ak_arrays[self.l1_iso] > self.IsoEE)) + self.sel_iso_EE = (abs(ak_arrays["eta"]) > 1.479) & ( + ak_arrays[self.l1_iso] > self.IsoEE + ) @property def ISO_EEBB(self): return self.sel_iso_EE | self.sel_iso_BB -class Quality(): +class Quality: """ Class implementing the L1 quality criteria. Hardware criteria to be decide with Menu team. 
@@ -29,73 +30,83 @@ class Quality(): def __init__(self, ak_arrays, obj: str): ak_arrays = ak_arrays[obj] # TODO: remove obj arg - #print("Computing quality for ", obj) - - self.sel_lowEta = ((abs(ak_arrays['eta']) < 0.9) - & (ak_arrays['region'] != 1)) - self.sel_midEta = ((abs(ak_arrays['eta']) > 0.9) - & (abs(ak_arrays['eta']) < 1.2) - & (ak_arrays['region'] != 2)) - self.sel_highEta = ((abs(ak_arrays['eta']) > 1.2) - & (ak_arrays['region'] != 3)) - - self.sel_qualities = ((ak_arrays['quality'] != 11) - & (ak_arrays['quality'] != 13) - & (ak_arrays['quality'] != 14) - & (ak_arrays['quality'] != 15) - & (ak_arrays['region'] == 3)) - self.sel_qual_12 = ((ak_arrays['quality'] < 12) - & (ak_arrays['region'] == 2)) - self.sel_qual_0 = ((ak_arrays['quality'] == 0) - & (ak_arrays['region'] == 3)) - self.sel_qual_1 = ((ak_arrays['quality'] < 2) - & (ak_arrays['region'] == 1)) - self.sel_qual_3 = ((ak_arrays['quality'] != 3) - & (ak_arrays['region'] == 1)) - self.sel_qual_5 = ((ak_arrays['quality'] != 5) - & (ak_arrays['region'] == 1)) - self.sel_qualOnly_12 = (ak_arrays['quality'] < 12) - - self.sel_midEta_qual = ((abs(ak_arrays['eta']) > 0.9) - & (abs(ak_arrays['eta']) < 1.2) - & (ak_arrays['region'] == 3)) - - self.sel_odd = (ak_arrays['quality'] % 2 == 0) - self.sel_odd_type = ((ak_arrays['quality'] % 2 == 0) - & (ak_arrays['region'] == 1)) - self.sel_not_4 = (ak_arrays['region'] == 4) - + # print("Computing quality for ", obj) + + self.sel_lowEta = (abs(ak_arrays["eta"]) < 0.9) & (ak_arrays["region"] != 1) + self.sel_midEta = ( + (abs(ak_arrays["eta"]) > 0.9) + & (abs(ak_arrays["eta"]) < 1.2) + & (ak_arrays["region"] != 2) + ) + self.sel_highEta = (abs(ak_arrays["eta"]) > 1.2) & (ak_arrays["region"] != 3) + + self.sel_qualities = ( + (ak_arrays["quality"] != 11) + & (ak_arrays["quality"] != 13) + & (ak_arrays["quality"] != 14) + & (ak_arrays["quality"] != 15) + & (ak_arrays["region"] == 3) + ) + self.sel_qual_12 = (ak_arrays["quality"] < 12) & (ak_arrays["region"] == 
2) + self.sel_qual_0 = (ak_arrays["quality"] == 0) & (ak_arrays["region"] == 3) + self.sel_qual_1 = (ak_arrays["quality"] < 2) & (ak_arrays["region"] == 1) + self.sel_qual_3 = (ak_arrays["quality"] != 3) & (ak_arrays["region"] == 1) + self.sel_qual_5 = (ak_arrays["quality"] != 5) & (ak_arrays["region"] == 1) + self.sel_qualOnly_12 = ak_arrays["quality"] < 12 + + self.sel_midEta_qual = ( + (abs(ak_arrays["eta"]) > 0.9) + & (abs(ak_arrays["eta"]) < 1.2) + & (ak_arrays["region"] == 3) + ) + + self.sel_odd = ak_arrays["quality"] % 2 == 0 + self.sel_odd_type = (ak_arrays["quality"] % 2 == 0) & (ak_arrays["region"] == 1) + self.sel_not_4 = ak_arrays["region"] == 4 ### EG IDs from 123x - self.sel_tkIsoPho_123 = ((ak_arrays['quality'] > 0 ) & (abs(ak_arrays['eta']) < 1.479)) | ((ak_arrays['quality'] == 3 ) & (abs(ak_arrays['eta']) >= 1.479)) - + self.sel_tkIsoPho_123 = ( + (ak_arrays["quality"] > 0) & (abs(ak_arrays["eta"]) < 1.479) + ) | ((ak_arrays["quality"] == 3) & (abs(ak_arrays["eta"]) >= 1.479)) ## EG IDs from 125x # for EG: region == HGC if "passeseleid" in ak_arrays.fields: - self.sel_EG_barrelID = (ak_arrays['region'] == 0) & (ak_arrays['passeseleid'] == 1) + self.sel_EG_barrelID = (ak_arrays["region"] == 0) & ( + ak_arrays["passeseleid"] == 1 + ) else: - self.sel_EG_barrelID = (ak_arrays['region'] == 0) & (((ak_arrays['quality'] >> 1)&1) > 0) + self.sel_EG_barrelID = (ak_arrays["region"] == 0) & ( + ((ak_arrays["quality"] >> 1) & 1) > 0 + ) if "passessaid" in ak_arrays.fields: - self.sel_EG_endcapID = (ak_arrays['region'] == 1) & (ak_arrays['passessaid'] == 1) + self.sel_EG_endcapID = (ak_arrays["region"] == 1) & ( + ak_arrays["passessaid"] == 1 + ) else: - self.sel_EG_endcapID = (ak_arrays['region'] == 1) & (((ak_arrays['quality'] >> 0)&1) > 0) + self.sel_EG_endcapID = (ak_arrays["region"] == 1) & ( + ((ak_arrays["quality"] >> 0) & 1) > 0 + ) # for EG: quality = HwQual, alt approach: use HW qual bits directly instead of the menu ntuple variables: bit0: SA, 1: 
Ele, 2: Pho - #self.sel_EG_barrelID = (ak_arrays['region'] == 0) & (((ak_arrays['quality'] >> 1)&1) > 0) - #self.sel_EG_endcapID = (ak_arrays['region'] == 1) & (((ak_arrays['quality'] >> 0)&1) > 0) + # self.sel_EG_barrelID = (ak_arrays['region'] == 0) & (((ak_arrays['quality'] >> 1)&1) > 0) + # self.sel_EG_endcapID = (ak_arrays['region'] == 1) & (((ak_arrays['quality'] >> 0)&1) > 0) ## tkPhoton from 125x - #self.sel_tkPho_barrelID = (ak_arrays['region'] == 0) & (ak_arrays['passeseleid'] == 1) - #self.sel_tkPho_endcapID = (ak_arrays['region'] == 1) & (ak_arrays['passesphoid'] == 1) + # self.sel_tkPho_barrelID = (ak_arrays['region'] == 0) & (ak_arrays['passeseleid'] == 1) + # self.sel_tkPho_endcapID = (ak_arrays['region'] == 1) & (ak_arrays['passesphoid'] == 1) if "passesphoid" in ak_arrays.fields: - self.sel_tkPho_endcapID = (ak_arrays['region'] == 1) & (ak_arrays['passesphoid'] == 1) + self.sel_tkPho_endcapID = (ak_arrays["region"] == 1) & ( + ak_arrays["passesphoid"] == 1 + ) else: - self.sel_tkPho_endcapID = (ak_arrays['region'] == 1) & (((ak_arrays['quality'] >> 2)&1) > 0) + self.sel_tkPho_endcapID = (ak_arrays["region"] == 1) & ( + ((ak_arrays["quality"] >> 2) & 1) > 0 + ) - #self.sel_tkPho_barrelID = (ak_arrays['region'] == 0) & (((ak_arrays['quality'] >> 1)&1) > 0) - #self.sel_tkPho_endcapID = (ak_arrays['region'] == 1) & (((ak_arrays['quality'] >> 2)&1) > 0) + # self.sel_tkPho_barrelID = (ak_arrays['region'] == 0) & (((ak_arrays['quality'] >> 1)&1) > 0) + # self.sel_tkPho_endcapID = (ak_arrays['region'] == 1) & (((ak_arrays['quality'] >> 2)&1) > 0) @property def QUAL_125x_EGID(self): @@ -103,7 +114,7 @@ def QUAL_125x_EGID(self): @property def QUAL_125x_tkPhoID(self): - #return ~(self.sel_tkPho_barrelID | self.sel_tkPho_endcapID) + # return ~(self.sel_tkPho_barrelID | self.sel_tkPho_endcapID) return ~(self.sel_EG_barrelID | self.sel_tkPho_endcapID) @property @@ -136,8 +147,7 @@ def QUAL_CorrectRegion(self): @property def QUAL_Endcap1CorrectRegion(self): - 
return (self.sel_lowEta | self.sel_midEta | self.sel_highEta - | self.sel_qual_0) + return self.sel_lowEta | self.sel_midEta | self.sel_highEta | self.sel_qual_0 @property def QUAL_BarrelOddEndcap2(self): @@ -161,8 +171,13 @@ def QUAL_Odd(self): @property def QUAL_Overlap12Endcap1CorrectRegion(self): - return (self.sel_lowEta | self.sel_midEta | self.sel_highEta - | self.sel_qual_12 | self.sel_qual_0) + return ( + self.sel_lowEta + | self.sel_midEta + | self.sel_highEta + | self.sel_qual_12 + | self.sel_qual_0 + ) @property def QUAL_12(self): diff --git a/menu_tools/object_performance/scaling_collection.py b/menu_tools/object_performance/scaling_collection.py index 3fab8a95..1f8f94ed 100644 --- a/menu_tools/object_performance/scaling_collection.py +++ b/menu_tools/object_performance/scaling_collection.py @@ -6,7 +6,7 @@ from menu_tools.utils import utils -class ScalingCollection(): +class ScalingCollection: """ Collection of scaling values corresponding to one scaling plot. This requires the input of multiple @@ -14,10 +14,7 @@ class ScalingCollection(): objects. """ - def __init__(self, - cfg: PlotConfig, - method: str, - plateau_pct: float = 0.95): + def __init__(self, cfg: PlotConfig, method: str, plateau_pct: float = 0.95): self.cfg = cfg self.method = method self.plateau_pct = plateau_pct @@ -38,30 +35,29 @@ def _find_turnon_cut(self, graph_x, graph_y, Target): L = 0 R = np.max(graph_x) - while (R - L > 0.0001): + while R - L > 0.0001: C = (L + R) / 2 V = self._get_point_on_curve(C, graph_x, graph_y) - if (V < Target): + if V < Target: L = C else: R = C - return (R + L) / 2. 
+ return (R + L) / 2.0 def _find_turnon_fit(self, function, popt, _min, _max, target): - if ((function(_min, *popt) > target) - | (function(_max, *popt) < target)): + if (function(_min, *popt) > target) | (function(_max, *popt) < target): return -1000 L = _min R = _max - while (R - L > 0.0001): + while R - L > 0.0001: C = (L + R) / 2 V = function(C, *popt) - if (V < target): + if V < target: L = C else: R = C @@ -69,10 +65,7 @@ def _find_turnon_fit(self, function, popt, _min, _max, target): return (R + L) / 2 @utils.ignore_warnings - def _compute_value_of_tanh_at_threshold(self, - efficiency, - bins, - threshold): + def _compute_value_of_tanh_at_threshold(self, efficiency, bins, threshold): xvals = np.array(bins) efficiency = np.array(efficiency) @@ -88,29 +81,28 @@ def _compute_value_of_tanh_at_threshold(self, return s_val @utils.ignore_warnings - def _compute_value_of_errf_at_threshold(self, - efficiency, - bins, - scaling_pct): + def _compute_value_of_errf_at_threshold(self, efficiency, bins, scaling_pct): xvals = np.array(bins) efficiency = np.array(efficiency) xvals = xvals[~np.isnan(efficiency)] efficiency = efficiency[~np.isnan(efficiency)] - popt, pcov = curve_fit(utils.errf, xvals, efficiency, - p0=[0.02, 80, 20, 1, np.min(efficiency)] - ) + popt, pcov = curve_fit( + utils.errf, xvals, efficiency, p0=[0.02, 80, 20, 1, np.min(efficiency)] + ) - s_val = self._find_turnon_fit(utils.errf, popt, - np.min(xvals), np.max(xvals) * 10, - scaling_pct * utils.errf(10000, *popt) - ) + s_val = self._find_turnon_fit( + utils.errf, + popt, + np.min(xvals), + np.max(xvals) * 10, + scaling_pct * utils.errf(10000, *popt), + ) return s_val def _interpolate(self, H, K1, K2): - A = np.ones(len(K1)) * (-K2) B = [k1i + 2 * K2 for k1i in K1] C = np.ones(len(K1)) * (-K2) @@ -141,11 +133,10 @@ def _interpolate(self, H, K1, K2): return Y def _get_point_on_curve(self, x, graph_x, graph_y): - - if (x < graph_x[0]): + if x < graph_x[0]: return 0 - if (x >= graph_x[len(graph_x) - 1]): + 
if x >= graph_x[len(graph_x) - 1]: return 1 xr = graph_x[0] @@ -155,22 +146,20 @@ def _get_point_on_curve(self, x, graph_x, graph_y): yl = yr xr = graph_x[i + 1] yr = graph_y[i + 1] - if ((x < xr) & (x >= xl)): + if (x < xr) & (x >= xl): return yl + (yr - yl) / (xr - xl) * (x - xl) return -1 - def _compute_scalings_naive(self, - turnon_collection, - test_obj, - scalings, - scaling_pct): + def _compute_scalings_naive( + self, turnon_collection, test_obj, scalings, scaling_pct + ): bins = turnon_collection.bins bins = 0.5 * (bins[1:] + bins[:-1]) threshold = turnon_collection.threshold for obj_key, gen_hist_trig in turnon_collection.hists.items(): - if ((obj_key == "ref") | (obj_key != test_obj)): + if (obj_key == "ref") | (obj_key != test_obj): continue efficiency, yerr = turnon_collection.get_efficiency(obj_key) @@ -187,83 +176,65 @@ def _compute_scalings_naive(self, K1.append(1 / (er_dn[i] + er_up[i]) / (er_up[i] + er_dn[i])) percentage_point = self._find_turnon_cut( - xbins, - self._interpolate(efficiency, K1, 100), - scaling_pct + xbins, self._interpolate(efficiency, K1, 100), scaling_pct ) if percentage_point: scalings[obj_key][threshold] = percentage_point return scalings - def _compute_scalings_tanh(self, - turnon_collection, - test_obj, - scalings, - scaling_pct): + def _compute_scalings_tanh( + self, turnon_collection, test_obj, scalings, scaling_pct + ): bins = turnon_collection.bins bins = 0.5 * (bins[1:] + bins[:-1]) threshold = turnon_collection.threshold for obj_key, gen_hist_trig in turnon_collection.hists.items(): - if ((obj_key == "ref") | (obj_key != test_obj)): + if (obj_key == "ref") | (obj_key != test_obj): continue efficiency, _ = turnon_collection.get_efficiency(obj_key) percentage_point = self._compute_value_of_tanh_at_threshold( - efficiency, - bins, - scaling_pct + efficiency, bins, scaling_pct ) if percentage_point: scalings[obj_key][threshold] = percentage_point return scalings - def _compute_scalings_errf(self, - turnon_collection, - 
test_obj, - scalings, - scaling_pct): + def _compute_scalings_errf( + self, turnon_collection, test_obj, scalings, scaling_pct + ): bins = turnon_collection.bins bins = 0.5 * (bins[1:] + bins[:-1]) threshold = turnon_collection.threshold for obj_key, gen_hist_trig in turnon_collection.hists.items(): - if ((obj_key == "ref") | (obj_key != test_obj)): + if (obj_key == "ref") | (obj_key != test_obj): continue efficiency, _ = turnon_collection.get_efficiency(obj_key) percentage_point = self._compute_value_of_errf_at_threshold( - efficiency, - bins, - scaling_pct + efficiency, bins, scaling_pct ) if percentage_point: scalings[obj_key][threshold] = percentage_point return scalings - def _compute_scalings(self, turnon_collection, test_obj, scalings, - scaling_pct, method="tanh") -> dict: + def _compute_scalings( + self, turnon_collection, test_obj, scalings, scaling_pct, method="tanh" + ) -> dict: if method == "tanh": return self._compute_scalings_tanh( - turnon_collection, - test_obj, - scalings, - scaling_pct + turnon_collection, test_obj, scalings, scaling_pct ) if method == "errf": return self._compute_scalings_errf( - turnon_collection, - test_obj, - scalings, - scaling_pct + turnon_collection, test_obj, scalings, scaling_pct ) if method == "naive": return self._compute_scalings_naive( - turnon_collection, - test_obj, - scalings, - scaling_pct + turnon_collection, test_obj, scalings, scaling_pct ) def _fit_linear_functions(self, scalings): @@ -280,4 +251,3 @@ def _fit_linear_functions(self, scalings): if __name__ == "__main__": pass - diff --git a/menu_tools/object_performance/tests/conftest.py b/menu_tools/object_performance/tests/conftest.py index 620881ce..ed782f48 100644 --- a/menu_tools/object_performance/tests/conftest.py +++ b/menu_tools/object_performance/tests/conftest.py @@ -6,30 +6,89 @@ def met_config(): cfg_plot = { "sample": "TT", "default_version": "V22", - "reference_object": { - "object": "genMetTrue", - "suffix": "", - "label": "Gen MET" - }, + 
"reference_object": {"object": "genMetTrue", "suffix": "", "label": "Gen MET"}, "test_objects": { - "trackerMET": { - "suffix": "", - "label": "Tracker MET" - }, - "puppiMET": { - "suffix": "Et", - "label": "Puppi MET" - }, + "trackerMET": {"suffix": "", "label": "Tracker MET"}, + "puppiMET": {"suffix": "Et", "label": "Puppi MET"}, }, "binning": {"min": 0, "max": 500, "step": 20}, - "trackerMETTruth": [17671, 8214, 6463, 5321, 4212, 3308, 2453, 1811, - 1146, 759, 482, 307, 261, 154, 93, 73, 61, 32, 22, - 18, 20, 14, 8, 7], - "puppiMETTruth": [31222, 14025, 13874, 13621, 11387, 8429, 5670, 3644, - 2133, 1306, 766, 460, 352, 222, 145, 98, 81, 45, 29, - 21, 24, 15, 9, 7], - "genMETTruth": [130238, 51518, 40197, 29181, 18620, 11269, 6729, 3975, - 2255, 1353, 791, 470, 355, 225, 148, 98, 81, 45, 30, - 21, 25, 15, 9, 7], + "trackerMETTruth": [ + 17671, + 8214, + 6463, + 5321, + 4212, + 3308, + 2453, + 1811, + 1146, + 759, + 482, + 307, + 261, + 154, + 93, + 73, + 61, + 32, + 22, + 18, + 20, + 14, + 8, + 7, + ], + "puppiMETTruth": [ + 31222, + 14025, + 13874, + 13621, + 11387, + 8429, + 5670, + 3644, + 2133, + 1306, + 766, + 460, + 352, + 222, + 145, + 98, + 81, + 45, + 29, + 21, + 24, + 15, + 9, + 7, + ], + "genMETTruth": [ + 130238, + 51518, + 40197, + 29181, + 18620, + 11269, + 6729, + 3975, + 2255, + 1353, + 791, + 470, + 355, + 225, + 148, + 98, + 81, + 45, + 30, + 21, + 25, + 15, + 9, + 7, + ], } return cfg_plot diff --git a/menu_tools/object_performance/tests/test_integration.py b/menu_tools/object_performance/tests/test_integration.py index e2c04709..cf0c395f 100644 --- a/menu_tools/object_performance/tests/test_integration.py +++ b/menu_tools/object_performance/tests/test_integration.py @@ -11,18 +11,31 @@ def off_test_turnon_collection_met(met_config): turnon_collection = TurnOnCollection(met_config, 70) turnon_collection.create_hists() - assert all([x == y for x, y in zip( - list(turnon_collection.hists["trackerMET"][0]), - met_config["trackerMETTruth"] - )]) + 
assert all( + [ + x == y + for x, y in zip( + list(turnon_collection.hists["trackerMET"][0]), + met_config["trackerMETTruth"], + ) + ] + ) - assert all([x == y for x, y in zip( - list(turnon_collection.hists["puppiMET"][0]), - met_config["puppiMETTruth"] - )]) - - assert all([x == y for x, y in zip( - list(turnon_collection.hists["ref"][0]), - met_config["genMETTruth"] - )]) + assert all( + [ + x == y + for x, y in zip( + list(turnon_collection.hists["puppiMET"][0]), + met_config["puppiMETTruth"], + ) + ] + ) + assert all( + [ + x == y + for x, y in zip( + list(turnon_collection.hists["ref"][0]), met_config["genMETTruth"] + ) + ] + ) diff --git a/menu_tools/object_performance/tests/test_turnon_collection.py b/menu_tools/object_performance/tests/test_turnon_collection.py index b887d795..4812dfa3 100644 --- a/menu_tools/object_performance/tests/test_turnon_collection.py +++ b/menu_tools/object_performance/tests/test_turnon_collection.py @@ -17,16 +17,13 @@ def test_select_highest_pt_ref_object(): TurnOnCollection._set_bins = MagicMock() turnon_collection = TurnOnCollection(None, None) arr_content = [[], [None]] + [ - [float(f"{i}.{k}") for k in range(3)] - for i in range(5) + [float(f"{i}.{k}") for k in range(3)] for i in range(5) ] - idx_empty = [i for i, x in enumerate(arr_content) - if len(x) == 0 or x[0] is None] + idx_empty = [i for i, x in enumerate(arr_content) if len(x) == 0 or x[0] is None] turnon_collection.ak_arrays = {} - turnon_collection.ak_arrays["ref"] = ak.Array({ - "pt": arr_content, - "other": arr_content - }) + turnon_collection.ak_arrays["ref"] = ak.Array( + {"pt": arr_content, "other": arr_content} + ) # Execute selection of highest pt reference object turnon_collection._select_highest_pt_ref_object() diff --git a/menu_tools/object_performance/tests/test_utils.py b/menu_tools/object_performance/tests/test_utils.py index 80c39253..75503876 100644 --- a/menu_tools/object_performance/tests/test_utils.py +++ 
b/menu_tools/object_performance/tests/test_utils.py @@ -2,10 +2,10 @@ def test_get_pdg_id(): - electrons = ['e', 'ele', 'electron'] - muons = ['mu', 'muon'] - taus = ['tau'] - photons = ['photon', 'gamma'] + electrons = ["e", "ele", "electron"] + muons = ["mu", "muon"] + taus = ["tau"] + photons = ["photon", "gamma"] for particle in electrons: assert utils.get_pdg_id(particle) == 11 @@ -21,12 +21,12 @@ def test_get_pdg_id(): def test_str_to_op(): - op_less_than = utils.str_to_op('<') - op_less_equal = utils.str_to_op('<=') - op_equal = utils.str_to_op('==') - op_unequal = utils.str_to_op('!=') - op_greater_than = utils.str_to_op('>') - op_greater_equal = utils.str_to_op('>=') + op_less_than = utils.str_to_op("<") + op_less_equal = utils.str_to_op("<=") + op_equal = utils.str_to_op("==") + op_unequal = utils.str_to_op("!=") + op_greater_than = utils.str_to_op(">") + op_greater_equal = utils.str_to_op(">=") assert op_less_than(2, 5) assert not op_less_than(5, 2) diff --git a/menu_tools/object_performance/turnon_collection.py b/menu_tools/object_performance/turnon_collection.py index 38806cfe..5d785c40 100644 --- a/menu_tools/object_performance/turnon_collection.py +++ b/menu_tools/object_performance/turnon_collection.py @@ -12,8 +12,7 @@ vector.register_awkward() -class ArrayLoader(): - +class ArrayLoader: def __init__(self, turnon_collection): self.turnon_collection = turnon_collection @@ -36,8 +35,8 @@ def _map_region(self, test_array, obj: str): Needed from V25 after the barrel and endcap collections have been merged. 
""" - if 'hgc' in test_array.fields: - test_array["region"] = (ak.where(abs(test_array["eta"]) > 1.479, 1, 0)) + if "hgc" in test_array.fields: + test_array["region"] = ak.where(abs(test_array["eta"]) > 1.479, 1, 0) return test_array @@ -55,10 +54,7 @@ def _load_array_from_parquet(self, obj: str): f"{obj}.parquet" ) array = ak.from_parquet(fname) - array_dict = { - self._transform_key(key, obj): array[key] - for key in array.fields - } + array_dict = {self._transform_key(key, obj): array[key] for key in array.fields} if self.turnon_collection.cfg_plot.reference_trafo: array = ak.Array(array_dict) else: @@ -95,8 +91,7 @@ def load_arrays(self): self._load_test_branches() -class TurnOnCollection(): - +class TurnOnCollection: def __init__(self, cfg_plot, threshold): self.cfg_plot = PlotConfig(cfg_plot) self.version = self.cfg_plot.version_ref_object @@ -131,19 +126,25 @@ def _match_test_to_ref(self): for test_obj, obj_cfg in self.cfg_plot.test_objects.items(): suffix = obj_cfg["suffix"].lower() ref_test = ak.cartesian( - {"ref": self.ak_arrays["ref"], - "test": self.ak_arrays[test_obj]}, - nested=True + {"ref": self.ak_arrays["ref"], "test": self.ak_arrays[test_obj]}, + nested=True, ) js, gs = ak.unzip(ref_test) dR = gs.deltaR(js) pass_dR = dR < self.cfg_plot.get_match_dR(test_obj) - pt_max = ak.argmax(ref_test["test"]["pt"][pass_dR], axis=-1, - keepdims=True) - if ("iso" not in suffix): - self.numerators["ref"][test_obj] = ref_test["ref"][suffix][pass_dR][pt_max][:, :, 0] # noqa - self.numerators["test"][test_obj] = ref_test["test"][suffix][pass_dR][pt_max][:, :, 0] # noqa + pt_max = ak.argmax(ref_test["test"]["pt"][pass_dR], axis=-1, keepdims=True) + if "iso" not in suffix: + self.numerators["ref"][test_obj] = ref_test["ref"][suffix][pass_dR][ + pt_max + ][ + :, :, 0 + ] # noqa + self.numerators["test"][test_obj] = ref_test["test"][suffix][pass_dR][ + pt_max + ][ + :, :, 0 + ] # noqa def _flatten_array(self, ak_array, ak_to_np=False): """ @@ -170,8 +171,8 @@ def 
_compute_MHT(self): _px = self.ak_arrays["ref"].px _py = self.ak_arrays["ref"].py _mht = np.sqrt( - ak.sum(_px[:, :], axis=-1, keepdims=True)**2 - + ak.sum(_py[:, :], axis=-1, keepdims=True)**2 + ak.sum(_px[:, :], axis=-1, keepdims=True) ** 2 + + ak.sum(_py[:, :], axis=-1, keepdims=True) ** 2 ) return _mht @@ -186,8 +187,7 @@ def _reduce_to_per_event(self): field = cfg["suffix"].lower() try: self.ak_arrays[test_obj][field] = ak.max( - self.ak_arrays[test_obj][field], - axis=1 + self.ak_arrays[test_obj][field], axis=1 ) except ValueError: pass @@ -202,10 +202,7 @@ def _apply_reference_trafo(self): return if trafo == "HT": - self.ak_arrays["ref"]["HT"] = ak.sum( - self.ak_arrays["ref"]["pt"], - axis=-1 - ) + self.ak_arrays["ref"]["HT"] = ak.sum(self.ak_arrays["ref"]["pt"], axis=-1) if trafo == "MHT": gen_mht = self._compute_MHT() @@ -225,7 +222,9 @@ def _apply_quality_cuts(self): return ## force quality bit to be int! - self.ak_arrays[test_obj]["quality"] = ak.values_astype(self.ak_arrays[test_obj]["quality"], np.int32) + self.ak_arrays[test_obj]["quality"] = ak.values_astype( + self.ak_arrays[test_obj]["quality"], np.int32 + ) quality = Quality(self.ak_arrays, test_obj) sel = ~getattr(quality, quality_id) @@ -242,11 +241,10 @@ def _apply_L1_isolation_cuts(self): iso_EE = self.cfg_plot.get_iso_EE(test_obj) l1_iso = self.cfg_plot.get_l1_iso(test_obj) - if ((iso_BB == -1) & (iso_EE == -1)): + if (iso_BB == -1) & (iso_EE == -1): continue - isolation = L1IsoCut(self.ak_arrays, test_obj, - iso_BB, iso_EE, l1_iso) + isolation = L1IsoCut(self.ak_arrays, test_obj, iso_BB, iso_EE, l1_iso) sel = ~getattr(isolation, "ISO_EEBB") self.ak_arrays[test_obj] = self.ak_arrays[test_obj][sel] @@ -262,9 +260,7 @@ def _select_highest_pt_ref_object(self): def _apply_list_of_reference_cuts(self, cut_list): for cut in cut_list: - cut = re.sub(r"{([^&|]*)}", - r"self.ak_arrays['ref']['\1']", - cut) + cut = re.sub(r"{([^&|]*)}", r"self.ak_arrays['ref']['\1']", cut) sel = eval(cut) 
self.ak_arrays["ref"] = self.ak_arrays["ref"][sel] @@ -301,28 +297,23 @@ def _apply_test_obj_cuts(self): if not (cuts := self.cfg_plot.get_object_cuts(test_obj)): continue for cut in cuts: - cut = re.sub(r"{([^&|]*)}", - r"self.ak_arrays[test_obj]['\1']", - cut) + cut = re.sub(r"{([^&|]*)}", r"self.ak_arrays[test_obj]['\1']", cut) sel = eval(cut) self.ak_arrays[test_obj] = self.ak_arrays[test_obj][sel] def _skim_to_hists(self): ref_field = self.cfg_plot.reference_field - if (trafo := self.cfg_plot.reference_trafo): + if trafo := self.cfg_plot.reference_trafo: ref_field = trafo for test_obj, cfg in self.cfg_plot.test_objects.items(): field = cfg["suffix"].lower() sel = self.ak_arrays[test_obj][field] > self.threshold - ak_array = self._flatten_array( - self.ak_arrays["ref"][sel][ref_field] - ) + ak_array = self._flatten_array(self.ak_arrays["ref"][sel][ref_field]) self.hists[test_obj] = np.histogram(ak_array, bins=self.bins) self.hists["ref"][test_obj] = np.histogram( - self._flatten_array(self.ak_arrays["ref"][ref_field]), - bins=self.bins + self._flatten_array(self.ak_arrays["ref"][ref_field]), bins=self.bins ) def _remove_inner_nones_zeros(self, arr): @@ -334,9 +325,7 @@ def _remove_inner_nones_zeros(self, arr): def _skim_to_hists_dR_matched(self): ref_field = self.cfg_plot.reference_field - ref_obj = self._remove_inner_nones_zeros( - self.ak_arrays["ref"][ref_field] - ) + ref_obj = self._remove_inner_nones_zeros(self.ak_arrays["ref"][ref_field]) for test_obj, cfg in self.cfg_plot.test_objects.items(): sel_threshold = self.numerators["test"][test_obj] >= self.threshold @@ -352,8 +341,7 @@ def _skim_to_hists_dR_matched(self): ref_obj = self.numerators["ref"][test_obj] ref_obj = self._remove_inner_nones_zeros(ref_obj) ref_flat_np = self._flatten_array(ref_obj, ak_to_np=True) - self.hists["ref"][test_obj] = np.histogram(ref_flat_np, - bins=self.bins) + self.hists["ref"][test_obj] = np.histogram(ref_flat_np, bins=self.bins) def _skim_to_hists_dR_matched_Iso(self): 
for test_obj, cfg in self.cfg_plot.test_objects.items(): diff --git a/menu_tools/utils/utils.py b/menu_tools/utils/utils.py index 8459458f..576cf067 100644 --- a/menu_tools/utils/utils.py +++ b/menu_tools/utils/utils.py @@ -12,12 +12,12 @@ def str_to_op(x: str): op_map = { - '<': operator.lt, - '<=': operator.le, - '==': operator.eq, - '!=': operator.ne, - '>=': operator.ge, - '>': operator.gt, + "<": operator.lt, + "<=": operator.le, + "==": operator.eq, + "!=": operator.ne, + ">=": operator.ge, + ">": operator.gt, } return op_map[x] @@ -38,14 +38,14 @@ def clopper_pearson_err(x_hist, n_hist, alpha=1 - 0.68, warn="ignore"): def get_pdg_id(particle: str): id_map = { - 'e': 11, - 'ele': 11, - 'electron': 11, - 'mu': 13, - 'muon': 13, - 'tau': 15, - 'photon': 22, - 'gamma': 22, + "e": 11, + "ele": 11, + "electron": 11, + "mu": 13, + "muon": 13, + "tau": 15, + "photon": 22, + "gamma": 22, } return id_map[particle.lower()] @@ -63,9 +63,7 @@ def get_branches(ntuple_path: str, tree: str, obj: str): else: prefix = "L1PhaseII/" - obj_branches = [ - x.removeprefix(prefix + obj) for x in all_branches if obj in x - ] + obj_branches = [x.removeprefix(prefix + obj) for x in all_branches if obj in x] return obj_branches @@ -90,6 +88,7 @@ def errf(x: float, a: float, b: float, c: float, d: float, e: float): _cdf_back = norm.cdf(a * (x - b), a * a * c * c, a * c) * (d - e) + e return _cdf_front - _exp_turnon * _cdf_back + ############## # Decorators # ############## @@ -100,6 +99,7 @@ def wrapper(*args, **kwargs): with warnings.catch_warnings(): warnings.simplefilter("ignore") return func(*args, **kwargs) + return wrapper @@ -110,9 +110,9 @@ def wrapper(*args, **kwargs): t0 = time.time() result = func(*args, **kwargs) t1 = time.time() - print(f"{task} completed in " - f"{timedelta(seconds=round(t1 - t0, 0))}s") + print(f"{task} completed in " f"{timedelta(seconds=round(t1 - t0, 0))}s") return result + return wrapper - return decorator + return decorator From 
687fcfb00b081b873ae021b5e383bc0e1f027807 Mon Sep 17 00:00:00 2001 From: Daniel Hundhausen Date: Wed, 17 Jan 2024 13:19:58 +0100 Subject: [PATCH 6/8] reintroduce flake8; consolidate github actions --- .flake8 | 4 +- .github/workflows/action.yml | 18 ----- .../workflows/{lint.yml => code_quality.yml} | 22 +++++- .../object_performance/compare_plots.py | 5 -- .../object_performance/plotBTagEfficiency.py | 1 - menu_tools/object_performance/plotter.py | 79 ++++++++++--------- .../object_performance/scaling_collection.py | 2 + .../tests/test_integration.py | 2 +- .../tests/test_turnon_collection.py | 2 +- .../object_performance/tests/test_utils.py | 2 +- pyproject.toml | 1 + 11 files changed, 70 insertions(+), 68 deletions(-) delete mode 100644 .github/workflows/action.yml rename .github/workflows/{lint.yml => code_quality.yml} (55%) diff --git a/.flake8 b/.flake8 index d9b24d51..c90c600d 100644 --- a/.flake8 +++ b/.flake8 @@ -1,3 +1,5 @@ [flake8] ignore = W391, W503 -max-line-length = 79 +max-line-length = 88 +extend-ignore = E203, E704, E266 +exclude = menu_tools/object_performance/quality_obj.py diff --git a/.github/workflows/action.yml b/.github/workflows/action.yml deleted file mode 100644 index ca4d7910..00000000 --- a/.github/workflows/action.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: Code quality -on: [push, pull_request, workflow_dispatch] - -jobs: - type-check: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: '3.11' - architecture: 'x64' - - run: | - python -m pip install --upgrade pip poetry - poetry install - - run: | - pip list - mypy menu_tools diff --git a/.github/workflows/lint.yml b/.github/workflows/code_quality.yml similarity index 55% rename from .github/workflows/lint.yml rename to .github/workflows/code_quality.yml index 66056563..43cbeadd 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/code_quality.yml @@ -1,9 +1,10 @@ -name: flake8, black Lint +name: Code 
quality on: [push, pull_request, workflow_dispatch] jobs: black-lint: runs-on: ubuntu-latest + name: black steps: - uses: actions/checkout@v3 - uses: psf/black@stable @@ -13,7 +14,7 @@ jobs: version: "~= 23.12" flake8-lint: runs-on: ubuntu-latest - name: Lint + name: flake8 steps: - name: Check out source repository uses: actions/checkout@v3 @@ -23,3 +24,20 @@ jobs: python-version: "3.11" - name: flake8 Lint uses: py-actions/flake8@v2 + with: + path: "./menu_tools" + mypy-type-check: + runs-on: ubuntu-latest + name: mypy + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + architecture: 'x64' + - run: | + python -m pip install --upgrade pip poetry + poetry install + - run: | + pip list + mypy menu_tools diff --git a/menu_tools/object_performance/compare_plots.py b/menu_tools/object_performance/compare_plots.py index ca52f1fb..06f8d3a0 100755 --- a/menu_tools/object_performance/compare_plots.py +++ b/menu_tools/object_performance/compare_plots.py @@ -3,15 +3,10 @@ import matplotlib.pyplot as plt import mplhep as hep -import numpy as np -from progress.bar import IncrementalBar import yaml import json -from menu_tools.object_performance.turnon_collection import TurnOnCollection -from menu_tools.object_performance.scaling_collection import ScalingCollection from menu_tools.object_performance.plotter import Plotter -from menu_tools.utils import utils plt.style.use(hep.style.CMS) diff --git a/menu_tools/object_performance/plotBTagEfficiency.py b/menu_tools/object_performance/plotBTagEfficiency.py index a9bc0ae8..2b4e6160 100755 --- a/menu_tools/object_performance/plotBTagEfficiency.py +++ b/menu_tools/object_performance/plotBTagEfficiency.py @@ -3,7 +3,6 @@ import matplotlib.pyplot as plt import mplhep as hep -import numpy as np import yaml import json diff --git a/menu_tools/object_performance/plotter.py b/menu_tools/object_performance/plotter.py index 2ff8fbc5..73ae6909 100755 --- 
a/menu_tools/object_performance/plotter.py +++ b/menu_tools/object_performance/plotter.py @@ -17,12 +17,12 @@ class Plotter: - outdir = "outputs/object_performance/" + outdir_base = "outputs/object_performance/" def _make_output_dirs(self, version: str): - os.makedirs(f"{self.outdir}/{version}/turnons", exist_ok=True) - os.makedirs(f"{self.outdir}/{version}/distributions", exist_ok=True) - os.makedirs(f"{self.outdir}/{version}/scalings", exist_ok=True) + os.makedirs(f"{self.outdir_base}/{version}/turnons", exist_ok=True) + os.makedirs(f"{self.outdir_base}/{version}/distributions", exist_ok=True) + os.makedirs(f"{self.outdir_base}/{version}/scalings", exist_ok=True) def _create_new_plot(self): fig, ax = plt.subplots(figsize=(10, 10)) @@ -36,22 +36,27 @@ def __init__(self, name, cfg, turnon_collection): self.cfg = cfg self.turnon_collection = turnon_collection self.version = self.turnon_collection.version + self.threshold = self.turnon_collection.threshold self.bin_width = turnon_collection.cfg_plot.bin_width + @property + def _outdir_turnons(self) -> str: + return os.path.join(self.outdir_base, self.version, "turnons") + + @property + def _outdir_distributions(self) -> str: + return os.path.join(self.outdir_base, self.version, "distributions") + def _style_plot(self, fig, ax, legend_loc="lower right"): - ax.axvline(self.turnon_collection.threshold, ls=":", c="k") + ax.axvline(self.threshold, ls=":", c="k") ax.axhline(1, ls=":", c="k") ax.legend(loc=legend_loc, frameon=False) ax.set_xlabel(rf"{self.cfg['xlabel']}") - ylabel = self.cfg["ylabel"].replace( - "", str(self.turnon_collection.threshold) - ) + ylabel = self.cfg["ylabel"].replace("", str(self.threshold)) ax.set_ylabel(rf"{ylabel}") ax.set_xlim(self.cfg["binning"]["min"], self.cfg["binning"]["max"]) ax.tick_params(direction="in") - watermark = ( - f"{self.version}_{self.plot_name}_" f"{self.turnon_collection.threshold}" - ) + watermark = f"{self.version}_{self.plot_name}_" f"{self.threshold}" ax.text( 0, -0.1, 
@@ -67,12 +72,8 @@ def _save_json(self, file_name): plot = {} xlabel = self.cfg["xlabel"] - ylabel = self.cfg["ylabel"].replace( - "", str(self.turnon_collection.threshold) - ) - watermark = ( - f"{self.version}_{self.plot_name}_" f"{self.turnon_collection.threshold}" - ) + ylabel = self.cfg["ylabel"].replace("", str(self.threshold)) + watermark = f"{self.version}_{self.plot_name}_" f"{self.threshold}" plot["xlabel"] = xlabel plot["ylabel"] = ylabel @@ -158,14 +159,16 @@ def _plot_efficiency_curve(self): self._style_plot(fig, ax) ax.set_ylim(0, 1.1) - plot_fname = f"{self.outdir}/{self.version}/turnons/{self.plot_name}_{self.turnon_collection.threshold}_{self.version}" - for ext in [".png", ".pdf"]: - plt.savefig(f"{plot_fname}{ext}") - self._save_json(f"{plot_fname}.json") - ## save config - with open(f"{plot_fname}.yaml", "w") as outfile: - yaml.dump({self.plot_name: self.cfg}, outfile, default_flow_style=False) + # Save figure + plot_fname = f"{self.plot_name}_{self.threshold}_{self.version}" + plt.savefig(os.path.join(self._outdir_turnons, f"{plot_fname}.png")) + plt.savefig(os.path.join(self._outdir_turnons, f"{plot_fname}.pdf")) + self._save_json(os.path.join(self._outdir_turnons, f"{plot_fname}.json")) + + # Save config + with open(os.path.join(self._outdir_turnons, f"{plot_fname}.json"), "w") as f: + yaml.dump({self.plot_name: self.cfg}, f, default_flow_style=False) plt.close() @@ -190,14 +193,15 @@ def _plot_iso_vs_efficiency_curve(self): self._style_plot(fig, ax) - plot_fname = f"{self.outdir}/{self.version}/turnons/{self.plot_name}_{self.turnon_collection.threshold}_{self.version}" - for ext in [".png", ".pdf"]: - plt.savefig(f"{plot_fname}{ext}") - self._save_json(f"{plot_fname}.json") + # Save figure + plot_fname = f"{self.plot_name}_{self.threshold}_{self.version}" + plt.savefig(os.path.join(self._outdir_turnons, f"{plot_fname}.png")) + plt.savefig(os.path.join(self._outdir_turnons, f"{plot_fname}.pdf")) + 
self._save_json(os.path.join(self._outdir_turnons, f"{plot_fname}.json")) - ## save config - with open(f"{plot_fname}.yaml", "w") as outfile: - yaml.dump({self.plot_name: self.cfg}, outfile, default_flow_style=False) + # Save config + with open(os.path.join(self._outdir_turnons, f"{plot_fname}.json"), "w") as f: + yaml.dump({self.plot_name: self.cfg}, f, default_flow_style=False) plt.close() @@ -245,10 +249,10 @@ def _plot_raw_counts(self): ) self._style_plot(fig, ax) - plot_fname = f"{self.outdir}/{self.version}/distributions/{self.plot_name}_{self.turnon_collection.threshold}_dist_{self.version}" - for ext in [".png", ".pdf"]: - plt.savefig(f"{plot_fname}{ext}") - # self._save_json(f"{plot_fname}.json") + # Save figure + plot_fname = f"{self.plot_name}_{self.threshold}_dist_{self.version}" + plt.savefig(os.path.join(self._outdir_distributions, f"{plot_fname}.png")) + plt.savefig(os.path.join(self._outdir_distributions, f"{plot_fname}.pdf")) plt.close() @@ -335,8 +339,7 @@ def _set_plot_ranges(self, ax): ax.set_xlim(0, xmax) ax.set_ylim(0, ymax) - def _save_json(self, file_name): - # file_name = = f"{self.outdir}/{self.version}/scalings/{self.plot_name}.json" + def _save_json(self, fpath: str) -> None: plot = {} watermark = f"{self.version}_{self.plot_name}" @@ -359,7 +362,7 @@ def _save_json(self, file_name): plot[obj] = _object - with open(f"{file_name}", "w") as outfile: + with open(fpath, "w") as outfile: outfile.write(json.dumps(plot, indent=4)) def plot(self): diff --git a/menu_tools/object_performance/scaling_collection.py b/menu_tools/object_performance/scaling_collection.py index 1f8f94ed..8ef29af4 100644 --- a/menu_tools/object_performance/scaling_collection.py +++ b/menu_tools/object_performance/scaling_collection.py @@ -236,6 +236,8 @@ def _compute_scalings( return self._compute_scalings_naive( turnon_collection, test_obj, scalings, scaling_pct ) + else: + raise ValueError(f"`{method}` is not a valid scaling method!") def _fit_linear_functions(self, 
scalings): params = {} diff --git a/menu_tools/object_performance/tests/test_integration.py b/menu_tools/object_performance/tests/test_integration.py index cf0c395f..e0d2e7e7 100644 --- a/menu_tools/object_performance/tests/test_integration.py +++ b/menu_tools/object_performance/tests/test_integration.py @@ -1,4 +1,4 @@ -from turnon_collection import TurnOnCollection +from menu_tools.object_performance.turnon_collection import TurnOnCollection def off_test_turnon_collection_met(met_config): diff --git a/menu_tools/object_performance/tests/test_turnon_collection.py b/menu_tools/object_performance/tests/test_turnon_collection.py index 4812dfa3..5ee90a70 100644 --- a/menu_tools/object_performance/tests/test_turnon_collection.py +++ b/menu_tools/object_performance/tests/test_turnon_collection.py @@ -2,7 +2,7 @@ import awkward as ak -from turnon_collection import TurnOnCollection +from menu_tools.object_performance.turnon_collection import TurnOnCollection def test_select_highest_pt_ref_object(): diff --git a/menu_tools/object_performance/tests/test_utils.py b/menu_tools/object_performance/tests/test_utils.py index 75503876..53a39008 100644 --- a/menu_tools/object_performance/tests/test_utils.py +++ b/menu_tools/object_performance/tests/test_utils.py @@ -1,4 +1,4 @@ -import utils +from menu_tools.object_performance import utils def test_get_pdg_id(): diff --git a/pyproject.toml b/pyproject.toml index cecb59bb..90255a10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ vector = "1.1.1.post1" [tool.poetry.group.dev.dependencies] black = "23.12.1" mypy = "1.8.0" +flake8 = "^7.0.0" [tool.poetry.group.test.dependencies] pytest = "7.4.3" From 1fe16ed078c0c387ad8e90880788ffa6d266f1e3 Mon Sep 17 00:00:00 2001 From: Daniel Hundhausen Date: Wed, 17 Jan 2024 13:42:50 +0100 Subject: [PATCH 7/8] fix mypy issues --- menu_tools/object_performance/plot_config.py | 9 ++++++--- menu_tools/object_performance/plotter.py | 10 ++++------ 
menu_tools/object_performance/scaling_collection.py | 2 +- menu_tools/object_performance/tests/test_utils.py | 2 +- pyproject.toml | 3 ++- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/menu_tools/object_performance/plot_config.py b/menu_tools/object_performance/plot_config.py index f1f7f417..d4189494 100644 --- a/menu_tools/object_performance/plot_config.py +++ b/menu_tools/object_performance/plot_config.py @@ -1,9 +1,12 @@ +from typing import Any + + class PlotConfig: - def __init__(self, cfg: dict): + def __init__(self, cfg: dict[str, Any]): self._cfg = cfg @property - def sample(self): + def sample(self) -> str: return self._cfg["sample"] @property @@ -48,7 +51,7 @@ def reference_trafo(self): return None @property - def test_objects(self): + def test_objects(self) -> dict[str, dict]: return self._cfg["test_objects"] def get_match_dR(self, test_obj): diff --git a/menu_tools/object_performance/plotter.py b/menu_tools/object_performance/plotter.py index 73ae6909..f20663e5 100755 --- a/menu_tools/object_performance/plotter.py +++ b/menu_tools/object_performance/plotter.py @@ -1,4 +1,5 @@ import argparse +from typing import Any import os import matplotlib.pyplot as plt @@ -19,12 +20,12 @@ class Plotter: outdir_base = "outputs/object_performance/" - def _make_output_dirs(self, version: str): + def _make_output_dirs(self, version: str) -> None: os.makedirs(f"{self.outdir_base}/{version}/turnons", exist_ok=True) os.makedirs(f"{self.outdir_base}/{version}/distributions", exist_ok=True) os.makedirs(f"{self.outdir_base}/{version}/scalings", exist_ok=True) - def _create_new_plot(self): + def _create_new_plot(self) -> tuple[plt.Figure, plt.Axes]: fig, ax = plt.subplots(figsize=(10, 10)) hep.cms.label(ax=ax, llabel="Phase-2 Simulation", com=14) return fig, ax @@ -340,10 +341,7 @@ def _set_plot_ranges(self, ax): ax.set_ylim(0, ymax) def _save_json(self, fpath: str) -> None: - plot = {} - - watermark = f"{self.version}_{self.plot_name}" - plot["watermark"] = 
watermark + plot: dict[str, Any] = {"watermark": f"{self.version}_{self.plot_name}"} for obj, points in self.scalings.items(): _object = {} diff --git a/menu_tools/object_performance/scaling_collection.py b/menu_tools/object_performance/scaling_collection.py index 8ef29af4..f219d200 100644 --- a/menu_tools/object_performance/scaling_collection.py +++ b/menu_tools/object_performance/scaling_collection.py @@ -18,7 +18,7 @@ def __init__(self, cfg: PlotConfig, method: str, plateau_pct: float = 0.95): self.cfg = cfg self.method = method self.plateau_pct = plateau_pct - self.scalings = {x: {} for x in self.cfg["test_objects"]} + self.scalings: dict[str, dict] = {x: {} for x in self.cfg["test_objects"]} self.fit_function_params = None def _find_percentage_point(self, hist, bins, scaling_pct): diff --git a/menu_tools/object_performance/tests/test_utils.py b/menu_tools/object_performance/tests/test_utils.py index 53a39008..7df338ed 100644 --- a/menu_tools/object_performance/tests/test_utils.py +++ b/menu_tools/object_performance/tests/test_utils.py @@ -1,4 +1,4 @@ -from menu_tools.object_performance import utils +from menu_tools.object_performance.utils import utils def test_get_pdg_id(): diff --git a/pyproject.toml b/pyproject.toml index 90255a10..f70ff4f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,7 @@ testpaths = [ [tool.mypy] disable_error_code = [ - "import-untyped" + "import-untyped", + "index" ] explicit_package_bases = true From a0add9361ad8dd137bc04b9b99580e6a6f0c53d7 Mon Sep 17 00:00:00 2001 From: Daniel Hundhausen Date: Wed, 17 Jan 2024 14:02:35 +0100 Subject: [PATCH 8/8] fix mypy not found in CI --- .github/workflows/code_quality.yml | 4 +--- pyproject.toml | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml index 43cbeadd..87d97b29 100644 --- a/.github/workflows/code_quality.yml +++ b/.github/workflows/code_quality.yml @@ -38,6 +38,4 @@ jobs: - run: | 
python -m pip install --upgrade pip poetry poetry install - - run: | - pip list - mypy menu_tools + poetry run mypy diff --git a/pyproject.toml b/pyproject.toml index f70ff4f5..974033f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,9 @@ testpaths = [ ] [tool.mypy] +files = [ + "menu_tools" +] disable_error_code = [ "import-untyped", "index"