diff --git a/ctapipe/core/tool.py b/ctapipe/core/tool.py index 8c42930df43..71c8bbd0066 100644 --- a/ctapipe/core/tool.py +++ b/ctapipe/core/tool.py @@ -1,21 +1,29 @@ """Classes to handle configurable command-line user interfaces.""" import logging import logging.config -import textwrap -from abc import abstractmethod -import pathlib import os +import pathlib import re +import textwrap +from abc import abstractmethod +from typing import Union +import yaml -from traitlets import default -from traitlets.config import Application, Configurable +try: + import tomli as toml + + HAS_TOML = True +except ImportError: + HAS_TOML = False + +from traitlets import default, List +from traitlets.config import Application, Config, Configurable from .. import __version__ as version -from .traits import Path, Enum, Bool, Dict from . import Provenance from .component import Component -from .logging import create_logging_config, ColoredFormatter, DEFAULT_LOGGING - +from .logging import DEFAULT_LOGGING, ColoredFormatter, create_logging_config +from .traits import Bool, Dict, Enum, Path __all__ = ["Tool", "ToolConfigurationError"] @@ -119,16 +127,21 @@ def main(): """ - config_file = Path( - exists=True, - directory_ok=False, - allow_none=True, - default_value=None, - help=( - "name of a configuration file with " - "parameters to load in addition to " - "command-line parameters" - ), + config_files = List( + trait=Path( + exists=True, + directory_ok=False, + allow_none=True, + default_value=None, + help=( + "List of configuration files with parameters to load " + "in addition to command-line parameters. " + "The order listed is the order of precendence (later config parameters " + "overwrite earlier ones), however parameters specified on the " + "command line always have the highest precendence. 
" + "Config files may be in JSON, YAML, TOML, or Python format" + ), + ) ).tag(config=True) log_config = Dict(default_value=DEFAULT_LOGGING).tag(config=True) @@ -158,7 +171,7 @@ def __init__(self, **kwargs): # make sure there are some default aliases in all Tools: super().__init__(**kwargs) aliases = { - ("c", "config"): "Tool.config_file", + ("c", "config"): "Tool.config_files", "log-level": "Tool.log_level", ("l", "log-file"): "Tool.log_file", "log-file-level": "Tool.log_file_level", @@ -184,16 +197,19 @@ def __init__(self, **kwargs): self.update_logging_config() def initialize(self, argv=None): - """ handle config and any other low-level setup """ + """handle config and any other low-level setup""" self.parse_command_line(argv) self.update_logging_config() - if self.config_file is not None: - self.log.debug(f"Loading config from '{self.config_file}'") + if self.config_files is not None: + self.log.info("Loading config from '%s'", self.config_files) try: - self.load_config_file(self.config_file) + for config_file in self.config_files: + self.load_config_file(config_file) except Exception as err: - raise ToolConfigurationError(f"Couldn't read config file: {err}") + raise ToolConfigurationError( + f"Couldn't read config file: {err} ({type(err)})" + ) from err # ensure command-line takes precedence over config file options: self.update_config(self.cli_config) @@ -201,6 +217,34 @@ def initialize(self, argv=None): self.log.info(f"ctapipe version {self.version_string}") + def load_config_file(self, path: Union[str, pathlib.Path]) -> None: + """ + Load a configuration file in one of the supported formats, and merge it with + the current config if it exists. + + Parameters + ---------- + path: Union[str, pathlib.Path] + config file to load. 
[yaml, toml, json, py] formats are supported + """ + + path = pathlib.Path(path) + + if path.suffix in [".yaml", ".yml"]: + # do our own YAML loading + with open(path, "r") as infile: + config = Config(yaml.safe_load(infile)) + self.update_config(config) + elif path.suffix == ".toml" and HAS_TOML: + with open(path, "rb") as infile: + config = Config(toml.load(infile)) + self.update_config(config) + else: + # fall back to traitlets.config.Application's implementation + super().load_config_file(str(path)) + + Provenance().add_input_file(path, role="Tool Configuration") + def update_logging_config(self): """Update the configuration of loggers.""" cfg = create_logging_config( @@ -330,11 +374,11 @@ def write_provenance(self): @property def version_string(self): - """ a formatted version string with version, release, and git hash""" + """a formatted version string with version, release, and git hash""" return f"{version}" def get_current_config(self): - """ return the current configuration as a dict (e.g. the values + """return the current configuration as a dict (e.g. the values of all traits, even if they were not set during configuration) """ conf = { @@ -350,7 +394,7 @@ def get_current_config(self): return conf def _repr_html_(self): - """ nice HTML rep, with blue for non-default values""" + """nice HTML rep, with blue for non-default values""" traits = self.traits() name = self.__class__.__name__ lines = [ @@ -475,7 +519,6 @@ def run_tool(tool: Tool, argv=None, cwd=None): # switch to cwd for running and back after os.chdir(cwd) tool.run(argv or []) - return 0 except SystemExit as e: return e.code finally: diff --git a/ctapipe/resources/base_config.yaml b/ctapipe/resources/base_config.yaml new file mode 100644 index 00000000000..713bf491c74 --- /dev/null +++ b/ctapipe/resources/base_config.yaml @@ -0,0 +1,111 @@ +# ========================================================================== +# ctapipe-process configuration file. 
+# version: VERSION +# +# This configuration contains a subset of options needed for a basic analysis. +# Not all possible options are shown. To get a complete list, run: +# +# `ctapipe-process --help-all` +# +# ========================================================================== + +DataWriter: + Contact: + # please fill in your contact information here. It will be stored in the + # output files as provenance information + name: YOUR-NAME-HERE + email: YOUREMAIL@EXAMPLE.ORG + organization: YOUR-ORGANIZATION + + # options that control what is stored in the output file by default here we + # write nothing (can be overridden on the command-line or in subsequent config + # files) + overwrite: false # do not overwrite existing files + write_images: false # store DL1 images + write_parameters: false # store DL1 parameters + write_stereo_shower: false # store DL2 stereo geometry + write_raw_waveforms: false # write R0 waveforms + write_waveforms: false # write R1 waveforms + +# The CameraCalibrator takes data from R1 or DL0 to DL1a level, applying finer +# calibration and turning waveforms into images. It is run only if DL1a images +# do not already exist in the input file. +CameraCalibrator: + # Choose an extractor type from the following possibilities: + #'FullWaveformSum', 'FixedWindowSum', 'GlobalPeakWindowSum', + #'LocalPeakWindowSum', 'SlidingWindowMaxSum', 'NeighborPeakWindowSum', + #'TwoPassWindowSum', 'BaselineSubtractedNeighborPeakWindowSum' + # + # Note this is a telescope-wise parameter, so can be specified per telescope + # if necessary (see below for an example) + image_extractor_type: NeighborPeakWindowSum + +# The ImageProcessor performs the DL1a-> DL1b (image parameters) transition. 
It +# is run only if the parameter `DataWriter.write_parameters=True` is set and the +# parameters don't already exist in the input file (or if the user forces them +# to be re-computed using DataWriter.recompute_dl1=True) +ImageProcessor: + # The image cleaner selects pixels which have signal in them and rejects those + # without. Options are: 'TailcutsImageCleaner', 'MARSImageCleaner', + # 'FACTImageCleaner' + image_cleaner_type: TailcutsImageCleaner + + # make sure you include a configuration for the image cleaner you selected + # above here. The section named by the image_cleaner_type will be used to + # configure it. + TailcutsImageCleaner: + # the thresholds for this image cleaner must be optimized for the data set + # you are analyzing. The defaults may not be correct and should be optimized + # for the given use case. + # + # These are telescope-wise parameters, where the options are patterns + # specified in a list in order of precedence, with later options overwriting + # earlier ones. Each pattern is a triplet of [scope, key, value], where the + # scope can be "type" (matching to the telescope type name) or "id" + # (matching a specific telescope ID number). In the case of "type", the key + # should be either a telescope type string, or part of one with "*" used as + # a wildcard match (e.g. "LST*" would match all LST telescopes). You can + # specify a universal default value using "*" as a key. Note that specifying + # a single value is equivalent to specifying a one-item list containing + # [type, '*', value].
+ picture_threshold_pe: # top-level threshold in photoelectrons + - [type, "LST*", 6.0] + - [type, "MST*NectarCam", 8.0] + - [type, "MST*FlashCam", 10000] # disabled for now + - [type, "SST_ASTRI_CHEC", 4.0] + boundary_threshold_pe: # second-level threshold in photoelectrons + - [type, "LST*", 3.0] + - [type, "MST*NectarCam", 4.0] + - [type, "MST*FlashCam", 10000] # disabled for now + - [type, "SST_ASTRI_CHEC", 2.0] + keep_isolated_pixels: False # If False, pixels with < min_picture_neighbors are removed. + min_picture_neighbors: 2 # Minimum number of neighbors above threshold to consider + + # Choose which images should be parameterized: + ImageQualityQuery: + # quality criteria should be a list of pairs of [name, + # filter_function_string]. The filter function should take a single value + # which is the image itself, a 1D np.array. + quality_criteria: + - ["enough_pixels", "lambda im: np.count_nonzero(im) > 2"] + - ["enough_charge", "lambda im: im.sum() > 50"] + +# The ShowerProcessor performs the DL1 to DL2a (reconstructed shower geometry) +# transition. It is run only if the parameter DataWriter.write_stereo_shower=True. +ShowerProcessor: + # choose between HillasReconstructor and HillasIntersection (two + # implementations of the standard stereo line-intersection method) + reconstructor_type: HillasReconstructor + + HillasReconstructor: + # Choose which telescope events should be included in the reconstruction. + StereoQualityQuery: + # the quality criteria here should again be a list of [name, + # filter_function_string] pairs, with filter functions that take the set of + # image parameters, `p` (a `ctapipe.containers.ImageParametersContainer`), as + # an argument.
+ quality_criteria: + - [enough intensity, "lambda p: p.hillas.intensity > 50"] + - [Positive width, "lambda p: p.hillas.width.value > 0"] + - [enough pixels, "lambda p: p.morphology.num_pixels > 3"] + - [not clipped, "lambda p: p.leakage.intensity_width_2 < 0.8"] diff --git a/ctapipe/tools/tests/resources/image_modification_config.json b/ctapipe/resources/image_modification_config.json similarity index 100% rename from ctapipe/tools/tests/resources/image_modification_config.json rename to ctapipe/resources/image_modification_config.json diff --git a/ctapipe/tools/tests/resources/stage1_config.json b/ctapipe/resources/stage1_config.json similarity index 100% rename from ctapipe/tools/tests/resources/stage1_config.json rename to ctapipe/resources/stage1_config.json diff --git a/ctapipe/resources/stage1_config.toml b/ctapipe/resources/stage1_config.toml new file mode 100644 index 00000000000..4a2b1740233 --- /dev/null +++ b/ctapipe/resources/stage1_config.toml @@ -0,0 +1,55 @@ +[DataWriter.Contact] +# please fill in your contact information here. 
It will be stored in the +# output files as provenance information +name = "YOUR-NAME-HERE" +email = "YOUREMAIL@EXAMPLE.ORG" +organization = "YOUR-ORGANIZATION" + +[DataWriter] +# options that control what is stored in the output file +overwrite = false +write_images = true +write_parameters = true +write_stereo_shower = false +write_mono_shower = false +transform_image = true +transform_peak_time = true + +[CameraCalibrator] +image_extractor_type = "NeighborPeakWindowSum" + +[ImageProcessor] +image_cleaner_type = "TailcutsImageCleaner" + + + +[ImageProcessor.TailcutsImageCleaner] +picture_threshold_pe = [ + [ "type", "*", 10.0,], + [ "type", "LST_LST_LSTCam", 5.0,], + [ "type", "MST_MST_NectarCam", 5.0,], + [ "type", "SST_ASTRI_CHEC", 3.0,], +] +boundary_threshold_pe = [ + [ "type", "*", 5.0,], + [ "type", "LST_LST_LSTCam", 2.5,], + [ "type", "MST_MST_NectarCam", 2.5,], + [ "type", "SST_ASTRI_CHEC", 1.5,], +] +min_picture_neighbors = [ [ "type", "*", 2,],] + +[ImageProcessor.ImageQualityQuery] +# These specify which images should be parameterized: +quality_criteria = [ + [ "enough_pixels", "lambda im: np.count_nonzero(im) > 2",], + [ "enough_charge", "lambda im: im.sum() > 50",] +] + +[ShowerProcessor.ShowerQualityQuery] +# These specify criteria for telescopes that should be included in stereo +# reconstruction: +quality_criteria = [ + [ "enough intensity", "lambda p: p.hillas.intensity > 50",], + [ "Positive width", "lambda p: p.hillas.width.value > 0",], + [ "enough pixels", "lambda p: p.morphology.num_pixels > 3",], +] diff --git a/ctapipe/resources/stage1_config.yaml b/ctapipe/resources/stage1_config.yaml new file mode 100644 index 00000000000..77bac36c3c7 --- /dev/null +++ b/ctapipe/resources/stage1_config.yaml @@ -0,0 +1,15 @@ +# ====================================================================== +# ctapipe-process configuration file. 
+# version: VERSION +# +# Perform Stage 1 Processing +# +# This configuration enables options needed for Rx to DL1b (image parameters) +# ====================================================================== +# +# Make sure you first include `--config base_config.yaml` before including this file + +DataWriter: + write_images: true + write_parameters: true + write_stereo_shower: false diff --git a/ctapipe/tools/tests/resources/stage2_config.json b/ctapipe/resources/stage2_config.json similarity index 100% rename from ctapipe/tools/tests/resources/stage2_config.json rename to ctapipe/resources/stage2_config.json diff --git a/ctapipe/resources/stage2_config.yaml b/ctapipe/resources/stage2_config.yaml new file mode 100644 index 00000000000..00ae9725b94 --- /dev/null +++ b/ctapipe/resources/stage2_config.yaml @@ -0,0 +1,16 @@ +# ====================================================================== +# ctapipe-process configuration file. +# version: VERSION +# +# Perform Stage 2 Processing +# +# This configuration enables options needed for Rx to DL2a (shower +# reconstruction), but not including any image parameters +# ====================================================================== +# +# Make sure you first include `--config base_config.yaml` before including this file + +DataWriter: + write_images: false + write_parameters: false + write_stereo_shower: true diff --git a/ctapipe/tools/tests/resources/training_config.json b/ctapipe/resources/training_config.json similarity index 100% rename from ctapipe/tools/tests/resources/training_config.json rename to ctapipe/resources/training_config.json diff --git a/ctapipe/resources/training_config.yaml b/ctapipe/resources/training_config.yaml new file mode 100644 index 00000000000..3258342407f --- /dev/null +++ b/ctapipe/resources/training_config.yaml @@ -0,0 +1,17 @@ +# ====================================================================== +# ctapipe-process configuration file.
+# version: VERSION +# +# Perform Stage 1 and Stage 2a processing to generate features needed for +# training machine learning algorithms. +# +# Note that here write_images is disabled by default (to avoid very large +# files), however for training deep learning algorithms, it should be turned on. +# ====================================================================== +# +# Make sure you first include `--config base_config.yaml` before including this file + +DataWriter: + write_images: false + write_parameters: true + write_stereo_shower: true diff --git a/ctapipe/tools/quickstart.py b/ctapipe/tools/quickstart.py index 678c5978b36..a4c2446ec67 100644 --- a/ctapipe/tools/quickstart.py +++ b/ctapipe/tools/quickstart.py @@ -14,7 +14,12 @@ __all__ = ["QuickStartTool"] -CONFIGS_TO_WRITE = ["stage1_config.json", "stage2_config.json", "training_config.json"] +CONFIGS_TO_WRITE = [ + "base_config.yaml", + "stage1_config.yaml", + "stage2_config.yaml", + "training_config.yaml", +] README_TEXT = f""" ctapipe working directory @@ -23,19 +28,25 @@ This working directory contains some example configuration files that are useful for processing data with `ctapipe-process`. 
These include: -- stage1_config.json: generate DL1 data from lower data levels -- stage2_config.json: generate DL2 shower geometry from DL1 or lower levels -- training_config.json: generate both DL1 parameter and DL2 shower geometry data +- base_config.yaml: standard configuration options, to be included always -You can modify these to change the output, and run ctapipe using: +In addition several sub-configurations to be included after base_config.yaml + +- stage1_config.yaml: generate DL1 data from lower data levels +- stage2_config.yaml: generate DL2 shower geometry from DL1 or lower levels +- training_config.yaml: generate both DL1 parameter and DL2 shower geometry + data, useful for training ML algorithms + +You can modify these to change the output, and run ctapipe by including both the +base config plus one additional configuration using: ``` -ctapipe-process --config --input --output +ctapipe-process --config base_config.yaml --config --input --output ``` -Where is any ctapipe-readable event file at a lower or equal data -level to the one requested to be produced, and is one of the -configuration files generated by `ctapipe-quickstart`. +Where is one of the non-base configs above, is any +ctapipe-readable event file at a lower or equal data level to the one requested +to be produced. 
Details about all configuration options can be found by running: @@ -127,12 +138,13 @@ def setup(self): "YOUR-NAME-HERE": self.contact_name, "YOUREMAIL@EXAMPLE.ORG": self.contact_email, "YOUR-ORGANIZATION": self.contact_organization, + "VERSION": VERSION, } def start(self): for filename in CONFIGS_TO_WRITE: - config = files("ctapipe.tools.tests").joinpath("resources", filename) + config = files("ctapipe").joinpath("resources", filename) destination = self.workdir / filename if destination.exists(): @@ -156,7 +168,7 @@ def finish(self): def main(): - """ run the tool""" + """run the tool""" tool = QuickStartTool() tool.run() diff --git a/ctapipe/tools/tests/test_merge.py b/ctapipe/tools/tests/test_merge.py index c1d2408006c..446bff21afe 100644 --- a/ctapipe/tools/tests/test_merge.py +++ b/ctapipe/tools/tests/test_merge.py @@ -18,7 +18,7 @@ def run_stage1(input_path, cwd, output_path=None): - config = files("ctapipe.tools.tests").joinpath("resources", "stage1_config.json") + config = files("ctapipe").joinpath("resources", "stage1_config.json") if output_path is None: output_path = Path( diff --git a/ctapipe/tools/tests/test_process.py b/ctapipe/tools/tests/test_process.py index f3eef639ab3..d8f1a4b88ec 100644 --- a/ctapipe/tools/tests/test_process.py +++ b/ctapipe/tools/tests/test_process.py @@ -22,13 +22,56 @@ def resource_file(filename): - return files("ctapipe.tools.tests").joinpath("resources", filename) + return files("ctapipe").joinpath("resources", filename) + + +@pytest.mark.parametrize( + "config_files", + [ + ("base_config.yaml", "stage1_config.yaml"), + ("stage1_config.toml",), + ("stage1_config.json",), + ], +) +def test_read_yaml_toml_json_config(dl1_image_file, config_files): + """check that we can read multiple formats of config file""" + tool = ProcessorTool() + for config_base in config_files: + config = resource_file(config_base) + tool.load_config_file(config) -def test_stage_1_dl1(tmp_path, dl1_image_file, dl1_parameters_file): - """ check simtel 
to DL1 conversion """ + tool.config.EventSource.input_url = dl1_image_file + tool.config.DataWriter.overwrite = True + tool.setup() + assert ( + tool.get_current_config()["ProcessorTool"]["DataWriter"]["contact_info"].name + == "YOUR-NAME-HERE" + ) + + +def test_multiple_configs(dl1_image_file): + """ensure a config file loaded later overwrites keys from an earlier one""" + tool = ProcessorTool() + + tool.load_config_file(resource_file("base_config.yaml")) + tool.load_config_file(resource_file("stage2_config.yaml")) + tool.config.EventSource.input_url = dl1_image_file + tool.config.DataWriter.overwrite = True + tool.setup() + + # ensure the overwriting works (base config has this option disabled) + assert ( + tool.get_current_config()["ProcessorTool"]["DataWriter"]["write_stereo_shower"] + == True + ) + + +def test_stage_1_dl1(tmp_path, dl1_image_file, dl1_parameters_file): + """check simtel to DL1 conversion""" config = resource_file("stage1_config.json") + # DL1A file as input dl1b_from_dl1a_file = tmp_path / "dl1b_fromdl1a.dl1.h5" assert ( @@ -102,7 +145,7 @@ def test_stage1_datalevels(tmp_path): """test the dl1 tool on a file not providing r1, dl0 or dl1a""" class DummyEventSource(EventSource): - """ for testing """ + """for testing""" @staticmethod def is_compatible(file_path): @@ -157,7 +200,7 @@ def _generator(self): def test_stage_2_from_simtel(tmp_path): - """ check we can go to DL2 geometry from simtel file """ + """check we can go to DL2 geometry from simtel file""" config = resource_file("stage2_config.json") output = tmp_path / "test_stage2_from_simtel.DL2.h5" @@ -182,7 +225,7 @@ def test_stage_2_from_simtel(tmp_path): def test_stage_2_from_dl1_images(tmp_path, dl1_image_file): - """ check we can go to DL2 geometry from DL1 images """ + """check we can go to DL2 geometry from DL1 images""" config = resource_file("stage2_config.json") output = tmp_path / "test_stage2_from_dl1image.DL2.h5" @@ -206,7 +249,7 @@ def test_stage_2_from_dl1_images(tmp_path, 
dl1_image_file): def test_stage_2_from_dl1_params(tmp_path, dl1_parameters_file): - """ check we can go to DL2 geometry from DL1 parameters """ + """check we can go to DL2 geometry from DL1 parameters""" config = resource_file("stage2_config.json") output = tmp_path / "test_stage2_from_dl1param.DL2.h5" @@ -231,7 +274,7 @@ def test_stage_2_from_dl1_params(tmp_path, dl1_parameters_file): def test_training_from_simtel(tmp_path): - """ check we can write both dl1 and dl2 info (e.g. for training input) """ + """check we can write both dl1 and dl2 info (e.g. for training input)""" config = resource_file("training_config.json") output = tmp_path / "test_training.DL1DL2.h5" @@ -288,9 +331,11 @@ def test_image_modifications(tmp_path, dl1_image_file): assert modified_images["image"].sum() / unmodified_images["image"].sum() > 1.5 -@pytest.mark.parametrize("filename", CONFIGS_TO_WRITE) +@pytest.mark.parametrize( + "filename", ["base_config.yaml", "stage1_config.json", "stage1_config.toml"] +) def test_quickstart_templates(filename): - """ ensure template configs have an appropriate placeholder for the contact info """ + """ensure template configs have an appropriate placeholder for the contact info""" config = resource_file(filename) text = config.read_text() @@ -300,7 +345,7 @@ def test_quickstart_templates(filename): def test_quickstart(tmp_path): - """ ensure quickstart tool generates expected output """ + """ensure quickstart tool generates expected output""" tool = QuickStartTool() run_tool( diff --git a/environment.yml b/environment.yml index 243ad6dd119..610818ace63 100644 --- a/environment.yml +++ b/environment.yml @@ -36,6 +36,7 @@ dependencies: - sphinx=3.5 - sphinx-automodapi - sphinx_rtd_theme + - tomli - tqdm - traitlets - vitables diff --git a/setup.py b/setup.py index 298ed9d56d6..202d55827b2 100755 --- a/setup.py +++ b/setup.py @@ -20,7 +20,12 @@ "ctapipe-fileinfo = ctapipe.tools.fileinfo:main", "ctapipe-quickstart = ctapipe.tools.quickstart:main", ] 
-tests_require = ["pytest", "pandas>=0.24.0", "importlib_resources;python_version<'3.9'"] +tests_require = [ + "pytest", + "pandas>=0.24.0", + "importlib_resources;python_version<'3.9'", + "tomli", +] docs_require = [ "sphinx_rtd_theme", "sphinx_automodapi", @@ -57,6 +62,7 @@ "setuptools_scm>=3.4", "importlib_resources;python_version<'3.9'", "jinja2~=3.0.2", # for sphinx 3.5, update when moving to 4.x + "pyyaml>=5.1", ], # here are optional dependencies (as "tag" : "dependency spec") extras_require={