Skip to content

Commit

Permalink
allow YAML and TOML config files and multiple configs (#1856)
Browse files Browse the repository at this point in the history
* support loading config files from a YAML file
* a sample YAML version of the config
* add yaml example to quickstart
* fixed typo
* add TOML support
* allow multiple config files, fixes #1732
* add config files to provenance tracker
* updated help string for config_file
* a better config file with lots of explanation
* change Tool.config_file -> config_files since list
* fix incorrect f-string (now a log statement)
* update to reflect the new ShowerProcessor
* move ctapipe.tools.test.resources to ctapipe.resources
* require pyyaml and remove HAS_YAML
  • Loading branch information
kosack authored Apr 14, 2022
1 parent d4b9155 commit 0612a33
Show file tree
Hide file tree
Showing 15 changed files with 373 additions and 52 deletions.
99 changes: 71 additions & 28 deletions ctapipe/core/tool.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,29 @@
"""Classes to handle configurable command-line user interfaces."""
import logging
import logging.config
import textwrap
from abc import abstractmethod
import pathlib
import os
import pathlib
import re
import textwrap
from abc import abstractmethod
from typing import Union
import yaml

from traitlets import default
from traitlets.config import Application, Configurable
try:
import tomli as toml

HAS_TOML = True
except ImportError:
HAS_TOML = False

from traitlets import default, List
from traitlets.config import Application, Config, Configurable

from .. import __version__ as version
from .traits import Path, Enum, Bool, Dict
from . import Provenance
from .component import Component
from .logging import create_logging_config, ColoredFormatter, DEFAULT_LOGGING

from .logging import DEFAULT_LOGGING, ColoredFormatter, create_logging_config
from .traits import Bool, Dict, Enum, Path

__all__ = ["Tool", "ToolConfigurationError"]

Expand Down Expand Up @@ -119,16 +127,21 @@ def main():
"""

config_file = Path(
exists=True,
directory_ok=False,
allow_none=True,
default_value=None,
help=(
"name of a configuration file with "
"parameters to load in addition to "
"command-line parameters"
),
config_files = List(
trait=Path(
exists=True,
directory_ok=False,
allow_none=True,
default_value=None,
help=(
"List of configuration files with parameters to load "
"in addition to command-line parameters. "
"The order listed is the order of precendence (later config parameters "
"overwrite earlier ones), however parameters specified on the "
"command line always have the highest precendence. "
"Config files may be in JSON, YAML, TOML, or Python format"
),
)
).tag(config=True)

log_config = Dict(default_value=DEFAULT_LOGGING).tag(config=True)
Expand Down Expand Up @@ -158,7 +171,7 @@ def __init__(self, **kwargs):
# make sure there are some default aliases in all Tools:
super().__init__(**kwargs)
aliases = {
("c", "config"): "Tool.config_file",
("c", "config"): "Tool.config_files",
"log-level": "Tool.log_level",
("l", "log-file"): "Tool.log_file",
"log-file-level": "Tool.log_file_level",
Expand All @@ -184,23 +197,54 @@ def __init__(self, **kwargs):
self.update_logging_config()

def initialize(self, argv=None):
""" handle config and any other low-level setup """
"""handle config and any other low-level setup"""
self.parse_command_line(argv)
self.update_logging_config()

if self.config_file is not None:
self.log.debug(f"Loading config from '{self.config_file}'")
if self.config_files is not None:
self.log.info("Loading config from '%s'", self.config_files)
try:
self.load_config_file(self.config_file)
for config_file in self.config_files:
self.load_config_file(config_file)
except Exception as err:
raise ToolConfigurationError(f"Couldn't read config file: {err}")
raise ToolConfigurationError(
f"Couldn't read config file: {err} ({type(err)})"
) from err

# ensure command-line takes precedence over config file options:
self.update_config(self.cli_config)
self.update_logging_config()

self.log.info(f"ctapipe version {self.version_string}")

def load_config_file(self, path: Union[str, pathlib.Path]) -> None:
    """
    Load a configuration file and merge it into the current config.

    YAML and TOML files are parsed here and merged with ``update_config``;
    every other suffix is handed to the parent class implementation.
    After a successful load the file is registered with the provenance
    tracker under the role "Tool Configuration".

    Parameters
    ----------
    path: Union[str, pathlib.Path]
        config file to load. [yaml, toml, json, py] formats are supported

    Raises
    ------
    ToolConfigurationError
        if a ``.toml`` file is given but the optional TOML parser could
        not be imported.
    """
    path = pathlib.Path(path)

    if path.suffix in (".yaml", ".yml"):
        # Open in binary mode so the YAML parser detects the encoding
        # itself instead of depending on the platform's locale default.
        with open(path, "rb") as infile:
            # An empty YAML document parses to None, which Config() cannot
            # be built from; treat it as an empty configuration instead.
            config = Config(yaml.safe_load(infile) or {})
        self.update_config(config)
    elif path.suffix == ".toml":
        if not HAS_TOML:
            # NOTE(review): the parent-class loader does not appear to parse
            # TOML, so falling through to it would not load this file --
            # fail loudly rather than risk silently skipping the config.
            raise ToolConfigurationError(
                f"Cannot load TOML config file '{path}': "
                "the 'tomli' package is not installed"
            )
        # the TOML parser requires a binary file object
        with open(path, "rb") as infile:
            config = Config(toml.load(infile))
        self.update_config(config)
    else:
        # fall back to traitlets.config.Application's implementation
        super().load_config_file(str(path))

    # only reached if loading succeeded: record the file as an input
    Provenance().add_input_file(path, role="Tool Configuration")

def update_logging_config(self):
"""Update the configuration of loggers."""
cfg = create_logging_config(
Expand Down Expand Up @@ -330,11 +374,11 @@ def write_provenance(self):

@property
def version_string(self):
""" a formatted version string with version, release, and git hash"""
"""a formatted version string with version, release, and git hash"""
return f"{version}"

def get_current_config(self):
""" return the current configuration as a dict (e.g. the values
"""return the current configuration as a dict (e.g. the values
of all traits, even if they were not set during configuration)
"""
conf = {
Expand All @@ -350,7 +394,7 @@ def get_current_config(self):
return conf

def _repr_html_(self):
""" nice HTML rep, with blue for non-default values"""
"""nice HTML rep, with blue for non-default values"""
traits = self.traits()
name = self.__class__.__name__
lines = [
Expand Down Expand Up @@ -475,7 +519,6 @@ def run_tool(tool: Tool, argv=None, cwd=None):
# switch to cwd for running and back after
os.chdir(cwd)
tool.run(argv or [])
return 0
except SystemExit as e:
return e.code
finally:
Expand Down
111 changes: 111 additions & 0 deletions ctapipe/resources/base_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# ==========================================================================
# ctapipe-process configuration file.
# version: VERSION
#
# This configuration contains a subset of options needed for a basic analysis.
# Not all possible options are shown. To get a complete list, run:
#
# `ctapipe-process --help-all`
#
# ==========================================================================

DataWriter:
Contact:
# please fill in your contact information here. It will be stored in the
# output files as provenance information
name: YOUR-NAME-HERE
email: [email protected]
organization: YOUR-ORGANIZATION

# options that control what is stored in the output file. By default, here we
# write nothing (can be overridden on the command-line or in subsequent config
# files)
overwrite: false # do not overwrite existing files
write_images: false # store DL1 images
write_parameters: false # store DL1 parameters
write_stereo_shower: false # store DL2 stereo geometry
write_raw_waveforms: false # write R0 waveforms
write_waveforms: false # write R1 waveforms

# The CameraCalibrator takes data from R1 or DL0 to DL1a level, applying finer
# calibration and turning waveforms into images. It is run only if DL1a images
# do not already exist in the input file.
CameraCalibrator:
# Choose an extractor type from the following possibilities:
#'FullWaveformSum', 'FixedWindowSum', 'GlobalPeakWindowSum',
#'LocalPeakWindowSum', 'SlidingWindowMaxSum', 'NeighborPeakWindowSum',
#'TwoPassWindowSum', 'BaselineSubtractedNeighborPeakWindowSum'
#
# Note this is a telescope-wise parameter, so can be specified per telescope
# if necessary (see below for an example)
image_extractor_type: NeighborPeakWindowSum

# The ImageProcessor performs the DL1a -> DL1b (image parameters) transition. It
# is run only if `DataWriter.write_parameters=True` and the parameters don't
# already exist in the input file (or if the user forces them to be re-computed
# using DataWriter.recompute_dl1=True)
ImageProcessor:
# The image cleaner selects pixels which have signal in them and rejects those
# without. Options are: 'TailcutsImageCleaner', 'MARSImageCleaner',
# 'FACTImageCleaner'
image_cleaner_type: TailcutsImageCleaner

# make sure you include a configuration for the image cleaner you selected
# above here. The section named by the image_cleaner_type will be used to
# configure it.
TailcutsImageCleaner:
# the thresholds for this image cleaner must be optimized for the data set
# you are analyzing. The defaults may not be correct and should be optimized
# for the given use case.
#
# These are telescope-wise parameters, where the options are patterns
# specified in a list in order of precedence, with later options overwriting
# earlier ones. Each pattern is a triplet of [scope, key, value], where the
# scope can be "type" (matching to the telescope type name) or "id"
# (matching a specific telescope ID number). In the case of "type", the key
# should be either a telescope type string, or part of one with "*" used as
# a wildcard match (e.g. "LST*" would match all LST telescopes). You can
# specify a universal default value using "*" as a key. Note that specifying
# a single value is equivalent to specifying a one-item list containing
# [type, '*', value] .
picture_threshold_pe: # top-level threshold in photoelectrons
- [type, "LST*", 6.0]
- [type, "MST*NectarCam", 8.0]
- [type, "MST*FlashCam", 10000] # disabled for now
- [type, "SST_ASTRI_CHEC", 4.0]
boundary_threshold_pe: # second-level threshold in photoelectrons
- [type, "LST*", 3.0]
- [type, "MST*NectarCam", 4.0]
- [type, "MST*FlashCam", 10000] # disabled for now
- [type, "SST_ASTRI_CHEC", 2.0]
keep_isolated_pixels: False # If False, pixels with < min_picture_neighbors are removed.
min_picture_neighbors: 2 # Minimum number of neighbors above threshold to consider

# Choose which images should be parameterized:
ImageQualityQuery:
# quality criteria should be a list of pairs of [name,
# filter_function_string]. The filter function should take a single value
# which is the image itself, a 1D np.array.
quality_criteria:
- ["enough_pixels", "lambda im: np.count_nonzero(im) > 2"]
- ["enough_charge", "lambda im: im.sum() > 50"]

# The ShowerProcessor performs the DL1 to DL2a (reconstructed shower geometry)
# transition. It is run only if the parameter DataWriter.write_stereo_shower=True.
ShowerProcessor:
# choose between HillasReconstructor and HillasIntersection (two
# implementations of the standard stereo line-intersection method)
reconstructor_type: HillasReconstructor

HillasReconstructor:
# Choose which telescope events should be included in the reconstruction.
StereoQualityQuery:
# the quality criteria here should again be a list of [name,
# filter_function_string] pairs, with filter functions that take the set of
# image parameters, `p` (a `ctapipe.containers.ImageParametersContainer`), as
# an argument.
quality_criteria:
- [enough intensity, "lambda p: p.hillas.intensity > 50"]
- [Positive width, "lambda p: p.hillas.width.value > 0"]
- [enough pixels, "lambda p: p.morphology.num_pixels > 3"]
- [not clipped, "lambda p: p.leakage.intensity_width_2 < 0.8"]
File renamed without changes.
55 changes: 55 additions & 0 deletions ctapipe/resources/stage1_config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
[DataWriter.Contact]
# please fill in your contact information here. It will be stored in the
# output files as provenance information
name = "YOUR-NAME-HERE"
email = "[email protected]"
organization = "YOUR-ORGANIZATION"

[DataWriter]
# options that control what is stored in the output file
overwrite = false
write_images = true
write_parameters = true
write_stereo_shower = false
write_mono_shower = false
transform_image = true
transform_peak_time = true

[CameraCalibrator]
image_extractor_type = "NeighborPeakWindowSum"

[ImageProcessor]
image_cleaner_type = "TailcutsImageCleaner"



[ImageProcessor.TailcutsImageCleaner]
picture_threshold_pe = [
[ "type", "*", 10.0,],
[ "type", "LST_LST_LSTCam", 5.0,],
[ "type", "MST_MST_NectarCam", 5.0,],
[ "type", "SST_ASTRI_CHEC", 3.0,],
]
boundary_threshold_pe = [
[ "type", "*", 5.0,],
[ "type", "LST_LST_LSTCam", 2.5,],
[ "type", "MST_MST_NectarCam", 2.5,],
[ "type", "SST_ASTRI_CHEC", 1.5,],
]
min_picture_neighbors = [ [ "type", "*", 2,],]

[ImageProcessor.ImageQualityQuery]
# These specify which images should be parameterized:
quality_criteria = [
[ "enough_pixels", "lambda im: np.count_nonzero(im) > 2",],
[ "enough_charge", "lambda im: im.sum() > 50",]
]

[ShowerProcessor.ShowerQualityQuery]
# These specify criteria for telescopes that should be included in stereo
# reconstruction:
quality_criteria = [
[ "enough intensity", "lambda p: p.hillas.intensity > 50",],
[ "Positive width", "lambda p: p.hillas.width.value > 0",],
[ "enough pixels", "lambda p: p.morphology.num_pixels > 3",],
]
15 changes: 15 additions & 0 deletions ctapipe/resources/stage1_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# ======================================================================
# ctapipe-process configuration file.
# version: VERSION
#
# Perform Stage 1 Processing
#
# This configuration enables options needed for Rx to DL1b (image parameters)
# ======================================================================
#
# Make sure you first include `--config base_config.yaml` before including this file

DataWriter:
write_images: true
write_parameters: true
write_stereo_shower: false
File renamed without changes.
16 changes: 16 additions & 0 deletions ctapipe/resources/stage2_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# ======================================================================
# ctapipe-process configuration file.
# version: VERSION
#
# Perform Stage 2 Processing
#
# This configuration enables options needed for Rx to DL2a (shower
# reconstruction), but not including any image parameters
# ======================================================================
#
# Make sure you first include `--config base_config.yaml` before including this file

DataWriter:
write_images: false
write_parameters: false
write_stereo_shower: true
File renamed without changes.
17 changes: 17 additions & 0 deletions ctapipe/resources/training_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# ======================================================================
# ctapipe-process configuration file.
# version: VERSION
#
# Perform Stage 1 and Stage 2a processing to generate features needed for
# training machine learning algorithms.
#
# Note that here write_images is disabled by default (to avoid very large
# files); however, for training deep learning algorithms, it should be turned on.
# ======================================================================
#
# Make sure you first include `--config base_config.yaml` before including this file

DataWriter:
write_images: false
write_parameters: true
write_stereo_shower: true
Loading

0 comments on commit 0612a33

Please sign in to comment.