Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add esmvaltool develop command group #3635

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions esmvaltool/utils/develop/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""Interface to utility commands for develop command group."""

import sys
from pathlib import Path

from . import compare
from . import recipe_filler


# Default user configuration file, located under the current user's home
# directory; used as the default ``config_file`` of
# ``DevelopCommand.fill_recipe``.
_DEFAULT_USER_CONFIG_FILE = Path.home() / ".esmvaltool" / "config-user.yml"

class DevelopCommand:
    """Development utilities."""

    def compare(self, reference_dir, current_dir, verbose=False):
        """Compare a recipe run to a reference run.

        This command does not return a value: it always terminates the
        process through ``sys.exit``, with status 0 if the runs were found
        to be identical and status 1 otherwise.

        Parameters
        ----------
        reference_dir : str
            Results directory from reference run
        current_dir : str
            Results directory from run to be tested
        verbose : bool
            Produce verbose output

        Raises
        ------
        SystemExit
            Always; the exit code is 0 for identical runs, 1 otherwise.
        """
        # ``compare`` below is the sibling module imported at file level,
        # not this method: names defined in the class namespace are not
        # searched when a name is resolved inside a method body.
        same = compare.compare(Path(reference_dir), Path(current_dir), verbose)

        # Exit status: 0 when ``same`` is truthy, 1 when it is falsy.
        sys.exit(int(not same))

    def fill_recipe(self, recipe, output_recipe="recipe_autofilled.yml",
                    config_file=_DEFAULT_USER_CONFIG_FILE):
        """Fill in a partial recipe with additional datasets.

        Tool to obtain a set of additional datasets when given a partial
        recipe. The blank recipe should contain, at the very least, a list
        of diagnostics each with their variable(s). Example of minimum
        settings::

            diagnostics:
              diagnostic:
                variables:
                  ta:
                    mip: Amon
                    start_year: 1850
                    end_year: 1900

        Note that the tool will exit if any of these minimum settings are
        missing!

        Key features:

        - you can add as many variable parameters as are needed; if not
          added, the tool will use the "*" wildcard and find all available
          combinations;
        - you can restrict the number of datasets to be looked for with the
          `dataset:` key for each variable, pass a list of datasets as
          value, e.g. `dataset: [MPI-ESM1-2-LR, MPI-ESM-LR]`;
        - you can specify a pair of experiments, e.g.
          `exp: [rcp26, rcp85]`, for each variable; this will look for each
          available dataset per experiment and assemble an aggregated data
          stretch from each experiment; equivalent to esmvaltool's syntax
          of multiple experiments; this option needs an ensemble to be
          declared explicitly; it will return no entry if there are gaps in
          data;
        - `start_year` and `end_year` are mandatory and are used to filter
          out the datasets that don't have data in the interval; if you
          want all possible years, hence no filtering on years, just use
          "*" for start and end years;
        - `config-user: rootpath: CMIPX` may be a list, rootpath lists are
          supported.

        Caveats:

        - the tool doesn't yet work for derived variables;
        - operation restricted to CMIP data.

        Parameters
        ----------
        recipe : str
            Path to partial recipe file
        output_recipe : str
            Path to output recipe
        config_file : str or pathlib.Path
            User configuration file
        """
        # All of the work is delegated to the sibling ``recipe_filler``
        # module; the arguments are forwarded unchanged.
        recipe_filler.run(recipe, output_recipe, config_file)
71 changes: 37 additions & 34 deletions esmvaltool/utils/recipe_filler.py → esmvaltool/utils/develop/recipe_filler.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,8 @@ def _get_download_dir(yamlconf, cmip_era):
return False


def _get_site_rootpath(cmip_era):
def _get_site_rootpath(cmip_era, config_yml):
"""Get site (drs) from config-user.yml."""
config_yml = get_args().config_file
with open(config_yml, 'r') as yamf:
yamlconf = yaml.safe_load(yamf)
drs = yamlconf['drs'][cmip_era]
Expand All @@ -322,9 +321,9 @@ def _get_site_rootpath(cmip_era):
return drs, rootdir


def _get_input_dir(cmip_era):
def _get_input_dir(cmip_era, config_file):
"""Get input_dir from config-developer.yml."""
site = _get_site_rootpath(cmip_era)[0]
site = _get_site_rootpath(cmip_era, config_file)[0]
yamlconf = read_config_developer_file()

return yamlconf[cmip_era]['input_dir'][site]
Expand All @@ -336,17 +335,18 @@ def _get_input_file(cmip_era):
return yamlconf[cmip_era]['input_file']


def _determine_basepath(cmip_era):
def _determine_basepath(cmip_era, config_file):
"""Determine a basepath."""
if isinstance(_get_site_rootpath(cmip_era)[1], list):
rootpaths = _get_site_rootpath(cmip_era)[1]
if isinstance(_get_site_rootpath(cmip_era, config_file)[1], list):
rootpaths = _get_site_rootpath(cmip_era, config_file)[1]
else:
rootpaths = [_get_site_rootpath(cmip_era)[1]]
rootpaths = [_get_site_rootpath(cmip_era, config_file)[1]]

basepaths = []
for rootpath in rootpaths:
if _get_input_dir(cmip_era) != os.path.sep:
basepath = os.path.join(rootpath, _get_input_dir(cmip_era),
if _get_input_dir(cmip_era, config_file) != os.path.sep:
basepath = os.path.join(rootpath,
_get_input_dir(cmip_era, config_file),
_get_input_file(cmip_era))
else:
basepath = os.path.join(rootpath, _get_input_file(cmip_era))
Expand Down Expand Up @@ -484,7 +484,7 @@ def _resolve_latestversion(dirname_template):
return dirname_template


def list_all_files(file_dict, cmip_era):
def list_all_files(file_dict, cmip_era, config_file):
"""
List all files that match the dataset dictionary.

Expand Down Expand Up @@ -522,7 +522,7 @@ def list_all_files(file_dict, cmip_era):
return []
file_dict['frequency'] = frequency

basepaths = _determine_basepath(cmip_era)
basepaths = _determine_basepath(cmip_era, config_file)
all_files = []

for basepath in basepaths:
Expand Down Expand Up @@ -553,7 +553,7 @@ def list_all_files(file_dict, cmip_era):
return all_files


def _file_to_recipe_dataset(fn_path, cmip_era, file_dict):
def _file_to_recipe_dataset(fn_path, cmip_era, file_dict, config_file):
"""Convert a filename to an recipe ready dataset."""
# Add the obvious ones - ie the one you requested!
output_dataset = {}
Expand All @@ -565,7 +565,7 @@ def _file_to_recipe_dataset(fn_path, cmip_era, file_dict):
output_dataset[key] = value

# Split file name and base path into directory structure and filenames.
basefiles = _determine_basepath(cmip_era)
basefiles = _determine_basepath(cmip_era, config_file)
_, fnfile = os.path.split(fn_path)

for basefile in basefiles:
Expand Down Expand Up @@ -710,15 +710,15 @@ def _add_datasets_into_recipe(additional_datasets, output_recipe):
yaml.dump(cur_yaml, yamlfile)


def _find_all_datasets(recipe_dict, cmip_eras):
def _find_all_datasets(recipe_dict, cmip_eras, config_file):
"""Find all datasets explicitly."""
datasets = []
for cmip_era in cmip_eras:
if cmip_era == "CMIP6":
activity = "CMIP"
else:
activity = ""
drs, site_path = _get_site_rootpath(cmip_era)
drs, site_path = _get_site_rootpath(cmip_era, config_file)
if drs in ["default", "SMHI"]:
logger.info("DRS is %s; filter on dataset disabled.", drs)
datasets = ["*"]
Expand Down Expand Up @@ -767,10 +767,10 @@ def _get_exp(recipe_dict):
return exps_list


def _get_datasets(recipe_dict, cmip_eras):
def _get_datasets(recipe_dict, cmip_eras, config_file):
"""Get the correct datasets as list if needed."""
if recipe_dict["dataset"] == "*":
datasets = _find_all_datasets(recipe_dict, cmip_eras)
datasets = _find_all_datasets(recipe_dict, cmip_eras, config_file)
return datasets
if isinstance(recipe_dict['dataset'], list):
datasets = recipe_dict['dataset']
Expand Down Expand Up @@ -804,38 +804,34 @@ def get_args():
return args


def _get_timefiltered_files(recipe_dict, exps_list, cmip_era):
def _get_timefiltered_files(recipe_dict, exps_list, cmip_era, config_file):
"""Obtain all files that correspond to requested time range."""
# multiple experiments allowed, complement data from each exp
if len(exps_list) > 1:
files = []
for exp in exps_list:
recipe_dict["exp"] = exp
files.extend(list_all_files(recipe_dict, cmip_era))
files.extend(list_all_files(recipe_dict, cmip_era, config_file))
files = filter_years(files,
recipe_dict["start_year"],
recipe_dict["end_year"],
overlap=True)
recipe_dict["exp"] = exps_list

else:
files = list_all_files(recipe_dict, cmip_era)
files = list_all_files(recipe_dict, cmip_era, config_file)
files = filter_years(files, recipe_dict["start_year"],
recipe_dict["end_year"])

return files


def run():
"""Run the `recipe_filler` tool. Help in __doc__ and via --help."""
# Get arguments
args = get_args()
input_recipe = args.recipe
output_recipe = args.output
def run(input_recipe, output_recipe, config_file):
"""Run the `recipe_filler` tool."""
cmip_eras = ["CMIP5", "CMIP6"]

# read the config file
config_user = read_config_user_file(args.config_file,
config_user = read_config_user_file(config_file,
'recipe_filler',
options={})

Expand All @@ -845,8 +841,7 @@ def run():
os.makedirs(run_dir)
log_files = configure_logging(output_dir=run_dir,
console_log_level=config_user['log_level'])
logger.info(HEADER)
logger.info("Using user configuration file: %s", args.config_file)
logger.info("Using user configuration file: %s", config_file)
logger.info("Using pilot recipe file: %s", input_recipe)
logger.info("Writing filled out recipe to: %s", output_recipe)
log_files = "\n".join(log_files)
Expand Down Expand Up @@ -877,7 +872,7 @@ def run():
cmip_eras = [recipe_dict['project']]

# get datasets depending on user request; always a list
datasets = _get_datasets(recipe_dict, cmip_eras)
datasets = _get_datasets(recipe_dict, cmip_eras, config_file)

# get experiments depending on user request; always a list
exps_list = _get_exp(recipe_dict)
Expand All @@ -888,14 +883,15 @@ def run():
logger.info("Seeking data for dataset: %s", dataset)
for cmip_era in cmip_eras:
files = _get_timefiltered_files(recipe_dict, exps_list,
cmip_era)
cmip_era, config_file)

# assemble in new recipe
add_datasets = []
for fn in sorted(files):
fn_dir = os.path.dirname(fn)
logger.info("Data directory: %s", fn_dir)
out = _file_to_recipe_dataset(fn, cmip_era, recipe_dict)
out = _file_to_recipe_dataset(fn, cmip_era, recipe_dict,
config_file)
logger.info("New recipe entry: %s", out)
if out is None:
continue
Expand All @@ -911,4 +907,11 @@ def run():


if __name__ == "__main__":
run()
"""Run the `recipe_filler` tool. Help in __doc__ and via --help."""
logger.info(HEADER)
# Get arguments
args = get_args()
input_recipe = args.recipe
output_recipe = args.output
config_file = args.config_file
run(input_recipe, output_recipe, config_file)
7 changes: 3 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,15 +250,14 @@ def read_description(filename):
'esmvaltool.cmorizers.mip_convert.esmvt_mipconv_setup:main',
'nclcodestyle = esmvaltool.utils.nclcodestyle.nclcodestyle:_main',
'test_recipe = '
'esmvaltool.utils.testing.recipe_settings.install_expand_run:main',
'recipe_filler = '
'esmvaltool.utils.recipe_filler:run'
'esmvaltool.utils.testing.recipe_settings.install_expand_run:main'
],
'esmvaltool_commands': [
'colortables = '
'esmvaltool.utils.color_tables.show_color_tables:ColorTables',
'install = esmvaltool.install:Install',
'data = esmvaltool.cmorizers.data.cmorizer:DataCommand'
'data = esmvaltool.cmorizers.data.cmorizer:DataCommand',
'develop = esmvaltool.utils.develop:DevelopCommand'
]
},
cmdclass={
Expand Down