diff --git a/doc/operation.md b/doc/operation.md new file mode 100644 index 00000000..da347163 --- /dev/null +++ b/doc/operation.md @@ -0,0 +1,108 @@ +# Sea Ice E-CDR Operations + +This document outlines how the `seaice_ecdr` code is leveraged in operations. + + +## Command-line Interface (CLI) + +The `./scripts/cli.sh` script is the primary entrypoint for interacting with the +CLI. Not all CLI subcommands are used in operations (e.g., some are used +exclusively in dev). The CLI subcommands outlined below should be used in +production. + +**NOTE**: on NSIDC production VMs, the CLI is set up to be available on the +system PATH as `ecdr`. E.g.: + +``` +$ ecdr --help +``` + +## G10016 NRT Processing + +NRT data will be written to +`/share/apps/G10016_V3/v03r00/production/complete/`. The contents of this +directory should be rsync-ed to `/disks/sidads_ftp/pub/DATASETS/NOAA/G10016_V3` +after successful completion of each G10016 processing job. + +### Daily processing + +Daily NRT processing should occur by running this command: + +``` +daily-nrt --last-n-days 5 --hemisphere both +``` + +Note that the `--overwrite` flag can be used to re-create NRT data if e.g., a +data gap is filled a few days late. + +### Monthly processing + +**TODO**: the code does not yet support this. + + +## G02202 "final" Processing + +Final data will be written to +`/share/apps/G02202_V5/v05r00/production/complete/`. The contents of this +directory should be rsync-ed to `/disks/sidads_ftp/pub/DATASETS/NOAA/G02202_V5` +after successful completion of each G02202 processing job. + +Typically, "final" processing occurs all at once, as data becomes +finalized/available for NSIDC-0001. In other words, the following do not need to +be run on a daily/monthly basis, but instead can be bundled into one job. See +[the ops job for +v4](https://ci.jenkins-ops-2022.apps.int.nsidc.org/job/G02202_Generate_Dataset_Production) +as an example. 
+ +### Daily processing + +To create daily data: + +``` +daily --start-date YYYY-MM-DD --end-date YYYY-MM-DD --hemisphere {north|south} +``` + +Once daily data for a year is available, this data should be aggregated with the +`daily-aggregate` command: + +``` +daily-aggregate --year YYYY --hemisphere {north|south} +``` + +There will be one daily aggregate file per year per hemisphere. + +### Monthly processing + +When a month's worth of daily data is available, monthly data files can be produced: + +``` +monthly --year YYYY --month MM --hemisphere {north|south} +``` + +A range of years/months can also be specified: + + +``` +monthly --year YYYY --month MM --end-year YYYY --end-month MM --hemisphere {north|south} +``` + +Each time a new monthly file is produced, the monthly aggregate file should be +updated. There will only ever be one monthly aggregate file per hemisphere: + +``` +monthly-aggregate --hemisphere {north|south} +``` + +### Validation + +Each time finalized data is produced, the validation CLI should be run: + + +``` +validate-outputs --hemisphere {north|south} --start-date YYYY-MM-DD --end-date YYYY-MM-DD +``` + +This produces log files in +`/share/apps/G02202_V5/v05r00_outputs/production/validation/` that should be +published to the production location. TODO: confirm this is accurate. It does not +look like v4 does this. 
diff --git a/seaice_ecdr/cli/entrypoint.py b/seaice_ecdr/cli/entrypoint.py index 007b2947..7b40cbb9 100644 --- a/seaice_ecdr/cli/entrypoint.py +++ b/seaice_ecdr/cli/entrypoint.py @@ -4,6 +4,7 @@ from seaice_ecdr.cli.daily import cli as daily_cli from seaice_ecdr.cli.monthly import cli as monthly_cli +from seaice_ecdr.cli.monthly_nrt import cli as monthly_nrt_cli from seaice_ecdr.cli.nrt import cli as nrt_cli from seaice_ecdr.daily_aggregate import cli as daily_aggregate_cli from seaice_ecdr.initial_daily_ecdr import cli as ecdr_cli @@ -43,9 +44,10 @@ def cli(): cli.add_command(monthly_cli) # Generate monthly aggregate file (one per hemisphere) cli.add_command(monthly_aggregate_cli) -# Wraps the `nrt_ecdr_for_dates` CLI with the correct platform start date +# Wraps the NRT CLIs with the correct platform start date # configuration chosen. cli.add_command(nrt_cli) +cli.add_command(monthly_nrt_cli) if __name__ == "__main__": cli() diff --git a/seaice_ecdr/cli/monthly.py b/seaice_ecdr/cli/monthly.py index 5ba8d0ac..81800b7d 100644 --- a/seaice_ecdr/cli/monthly.py +++ b/seaice_ecdr/cli/monthly.py @@ -72,6 +72,7 @@ def make_monthly_25km_ecdr( base_output_dir=base_output_dir, hemisphere=hemisphere, resolution=RESOLUTION, + is_nrt=False, ) diff --git a/seaice_ecdr/cli/monthly_nrt.py b/seaice_ecdr/cli/monthly_nrt.py new file mode 100644 index 00000000..b87c0190 --- /dev/null +++ b/seaice_ecdr/cli/monthly_nrt.py @@ -0,0 +1,135 @@ +from pathlib import Path +from typing import Final, get_args + +import click +import pandas as pd +from pm_tb_data._types import Hemisphere + +from seaice_ecdr.cli.util import CLI_EXE_PATH, run_cmd +from seaice_ecdr.constants import DEFAULT_BASE_NRT_OUTPUT_DIR +from seaice_ecdr.platforms.config import ( + NRT_PLATFORM_START_DATES_CONFIG_FILEPATH, +) +from seaice_ecdr.publish_monthly import prepare_monthly_nc_for_publication + + +def make_monthly_25km_ecdr( + year: int, + month: int, + end_year: int | None, + end_month: int | None, + hemisphere: 
Hemisphere, + base_output_dir: Path, +): + if end_year is None: + end_year = year + if end_month is None: + end_month = month + + # TODO: consider extracting these to CLI options that default to these values. + RESOLUTION: Final = "25" + ANCILLARY_SOURCE: Final = "CDRv5" + # TODO: the amsr2 start date should ideally be read from the platform start + # date config. + # Use the NRT platform start dates config, which excludes AMSR2 + run_cmd( + f"export PLATFORM_START_DATES_CONFIG_FILEPATH={NRT_PLATFORM_START_DATES_CONFIG_FILEPATH} &&" + f" {CLI_EXE_PATH} intermediate-monthly" + f" --year {year} --month {month}" + f" --end-year {end_year} --end-month {end_month}" + f" --hemisphere {hemisphere}" + f" --base-output-dir {base_output_dir}" + f" --resolution {RESOLUTION}" + f" --ancillary-source {ANCILLARY_SOURCE}" + " --is-nrt" + ) + + # Prepare the monthly data for publication + for period in pd.period_range( + start=pd.Period(year=year, month=month, freq="M"), + end=pd.Period(year=end_year, month=end_month, freq="M"), + freq="M", + ): + prepare_monthly_nc_for_publication( + year=period.year, + month=period.month, + base_output_dir=base_output_dir, + hemisphere=hemisphere, + resolution=RESOLUTION, + is_nrt=True, + ) + + +@click.command(name="monthly-nrt") +@click.option( + "--year", + required=True, + type=int, + help="Year for which to create the monthly file.", +) +@click.option( + "--month", + required=True, + type=int, + help="Month for which to create the monthly file.", +) +@click.option( + "--end-year", + required=False, + default=None, + type=int, + help="If given, the end year for which to create monthly files.", +) +@click.option( + "--end-month", + required=False, + default=None, + type=int, + help="If given, the end year for which to create monthly files.", +) +@click.option( + "-h", + "--hemisphere", + required=True, + type=click.Choice(get_args(Hemisphere)), +) +@click.option( + "--base-output-dir", + required=True, + type=click.Path( + exists=True, 
file_okay=False, + dir_okay=True, + writable=True, + resolve_path=True, + path_type=Path, + ), + default=DEFAULT_BASE_NRT_OUTPUT_DIR, + help=( + "Base output directory for NRT ECDR outputs." + " Subdirectories are created for outputs of" + " different stages of processing." + ), + show_default=True, +) +def cli( + *, + year: int, + month: int, + end_year: int | None, + end_month: int | None, + hemisphere: Hemisphere, + base_output_dir: Path, +) -> None: + make_monthly_25km_ecdr( + year=year, + month=month, + end_year=end_year, + end_month=end_month, + hemisphere=hemisphere, + base_output_dir=base_output_dir, + ) + + +if __name__ == "__main__": + cli() diff --git a/seaice_ecdr/intermediate_monthly.py b/seaice_ecdr/intermediate_monthly.py index 1b75983d..9297bb4b 100644 --- a/seaice_ecdr/intermediate_monthly.py +++ b/seaice_ecdr/intermediate_monthly.py @@ -76,6 +76,7 @@ def _get_daily_complete_filepaths_for_month( intermediate_output_dir: Path, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + is_nrt: bool, ) -> list[Path]: """Return a list of paths to ECDR daily complete filepaths for the given year and month.""" data_list = [] @@ -93,7 +94,7 @@ def _get_daily_complete_filepaths_for_month( resolution=resolution, intermediate_output_dir=intermediate_output_dir, platform_id=platform.id, - is_nrt=False, + is_nrt=is_nrt, ) if expected_fp.is_file(): data_list.append(expected_fp) @@ -135,6 +136,7 @@ def get_daily_ds_for_month( intermediate_output_dir: Path, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + is_nrt: bool, ) -> xr.Dataset: """Create an xr.Dataset wtih ECDR complete daily data for a given year and month. @@ -148,6 +150,7 @@ def get_daily_ds_for_month( intermediate_output_dir=intermediate_output_dir, hemisphere=hemisphere, resolution=resolution, + is_nrt=is_nrt, ) # Read all of the complete daily data for the given year and month. 
ds = xr.open_mfdataset(data_list) @@ -636,6 +639,7 @@ def make_intermediate_monthly_nc( intermediate_output_dir: Path, resolution: ECDR_SUPPORTED_RESOLUTIONS, ancillary_source: ANCILLARY_SOURCES, + is_nrt: bool, ) -> Path: daily_ds_for_month = get_daily_ds_for_month( year=year, @@ -643,6 +647,7 @@ def make_intermediate_monthly_nc( intermediate_output_dir=intermediate_output_dir, hemisphere=hemisphere, resolution=resolution, + is_nrt=is_nrt, ) platform_id = daily_ds_for_month.platform_id @@ -745,6 +750,12 @@ def make_intermediate_monthly_nc( required=True, type=click.Choice(get_args(ANCILLARY_SOURCES)), ) +@click.option( + "--is-nrt", + required=False, + is_flag=True, + help=("Create intermediate monthly file in NRT mode (uses NRT-stype filename)."), +) def cli( *, year: int, @@ -755,6 +766,7 @@ def cli( base_output_dir: Path, resolution: ECDR_SUPPORTED_RESOLUTIONS, ancillary_source: ANCILLARY_SOURCES, + is_nrt: bool, ): if end_year is None: end_year = year @@ -779,6 +791,7 @@ def cli( hemisphere=hemisphere, resolution=resolution, ancillary_source=ancillary_source, + is_nrt=is_nrt, ) except Exception: error_periods.append(period) diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index e768c029..1a7fc9fb 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -6,11 +6,13 @@ from typing import Final, get_args import click +import datatree import xarray as xr from loguru import logger from pm_tb_data._types import Hemisphere from pm_tb_data.fetch.nsidc_0080 import get_nsidc_0080_tbs_from_disk +from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS from seaice_ecdr.ancillary import ANCILLARY_SOURCES from seaice_ecdr.checksum import write_checksum_file from seaice_ecdr.cli.util import datetime_to_date @@ -22,6 +24,7 @@ ) from seaice_ecdr.intermediate_daily import ( complete_daily_ecdr_ds, + get_ecdr_filepath, ) from seaice_ecdr.publish_daily import ( get_complete_daily_filepath, @@ -251,6 +254,32 @@ def get_nrt_complete_daily_filepath( return 
nrt_output_filepath +def override_attrs_for_nrt( + *, + publication_ready_ds: datatree.DataTree, + resolution: ECDR_SUPPORTED_RESOLUTIONS, +) -> datatree.DataTree: + override_for_nrt = publication_ready_ds.copy() + override_for_nrt.attrs["summary"] = ( + f"This data set provides a near-real-time (NRT) passive microwave sea ice concentration climate data record (CDR) based on gridded brightness temperatures (TBs) from the Defense Meteorological Satellite Program (DMSP) passive microwave radiometer: the Special Sensor Microwave Imager/Sounder (SSMIS) F17. The sea ice concentration CDR is an estimate of sea ice concentration that is produced by combining concentration estimates from two algorithms developed at the NASA Goddard Space Flight Center (GSFC): the NASA Team algorithm and the Bootstrap algorithm. The individual algorithms are used to process and combine brightness temperature data at NSIDC. This product is designed to provide an NRT time series of sea ice concentrations (the fraction, or percentage, of ocean area covered by sea ice). The data are gridded on the NSIDC polar stereographic grid with {resolution} x {resolution} km grid cells and are available in NetCDF file format. Each file contains a variable with the CDR concentration values as well as variables that hold the NASA Team and Bootstrap processed concentrations for reference. Variables containing standard deviation, quality flags, and projection information are also included." + ) + + # NOTE: this NRT summary is specific to SSMIS F17. 
+ assert NRT_PLATFORM_ID in override_for_nrt.attrs["summary"] + + override_for_nrt.attrs["id"] = "https://doi.org/10.7265/j0z0-4h87" + override_for_nrt.attrs["metadata_link"] = ( + f"https://nsidc.org/data/g10016/versions/{ECDR_NRT_PRODUCT_VERSION.major_version_number}" + ) + override_for_nrt.attrs["title"] = ( + "Near-Real-Time NOAA-NSIDC Climate Data Record of Passive Microwave" + f" Sea Ice Concentration Version {ECDR_NRT_PRODUCT_VERSION.major_version_number}" + ) + override_for_nrt.attrs["product_version"] = ECDR_NRT_PRODUCT_VERSION.version_str + + return override_for_nrt + + def nrt_ecdr_for_day( *, date: dt.date, @@ -291,23 +320,30 @@ def nrt_ecdr_for_day( is_nrt=True, ancillary_source=ancillary_source, ) + # Write the daily intermediate file. This is used by the monthly NRT + # processing to produce the monthly fields. + cde_ds_filepath = get_ecdr_filepath( + date=date, + hemisphere=hemisphere, + resolution=NRT_RESOLUTION, + intermediate_output_dir=intermediate_output_dir, + platform_id=NRT_PLATFORM_ID, + is_nrt=True, + ) + cde_ds.to_netcdf( + cde_ds_filepath, + ) + + # Prepare the ds for publication daily_ds = make_publication_ready_ds( intermediate_daily_ds=cde_ds, hemisphere=hemisphere, ) # Update global attrs to reflect G10016 instead of G02202: - daily_ds.attrs["id"] = "https://doi.org/10.7265/j0z0-4h87" - daily_ds.attrs["metadata_link"] = ( - f"https://nsidc.org/data/g10016/versions/{ECDR_NRT_PRODUCT_VERSION.major_version_number}" - ) - daily_ds.attrs["title"] = ( - "Near-Real-Time NOAA-NSIDC Climate Data Record of Passive Microwave" - f" Sea Ice Concentration Version {ECDR_NRT_PRODUCT_VERSION.major_version_number}" - ) - daily_ds.attrs["product_version"] = ECDR_NRT_PRODUCT_VERSION.version_str - daily_ds.attrs["summary"] = ( - f"This data set provides a near-real-time (NRT) passive microwave sea ice concentration climate data record (CDR) based on gridded brightness temperatures (TBs) from the Defense Meteorological Satellite Program (DMSP) passive 
microwave radiometer: the Special Sensor Microwave Imager/Sounder (SSMIS) F17. The sea ice concentration CDR is an estimate of sea ice concentration that is produced by combining concentration estimates from two algorithms developed at the NASA Goddard Space Flight Center (GSFC): the NASA Team algorithm and the Bootstrap algorithm. The individual algorithms are used to process and combine brightness temperature data at NSIDC. This product is designed to provide an NRT time series of sea ice concentrations (the fraction, or percentage, of ocean area covered by sea ice). The data are gridded on the NSIDC polar stereographic grid with {NRT_RESOLUTION} x {NRT_RESOLUTION} km grid cells and are available in NetCDF file format. Each file contains a variable with the CDR concentration values as well as variables that hold the NASA Team and Bootstrap processed concentrations for reference. Variables containing standard deviation, quality flags, and projection information are also included." + daily_ds = override_attrs_for_nrt( + publication_ready_ds=daily_ds, + resolution=NRT_RESOLUTION, ) daily_ds.to_netcdf(nrt_output_filepath) diff --git a/seaice_ecdr/publish_monthly.py b/seaice_ecdr/publish_monthly.py index 348536c3..c517e7b9 100644 --- a/seaice_ecdr/publish_monthly.py +++ b/seaice_ecdr/publish_monthly.py @@ -1,4 +1,5 @@ import datetime as dt +from functools import cache from pathlib import Path import datatree @@ -16,15 +17,25 @@ add_coordinates_attr, remove_valid_range_from_coordinate_vars, ) +from seaice_ecdr.nrt import override_attrs_for_nrt from seaice_ecdr.platforms import SUPPORTED_PLATFORM_ID from seaice_ecdr.util import ( find_standard_monthly_netcdf_files, get_complete_output_dir, get_intermediate_output_dir, + nrt_monthly_filename, platform_id_from_filename, standard_monthly_filename, ) +# TODO: consider extracting to config or a kwarg of this function for more +# flexible use with other platforms in the future. 
+PROTOTYPE_PLATFORM_ID: SUPPORTED_PLATFORM_ID = "am2" +PROTOTYPE_PLATFORM_DATA_GROUP_NAME = f"prototype_{PROTOTYPE_PLATFORM_ID}" +# TODO: this should be extracted from e.g., the platform start date +# configuration instead of hard-coding it here. +PROTOTYPE_PLATFORM_START_DATE = dt.date(2013, 1, 1) + def get_complete_monthly_dir(complete_output_dir: Path) -> Path: monthly_dir = complete_output_dir / "monthly" @@ -41,48 +52,44 @@ def get_complete_monthly_filepath( year: int, month: int, complete_output_dir: Path, + is_nrt: bool, ) -> Path: output_dir = get_complete_monthly_dir( complete_output_dir=complete_output_dir, ) - output_fn = standard_monthly_filename( - hemisphere=hemisphere, - resolution=resolution, - platform_id=platform_id, - year=year, - month=month, - ) + if is_nrt: + output_fn = nrt_monthly_filename( + hemisphere=hemisphere, + resolution=resolution, + platform_id=platform_id, + year=year, + month=month, + ) + else: + output_fn = standard_monthly_filename( + hemisphere=hemisphere, + resolution=resolution, + platform_id=platform_id, + year=year, + month=month, + ) output_path = output_dir / output_fn return output_path -def prepare_monthly_nc_for_publication( +@cache +def _get_all_intermediate_monthly_fps( *, base_output_dir: Path, year: int, month: int, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, -): - """Prepare a monthly NetCDF file for publication. - - If monthly data for a prototype platform is available for the given month, - this function adds that data to a prototype group in the output NetCDF - file. The output NC file's root-group variables are all taken from the - default platforms given by the platofrm start date configuration. - """ - # TODO: consider extracting to config or a kwarg of this function for more - # flexible use with other platforms in the future. 
- PROTOTYPE_PLATFORM_ID: SUPPORTED_PLATFORM_ID = "am2" - PROTOTYPE_PLATFORM_DATA_GROUP_NAME = f"prototype_{PROTOTYPE_PLATFORM_ID}" - # TODO: this should be extracted from e.g., the platform start date - # configuration instead of hard-coding it here. - PROTOTYPE_PLATFORM_START_DATE = dt.date(2013, 1, 1) - - # Get the intermediate monthly data +) -> list[Path]: + """Get a list of all intermediate monthly filepaths for the given params.""" intermediate_output_dir = get_intermediate_output_dir( base_output_dir=base_output_dir, hemisphere=hemisphere, @@ -99,19 +106,81 @@ def prepare_monthly_nc_for_publication( month=month, platform_id="*", ) - default_monthly_fps = [ + return all_intermediate_monthly_fps + + +def _get_intermediate_monthly_fp( + *, + base_output_dir: Path, + year: int, + month: int, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, +) -> Path: + all_intermediate_monthly_fps = _get_all_intermediate_monthly_fps( + base_output_dir=base_output_dir, + year=year, + month=month, + hemisphere=hemisphere, + resolution=resolution, + ) + intermediate_monthly_fps = [ fp for fp in all_intermediate_monthly_fps if f"_{PROTOTYPE_PLATFORM_ID}_" not in fp.name ] - if len(default_monthly_fps) != 1: - raise RuntimeError( - f"Failed to find an intermediate monthly file for {year=}, {month=}, {hemisphere} in {intermediate_monthly_dir}" + if len(intermediate_monthly_fps) != 1: + raise FileNotFoundError( + f"Failed to find an intermediate monthly file for {year=}, {month=}, {hemisphere}" ) - default_intermediate_monthly_ds = xr.open_dataset(default_monthly_fps[0]) - # get the platform Id from the filename default filename. 
- platform_id = platform_id_from_filename(default_monthly_fps[0].name) + monthly_filepath = intermediate_monthly_fps[0] + + return monthly_filepath + + +def _get_prototype_monthly_fp( + *, + year: int, + month: int, + hemisphere: Hemisphere, + base_output_dir: Path, + resolution: ECDR_SUPPORTED_RESOLUTIONS, +) -> Path | None: + all_intermediate_monthly_fps = _get_all_intermediate_monthly_fps( + base_output_dir=base_output_dir, + year=year, + month=month, + hemisphere=hemisphere, + resolution=resolution, + ) + prototype_monthly_fps = [ + fp + for fp in all_intermediate_monthly_fps + if f"_{PROTOTYPE_PLATFORM_ID}_" in fp.name + ] + + if (prototype_len := len(prototype_monthly_fps)) > 0: + if prototype_len > 1: + raise RuntimeError( + f"Something went wrong: found multiple intermediate prototype monthly files, but found {prototype_len}: {prototype_monthly_fps}." + ) + + return prototype_monthly_fps[0] + else: + return None + + +def prepare_monthly_ds_for_publication( + *, + year: int, + month: int, + hemisphere: Hemisphere, + intermediate_monthly_fp: Path, + prototype_monthly_fp: Path | None, +) -> datatree.DataTree: + # Get the intermediate monthly data + default_intermediate_monthly_ds = xr.open_dataset(intermediate_monthly_fp) # TODO: a lot of the below (e.g., where the supplementary group is # constructed and a DataTree) is made is very similar to how it's done in @@ -147,19 +216,9 @@ def prepare_monthly_nc_for_publication( } ) - # Now get the prototype filepath, if it exists, and add it to the new monthly ds. - prototype_monthly_fps = [ - fp - for fp in all_intermediate_monthly_fps - if f"_{PROTOTYPE_PLATFORM_ID}_" in fp.name - ] - if (prototype_len := len(prototype_monthly_fps)) > 0: - if prototype_len > 1: - raise RuntimeError( - f"Something went wrong: found multiple intermediate prototype monthly files, but found {prototype_len}: {prototype_monthly_fps}." 
- ) - - prototype_monthly_ds = xr.open_dataset(prototype_monthly_fps[0]) + # Add the prototype group if a prototype file is passed in. + if prototype_monthly_fp: + prototype_monthly_ds = xr.open_dataset(prototype_monthly_fp) cdr_var_fieldnames = [ "cdr_seaice_conc_monthly", "cdr_seaice_conc_monthly_qa_flag", @@ -205,6 +264,19 @@ def prepare_monthly_nc_for_publication( add_coordinate_coverage_content_type(complete_monthly_ds) add_coordinates_attr(complete_monthly_ds) + return complete_monthly_ds + + +def _write_publication_ready_nc_and_checksum( + publication_ready_monthly_ds: datatree.DataTree, + base_output_dir: Path, + year: int, + month: int, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + is_nrt: bool, + platform_id: SUPPORTED_PLATFORM_ID, +) -> Path: # Write out finalized nc file. complete_output_dir = get_complete_output_dir( base_output_dir=base_output_dir, @@ -218,8 +290,9 @@ def prepare_monthly_nc_for_publication( year=year, month=month, complete_output_dir=complete_output_dir, + is_nrt=is_nrt, ) - complete_monthly_ds.to_netcdf(complete_monthly_filepath) + publication_ready_monthly_ds.to_netcdf(complete_monthly_filepath) logger.success(f"Staged NC file for publication: {complete_monthly_filepath}") # Write checksum file for the complete daily output. @@ -229,3 +302,67 @@ def prepare_monthly_nc_for_publication( ) return complete_monthly_filepath + + +def prepare_monthly_nc_for_publication( + *, + base_output_dir: Path, + year: int, + month: int, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + is_nrt: bool, +): + """Prepare a monthly NetCDF file for publication. + + If monthly data for a prototype platform is available for the given month, + this function adds that data to a prototype group in the output NetCDF + file. The output NC file's root-group variables are all taken from the + default platforms given by the platform start date configuration. 
+ """ + intermediate_monthly_fp = _get_intermediate_monthly_fp( + base_output_dir=base_output_dir, + year=year, + month=month, + hemisphere=hemisphere, + resolution=resolution, + ) + + # Now get the prototype filepath, if it exists, and add it to the new + # monthly ds. + prototype_monthly_fp = _get_prototype_monthly_fp( + year=year, + month=month, + hemisphere=hemisphere, + base_output_dir=base_output_dir, + resolution=resolution, + ) + complete_monthly_ds = prepare_monthly_ds_for_publication( + year=year, + month=month, + hemisphere=hemisphere, + intermediate_monthly_fp=intermediate_monthly_fp, + prototype_monthly_fp=prototype_monthly_fp, + ) + + # Override attrs for nrt + if is_nrt: + complete_monthly_ds = override_attrs_for_nrt( + publication_ready_ds=complete_monthly_ds, resolution=resolution + ) + + # get the platform Id from the filename default filename. + platform_id = platform_id_from_filename(intermediate_monthly_fp.name) + # Write the publication-ready monthly ds + complete_monthly_filepath = _write_publication_ready_nc_and_checksum( + publication_ready_monthly_ds=complete_monthly_ds, + year=year, + month=month, + hemisphere=hemisphere, + resolution=resolution, + base_output_dir=base_output_dir, + platform_id=platform_id, + is_nrt=is_nrt, + ) + + return complete_monthly_filepath diff --git a/seaice_ecdr/tests/integration/test_intermediate_monthly.py b/seaice_ecdr/tests/integration/test_intermediate_monthly.py index b0799c26..1492a0cd 100644 --- a/seaice_ecdr/tests/integration/test_intermediate_monthly.py +++ b/seaice_ecdr/tests/integration/test_intermediate_monthly.py @@ -34,6 +34,7 @@ def test_make_intermediate_monthly_nc(base_output_dir_test_path, monkeypatch): resolution="25", intermediate_output_dir=intermediate_output_dir, ancillary_source=ancillary_source, + is_nrt=False, ) assert output_path.is_file() diff --git a/seaice_ecdr/tests/integration/test_publish_monthly.py b/seaice_ecdr/tests/integration/test_publish_monthly.py index 5639ffd0..e5ecbe64 
100644 --- a/seaice_ecdr/tests/integration/test_publish_monthly.py +++ b/seaice_ecdr/tests/integration/test_publish_monthly.py @@ -16,6 +16,7 @@ def test_publish_monthly_nc(base_output_dir_test_path): # noqa resolution="25", year=2022, month=3, + is_nrt=False, ) assert output_path.is_file() diff --git a/seaice_ecdr/tests/unit/test_monthly.py b/seaice_ecdr/tests/unit/test_monthly.py index 95cbed22..6841bc2d 100644 --- a/seaice_ecdr/tests/unit/test_monthly.py +++ b/seaice_ecdr/tests/unit/test_monthly.py @@ -75,6 +75,7 @@ def test__get_daily_complete_filepaths_for_month(fs): intermediate_output_dir=intermediate_output_dir / NORTH, resolution="12.5", hemisphere=NORTH, + is_nrt=False, ) assert sorted(_fake_files_for_test_year_month_and_hemisphere) == sorted(actual) diff --git a/seaice_ecdr/tests/unit/test_util.py b/seaice_ecdr/tests/unit/test_util.py index 53081d2d..c01de133 100644 --- a/seaice_ecdr/tests/unit/test_util.py +++ b/seaice_ecdr/tests/unit/test_util.py @@ -16,6 +16,7 @@ find_standard_monthly_netcdf_files, get_num_missing_pixels, nrt_daily_filename, + nrt_monthly_filename, platform_id_from_filename, raise_error_for_dates, standard_daily_aggregate_filename, @@ -55,6 +56,20 @@ def test_nrt_daily_filename(): assert actual == expected +def test_nrt_monthly_filename(): + expected = f"sic_psn25_202409_F17_{ECDR_NRT_PRODUCT_VERSION}_P.nc" + + actual = nrt_monthly_filename( + hemisphere=NORTH, + resolution="25", + platform_id="F17", + year=2024, + month=9, + ) + + assert actual == expected + + def test_daily_aggregate_filename(): expected = f"sic_psn12.5_20210101-20211231_{ECDR_PRODUCT_VERSION}.nc" @@ -140,6 +155,35 @@ def test_monthly_platform_id_from_filename(): assert expected_platform_id == actual_platform_id +def test_daily_platform_id_from_daily_nrt_filename(): + expected_platform_id: Final = "F17" + fn = nrt_daily_filename( + hemisphere=SOUTH, + resolution="25", + platform_id=expected_platform_id, + date=dt.date(2021, 1, 1), + ) + + actual_platform_id = 
platform_id_from_filename(fn) + + assert expected_platform_id == actual_platform_id + + +def test_daily_platform_id_from_monthly_nrt_filename(): + expected_platform_id: Final = "F17" + fn = nrt_monthly_filename( + hemisphere=SOUTH, + resolution="25", + platform_id=expected_platform_id, + year=2024, + month=9, + ) + + actual_platform_id = platform_id_from_filename(fn) + + assert expected_platform_id == actual_platform_id + + def test_find_standard_monthly_netcdf_files_platform_wildcard(fs): monthly_output_dir = Path("/path/to/data/dir/monthly") fs.create_dir(monthly_output_dir) diff --git a/seaice_ecdr/util.py b/seaice_ecdr/util.py index 2766f86b..aba51258 100644 --- a/seaice_ecdr/util.py +++ b/seaice_ecdr/util.py @@ -36,6 +36,21 @@ def standard_daily_filename( return fn +def _standard_fn_to_nrt(*, standard_fn: str) -> str: + standard_fn_path = Path(standard_fn) + + fn_base = standard_fn_path.stem + ext = standard_fn_path.suffix + nrt_fn = fn_base + "_P" + ext + + # Replace the standard G02202 version number with the NRT version. + nrt_fn = nrt_fn.replace( + ECDR_PRODUCT_VERSION.version_str, ECDR_NRT_PRODUCT_VERSION.version_str + ) + + return nrt_fn + + def nrt_daily_filename( *, hemisphere: Hemisphere, @@ -49,15 +64,32 @@ def nrt_daily_filename( platform_id=platform_id, date=date, ) - standard_fn_path = Path(standard_fn) - fn_base = standard_fn_path.stem - ext = standard_fn_path.suffix - nrt_fn = fn_base + "_P" + ext + nrt_fn = _standard_fn_to_nrt( + standard_fn=standard_fn, + ) - # Replace the standard G02202 version number with the NRT version. 
- nrt_fn = nrt_fn.replace( - ECDR_PRODUCT_VERSION.version_str, ECDR_NRT_PRODUCT_VERSION.version_str + return nrt_fn + + +def nrt_monthly_filename( + *, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + platform_id: SUPPORTED_PLATFORM_ID, + year: int, + month: int, +) -> str: + standard_fn = standard_monthly_filename( + hemisphere=hemisphere, + resolution=resolution, + platform_id=platform_id, + year=year, + month=month, + ) + + nrt_fn = _standard_fn_to_nrt( + standard_fn=standard_fn, ) return nrt_fn @@ -194,7 +226,18 @@ def standard_monthly_aggregate_filename( # This regex works for both daily and monthly filenames. -STANDARD_FN_REGEX = re.compile(r"sic_ps.*_.*_(?P.*)_.*.nc") +STANDARD_FN_REGEX = re.compile( + # Grid ID is e.g., "pss25" for polar stereo southern hemisphere 25km. + r"sic_(?Pps[sn]\d+(\.\d+)?)" + # Date is 6 digits for monthly (YYYYMM) and 8 digits for daily (YYYYMMDD) + r"_(?P\d{6}|\d{8})" + # Platform ID is e.g., "F17" + r"_(?P.*)" + # Version string is e.g., "v05r00" + r"_(?Pv\d{2}r\d{2})" + # optional `_P` for nrt files. + r"(_P)?.nc" +) def platform_id_from_filename(filename: str) -> SUPPORTED_PLATFORM_ID: