From 3ef84581df8c6c0549741855d58ba0a62d3e7092 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Mon, 23 Sep 2024 16:08:13 -0600 Subject: [PATCH 01/21] Init work to support NRT using NSIDC-0080 F18 as input --- seaice_ecdr/nrt.py | 90 ++++++++++----------------------- seaice_ecdr/platforms/config.py | 13 +++++ seaice_ecdr/platforms/models.py | 1 + seaice_ecdr/tb_data.py | 2 +- 4 files changed, 41 insertions(+), 65 deletions(-) diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index 4e31ed39..41e8537d 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -9,14 +9,11 @@ import xarray as xr from loguru import logger from pm_tb_data._types import Hemisphere -from pm_tb_data.fetch.amsr.lance_amsr2 import ( - access_local_lance_data, - download_latest_lance_files, -) +from pm_tb_data.fetch.nsidc_0080 import get_nsidc_0080_tbs_from_disk from seaice_ecdr.ancillary import ANCILLARY_SOURCES from seaice_ecdr.cli.util import datetime_to_date -from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR, LANCE_NRT_DATA_DIR +from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR from seaice_ecdr.initial_daily_ecdr import ( compute_initial_daily_ecdr_dataset, get_idecdr_filepath, @@ -42,7 +39,8 @@ get_intermediate_output_dir, ) -LANCE_RESOLUTION: Final = "12.5" +NRT_RESOLUTION: Final = "25" +NRT_PLATFORM_ID: Final = "F18" def compute_nrt_initial_daily_ecdr_dataset( @@ -51,43 +49,43 @@ def compute_nrt_initial_daily_ecdr_dataset( hemisphere: Hemisphere, ancillary_source: ANCILLARY_SOURCES = "CDRv5", ): - """Create an initial daily ECDR NetCDF using NRT LANCE AMSR2 data.""" - # TODO: handle missing data case. - xr_tbs = access_local_lance_data( + """Create an initial daily ECDR NetCDF using NRT data""" + # TODO: consider extracting the fetch-related code here to `tb_data` module. 
+ xr_tbs = get_nsidc_0080_tbs_from_disk( date=date, hemisphere=hemisphere, - data_dir=LANCE_NRT_DATA_DIR, + resolution=NRT_RESOLUTION, + platform_id=NRT_PLATFORM_ID, ) - data_source: Final = "LANCE AU_SI12" - platform_id: Final = "am2" + data_source: Final = "NSIDC-0080" ecdr_tbs = map_tbs_to_ecdr_channels( - # TODO/Note: this mapping is the same as used for `am2`. mapping=dict( - v19="v18", - h19="h18", - v22="v23", - v37="v36", - h37="h36", + v19="v19", + h19="h19", + v22="v22", + v37="v37", + h37="h37", ), xr_tbs=xr_tbs, hemisphere=hemisphere, - resolution=LANCE_RESOLUTION, + resolution=NRT_RESOLUTION, date=date, data_source=data_source, ) tb_data = EcdrTbData( tbs=ecdr_tbs, - resolution=LANCE_RESOLUTION, + resolution="25", data_source=data_source, - platform_id=platform_id, + platform_id=NRT_PLATFORM_ID, ) nrt_initial_ecdr_ds = compute_initial_daily_ecdr_dataset( date=date, hemisphere=hemisphere, tb_data=tb_data, + # TODO: this needs to be updated. land_spillover_alg="NT2", ancillary_source=ancillary_source, ) @@ -108,8 +106,7 @@ def read_or_create_and_read_nrt_idecdr_ds( date=date, platform_id=platform.id, intermediate_output_dir=intermediate_output_dir, - # TODO: we want to support 25km NRT. - resolution="12.5", + resolution=NRT_RESOLUTION, ) if overwrite or not idecdr_filepath.is_file(): @@ -172,7 +169,7 @@ def temporally_interpolated_nrt_ecdr_dataset( temporally_interpolated_ds = temporal_interpolation( date=date, hemisphere=hemisphere, - resolution=LANCE_RESOLUTION, + resolution=NRT_RESOLUTION, data_stack=data_stack, interp_range=days_to_look_previously, one_sided_limit=days_to_look_previously, @@ -188,12 +185,14 @@ def read_or_create_and_read_nrt_tiecdr_ds( date: dt.date, intermediate_output_dir: Path, overwrite: bool, + # TODO: is it 4 or 5 days? Here we use 4, but the default for the temporal + # interpolation itself is 5. 
days_to_look_previously: int = 4, ) -> xr.Dataset: tie_filepath = get_tie_filepath( date=date, hemisphere=hemisphere, - resolution=LANCE_RESOLUTION, + resolution=NRT_RESOLUTION, intermediate_output_dir=intermediate_output_dir, ) @@ -233,7 +232,7 @@ def nrt_ecdr_for_day( cde_filepath = get_ecdr_filepath( date=date, hemisphere=hemisphere, - resolution=LANCE_RESOLUTION, + resolution=NRT_RESOLUTION, intermediate_output_dir=complete_output_dir, platform_id=platform.id, is_nrt=True, @@ -261,7 +260,7 @@ def nrt_ecdr_for_day( tie_ds=tiecdr_ds, date=date, hemisphere=hemisphere, - resolution=LANCE_RESOLUTION, + resolution=NRT_RESOLUTION, intermediate_output_dir=intermediate_output_dir, is_nrt=True, ancillary_source=ancillary_source, @@ -273,48 +272,12 @@ def nrt_ecdr_for_day( intermediate_output_dir=complete_output_dir, hemisphere=hemisphere, ) - # TODO: still need to "publish" to the complete location logger.success(f"Wrote complete daily ncfile: {written_cde_ncfile}") except Exception as e: logger.exception(f"Failed to create NRT ECDR for {date=} {hemisphere=}") raise e -@click.command(name="download-latest-nrt-data") -@click.option( - "-o", - "--output-dir", - required=True, - type=click.Path( - exists=True, - file_okay=False, - dir_okay=True, - writable=True, - resolve_path=True, - path_type=Path, - ), - show_default=True, - default=LANCE_NRT_DATA_DIR, - help="Directory in which LANCE AMSR2 NRT files will be downloaded to.", -) -@click.option( - "--overwrite", - is_flag=True, - show_default=True, - default=False, - help="Overwrite existing LANCE files.", -) -def download_latest_nrt_data(*, output_dir: Path, overwrite: bool) -> None: - """Download the latest NRT LANCE AMSR2 data to the specified output directory. - - Files are only downloaded if they are considered 'complete' and ready for - NRT processing for the ECDR product. 
This means that the latest available - date of data is never downloaded, as it is considered provisional/subject to - change until a new day's worth of data is available. - """ - download_latest_lance_files(output_dir=output_dir, overwrite=overwrite) - - @click.command(name="daily-nrt") @click.option( "-d", @@ -400,5 +363,4 @@ def nrt_cli(): ... -nrt_cli.add_command(download_latest_nrt_data) nrt_cli.add_command(nrt_ecdr_for_dates) diff --git a/seaice_ecdr/platforms/config.py b/seaice_ecdr/platforms/config.py index b8575318..3eecb420 100644 --- a/seaice_ecdr/platforms/config.py +++ b/seaice_ecdr/platforms/config.py @@ -53,6 +53,18 @@ ), ) + +F18_PLATFORM = Platform( + name="DMSP 5D-3/F18 > Defense Meteorological Satellite Program-F18", + sensor="SSMIS > Special Sensor Microwave Imager/Sounder", + id="F18", + date_range=DateRange( + # TODO: is this accurate? This value from NSIDC-0001 docs. + first_date=dt.date(2017, 1, 1), + last_date=None, + ), +) + F17_PLATFORM = Platform( name="DMSP 5D-3/F17 > Defense Meteorological Satellite Program-F17", sensor="SSMIS > Special Sensor Microwave Imager/Sounder", @@ -105,6 +117,7 @@ AM2_PLATFORM, AME_PLATFORM, F17_PLATFORM, + F18_PLATFORM, F13_PLATFORM, F11_PLATFORM, F08_PLATFORM, diff --git a/seaice_ecdr/platforms/models.py b/seaice_ecdr/platforms/models.py index 15e95380..cfa89eda 100644 --- a/seaice_ecdr/platforms/models.py +++ b/seaice_ecdr/platforms/models.py @@ -15,6 +15,7 @@ SUPPORTED_PLATFORM_ID = Literal[ "am2", # AMSR2 "ame", # AMSRE + "F18", # SSMIS F18 (NRT) "F17", # SSMIS F17 "F13", # SSMI F13 "F11", # SSMI F11 diff --git a/seaice_ecdr/tb_data.py b/seaice_ecdr/tb_data.py index b45aecc2..34593ac6 100644 --- a/seaice_ecdr/tb_data.py +++ b/seaice_ecdr/tb_data.py @@ -254,7 +254,7 @@ def _get_nsidc_0001_tbs( tbs=ecdr_tbs, resolution=tb_resolution, data_source=data_source, - platform_id=platform_id, # type: ignore[arg-type] + platform_id=platform_id, ) return ecdr_tb_data From e3f4120277d33488a8e441435cf682232425a7b9 Mon 
Sep 17 00:00:00 2001 From: Trey Stafford Date: Mon, 23 Sep 2024 17:10:37 -0600 Subject: [PATCH 02/21] Update NRT code to write publication-ready output files --- seaice_ecdr/intermediate_daily.py | 77 +++++++------------------ seaice_ecdr/nrt.py | 31 +++++----- seaice_ecdr/publish_daily.py | 95 +++++++++++++++++++------------ 3 files changed, 97 insertions(+), 106 deletions(-) diff --git a/seaice_ecdr/intermediate_daily.py b/seaice_ecdr/intermediate_daily.py index b2b16acb..487f70ac 100644 --- a/seaice_ecdr/intermediate_daily.py +++ b/seaice_ecdr/intermediate_daily.py @@ -11,7 +11,7 @@ import datetime as dt from functools import cache from pathlib import Path -from typing import Iterable, cast, get_args +from typing import Iterable, get_args import click import numpy as np @@ -474,6 +474,7 @@ def complete_daily_ecdr_ds( - a Dataset containing - The melt onset field - All appropriate QA and QC fields + - sets variable encoding for writting data to NetCDF. """ # Initialize the complete daily ECDR dataset (cde) using the temporally # interpolated ECDR (tie) dataset provided to this function. @@ -505,60 +506,29 @@ def complete_daily_ecdr_ds( cde_ds, hemisphere, resolution, ancillary_source=ancillary_source ) - # TODO: Need to ensure that the cdr_seaice_conc field does not have values - # where seaice cannot occur, eg over land or lakes - - return cde_ds - - -def write_cde_netcdf( - *, - cde_ds: xr.Dataset, - output_filepath: Path, - intermediate_output_dir: Path, - hemisphere: Hemisphere, - uncompressed_fields: Iterable[str] = ("crs", "time", "y", "x"), - excluded_fields: Iterable[str] = [], + # Set nc encoding for variables conc_fields: Iterable[str] = [ "raw_bt_seaice_conc", "raw_nt_seaice_conc", "cdr_seaice_conc", - ], -) -> Path: - """Write the complete, temporally interpolated ECDR to a netCDF file. 
+ ] + for conc_varname in conc_fields: + cde_ds[conc_varname].encoding = { + "zlib": True, + "dtype": "uint8", + "scale_factor": 0.01, + "add_offset": 0.0, + "_FillValue": 255, + } + for coord_var_name in ("crs", "time", "y", "x"): + cde_ds[coord_var_name].encoding = {"zlib": True} - This function also creates a checksum file for the complete daily netcdf. - """ - logger.info( - f"Writing netCDF of complete, temporally interpolated eCDR file to: {output_filepath}" - ) - for excluded_field in excluded_fields: - if excluded_field in cde_ds.variables.keys(): - cde_ds = cde_ds.drop_vars(excluded_field) - - nc_encoding = {} - for varname in cde_ds.variables.keys(): - varname = cast(str, varname) - if varname in conc_fields: - nc_encoding[varname] = { - "zlib": True, - "dtype": "uint8", - "scale_factor": 0.01, - "add_offset": 0.0, - "_FillValue": 255, - } - elif varname not in uncompressed_fields: - nc_encoding[varname] = {"zlib": True} - - cde_ds.to_netcdf( - output_filepath, - encoding=nc_encoding, - unlimited_dims=[ - "time", - ], - ) + cde_ds.encoding = {"unlimited_dims": "time"} + + # TODO: Need to ensure that the cdr_seaice_conc field does not have values + # where seaice cannot occur, eg over land or lakes - return output_filepath + return cde_ds def make_standard_cdecdr_netcdf( @@ -648,13 +618,10 @@ def make_standard_cdecdr_netcdf( ancillary_source=ancillary_source, ) - written_cde_ncfile = write_cde_netcdf( - cde_ds=cde_ds, - output_filepath=cde_filepath, - intermediate_output_dir=intermediate_output_dir, - hemisphere=hemisphere, + cde_ds.to_netcdf( + cde_filepath, ) - logger.success(f"Wrote complete daily ncfile: {written_cde_ncfile}") + logger.success(f"Wrote complete daily ncfile: {cde_filepath}") except Exception as e: logger.exception( "Failed to create complete daily NetCDF for" diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index 41e8537d..68ddc92e 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -21,12 +21,14 @@ ) from 
seaice_ecdr.intermediate_daily import ( complete_daily_ecdr_ds, - get_ecdr_filepath, - write_cde_netcdf, ) from seaice_ecdr.platforms import ( PLATFORM_CONFIG, ) +from seaice_ecdr.publish_daily import ( + get_complete_daily_filepath, + make_publication_ready_ds, +) from seaice_ecdr.tb_data import EcdrTbData, map_tbs_to_ecdr_channels from seaice_ecdr.temporal_composite_daily import ( get_tie_filepath, @@ -228,21 +230,19 @@ def nrt_ecdr_for_day( hemisphere=hemisphere, is_nrt=True, ) - platform = PLATFORM_CONFIG.get_platform_by_date(date) - cde_filepath = get_ecdr_filepath( + nrt_output_filepath = get_complete_daily_filepath( date=date, hemisphere=hemisphere, resolution=NRT_RESOLUTION, - intermediate_output_dir=complete_output_dir, - platform_id=platform.id, + complete_output_dir=complete_output_dir, is_nrt=True, ) - if cde_filepath.is_file() and not overwrite: - logger.info(f"File for {date=} already exists ({cde_filepath}).") + if nrt_output_filepath.is_file() and not overwrite: + logger.info(f"File for {date=} already exists ({nrt_output_filepath}).") return - if not cde_filepath.is_file() or overwrite: + if not nrt_output_filepath.is_file() or overwrite: intermediate_output_dir = get_intermediate_output_dir( base_output_dir=base_output_dir, hemisphere=hemisphere, @@ -265,14 +265,15 @@ def nrt_ecdr_for_day( is_nrt=True, ancillary_source=ancillary_source, ) - - written_cde_ncfile = write_cde_netcdf( - cde_ds=cde_ds, - output_filepath=cde_filepath, - intermediate_output_dir=complete_output_dir, + daily_ds = make_publication_ready_ds( + intermediate_daily_ds=cde_ds, hemisphere=hemisphere, ) - logger.success(f"Wrote complete daily ncfile: {written_cde_ncfile}") + + daily_ds.to_netcdf(nrt_output_filepath) + logger.success(f"Wrote complete daily NRT NC file: {nrt_output_filepath}") + + # TODO: write checksum file for NRTs? 
except Exception as e: logger.exception(f"Failed to create NRT ECDR for {date=} {hemisphere=}") raise e diff --git a/seaice_ecdr/publish_daily.py b/seaice_ecdr/publish_daily.py index 1c8cf490..942a470f 100644 --- a/seaice_ecdr/publish_daily.py +++ b/seaice_ecdr/publish_daily.py @@ -4,6 +4,7 @@ from typing import get_args import click +import xarray as xr from datatree import DataTree from loguru import logger from pm_tb_data._types import NORTH, Hemisphere @@ -27,6 +28,11 @@ standard_daily_filename, ) +# TODO: consider extracting to config or a kwarg of this function for more +# flexible use with other platforms in the future. +PROTOTYPE_PLATFORM_ID: SUPPORTED_PLATFORM_ID = "am2" +PROTOTYPE_PLATFORM_DATA_GROUP_NAME = f"prototype_{PROTOTYPE_PLATFORM_ID}" + # TODO: this and `get_complete_daily_filepath` are identical (aside from var # names) to `get_ecdr_filepath` and `get_ecdr_dir`. @@ -82,6 +88,54 @@ def get_complete_daily_filepath( return ecdr_filepath +def make_publication_ready_ds( + intermediate_daily_ds: xr.Dataset, + hemisphere: Hemisphere, +) -> DataTree: + """Take an intermediate daily dataset and prepare for publication. + + * Moves supplementary fields into "cdr_supplementary" group + * Removes `valid_range` attr from coordinate vars + * Adds `coverage_content_type: coordinate` attr to coordinate vars + * Adds `coordinates` attr to data variables + """ + # publication-ready daily data are grouped using `DataTree`. + # Create a `cdr_supplementary` group for "supplemntary" fields + cdr_supplementary_fields = [ + "raw_bt_seaice_conc", + "raw_nt_seaice_conc", + "surface_type_mask", + ] + # Melt onset only occurs in the NH. + if hemisphere == NORTH: + cdr_supplementary_fields.append("cdr_melt_onset_day") + + # Drop x, y, time coordinate variables. These will be inherited from the + # root group. + cdr_supplementary_group = intermediate_daily_ds[cdr_supplementary_fields].drop_vars( + ["x", "y", "time"] + ) + # remove attrs from supplementary group. 
These will be inherted from the + # root group. + cdr_supplementary_group.attrs = {} + + complete_daily_ds: DataTree = DataTree.from_dict( + { + "/": intermediate_daily_ds[ + [k for k in intermediate_daily_ds if k not in cdr_supplementary_fields] + ], + "cdr_supplementary": cdr_supplementary_group, + } + ) + + # Remove `valid_range` from coordinate attrs + remove_valid_range_from_coordinate_vars(complete_daily_ds) + add_coordinate_coverage_content_type(complete_daily_ds) + add_coordinates_attr(complete_daily_ds) + + return complete_daily_ds + + # TODO: consider a better name. `publish` implies this function might actually # publish it to a publicly accessible archive. That's something ops will do # separately. This just generates the publication-ready nc file to it's expected @@ -100,10 +154,6 @@ def publish_daily_nc( output NC file's root-group variables are all taken from the default platforms given by the platform start date configuration. """ - # TODO: consider extracting to config or a kwarg of this function for more - # flexible use with other platforms in the future. - PROTOTYPE_PLATFORM_ID: SUPPORTED_PLATFORM_ID = "am2" - PROTOTYPE_PLATFORM_DATA_GROUP_NAME = f"prototype_{PROTOTYPE_PLATFORM_ID}" intermediate_output_dir = get_intermediate_output_dir( base_output_dir=base_output_dir, @@ -120,35 +170,13 @@ def publish_daily_nc( is_nrt=False, ) - # publication-ready daily data are grouped using `DataTree`. - # Create a `cdr_supplementary` group for "supplemntary" fields - cdr_supplementary_fields = [ - "raw_bt_seaice_conc", - "raw_nt_seaice_conc", - "surface_type_mask", - ] - # Melt onset only occurs in the NH. - if hemisphere == NORTH: - cdr_supplementary_fields.append("cdr_melt_onset_day") - - # Drop x, y, time coordinate variables. These will be inherited from the - # root group. - cdr_supplementary_group = default_daily_ds[cdr_supplementary_fields].drop_vars( - ["x", "y", "time"] - ) - # remove attrs from supplementary group. 
These will be inherted from the - # root group. - cdr_supplementary_group.attrs = {} - - complete_daily_ds: DataTree = DataTree.from_dict( - { - "/": default_daily_ds[ - [k for k in default_daily_ds if k not in cdr_supplementary_fields] - ], - "cdr_supplementary": cdr_supplementary_group, - } + # Prepare a dataset that's ready for publication. + complete_daily_ds = make_publication_ready_ds( + intermediate_daily_ds=default_daily_ds, + hemisphere=hemisphere, ) + # Add the prototype group if there's data. if PLATFORM_CONFIG.platform_available_for_date( platform_id=PROTOTYPE_PLATFORM_ID, date=date, @@ -200,11 +228,6 @@ def publish_daily_nc( f"Failed to find prototype daily file for {date=} {PROTOTYPE_PLATFORM_ID=}" ) - # Remove `valid_range` from coordinate attrs - remove_valid_range_from_coordinate_vars(complete_daily_ds) - add_coordinate_coverage_content_type(complete_daily_ds) - add_coordinates_attr(complete_daily_ds) - # write out finalized nc file. complete_output_dir = get_complete_output_dir( base_output_dir=base_output_dir, From ee18544d4610c4052336c49891e9fc070217961a Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Mon, 23 Sep 2024 17:12:50 -0600 Subject: [PATCH 03/21] Cleanup now-unused constant --- seaice_ecdr/constants.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/seaice_ecdr/constants.py b/seaice_ecdr/constants.py index d8e2b1da..a4f27baa 100644 --- a/seaice_ecdr/constants.py +++ b/seaice_ecdr/constants.py @@ -65,10 +65,6 @@ def _get_env_subdir_str() -> str: LOGS_DIR = NSIDC_NFS_SHARE_DIR / f"{ECDR_PRODUCT_VERSION}_logs" / _env_subdir LOGS_DIR.mkdir(parents=True, exist_ok=True) -# Location of LANCE AMSR2 NRT data files: -# TODO: nest the subdir under an `ecdr_inputs` or similar? -LANCE_NRT_DATA_DIR = NSIDC_NFS_SHARE_DIR / "lance_amsr2_nrt_data" - # Location of surface mask & geo-information files. 
CDR_ANCILLARY_DIR = NSIDC_NFS_SHARE_DIR / f"{ECDR_PRODUCT_VERSION}_ancillary" CDRv4_ANCILLARY_DIR = NSIDC_NFS_SHARE_DIR / "cdrv4_equiv_ancillary" From eaddc21c3720fc1c4a40fd3c9bc6b131462cfef6 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Mon, 23 Sep 2024 17:13:00 -0600 Subject: [PATCH 04/21] Update doc & help strings --- seaice_ecdr/nrt.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index 68ddc92e..663bb605 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -224,7 +224,7 @@ def nrt_ecdr_for_day( overwrite: bool, ancillary_source: ANCILLARY_SOURCES = "CDRv5", ): - """Create an initial daily ECDR NetCDF using NRT LANCE AMSR2 data.""" + """Create an initial daily ECDR NetCDF using NRT NSIDC-0080 F18 data.""" complete_output_dir = get_complete_output_dir( base_output_dir=base_output_dir, hemisphere=hemisphere, @@ -330,13 +330,7 @@ def nrt_ecdr_for_day( @click.option( "--overwrite", is_flag=True, - help=( - "Overwrite intermediate and final outputs. CAUTION: because lance data is temporary," - " this action could be destructive in a permenant way. E.g,. if input data for a" - " day that this CLI is being run for was previously generated with available" - " lance data, but that data no longer exists, the resulting file may be empty or" - " have significant data gaps. Use this primarily in a development environment." 
- ), + help=("Overwrite intermediate and final outputs."), ) def nrt_ecdr_for_dates( *, From e23be68aaca2fb8d2e9ed97f3c7c2f1586e60da9 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Mon, 23 Sep 2024 17:19:08 -0600 Subject: [PATCH 05/21] Fixup output filename for publication-ready nrt files --- seaice_ecdr/daily_aggregate.py | 2 ++ seaice_ecdr/nrt.py | 1 + seaice_ecdr/publish_daily.py | 8 +++++--- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/seaice_ecdr/daily_aggregate.py b/seaice_ecdr/daily_aggregate.py index 814ef6a4..8a140405 100644 --- a/seaice_ecdr/daily_aggregate.py +++ b/seaice_ecdr/daily_aggregate.py @@ -48,12 +48,14 @@ def _get_daily_complete_filepaths_for_year( dt.date(year, 1, 1), PLATFORM_CONFIG.get_first_platform_start_date() ) for period in pd.period_range(start=start_date, end=dt.date(year, 12, 31)): + platform = PLATFORM_CONFIG.get_platform_by_date(period.to_timestamp().date()) expected_fp = get_complete_daily_filepath( date=period.to_timestamp().date(), hemisphere=hemisphere, resolution=resolution, complete_output_dir=complete_output_dir, is_nrt=False, + platform_id=platform.id, ) if expected_fp.is_file(): data_list.append(expected_fp) diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index 663bb605..9bf792d1 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -235,6 +235,7 @@ def nrt_ecdr_for_day( hemisphere=hemisphere, resolution=NRT_RESOLUTION, complete_output_dir=complete_output_dir, + platform_id=NRT_PLATFORM_ID, is_nrt=True, ) diff --git a/seaice_ecdr/publish_daily.py b/seaice_ecdr/publish_daily.py index 942a470f..b0e88a5b 100644 --- a/seaice_ecdr/publish_daily.py +++ b/seaice_ecdr/publish_daily.py @@ -60,20 +60,20 @@ def get_complete_daily_filepath( resolution: ECDR_SUPPORTED_RESOLUTIONS, complete_output_dir: Path, is_nrt: bool, + platform_id: SUPPORTED_PLATFORM_ID, ): - platform = PLATFORM_CONFIG.get_platform_by_date(date) if is_nrt: ecdr_filename = nrt_daily_filename( hemisphere=hemisphere, date=date, - 
platform_id=platform.id, + platform_id=platform_id, resolution=resolution, ) else: ecdr_filename = standard_daily_filename( hemisphere=hemisphere, date=date, - platform_id=platform.id, + platform_id=platform_id, resolution=resolution, ) @@ -234,12 +234,14 @@ def publish_daily_nc( hemisphere=hemisphere, is_nrt=False, ) + platform = PLATFORM_CONFIG.get_platform_by_date(date) complete_daily_filepath = get_complete_daily_filepath( date=date, resolution=resolution, complete_output_dir=complete_output_dir, hemisphere=hemisphere, is_nrt=False, + platform_id=platform.id, ) complete_daily_ds.to_netcdf(complete_daily_filepath) logger.success(f"Staged NC file for publication: {complete_daily_filepath}") From c8234374b64e7a13a6d9dd629b181edab8ce4023 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Mon, 23 Sep 2024 17:37:23 -0600 Subject: [PATCH 06/21] Write checksums for nrt files --- seaice_ecdr/nrt.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index 9bf792d1..d6a7e2d8 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -12,6 +12,7 @@ from pm_tb_data.fetch.nsidc_0080 import get_nsidc_0080_tbs_from_disk from seaice_ecdr.ancillary import ANCILLARY_SOURCES +from seaice_ecdr.checksum import write_checksum_file from seaice_ecdr.cli.util import datetime_to_date from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR from seaice_ecdr.initial_daily_ecdr import ( @@ -274,7 +275,12 @@ def nrt_ecdr_for_day( daily_ds.to_netcdf(nrt_output_filepath) logger.success(f"Wrote complete daily NRT NC file: {nrt_output_filepath}") - # TODO: write checksum file for NRTs? 
+ # write checksum file for NRTs + checksums_dir = nrt_output_filepath.parent / "checksums" + write_checksum_file( + input_filepath=nrt_output_filepath, + output_dir=checksums_dir, + ) except Exception as e: logger.exception(f"Failed to create NRT ECDR for {date=} {hemisphere=}") raise e From 6185a9df556ad14c12b17c2b678d63cae69b5e83 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Mon, 23 Sep 2024 17:45:13 -0600 Subject: [PATCH 07/21] Simplify handling of nrt temporal interp range We look 5 days into the past to forward-fill missing data for the target date. --- seaice_ecdr/nrt.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index d6a7e2d8..c22b5e25 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -44,6 +44,9 @@ NRT_RESOLUTION: Final = "25" NRT_PLATFORM_ID: Final = "F18" +# Number of days to look previously for temporal interpolation (forward +# gap-filling) +NRT_DAYS_TO_LOOK_PREVIOUSLY: Final = 5 def compute_nrt_initial_daily_ecdr_dataset( @@ -152,12 +155,11 @@ def temporally_interpolated_nrt_ecdr_dataset( date: dt.date, intermediate_output_dir: Path, overwrite: bool, - days_to_look_previously: int = 5, ancillary_source: ANCILLARY_SOURCES = "CDRv5", ) -> xr.Dataset: init_datasets = [] for date in date_range( - start_date=date - dt.timedelta(days=days_to_look_previously), end_date=date + start_date=date - dt.timedelta(days=NRT_DAYS_TO_LOOK_PREVIOUSLY), end_date=date ): init_dataset = read_or_create_and_read_nrt_idecdr_ds( date=date, @@ -174,8 +176,8 @@ def temporally_interpolated_nrt_ecdr_dataset( hemisphere=hemisphere, resolution=NRT_RESOLUTION, data_stack=data_stack, - interp_range=days_to_look_previously, - one_sided_limit=days_to_look_previously, + interp_range=NRT_DAYS_TO_LOOK_PREVIOUSLY, + one_sided_limit=NRT_DAYS_TO_LOOK_PREVIOUSLY, ancillary_source=ancillary_source, ) @@ -188,9 +190,6 @@ def read_or_create_and_read_nrt_tiecdr_ds( date: dt.date, intermediate_output_dir: 
Path, overwrite: bool, - # TODO: is it 4 or 5 days? Here we use 4, but the default for the temporal - # interpolation itself is 5. - days_to_look_previously: int = 4, ) -> xr.Dataset: tie_filepath = get_tie_filepath( date=date, @@ -205,7 +204,6 @@ def read_or_create_and_read_nrt_tiecdr_ds( date=date, intermediate_output_dir=intermediate_output_dir, overwrite=overwrite, - days_to_look_previously=days_to_look_previously, ) write_tie_netcdf( From c34a269a5c52afc28448d3f9628b63cde8e85ff1 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 24 Sep 2024 08:35:36 -0600 Subject: [PATCH 08/21] Write NRT outputs to G10016 v3 dir and update fn w/ new version --- seaice_ecdr/constants.py | 10 ++++++++++ seaice_ecdr/nrt.py | 34 +++++++++++++++++++++++----------- seaice_ecdr/util.py | 5 ++++- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/seaice_ecdr/constants.py b/seaice_ecdr/constants.py index a4f27baa..90a57ced 100644 --- a/seaice_ecdr/constants.py +++ b/seaice_ecdr/constants.py @@ -68,3 +68,13 @@ def _get_env_subdir_str() -> str: # Location of surface mask & geo-information files. 
CDR_ANCILLARY_DIR = NSIDC_NFS_SHARE_DIR / f"{ECDR_PRODUCT_VERSION}_ancillary" CDRv4_ANCILLARY_DIR = NSIDC_NFS_SHARE_DIR / "cdrv4_equiv_ancillary" + +# NRT outputs +ECDR_NRT_PRODUCT_VERSION = "v03r00" +NSIDC_NFS_NRT_SHARE_DIR = Path("/share/apps/G10016_V3") +if not NSIDC_NFS_SHARE_DIR.is_dir(): + raise RuntimeError(f"Expected {NSIDC_NFS_NRT_SHARE_DIR} to exist, but it does not.") +DEFAULT_BASE_NRT_OUTPUT_DIR = ( + NSIDC_NFS_NRT_SHARE_DIR / ECDR_NRT_PRODUCT_VERSION / _env_subdir +) +DEFAULT_BASE_NRT_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index c22b5e25..d8c1224d 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -14,7 +14,7 @@ from seaice_ecdr.ancillary import ANCILLARY_SOURCES from seaice_ecdr.checksum import write_checksum_file from seaice_ecdr.cli.util import datetime_to_date -from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR +from seaice_ecdr.constants import DEFAULT_BASE_NRT_OUTPUT_DIR from seaice_ecdr.initial_daily_ecdr import ( compute_initial_daily_ecdr_dataset, get_idecdr_filepath, @@ -161,6 +161,7 @@ def temporally_interpolated_nrt_ecdr_dataset( for date in date_range( start_date=date - dt.timedelta(days=NRT_DAYS_TO_LOOK_PREVIOUSLY), end_date=date ): + # TODO: support missing data periods. 
init_dataset = read_or_create_and_read_nrt_idecdr_ds( date=date, hemisphere=hemisphere, @@ -215,15 +216,9 @@ def read_or_create_and_read_nrt_tiecdr_ds( return tie_ds -def nrt_ecdr_for_day( - *, - date: dt.date, - hemisphere: Hemisphere, - base_output_dir: Path, - overwrite: bool, - ancillary_source: ANCILLARY_SOURCES = "CDRv5", -): - """Create an initial daily ECDR NetCDF using NRT NSIDC-0080 F18 data.""" +def get_nrt_complete_daily_filepath( + *, base_output_dir: Path, hemisphere: Hemisphere, date: dt.date +) -> Path: complete_output_dir = get_complete_output_dir( base_output_dir=base_output_dir, hemisphere=hemisphere, @@ -238,6 +233,23 @@ def nrt_ecdr_for_day( is_nrt=True, ) + return nrt_output_filepath + + +def nrt_ecdr_for_day( + *, + date: dt.date, + hemisphere: Hemisphere, + base_output_dir: Path, + overwrite: bool, + ancillary_source: ANCILLARY_SOURCES = "CDRv5", +): + """Create an initial daily ECDR NetCDF using NRT NSIDC-0080 F18 data.""" + nrt_output_filepath = get_nrt_complete_daily_filepath( + base_output_dir=base_output_dir, + hemisphere=hemisphere, + date=date, + ) if nrt_output_filepath.is_file() and not overwrite: logger.info(f"File for {date=} already exists ({nrt_output_filepath}).") return @@ -324,7 +336,7 @@ def nrt_ecdr_for_day( resolve_path=True, path_type=Path, ), - default=DEFAULT_BASE_OUTPUT_DIR, + default=DEFAULT_BASE_NRT_OUTPUT_DIR, help=( "Base output directory for NRT ECDR outputs." 
" Subdirectories are created for outputs of" diff --git a/seaice_ecdr/util.py b/seaice_ecdr/util.py index b7659b37..82c893a0 100644 --- a/seaice_ecdr/util.py +++ b/seaice_ecdr/util.py @@ -10,7 +10,7 @@ from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS from seaice_ecdr.ancillary import ANCILLARY_SOURCES, get_ocean_mask -from seaice_ecdr.constants import ECDR_PRODUCT_VERSION +from seaice_ecdr.constants import ECDR_NRT_PRODUCT_VERSION, ECDR_PRODUCT_VERSION from seaice_ecdr.grid_id import get_grid_id from seaice_ecdr.platforms import SUPPORTED_PLATFORM_ID @@ -55,6 +55,9 @@ def nrt_daily_filename( ext = standard_fn_path.suffix nrt_fn = fn_base + "_P" + ext + # Replace the standard G02202 version number with the NRT version. + nrt_fn = nrt_fn.replace(ECDR_PRODUCT_VERSION, ECDR_NRT_PRODUCT_VERSION) + return nrt_fn From d04df9aec82a0874a10d1068bf754af31c9b021b Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 24 Sep 2024 10:15:52 -0600 Subject: [PATCH 09/21] Update land spillover alg for NRT data to use NT2_BT --- seaice_ecdr/nrt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index d8c1224d..b9a5d381 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -47,6 +47,7 @@ # Number of days to look previously for temporal interpolation (forward # gap-filling) NRT_DAYS_TO_LOOK_PREVIOUSLY: Final = 5 +NRT_LAND_SPILLOVER_ALG: Final = "NT2_BT" def compute_nrt_initial_daily_ecdr_dataset( @@ -91,8 +92,7 @@ def compute_nrt_initial_daily_ecdr_dataset( date=date, hemisphere=hemisphere, tb_data=tb_data, - # TODO: this needs to be updated. 
- land_spillover_alg="NT2", + land_spillover_alg=NRT_LAND_SPILLOVER_ALG, ancillary_source=ancillary_source, ) From ec2fa4127ca697688da140eca212e3e74ddd990f Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 24 Sep 2024 10:45:44 -0600 Subject: [PATCH 10/21] Update NRT NetCDF attrs for publication to G10016 --- seaice_ecdr/constants.py | 27 ++++++++++++++++++++++++--- seaice_ecdr/initial_daily_ecdr.py | 2 +- seaice_ecdr/nc_attrs.py | 6 +++--- seaice_ecdr/nrt.py | 16 ++++++++++++++-- seaice_ecdr/tests/unit/test_util.py | 4 ++-- seaice_ecdr/util.py | 4 +++- 6 files changed, 47 insertions(+), 12 deletions(-) diff --git a/seaice_ecdr/constants.py b/seaice_ecdr/constants.py index 90a57ced..7b44705d 100644 --- a/seaice_ecdr/constants.py +++ b/seaice_ecdr/constants.py @@ -14,8 +14,26 @@ import subprocess from pathlib import Path +from pydantic import BaseModel + + +class ProductVersion(BaseModel): + major_version_number: int + revision_number: int + + @property + def version_str(self) -> str: + return f"v{self.major_version_number:02}r{self.revision_number:02}" + + def __str__(self) -> str: + return self.version_str + + # This is the version string for the ECDR product. 
-ECDR_PRODUCT_VERSION = "v05r00" +ECDR_PRODUCT_VERSION = ProductVersion( + major_version_number=5, + revision_number=0, +) # NSIDC infrastructure-specific paths: NSIDC_NFS_SHARE_DIR = Path("/share/apps/G02202_V5") @@ -70,11 +88,14 @@ def _get_env_subdir_str() -> str: CDRv4_ANCILLARY_DIR = NSIDC_NFS_SHARE_DIR / "cdrv4_equiv_ancillary" # NRT outputs -ECDR_NRT_PRODUCT_VERSION = "v03r00" +ECDR_NRT_PRODUCT_VERSION = ProductVersion( + major_version_number=3, + revision_number=0, +) NSIDC_NFS_NRT_SHARE_DIR = Path("/share/apps/G10016_V3") if not NSIDC_NFS_SHARE_DIR.is_dir(): raise RuntimeError(f"Expected {NSIDC_NFS_NRT_SHARE_DIR} to exist, but it does not.") DEFAULT_BASE_NRT_OUTPUT_DIR = ( - NSIDC_NFS_NRT_SHARE_DIR / ECDR_NRT_PRODUCT_VERSION / _env_subdir + NSIDC_NFS_NRT_SHARE_DIR / ECDR_NRT_PRODUCT_VERSION.version_str / _env_subdir ) DEFAULT_BASE_NRT_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) diff --git a/seaice_ecdr/initial_daily_ecdr.py b/seaice_ecdr/initial_daily_ecdr.py index f46f2a0c..5d0018cf 100644 --- a/seaice_ecdr/initial_daily_ecdr.py +++ b/seaice_ecdr/initial_daily_ecdr.py @@ -335,7 +335,7 @@ def get_flagmask( if ancillary_source == "CDRv4": version_string = "v04r00" elif ancillary_source == "CDRv5": - version_string = ECDR_PRODUCT_VERSION + version_string = ECDR_PRODUCT_VERSION.version_str flagmask_fn = CDR_ANCILLARY_DIR / f"flagmask_{gridid}_{version_string}.dat" try: diff --git a/seaice_ecdr/nc_attrs.py b/seaice_ecdr/nc_attrs.py index be5cecd0..cd84e9f2 100644 --- a/seaice_ecdr/nc_attrs.py +++ b/seaice_ecdr/nc_attrs.py @@ -167,12 +167,12 @@ def get_global_attrs( **time_coverage_attrs, title=( "NOAA-NSIDC Climate Data Record of Passive Microwave" - " Sea Ice Concentration Version 5" + f" Sea Ice Concentration Version {ECDR_PRODUCT_VERSION.major_version_number}" ), program="NOAA Climate Data Record Program", software_version_id=_get_software_version_id(), - metadata_link="https://nsidc.org/data/g02202/versions/5", - product_version=ECDR_PRODUCT_VERSION, + 
metadata_link=f"https://nsidc.org/data/g02202/versions/{ECDR_PRODUCT_VERSION.major_version_number}", + product_version=ECDR_PRODUCT_VERSION.version_str, spatial_resolution=f"{resolution}km", standard_name_vocabulary="CF Standard Name Table (v83, 17 October 2023)", id="https://doi.org/10.7265/rjzb-pf78", diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index b9a5d381..373f780d 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -14,7 +14,7 @@ from seaice_ecdr.ancillary import ANCILLARY_SOURCES from seaice_ecdr.checksum import write_checksum_file from seaice_ecdr.cli.util import datetime_to_date -from seaice_ecdr.constants import DEFAULT_BASE_NRT_OUTPUT_DIR +from seaice_ecdr.constants import DEFAULT_BASE_NRT_OUTPUT_DIR, ECDR_NRT_PRODUCT_VERSION from seaice_ecdr.initial_daily_ecdr import ( compute_initial_daily_ecdr_dataset, get_idecdr_filepath, @@ -161,7 +161,8 @@ def temporally_interpolated_nrt_ecdr_dataset( for date in date_range( start_date=date - dt.timedelta(days=NRT_DAYS_TO_LOOK_PREVIOUSLY), end_date=date ): - # TODO: support missing data periods. + # TODO: support missing data periods (e.g., running for 2024-09-22 does + # not work atm). 
init_dataset = read_or_create_and_read_nrt_idecdr_ds( date=date, hemisphere=hemisphere, @@ -282,6 +283,17 @@ def nrt_ecdr_for_day( hemisphere=hemisphere, ) + # Update global attrs to reflect G10016 instead of G02202: + daily_ds.attrs["id"] = "https://doi.org/10.7265/j0z0-4h87" + daily_ds.attrs["metadata_link"] = ( + f"https://nsidc.org/data/g10016/versions/{ECDR_NRT_PRODUCT_VERSION.major_version_number}" + ) + daily_ds.attrs["title"] = ( + "Near-Real-Time NOAA-NSIDC Climate Data Record of Passive Microwave" + f" Sea Ice Concentration Version {ECDR_NRT_PRODUCT_VERSION.major_version_number}" + ) + daily_ds.attrs["product_version"] = ECDR_NRT_PRODUCT_VERSION.version_str + daily_ds.to_netcdf(nrt_output_filepath) logger.success(f"Wrote complete daily NRT NC file: {nrt_output_filepath}") diff --git a/seaice_ecdr/tests/unit/test_util.py b/seaice_ecdr/tests/unit/test_util.py index 4cbade15..53081d2d 100644 --- a/seaice_ecdr/tests/unit/test_util.py +++ b/seaice_ecdr/tests/unit/test_util.py @@ -8,7 +8,7 @@ from pm_tb_data._types import NORTH, SOUTH from seaice_ecdr import util -from seaice_ecdr.constants import ECDR_PRODUCT_VERSION +from seaice_ecdr.constants import ECDR_NRT_PRODUCT_VERSION, ECDR_PRODUCT_VERSION from seaice_ecdr.multiprocess_intermediate_daily import get_dates_by_year from seaice_ecdr.platforms.models import SUPPORTED_PLATFORM_ID from seaice_ecdr.util import ( @@ -46,7 +46,7 @@ def test_daily_filename_south(): def test_nrt_daily_filename(): - expected = f"sic_psn12.5_20210101_am2_{ECDR_PRODUCT_VERSION}_P.nc" + expected = f"sic_psn12.5_20210101_am2_{ECDR_NRT_PRODUCT_VERSION}_P.nc" actual = nrt_daily_filename( hemisphere=NORTH, resolution="12.5", platform_id="am2", date=dt.date(2021, 1, 1) diff --git a/seaice_ecdr/util.py b/seaice_ecdr/util.py index 82c893a0..e412d21d 100644 --- a/seaice_ecdr/util.py +++ b/seaice_ecdr/util.py @@ -56,7 +56,9 @@ def nrt_daily_filename( nrt_fn = fn_base + "_P" + ext # Replace the standard G02202 version number with the NRT 
version. - nrt_fn = nrt_fn.replace(ECDR_PRODUCT_VERSION, ECDR_NRT_PRODUCT_VERSION) + nrt_fn = nrt_fn.replace( + ECDR_PRODUCT_VERSION.version_str, ECDR_NRT_PRODUCT_VERSION.version_str + ) return nrt_fn From a32cf41b04dcaf09cfbc09dccbc139670b695412 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 24 Sep 2024 11:52:41 -0600 Subject: [PATCH 11/21] Update summary text in nrt files --- seaice_ecdr/nrt.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index 373f780d..263a7a49 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -293,6 +293,9 @@ def nrt_ecdr_for_day( f" Sea Ice Concentration Version {ECDR_NRT_PRODUCT_VERSION.major_version_number}" ) daily_ds.attrs["product_version"] = ECDR_NRT_PRODUCT_VERSION.version_str + daily_ds.attrs["summary"] = ( + f"This data set provides a near-real-time (NRT) passive microwave sea ice concentration climate data record (CDR) based on gridded brightness temperatures (TBs) from the Defense Meteorological Satellite Program (DMSP) passive microwave radiometer: the Special Sensor Microwave Imager/Sounder (SSMIS) F18. The sea ice concentration CDR is an estimate of sea ice concentration that is produced by combining concentration estimates from two algorithms developed at the NASA Goddard Space Flight Center (GSFC): the NASA Team algorithm and the Bootstrap algorithm. The individual algorithms are used to process and combine brightness temperature data at NSIDC. This product is designed to provide an NRT time series of sea ice concentrations (the fraction, or percentage, of ocean area covered by sea ice). The data are gridded on the NSIDC polar stereographic grid with {NRT_RESOLUTION} x {NRT_RESOLUTION} km grid cells and are available in NetCDF file format. Each file contains a variable with the CDR concentration values as well as variables that hold the NASA Team and Bootstrap processed concentrations for reference. 
Variables containing standard deviation, quality flags, and projection information are also included." + ) daily_ds.to_netcdf(nrt_output_filepath) logger.success(f"Wrote complete daily NRT NC file: {nrt_output_filepath}") From 0770d10ac960670527656a03e7cfb3aba0b8dd05 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 24 Sep 2024 13:16:16 -0600 Subject: [PATCH 12/21] NRT: Use null grid for missing dates of data --- seaice_ecdr/nrt.py | 52 ++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index 263a7a49..8fff66f1 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -30,7 +30,7 @@ get_complete_daily_filepath, make_publication_ready_ds, ) -from seaice_ecdr.tb_data import EcdrTbData, map_tbs_to_ecdr_channels +from seaice_ecdr.tb_data import EcdrTbData, get_null_ecdr_tbs, map_tbs_to_ecdr_channels from seaice_ecdr.temporal_composite_daily import ( get_tie_filepath, temporal_interpolation, @@ -58,28 +58,36 @@ def compute_nrt_initial_daily_ecdr_dataset( ): """Create an initial daily ECDR NetCDF using NRT data""" # TODO: consider extracting the fetch-related code here to `tb_data` module. 
- xr_tbs = get_nsidc_0080_tbs_from_disk( - date=date, - hemisphere=hemisphere, - resolution=NRT_RESOLUTION, - platform_id=NRT_PLATFORM_ID, - ) data_source: Final = "NSIDC-0080" + try: + xr_tbs = get_nsidc_0080_tbs_from_disk( + date=date, + hemisphere=hemisphere, + resolution=NRT_RESOLUTION, + platform_id=NRT_PLATFORM_ID, + ) - ecdr_tbs = map_tbs_to_ecdr_channels( - mapping=dict( - v19="v19", - h19="h19", - v22="v22", - v37="v37", - h37="h37", - ), - xr_tbs=xr_tbs, - hemisphere=hemisphere, - resolution=NRT_RESOLUTION, - date=date, - data_source=data_source, - ) + ecdr_tbs = map_tbs_to_ecdr_channels( + mapping=dict( + v19="v19", + h19="h19", + v22="v22", + v37="v37", + h37="h37", + ), + xr_tbs=xr_tbs, + hemisphere=hemisphere, + resolution=NRT_RESOLUTION, + date=date, + data_source=data_source, + ) + except FileNotFoundError: + ecdr_tbs = get_null_ecdr_tbs(hemisphere=hemisphere, resolution=NRT_RESOLUTION) + logger.warning( + f"Using all-null TBS for date={date}," + f" hemisphere={hemisphere}," + f" resolution={NRT_RESOLUTION}" + ) tb_data = EcdrTbData( tbs=ecdr_tbs, @@ -161,8 +169,6 @@ def temporally_interpolated_nrt_ecdr_dataset( for date in date_range( start_date=date - dt.timedelta(days=NRT_DAYS_TO_LOOK_PREVIOUSLY), end_date=date ): - # TODO: support missing data periods (e.g., running for 2024-09-22 does - # not work atm). 
init_dataset = read_or_create_and_read_nrt_idecdr_ds( date=date, hemisphere=hemisphere, From 1c0635ab51ed5522d4c7217731bedab1d576c9dc Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 24 Sep 2024 14:10:13 -0600 Subject: [PATCH 13/21] Add TODO about ancillary data location --- seaice_ecdr/constants.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/seaice_ecdr/constants.py b/seaice_ecdr/constants.py index 7b44705d..cfd58548 100644 --- a/seaice_ecdr/constants.py +++ b/seaice_ecdr/constants.py @@ -84,6 +84,9 @@ def _get_env_subdir_str() -> str: LOGS_DIR.mkdir(parents=True, exist_ok=True) # Location of surface mask & geo-information files. +# TODO: we should consider moving the ancillary files to a different +# location. Currently, ancillary files are stored in the G02202 V5 specific dir, +# but the NRT product is G10016 and it uses the same ancillary data. CDR_ANCILLARY_DIR = NSIDC_NFS_SHARE_DIR / f"{ECDR_PRODUCT_VERSION}_ancillary" CDRv4_ANCILLARY_DIR = NSIDC_NFS_SHARE_DIR / "cdrv4_equiv_ancillary" From 6a818f434430762204d008939f1aa3f7841ad01f Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 24 Sep 2024 15:28:08 -0600 Subject: [PATCH 14/21] Wrap NRT CLI so that nrt platform start date config is used --- seaice_ecdr/cli/entrypoint.py | 8 +- seaice_ecdr/cli/nrt.py | 93 +++++++++++++++++++ .../config/nrt_platform_start_dates.yml | 5 + seaice_ecdr/initial_daily_ecdr.py | 21 ++++- seaice_ecdr/nrt.py | 12 +-- seaice_ecdr/platforms/config.py | 3 + 6 files changed, 125 insertions(+), 17 deletions(-) create mode 100644 seaice_ecdr/cli/nrt.py create mode 100644 seaice_ecdr/config/nrt_platform_start_dates.yml diff --git a/seaice_ecdr/cli/entrypoint.py b/seaice_ecdr/cli/entrypoint.py index 8839cb6a..007b2947 100644 --- a/seaice_ecdr/cli/entrypoint.py +++ b/seaice_ecdr/cli/entrypoint.py @@ -4,6 +4,7 @@ from seaice_ecdr.cli.daily import cli as daily_cli from seaice_ecdr.cli.monthly import cli as monthly_cli +from seaice_ecdr.cli.nrt import cli as nrt_cli from 
seaice_ecdr.daily_aggregate import cli as daily_aggregate_cli from seaice_ecdr.initial_daily_ecdr import cli as ecdr_cli from seaice_ecdr.intermediate_daily import cli as intermediate_daily_cli @@ -12,7 +13,7 @@ from seaice_ecdr.multiprocess_intermediate_daily import ( cli as multiprocess_intermediate_daily_cli, ) -from seaice_ecdr.nrt import nrt_cli +from seaice_ecdr.nrt import nrt_ecdr_for_dates from seaice_ecdr.publish_daily import cli as publish_daily_cli from seaice_ecdr.temporal_composite_daily import cli as tiecdr_cli from seaice_ecdr.validation import cli as validation_cli @@ -26,12 +27,12 @@ def cli(): cli.add_command(ecdr_cli) cli.add_command(tiecdr_cli) -cli.add_command(nrt_cli) cli.add_command(intermediate_daily_cli) cli.add_command(intermediate_monthly_cli) cli.add_command(validation_cli) cli.add_command(multiprocess_intermediate_daily_cli) cli.add_command(publish_daily_cli) +cli.add_command(nrt_ecdr_for_dates) # CLIs that ops will use below: # Generate standard daily files ready for publication: @@ -42,6 +43,9 @@ def cli(): cli.add_command(monthly_cli) # Generate monthly aggregate file (one per hemisphere) cli.add_command(monthly_aggregate_cli) +# Wraps the `nrt_ecdr_for_dates` CLI with the correct platform start date +# configuration chosen. +cli.add_command(nrt_cli) if __name__ == "__main__": cli() diff --git a/seaice_ecdr/cli/nrt.py b/seaice_ecdr/cli/nrt.py new file mode 100644 index 00000000..2060cfe5 --- /dev/null +++ b/seaice_ecdr/cli/nrt.py @@ -0,0 +1,93 @@ +"""Wrapper around the nrt CLI to override platform start dates config + +This is a hack, and should be unnecessary once the code is refactored to make it +easier to configure the platform start dates. 
+""" + +import copy +import datetime as dt +from pathlib import Path +from typing import get_args + +import click +from pm_tb_data._types import Hemisphere + +from seaice_ecdr.cli.util import CLI_EXE_PATH, datetime_to_date, run_cmd +from seaice_ecdr.constants import DEFAULT_BASE_NRT_OUTPUT_DIR +from seaice_ecdr.platforms.config import NRT_PLATFORM_START_DATES_CONFIG_FILEPATH + + +@click.command(name="daily-nrt") +@click.option( + "-d", + "--date", + required=True, + type=click.DateTime(formats=("%Y-%m-%d", "%Y%m%d", "%Y.%m.%d")), + callback=datetime_to_date, +) +@click.option( + "--end-date", + required=False, + type=click.DateTime( + formats=( + "%Y-%m-%d", + "%Y%m%d", + "%Y.%m.%d", + ) + ), + # Like `datetime_to_date` but allows `None`. + callback=lambda _ctx, _param, value: value if value is None else value.date(), + default=None, + help="If given, run temporal composite for `--date` through this end date.", +) +@click.option( + "-h", + "--hemisphere", + required=True, + type=click.Choice(get_args(Hemisphere)), +) +@click.option( + "--base-output-dir", + required=True, + type=click.Path( + exists=True, + file_okay=False, + dir_okay=True, + writable=True, + resolve_path=True, + path_type=Path, + ), + default=DEFAULT_BASE_NRT_OUTPUT_DIR, + help=( + "Base output directory for NRT ECDR outputs." + " Subdirectories are created for outputs of" + " different stages of processing." 
+ ), + show_default=True, +) +@click.option( + "--overwrite", + is_flag=True, + help=("Overwrite intermediate and final outputs."), +) +def cli( + *, + date: dt.date, + end_date: dt.date | None, + hemisphere: Hemisphere, + base_output_dir: Path, + overwrite: bool, +): + if end_date is None: + end_date = copy.copy(date) + + overwrite_str = " --overwrite" if overwrite else "" + + run_cmd( + f"export PLATFORM_START_DATES_CONFIG_FILEPATH={NRT_PLATFORM_START_DATES_CONFIG_FILEPATH} &&" + f"{CLI_EXE_PATH} nrt" + f" --hemisphere {hemisphere}" + f" --base-output-dir {base_output_dir}" + f" --date {date:%Y-%m-%d}" + f" --end-date {end_date:%Y-%m-%d}" + overwrite_str + ) diff --git a/seaice_ecdr/config/nrt_platform_start_dates.yml b/seaice_ecdr/config/nrt_platform_start_dates.yml new file mode 100644 index 00000000..fbadeee2 --- /dev/null +++ b/seaice_ecdr/config/nrt_platform_start_dates.yml @@ -0,0 +1,5 @@ +cdr_platform_start_dates: + # We only use F18 for NRT. + - platform_id: "F18" + # This appears to be the first date of data in NSIDC0080. 
+ start_date: "2021-11-01" diff --git a/seaice_ecdr/initial_daily_ecdr.py b/seaice_ecdr/initial_daily_ecdr.py index 5d0018cf..de800d38 100644 --- a/seaice_ecdr/initial_daily_ecdr.py +++ b/seaice_ecdr/initial_daily_ecdr.py @@ -417,12 +417,11 @@ def compute_initial_daily_ecdr_dataset( # The CDRv4 calculation causes TB to be zero/missing where # no sea ice can occur because of invalid region or land logger.debug(f"Applying invalid ice mask to TB field: {tb_si_varname}") - platform = PLATFORM_CONFIG.get_platform_by_date(date) invalid_ice_mask = get_invalid_ice_mask( hemisphere=hemisphere, date=date, resolution=tb_data.resolution, - platform=platform, + platform=PLATFORM_CONFIG.platform_for_id(tb_data.platform_id), ancillary_source=ancillary_source, ) @@ -573,7 +572,7 @@ def compute_initial_daily_ecdr_dataset( "Initialized cdr_seaice_conc_interp_spatial_flag with TB fill locations" ) - platform = PLATFORM_CONFIG.get_platform_by_date(date) + platform = PLATFORM_CONFIG.platform_for_id(tb_data.platform_id) if platform.id == "am2": bt_coefs_init = pmi_bt_params_amsr2.get_ausi_amsr2_bootstrap_params( date=date, @@ -586,6 +585,11 @@ def compute_initial_daily_ecdr_dataset( satellite="amsre", gridid=ecdr_ide_ds.grid_id, ) + # F18 is used in NRT processing. + elif platform.id == "F18": + raise NotImplementedError("TODO") + # Note: F18 is included in NSIDC0001, but we're getting data from NSIDC0080 + # for NRT. 
elif platform.id in get_args(NSIDC_0001_SATS): bt_coefs_init = pmi_bt_params_0001.get_nsidc0001_bootstrap_params( date=date, @@ -1298,11 +1302,11 @@ def make_idecdr_netcdf( land_spillover_alg: LAND_SPILL_ALGS, ancillary_source: ANCILLARY_SOURCES, overwrite_ide: bool = False, + platform_id: SUPPORTED_PLATFORM_ID, ) -> None: - platform = PLATFORM_CONFIG.get_platform_by_date(date) output_path = get_idecdr_filepath( date=date, - platform_id=platform.id, + platform_id=platform_id, hemisphere=hemisphere, intermediate_output_dir=intermediate_output_dir, resolution=resolution, @@ -1339,6 +1343,11 @@ def create_idecdr_for_date( land_spillover_alg: LAND_SPILL_ALGS, ancillary_source: ANCILLARY_SOURCES, ) -> None: + """Create a standard IDECDR file for the given date. + + Uses the default platform start date config to determine the correct + platform to use. + """ excluded_fields = [] if not verbose_intermed_ncfile: excluded_fields = [ @@ -1361,6 +1370,7 @@ def create_idecdr_for_date( "missing_tb_mask", ] try: + platform = PLATFORM_CONFIG.get_platform_by_date(date) make_idecdr_netcdf( date=date, hemisphere=hemisphere, @@ -1370,6 +1380,7 @@ def create_idecdr_for_date( overwrite_ide=overwrite_ide, land_spillover_alg=land_spillover_alg, ancillary_source=ancillary_source, + platform_id=platform.id, ) except Exception as e: diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index 8fff66f1..d26308f3 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -114,6 +114,7 @@ def read_or_create_and_read_nrt_idecdr_ds( intermediate_output_dir: Path, overwrite: bool, ): + # TODO: this isn't correct. 
We always use F18 platform = PLATFORM_CONFIG.get_platform_by_date(date) idecdr_filepath = get_idecdr_filepath( hemisphere=hemisphere, @@ -317,7 +318,7 @@ def nrt_ecdr_for_day( raise e -@click.command(name="daily-nrt") +@click.command(name="nrt") @click.option( "-d", "--date", @@ -388,12 +389,3 @@ def nrt_ecdr_for_dates( base_output_dir=base_output_dir, overwrite=overwrite, ) - - -@click.group(name="nrt") -def nrt_cli(): - """Run NRT Sea Ice ECDR.""" - ... - - -nrt_cli.add_command(nrt_ecdr_for_dates) diff --git a/seaice_ecdr/platforms/config.py b/seaice_ecdr/platforms/config.py index 3eecb420..33e8b5e0 100644 --- a/seaice_ecdr/platforms/config.py +++ b/seaice_ecdr/platforms/config.py @@ -31,6 +31,9 @@ PROTOTYPE_PLATFORM_START_DATES_CONFIG_FILEPATH = Path( _this_dir / "../config/prototype_platform_start_dates.yml" ).resolve() +NRT_PLATFORM_START_DATES_CONFIG_FILEPATH = Path( + _this_dir / "../config/nrt_platform_start_dates.yml" +).resolve() AM2_PLATFORM = Platform( From 83dc0ea3f91240e08c2f3c83dc33f8a9a9d5c948 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 24 Sep 2024 15:31:11 -0600 Subject: [PATCH 15/21] Add note/TODO --- seaice_ecdr/nrt.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index d26308f3..a6976fb1 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -43,6 +43,13 @@ ) NRT_RESOLUTION: Final = "25" +# NOTE/TODO: note that the NRT_PLATFORM_ID is used but not the exclusive source +# of platform information. The program must be run with the environment variable +# `PLATFORM_START_DATES_CONFIG_FILEPATH` set to the NRT config file +# (`nrt_platform_start_dates.yml`) for the code to run properly. Ideally in the +# future we can refactor the code to support configuring the platform start +# dates at any point rather than needing an at-import-time setup as we currently +# do. 
NRT_PLATFORM_ID: Final = "F18" # Number of days to look previously for temporal interpolation (forward # gap-filling) From 4b8f92db4ecdf56d69c8d50ae5277eea99260dc4 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Tue, 24 Sep 2024 15:39:55 -0600 Subject: [PATCH 16/21] Use the F17 polemask for F18 --- seaice_ecdr/ancillary.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/seaice_ecdr/ancillary.py b/seaice_ecdr/ancillary.py index 30feb52c..f2b0ceb7 100644 --- a/seaice_ecdr/ancillary.py +++ b/seaice_ecdr/ancillary.py @@ -112,7 +112,12 @@ def get_surfacetype_da( if "polehole_bitmask" in ancillary_ds.data_vars.keys(): polehole_bitmask = ancillary_ds.polehole_bitmask platform = PLATFORM_CONFIG.get_platform_by_date(date) - polehole_bitlabel = f"{platform.id}_polemask" + platform_id = platform.id + # TODO: Use F17 polemask if F18 is being used. There is currently no F18 + # polemask defined in the ancillary file + if platform.id == "F18": + platform_id = "F17" + polehole_bitlabel = f"{platform_id}_polemask" polehole_bitvalue = bitmask_value_for_meaning( var=polehole_bitmask, meaning=polehole_bitlabel, @@ -192,7 +197,12 @@ def nh_polehole_mask( date=date, ) - polehole_bitlabel = f"{platform.id}_polemask" + platform_id = platform.id + # TODO: Use F17 polemask if F18 is being used. 
There is currently no F18 + # polemask defined in the ancillary file + if platform.id == "F18": + platform_id = "F17" + polehole_bitlabel = f"{platform_id}_polemask" polehole_bitvalue = bitmask_value_for_meaning( var=polehole_bitmask, meaning=polehole_bitlabel, From fbd379c249c9fe8dd83a3f1710e4e235cb434ac5 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Wed, 25 Sep 2024 09:31:32 -0600 Subject: [PATCH 17/21] Use nsidc-0001 F18 BT params for nsidc-0080 F18 NRT --- seaice_ecdr/initial_daily_ecdr.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/seaice_ecdr/initial_daily_ecdr.py b/seaice_ecdr/initial_daily_ecdr.py index de800d38..1efd0ec0 100644 --- a/seaice_ecdr/initial_daily_ecdr.py +++ b/seaice_ecdr/initial_daily_ecdr.py @@ -585,11 +585,8 @@ def compute_initial_daily_ecdr_dataset( satellite="amsre", gridid=ecdr_ide_ds.grid_id, ) - # F18 is used in NRT processing. - elif platform.id == "F18": - raise NotImplementedError("TODO") - # Note: F18 is included in NSIDC0001, but we're getting data from NSIDC0080 - # for NRT. + # NOTE/TODO: we get F18 data from NSIDC-0080 for NRT processing, but we are + # using the NSIDC-0001 specific BT params for F18. elif platform.id in get_args(NSIDC_0001_SATS): bt_coefs_init = pmi_bt_params_0001.get_nsidc0001_bootstrap_params( date=date, From f7241bb0179dc57d90985c43fae3d1d3622945e6 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Wed, 25 Sep 2024 15:53:18 -0600 Subject: [PATCH 18/21] Pin GHA micromamba version to the release before v2 v2 breaks things. 
--- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fe6a59b9..428c331d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,6 +23,7 @@ jobs: - name: "Install Conda environment" uses: "mamba-org/setup-micromamba@v1" with: + micromamba-version: '1.5.10-0' # any version from https://github.com/mamba-org/micromamba-releases environment-file: "conda-lock.yml" # When using a lock-file, we have to set an environment name. environment-name: "seaice_ecdr-ci" From 87e0228161628e56bc91e12219d58685f1c6e87c Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Wed, 25 Sep 2024 15:58:38 -0600 Subject: [PATCH 19/21] Pin `pm_tb_data` and `pm_icecon` to new versions for G10016 --- environment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 936c6e77..96ffcf05 100644 --- a/environment.yml +++ b/environment.yml @@ -20,8 +20,8 @@ dependencies: - nco ~=5.1.9 - pandas ~=1.4.4 - opencv ~=4.8.0 - - pm_tb_data ~=0.4.0 - - pm_icecon ~=0.4.0 + - pm_tb_data ~=0.5.0 + - pm_icecon ~=0.5.0 - leafmap - rioxarray - hvplot From 46c7f04de172f170ac5f203a706690521b8cd315 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Thu, 26 Sep 2024 08:32:29 -0600 Subject: [PATCH 20/21] Add `platform_id` kwarg to function call in test module --- .../tests/integration/test_initial_daily_ecdr_generation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/seaice_ecdr/tests/integration/test_initial_daily_ecdr_generation.py b/seaice_ecdr/tests/integration/test_initial_daily_ecdr_generation.py index 0f36885f..b1851010 100644 --- a/seaice_ecdr/tests/integration/test_initial_daily_ecdr_generation.py +++ b/seaice_ecdr/tests/integration/test_initial_daily_ecdr_generation.py @@ -146,6 +146,7 @@ def test_cli_idecdr_ncfile_creation(tmpdir): excluded_fields=[], land_spillover_alg="NT2", ancillary_source=ancillary_source, + 
platform_id=test_platform_id, ) output_path = get_idecdr_filepath( hemisphere=test_hemisphere, @@ -181,6 +182,7 @@ def test_can_drop_fields_from_idecdr_netcdf( excluded_fields=(cdr_conc_fieldname,), land_spillover_alg="NT2", ancillary_source=ancillary_source, + platform_id=test_platform_id, ) output_path = get_idecdr_filepath( hemisphere=test_hemisphere, From 01ab4e483e456d145ab68b8eb90a111f42c7b056 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Thu, 26 Sep 2024 09:07:36 -0600 Subject: [PATCH 21/21] Cleanup TODO. Scott confirmed this is correct. --- seaice_ecdr/platforms/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/seaice_ecdr/platforms/config.py b/seaice_ecdr/platforms/config.py index 33e8b5e0..3fd4b2fb 100644 --- a/seaice_ecdr/platforms/config.py +++ b/seaice_ecdr/platforms/config.py @@ -62,7 +62,6 @@ sensor="SSMIS > Special Sensor Microwave Imager/Sounder", id="F18", date_range=DateRange( - # TODO: is this accurate? This value from NSIDC-0001 docs. first_date=dt.date(2017, 1, 1), last_date=None, ),