diff --git a/CHANGELOG.md b/CHANGELOG.md index baf99311..f689dccd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +# v0.2.0 + +* Produce 25km CDR instead of 12.5km. +* Refactor how platforms are handled to support overriding platform start dates + via yaml configuration files. + + # v0.1.0 * Initial version of the ECDR. diff --git a/pyproject.toml b/pyproject.toml index ec61d38f..5f937c35 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,9 @@ [project] name = "seaice_ecdr" -version = "0.1.0" +version = "0.2.0" [tool.bumpversion] -current_version = "0.1.0" +current_version = "0.2.0" commit = false tag = false diff --git a/seaice_ecdr/__init__.py b/seaice_ecdr/__init__.py index a7a20788..b0146856 100644 --- a/seaice_ecdr/__init__.py +++ b/seaice_ecdr/__init__.py @@ -6,7 +6,7 @@ from seaice_ecdr.constants import LOGS_DIR -__version__ = "v0.1.0" +__version__ = "v0.2.0" DEFAULT_LOG_LEVEL = "INFO" diff --git a/seaice_ecdr/_types.py b/seaice_ecdr/_types.py index 3236d8be..b21fe922 100644 --- a/seaice_ecdr/_types.py +++ b/seaice_ecdr/_types.py @@ -2,14 +2,3 @@ # In kilometers. 
ECDR_SUPPORTED_RESOLUTIONS = Literal["12.5", "25"] - -# Supported sats -SUPPORTED_SAT = Literal[ - "am2", # AMSR2 - "ame", # AMSRE - "F17", # SSMIS F17 - "F13", # SSMI F13 - "F11", # SSMI F11 - "F08", # SSMI F08 - "n07", # Nimbus-7 SMMR -] diff --git a/seaice_ecdr/ancillary.py b/seaice_ecdr/ancillary.py index f2090b94..30feb52c 100644 --- a/seaice_ecdr/ancillary.py +++ b/seaice_ecdr/ancillary.py @@ -19,7 +19,8 @@ from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS from seaice_ecdr.constants import CDR_ANCILLARY_DIR from seaice_ecdr.grid_id import get_grid_id -from seaice_ecdr.platforms import SUPPORTED_SAT, get_platform_by_date +from seaice_ecdr.platforms import PLATFORM_CONFIG, Platform +from seaice_ecdr.platforms.config import N07_PLATFORM ANCILLARY_SOURCES = Literal["CDRv4", "CDRv5"] @@ -96,7 +97,6 @@ def get_surfacetype_da( hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, ancillary_source: ANCILLARY_SOURCES, - platform: SUPPORTED_SAT, ) -> xr.DataArray: """Return a dataarray with surface type information for this date.""" ancillary_ds = get_ancillary_ds( @@ -111,7 +111,8 @@ def get_surfacetype_da( polehole_surface_type = 100 if "polehole_bitmask" in ancillary_ds.data_vars.keys(): polehole_bitmask = ancillary_ds.polehole_bitmask - polehole_bitlabel = f"{platform}_polemask" + platform = PLATFORM_CONFIG.get_platform_by_date(date) + polehole_bitlabel = f"{platform.id}_polemask" polehole_bitvalue = bitmask_value_for_meaning( var=polehole_bitmask, meaning=polehole_bitlabel, @@ -174,8 +175,8 @@ def nh_polehole_mask( *, date: dt.date, resolution: ECDR_SUPPORTED_RESOLUTIONS, - sat=None, ancillary_source: ANCILLARY_SOURCES, + platform: Platform | None = None, ) -> xr.DataArray: """Return the northern hemisphere pole hole mask for the given date and resolution.""" ancillary_ds = get_ancillary_ds( @@ -186,12 +187,12 @@ def nh_polehole_mask( polehole_bitmask = ancillary_ds.polehole_bitmask - if sat is None: - sat = get_platform_by_date( + if platform is 
None: + platform = PLATFORM_CONFIG.get_platform_by_date( date=date, ) - polehole_bitlabel = f"{sat}_polemask" + polehole_bitlabel = f"{platform.id}_polemask" polehole_bitvalue = bitmask_value_for_meaning( var=polehole_bitmask, meaning=polehole_bitlabel, @@ -280,8 +281,8 @@ def get_invalid_ice_mask( hemisphere: Hemisphere, date: dt.date, resolution: ECDR_SUPPORTED_RESOLUTIONS, - platform: SUPPORTED_SAT, ancillary_source: ANCILLARY_SOURCES, + platform: Platform, ) -> xr.DataArray: """Return an invalid ice mask for the given date. @@ -289,13 +290,12 @@ def get_invalid_ice_mask( month-based mask. """ # SMMR / n07 case: - if platform == "n07": + if platform == N07_PLATFORM: # TODO: Daily (SMMR) mask is used at end for cleanup, # but not for initial TB field generation # Skip the smmr invalid ice mask for now... print("WARNING: Using non-SMMR invalid ice masks") - # return get_smmr_invalid_ice_mask(hemisphere=hemisphere, date=date, resolution=resolution, ancillary_source=ancillary_source) - + # return get_smmr_invalid_ice_mask(hemisphere=hemisphere, date=date) # All other platforms: ancillary_ds = get_ancillary_ds( hemisphere=hemisphere, diff --git a/seaice_ecdr/complete_daily_ecdr.py b/seaice_ecdr/complete_daily_ecdr.py index a7aa2dd8..d4c9d7be 100644 --- a/seaice_ecdr/complete_daily_ecdr.py +++ b/seaice_ecdr/complete_daily_ecdr.py @@ -31,9 +31,7 @@ date_in_nh_melt_season, melting, ) -from seaice_ecdr.platforms import ( - get_platform_by_date, -) +from seaice_ecdr.platforms import PLATFORM_CONFIG from seaice_ecdr.set_daily_ncattrs import finalize_cdecdr_ds from seaice_ecdr.spillover import LAND_SPILL_ALGS from seaice_ecdr.temporal_composite_daily import get_tie_filepath, make_tiecdr_netcdf @@ -76,19 +74,19 @@ def get_ecdr_filepath( is_nrt: bool, ) -> Path: """Return the complete daily eCDR file path.""" - platform = get_platform_by_date(date) + platform = PLATFORM_CONFIG.get_platform_by_date(date) if is_nrt: ecdr_filename = nrt_daily_filename( hemisphere=hemisphere, 
date=date, - sat=platform, + platform_id=platform.id, resolution=resolution, ) else: ecdr_filename = standard_daily_filename( hemisphere=hemisphere, date=date, - sat=platform, + platform_id=platform.id, resolution=resolution, ) @@ -443,12 +441,10 @@ def _add_surfacetype_da( # The methodology here should be reviewed to see if there is # a "better" way to add a geo-referenced dataarray to an existing # xr Dataset. - platform = get_platform_by_date(date) surfacetype_da = get_surfacetype_da( date=date, hemisphere=hemisphere, resolution=resolution, - platform=platform, ancillary_source=ancillary_source, ) # Force use of the cde_ds coords instead of the x, y, time vars diff --git a/seaice_ecdr/daily_aggregate.py b/seaice_ecdr/daily_aggregate.py index 5cecd4e9..75335132 100644 --- a/seaice_ecdr/daily_aggregate.py +++ b/seaice_ecdr/daily_aggregate.py @@ -19,10 +19,10 @@ from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR from seaice_ecdr.nc_attrs import get_global_attrs from seaice_ecdr.nc_util import concatenate_nc_files -from seaice_ecdr.platforms import get_first_platform_start_date +from seaice_ecdr.platforms import PLATFORM_CONFIG from seaice_ecdr.util import ( get_complete_output_dir, - sat_from_filename, + platform_id_from_filename, standard_daily_aggregate_filename, ) @@ -37,7 +37,9 @@ def _get_daily_complete_filepaths_for_year( resolution: ECDR_SUPPORTED_RESOLUTIONS, ) -> list[Path]: data_list = [] - start_date = max(dt.date(year, 1, 1), get_first_platform_start_date()) + start_date = max( + dt.date(year, 1, 1), PLATFORM_CONFIG.get_first_platform_start_date() + ) for period in pd.period_range(start=start_date, end=dt.date(year, 12, 31)): expected_fp = get_ecdr_filepath( date=period.to_timestamp().date(), @@ -115,7 +117,7 @@ def _update_ncrcat_daily_ds( temporality="daily", aggregate=True, source=", ".join([fp.name for fp in daily_filepaths]), - sats=[sat_from_filename(fp.name) for fp in daily_filepaths], + platform_ids=[platform_id_from_filename(fp.name) for 
fp in daily_filepaths], ) ds.attrs = daily_aggregate_ds_global_attrs diff --git a/seaice_ecdr/initial_daily_ecdr.py b/seaice_ecdr/initial_daily_ecdr.py index c78effcf..5d449f47 100644 --- a/seaice_ecdr/initial_daily_ecdr.py +++ b/seaice_ecdr/initial_daily_ecdr.py @@ -31,7 +31,7 @@ from pm_tb_data._types import NORTH, Hemisphere from pm_tb_data.fetch.nsidc_0001 import NSIDC_0001_SATS -from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS, SUPPORTED_SAT +from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS from seaice_ecdr.ancillary import ( ANCILLARY_SOURCES, get_empty_ds_with_time, @@ -42,7 +42,7 @@ from seaice_ecdr.cli.util import datetime_to_date from seaice_ecdr.constants import CDR_ANCILLARY_DIR, DEFAULT_BASE_OUTPUT_DIR from seaice_ecdr.grid_id import get_grid_id -from seaice_ecdr.platforms import get_platform_by_date +from seaice_ecdr.platforms import PLATFORM_CONFIG, SUPPORTED_PLATFORM_ID from seaice_ecdr.regrid_25to12 import reproject_ideds_25to12 from seaice_ecdr.spillover import LAND_SPILL_ALGS, land_spillover from seaice_ecdr.tb_data import ( @@ -54,8 +54,8 @@ from seaice_ecdr.util import get_intermediate_output_dir, standard_daily_filename -def platform_is_smmr(platform): - return platform in ("n07", "s36") +def platform_is_smmr(platform_id: SUPPORTED_PLATFORM_ID): + return platform_id in ("n07", "s36") def cdr_bootstrap_raw( @@ -64,7 +64,7 @@ def cdr_bootstrap_raw( tb_h37: npt.NDArray, tb_v19: npt.NDArray, bt_coefs, - platform: SUPPORTED_SAT, + platform: SUPPORTED_PLATFORM_ID, ): """Generate the raw bootstrap concentration field. 
Note: tb fields should already be transformed before @@ -205,7 +205,7 @@ def _setup_ecdr_ds( ecdr_ide_ds.attrs["data_source"] = tb_data.data_source # Set the platform - ecdr_ide_ds.attrs["platform"] = tb_data.platform + ecdr_ide_ds.attrs["platform"] = tb_data.platform_id file_date = dt.date(1970, 1, 1) + dt.timedelta( days=int(ecdr_ide_ds.variables["time"].data) @@ -407,7 +407,7 @@ def compute_initial_daily_ecdr_dataset( # The CDRv4 calculation causes TB to be zero/missing where # no sea ice can occur because of invalid region or land logger.debug(f"Applying invalid ice mask to TB field: {tb_si_varname}") - platform = get_platform_by_date(date) + platform = PLATFORM_CONFIG.get_platform_by_date(date) invalid_ice_mask = get_invalid_ice_mask( hemisphere=hemisphere, date=date, @@ -561,26 +561,26 @@ def compute_initial_daily_ecdr_dataset( ) logger.debug("Initialized spatial_interpolation_flag with TB fill locations") - platform = get_platform_by_date(date) - if platform == "am2": + platform = PLATFORM_CONFIG.get_platform_by_date(date) + if platform.id == "am2": bt_coefs_init = pmi_bt_params_amsr2.get_ausi_amsr2_bootstrap_params( date=date, satellite="amsr2", gridid=ecdr_ide_ds.grid_id, ) - elif platform == "ame": + elif platform.id == "ame": bt_coefs_init = pmi_bt_params_amsre.get_ausi_amsre_bootstrap_params( date=date, satellite="amsre", gridid=ecdr_ide_ds.grid_id, ) - elif platform in get_args(NSIDC_0001_SATS): + elif platform.id in get_args(NSIDC_0001_SATS): bt_coefs_init = pmi_bt_params_0001.get_nsidc0001_bootstrap_params( date=date, - satellite=platform, + satellite=platform.id, gridid=ecdr_ide_ds.grid_id, ) - elif platform_is_smmr(platform): + elif platform_is_smmr(platform.id): bt_coefs_init = get_smmr_params(hemisphere=hemisphere, date=date) else: raise RuntimeError(f"platform bootstrap params not implemented: {platform}") @@ -618,14 +618,14 @@ def compute_initial_daily_ecdr_dataset( pole_mask = nh_polehole_mask( date=date, resolution=tb_data.resolution, - 
sat=platform, ancillary_source=ancillary_source, + platform=platform, ) ecdr_ide_ds["pole_mask"] = pole_mask nt_params = get_cdr_nt_params( hemisphere=hemisphere, - platform=platform, + platform=platform.id, ) nt_coefs = NtCoefs( @@ -683,7 +683,7 @@ def compute_initial_daily_ecdr_dataset( v19=bt_v19, v22=bt_v22, ), - platform=platform.lower(), + platform=platform.id.lower(), ) bt_v37 = transformed["v37"] bt_h37 = transformed["h37"] @@ -713,7 +713,7 @@ def compute_initial_daily_ecdr_dataset( wintrc=bt_coefs_init["wintrc"], wslope=bt_coefs_init["wslope"], wxlimt=bt_coefs_init["wxlimt"], - is_smmr=platform_is_smmr(platform), + is_smmr=platform_is_smmr(platform.id), ) # Note: @@ -853,7 +853,7 @@ def compute_initial_daily_ecdr_dataset( tb_h37=bt_h37, tb_v19=bt_v19, bt_coefs=bt_coefs, - platform=platform, + platform=platform.id, ) # Set any bootstrap concentrations below 10% to 0. @@ -950,7 +950,6 @@ def compute_initial_daily_ecdr_dataset( tb_data=tb_data, algorithm=land_spillover_alg, land_mask=non_ocean_mask.data, - platform=platform, ancillary_source=ancillary_source, bt_conc=bt_asCDRv4_conc, nt_conc=nt_asCDRv4_conc, @@ -1252,7 +1251,7 @@ def get_idecdr_dir(*, intermediate_output_dir: Path) -> Path: def get_idecdr_filepath( *, date: dt.date, - platform, + platform_id: SUPPORTED_PLATFORM_ID, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, intermediate_output_dir: Path, @@ -1262,7 +1261,7 @@ def get_idecdr_filepath( standard_fn = standard_daily_filename( hemisphere=hemisphere, date=date, - sat=platform, + platform_id=platform_id, resolution=resolution, ) idecdr_fn = "idecdr_" + standard_fn @@ -1285,10 +1284,10 @@ def make_idecdr_netcdf( ancillary_source: ANCILLARY_SOURCES, overwrite_ide: bool = False, ) -> None: - platform = get_platform_by_date(date) + platform = PLATFORM_CONFIG.get_platform_by_date(date) output_path = get_idecdr_filepath( date=date, - platform=platform, + platform_id=platform.id, hemisphere=hemisphere, 
intermediate_output_dir=intermediate_output_dir, resolution=resolution, diff --git a/seaice_ecdr/monthly.py b/seaice_ecdr/monthly.py index 2bc29848..7e82509f 100644 --- a/seaice_ecdr/monthly.py +++ b/seaice_ecdr/monthly.py @@ -35,16 +35,17 @@ from loguru import logger from pm_tb_data._types import NORTH, Hemisphere -from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS, SUPPORTED_SAT +from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS from seaice_ecdr.ancillary import ANCILLARY_SOURCES, flag_value_for_meaning from seaice_ecdr.checksum import write_checksum_file from seaice_ecdr.complete_daily_ecdr import get_ecdr_filepath from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR from seaice_ecdr.nc_attrs import get_global_attrs +from seaice_ecdr.platforms import SUPPORTED_PLATFORM_ID from seaice_ecdr.util import ( get_complete_output_dir, get_num_missing_pixels, - sat_from_filename, + platform_id_from_filename, standard_monthly_filename, ) @@ -52,10 +53,10 @@ def check_min_days_for_valid_month( *, daily_ds_for_month: xr.Dataset, - sat: SUPPORTED_SAT, + platform_id: SUPPORTED_PLATFORM_ID, ) -> None: days_in_ds = len(daily_ds_for_month.time) - if sat == "n07": + if platform_id == "n07": min_days = 10 else: min_days = 20 @@ -103,22 +104,24 @@ def _get_daily_complete_filepaths_for_month( return data_list -def _sat_for_month(*, sats: list[SUPPORTED_SAT]) -> SUPPORTED_SAT: - """Returns the satellite from this month given a list of input satellites. +def _platform_id_for_month( + *, platform_ids: list[SUPPORTED_PLATFORM_ID] +) -> SUPPORTED_PLATFORM_ID: + """Returns the platform ID from this month given a list of input platforms. - The sat for monthly files is based on which sat contributes most to the - month. If two sats contribute equally, use the latest sat in the series. + The platform for monthly files is based on which platform contributes most to the + month. If two platforms contribute equally, use the latest platform in the series. 
- Function assumes the list of satellites is already sorted (i.e., the latest - satellite is `sats[-1]`). + Function assumes the list of platform ids is already sorted (i.e., the latest + platform is `platform_ids[-1]`). """ - # More than one sat, we need to choose the most common/latest in the series. - # `Counter` returns a dict keyed by `sat` with counts as values: - count = Counter(sats) - most_common_sats = count.most_common() - most_common_and_latest_sat = most_common_sats[-1][0] + # More than one platform, we need to choose the most common/latest in the series. + # `Counter` returns a dict keyed by `platform` with counts as values: + count = Counter(platform_ids) + most_common_platform_ids = count.most_common() + most_common_and_latest_platform_id = most_common_platform_ids[-1][0] - return most_common_and_latest_sat + return most_common_and_latest_platform_id def get_daily_ds_for_month( @@ -156,19 +159,19 @@ def get_daily_ds_for_month( data=data_list, dims=("time",), coords=dict(time=ds.time) ) - # Extract `sat` from the filenames contributing to this + # Extract `platform_id` from the filenames contributing to this # dataset. Ideally, we would use a custom `combine_attrs` when reading the - # data with `xr.open_mfdataset` in order to get the sat/sensor from global + # data with `xr.open_mfdataset` in order to get the platform/sensor from global # attrs in each of the contributing files. Unfortunately this interface is # poorly documented and seems to have limited support. 
E.g., see # https://github.com/pydata/xarray/issues/6679 - sats = [] + platform_ids = [] for filepath in data_list: - sats.append(sat_from_filename(filepath.name)) + platform_ids.append(platform_id_from_filename(filepath.name)) - sat = _sat_for_month(sats=sats) + platform_id = _platform_id_for_month(platform_ids=platform_ids) - ds.attrs["sat"] = sat + ds.attrs["platform_id"] = platform_id return ds @@ -504,7 +507,7 @@ def calc_surface_type_mask_monthly( def make_monthly_ds( *, daily_ds_for_month: xr.Dataset, - sat: SUPPORTED_SAT, + platform_id: SUPPORTED_PLATFORM_ID, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, ancillary_source: ANCILLARY_SOURCES, @@ -517,7 +520,7 @@ def make_monthly_ds( # Min-day check check_min_days_for_valid_month( daily_ds_for_month=daily_ds_for_month, - sat=sat, + platform_id=platform_id, ) # create `cdr_seaice_conc_monthly`. This is the combined monthly SIC. @@ -580,11 +583,11 @@ def make_monthly_ds( temporality="monthly", aggregate=False, source=", ".join([fp.item().name for fp in daily_ds_for_month.filepaths]), - # TODO: consider providing all sats that went into month? This would be + # TODO: consider providing all platforms that went into month? This would be # consistent with how we handle the aggregate filenames. Is it - # misleading to indicate that a month is a single sat when it may not + # misleading to indicate that a month is a single platform when it may not # really be? 
- sats=[sat], + platform_ids=[platform_id], ) monthly_ds.attrs.update(monthly_ds_global_attrs) @@ -602,7 +605,7 @@ def get_monthly_filepath( *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, - sat: SUPPORTED_SAT, + platform_id: SUPPORTED_PLATFORM_ID, year: int, month: int, complete_output_dir: Path, @@ -614,7 +617,7 @@ def get_monthly_filepath( output_fn = standard_monthly_filename( hemisphere=hemisphere, resolution=resolution, - sat=sat, + platform_id=platform_id, year=year, month=month, ) @@ -641,12 +644,12 @@ def make_monthly_nc( resolution=resolution, ) - sat = daily_ds_for_month.sat + platform_id = daily_ds_for_month.platform_id output_path = get_monthly_filepath( hemisphere=hemisphere, resolution=resolution, - sat=sat, + platform_id=platform_id, year=year, month=month, complete_output_dir=complete_output_dir, @@ -654,7 +657,7 @@ def make_monthly_nc( monthly_ds = make_monthly_ds( daily_ds_for_month=daily_ds_for_month, - sat=sat, + platform_id=platform_id, hemisphere=hemisphere, resolution=resolution, ancillary_source=ancillary_source, diff --git a/seaice_ecdr/monthly_aggregate.py b/seaice_ecdr/monthly_aggregate.py index 2387b5b7..ed64de86 100644 --- a/seaice_ecdr/monthly_aggregate.py +++ b/seaice_ecdr/monthly_aggregate.py @@ -20,7 +20,7 @@ from seaice_ecdr.nc_util import concatenate_nc_files from seaice_ecdr.util import ( get_complete_output_dir, - sat_from_filename, + platform_id_from_filename, standard_monthly_aggregate_filename, ) @@ -38,8 +38,8 @@ def _get_monthly_complete_filepaths( # TODO: the monthly filenames are encoded in the # `util.standard_monthly_filename` func. Can we adapt that to use wildcards # and return a glob-able string? 
- # North Monthly files: sic_psn12.5_YYYYMM_sat_v05r00.nc - # South Monthly files: sic_pss12.5_YYYYMM_sat_v05r00.nc + # North Monthly files: sic_psn12.5_{YYYYMM}_{platform_id}_v05r00.nc + # South Monthly files: sic_pss12.5_{YYYYMM}_{platform_id}_v05r00.nc filename_pattern = f"sic_ps{hemisphere[0]}{resolution}_*.nc" monthly_files = list(sorted(monthly_dir.glob(filename_pattern))) @@ -102,7 +102,7 @@ def _update_ncrcat_monthly_ds( temporality="monthly", aggregate=True, source=", ".join([fp.name for fp in monthly_filepaths]), - sats=[sat_from_filename(fp.name) for fp in monthly_filepaths], + platform_ids=[platform_id_from_filename(fp.name) for fp in monthly_filepaths], ) agg_ds.attrs = monthly_aggregate_ds_global_attrs diff --git a/seaice_ecdr/multiprocess_daily.py b/seaice_ecdr/multiprocess_daily.py index 8f2fbf87..fe578496 100644 --- a/seaice_ecdr/multiprocess_daily.py +++ b/seaice_ecdr/multiprocess_daily.py @@ -14,7 +14,7 @@ from seaice_ecdr.complete_daily_ecdr import create_standard_ecdr_for_dates from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR from seaice_ecdr.initial_daily_ecdr import create_idecdr_for_date -from seaice_ecdr.platforms import get_first_platform_start_date +from seaice_ecdr.platforms import PLATFORM_CONFIG from seaice_ecdr.spillover import LAND_SPILL_ALGS from seaice_ecdr.temporal_composite_daily import make_tiecdr_netcdf from seaice_ecdr.util import ( @@ -122,7 +122,7 @@ def cli( # craete a range of dates that includes the interpolation range. This # ensures that data expected for temporal interpolation of the requested # dates has the data it needs. 
- earliest_date = get_first_platform_start_date() + earliest_date = PLATFORM_CONFIG.get_first_platform_start_date() initial_start_date = max(earliest_date, start_date - dt.timedelta(days=5)) initial_end_date = min(end_date + dt.timedelta(days=5), dt.date.today()) initial_file_dates = list( diff --git a/seaice_ecdr/nc_attrs.py b/seaice_ecdr/nc_attrs.py index e67ff9ae..d078c8c6 100644 --- a/seaice_ecdr/nc_attrs.py +++ b/seaice_ecdr/nc_attrs.py @@ -8,8 +8,8 @@ import pandas as pd import xarray as xr -from seaice_ecdr._types import SUPPORTED_SAT from seaice_ecdr.constants import ECDR_PRODUCT_VERSION +from seaice_ecdr.platforms import PLATFORM_CONFIG, SUPPORTED_PLATFORM_ID # Datetime string format for date-related attributes. DATE_STR_FMT = "%Y-%m-%dT%H:%M:%SZ" @@ -123,65 +123,6 @@ def _get_time_coverage_attrs( return time_coverage_attrs -# Here’s what the GCMD platform long name should be based on sensor/platform short name: -PLATFORMS_FOR_SATS: dict[SUPPORTED_SAT, str] = dict( - am2="GCOM-W1 > Global Change Observation Mission 1st-Water", - ame="Aqua > Earth Observing System, Aqua", - F17="DMSP 5D-3/F17 > Defense Meteorological Satellite Program-F17", - F13="DMSP 5D-2/F13 > Defense Meteorological Satellite Program-F13", - F11="DMSP 5D-2/F11 > Defense Meteorological Satellite Program-F11", - F08="DMSP 5D-2/F8 > Defense Meteorological Satellite Program-F8", - n07="Nimbus-7", -) - - -def _unique_sats(sats: list[SUPPORTED_SAT]) -> list[SUPPORTED_SAT]: - """Return the unique set of satellites. - - Order is preserved. - """ - # `set` is unordered. This gets the unique list of `sats`. - unique_sats = list(dict.fromkeys(sats)) - - return unique_sats - - -def get_platforms_for_sats(sats: list[SUPPORTED_SAT]) -> list[str]: - """Get the unique set of platforms for the given list of sats. - - Assumes `sats` is ordered from oldest->newest. - """ - # `set` is unordered. This gets the unique list of `sats`. 
- unique_sats = _unique_sats(sats) - platforms_for_sat = [PLATFORMS_FOR_SATS[sat] for sat in unique_sats] - - return platforms_for_sat - - -# Here’s what the GCMD sensor name should be based on sensor short name: -SENSORS_FOR_SATS: dict[SUPPORTED_SAT, str] = dict( - am2="AMSR2 > Advanced Microwave Scanning Radiometer 2", - ame="AMSR-E > Advanced Microwave Scanning Radiometer-EOS", - F17="SSMIS > Special Sensor Microwave Imager/Sounder", - # TODO: de-dup SSM/I text? - F13="SSM/I > Special Sensor Microwave/Imager", - F11="SSM/I > Special Sensor Microwave/Imager", - F08="SSM/I > Special Sensor Microwave/Imager", - n07="SMMR > Scanning Multichannel Microwave Radiometer", -) - - -def get_sensors_for_sats(sats: list[SUPPORTED_SAT]) -> list[str]: - """Get the unique set of sensors for the given list of sats. - - Assumes `sats` is ordered from oldest->newest. - """ - unique_sats = _unique_sats(sats) - sensors_for_sat = [SENSORS_FOR_SATS[sat] for sat in unique_sats] - - return sensors_for_sat - - def get_global_attrs( *, time: xr.DataArray, @@ -196,7 +137,7 @@ def get_global_attrs( # of source filenames. source: str, # List of satellites that provided data for the given netcdf file. - sats: list[SUPPORTED_SAT], + platform_ids: list[SUPPORTED_PLATFORM_ID], ) -> dict[str, Any]: """Return a dictionary containing the global attributes for a standard ECDR NetCDF file. @@ -207,8 +148,11 @@ def get_global_attrs( # TODO: support different resolutions, platforms, and sensors! 
resolution: Final = "12.5" - platform = ", ".join(get_platforms_for_sats(sats)) - sensor = ", ".join(get_sensors_for_sats(sats)) + platforms = [ + PLATFORM_CONFIG.platform_for_id(platform_id) for platform_id in platform_ids + ] + platform = ", ".join([platform.name for platform in platforms]) + sensor = ", ".join([platform.sensor for platform in platforms]) time_coverage_attrs = _get_time_coverage_attrs( temporality=temporality, diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index 2f95b4b6..f2eaf4c7 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -28,7 +28,7 @@ write_ide_netcdf, ) from seaice_ecdr.platforms import ( - get_platform_by_date, + PLATFORM_CONFIG, ) from seaice_ecdr.tb_data import EcdrTbData, map_tbs_to_ecdr_channels from seaice_ecdr.temporal_composite_daily import ( @@ -59,7 +59,7 @@ def compute_nrt_initial_daily_ecdr_dataset( data_dir=LANCE_NRT_DATA_DIR, ) data_source: Final = "LANCE AU_SI12" - platform: Final = "am2" + platform_id: Final = "am2" ecdr_tbs = map_tbs_to_ecdr_channels( # TODO/Note: this mapping is the same as used for `am2`. @@ -81,7 +81,7 @@ def compute_nrt_initial_daily_ecdr_dataset( tbs=ecdr_tbs, resolution=LANCE_RESOLUTION, data_source=data_source, - platform=platform, + platform_id=platform_id, ) nrt_initial_ecdr_ds = compute_initial_daily_ecdr_dataset( @@ -102,11 +102,11 @@ def read_or_create_and_read_nrt_idecdr_ds( intermediate_output_dir: Path, overwrite: bool, ): - platform = get_platform_by_date(date) + platform = PLATFORM_CONFIG.get_platform_by_date(date) idecdr_filepath = get_idecdr_filepath( hemisphere=hemisphere, date=date, - platform=platform, + platform_id=platform.id, intermediate_output_dir=intermediate_output_dir, resolution="12.5", ) diff --git a/seaice_ecdr/platforms.py b/seaice_ecdr/platforms.py deleted file mode 100644 index 2b5c9fc8..00000000 --- a/seaice_ecdr/platforms.py +++ /dev/null @@ -1,287 +0,0 @@ -"""platforms.py. 
- -Routines for dealing with the platform (satellite) and sensors -for CDRv5 - - -TODO: There are a couple of date ranges for which we do not want - to produce data files: - Aug 12-24, 1984 because there is no SMMR data - Dec 3, 1987 - Jan 12, 1988 because no F08 data - Also, anything prior to the start of the data record, - eg prior to Oct 25, 1978 -""" - -import datetime as dt -import os -from collections import OrderedDict -from functools import cache -from typing import cast, get_args - -import yaml -from loguru import logger - -from seaice_ecdr._types import SUPPORTED_SAT - -# TODO: De-dup with nc_attrs.py -# Here’s what the GCMD platform long name should be based on sensor/platform short name: -PLATFORMS_FOR_SATS: dict[SUPPORTED_SAT, str] = dict( - am2="GCOM-W1 > Global Change Observation Mission 1st-Water", - ame="Aqua > Earth Observing System, Aqua", - F17="DMSP 5D-3/F17 > Defense Meteorological Satellite Program-F17", - F13="DMSP 5D-2/F13 > Defense Meteorological Satellite Program-F13", - F11="DMSP 5D-2/F11 > Defense Meteorological Satellite Program-F11", - F08="DMSP 5D-2/F8 > Defense Meteorological Satellite Program-F8", - n07="Nimbus-7", -) - - -# TODO: De-dup with nc_attrs.py -# Here’s what the GCMD sensor name should be based on sensor short name: -SENSORS_FOR_SATS: dict[SUPPORTED_SAT, str] = dict( - am2="AMSR2 > Advanced Microwave Scanning Radiometer 2", - ame="AMSR-E > Advanced Microwave Scanning Radiometer-EOS", - F17="SSMIS > Special Sensor Microwave Imager/Sounder", - # TODO: de-dup SSM/I text? 
- F13="SSM/I > Special Sensor Microwave/Imager", - F11="SSM/I > Special Sensor Microwave/Imager", - F08="SSM/I > Special Sensor Microwave/Imager", - n07="SMMR > Scanning Multichannel Microwave Radiometer", -) - - -# These first and last dates were adapted from the cdrv4 file -# https://bitbucket.org/nsidc/seaice_cdr/src/master/source/config/cdr.yml -# of commit: -# https://bitbucket.org/nsidc/seaice_cdr/commits/c9c632e73530554d8acfac9090baeb1e35755897 -PLATFORM_AVAILABILITY: OrderedDict[SUPPORTED_SAT, dict] = OrderedDict( - n07={"first_date": dt.date(1978, 10, 25), "last_date": dt.date(1987, 7, 9)}, - F08={"first_date": dt.date(1987, 7, 10), "last_date": dt.date(1991, 12, 2)}, - F11={"first_date": dt.date(1991, 12, 3), "last_date": dt.date(1995, 9, 30)}, - F13={"first_date": dt.date(1995, 10, 1), "last_date": dt.date(2007, 12, 31)}, - F17={"first_date": dt.date(2008, 1, 1), "last_date": None}, - # ame={"first_date": dt.date(2002, 6, 1), "last_date": dt.date(2011, 10, 3)}, - # am2={"first_date": dt.date(2012, 7, 2), "last_date": None}, -) - - -def read_platform_start_dates_cfg_override( - start_dates_cfg_filename, -) -> OrderedDict[dt.date, SUPPORTED_SAT]: - """The "platform_start_dates" dictionary is an OrderedDict - of keys (dates) with corresponding platforms/sats (values) - - Note: It seems like yaml can't safe_load() an OrderedDict. - """ - try: - with open(start_dates_cfg_filename, "r") as config_file: - file_dict = yaml.safe_load(config_file) - except FileNotFoundError: - raise RuntimeError( - f"Could not find specified start_dates config file: {start_dates_cfg_filename}" - ) - - platform_start_dates = OrderedDict(file_dict) - - # Assert that the keys are ordered. - assert sorted(platform_start_dates.keys()) == list(platform_start_dates.keys()) - # Assert that the platforms are in our list of supported sats. 
- assert all( - [ - platform in get_args(SUPPORTED_SAT) - for platform in platform_start_dates.values() - ] - ) - - return platform_start_dates - - -@cache -def get_platform_start_dates() -> OrderedDict[dt.date, SUPPORTED_SAT]: - """Return dict of start dates for differnt platforms. - - Platform start dates can be overridden via a YAML override file specified by - the `PLATFORM_START_DATES_CFG_OVERRIDE_FILE` envvar. - """ - - if override_file := os.environ.get("PLATFORM_START_DATES_CFG_OVERRIDE_FILE"): - _platform_start_dates = read_platform_start_dates_cfg_override(override_file) - logger.info(f"Read platform start dates from {override_file}") - # TODO: it's clear that we should refactor to support passing in custom - # platform start dates programatically. This is essentially global state and - # it makes it very difficult to test out different combinations as a result. - elif forced_platform := os.environ.get("FORCE_PLATFORM"): - if forced_platform not in get_args(SUPPORTED_SAT): - raise RuntimeError( - f"The forced platform ({forced_platform}) is not a supported platform." - ) - - forced_platform = cast(SUPPORTED_SAT, forced_platform) - - first_date_of_forced_platform = PLATFORM_AVAILABILITY[forced_platform][ - "first_date" - ] - _platform_start_dates = OrderedDict( - { - first_date_of_forced_platform: forced_platform, - } - ) - - else: - _platform_start_dates = OrderedDict( - { - dt.date(1978, 10, 25): "n07", - dt.date(1987, 7, 10): "F08", - dt.date(1991, 12, 3): "F11", - dt.date(1995, 10, 1): "F13", - # dt.date(2002, 6, 1): "ame", # AMSR-E is first AMSR sat - # F17 starts while AMSR-E is up, on 2008-01-01. We don't use - # F17 until 2011-10-04. 
- dt.date(2008, 1, 1): "F17", - # dt.date(2012, 7, 3): "am2", # AMSR2 - } - ) - - _platform_start_dates = cast( - OrderedDict[dt.date, SUPPORTED_SAT], _platform_start_dates - ) - - assert _platform_start_dates_are_consistent( - platform_start_dates=_platform_start_dates - ) - - return _platform_start_dates - - -def _platform_available_for_date( - *, - date: dt.date, - platform: SUPPORTED_SAT, - platform_availability: OrderedDict = PLATFORM_AVAILABILITY, -) -> bool: - """Determine if platform is available on this date.""" - # First, verify the values of the first listed platform - first_available_date = platform_availability[platform]["first_date"] - if date < first_available_date: - print( - f""" - Satellite {platform} is not available on date {date} - {date} is before first_available_date {first_available_date} - Date info: {platform_availability[platform]} - """ - ) - return False - - try: - last_available_date = platform_availability[platform]["last_date"] - try: - if date > last_available_date: - print( - f""" - Satellite {platform} is not available on date {date} - {date} is after last_available_date {last_available_date} - Date info: {platform_availability[platform]} - """ - ) - return False - except TypeError as e: - if last_available_date is None: - pass - else: - raise e - except IndexError as e: - # last_date is set to None if platform is still providing new data - if last_available_date is None: - pass - else: - raise e - - return True - - -def _platform_start_dates_are_consistent( - *, - platform_start_dates: OrderedDict[dt.date, SUPPORTED_SAT], -) -> bool: - """Return whether the provided start date structure is valid.""" - date_list = list(platform_start_dates.keys()) - platform_list = list(platform_start_dates.values()) - try: - date = date_list[0] - platform = platform_list[0] - assert _platform_available_for_date( - date=date, - platform=platform, - platform_availability=PLATFORM_AVAILABILITY, - ) - - for idx in range(1, len(date_list)): - date = 
date_list[idx] - platform = platform_list[idx] - - # Check the end of the prior platform's date range - prior_date = date - dt.timedelta(days=1) - prior_platform = platform_list[idx - 1] - assert _platform_available_for_date( - date=prior_date, - platform=prior_platform, - platform_availability=PLATFORM_AVAILABILITY, - ) - - # Check this platform's first available date - assert _platform_available_for_date( - date=date, - platform=platform, - platform_availability=PLATFORM_AVAILABILITY, - ) - except AssertionError: - raise RuntimeError( - f""" - platform start dates are not consistent - platform_start_dates: {platform_start_dates} - platform_availability: {PLATFORM_AVAILABILITY} - """ - ) - - return True - - -def get_platform_by_date( - date: dt.date, -) -> SUPPORTED_SAT: - """Return the platform for this date.""" - platform_start_dates = get_platform_start_dates() - - start_date_list = list(platform_start_dates.keys()) - platform_list = list(platform_start_dates.values()) - - if date < start_date_list[0]: - raise RuntimeError( - f""" - date {date} too early. 
- First start_date: {start_date_list[0]} - """ - ) - - return_platform = None - if date >= start_date_list[-1]: - return_platform = platform_list[-1] - - if return_platform is None: - return_platform = platform_list[0] - for start_date, latest_platform in zip(start_date_list[1:], platform_list[1:]): - if date >= start_date: - return_platform = latest_platform - continue - else: - break - - return return_platform - - -def get_first_platform_start_date() -> dt.date: - """Return the start date of the first platform.""" - platform_start_dates = get_platform_start_dates() - earliest_date = min(platform_start_dates.keys()) - - return earliest_date diff --git a/seaice_ecdr/platforms/__init__.py b/seaice_ecdr/platforms/__init__.py new file mode 100644 index 00000000..53553041 --- /dev/null +++ b/seaice_ecdr/platforms/__init__.py @@ -0,0 +1,8 @@ +from seaice_ecdr.platforms.config import PLATFORM_CONFIG +from seaice_ecdr.platforms.models import SUPPORTED_PLATFORM_ID, Platform + +__all__ = [ + "PLATFORM_CONFIG", + "SUPPORTED_PLATFORM_ID", + "Platform", +] diff --git a/seaice_ecdr/platforms/config.py b/seaice_ecdr/platforms/config.py new file mode 100644 index 00000000..521d98d3 --- /dev/null +++ b/seaice_ecdr/platforms/config.py @@ -0,0 +1,164 @@ +"""Platform config + +Contains configuration for platforms supported by this code (e.g., AMSR2, F17, etc.). + +Platform start dates are read from a yaml file in this directory +(`default_platform_start_dates.yml`) unless overridden by the +`PLATFORM_START_DATES_CONFIG_FILEPATH` envvar. 
+""" + +import datetime as dt +import os +from pathlib import Path +from typing import cast, get_args + +import yaml + +from seaice_ecdr.platforms.models import ( + SUPPORTED_PLATFORM_ID, + DateRange, + Platform, + PlatformConfig, + PlatformStartDate, + platform_for_id, +) + +_this_dir = Path(__file__).parent +_DEFAULT_PLATFORM_START_DATES_CONFIG_FILEPATH = Path( + _this_dir / "default_platform_start_dates.yml" +) + + +AM2_PLATFORM = Platform( + name="GCOM-W1 > Global Change Observation Mission 1st-Water", + sensor="AMSR2 > Advanced Microwave Scanning Radiometer 2", + id="am2", + date_range=DateRange( + first_date=dt.date(2012, 7, 2), + last_date=None, + ), +) + +AME_PLATFORM = Platform( + name="Aqua > Earth Observing System, Aqua", + sensor="AMSR-E > Advanced Microwave Scanning Radiometer-EOS", + id="ame", + date_range=DateRange( + first_date=dt.date(2002, 6, 1), + last_date=dt.date(2011, 10, 3), + ), +) + +F17_PLATFORM = Platform( + name="DMSP 5D-3/F17 > Defense Meteorological Satellite Program-F17", + sensor="SSMIS > Special Sensor Microwave Imager/Sounder", + id="F17", + date_range=DateRange( + first_date=dt.date(2008, 1, 1), + last_date=None, + ), +) + +F13_PLATFORM = Platform( + name="DMSP 5D-2/F13 > Defense Meteorological Satellite Program-F13", + sensor="SSM/I > Special Sensor Microwave/Imager", + id="F13", + date_range=DateRange( + first_date=dt.date(1995, 10, 1), + last_date=dt.date(2007, 12, 31), + ), +) +F11_PLATFORM = Platform( + name="DMSP 5D-2/F11 > Defense Meteorological Satellite Program-F11", + sensor="SSM/I > Special Sensor Microwave/Imager", + id="F11", + date_range=DateRange( + first_date=dt.date(1991, 12, 3), + last_date=dt.date(1995, 9, 30), + ), +) +F08_PLATFORM = Platform( + name="DMSP 5D-2/F8 > Defense Meteorological Satellite Program-F8", + sensor="SSM/I > Special Sensor Microwave/Imager", + id="F08", + date_range=DateRange( + first_date=dt.date(1987, 7, 10), + last_date=dt.date(1991, 12, 2), + ), +) + +N07_PLATFORM = Platform( + 
name="Nimbus-7", + sensor="SMMR > Scanning Multichannel Microwave Radiometer", + id="n07", + date_range=DateRange( + first_date=dt.date(1978, 10, 25), + last_date=dt.date(1987, 7, 9), + ), +) + +SUPPORTED_PLATFORMS = [ + AM2_PLATFORM, + AME_PLATFORM, + F17_PLATFORM, + F13_PLATFORM, + F11_PLATFORM, + F08_PLATFORM, + N07_PLATFORM, +] + + +def _get_platform_config() -> PlatformConfig: + """Gets the platform config given a start dates filepath. + + This function is not intended to be used outside of this module, as it sets + a global variable accessed from other parts of the code (`PLATFORM_CONFIG`). + """ + + if platform_override_filepath_str := os.environ.get( + "PLATFORM_START_DATES_CONFIG_FILEPATH" + ): + platform_start_dates_config_filepath = Path(platform_override_filepath_str) + else: + platform_start_dates_config_filepath = ( + _DEFAULT_PLATFORM_START_DATES_CONFIG_FILEPATH + ) + + if not platform_start_dates_config_filepath.is_file(): + raise RuntimeError( + f"Could not find platform config file: {platform_start_dates_config_filepath}" + ) + + # TODO: drop support for "FORCE_PLATFORM" in favor of a platform start dates + # config override file. + if forced_platform_id := os.environ.get("FORCE_PLATFORM"): + if forced_platform_id not in get_args(SUPPORTED_PLATFORM_ID): + raise RuntimeError( + f"The forced platform ({forced_platform_id}) is not a supported platform." 
+ ) + + forced_platform_id = cast(SUPPORTED_PLATFORM_ID, forced_platform_id) + forced_platform = platform_for_id( + platforms=SUPPORTED_PLATFORMS, platform_id=forced_platform_id + ) + first_date_of_forced_platform = forced_platform.date_range.first_date + forced_cdr_platform_start_dates = [ + PlatformStartDate( + platform_id=forced_platform_id, + start_date=first_date_of_forced_platform, + ) + ] + + return PlatformConfig( + platforms=SUPPORTED_PLATFORMS, + cdr_platform_start_dates=forced_cdr_platform_start_dates, + ) + + with open(platform_start_dates_config_filepath, "r") as config_file: + start_dates_cfg = yaml.safe_load(config_file) + platform_cfg = PlatformConfig(platforms=SUPPORTED_PLATFORMS, **start_dates_cfg) + + return platform_cfg + + +PLATFORM_CONFIG = _get_platform_config() diff --git a/seaice_ecdr/platforms/default_platform_start_dates.yml b/seaice_ecdr/platforms/default_platform_start_dates.yml new file mode 100644 index 00000000..b128f207 --- /dev/null +++ b/seaice_ecdr/platforms/default_platform_start_dates.yml @@ -0,0 +1,18 @@ +cdr_platform_start_dates: + - platform_id: "n07" + start_date: "1978-10-25" + - platform_id: "F08" + start_date: "1987-07-10" + - platform_id: "F11" + start_date: "1991-12-03" + - platform_id: "F13" + start_date: "1995-10-01" + # TODO: we do not currently support AMSRE at the 25km resolution + # - platform_id: "ame" # AMSR-E is first AMSR sat + # start_date: "2002-06-01" + # F17 starts while AMSR-E is up, on 2008-01-01. We don't use F17 until + # 2011-10-04. 
+ - platform_id: "F17" + start_date: "2011-10-04" + - platform_id: "am2" + start_date: "2012-07-02" diff --git a/seaice_ecdr/platforms/models.py b/seaice_ecdr/platforms/models.py new file mode 100644 index 00000000..ac813091 --- /dev/null +++ b/seaice_ecdr/platforms/models.py @@ -0,0 +1,191 @@ +"""Pydantic data models for platform configuration.""" + +import copy +import datetime as dt +from typing import Literal, cast + +from pydantic import BaseModel, root_validator, validator + +# TODO: ideally this is used sparingly. The code should accept any number of +# platform configurations, and those configurations should defined what's +# "supported". We could even include the import string for a fetch function that +# conforms to a spec for each platform, so that the e.g., `tb_data` module does +# not need to map specific IDs to functions. See: +# https://docs.pydantic.dev/2.3/usage/types/string_types/#importstring +SUPPORTED_PLATFORM_ID = Literal[ + "am2", # AMSR2 + "ame", # AMSRE + "F17", # SSMIS F17 + "F13", # SSMI F13 + "F11", # SSMI F11 + "F08", # SSMI F08 + "n07", # Nimbus-7 SMMR +] + + +class DateRange(BaseModel): + first_date: dt.date + # If the last_date is None, it indicates that the satellite is still + # operating and we do not have a "last date" yet. + last_date: dt.date | None + + @root_validator(skip_on_failure=True) + def validate_date_range( + cls, # noqa: F841 (`cls` is unused, but must be present for pydantic) + values, + ): + first_date: dt.date = values["first_date"] + last_date: dt.date | None = values["last_date"] + + # If the last date isn't given, it means date range extends from the + # first date into the future (satellite is still operating) + if (last_date is not None) and (first_date > last_date): + raise ValueError( + f"First date ({first_date}) is after last date {last_date} in date range." 
+ ) + + return values + + +class Platform(BaseModel): + # E.g., "DMSP 5D-3/F17 > Defense Meteorological Satellite Program-F17" + name: str + # GCMD sensor name. E.g., SSMIS > Special Sensor Microwave Imager/Sounder + sensor: str + # E.g., "F17" + id: SUPPORTED_PLATFORM_ID + # The available date range for the platform, inclusive. + date_range: DateRange + + +def platform_for_id( + *, platforms: list[Platform], platform_id: SUPPORTED_PLATFORM_ID +) -> Platform: + for platform in platforms: + if platform_id == platform.id: + return platform + raise ValueError(f"Could not find platform with id {platform_id}.") + + +class PlatformStartDate(BaseModel): + platform_id: SUPPORTED_PLATFORM_ID + start_date: dt.date + + +class PlatformConfig(BaseModel): + platforms: list[Platform] + cdr_platform_start_dates: list[PlatformStartDate] + + @root_validator(skip_on_failure=True) + def validate_platform_start_dates_platform_in_platforms( + cls, # noqa: F841 (`cls` is unused, but must be present for pydantic) + values, + ): + """Validate that each platform start date corresponds with a defined platform.""" + platform_ids = [platform.id for platform in values["platforms"]] + for platform_start_date in values["cdr_platform_start_dates"]: + if platform_start_date.platform_id not in platform_ids: + raise ValueError( + f"Did not find {platform_start_date.platform_id} in platform list (must be one of {platform_ids})." 
+ ) + + return values + + @root_validator(skip_on_failure=True) + def validate_platform_start_date_in_platform_date_range( + cls, # noqa: F841 (`cls` is unused, but must be present for pydantic) + values, + ): + """Validate that each platform start date is within the platform's date range.""" + for platform_start_date in values["cdr_platform_start_dates"]: + matching_platform = platform_for_id( + platforms=values["platforms"], + platform_id=platform_start_date.platform_id, + ) + start_date_before_first_date = ( + matching_platform.date_range.first_date > platform_start_date.start_date + ) + + last_date_is_not_none = matching_platform.date_range.last_date is not None + start_date_after_last_date = last_date_is_not_none and ( + matching_platform.date_range.last_date < platform_start_date.start_date + ) + + if start_date_before_first_date or start_date_after_last_date: + raise ValueError( + f"Platform start date of {platform_start_date.start_date}" + f" for {matching_platform.id}" + " is outside of the platform's date range:" + f" {matching_platform.date_range}" + ) + return values + + @validator("cdr_platform_start_dates") + def validate_platform_start_dates_in_order( + cls, # noqa: F841 (`cls` is unused, but must be present for pydantic) + values: list[PlatformStartDate], + ) -> list[PlatformStartDate]: + """Validate that platform start dates are defined in order from old -> new. + + E.g., 1979-10-25 should be listed before 1987-07-10. + """ + last_start_date = None + for platform_start_date in values: + if last_start_date is None: + last_start_date = copy.deepcopy(platform_start_date) + assert last_start_date is not None + last_start_date = cast(PlatformStartDate, last_start_date) + continue + + # NOTE: the `type: ignore` on the next line is because mypy thinks + # this line is unreachable, but it is reachable. In fact, there is a + # unit test (`test_platform_config_validation_error`) that ensures + # this is true. 
+ if last_start_date.start_date >= platform_start_date.start_date: # type: ignore[unreachable] + raise ValueError( + f"Platform start dates are not sequentially increasing:" + f" {platform_start_date.platform_id} with start date {platform_start_date.start_date}" + " is given after" + f" {last_start_date.platform_id} with start date {last_start_date.start_date}." + ) + + last_start_date = copy.deepcopy(platform_start_date) + + return values + + def platform_for_id(self, platform_id: SUPPORTED_PLATFORM_ID) -> Platform: + """Return the Platform for the given platform ID.""" + return platform_for_id(platforms=self.platforms, platform_id=platform_id) + + def get_platform_by_date( + self, + date: dt.date, + ) -> Platform: + """Return the platform for this date.""" + first_start_date = self.get_first_platform_start_date() + if date < first_start_date: + raise RuntimeError( + f""" + date {date} too early. + First start_date: {first_start_date} + """ + ) + + return_platform_id = None + for cdr_platform_start_date in self.cdr_platform_start_dates: + if date >= cdr_platform_start_date.start_date: + return_platform_id = cdr_platform_start_date.platform_id + continue + else: + break + + if return_platform_id is None: + raise RuntimeError(f"Could not find platform for {date=}") + + return self.platform_for_id(return_platform_id) + + def get_first_platform_start_date(self) -> dt.date: + """Return the start date of the first platform.""" + earliest_date = self.cdr_platform_start_dates[0].start_date + + return earliest_date diff --git a/seaice_ecdr/set_daily_ncattrs.py b/seaice_ecdr/set_daily_ncattrs.py index 5e6fa9d2..2a7afdbe 100644 --- a/seaice_ecdr/set_daily_ncattrs.py +++ b/seaice_ecdr/set_daily_ncattrs.py @@ -351,7 +351,7 @@ def finalize_cdecdr_ds( temporality="daily", aggregate=False, source=f"Generated from {ds_in.data_source}", - sats=[ds_in.platform], + platform_ids=[ds_in.platform], ) ds.attrs = new_global_attrs diff --git a/seaice_ecdr/spillover.py 
b/seaice_ecdr/spillover.py index 5f8ac77d..b9c14a58 100644 --- a/seaice_ecdr/spillover.py +++ b/seaice_ecdr/spillover.py @@ -9,7 +9,6 @@ from pm_tb_data._types import Hemisphere from scipy.ndimage import binary_dilation, generate_binary_structure, shift -from seaice_ecdr._types import SUPPORTED_SAT from seaice_ecdr.ancillary import ( ANCILLARY_SOURCES, get_adj123_field, @@ -237,7 +236,6 @@ def land_spillover( tb_data: EcdrTbData, algorithm: LAND_SPILL_ALGS, land_mask: npt.NDArray, - platform: SUPPORTED_SAT, ancillary_source: ANCILLARY_SOURCES, bt_conc=None, # only needed if the BT or NT spillover are used nt_conc=None, # only needed if the BT or NT spillover are used diff --git a/seaice_ecdr/tb_data.py b/seaice_ecdr/tb_data.py index c36a19d8..481027ba 100644 --- a/seaice_ecdr/tb_data.py +++ b/seaice_ecdr/tb_data.py @@ -15,7 +15,7 @@ from pm_tb_data.fetch.nsidc_0007 import get_nsidc_0007_tbs_from_disk from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS -from seaice_ecdr.platforms import SUPPORTED_SAT, get_platform_by_date +from seaice_ecdr.platforms import PLATFORM_CONFIG, SUPPORTED_PLATFORM_ID from seaice_ecdr.util import get_ecdr_grid_shape EXPECTED_ECDR_TB_NAMES = ("h19", "v19", "v22", "h37", "v37") @@ -35,7 +35,7 @@ class EcdrTbData: tbs: EcdrTbs resolution: ECDR_SUPPORTED_RESOLUTIONS data_source: str - platform: SUPPORTED_SAT + platform_id: SUPPORTED_PLATFORM_ID def get_null_grid( @@ -150,7 +150,7 @@ def _get_am2_tbs( tbs=ecdr_tbs, resolution=resolution, data_source=data_source, - platform="am2", + platform_id="am2", ) return ecdr_tb_data @@ -199,14 +199,14 @@ def _get_ame_tbs(*, date: dt.date, hemisphere: Hemisphere) -> EcdrTbData: tbs=ecdr_tbs, resolution=tb_resolution, data_source=data_source, - platform="ame", + platform_id="ame", ) return ecdr_tb_data def _get_nsidc_0001_tbs( - *, date: dt.date, hemisphere: Hemisphere, platform: NSIDC_0001_SATS + *, date: dt.date, hemisphere: Hemisphere, platform_id: NSIDC_0001_SATS ) -> EcdrTbData: 
NSIDC0001_DATA_DIR = Path("/ecs/DP1/PM/NSIDC-0001.006/") # NSIDC-0001 TBs for siconc are all at 25km @@ -219,7 +219,7 @@ def _get_nsidc_0001_tbs( hemisphere=hemisphere, data_dir=NSIDC0001_DATA_DIR, resolution=nsidc0001_resolution, - sat=platform, + sat=platform_id, ) ecdr_tbs = map_tbs_to_ecdr_channels( @@ -244,7 +244,7 @@ def _get_nsidc_0001_tbs( f"Used all-null TBS for date={date}," f" hemisphere={hemisphere}," f" resolution={tb_resolution}" - f" platform={platform}" + f" platform_id={platform_id}" ) # TODO: For debugging TBs, consider a print/log statement such as this: @@ -253,7 +253,7 @@ def _get_nsidc_0001_tbs( tbs=ecdr_tbs, resolution=tb_resolution, data_source=data_source, - platform=platform, # type: ignore[arg-type] + platform_id=platform_id, # type: ignore[arg-type] ) return ecdr_tb_data @@ -294,7 +294,7 @@ def _get_nsidc_0007_tbs(*, hemisphere: Hemisphere, date: dt.date) -> EcdrTbData: tbs=ecdr_tbs, resolution=SMMR_RESOLUTION, data_source=data_source, - platform="n07", + platform_id="n07", ) return ecdr_tb_data @@ -307,18 +307,18 @@ def get_25km_ecdr_tb_data( hemisphere: Hemisphere, ) -> EcdrTbData: """Get 25km ECDR Tb data for the given date and hemisphere.""" - platform = get_platform_by_date(date) - if platform == "am2": + platform = PLATFORM_CONFIG.get_platform_by_date(date) + if platform.id == "am2": return _get_am2_tbs(date=date, hemisphere=hemisphere, resolution="25") - elif platform == "ame": + elif platform.id == "ame": raise NotImplementedError("AME is not yet supported at 25km resolution") - elif platform in get_args(NSIDC_0001_SATS): + elif platform.id in get_args(NSIDC_0001_SATS): return _get_nsidc_0001_tbs( - platform=platform, # type: ignore[arg-type] + platform_id=platform.id, # type: ignore[arg-type] date=date, hemisphere=hemisphere, ) - elif platform == "n07": + elif platform.id == "n07": # SMMR return _get_nsidc_0007_tbs(date=date, hemisphere=hemisphere) else: @@ -337,18 +337,18 @@ def get_ecdr_tb_data( data. 
It's up to the caller to decide how they want to handle resolution differences between platforms. """ - platform = get_platform_by_date(date) - if platform == "am2": + platform = PLATFORM_CONFIG.get_platform_by_date(date) + if platform.id == "am2": return _get_am2_tbs(date=date, hemisphere=hemisphere, resolution="12.5") - elif platform == "ame": + elif platform.id == "ame": return _get_ame_tbs(date=date, hemisphere=hemisphere) - elif platform in get_args(NSIDC_0001_SATS): + elif platform.id in get_args(NSIDC_0001_SATS): return _get_nsidc_0001_tbs( - platform=platform, # type: ignore[arg-type] + platform_id=platform.id, # type: ignore[arg-type] date=date, hemisphere=hemisphere, ) - elif platform == "n07": + elif platform.id == "n07": # SMMR return _get_nsidc_0007_tbs(date=date, hemisphere=hemisphere) else: diff --git a/seaice_ecdr/temporal_composite_daily.py b/seaice_ecdr/temporal_composite_daily.py index e7bd197a..ef534158 100644 --- a/seaice_ecdr/temporal_composite_daily.py +++ b/seaice_ecdr/temporal_composite_daily.py @@ -16,7 +16,7 @@ from pm_icecon.fill_polehole import fill_pole_hole from pm_tb_data._types import NORTH, Hemisphere -from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS, SUPPORTED_SAT +from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS from seaice_ecdr.ancillary import ( ANCILLARY_SOURCES, get_non_ocean_mask, @@ -28,10 +28,7 @@ create_idecdr_for_date, get_idecdr_filepath, ) -from seaice_ecdr.platforms import ( - get_first_platform_start_date, - get_platform_by_date, -) +from seaice_ecdr.platforms import PLATFORM_CONFIG from seaice_ecdr.spillover import LAND_SPILL_ALGS from seaice_ecdr.util import ( date_range, @@ -136,13 +133,13 @@ def get_tie_filepath( ) -> Path: """Return the complete daily tie file path.""" - platform = get_platform_by_date(date) - sat = cast(SUPPORTED_SAT, platform) + platform = PLATFORM_CONFIG.get_platform_by_date(date) + platform_id = platform.id standard_fn = standard_daily_filename( hemisphere=hemisphere, 
date=date, - sat=sat, + platform_id=platform_id, resolution=resolution, ) # Add `tiecdr` to the beginning of the standard name to distinguish it as a @@ -175,7 +172,7 @@ def iter_dates_near_date( near-real-time use, because data from "the future" are not available. """ earliest_date = target_date - dt.timedelta(days=day_range) - beginning_of_platform_coverage = get_first_platform_start_date() + beginning_of_platform_coverage = PLATFORM_CONFIG.get_first_platform_start_date() if earliest_date < beginning_of_platform_coverage: logger.warning( f"Resetting temporal interpolation earliest date from {earliest_date} to {beginning_of_platform_coverage}" @@ -374,13 +371,13 @@ def read_or_create_and_read_idecdr_ds( overwrite_ide: bool = False, ) -> xr.Dataset: """Read an idecdr netCDF file, creating it if it doesn't exist.""" - platform = get_platform_by_date( + platform = PLATFORM_CONFIG.get_platform_by_date( date, ) ide_filepath = get_idecdr_filepath( date=date, - platform=platform, + platform_id=platform.id, hemisphere=hemisphere, resolution=resolution, intermediate_output_dir=intermediate_output_dir, @@ -645,11 +642,9 @@ def temporal_interpolation( # grid is having its pole hole filled! 
if fill_the_pole_hole and hemisphere == NORTH: cdr_conc_pre_polefill = cdr_conc.copy() - platform = get_platform_by_date(date) near_pole_hole_mask = nh_polehole_mask( date=date, resolution=resolution, - sat=platform, ancillary_source=ancillary_source, ) cdr_conc_pole_filled = fill_pole_hole( @@ -720,11 +715,9 @@ def temporal_interpolation( # Fill pole hole of BT bt_conc_pre_polefill = bt_conc_2d.copy() - platform = get_platform_by_date(date) near_pole_hole_mask = nh_polehole_mask( date=date, resolution=resolution, - sat=platform, ancillary_source=ancillary_source, ) bt_conc_pole_filled = fill_pole_hole( diff --git a/seaice_ecdr/tests/integration/test_complete_daily.py b/seaice_ecdr/tests/integration/test_complete_daily.py index 28d39c06..9b12bd04 100644 --- a/seaice_ecdr/tests/integration/test_complete_daily.py +++ b/seaice_ecdr/tests/integration/test_complete_daily.py @@ -14,7 +14,7 @@ def test_make_standard_cdecdr_netcdf(base_output_dir_test_path): # noqa output_path = make_standard_cdecdr_netcdf( date=dt.date(2022, 3, day), hemisphere=NORTH, - resolution="12.5", + resolution="25", base_output_dir=base_output_dir_test_path, land_spillover_alg="NT2", ancillary_source=ancillary_source, diff --git a/seaice_ecdr/tests/integration/test_initial_daily_ecdr_generation.py b/seaice_ecdr/tests/integration/test_initial_daily_ecdr_generation.py index 370e2ca9..c7872093 100644 --- a/seaice_ecdr/tests/integration/test_initial_daily_ecdr_generation.py +++ b/seaice_ecdr/tests/integration/test_initial_daily_ecdr_generation.py @@ -15,6 +15,7 @@ make_idecdr_netcdf, write_ide_netcdf, ) +from seaice_ecdr.platforms import SUPPORTED_PLATFORM_ID cdr_conc_fieldname = "conc" @@ -26,7 +27,7 @@ def sample_idecdr_dataset_nh(): test_date = dt.datetime(2021, 4, 5).date() test_hemisphere = NORTH - test_resolution: Final = "12.5" + test_resolution: Final = "25" ancillary_source: Final = "CDRv5" ide_conc_ds = initial_daily_ecdr_dataset( @@ -46,7 +47,7 @@ def sample_idecdr_dataset_sh(): test_date = 
dt.datetime(2021, 4, 5).date() test_hemisphere = NORTH - test_resolution: Final = "12.5" + test_resolution: Final = "25" ancillary_source: Final = "CDRv5" ide_conc_ds = initial_daily_ecdr_dataset( @@ -133,8 +134,8 @@ def test_cli_idecdr_ncfile_creation(tmpdir): tmpdir_path = Path(tmpdir) test_date = dt.datetime(2021, 4, 5).date() test_hemisphere = NORTH - test_resolution: Final = "12.5" - test_platform = "am2" + test_resolution: Final = "25" + test_platform_id: SUPPORTED_PLATFORM_ID = "am2" ancillary_source: Final = "CDRv5" make_idecdr_netcdf( @@ -149,7 +150,7 @@ def test_cli_idecdr_ncfile_creation(tmpdir): output_path = get_idecdr_filepath( hemisphere=test_hemisphere, date=test_date, - platform=test_platform, + platform_id=test_platform_id, resolution=test_resolution, intermediate_output_dir=tmpdir_path, ) @@ -168,8 +169,8 @@ def test_can_drop_fields_from_idecdr_netcdf( tmpdir_path = Path(tmpdir) test_date = dt.datetime(2021, 4, 5).date() test_hemisphere = NORTH - test_resolution: Final = "12.5" - test_platform = "am2" + test_resolution: Final = "25" + test_platform_id: SUPPORTED_PLATFORM_ID = "am2" ancillary_source: Final = "CDRv5" make_idecdr_netcdf( @@ -184,7 +185,7 @@ def test_can_drop_fields_from_idecdr_netcdf( output_path = get_idecdr_filepath( hemisphere=test_hemisphere, date=test_date, - platform=test_platform, + platform_id=test_platform_id, resolution=test_resolution, intermediate_output_dir=tmpdir_path, ) diff --git a/seaice_ecdr/tests/integration/test_monthly.py b/seaice_ecdr/tests/integration/test_monthly.py index 632a9d2b..79f1ced6 100644 --- a/seaice_ecdr/tests/integration/test_monthly.py +++ b/seaice_ecdr/tests/integration/test_monthly.py @@ -30,8 +30,8 @@ def test_make_monthly_nc(base_output_dir_test_path, monkeypatch): # noqa year=2022, month=3, hemisphere=NORTH, + resolution="25", complete_output_dir=complete_output_dir, - resolution="12.5", ancillary_source=ancillary_source, ) diff --git a/seaice_ecdr/tests/integration/test_tb_data.py 
b/seaice_ecdr/tests/integration/test_tb_data.py index 894b3a27..fcc9afdf 100644 --- a/seaice_ecdr/tests/integration/test_tb_data.py +++ b/seaice_ecdr/tests/integration/test_tb_data.py @@ -3,15 +3,16 @@ import numpy as np from pm_tb_data._types import NORTH, SOUTH -from seaice_ecdr.platforms import get_platform_start_dates -from seaice_ecdr.tb_data import get_ecdr_tb_data +from seaice_ecdr.platforms import PLATFORM_CONFIG +from seaice_ecdr.tb_data import get_25km_ecdr_tb_data def test_get_ecdr_tb_data(): - platform_start_dates = get_platform_start_dates() - for date, platform in platform_start_dates.items(): - ecdr_tb_data = get_ecdr_tb_data(date=date, hemisphere=NORTH) - assert ecdr_tb_data.platform == platform + for platform_start_date in PLATFORM_CONFIG.cdr_platform_start_dates: + ecdr_tb_data = get_25km_ecdr_tb_data( + date=platform_start_date.start_date, hemisphere=NORTH + ) + assert ecdr_tb_data.platform_id == platform_start_date.platform_id assert not np.all(np.isnan(ecdr_tb_data.tbs.v19)) assert not np.all(np.isnan(ecdr_tb_data.tbs.h19)) @@ -25,7 +26,7 @@ def test_get_ecdr_tb_data_missing_channel(): We know this happens at least once: on 10/10/1995 SH for 22v (from F13). """ - ecdr_tb_data = get_ecdr_tb_data(date=dt.date(1995, 10, 10), hemisphere=SOUTH) + ecdr_tb_data = get_25km_ecdr_tb_data(date=dt.date(1995, 10, 10), hemisphere=SOUTH) # v22 is known to be missing for this day and hemisphere. 
assert np.all(np.isnan(ecdr_tb_data.tbs.v22)) diff --git a/seaice_ecdr/tests/integration/test_temporal_composite_daily_integration.py b/seaice_ecdr/tests/integration/test_temporal_composite_daily_integration.py index 52c4ca73..274f0f69 100644 --- a/seaice_ecdr/tests/integration/test_temporal_composite_daily_integration.py +++ b/seaice_ecdr/tests/integration/test_temporal_composite_daily_integration.py @@ -11,6 +11,7 @@ from pm_tb_data._types import NORTH from seaice_ecdr.initial_daily_ecdr import get_idecdr_filepath +from seaice_ecdr.platforms import SUPPORTED_PLATFORM_ID from seaice_ecdr.temporal_composite_daily import ( make_tiecdr_netcdf, read_or_create_and_read_idecdr_ds, @@ -18,8 +19,8 @@ date = dt.date(2021, 2, 19) hemisphere = NORTH -resolution: Final = "12.5" -platform = "am2" +resolution: Final = "25" +platform_id: SUPPORTED_PLATFORM_ID = "am2" land_spillover_alg: Final = "NT2" ancillary_source: Final = "CDRv5" @@ -29,7 +30,7 @@ def test_read_or_create_and_read_idecdr_ds(tmpdir): sample_ide_filepath = get_idecdr_filepath( date=date, - platform=platform, + platform_id=platform_id, hemisphere=hemisphere, resolution=resolution, intermediate_output_dir=Path(tmpdir), diff --git a/seaice_ecdr/tests/regression/test_daily_aggregate.py b/seaice_ecdr/tests/regression/test_daily_aggregate.py index 6deb762d..eecdcfff 100644 --- a/seaice_ecdr/tests/regression/test_daily_aggregate.py +++ b/seaice_ecdr/tests/regression/test_daily_aggregate.py @@ -23,7 +23,7 @@ def test_daily_aggregate_matches_daily_data(tmpdir): ) year = 2022 - resolution: Final = "12.5" + resolution: Final = "25" land_spillover_alg: Final = "NT2" ancillary_source: Final = "CDRv5" diff --git a/seaice_ecdr/tests/unit/test_monthly.py b/seaice_ecdr/tests/unit/test_monthly.py index e30287bd..e7148417 100644 --- a/seaice_ecdr/tests/unit/test_monthly.py +++ b/seaice_ecdr/tests/unit/test_monthly.py @@ -13,8 +13,8 @@ QA_OF_CDR_SEAICE_CONC_DAILY_BITMASKS, QA_OF_CDR_SEAICE_CONC_MONTHLY_BITMASKS, 
_get_daily_complete_filepaths_for_month, + _platform_id_for_month, _qa_field_has_flag, - _sat_for_month, calc_cdr_seaice_conc_monthly, calc_melt_onset_day_cdr_seaice_conc_monthly, calc_qa_of_cdr_seaice_conc_monthly, @@ -86,27 +86,27 @@ def _mock_daily_ds_for_month(num_days: int) -> xr.Dataset: # Check that no error is raised for AMSR2, full month's worth of data check_min_days_for_valid_month( daily_ds_for_month=_mock_daily_ds_for_month(31), - sat="am2", + platform_id="am2", ) # Check that an error is raised for AMSR2, not a full month's worth of data with pytest.raises(RuntimeError): check_min_days_for_valid_month( daily_ds_for_month=_mock_daily_ds_for_month(19), - sat="am2", + platform_id="am2", ) # Check that an error is not raised for n07, with modified min worth of data check_min_days_for_valid_month( daily_ds_for_month=_mock_daily_ds_for_month(10), - sat="n07", + platform_id="n07", ) # Check that an error is raised for n07, not a full month's worth of data with pytest.raises(RuntimeError): check_min_days_for_valid_month( daily_ds_for_month=_mock_daily_ds_for_month(9), - sat="n07", + platform_id="n07", ) @@ -514,7 +514,7 @@ def test_monthly_ds(monkeypatch, tmpdir): ) actual = make_monthly_ds( daily_ds_for_month=_mock_daily_ds, - sat="am2", + platform_id="am2", hemisphere=NORTH, resolution="12.5", ancillary_source="CDRv5", @@ -547,14 +547,14 @@ def test_monthly_ds(monkeypatch, tmpdir): xr.testing.assert_allclose(actual, after_write, atol=0.009) -def test__sat_for_month(): - assert "am2" == _sat_for_month(sats=["am2", "am2", "am2", "am2"]) +def test__platform_id_for_month(): + assert "am2" == _platform_id_for_month(platform_ids=["am2", "am2", "am2", "am2"]) - assert "am2" == _sat_for_month(sats=["F17", "F17", "am2", "am2"]) + assert "am2" == _platform_id_for_month(platform_ids=["F17", "F17", "am2", "am2"]) - assert "F17" == _sat_for_month(sats=["F13", "F13", "F13", "F17"]) + assert "F17" == _platform_id_for_month(platform_ids=["F13", "F13", "F13", "F17"]) - 
assert "am2" == _sat_for_month(sats=["F13", "F17", "am2"]) + assert "am2" == _platform_id_for_month(platform_ids=["F13", "F17", "am2"]) def test_calc_surface_mask_monthly(): diff --git a/seaice_ecdr/tests/unit/test_nc_attrs.py b/seaice_ecdr/tests/unit/test_nc_attrs.py index f40ec5c2..c72a13ba 100644 --- a/seaice_ecdr/tests/unit/test_nc_attrs.py +++ b/seaice_ecdr/tests/unit/test_nc_attrs.py @@ -5,8 +5,6 @@ from seaice_ecdr.nc_attrs import ( _get_software_version_id, _get_time_coverage_attrs, - get_platforms_for_sats, - get_sensors_for_sats, ) @@ -123,45 +121,3 @@ def test__get_software_version_id(): # git@github.com:nsidc/seaice_ecdr.git@10fdd316452d0d69fbcf4e7915b66c227298b0ec assert "github" in software_ver_id assert "@" in software_ver_id - - -def test_get_platforms_for_sat(): - expected = [ - "DMSP 5D-2/F13 > Defense Meteorological Satellite Program-F13", - "DMSP 5D-3/F17 > Defense Meteorological Satellite Program-F17", - "GCOM-W1 > Global Change Observation Mission 1st-Water", - ] - - actual = get_platforms_for_sats( - [ - "F13", - "F17", - "F17", - "am2", - "am2", - "am2", - ] - ) - - assert expected == actual - - -def test_get_sensors_for_sats(): - expected = [ - "SSM/I > Special Sensor Microwave/Imager", - "SSMIS > Special Sensor Microwave Imager/Sounder", - "AMSR2 > Advanced Microwave Scanning Radiometer 2", - ] - - actual = get_sensors_for_sats( - [ - "F13", - "F17", - "F17", - "am2", - "am2", - "am2", - ] - ) - - assert expected == actual diff --git a/seaice_ecdr/tests/unit/test_platforms.py b/seaice_ecdr/tests/unit/test_platforms.py index 56ac27bd..c3decbf3 100644 --- a/seaice_ecdr/tests/unit/test_platforms.py +++ b/seaice_ecdr/tests/unit/test_platforms.py @@ -1,124 +1,197 @@ """Test the platforms.py routine for seaice_ecdr.""" import datetime as dt +from collections import OrderedDict +from pathlib import Path from typing import get_args +import pytest import yaml +from pydantic import ValidationError from seaice_ecdr.platforms import ( - 
PLATFORM_AVAILABILITY, - PLATFORMS_FOR_SATS, - SUPPORTED_SAT, - _platform_available_for_date, - _platform_start_dates_are_consistent, - get_platform_by_date, - get_platform_start_dates, + PLATFORM_CONFIG, + SUPPORTED_PLATFORM_ID, +) +from seaice_ecdr.platforms.config import SUPPORTED_PLATFORMS, _get_platform_config +from seaice_ecdr.platforms.models import ( + DateRange, + Platform, + PlatformConfig, + PlatformStartDate, ) -platform_test_dates = { - "n07": dt.date(1980, 1, 1), - "F08": dt.date(1990, 1, 1), - "F11": dt.date(1992, 6, 1), - "F13": dt.date(1998, 10, 1), - "F17": dt.date(2011, 12, 25), - "ame": dt.date(2005, 3, 15), - "am2": dt.date(2019, 2, 14), -} +platform_test_dates: OrderedDict[SUPPORTED_PLATFORM_ID, dt.date] = OrderedDict( + { + "n07": dt.date(1980, 1, 1), + "F08": dt.date(1990, 1, 1), + "F11": dt.date(1992, 6, 1), + "F13": dt.date(1998, 10, 1), + "F17": dt.date(2011, 12, 25), + "ame": dt.date(2005, 3, 15), + "am2": dt.date(2019, 2, 14), + } +) -def test_SUPPORTED_SAT(): - cdrv5_sats = ( +def test_SUPPORTED_PLATFORM_ID(): + cdrv5_platform_ids = ( "am2", "F17", ) - for sat in cdrv5_sats: - assert sat in get_args(SUPPORTED_SAT) + for platform_id in cdrv5_platform_ids: + assert platform_id in get_args(SUPPORTED_PLATFORM_ID) def test_platforms_for_sats(): - for key in PLATFORMS_FOR_SATS.keys(): - assert key in get_args(SUPPORTED_SAT) - + for platform in SUPPORTED_PLATFORMS: + assert platform.id in get_args(SUPPORTED_PLATFORM_ID) -def test_default_platform_availability(): - for key in PLATFORM_AVAILABILITY.keys(): - assert key in str(SUPPORTED_SAT) - pa_dict = PLATFORM_AVAILABILITY[key] - assert "first_date" in pa_dict - assert "last_date" in pa_dict +def test_get_platform_by_date(): + date_before_any_satellites = dt.date(1900, 1, 1) + with pytest.raises(RuntimeError): + PLATFORM_CONFIG.get_platform_by_date(date_before_any_satellites) + expected_f13_date = dt.date(1995, 11, 1) + platform = PLATFORM_CONFIG.get_platform_by_date(expected_f13_date) + 
assert platform.id == "F13" -def test_default_platform_start_dates_are_consistent(): - platform_start_dates = get_platform_start_dates() - assert _platform_start_dates_are_consistent( - platform_start_dates=platform_start_dates - ) +def test_override_platform_by_date(monkeypatch, tmpdir): + override_file = Path(tmpdir / "override_platform_dates.yaml") + expected_platform_dates = { + "cdr_platform_start_dates": [ + {"platform_id": "F08", "start_date": dt.date(1987, 8, 12)}, + {"platform_id": "F11", "start_date": dt.date(1992, 6, 15)}, + ], + } -def test_platform_availability_by_date(): - all_platforms = list(get_args(SUPPORTED_SAT)) + with open(override_file, "w") as yaml_file: + yaml.safe_dump(expected_platform_dates, yaml_file) - date_before_any_satellites = dt.date(1900, 1, 1) - for platform in all_platforms: - assert not _platform_available_for_date( - date=date_before_any_satellites, - platform=platform, - ) + monkeypatch.setenv("PLATFORM_START_DATES_CONFIG_FILEPATH", str(override_file)) + platform_config = _get_platform_config() - date_after_dead_satellites = dt.date(2100, 1, 1) - dead_satellites = ( - "n07", - "F08", - "F11", - "F13", - "ame", + assert len(platform_config.cdr_platform_start_dates) == 2 + assert platform_config.cdr_platform_start_dates[0].platform_id == "F08" + assert platform_config.cdr_platform_start_dates[0].start_date == dt.date( + 1987, 8, 12 + ) + assert platform_config.cdr_platform_start_dates[1].platform_id == "F11" + assert platform_config.cdr_platform_start_dates[1].start_date == dt.date( + 1992, 6, 15 ) - for platform in dead_satellites: - assert not _platform_available_for_date( - date=date_after_dead_satellites, - platform=platform, - ) - - for platform in platform_test_dates.keys(): - assert _platform_available_for_date( - date=platform_test_dates[platform], - platform=platform, - ) - - -def test_get_platform_by_date(): - platform_start_dates = get_platform_start_dates() - date_list = platform_start_dates.keys() - platform_list = 
platform_start_dates.values() - for date, expected_platform in zip(date_list, platform_list): - print(f"testing {date} -> {expected_platform}") - platform = get_platform_by_date( - date=date, +def test__get_platform_config(): + platform_cfg = _get_platform_config() + + assert len(platform_cfg.platforms) >= 1 + assert len(platform_cfg.cdr_platform_start_dates) >= 1 + assert PLATFORM_CONFIG == platform_cfg + + +def test_platform_config_validation_error(): + # Tests `validate_platform_start_dates_platform_in_platforms` + with pytest.raises(ValidationError, match=r"Did not find am2 in platform list.*"): + PlatformConfig( + platforms=[ + Platform( + id="ame", + name="fooname", + sensor="sensorname", + date_range=DateRange( + first_date=dt.date(2012, 7, 2), + last_date=None, + ), + ) + ], + cdr_platform_start_dates=[ + PlatformStartDate( + platform_id="am2", + start_date=dt.date(2022, 1, 1), + ) + ], ) - assert platform == expected_platform + # tests `validate_platform_start_dates_in_order` + with pytest.raises( + ValidationError, match=r"Platform start dates are not sequential.*" + ): + PlatformConfig( + platforms=[ + Platform( + id="F13", + name="fooname", + sensor="sensorname", + date_range=DateRange( + first_date=dt.date(1991, 1, 1), + last_date=dt.date(1992, 1, 1), + ), + ), + Platform( + id="am2", + name="fooname", + sensor="sensorname", + date_range=DateRange( + first_date=dt.date(2012, 7, 2), + last_date=None, + ), + ), + ], + cdr_platform_start_dates=[ + PlatformStartDate(platform_id="am2", start_date=dt.date(2022, 1, 1)), + PlatformStartDate(platform_id="F13", start_date=dt.date(1991, 1, 1)), + ], + ) -def test_override_platform_by_date(monkeypatch, tmpdir): - override_file = tmpdir / "override_platform_dates.yaml" - expected_platform_dates = { - dt.date(1987, 7, 10): "F08", - dt.date(1991, 12, 3): "F11", - dt.date(1995, 10, 1): "F13", - dt.date(2002, 6, 1): "ame", - } + # tests `validate_platform_start_date_in_platform_date_range` + # First error date is 
before the date range, and the second is after. + for err_start_date in (dt.date(2000, 1, 1), dt.date(2015, 1, 1)): + with pytest.raises( + ValidationError, match=r".*is outside of the platform's date range.*" + ): + PlatformConfig( + platforms=[ + Platform( + id="ame", + name="fooname", + sensor="sensorname", + date_range=DateRange( + first_date=dt.date(2012, 7, 2), + last_date=dt.date(2012, 12, 31), + ), + ) + ], + cdr_platform_start_dates=[ + PlatformStartDate( + platform_id="ame", + # Start date is outside the platform's date range + start_date=err_start_date, + ) + ], + ) + + +def test_platform_date_range_validation_error(): + # tests `validate_date_range` + with pytest.raises(ValidationError, match=r".*First date.*is after last date.*"): + DateRange( + first_date=dt.date(2021, 1, 1), + last_date=dt.date(2020, 1, 1), + ) - with open(override_file, "w") as yaml_file: - yaml.safe_dump(expected_platform_dates, yaml_file) - monkeypatch.setenv("PLATFORM_START_DATES_CFG_OVERRIDE_FILE", str(override_file)) +def test_get_first_platform_start_date(): + actual = PLATFORM_CONFIG.get_first_platform_start_date() - # This is a cached function. Calls from other tests may interfere, so clear - # the cache here. 
- get_platform_start_dates.cache_clear() - platform_dates = get_platform_start_dates() + min_date = min( + [ + platform_start_date.start_date + for platform_start_date in PLATFORM_CONFIG.cdr_platform_start_dates + ] + ) - assert platform_dates == expected_platform_dates + assert actual == min_date diff --git a/seaice_ecdr/tests/unit/test_temporal_composite_daily.py b/seaice_ecdr/tests/unit/test_temporal_composite_daily.py index 6ef3366d..f250cc60 100644 --- a/seaice_ecdr/tests/unit/test_temporal_composite_daily.py +++ b/seaice_ecdr/tests/unit/test_temporal_composite_daily.py @@ -12,6 +12,7 @@ from seaice_ecdr.constants import ECDR_PRODUCT_VERSION from seaice_ecdr.initial_daily_ecdr import get_idecdr_dir, get_idecdr_filepath +from seaice_ecdr.platforms import SUPPORTED_PLATFORM_ID from seaice_ecdr.temporal_composite_daily import ( iter_dates_near_date, temporally_composite_dataarray, @@ -81,12 +82,12 @@ def test_access_to_standard_output_filename(tmpdir): """Verify that standard output file names can be generated.""" date = dt.date(2021, 2, 19) resolution: Final = "12.5" - sat = "am2" + platform_id: SUPPORTED_PLATFORM_ID = "am2" intermediate_output_dir = Path(tmpdir) sample_ide_filepath = get_idecdr_filepath( date=date, - platform=sat, + platform_id=platform_id, hemisphere=NORTH, resolution=resolution, intermediate_output_dir=intermediate_output_dir, diff --git a/seaice_ecdr/tests/unit/test_util.py b/seaice_ecdr/tests/unit/test_util.py index f84ab3dd..10f3fbec 100644 --- a/seaice_ecdr/tests/unit/test_util.py +++ b/seaice_ecdr/tests/unit/test_util.py @@ -13,8 +13,8 @@ date_range, get_num_missing_pixels, nrt_daily_filename, + platform_id_from_filename, raise_error_for_dates, - sat_from_filename, standard_daily_aggregate_filename, standard_daily_filename, standard_monthly_aggregate_filename, @@ -26,7 +26,7 @@ def test_daily_filename_north(): expected = f"sic_psn12.5_20210101_am2_{ECDR_PRODUCT_VERSION}.nc" actual = standard_daily_filename( - hemisphere=NORTH, 
resolution="12.5", sat="am2", date=dt.date(2021, 1, 1) + hemisphere=NORTH, resolution="12.5", platform_id="am2", date=dt.date(2021, 1, 1) ) assert actual == expected @@ -36,7 +36,7 @@ def test_daily_filename_south(): expected = f"sic_pss12.5_20210101_am2_{ECDR_PRODUCT_VERSION}.nc" actual = standard_daily_filename( - hemisphere=SOUTH, resolution="12.5", sat="am2", date=dt.date(2021, 1, 1) + hemisphere=SOUTH, resolution="12.5", platform_id="am2", date=dt.date(2021, 1, 1) ) assert actual == expected @@ -46,7 +46,7 @@ def test_nrt_daily_filename(): expected = f"sic_psn12.5_20210101_am2_{ECDR_PRODUCT_VERSION}_P.nc" actual = nrt_daily_filename( - hemisphere=NORTH, resolution="12.5", sat="am2", date=dt.date(2021, 1, 1) + hemisphere=NORTH, resolution="12.5", platform_id="am2", date=dt.date(2021, 1, 1) ) assert actual == expected @@ -71,7 +71,7 @@ def test_monthly_filename_north(): actual = standard_monthly_filename( hemisphere=NORTH, resolution="12.5", - sat="am2", + platform_id="am2", year=2021, month=1, ) @@ -85,7 +85,7 @@ def test_monthly_filename_south(): actual = standard_monthly_filename( hemisphere=SOUTH, resolution="12.5", - sat="am2", + platform_id="am2", year=2021, month=1, ) @@ -108,30 +108,33 @@ def test_monthly_aggregate_filename(): assert actual == expected -def test_daily_sat_from_filename(): - expected_sat: Final = "am2" +def test_daily_platform_id_from_filename(): + expected_platform_id: Final = "am2" fn = standard_daily_filename( - hemisphere=NORTH, resolution="12.5", sat=expected_sat, date=dt.date(2021, 1, 1) + hemisphere=NORTH, + resolution="12.5", + platform_id=expected_platform_id, + date=dt.date(2021, 1, 1), ) - actual_sat = sat_from_filename(fn) + actual_platform_id = platform_id_from_filename(fn) - assert expected_sat == actual_sat + assert expected_platform_id == actual_platform_id -def test_monthly_sat_from_filename(): - expected_sat: Final = "F17" +def test_monthly_platform_id_from_filename(): + expected_platform_id: Final = "F17" fn = 
standard_monthly_filename( hemisphere=SOUTH, resolution="12.5", - sat=expected_sat, + platform_id=expected_platform_id, year=2021, month=1, ) - actual_sat = sat_from_filename(fn) + actual_platform_id = platform_id_from_filename(fn) - assert expected_sat == actual_sat + assert expected_platform_id == actual_platform_id def test_date_range(): diff --git a/seaice_ecdr/util.py b/seaice_ecdr/util.py index 0d530005..18e9b8f7 100644 --- a/seaice_ecdr/util.py +++ b/seaice_ecdr/util.py @@ -8,29 +8,30 @@ import xarray as xr from pm_tb_data._types import Hemisphere -from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS, SUPPORTED_SAT +from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS from seaice_ecdr.ancillary import ANCILLARY_SOURCES, get_ocean_mask from seaice_ecdr.constants import ECDR_PRODUCT_VERSION from seaice_ecdr.grid_id import get_grid_id +from seaice_ecdr.platforms import SUPPORTED_PLATFORM_ID def standard_daily_filename( *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, - sat: SUPPORTED_SAT, + platform_id: SUPPORTED_PLATFORM_ID, date: dt.date, ) -> str: """Return standard daily NetCDF filename. 
- North Daily files: sic_psn12.5_YYYYMMDD_sat_v05r01.nc - South Daily files: sic_pss12.5_YYYYMMDD_sat_v05r01.nc + North Daily files: sic_psn12.5_{YYYYMMDD}_{platform_id}_v05r01.nc + South Daily files: sic_pss12.5_{YYYYMMDD}_{platform_id}_v05r01.nc """ grid_id = get_grid_id( hemisphere=hemisphere, resolution=resolution, ) - fn = f"sic_{grid_id}_{date:%Y%m%d}_{sat}_{ECDR_PRODUCT_VERSION}.nc" + fn = f"sic_{grid_id}_{date:%Y%m%d}_{platform_id}_{ECDR_PRODUCT_VERSION}.nc" return fn @@ -39,13 +40,13 @@ def nrt_daily_filename( *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, - sat: SUPPORTED_SAT, + platform_id: SUPPORTED_PLATFORM_ID, date: dt.date, ) -> str: standard_fn = standard_daily_filename( hemisphere=hemisphere, resolution=resolution, - sat=sat, + platform_id=platform_id, date=date, ) standard_fn_path = Path(standard_fn) @@ -84,20 +85,20 @@ def standard_monthly_filename( *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, - sat: SUPPORTED_SAT, + platform_id: SUPPORTED_PLATFORM_ID, year: int, month: int, ) -> str: """Return standard monthly NetCDF filename. - North Monthly files: sic_psn12.5_YYYYMM_sat_v05r01.nc - South Monthly files: sic_pss12.5_YYYYMM_sat_v05r01.nc + North Monthly files: sic_psn12.5_{YYYYMM}_{platform_id}_v05r01.nc + South Monthly files: sic_pss12.5_{YYYYMM}_{platform_id}_v05r01.nc """ grid_id = get_grid_id( hemisphere=hemisphere, resolution=resolution, ) - fn = f"sic_{grid_id}_{year}{month:02}_{sat}_{ECDR_PRODUCT_VERSION}.nc" + fn = f"sic_{grid_id}_{year}{month:02}_{platform_id}_{ECDR_PRODUCT_VERSION}.nc" return fn @@ -129,22 +130,22 @@ def standard_monthly_aggregate_filename( # This regex works for both daily and monthly filenames. 
-STANDARD_FN_REGEX = re.compile(r"sic_ps.*_.*_(?P<sat>.*)_.*.nc") +STANDARD_FN_REGEX = re.compile(r"sic_ps.*_.*_(?P<platform_id>.*)_.*.nc") -def sat_from_filename(filename: str) -> SUPPORTED_SAT: +def platform_id_from_filename(filename: str) -> SUPPORTED_PLATFORM_ID: match = STANDARD_FN_REGEX.match(filename) if not match: - raise RuntimeError(f"Failed to parse satellite from {filename}") + raise RuntimeError(f"Failed to parse platform from {filename}") - sat = match.group("sat") + platform_id = match.group("platform_id") - # Ensure the sat is expected. - assert sat in get_args(SUPPORTED_SAT) - sat = cast(SUPPORTED_SAT, sat) + # Ensure the platform is expected. + assert platform_id in get_args(SUPPORTED_PLATFORM_ID) + platform_id = cast(SUPPORTED_PLATFORM_ID, platform_id) - return sat + return platform_id def date_range(*, start_date: dt.date, end_date: dt.date) -> Iterator[dt.date]: diff --git a/seaice_ecdr/validation.py b/seaice_ecdr/validation.py index 323e393a..47004309 100644 --- a/seaice_ecdr/validation.py +++ b/seaice_ecdr/validation.py @@ -63,7 +63,7 @@ from seaice_ecdr.monthly import get_monthly_dir from seaice_ecdr.util import date_range, get_complete_output_dir, get_num_missing_pixels -VALIDATION_RESOLUTION: Final = "12.5" +VALIDATION_RESOLUTION: Final = "25" ERROR_FILE_BITMASK = dict( missing_file=-9999, @@ -443,8 +443,8 @@ def validate_outputs( ) # monthly filepaths should have the form - # "sic_ps{n|s}12.5_{YYYYMM}_{sat}_v05r00.nc" - expected_fn_glob = f"sic_ps{hemisphere[0]}12.5_{year}{month:02}_*_{ECDR_PRODUCT_VERSION}.nc" + # "sic_ps{n|s}25_{YYYYMM}_{sat}_v05r00.nc" + expected_fn_glob = f"sic_ps{hemisphere[0]}25_{year}{month:02}_*_{ECDR_PRODUCT_VERSION}.nc" results = list(monthly_dir.glob(expected_fn_glob)) if not results: validation_dict = make_validation_dict_for_missing_file()