diff --git a/CHANGELOG.md b/CHANGELOG.md index baf99311..f689dccd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +# v0.2.0 + +* Produce 25km CDR instead of 12.5km. +* Refactor how platforms are handled to support overriding platform start dates + via yaml configuration files. + + # v0.1.0 * Initial version of the ECDR. diff --git a/environment.yml b/environment.yml index 3ad87001..28a1f9d5 100644 --- a/environment.yml +++ b/environment.yml @@ -20,7 +20,11 @@ dependencies: - pandas ~=1.4.4 - opencv ~=4.8.0 - pm_tb_data ~=0.4.0 - - pm_icecon ~=0.3.1 + - pm_icecon ~=0.4.0 + - leafmap + - rioxarray + - hvplot + - jupyter_bokeh ############################# # Non-imported dependencies # @@ -33,6 +37,8 @@ dependencies: - mypy ==1.7.0 - pyfakefs ~=5.2.4 - pytest-order ~=1.0.1 + # required by leafmap + - localtileserver # other utilities - bump-my-version ~=0.10.0 diff --git a/make_25km_ecdr.sh b/make_25km_ecdr.sh new file mode 100755 index 00000000..048915a4 --- /dev/null +++ b/make_25km_ecdr.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -exo pipefail + +BT_NT_BASE_DIR="/share/apps/G02202_V5/25km/BT_NT_ss" +mkdir -p $BT_NT_BASE_DIR +NT2_BASE_DIR="/share/apps/G02202_V5/25km/NT2_ss" +mkdir -p $NT2_BASE_DIR + +START_DATE="2018-04-01" +END_DATE="2018-12-31" +HEMISPHERE="north" + +# NT method first +FORCE_PLATFORM=F17 ./scripts/cli.sh multiprocess-daily --start-date $START_DATE --end-date $END_DATE --hemisphere $HEMISPHERE --base-output-dir $BT_NT_BASE_DIR --land-spillover-alg BT_NT --resolution 25 --ancillary-source CDRv4 +FORCE_PLATFORM=am2 ./scripts/cli.sh multiprocess-daily --start-date $START_DATE --end-date $END_DATE --hemisphere $HEMISPHERE --base-output-dir $BT_NT_BASE_DIR --land-spillover-alg BT_NT --resolution 25 --ancillary-source CDRv4 + +# NT2 method second +FORCE_PLATFORM=F17 ./scripts/cli.sh multiprocess-daily --start-date $START_DATE --end-date $END_DATE --hemisphere $HEMISPHERE --base-output-dir $NT2_BASE_DIR --land-spillover-alg NT2 --resolution 25 --ancillary-source CDRv4 +FORCE_PLATFORM=am2 ./scripts/cli.sh multiprocess-daily --start-date $START_DATE --end-date $END_DATE --hemisphere $HEMISPHERE --base-output-dir $NT2_BASE_DIR --land-spillover-alg NT2 --resolution 25 --ancillary-source CDRv4 diff --git a/pyproject.toml b/pyproject.toml index 4f19f1e0..5f937c35 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,9 @@ [project] name = "seaice_ecdr" -version = "0.1.0" +version = "0.2.0" [tool.bumpversion] -current_version = "0.1.0" +current_version = "0.2.0" commit = false tag = false @@ -87,5 +87,6 @@ module = [ "rasterio.*", "yaml.*", "cv2.*", + "ruamel.*", ] ignore_missing_imports = true diff --git a/scripts/ancillary/adapt_cdrv5_anc.py b/scripts/ancillary/adapt_cdrv5_anc.py index 3aefca53..ccd1ac45 100644 --- a/scripts/ancillary/adapt_cdrv5_anc.py +++ b/scripts/ancillary/adapt_cdrv5_anc.py @@ -46,8 +46,10 @@ ecdr_anc_fn = { "psn12.5": "/share/apps/G02202_V5/v05r00_ancillary/ecdr-ancillary-psn12.5.nc", "pss12.5": "/share/apps/G02202_V5/v05r00_ancillary/ecdr-ancillary-pss12.5.nc", - "psn25": "/share/apps/G02202_V5/v05r00_ancillary/ecdr-ancillary-psn25.nc", - "pss25": "/share/apps/G02202_V5/v05r00_ancillary/ecdr-ancillary-pss25.nc", + # "psn25": "/share/apps/G02202_V5/v05r00_ancillary/ecdr-ancillary-psn25.nc", + # "pss25": "/share/apps/G02202_V5/v05r00_ancillary/ecdr-ancillary-pss25.nc", + "psn25": "/share/apps/G02202_V5/v05r00_ancillary/ecdr-ancillary-psn25_new.nc", + "pss25": "/share/apps/G02202_V5/v05r00_ancillary/ecdr-ancillary-pss25_new.nc", } nsidc0771_fn = { @@ -148,10 +150,8 @@ def calc_adj123_np(surftype_da, ocean_val=50, coast_val=200): Output: Numpy array with adjacency values of 1, 2, 3 """ - surftype = surftype_da.as_numpy() + surftype = surftype_da.to_numpy() is_ocean = surftype == ocean_val - is_coast = surftype == coast_val - is_land = (~is_ocean) & (~is_coast) kernel = [ [ @@ -167,7 +167,7 @@ def calc_adj123_np(surftype_da, ocean_val=50, coast_val=200): ], ] adj123_arr = np.zeros(surftype.shape, dtype=np.uint8) - adj123_arr[is_land] = 255 + adj123_arr[~is_ocean] = 255 for adj_val in range(1, 4): is_unlabeled = adj123_arr == 0 is_labeled = (adj123_arr == 255) | ((~is_unlabeled) & (adj123_arr < adj_val)) @@ -182,7 +182,7 @@ def calc_adj123_np(surftype_da, ocean_val=50, coast_val=200): adj123_arr[is_newly_labeled] = adj_val # Remove the land grid cells from the adjacency matrix - adj123_arr[adj123_arr == 255] = 0 + # adj123_arr[adj123_arr == 255] = 0 return adj123_arr @@ -559,8 +559,7 @@ def generate_ecdr_anc_file(gridid): ds_hires_anc.data_vars["polehole_bitmask"].attrs[ "flag_masks" ] - ) - - 1, + ), ) ), ), diff --git a/scripts/ancillary/daily_clim_from_v4_v04r00.py b/scripts/ancillary/daily_clim_from_v4_v04r00.py new file mode 100644 index 00000000..e8a96dfa --- /dev/null +++ b/scripts/ancillary/daily_clim_from_v4_v04r00.py @@ -0,0 +1,283 @@ +"""Adapt the CDR v4 ancillary daily climatology files for CDR v5. + +Note: this version uses the CDRv4 (v04r00) ancillary information + and is used to directly compare v5 code output with (published) v4 files +""" + +from pathlib import Path +from typing import get_args + +import numpy as np +import numpy.typing as npt +import xarray as xr +from loguru import logger +from pm_tb_data._types import Hemisphere +from scipy.signal import convolve2d + +from seaice_ecdr.ancillary import get_ancillary_ds +from seaice_ecdr.constants import CDR_ANCILLARY_DIR +from seaice_ecdr.grid_id import get_grid_id + +# The commented out lines are old method... +# CDR_V4_CODE_DIR = Path("~/seaice_cdr").resolve() +# if not CDR_V4_CODE_DIR.is_dir(): +# # Alert the user to this dependency. +# raise RuntimeError( +# "The `seaice_cdr` (https://bitbucket.org/nsidc/seaice_cdr/) repository is" +# " expected to be manually cloned to `CDR_V4_CODE_DIR` for this script to work." +# ) +# CDR_V4_ANCILLARY_DIR = CDR_V4_CODE_DIR / "/source/ancillary/" + +CDR_V4_ANCILLARY_DIR = Path( + "/share/apps/G02202_V5/cdrv4_ancillary/vagrant_source_ancillary/ancillary" +) + + +def get_v4_climatology(*, hemisphere: Hemisphere) -> xr.Dataset: + ds = xr.load_dataset( + CDR_V4_ANCILLARY_DIR / f"doy-validice-{hemisphere}-smmr.nc", + mask_and_scale=False, + ) + + return ds + + +def get_v4_icemask_exact( + *, + surftype: npt.NDArray, + old_vim: npt.NDArray, +) -> npt.NDArray[np.uint8]: + + # Investigate arrays + is_new_ocean = surftype == 50 + is_new_land = ~is_new_ocean + + n_days, ydim, xdim = old_vim.shape + new_vim = np.zeros((n_days, ydim, xdim), dtype=np.uint8) + new_vim[:] = 255 + for d in range(n_days): + if (d % 20) == 0: + print(f"{d} of {n_days}", flush=True) + + old_slice = old_vim[d, :, :] + new_slice = new_vim[d, :, :] + new_slice[is_new_land] = 3 + + # Valid ice in CDRv4 file has bit 1 set, so values of 1 and 3 match + # Invalid (daily!) ice in CDRv4 does not have bit 0 set, so ... is 0 or 2 + # because there are a few places where the daily SMMR mask is valid but where + # the CDRv4 monthly mask was not. So, invalid is 0 or 2 + # Values above 250 encoded lake, coast, land and are ignored here + is_valid = is_new_ocean & ((old_slice == 1) | (old_slice == 3)) + is_invalid = is_new_ocean & ((old_slice == 0) | (old_slice == 2)) + + new_slice[is_valid] = 1 + new_slice[is_invalid] = 0 + + n_unassigned = np.sum(np.where(is_new_ocean & (new_slice == 255), 1, 0)) + n_unassigned_prior = n_unassigned + kernel = np.ones((3, 3), dtype=np.uint8) + while n_unassigned > 0: + # Expand valid + convolved = convolve2d( + (new_slice == 1).astype(np.uint8), + kernel, + mode="same", + boundary="fill", + fillvalue=0, + ) + is_newly_valid = is_new_ocean & (convolved > 0) & (new_slice == 255) + new_slice[is_newly_valid] = 1 + + # Expand invalid + convolved = convolve2d( + (new_slice == 0).astype(np.uint8), + kernel, + mode="same", + boundary="fill", + fillvalue=0, + ) + is_newly_invalid = is_new_ocean & (convolved > 0) & (new_slice == 255) + new_slice[is_newly_invalid] = 0 + + # Check to see if we've assigned all ocean grid cells to valid or invalid + n_unassigned = np.sum(np.where(is_new_ocean & (new_slice == 255), 1, 0)) + if n_unassigned == n_unassigned_prior: + print(f"breaking with {n_unassigned} still unassigned") + break + + n_unassigned_prior = n_unassigned + + if not np.all(new_slice <= 3): + print(f"uh oh...not all points were assigned! d={d}") + breakpoint() + + # Here, new_vim is the (366, ydim, xdim) VALID ice_mask on the new "land" mask + assert np.all(new_vim <= 3) + + # Convert to INVALID ice mask by swapping 0s and 1s and by replacing the land values + # with 0s + vim_is_valid = new_vim == 1 + vim_is_invalid = new_vim == 0 + vim_is_land = new_vim == 3 # this was set above + + # Reassign values, and make sure we got them all + new_vim[:] = 255 + new_vim[vim_is_invalid] = 1 + new_vim[vim_is_valid] = 0 + new_vim[vim_is_land] = 1 + + assert np.all(new_vim <= 1) + + return new_vim + + +def get_v4_icemask( + *, surftype: npt.NDArray, old_vim: npt.NDArray +) -> npt.NDArray[np.uint8]: + # Investigate arrays + is_new_ocean = surftype == 50 + is_new_land = ~is_new_ocean + + n_days, ydim, xdim = old_vim.shape + new_vim = np.zeros((n_days, ydim, xdim), dtype=np.uint8) + new_vim[:] = 255 + for d in range(n_days): + if (d % 20) == 0: + print(f"{d} of {n_days}", flush=True) + + old_slice = old_vim[d, :, :] + new_slice = new_vim[d, :, :] + new_slice[is_new_land] = 3 + + # Valid ice in CDRv4 file has bit 1 set, so values of 1 and 3 match + # Invalid (daily!) ice in CDRv4 does not have bit 0 set, so ... is 0 or 2 + # because there are a few places where the daily SMMR mask is valid but where + # the CDRv4 monthly mask was not. So, invalid is 0 or 2 + # Values above 250 encoded lake, coast, land and are ignored here + is_valid = is_new_ocean & ((old_slice == 1) | (old_slice == 3)) + is_invalid = is_new_ocean & ((old_slice == 0) | (old_slice == 2)) + + new_slice[is_valid] = 1 + new_slice[is_invalid] = 0 + + n_unassigned = np.sum(np.where(is_new_ocean & (new_slice == 255), 1, 0)) + n_unassigned_prior = n_unassigned + kernel = np.ones((3, 3), dtype=np.uint8) + while n_unassigned > 0: + # Expand valid + convolved = convolve2d( + (new_slice == 1).astype(np.uint8), + kernel, + mode="same", + boundary="fill", + fillvalue=0, + ) + is_newly_valid = is_new_ocean & (convolved > 0) & (new_slice == 255) + new_slice[is_newly_valid] = 1 + + # Expand invalid + convolved = convolve2d( + (new_slice == 0).astype(np.uint8), + kernel, + mode="same", + boundary="fill", + fillvalue=0, + ) + is_newly_invalid = is_new_ocean & (convolved > 0) & (new_slice == 255) + new_slice[is_newly_invalid] = 0 + + # Check to see if we've assigned all ocean grid cells to valid or invalid + n_unassigned = np.sum(np.where(is_new_ocean & (new_slice == 255), 1, 0)) + if n_unassigned == n_unassigned_prior: + print(f"breaking with {n_unassigned} still unassigned") + break + + n_unassigned_prior = n_unassigned + + if not np.all(new_slice <= 3): + print(f"uh oh...not all points were assigned! d={d}") + breakpoint() + + # Here, new_vim is the (366, ydim, xdim) VALID ice_mask on the new "land" mask + assert np.all(new_vim <= 3) + + # Convert to INVALID ice mask by swapping 0s and 1s and by replacing the land values + # with 0s + vim_is_valid = new_vim == 1 + vim_is_invalid = new_vim == 0 + vim_is_land = new_vim == 3 # this was set above + + # Reassign values, and make sure we got them all + new_vim[:] = 255 + new_vim[vim_is_invalid] = 1 + new_vim[vim_is_valid] = 0 + new_vim[vim_is_land] = 1 + + assert np.all(new_vim <= 1) + + return new_vim + + +def make_v5_climatology(*, hemisphere: Hemisphere): + v4_ds = get_v4_climatology(hemisphere=hemisphere) + ancillary_ds = get_ancillary_ds( + hemisphere=hemisphere, + resolution="25", + ancillary_source="CDRv4", + ) + # icemask_arr = get_v4_icemask( + # surftype=ancillary_ds.surface_type.data, old_vim=v4_ds.daily_icemask.data + # ) + + icemask_arr = get_v4_icemask_exact( + surftype=ancillary_ds.surface_type.data, + old_vim=v4_ds.daily_icemask.data, + ) + + invalid_ice_mask_arr = xr.DataArray( + icemask_arr, + dims=("doy", "y", "x"), + attrs=ancillary_ds.invalid_ice_mask.attrs, + ) + invalid_ice_mask_arr.encoding["_FillValue"] = None + + v5_ds = xr.Dataset( + data_vars=dict( + invalid_ice_mask=invalid_ice_mask_arr, + crs=ancillary_ds.crs, + ), + coords=dict( + # 366 days to account for leap years. + doy=np.arange(1, 366 + 1, dtype=np.int16), + y=ancillary_ds.y, + x=ancillary_ds.x, + ), + ) + + # TODO: any other attrs for the day of year coordinate? + v5_ds.doy.attrs = dict( + long_name="Day of year", + comment="366 days are provided to account for leap years.", + ) + + # Preserve the geospatial global attrs + v5_ds.attrs = {k: v for k, v in ancillary_ds.attrs.items() if "geospatial_" in k} + v5_ds.attrs["comment"] = v4_ds.comment + + # Ensure coordinate vars don't get a fill value + v5_ds.doy.encoding["_FillValue"] = None + v5_ds.y.encoding["_FillValue"] = None + v5_ds.x.encoding["_FillValue"] = None + + grid_id = get_grid_id(hemisphere=hemisphere, resolution="25") + output_filepath = ( + CDR_ANCILLARY_DIR / f"ecdr-ancillary-{grid_id}-smmr-invalid-ice-v04r00.nc" + ) + v5_ds.to_netcdf(output_filepath) + logger.info(f"Wrote {output_filepath}") + + +if __name__ == "__main__": + for hemisphere in get_args(Hemisphere): + make_v5_climatology(hemisphere=hemisphere) diff --git a/scripts/ancillary/gather_ancillary_fields.py b/scripts/ancillary/gather_ancillary_fields.py index ea23dc27..74970c82 100644 --- a/scripts/ancillary/gather_ancillary_fields.py +++ b/scripts/ancillary/gather_ancillary_fields.py @@ -362,6 +362,8 @@ def load_or_create_land90_conc( filepath = get_ancillary_filepath(hemisphere=hemisphere, resolution="12.5") ancillary_ds.compute() + ### SS: Temporarily change name of filepath + breakpoint() ancillary_ds.to_netcdf(filepath) logger.info(f"wrote {filepath}") diff --git a/scripts/ancillary/gen_cdrv5anc_from_v4.py b/scripts/ancillary/gen_cdrv5anc_from_v4.py new file mode 100644 index 00000000..9fa63643 --- /dev/null +++ b/scripts/ancillary/gen_cdrv5anc_from_v4.py @@ -0,0 +1,226 @@ +"""Create a CDRv5-compatible ancillary file from CDRv4 ancillary files + +Note: this assumes you've run the code ./gen_generic_valid_icemasks.py + in order to produce local files: + ./cdrv4_psn25_validice_12m_corrected.dat + ./cdrv4_pss25_validice_12m_corrected.dat + +The v5 files need: + latitude: not used for calculations, so can come from nsidc0771 or v5 file + longitude: not used for calculations, so can come from nsidc0771 or v5 file + surface_type: should come from nsidc0780, but is adapted here for compat + adj123: calculated from surface_type + l90c: calcluated from surface_type + min_concentration: adapted from CDRv4 file + invalid_ice_mask: adapted from CDRv4 file + polehole_bitmask [NH only]: adapted from CDRv4 file + + +""" + +import numpy as np +from land_spillover import create_land90 +from netCDF4 import Dataset +from scipy.ndimage import binary_dilation, generate_binary_structure + +fnmap = { + "v5_orig_psn25": "ecdr-ancillary-psn25.nc", + "v5_orig_pss25": "ecdr-ancillary-pss25.nc", + "v5_from_v4_psn25": "ecdr-ancillary-psn25-v04r00.nc", + "v5_from_v4_pss25": "ecdr-ancillary-pss25-v04r00.nc", + "v4_psn25": "G02202-cdr-ancillary-nh.nc", + "v4_pss25": "G02202-cdr-ancillary-sh.nc", +} + + +def gen_adj123(is_ocean, nlevels=3): + """Gen diagonal-orthogonality-distance map""" + # Initialize so that non-ocean is zero and ocean is 255 + is_ocean.tofile("is_ocean.dat") + dists = np.zeros_like(is_ocean, dtype=np.uint8) + dists[is_ocean] = 255 + kernel = generate_binary_structure(2, 2) + for dist in range(nlevels): + is_labeled = dists <= dist + dilated = binary_dilation(is_labeled, structure=kernel) + is_newly_labeled = (dilated > 0) & (dists == 255) + dists[is_newly_labeled] = dist + 1 + """ Debug code: + ofn = f'added_dist_{dist}.dat' + dists.tofile(ofn) + print(f'Wrote: {ofn} ({np.sum(np.where(is_newly_labeled, 1, 0))} vals: {np.unique(dists)})') + """ + + return dists + + +def gen_v4_anc_file(hem): + """Generate the given hem's anc file + Expects 'hem' of 'n' or 's' + Currently set up for 25km only + """ + gridid = f"ps{hem}25" + print(f"Setting gridid to: {gridid}") + + ds4 = Dataset(fnmap[f"v4_ps{hem}25"]) + + v5fromv4 = {} + v5fromv4["latitude"] = np.array(ds4.variables["latitude"]).astype(np.float64) + v5fromv4["longitude"] = np.array(ds4.variables["longitude"]).astype(np.float64) + + ydim, xdim = v5fromv4["latitude"].shape + + v4land = np.array(ds4.variables["landmask"]) + v5land = np.zeros_like(v4land, dtype=np.uint8) + v5land[v4land == 0] = 50 # Ocean + v5land[v4land == 252] = 75 # Lake + v5land[v4land == 253] = 200 # Coast: Note v4 is ortho, v5 is 8-conn + v5land[v4land == 254] = 250 # Land: Note v4 is ortho, v5 is 8-conn + v5fromv4["surface_type"] = v5land + + v4minconc = np.array(ds4.variables["min_concentration"]) + # v5 conc is float and ranges from 0.0 to 1.0 + v5minconc = v4minconc.astype(np.float32) / 100.0 + v5fromv4["min_concentration"] = v5minconc + + # Note: these input files come from running: + # python ./gen_generic_valid_icemasks.py + # They are 12 months x ydim x xdim + # and are coded: + # Valid sea ice (if land mask allows) + # 200: ocean, valid + # 225: lake, valid + # 250: coast, valid + # Not-valid sea ice + # 100: ocean, invalid + # 125: lake, invalid # Note: the original data never had invalid lake + # 75: coast, invalid + v4valid_corrected = np.fromfile( + f"./cdrv4_{gridid}_validice_12m_corrected.dat", dtype=np.uint8 + ).reshape(12, ydim, xdim) + + # The invalid ice mask should be: + # - everywhere the valid ice mask is 0 (False) + # - all non-ocean pixels + # Defaults to invalid. Add in valid locations + v5validice = np.zeros_like(v4valid_corrected, dtype=np.uint8) + v5invalidice = np.zeros_like(v4valid_corrected, dtype=np.uint8) + is_v5_ocean = v5fromv4["surface_type"] == 50 + for month in range(12): + # This should be a slice? + thismonth_v5 = v5validice[month, ::] + thismonth_v4 = v4valid_corrected[month, ::] + thismonth_v5[is_v5_ocean & (thismonth_v4 == 200)] = 1 # valid expanded-ocean + thismonth_v5[is_v5_ocean & (thismonth_v4 == 225)] = 1 # valid expanded-lake + thismonth_v5[is_v5_ocean & (thismonth_v4 == 250)] = 1 # valid coast + # Now flip those valids to invalids (and vice-versa) + v5invalidice[v5validice == 0] = 1 + v5invalidice[v5validice == 1] = 0 + v5fromv4["invalid_ice_mask"] = v5invalidice + v5invalidice.tofile("v5invalidice.dat") + + if "polehole" in ds4.variables.keys(): + # CDRv4 mask is: 1-SMMR, 2-SSMI, 4-SSMIS + v4_polemask = np.array(ds4.variables["polehole"]) + v5_polemask = np.zeros_like(v4_polemask, dtype=np.uint8) + + """ + # Test of the bit-checking technique + # First, reproducing to verify technique + for bitnum in range(2 + 1): + is_bit = np.bitwise_and(v4_polemask, np.left_shift(1, bitnum)) + v5_polemask[is_bit > 0] += 2 ** bitnum + + if np.all(v4_polemask == v5_polemask): + print('bitmask technique works') + else: + print('bitmask technique FAILS') + v4_polemask.tofile('v4_polemask.dat') + v5_polemask.tofile('v5_polemask.dat') + breakpoint() + """ + + # CDRv5 mask is: 1-SMMR, 2-F8, 4-F11, 8-F13, 16-F17, 32-AME, 64-AM2 + # So, will convert by mapping bits: + # 1 -> 1 (total: 1) + # 2 -> 2, 4, 8 (total: 14) + # 4 -> 16, 32, 64 (total: 112) + is_bit0 = np.bitwise_and(v4_polemask, np.left_shift(1, 0)) + is_bit1 = np.bitwise_and(v4_polemask, np.left_shift(1, 1)) + is_bit2 = np.bitwise_and(v4_polemask, np.left_shift(1, 2)) + + is_bit0.tofile("is_bit0.dat") + is_bit1.tofile("is_bit1.dat") + is_bit2.tofile("is_bit2.dat") + + v5_polemask[is_bit0 > 0] += 1 # SMMR + v5_polemask[is_bit1 > 0] += 14 # SSMI => F8, F11, F13 + v5_polemask[is_bit2 > 0] += 112 # SSMIS => F17, AME, AM2 + + v5fromv4["polehole_bitmask"] = v5_polemask + v4_polemask.tofile("v4_polemask.dat") + v5_polemask.tofile("v5_polemask.dat") + + v4_adj123 = gen_adj123(is_v5_ocean) + v4_adj123.tofile("v4_adj123.dat") + + v5fromv4["adj123"] = v4_adj123 + + v4land90 = create_land90(adj123=v4_adj123) + v5fromv4["l90c"] = v4land90 + + print("v5fromv4 keys:") + for key in v5fromv4.keys(): + print(f" {key}") + + # Now, let's create a new file by copying the old one, and then + # updating its values + orig_v5_fn = fnmap[f"v5_orig_{gridid}"] + new_v5_fn = fnmap[f"v5_from_v4_{gridid}"] + + import sh + + # print(f"shell cp options: {f'-vn {orig_v5_fn} {new_v5_fn}'}") + print(sh.cp("-vn", orig_v5_fn, new_v5_fn)) + + # Open the new file...and then edit it + newds = Dataset(new_v5_fn, "r+") + print(f"newds vars: {newds.variables.keys()}") + possible_var_list = [ + "surface_type", + "adj123", + "l90c", + "min_concentration", + "invalid_ice_mask", + "polehole_bitmask", + ] + var_list = [var for var in possible_var_list if var in newds.variables.keys()] + for var in var_list: + print(f"same dtypes {var}: {newds.variables[var].dtype == v5fromv4[var].dtype}") + print(f"same shapes {var}: {newds.variables[var].shape == v5fromv4[var].shape}") + newds.variables[var][:] = v5fromv4[var][:] + + if "polehole_bitmask" in newds.variables.keys(): + newds.variables["polehole_bitmask"].valid_range = np.array( + (0, 127), dtype=np.uint8 + ) + + newds.close() + print(f"Wrote: {new_v5_fn}") + + +if __name__ == "__main__": + import sys + + valid_hems = ("n", "s") + try: + hem = sys.argv[1][0] + assert hem in valid_hems + + except IndexError: + hem = "n" + print(f"No hem given: assuming {hem}") + except AssertionError: + raise ValueError(f"Given hem ({hem}) not in {valid_hems}") + + gen_v4_anc_file(hem) diff --git a/scripts/ancillary/gen_dailyclim_from0079.py b/scripts/ancillary/gen_dailyclim_from0079.py new file mode 100644 index 00000000..f1543022 --- /dev/null +++ b/scripts/ancillary/gen_dailyclim_from0079.py @@ -0,0 +1,361 @@ +""" +./gen_dailyclim_from0079.py + +Generate a daily climatology from Bootstrap (NSIDC-0079) data + - Note: this is expected to be valuable because 0079 is manually QC'd + +Usage: + python gen_dailyclim_from0079.py [hem] + where hem is nh or sh + and if hem is omitted, both will be generated + +Assumes daily 0079 data is located in "standard" ecs directories: + /ecs/DP1/PM/NSIDC-0079.004//NSIDC0079_SEAICE_PS_{HEM}25km_{YYYYMMDD}_v4.0.nc + where YYYYMMDD is 8-digit date string + HEM is capital N or S + +Overall approach: + - Initialize uint8 daily fields for day-of-year 1-366 to 255 + - ignore index zero + - treat last day of year as doy 365 + - by accumulating values in both index 365 and 366, and ORing the + final fields + - use separate fields for sea ice extent found (siext) and siext_notfound + - use 10% min for siext threshold + - Loop through all days from 11/01/1978 (start of 0079) + through end of 2023 (current latest-date + - apply any siext to all days +/- 5 day-of-years from current day + +""" + +import datetime as dt +import os + +import numpy as np +import xarray as xr +from netCDF4 import Dataset +from scipy.signal import convolve2d + +fn_0079_ = ( + "/ecs/DP1/PM/NSIDC-0079.004/{dotymd}/NSIDC0079_SEAICE_PS_{hem}25km_{strymd}_v4.0.nc" +) +fn_dayclim_ = "daily_siext_from0079_{d0}-{d1}_{gridid}.nc" + +reference_gridid_files = { + "psn25": "/share/apps/G02202_V5/v05_ancillary/ecdr-ancillary-psn25.nc", + "pss25": "/share/apps/G02202_V5/v05_ancillary/ecdr-ancillary-pss25.nc", +} + +# first_date = dt.date(1986, 1, 1) # testcase 1986-1988 +# last_date = dt.date(1988, 12, 31) # testcase 1986-1988 + +first_date = dt.date(1978, 11, 1) # 0079 starts Nov 1, 1978 +last_date = dt.date(2023, 12, 31) # as of Sept 2024, last full year is 2023 + +# Will include a doy's observation in mask for doys +/- doy_offset days of +# doy with observations +doy_offset = 4 + + +def iter_adj_doys(date, offset): + """Return iterator of days-of-year surrounding date""" + d = date - dt.timedelta(days=offset) + while d <= date + dt.timedelta(days=offset): + doy = int(d.strftime("%j").lstrip("0")) + yield doy + + d += dt.timedelta(days=1) + + +def dilate_siext(sie, nosie): + """Return an array where siext is dilated into land""" + kernel = [[0, 1, 0], [1, 1, 1], [0, 1, 0]] + # max_iterations = 2 + # max_iterations = 5 + max_iterations = 10 + + ndays, ydim, xdim = sie.shape + mask = np.zeros((ndays, ydim, xdim), dtype=np.uint8) + mask[:] = 255 + + for doy in range(1, 366 + 1): + if doy % 50 == 0: + print(f"Working on doy: {doy} of 366...") + + yea = sie[doy, :, :] + nay = nosie[doy, :, :] + + for _ in range(max_iterations): + # Dilate yea + is_yea = yea == 100 + is_nay = nay == 100 + yea_conv = convolve2d( + is_yea, kernel, mode="same", boundary="fill", fillvalue=0 + ) + fill_with_yea = (yea_conv > 0) & ~is_nay + yea[fill_with_yea] = 100 + + # Dilate nay + is_nay = nay == 100 + is_yea = yea == 100 + nay_conv = convolve2d( + is_nay, kernel, mode="same", boundary="fill", fillvalue=0 + ) + fill_with_nay = (nay_conv > 0) & ~is_yea + nay[fill_with_nay] = 100 + + # Use those values to fill mask + is_nay = nay == 100 + is_yea = yea == 100 + + msk = mask[doy, :, :] + msk[is_nay] = 0 + msk[is_yea] = 100 + # breakpoint() + + return mask + + +def find_sie_0079(gridid, d0, d1): + if gridid == "psn25": + xdim = 304 + ydim = 448 + hem = "N" + elif gridid == "pss25": + xdim = 316 + ydim = 332 + hem = "S" + else: + raise ValueError(f"Cannot figure out xdim, ydim for gridid {gridid}") + + sie_exists = np.zeros((367, ydim, xdim), dtype=np.uint8) + nosie_exists = np.zeros((367, ydim, xdim), dtype=np.uint8) + + date = d0 + while date <= d1: + # if date.day == 1 and (date.month % 3 == 0): + if date.month == 1 and date.day == 1: + print(f"Working on: {date=}", flush=True) + + try: + fn_format_dict = { + "dotymd": date.strftime("%Y.%m.%d"), + "strymd": date.strftime("%Y%m%d"), + "hem": hem, + } + fn0079 = fn_0079_.format(**fn_format_dict) + ds0079 = Dataset(fn0079) + ds0079.set_auto_maskandscale(False) + except FileNotFoundError: + print(f"FileNotFound: {fn0079}", flush=True) + + icecon_vars = [key for key in ds0079.variables.keys() if "_ICECON" in key] + if len(icecon_vars) > 1: + print(f"More than one icecon for {date=}: {icecon_vars}", flush=True) + elif len(icecon_vars) == 0: + # Skip this day + pass + else: + # Values of siconc are 0-1000 (0-100%), 1100 missing, 1200 land + siconc = np.array(ds0079.variables[icecon_vars[0]][0, :, :]) + + is_siext = (siconc >= 10) & (siconc <= 1000) + is_nosiext = siconc < 10 + + for doy in iter_adj_doys(date, doy_offset): + sie_exists[doy, is_siext] = 100 + nosie_exists[doy, is_nosiext] = 100 + + date += dt.timedelta(days=1) + + # Finalize by ensuring that doy 365 and 366 are the same + has_365 = sie_exists[365, :, :] == 100 + hasno_365 = nosie_exists[365, :, :] == 100 + has_366 = sie_exists[366, :, :] == 100 + hasno_366 = nosie_exists[366, :, :] == 100 + + sie_exists[366, :, :][has_365] = 100 + sie_exists[365, :, :][has_366] = 100 + + nosie_exists[366, :, :][hasno_365] = 100 + nosie_exists[365, :, :][hasno_366] = 100 + + assert np.all(sie_exists[365, :, :] == sie_exists[366, :, :]) + assert np.all(nosie_exists[365, :, :] == nosie_exists[366, :, :]) + + return sie_exists, nosie_exists + + +def get_surface_type(gridid, vernum): + # Return the surface_type field for this hem and version + if gridid == "psn25": + hem = "n" + elif gridid == "pss25": + hem = "s" + + if vernum == 4: + verstr = "-v04r00" + elif vernum == 5: + verstr = "" + + fn = f"/share/apps/G02202_V5/v05_ancillary/ecdr-ancillary-ps{hem}25{verstr}.nc" + ds_anc = Dataset(fn) + ds_anc.set_auto_maskandscale(False) + surftype = np.array(ds_anc["surface_type"]) + + return surftype + + +def gen_dailyclim_0079(hem, d0, d1): + + print("Will gen daily climatology from 0079 for:") + print(f" hem: {hem}") + print(f" from: {d0} through {d1}", flush=True) + + if hem == "nh": + gridid = "psn25" + xdim = 304 + ydim = 448 + elif hem == "sh": + gridid = "pss25" + xdim = 316 + ydim = 332 + else: + raise ValueError(f"Cannot figure out gridid for hem: {hem}") + + nc_fn = f"ecdr-ancillary-{gridid}-dailyclim.nc" + if os.path.isfile(nc_fn): + print(f"Output file exists: {nc_fn}") + return + + mask_fn = f'dailyclim_{hem}_{d0.strftime("%Y%m%d")}-{d1.strftime("%Y%m%d")}.dat' + if os.path.isfile(mask_fn): + # Read in the .dat values instead of calculating them + mask = np.fromfile(mask_fn, dtype=np.uint8).reshape(367, ydim, xdim) + print(f" Read mask values from: {mask_fn}", flush=True) + else: + # Calculate the day-of-year [doy] mask values + sie_fn = f'sie_exists_{hem}_{d0.strftime("%Y%m%d")}-{d1.strftime("%Y%m%d")}.dat' + nosie_fn = ( + f'nosie_exists_{hem}_{d0.strftime("%Y%m%d")}-{d1.strftime("%Y%m%d")}.dat' + ) + + if os.path.isfile(sie_fn) and os.path.isfile(nosie_fn): + print("Will read from:") + print(f" yes: {sie_fn}") + print(f" not: {nosie_fn}", flush=True) + sie_exists = np.fromfile(sie_fn, dtype=np.uint8).reshape(367, ydim, xdim) + nosie_exists = np.fromfile(nosie_fn, dtype=np.uint8).reshape( + 367, ydim, xdim + ) + else: + print("Will write to:") + print(f" yes: {sie_fn}") + print(f" not: {nosie_fn}", flush=True) + + sie_exists, nosie_exists = find_sie_0079(gridid, d0, d1) + + sie_exists.tofile(sie_fn) + nosie_exists.tofile(nosie_fn) + + # Fill lake values with 255 if they have 0 or 100 in either sie or nosie + v4_surftype = get_surface_type(gridid, 4) + v5_surftype = get_surface_type(gridid, 5) + + is_lake_v4 = v4_surftype == 75 + is_lake_v5 = v5_surftype == 75 + + for doy in range(1, 366 + 1): + sie_exists[doy, is_lake_v4] = 255 + sie_exists[doy, is_lake_v5] = 255 + + nosie_exists[doy, is_lake_v4] = 255 + nosie_exists[doy, is_lake_v5] = 255 + + # Now, process the fields by dilation of yea and nay + mask = dilate_siext(sie_exists, nosie_exists) + mask_fn = f'dailyclim_{hem}_{d0.strftime("%Y%m%d")}-{d1.strftime("%Y%m%d")}.dat' + mask.tofile(mask_fn) + print(f" Wrote: {mask_fn}", flush=True) + + # Create the netCDF file from the mask value + # netcdf file will get its CRS information from a sample file + # The field will be 'invalid_ice_mask' + # - a value of 1 (or True) indicates where sea ice was not observed + # near this day-of-year + # - a value of 0 means that sea ice is permitted for this doy + # ...if the grid cell is ocean (not land) + # In the mask field here: + # 0: invalid ice + # 100: potentially valid sea ice (though could be land, depending on land/surfacetype mask) + # 255: sea ice validity/invalidity not determined because far from ocean + # Note: lakes were filled here and will never have valid sea ice + + # Load the reference dataset + reference_ds = xr.open_dataset(reference_gridid_files[gridid]) + + iim = mask.copy() + iim[mask == 0] = 1 + iim[mask != 0] = 0 + + invalid_ice_mask_arr = xr.DataArray( + iim, + dims=("doy", "y", "x"), + attrs={ + "short_name": "daily invalid ice mask", + "long_name": f"{gridid} daily invalid ice mask from NSIDC-0079", + "grid_mapping": "crs", + "flag_values": np.array((0, 1), dtype=np.uint8), + "flag_meanings": "valid_seaice_location invalid_seaice_location", + "units": 1, + "comment": "Mask indicating where seaice will not be found on this day based on climatology from NSIDC-0079", + }, + ) + invalid_ice_mask_arr.encoding["_FillValue"] = None + + iim_ds = xr.Dataset( + data_vars=dict( + invalid_ice_mask=invalid_ice_mask_arr[1:, :, :], # exclude doy 0 + crs=reference_ds.crs, + ), + coords=dict( + doy=np.arange(1, 366 + 1, dtype=np.int16), + y=reference_ds.y, + x=reference_ds.x, + ), + ) + + # TODO: Does this var need other attrs? + iim_ds.doy.attrs = dict( + long_name="Day of year", + comment="366 days are provided to account for leap years.", + ) + + iim_ds.doy.encoding["_FillValue"] = None + iim_ds.y.encoding["_FillValue"] = None + iim_ds.x.encoding["_FillValue"] = None + + # TODO: Global attributes will likely be wrong because they are copied! + iim_ds.to_netcdf(nc_fn) + print(f"Wrote: {nc_fn}") + + +if __name__ == "__main__": + import sys + + all_hems = ("nh", "sh") + try: + hem_list = (sys.argv[1],) + assert hem_list[0] in all_hems + except IndexError: + hem_list = all_hems + print("No hem given, using: {hem_list}", flush=True) + except AssertionError: + err_message = f""" + Invalid hemisphere + {sys.argv[1]} not in {all_hems} + """ + raise ValueError(err_message) + + for hem in hem_list: + gen_dailyclim_0079(hem, first_date, last_date) diff --git a/scripts/ancillary/gen_generic_valid_icemasks.py b/scripts/ancillary/gen_generic_valid_icemasks.py new file mode 100644 index 00000000..31bb7ebe --- /dev/null +++ b/scripts/ancillary/gen_generic_valid_icemasks.py @@ -0,0 +1,214 @@ +"""Create land-agnostic versions of the CDRv4 valid ice masks + +gen_generic_valid_icemasks.py + +The issue with the original valid ice masks is that they include the land +and lakes. This version extends oceans and lakes separately. +""" + +import numpy as np +from netCDF4 import Dataset +from scipy.ndimage import generate_binary_structure +from scipy.signal import convolve2d + +OCEAN = 0 +LAKE = 252 +COAST = 253 +LAND = 254 + +INVALOCEAN = 100 +INVALLAKE = 125 +VALOCEAN = 200 +VALLAKE = 225 + + +def get_grid_dims(gridid): + if gridid == "psn25": + xdim = 304 + ydim = 448 + elif gridid == "pss25": + xdim = 316 + ydim = 332 + else: + raise ValueError(f"gridid not implemented: {gridid}") + + return xdim, ydim + + +def get_grid_hem(gridid): + if gridid == "psn25": + hem = "n" + elif gridid == "pss25": + hem = "s" + else: + raise ValueError(f"gridid not implemented: {gridid}") + + return hem + + +def is_adjacent(boolarr): + """Calc pixels adjacent to boolean array""" + kernel = generate_binary_structure(2, 1) + convolved = convolve2d( + boolarr.astype(np.uint8), + kernel, + mode="same", + boundary="fill", + fillvalue=0, + ) + + return convolved > 0 + + +def fix_valid(validarr, gridid, month_index): + """Implement known fixes for CD$v4 valid ice mask""" + if gridid == "psn25": + # There is a small pocket of ocean that is missed in some of the + # valid ice arrays + psn25_errors = [ + # check_index then nearby_index + ((236, 180), (234, 182)), + ((216, 0), (216, 1)), + ((217, 0), (217, 1)), + ((218, 0), (218, 1)), + ((219, 0), (219, 1)), + ((220, 0), (220, 2)), + ((220, 1), (220, 2)), + ((77, 405), (78, 405)), + ((76, 405), (78, 405)), + ((76, 406), (78, 405)), + ((75, 406), (78, 405)), + ((74, 406), (78, 405)), + ((74, 407), (78, 405)), + ((73, 407), (78, 405)), + ((72, 408), (78, 405)), + ((71, 408), (78, 405)), + ((70, 409), (78, 405)), + ((85, 411), (86, 411)), + ((53, 348), (53, 347)), + ((87, 404), (98, 404)), + ((82, 407), (83, 407)), + ((106, 407), (107, 407)), + ((267, 316), (267, 315)), + ((271, 312), (270, 312)), + ((25, 181), (25, 180)), + ((25, 190), (25, 189)), + ((25, 179), (25, 178)), + ((135, 69), (135, 70)), + ((139, 42), (139, 43)), + ((285, 308), (285, 309)), + ((286, 313), (286, 314)), + ((260, 286), (260, 285)), + ((267, 316), (267, 317)), + ((267, 315), (267, 317)), + ((114, 422), (115, 422)), + ((113, 422), (115, 422)), + ((112, 422), (111, 421)), + ((111, 422), (111, 421)), + ((25, 190), (25, 191)), + ((27, 176), (27, 175)), + ] + + for check_index, nearby_index in psn25_errors: + check_value = validarr[check_index[1], check_index[0]] + nearby_value = validarr[nearby_index[1], nearby_index[0]] + if check_value != nearby_value: + validarr[check_index[1], check_index[0]] = nearby_value + print( + f"Mismatch between {check_index} and {nearby_index} (month={month_index + 1}): fixing" + ) + + return validarr + + +def gen_generic_valid_icemasks(gridid): + """Create valid ice masks that are landmask-independent.""" + xdim, ydim = get_grid_dims(gridid) + hem = get_grid_hem(gridid) + nmonths = 12 + + incfn = f"./G02202-cdr-ancillary-{hem}h.nc" + ds_in = Dataset(incfn) + validice_arr_v4 = np.array(ds_in.variables["valid_ice_mask"]) + assert validice_arr_v4.shape == (12, ydim, xdim) + + # CDRv4 land mask is ocean=0, lake=252, coast=253, land=254 + landmask = np.array(ds_in.variables["landmask"]) + is_ocean = landmask == OCEAN + is_lake = landmask == LAKE + is_coast = landmask == COAST + + # Loop through all months, creating no-land versions of the mask + newvalid_12months = np.zeros_like(validice_arr_v4, dtype=np.uint8) + for m in range(nmonths): + valid = validice_arr_v4[m, :, :] + + # newvalid will have values: + # 0: not yet set + # 100: not-valid-seaice-ocean (INVALOCEAN) + # 125: not-valid-seaice-lake (INVALLAKE) + # 200: valid-seaice-ocean (VALOCEAN) + # 225: valid-seaice-lake (VALLAKE) + + valid = fix_valid(valid, gridid, m) + + # initialize with valid/invalid ocean/lake + newvalid = np.zeros_like(valid, dtype=np.uint8) + newvalid[is_ocean & (valid == 0)] = INVALOCEAN # invalid_ocean + newvalid[is_lake & (valid == 0)] = INVALLAKE # invalid_lake + newvalid[is_ocean & (valid == 1)] = VALOCEAN # valid_ocean + newvalid[is_lake & (valid == 1)] = VALLAKE # valid_lake + + # Loop until all pixels are explicitly assigned a value + n_unset = np.sum(np.where(newvalid == 0, 1, 0)) + prior_n_unset = n_unset + + while n_unset > 0: + # Cyclicly dilate: validocean, validlake, invalidocean, invalid lake + + # validocean + is_adj_validocean = is_adjacent(newvalid == VALOCEAN) + newvalid[is_adj_validocean & (newvalid == 0)] = VALOCEAN + + # validlake + is_adj_validlake = is_adjacent(newvalid == VALLAKE) + newvalid[is_adj_validlake & (newvalid == 0)] = VALLAKE + + # invalidocean + is_adj_invalidocean = is_adjacent(newvalid == INVALOCEAN) + newvalid[is_adj_invalidocean & (newvalid == 0)] = INVALOCEAN + + # invalidlake + # Note: By inspection, there were no lake grid cells that were + # ever marked "invalid" ice + is_adj_invalidlake = is_adjacent(newvalid == INVALLAKE) + newvalid[is_adj_invalidlake & (newvalid == 0)] = INVALLAKE + + n_invalid_lake = np.sum(np.where(is_adj_invalidlake, 1, 0)) + if n_invalid_lake > 0: + print("***********") + print(f"num invalid lake: {n_invalid_lake}") + print("***********") + + n_unset = np.sum(np.where(newvalid == 0, 1, 0)) + if n_unset == prior_n_unset: + print("n_unset did not change!") + breakpoint() + + prior_n_unset = n_unset + + # Add the coast back in + is_valid = (newvalid == VALLAKE) | (newvalid == VALOCEAN) + newvalid[is_coast] = 75 + newvalid[is_coast & is_valid] = 250 + + newvalid_12months[m, :, :] = newvalid[:, :] + + ofn = f"cdrv4_{gridid}_validice_12m_corrected.dat" + newvalid_12months.tofile(ofn) + print(f"Wrote: {ofn}") + + +if __name__ == "__main__": + for gridid in ("psn25", "pss25"): + gen_generic_valid_icemasks(gridid) diff --git a/scripts/ancillary/gen_v5_flagmask.py b/scripts/ancillary/gen_v5_flagmask.py new file mode 100644 index 00000000..259ff4bc --- /dev/null +++ b/scripts/ancillary/gen_v5_flagmask.py @@ -0,0 +1,36 @@ +"""gen_v5_flagmask.py""" + +import numpy as np +from netCDF4 import Dataset + +# NH +nh_anc = Dataset("./ecdr-ancillary-psn25.nc") +nh_surf = np.array(nh_anc.variables["surface_type"]) +ydim, xdim = nh_surf.shape + +nh_flag = np.zeros((ydim, xdim), dtype=np.uint8) +nh_flag[:] = 255 +nh_flag[nh_surf == 50] = 0 # Ocean +nh_flag[nh_surf == 75] = 252 # Lake +nh_flag[nh_surf == 200] = 253 # Coast +nh_flag[nh_surf == 250] = 254 # Land +assert np.all(nh_flag != 255) +ofn = "flagmask_psn25_v05.dat" +nh_flag.tofile(ofn) +print(f"Wrote: {ofn}") + +# SH +sh_anc = Dataset("./ecdr-ancillary-pss25.nc") +sh_surf = np.array(sh_anc.variables["surface_type"]) +ydim, xdim = sh_surf.shape + +sh_flag = np.zeros((ydim, xdim), dtype=np.uint8) +sh_flag[:] = 255 +sh_flag[sh_surf == 50] = 0 # Ocean +sh_flag[sh_surf == 75] = 252 # Lake +sh_flag[sh_surf == 200] = 253 # Coast +sh_flag[sh_surf == 250] = 254 # Land +assert np.all(sh_flag != 255) +ofn = "flagmask_pss25_v05.dat" +sh_flag.tofile(ofn) +print(f"Wrote: {ofn}") diff --git a/scripts/ancillary/v5minconc_fromv4_12.5km.py b/scripts/ancillary/v5minconc_fromv4_12.5km.py new file mode 100644 index 00000000..7cbebd98 --- /dev/null +++ b/scripts/ancillary/v5minconc_fromv4_12.5km.py @@ -0,0 +1,140 @@ +import numpy as np +from netCDF4 import Dataset +from scipy.ndimage import maximum_filter, median_filter + +"""Create a v5 minconc array from the v4 minconc array + +This is the 12.5km version. Start by filling in with overlying 25km v4cell, then max-fill the remainder +""" + + +def get_hem(gridid): + if gridid == "psn12.5": + return "n" + elif gridid == "psn25": + return "n" + elif gridid == "pss12.5": + return "s" + elif gridid == "pss25": + return "s" + else: + raise ValueError(f"Could not determine hem of {gridid}") + + +def get_res(gridid): + if gridid == "psn25": + return "25" + elif gridid == "pss25": + return "25" + elif gridid == "psn12.5": + return "12.5" + elif gridid == "pss12.5": + return "12.5" + else: + raise ValueError(f"Could not determine res of {gridid}") + + +def gen_v5minconc_12p5(gridid): + """Generate the extended v5 min_conc field from the v4 version""" + hem = get_hem(gridid) + # res = get_res(gridid) + + # This will be the 25km v4 anc file (for minconc and v4land) + ds4 = Dataset(f"./G02202-cdr-ancillary-{hem}h.nc") + ds4.set_auto_maskandscale(False) + ds4land = np.array(ds4.variables["landmask"]) + nonoceanv4 = ds4land != 0 + v4minconc_i2 = np.array(ds4.variables["min_concentration"]) + + # This will be the 12.5km v5 anc file (for surface type) + ds5 = Dataset(f"./ecdr-ancillary-{gridid}.nc") + ds5.set_auto_maskandscale(False) + ds5land = np.array(ds5.variables["surface_type"]) + nonoceanv5 = ds5land != 50 + + ydim12, xdim12 = ds5land.shape + v5minconc_i2 = np.zeros((ydim12, xdim12), dtype=np.int16) + for joff in range(2): + for ioff in range(2): + v5_slice = v5minconc_i2[joff::2, ioff::2] + v5_slice[:, :] = v4minconc_i2[:, :] + v5_slice[nonoceanv4] = 1200 # 1200 for v4land/v5ocean + + v5minconc_i2[nonoceanv5] = 1500 # 1500 for v5land + + # Check the initial fill + # breakpoint() + # print('check v5minconc_i2 init fill') + + # Do an initial pass, replacing values with the max of the '+' locality + kernel = [[0, 1, 0], [1, 1, 1], [0, 1, 0]] + minconc = v5minconc_i2.copy() + minconc[minconc >= 1200] = -100 + expanded = median_filter(minconc, footprint=kernel) + has_new_values = (v5minconc_i2 > 0) & (v5minconc_i2 < 1200) & (expanded >= 0) + v5minconc_i2[has_new_values] = expanded[has_new_values] + + # Check the initial smooth + # breakpoint() + # print('check v5minconc_i2 init fill') + + # Now, update v5minconc_i2 until all nonocean cells have a value + n_unassigned = np.sum(np.where(v5minconc_i2 == 1200, 1, 0)) + + # First, attempt '+' filter + kernel = [[0, 1, 0], [1, 1, 1], [0, 1, 0]] + last_n_unassigned = 0 + n_iterations = 0 + while (n_unassigned > 0) and (n_unassigned != last_n_unassigned): + minconc = v5minconc_i2.copy() + minconc[minconc >= 1200] = -100 + expanded = maximum_filter(minconc, footprint=kernel) + + has_new_values = (v5minconc_i2 == 1200) & (expanded >= 0) + v5minconc_i2[has_new_values] = expanded[has_new_values] + + try: + assert np.all(v5minconc_i2 >= 0) + except AssertionError: + breakpoint() + + last_n_unassigned = n_unassigned + n_unassigned = np.sum(np.where(v5minconc_i2 == 1200, 1, 0)) + n_iterations += 1 + + print(f"n still unassigned after {n_iterations}: {n_unassigned}") + + footprint_radius = 3 + while (n_unassigned > 0) and (footprint_radius < 21): + last_n_unassigned = 0 + n_iterations = 0 + while (n_unassigned > 0) and (n_unassigned != last_n_unassigned): + print(f"{footprint_radius}: {n_unassigned}", flush=True) + minconc = v5minconc_i2.copy() + minconc[minconc >= 1200] = -100 + expanded = maximum_filter(minconc, size=footprint_radius) + + assert minconc.shape == expanded.shape + + has_new_values = (v5minconc_i2 == 1200) & (expanded >= 0) + v5minconc_i2[has_new_values] = expanded[has_new_values] + + assert np.all(v5minconc_i2 >= 0) + + last_n_unassigned = n_unassigned + n_unassigned = np.sum(np.where(v5minconc_i2 == 1200, 1, 0)) + n_iterations += 1 + + footprint_radius += 2 + + ofn = f"v5minconc_i2_{gridid}.dat" + v5minconc_i2.tofile(ofn) + print(f"Wrote: {ofn}") + + print(f"n still unassigned after {n_iterations}: {n_unassigned}") + print(f" footprint_radius: {footprint_radius}") + + +if __name__ == "__main__": + for gridid in ("psn12.5", "pss12.5"): + gen_v5minconc_12p5(gridid) diff --git a/scripts/ancillary/v5minconc_fromv4_25km.py b/scripts/ancillary/v5minconc_fromv4_25km.py new file mode 100644 index 00000000..f3a5c272 --- /dev/null +++ b/scripts/ancillary/v5minconc_fromv4_25km.py @@ -0,0 +1,109 @@ +import numpy as np +from netCDF4 import Dataset +from scipy.ndimage import maximum_filter + +"""Create a v5 minconc array from the v4 minconc array + +I wonder if I should do both 25 and 12.5km versions +and for the 12.5, should I start by taking the max of the surrounding +grid cells or just sub in the overlying 25km cell value? +""" + + +def get_hem(gridid): + if gridid == "psn25": + return "n" + elif gridid == "pss25": + return "s" + else: + raise ValueError(f"Could not determine hem of {gridid}") + + +def get_res(gridid): + if gridid == "psn25": + return "25" + elif gridid == "pss25": + return "25" + else: + raise ValueError(f"Could not determine res of {gridid}") + + +def gen_v5minconc(gridid): + """Generate the extended v5 min_conc field from the v4 version""" + hem = get_hem(gridid) + # res = get_res(gridid) + + ds4 = Dataset(f"./G02202-cdr-ancillary-{hem}h.nc") + ds4.set_auto_maskandscale(False) + ds4land = np.array(ds4.variables["landmask"]) + nonoceanv4 = ds4land != 0 + v4minconc_i2 = np.array(ds4.variables["min_concentration"]) + + ds5 = Dataset(f"./ecdr-ancillary-{gridid}.nc") + ds5.set_auto_maskandscale(False) + ds5land = np.array(ds5.variables["surface_type"]) + nonoceanv5 = ds5land != 50 + v5minconc_i2 = v4minconc_i2.copy() + v5minconc_i2[nonoceanv4] = 1200 # 1200 for v4land/v5ocean + v5minconc_i2[nonoceanv5] = 1500 # 1500 for v5land + + # Now, update v5minconc_i2 until all nonocean cells have a value + n_unassigned = np.sum(np.where(v5minconc_i2 == 1200, 1, 0)) + + # First, attempt '+' filter + kernel = [[0, 1, 0], [1, 1, 1], [0, 1, 0]] + last_n_unassigned = 0 + n_iterations = 0 + while (n_unassigned > 0) and (n_unassigned != last_n_unassigned): + minconc = v5minconc_i2.copy() + minconc[minconc >= 1200] = -100 + expanded = maximum_filter(minconc, footprint=kernel) + + has_new_values = (v5minconc_i2 == 1200) & (expanded >= 0) + v5minconc_i2[has_new_values] = expanded[has_new_values] + + try: + assert np.all(v5minconc_i2 >= 0) + except AssertionError: + breakpoint() + + last_n_unassigned = n_unassigned + n_unassigned = np.sum(np.where(v5minconc_i2 == 1200, 1, 0)) + n_iterations += 1 + + print(f"n still unassigned after {n_iterations}: {n_unassigned}") + + footprint_radius = 3 + while (n_unassigned > 0) and (footprint_radius < 21): + last_n_unassigned = 0 + n_iterations = 0 + while (n_unassigned > 0) and (n_unassigned != last_n_unassigned): + print(f"{footprint_radius}: {n_unassigned}", flush=True) + minconc = v5minconc_i2.copy() + minconc[minconc >= 1200] = -100 + expanded = maximum_filter(minconc, size=footprint_radius) + + assert minconc.shape == expanded.shape + + has_new_values = (v5minconc_i2 == 1200) & (expanded >= 0) + v5minconc_i2[has_new_values] = expanded[has_new_values] + + assert np.all(v5minconc_i2 >= 0) + + last_n_unassigned = n_unassigned + n_unassigned = np.sum(np.where(v5minconc_i2 == 1200, 1, 0)) + n_iterations += 1 + + footprint_radius += 2 + + ofn = f"v5minconc_i2_{gridid}.dat" + v5minconc_i2.tofile(ofn) + print(f"Wrote: {ofn}") + + print(f"n still unassigned after {n_iterations}: {n_unassigned}") + print(f" footprint_radius: {footprint_radius}") + + +if __name__ == "__main__": + for gridid in ("psn25", "pss25"): + gen_v5minconc(gridid) diff --git a/seaice_ecdr/__init__.py b/seaice_ecdr/__init__.py index 6250a5da..b0146856 100644 --- a/seaice_ecdr/__init__.py +++ b/seaice_ecdr/__init__.py @@ -1,3 +1,4 @@ +import datetime as dt import os import sys @@ -5,7 +6,7 @@ from seaice_ecdr.constants import LOGS_DIR -__version__ = "v0.1.0" +__version__ = "v0.2.0" DEFAULT_LOG_LEVEL = "INFO" @@ -34,7 +35,8 @@ if not do_not_log: # One file per day. LOGS_DIR.mkdir(exist_ok=True) - file_sink_fp = LOGS_DIR / "{time:%Y-%m-%d}.log" + # file_sink_fp = LOGS_DIR / f"{time:%Y-%m-%d}.log" + file_sink_fp = LOGS_DIR / f"{dt.datetime.now():%Y-%m-%d}.log" logger.debug(f"Logging to {file_sink_fp}") logger.add( file_sink_fp, diff --git a/seaice_ecdr/_types.py b/seaice_ecdr/_types.py index 3236d8be..b21fe922 100644 --- a/seaice_ecdr/_types.py +++ b/seaice_ecdr/_types.py @@ -2,14 +2,3 @@ # In kilometers. ECDR_SUPPORTED_RESOLUTIONS = Literal["12.5", "25"] - -# Supported sats -SUPPORTED_SAT = Literal[ - "am2", # AMSR2 - "ame", # AMSRE - "F17", # SSMIS F17 - "F13", # SSMI F13 - "F11", # SSMI F11 - "F08", # SSMI F08 - "n07", # Nimbus-7 SMMR -] diff --git a/seaice_ecdr/ancillary.py b/seaice_ecdr/ancillary.py index 5af2884f..30feb52c 100644 --- a/seaice_ecdr/ancillary.py +++ b/seaice_ecdr/ancillary.py @@ -8,9 +8,10 @@ import datetime as dt from functools import cache from pathlib import Path -from typing import get_args +from typing import Literal, get_args import numpy as np +import numpy.typing as npt import pandas as pd import xarray as xr from pm_tb_data._types import NORTH, Hemisphere @@ -18,25 +19,39 @@ from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS from seaice_ecdr.constants import CDR_ANCILLARY_DIR from seaice_ecdr.grid_id import get_grid_id -from seaice_ecdr.platforms import SUPPORTED_SAT, get_platform_by_date +from seaice_ecdr.platforms import PLATFORM_CONFIG, Platform +from seaice_ecdr.platforms.config import N07_PLATFORM + +ANCILLARY_SOURCES = Literal["CDRv4", "CDRv5"] def get_ancillary_filepath( - *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS + *, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, ) -> Path: grid_id = get_grid_id( hemisphere=hemisphere, resolution=resolution, ) - filepath = CDR_ANCILLARY_DIR / f"ecdr-ancillary-{grid_id}.nc" + if ancillary_source == "CDRv5": + filepath = CDR_ANCILLARY_DIR / f"ecdr-ancillary-{grid_id}.nc" + elif ancillary_source == "CDRv4": + filepath = CDR_ANCILLARY_DIR / f"ecdr-ancillary-{grid_id}-v04r00.nc" + else: + raise ValueError(f"Unknown ancillary source: {ancillary_source}") return filepath @cache def get_ancillary_ds( - *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS + *, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, ) -> xr.Dataset: """Return xr Dataset of ancillary data for this hemisphere/resolution.""" # TODO: This list could be determined from an examination of @@ -46,7 +61,11 @@ def get_ancillary_ds( "ECDR currently only supports {get_args(ECDR_SUPPORTED_RESOLUTIONS)} resolutions." ) - filepath = get_ancillary_filepath(hemisphere=hemisphere, resolution=resolution) + filepath = get_ancillary_filepath( + hemisphere=hemisphere, + resolution=resolution, + ancillary_source=ancillary_source, + ) ds = xr.load_dataset(filepath) return ds @@ -77,12 +96,13 @@ def get_surfacetype_da( date: dt.date, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, - platform: SUPPORTED_SAT, + ancillary_source: ANCILLARY_SOURCES, ) -> xr.DataArray: """Return a dataarray with surface type information for this date.""" ancillary_ds = get_ancillary_ds( hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) xvar = ancillary_ds.variables["x"] @@ -91,7 +111,8 @@ def get_surfacetype_da( polehole_surface_type = 100 if "polehole_bitmask" in ancillary_ds.data_vars.keys(): polehole_bitmask = ancillary_ds.polehole_bitmask - polehole_bitlabel = f"{platform}_polemask" + platform = PLATFORM_CONFIG.get_platform_by_date(date) + polehole_bitlabel = f"{platform.id}_polemask" polehole_bitvalue = bitmask_value_for_meaning( var=polehole_bitmask, meaning=polehole_bitlabel, @@ -154,22 +175,24 @@ def nh_polehole_mask( *, date: dt.date, resolution: ECDR_SUPPORTED_RESOLUTIONS, - sat=None, + ancillary_source: ANCILLARY_SOURCES, + platform: Platform | None = None, ) -> xr.DataArray: """Return the northern hemisphere pole hole mask for the given date and resolution.""" ancillary_ds = get_ancillary_ds( hemisphere=NORTH, resolution=resolution, + ancillary_source=ancillary_source, ) polehole_bitmask = ancillary_ds.polehole_bitmask - if sat is None: - sat = get_platform_by_date( + if platform is None: + platform = PLATFORM_CONFIG.get_platform_by_date( date=date, ) - polehole_bitlabel = f"{sat}_polemask" + polehole_bitlabel = f"{platform.id}_polemask" polehole_bitvalue = bitmask_value_for_meaning( var=polehole_bitmask, meaning=polehole_bitlabel, @@ -188,7 +211,10 @@ def nh_polehole_mask( return polehole_mask -def get_smmr_ancillary_filepath(*, hemisphere) -> Path: +# def get_smmr_ancillary_filepath(*, hemisphere, ancillary_source: ANCILLARY_SOURCES) -> Path: +def get_daily_ancillary_filepath( + *, hemisphere, ancillary_source: ANCILLARY_SOURCES +) -> Path: """Return filepath to SMMR ancillary NetCDF. Contains a day-of-year climatology used for SMMR. @@ -198,30 +224,56 @@ def get_smmr_ancillary_filepath(*, hemisphere) -> Path: # Hard-coded to 25km resolution, which is what we expect for SMMR. resolution="25", ) - fn = f"ecdr-ancillary-{grid_id}-smmr-invalid-ice.nc" - filepath = CDR_ANCILLARY_DIR / fn + + if ancillary_source == "CDRv5": + filepath = CDR_ANCILLARY_DIR / f"ecdr-ancillary-{grid_id}-smmr-invalid-ice.nc" + elif ancillary_source == "CDRv4": + filepath = ( + CDR_ANCILLARY_DIR / f"ecdr-ancillary-{grid_id}-smmr-invalid-ice-v04r00.nc" + ) + else: + raise ValueError(f"Unknown smmr ancillary source: {ancillary_source}") return filepath -def get_smmr_invalid_ice_mask(*, date: dt.date, hemisphere: Hemisphere) -> xr.DataArray: - ancillary_file = get_smmr_ancillary_filepath(hemisphere=hemisphere) +def get_smmr_invalid_ice_mask( + *, + date: dt.date, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, +) -> xr.DataArray: + # TODO: Consider using daily instead of monthly icemask for SMMR? Others? + # ancillary_file = get_daily_ancillary_filepath(hemisphere=hemisphere, ancillary_source=ancillary_source) + ancillary_file = get_ancillary_filepath( + hemisphere=hemisphere, + resolution=resolution, + ancillary_source=ancillary_source, + ) with xr.open_dataset(ancillary_file) as ds: invalid_ice_mask = ds.invalid_ice_mask.copy().astype(bool) - doy = date.timetuple().tm_yday + if "month" in invalid_ice_mask.variable.dims: + # Monthly ice mask + month = date.month - icemask_for_doy = invalid_ice_mask.sel(doy=doy) + icemask_for_date = invalid_ice_mask.sel(month=month) + icemask_for_date = icemask_for_date.drop_vars("month") + elif "doy" in invalid_ice_mask.variable.dims: + # day-of-year (doy) ice mask + doy = date.timetuple().tm_yday + icemask_for_date = invalid_ice_mask.sel(doy=doy) - # Drop the DOY dim. This is consistent with the other case returned by - # `get_invalid_ice_mask`, which has `month` as a dimension instead. - icemask_for_doy = icemask_for_doy.drop_vars("doy") + # Drop the DOY dim. This is consistent with the other case returned by + # `get_invalid_ice_mask`, which has `month` as a dimension instead. + icemask_for_date = icemask_for_date.drop_vars("doy") # Ice mask needs to be boolean - icemask_for_doy = icemask_for_doy.astype("bool") + icemask_for_date = icemask_for_date.astype("bool") - return icemask_for_doy + return icemask_for_date def get_invalid_ice_mask( @@ -229,7 +281,8 @@ def get_invalid_ice_mask( hemisphere: Hemisphere, date: dt.date, resolution: ECDR_SUPPORTED_RESOLUTIONS, - platform: SUPPORTED_SAT, + ancillary_source: ANCILLARY_SOURCES, + platform: Platform, ) -> xr.DataArray: """Return an invalid ice mask for the given date. @@ -237,13 +290,17 @@ def get_invalid_ice_mask( month-based mask. """ # SMMR / n07 case: - if platform == "n07": - return get_smmr_invalid_ice_mask(hemisphere=hemisphere, date=date) - + if platform == N07_PLATFORM: + # TODO: Daily (SMMR) mask is used at end for cleanup, + # but not for initial TB field generation + # Skip the smmr invalid ice mask for now... + print("WARNING: Using non-SMMR invalid ice masks") + # return get_smmr_invalid_ice_mask(hemisphere=hemisphere, date=date) # All other platforms: ancillary_ds = get_ancillary_ds( hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) invalid_ice_mask = ancillary_ds.invalid_ice_mask.sel(month=date.month) @@ -260,7 +317,10 @@ def get_invalid_ice_mask( def get_ocean_mask( - *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS + *, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, ) -> xr.DataArray: """Return a binary mask where True values represent `ocean`. @@ -269,6 +329,7 @@ def get_ocean_mask( ancillary_ds = get_ancillary_ds( hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) surface_type = ancillary_ds.surface_type @@ -297,6 +358,7 @@ def get_non_ocean_mask( *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, ) -> xr.DataArray: """Return a binary mask where True values represent non-ocean pixels. @@ -305,6 +367,7 @@ def get_non_ocean_mask( ancillary_ds = get_ancillary_ds( hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) surface_type = ancillary_ds.surface_type @@ -341,11 +404,15 @@ def get_non_ocean_mask( def get_land90_conc_field( - *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS + *, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, ) -> xr.DataArray: ancillary_ds = get_ancillary_ds( hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) land90_da = ancillary_ds.l90c @@ -354,11 +421,15 @@ def get_land90_conc_field( def get_adj123_field( - *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS + *, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, ) -> xr.DataArray: ancillary_ds = get_ancillary_ds( hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) adj123_da = ancillary_ds.adj123 @@ -366,13 +437,99 @@ def get_adj123_field( return adj123_da +def get_nt_landmask( + *, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, +) -> npt.NDArray: + """Returns a numpy array equivalent to that used in the original + NT code, particularly for the NT land spillover algorithm.""" + + ancillary_ds = get_ancillary_ds( + hemisphere=hemisphere, + resolution=resolution, + ancillary_source=ancillary_source, + ) + if "cdrv4_landmask" in ancillary_ds.variables.keys(): + return np.array(ancillary_ds.variables["cdrv4_nt_landmask"]) + + # If a landmask field like that used in CDRv4 is not available in + # the ancillary field, create it + raise RuntimeError("gen_cdrv4_nt_landmask() not yet implemented.") + # cdrv4_nt_landmask = gen_cdrv4_nt_landmask( + # ancillary_ds=ancillary_ds, + # ) + + # return cdrv4_nt_landmask + + +def get_nt_shoremap( + *, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, +) -> npt.NDArray: + """Returns a numpy array equivalent to that used in the original + NT code, particularly for the NT land spillover algorithm.""" + + ancillary_ds = get_ancillary_ds( + hemisphere=hemisphere, + resolution=resolution, + ancillary_source=ancillary_source, + ) + if "cdrv4_nt_shoremap" in ancillary_ds.variables.keys(): + return np.array(ancillary_ds.variables["cdrv4_nt_shoremap"]) + + # If a shoremap field like that used in CDRv4 is not available in + # the ancillary field, create it + raise RuntimeError("gen_cdrv4_nt_shoremap() not yet implemented.") + # cdrv4_nt_shoremap = gen_cdrv4_nt_shoremap( + # ancillary_ds=ancillary_ds, + # ) + + # return cdrv4_nt_shoremap + + +def get_nt_minic( + *, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, +) -> npt.NDArray: + """Returns a numpy array equivalent to that used in the original + NT code, particularly for the NT land spillover algorithm.""" + + ancillary_ds = get_ancillary_ds( + hemisphere=hemisphere, + resolution=resolution, + ancillary_source=ancillary_source, + ) + if "cdrv4_nt_minic" in ancillary_ds.variables.keys(): + return np.array(ancillary_ds.variables["cdrv4_nt_minic"]) + + # If a minic field like that used in CDRv4 is not available in + # the ancillary field, create it + raise RuntimeError("gen_cdrv4_nt_minic() not yet implemented.") + # cdrv4_nt_minic = gen_cdrv4_nt_minic( + # ancillary_ds=ancillary_ds, + # ) + + # return cdrv4_nt_minic + + def get_empty_ds_with_time( - *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, date: dt.date + *, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + date: dt.date, + ancillary_source: ANCILLARY_SOURCES, ) -> xr.Dataset: """Return an "empty" xarray dataset with x, y, crs, and time set.""" ancillary_ds = get_ancillary_ds( hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) time_as_int = (date - dt.date(1970, 1, 1)).days diff --git a/seaice_ecdr/complete_daily_ecdr.py b/seaice_ecdr/complete_daily_ecdr.py index ef219484..d4c9d7be 100644 --- a/seaice_ecdr/complete_daily_ecdr.py +++ b/seaice_ecdr/complete_daily_ecdr.py @@ -17,6 +17,7 @@ from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS from seaice_ecdr.ancillary import ( + ANCILLARY_SOURCES, get_non_ocean_mask, get_surfacetype_da, ) @@ -30,10 +31,9 @@ date_in_nh_melt_season, melting, ) -from seaice_ecdr.platforms import ( - get_platform_by_date, -) +from seaice_ecdr.platforms import PLATFORM_CONFIG from seaice_ecdr.set_daily_ncattrs import finalize_cdecdr_ds +from seaice_ecdr.spillover import LAND_SPILL_ALGS from seaice_ecdr.temporal_composite_daily import get_tie_filepath, make_tiecdr_netcdf from seaice_ecdr.util import ( date_range, @@ -74,19 +74,19 @@ def get_ecdr_filepath( is_nrt: bool, ) -> Path: """Return the complete daily eCDR file path.""" - platform = get_platform_by_date(date) + platform = PLATFORM_CONFIG.get_platform_by_date(date) if is_nrt: ecdr_filename = nrt_daily_filename( hemisphere=hemisphere, date=date, - sat=platform, + platform_id=platform.id, resolution=resolution, ) else: ecdr_filename = standard_daily_filename( hemisphere=hemisphere, date=date, - sat=platform, + platform_id=platform.id, resolution=resolution, ) @@ -127,6 +127,8 @@ def read_or_create_and_read_standard_tiecdr_ds( hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, intermediate_output_dir: Path, + land_spillover_alg: LAND_SPILL_ALGS, + ancillary_source: ANCILLARY_SOURCES, overwrite_tie: bool = False, ) -> xr.Dataset: """Read an tiecdr netCDF file, creating it if it doesn't exist. @@ -139,6 +141,8 @@ def read_or_create_and_read_standard_tiecdr_ds( resolution=resolution, intermediate_output_dir=intermediate_output_dir, overwrite_tie=overwrite_tie, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) tie_ds = read_tiecdr_ds( @@ -255,6 +259,7 @@ def create_melt_onset_field( date: dt.date, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, complete_output_dir: Path, intermediate_output_dir: Path, is_nrt: bool, @@ -336,6 +341,7 @@ def create_melt_onset_field( non_ocean_mask = get_non_ocean_mask( hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) updated_melt_onset = update_melt_onset_for_day( @@ -356,6 +362,7 @@ def _add_melt_onset_for_nh( date: dt.date, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, complete_output_dir: Path, intermediate_output_dir: Path, is_nrt: bool, @@ -370,6 +377,7 @@ def _add_melt_onset_for_nh( complete_output_dir=complete_output_dir, intermediate_output_dir=intermediate_output_dir, is_nrt=is_nrt, + ancillary_source=ancillary_source, ) # Update cde_ds with melt onset info @@ -421,6 +429,7 @@ def _add_surfacetype_da( cde_ds: xr.Dataset, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, ) -> xr.Dataset: """Add the surface_type field to the complete daily dataset for the given date.""" cde_ds_with_surfacetype = cde_ds.copy() @@ -432,12 +441,11 @@ def _add_surfacetype_da( # The methodology here should be reviewed to see if there is # a "better" way to add a geo-referenced dataarray to an existing # xr Dataset. - platform = get_platform_by_date(date) surfacetype_da = get_surfacetype_da( date=date, hemisphere=hemisphere, resolution=resolution, - platform=platform, + ancillary_source=ancillary_source, ) # Force use of the cde_ds coords instead of the x, y, time vars # from the ancillary file (which *should* be compatible...but we @@ -460,6 +468,7 @@ def complete_daily_ecdr_ds( date: dt.date, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, complete_output_dir: Path, intermediate_output_dir: Path, is_nrt: bool, @@ -481,6 +490,7 @@ def complete_daily_ecdr_ds( cde_ds=cde_ds, hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) # For the northern hemisphere, create the melt onset field and add it to the @@ -494,9 +504,12 @@ def complete_daily_ecdr_ds( complete_output_dir=complete_output_dir, intermediate_output_dir=intermediate_output_dir, is_nrt=is_nrt, + ancillary_source=ancillary_source, ) - cde_ds = finalize_cdecdr_ds(cde_ds, hemisphere, resolution) + cde_ds = finalize_cdecdr_ds( + cde_ds, hemisphere, resolution, ancillary_source=ancillary_source + ) # TODO: Need to ensure that the cdr_seaice_conc field does not have values # where seaice cannot occur, eg over land or lakes @@ -522,7 +535,9 @@ def write_cde_netcdf( This function also creates a checksum file for the complete daily netcdf. """ - logger.info(f"Writing netCDF of initial_daily eCDR file to: {output_filepath}") + logger.info( + f"Writing netCDF of complete, temporally interpolated eCDR file to: {output_filepath}" + ) for excluded_field in excluded_fields: if excluded_field in cde_ds.variables.keys(): cde_ds = cde_ds.drop_vars(excluded_field) @@ -565,6 +580,8 @@ def make_standard_cdecdr_netcdf( hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, base_output_dir: Path, + land_spillover_alg: LAND_SPILL_ALGS, + ancillary_source: ANCILLARY_SOURCES, overwrite_cde: bool = False, ) -> Path: """Create a 'standard', complete (ready for prod) daily CDR NetCDF file. @@ -606,6 +623,8 @@ def make_standard_cdecdr_netcdf( hemisphere=hemisphere, resolution=resolution, intermediate_output_dir=intermediate_output_dir, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) # Ensure the previous day's complete daily field exists for the melt @@ -623,6 +642,8 @@ def make_standard_cdecdr_netcdf( resolution=resolution, base_output_dir=base_output_dir, overwrite_cde=overwrite_cde, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) cde_ds = complete_daily_ecdr_ds( @@ -633,6 +654,7 @@ def make_standard_cdecdr_netcdf( complete_output_dir=complete_output_dir, intermediate_output_dir=intermediate_output_dir, is_nrt=False, + ancillary_source=ancillary_source, ) written_cde_ncfile = write_cde_netcdf( @@ -679,6 +701,8 @@ def create_standard_ecdr_for_dates( hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, base_output_dir: Path, + land_spillover_alg: LAND_SPILL_ALGS, + ancillary_source: ANCILLARY_SOURCES, overwrite_cde: bool = False, ) -> list[dt.date]: """Create "standard" (non-NRT) daily ECDR NC files for the provided dates. @@ -697,6 +721,8 @@ def create_standard_ecdr_for_dates( resolution=resolution, base_output_dir=base_output_dir, overwrite_cde=overwrite_cde, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) except Exception: logger.exception(f"Failed to create standard ECDR for {date=}") @@ -765,6 +791,17 @@ def create_standard_ecdr_for_dates( required=True, type=click.Choice(get_args(ECDR_SUPPORTED_RESOLUTIONS)), ) +@click.option( + "--land-spillover-alg", + required=True, + # type=click.Choice(("BT_NT", "NT2", "ILS")), + type=click.Choice(get_args(LAND_SPILL_ALGS)), +) +@click.option( + "--ancillary-source", + required=True, + type=click.Choice(get_args(ANCILLARY_SOURCES)), +) @click.option( "--overwrite", is_flag=True, @@ -776,6 +813,8 @@ def cli( hemisphere: Hemisphere, base_output_dir: Path, resolution: ECDR_SUPPORTED_RESOLUTIONS, + land_spillover_alg: LAND_SPILL_ALGS, + ancillary_source: ANCILLARY_SOURCES, overwrite: bool, ) -> None: """Run the temporal composite daily ECDR algorithm with AMSR2 data. @@ -787,6 +826,7 @@ def cli( projection, resolution, and bounds), and TBtype (TB type includes source and methodology for getting those TBs onto the grid) """ + # raise ValueError('made it to here!') if end_date is None: end_date = copy.copy(date) @@ -796,5 +836,7 @@ def cli( resolution=resolution, base_output_dir=base_output_dir, overwrite_cde=overwrite, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) raise_error_for_dates(error_dates=error_dates) diff --git a/seaice_ecdr/constants.py b/seaice_ecdr/constants.py index eea61e02..398e970e 100644 --- a/seaice_ecdr/constants.py +++ b/seaice_ecdr/constants.py @@ -21,3 +21,4 @@ # Location of surface mask & geo-information files. CDR_ANCILLARY_DIR = NSIDC_NFS_SHARE_DIR / f"{ECDR_PRODUCT_VERSION}_ancillary" +CDRv4_ANCILLARY_DIR = NSIDC_NFS_SHARE_DIR / "cdrv4_equiv_ancillary" diff --git a/seaice_ecdr/daily_aggregate.py b/seaice_ecdr/daily_aggregate.py index a3dd42ef..75335132 100644 --- a/seaice_ecdr/daily_aggregate.py +++ b/seaice_ecdr/daily_aggregate.py @@ -13,16 +13,16 @@ from pm_tb_data._types import Hemisphere from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS -from seaice_ecdr.ancillary import get_ancillary_ds +from seaice_ecdr.ancillary import ANCILLARY_SOURCES, get_ancillary_ds from seaice_ecdr.checksum import write_checksum_file from seaice_ecdr.complete_daily_ecdr import get_ecdr_filepath from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR from seaice_ecdr.nc_attrs import get_global_attrs from seaice_ecdr.nc_util import concatenate_nc_files -from seaice_ecdr.platforms import get_first_platform_start_date +from seaice_ecdr.platforms import PLATFORM_CONFIG from seaice_ecdr.util import ( get_complete_output_dir, - sat_from_filename, + platform_id_from_filename, standard_daily_aggregate_filename, ) @@ -37,7 +37,9 @@ def _get_daily_complete_filepaths_for_year( resolution: ECDR_SUPPORTED_RESOLUTIONS, ) -> list[Path]: data_list = [] - start_date = max(dt.date(year, 1, 1), get_first_platform_start_date()) + start_date = max( + dt.date(year, 1, 1), PLATFORM_CONFIG.get_first_platform_start_date() + ) for period in pd.period_range(start=start_date, end=dt.date(year, 12, 31)): expected_fp = get_ecdr_filepath( date=period.to_timestamp().date(), @@ -86,6 +88,7 @@ def _update_ncrcat_daily_ds( daily_filepaths: list[Path], hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES = "CDRv5", ): """Update the aggregate dataset created by `ncrcat`. @@ -94,6 +97,7 @@ def _update_ncrcat_daily_ds( surf_geo_ds = get_ancillary_ds( hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) ds["latitude"] = surf_geo_ds.latitude ds["longitude"] = surf_geo_ds.longitude @@ -113,7 +117,7 @@ def _update_ncrcat_daily_ds( temporality="daily", aggregate=True, source=", ".join([fp.name for fp in daily_filepaths]), - sats=[sat_from_filename(fp.name) for fp in daily_filepaths], + platform_ids=[platform_id_from_filename(fp.name) for fp in daily_filepaths], ) ds.attrs = daily_aggregate_ds_global_attrs @@ -126,6 +130,7 @@ def make_daily_aggregate_netcdf_for_year( hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, complete_output_dir: Path, + ancillary_source: ANCILLARY_SOURCES = "CDRv5", ) -> None: try: daily_filepaths = _get_daily_complete_filepaths_for_year( diff --git a/seaice_ecdr/initial_daily_ecdr.py b/seaice_ecdr/initial_daily_ecdr.py index 66b8b139..5d449f47 100644 --- a/seaice_ecdr/initial_daily_ecdr.py +++ b/seaice_ecdr/initial_daily_ecdr.py @@ -20,45 +20,56 @@ import pm_icecon.nt.compute_nt_ic as nt import xarray as xr from loguru import logger +from pm_icecon._types import LandSpilloverMethods from pm_icecon.bt.params.nsidc0007 import get_smmr_params from pm_icecon.bt.xfer_tbs import xfer_rss_tbs from pm_icecon.errors import UnexpectedSatelliteError -from pm_icecon.fill_polehole import fill_pole_hole from pm_icecon.interpolation import spatial_interp_tbs -from pm_icecon.land_spillover import apply_nt2_land_spillover from pm_icecon.nt._types import NasateamGradientRatioThresholds from pm_icecon.nt.params.params import get_cdr_nt_params from pm_icecon.nt.tiepoints import NasateamTiePoints from pm_tb_data._types import NORTH, Hemisphere from pm_tb_data.fetch.nsidc_0001 import NSIDC_0001_SATS -from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS, SUPPORTED_SAT +from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS from seaice_ecdr.ancillary import ( - get_adj123_field, + ANCILLARY_SOURCES, get_empty_ds_with_time, get_invalid_ice_mask, - get_land90_conc_field, get_non_ocean_mask, nh_polehole_mask, ) from seaice_ecdr.cli.util import datetime_to_date -from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR +from seaice_ecdr.constants import CDR_ANCILLARY_DIR, DEFAULT_BASE_OUTPUT_DIR from seaice_ecdr.grid_id import get_grid_id -from seaice_ecdr.platforms import get_platform_by_date +from seaice_ecdr.platforms import PLATFORM_CONFIG, SUPPORTED_PLATFORM_ID from seaice_ecdr.regrid_25to12 import reproject_ideds_25to12 -from seaice_ecdr.tb_data import EXPECTED_ECDR_TB_NAMES, EcdrTbData, get_ecdr_tb_data +from seaice_ecdr.spillover import LAND_SPILL_ALGS, land_spillover +from seaice_ecdr.tb_data import ( + EXPECTED_ECDR_TB_NAMES, + EcdrTbData, + get_25km_ecdr_tb_data, + get_ecdr_tb_data, +) from seaice_ecdr.util import get_intermediate_output_dir, standard_daily_filename -def cdr_bootstrap( +def platform_is_smmr(platform_id: SUPPORTED_PLATFORM_ID): + return platform_id in ("n07", "s36") + + +def cdr_bootstrap_raw( *, tb_v37: npt.NDArray, tb_h37: npt.NDArray, tb_v19: npt.NDArray, bt_coefs, - platform: SUPPORTED_SAT, + platform: SUPPORTED_PLATFORM_ID, ): - """Generate the raw bootstrap concentration field.""" + """Generate the raw bootstrap concentration field. + Note: tb fields should already be transformed before + being passed to this function. + """ wtp_37v = bt_coefs["bt_wtp_v37"] wtp_37h = bt_coefs["bt_wtp_h37"] wtp_19v = bt_coefs["bt_wtp_v19"] @@ -67,24 +78,6 @@ def cdr_bootstrap( itp_37h = bt_coefs["bt_itp_h37"] itp_19v = bt_coefs["bt_itp_v19"] - # Transform TBs for the bootstrap calculation - try: - transformed = xfer_rss_tbs( - tbs=dict( - v37=tb_v37, - h37=tb_h37, - v19=tb_v19, - ), - platform=platform.lower(), - ) - tb_v37 = transformed["v37"] - tb_h37 = transformed["h37"] - tb_v19 = transformed["v19"] - except UnexpectedSatelliteError: - logger.warning( - f"No BT Tb transformation to F13 available for {platform=}. Using untransformed Tbs instead." - ) - bt_conc = bt.calc_bootstrap_conc( tb_v37=tb_v37, tb_h37=tb_h37, @@ -95,20 +88,14 @@ def cdr_bootstrap( itp_37v=itp_37v, itp_37h=itp_37h, itp_19v=itp_19v, - line_37v37h=bt_coefs["vh37_lnline"], - line_37v19v=bt_coefs["v1937_lnline"], + line_37v37h=bt_coefs["vh37_iceline"], + line_37v19v=bt_coefs["v1937_iceline"], ad_line_offset=bt_coefs["ad_line_offset"], maxic_frac=bt_coefs["maxic"], # Note: the missing value of 255 ends up getting set to `nan` below. missing_flag_value=255, ) - # Set any bootstrap concentrations below 10% to 0. - bt_conc[bt_conc < 10] = 0 - - # Remove bt_conc flags (e.g., missing) - bt_conc[bt_conc >= 250] = np.nan - return bt_conc @@ -193,6 +180,7 @@ def _setup_ecdr_ds( date: dt.date, tb_data: EcdrTbData, hemisphere: Hemisphere, + ancillary_source: ANCILLARY_SOURCES, ) -> xr.Dataset: # Initialize geo-referenced xarray Dataset grid_id = get_grid_id( @@ -201,7 +189,10 @@ def _setup_ecdr_ds( ) ecdr_ide_ds = get_empty_ds_with_time( - hemisphere=hemisphere, resolution=tb_data.resolution, date=date + hemisphere=hemisphere, + resolution=tb_data.resolution, + date=date, + ancillary_source=ancillary_source, ) # Set initial global attributes @@ -214,7 +205,7 @@ def _setup_ecdr_ds( ecdr_ide_ds.attrs["data_source"] = tb_data.data_source # Set the platform - ecdr_ide_ds.attrs["platform"] = tb_data.platform + ecdr_ide_ds.attrs["platform"] = tb_data.platform_id file_date = dt.date(1970, 1, 1) + dt.timedelta( days=int(ecdr_ide_ds.variables["time"].data) @@ -273,31 +264,103 @@ def get_nasateam_weather_mask( ecdr_ide_ds["v37_day_si"].data[0, :, :], ecdr_ide_ds["v19_day_si"].data[0, :, :], ) + # Round off for better match with CDRv4 results + nt_gr_3719 = np.round(nt_gr_3719, 4) nt_3719_weather_mask = nt_gr_3719 > nt_coefs["nt_gradient_thresholds"]["3719"] # SMMR does not have a 22v channel that's suitable for the nt weather # filter. Instead, we just re-use the 3719 gradient ratios for 2219. - if ecdr_ide_ds.platform == "n07": + if platform_is_smmr(ecdr_ide_ds.platform): return nt_3719_weather_mask nt_gr_2219 = nt.compute_ratio( ecdr_ide_ds["v22_day_si"].data[0, :, :], ecdr_ide_ds["v19_day_si"].data[0, :, :], ) - nt_2219_weather_mask = nt_gr_2219 > nt_coefs["nt_gradient_thresholds"]["2219"] + # Round off for better match with CDRv4 results + nt_gr_2219 = np.round(nt_gr_2219, 4) + nt_2219_weather_mask = nt_gr_2219 > nt_coefs["nt_gradient_thresholds"]["2219"] weather_mask = nt_3719_weather_mask | nt_2219_weather_mask + is_zero_tb = ( + (ecdr_ide_ds["v22_day_si"].data[0, :, :] == 0) + | (ecdr_ide_ds["v19_day_si"].data[0, :, :] == 0) + | (ecdr_ide_ds["v37_day_si"].data[0, :, :] == 0) + ) + weather_mask[is_zero_tb] = 0 + return weather_mask +def get_flagmask( + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, +) -> None | npt.NDArray: + """ + Return a set of flags (uint8s of value 251-255) + that correspond to non-ocean features of this grid/landmask + """ + + # NOTE: This could be better organized and name-conventioned? + + # TODO: Put these flagmasks in the ancillary files + # (or at least a better location!) + + flagmask = None + + if resolution == "25" and hemisphere == "north": + gridid = "psn25" + xdim = 304 + ydim = 448 + elif resolution == "25" and hemisphere == "south": + gridid = "pss25" + xdim = 316 + ydim = 332 + elif resolution == "12.5" and hemisphere == "north": + gridid = "psn12.5" + xdim = 608 + ydim = 896 + elif resolution == "12.5" and hemisphere == "south": + gridid = "pss12.5" + xdim = 632 + ydim = 664 + + if ancillary_source == "CDRv4": + version_string = "v04r00" + elif ancillary_source == "CDRv5": + version_string = "v05r01" + + flagmask_fn = CDR_ANCILLARY_DIR / f"flagmask_{gridid}_{version_string}.dat" + try: + flagmask_fn.is_file() + except AssertionError as e: + print(f"No such flagmask_fn: {flagmask_fn}") + raise e + + try: + flagmask = np.fromfile(flagmask_fn, dtype=np.uint8).reshape(ydim, xdim) + except ValueError as e: + print(f"Could not reshape to: {xdim}, {ydim}") + raise e + + # With ancillary_source constrained to CDRv4 and CDRv5, this is unreachable + # so these lines are commented out for mypy reasons + # if flagmask is None: + # logger.warning(f"No flagmask found for {hemisphere=} {ancillary_source=}") + + return flagmask + + def compute_initial_daily_ecdr_dataset( *, date: dt.date, hemisphere: Hemisphere, tb_data: EcdrTbData, - fill_the_pole_hole: bool = False, + land_spillover_alg: LAND_SPILL_ALGS, + ancillary_source: ANCILLARY_SOURCES, ) -> xr.Dataset: """Create intermediate daily ECDR xarray dataset. @@ -312,15 +375,50 @@ def compute_initial_daily_ecdr_dataset( date=date, tb_data=tb_data, hemisphere=hemisphere, + ancillary_source=ancillary_source, ) # Spatially interpolate the brightness temperatures for tbname in EXPECTED_ECDR_TB_NAMES: tb_day_name = f"{tbname}_day" + tb_si_varname = f"{tb_day_name}_si" - tb_si_data = spatial_interp_tbs(ecdr_ide_ds[tb_day_name].data[0, :, :]) + + if ancillary_source == "CDRv5": + # The CDRv5 spatint requires min of two adj grid cells + # and allows corner grid cells with weighting of 0.707 + tb_si_data = spatial_interp_tbs( + ecdr_ide_ds[tb_day_name].data[0, :, :], + ) + elif ancillary_source == "CDRv4": + # The CDRv4 calculation does not use diagonal grid cells + # and requires a min of 3 adjacent grid cells + tb_si_data = spatial_interp_tbs( + ecdr_ide_ds[tb_day_name].data[0, :, :], + corner_weight=0, + min_weightsum=3, + image_shift_mode="grid-wrap", # CDRv4 wraps tb field for interp + ) + tb_si_longname = f"Spatially interpolated {ecdr_ide_ds[tb_day_name].long_name}" tb_units = "K" + + if ancillary_source == "CDRv4": + # The CDRv4 calculation causes TB to be zero/missing where + # no sea ice can occur because of invalid region or land + logger.debug(f"Applying invalid ice mask to TB field: {tb_si_varname}") + platform = PLATFORM_CONFIG.get_platform_by_date(date) + invalid_ice_mask = get_invalid_ice_mask( + hemisphere=hemisphere, + date=date, + resolution=tb_data.resolution, + platform=platform, + ancillary_source=ancillary_source, + ) + + # Set the TB values to zero at (monthly?) invalid ice mask + tb_si_data[invalid_ice_mask] = 0 + ecdr_ide_ds[tb_si_varname] = ( ("time", "y", "x"), np.expand_dims(tb_si_data, axis=0), @@ -337,6 +435,29 @@ def compute_initial_daily_ecdr_dataset( }, ) + # Enforce missing TB value consistency across all channels + _, ydim, xdim = ecdr_ide_ds["v19_day_si"].data.shape + is_atleastone_zerotb = np.zeros((1, ydim, xdim), dtype="bool") + is_atleastone_nantb = np.zeros((1, ydim, xdim), dtype="bool") + tb_field_list = [ + "h19_day_si", + "v19_day_si", + "v22_day_si", + "h37_day_si", + "v37_day_si", + ] + for key in tb_field_list: + if key in ecdr_ide_ds.variables.keys(): + is_zero_tb = ecdr_ide_ds[key].data == 0 + is_atleastone_zerotb[is_zero_tb] = True + + is_NaN_tb = np.isnan(ecdr_ide_ds[key].data) + is_atleastone_nantb[is_NaN_tb] = True + for key in tb_field_list: + if key in ecdr_ide_ds.variables.keys(): + ecdr_ide_ds[key].data[is_atleastone_zerotb] = 0 + ecdr_ide_ds[key].data[is_atleastone_nantb] = np.nan + # Set up the spatial interpolation bitmask field where the various # TB fields were interpolated _, ydim, xdim = ecdr_ide_ds["h19_day_si"].data.shape @@ -363,12 +484,57 @@ def compute_initial_daily_ecdr_dataset( non_ocean_mask = get_non_ocean_mask( hemisphere=hemisphere, resolution=tb_data.resolution, + ancillary_source=ancillary_source, ) spatint_bitmask_arr[non_ocean_mask.data] = 0 # Drop the un-spatially interpolated TB field to save space and compute ecdr_ide_ds = ecdr_ide_ds.drop_vars(tb_varname) + # Attempt to get exact duplicate of v4 + # v4 uses np.round() to average the TB arrays as integers. + # this code uses floats (not integers) for the TB values. + # Because np.round() rounds to the nearest even value (not the nearest + # value!), the results are not the same as adding 0.5 and then cropping + # eg np.round(1834.5) -> 1834 + # eg np.round(1835.5) -> 1836 + # See: https://docs.scipy.org/doc//numpy-1.9.0/reference/generated/numpy.around.html#numpy.around + # In order to reproduce the v4 rounding values, need to first round to + # two decimal places, then round to 1 + ecdr_ide_ds["v37_day_si"].data = np.round( + np.round(ecdr_ide_ds["v37_day_si"].data, 2), 1 + ) + ecdr_ide_ds["h37_day_si"].data = np.round( + np.round(ecdr_ide_ds["h37_day_si"].data, 2), 1 + ) + ecdr_ide_ds["v19_day_si"].data = np.round( + np.round(ecdr_ide_ds["v19_day_si"].data, 2), 1 + ) + ecdr_ide_ds["h19_day_si"].data = np.round( + np.round(ecdr_ide_ds["h19_day_si"].data, 2), 1 + ) + ecdr_ide_ds["v22_day_si"].data = np.round( + np.round(ecdr_ide_ds["v22_day_si"].data, 2), 1 + ) + + # Set a missing data mask using 19V TB field + # Note: this will not include missing TBs in the day's invalid_ice field + ecdr_ide_ds["missing_tb_mask"] = ( + ("time", "y", "x"), + np.isnan(ecdr_ide_ds["v19_day_si"].data), + { + "grid_mapping": "crs", + "standard_name": "missing_tb_binary_mask", + "long_name": "Map of Missing TBs", + "comment": "Mask indicating pixels with missing TBs", + "units": 1, + }, + { + "zlib": True, + }, + ) + logger.debug("Initialized missing_tb_mask where TB was NaN") + spat_int_flag_mask_values = np.array([1, 2, 4, 8, 16, 32], dtype=np.uint8) ecdr_ide_ds["spatial_interpolation_flag"] = ( ("time", "y", "x"), @@ -395,26 +561,26 @@ def compute_initial_daily_ecdr_dataset( ) logger.debug("Initialized spatial_interpolation_flag with TB fill locations") - platform = get_platform_by_date(date) - if platform == "am2": + platform = PLATFORM_CONFIG.get_platform_by_date(date) + if platform.id == "am2": bt_coefs_init = pmi_bt_params_amsr2.get_ausi_amsr2_bootstrap_params( date=date, satellite="amsr2", gridid=ecdr_ide_ds.grid_id, ) - elif platform == "ame": + elif platform.id == "ame": bt_coefs_init = pmi_bt_params_amsre.get_ausi_amsre_bootstrap_params( date=date, satellite="amsre", gridid=ecdr_ide_ds.grid_id, ) - elif platform in get_args(NSIDC_0001_SATS): + elif platform.id in get_args(NSIDC_0001_SATS): bt_coefs_init = pmi_bt_params_0001.get_nsidc0001_bootstrap_params( date=date, - satellite=platform, + satellite=platform.id, gridid=ecdr_ide_ds.grid_id, ) - elif platform == "n07": + elif platform_is_smmr(platform.id): bt_coefs_init = get_smmr_params(hemisphere=hemisphere, date=date) else: raise RuntimeError(f"platform bootstrap params not implemented: {platform}") @@ -430,11 +596,13 @@ def compute_initial_daily_ecdr_dataset( date=date, resolution=tb_data.resolution, platform=platform, + ancillary_source=ancillary_source, ) non_ocean_mask = get_non_ocean_mask( hemisphere=hemisphere, resolution=tb_data.resolution, + ancillary_source=ancillary_source, ) ecdr_ide_ds["invalid_ice_mask"] = invalid_ice_mask.expand_dims(dim="time") @@ -450,13 +618,14 @@ def compute_initial_daily_ecdr_dataset( pole_mask = nh_polehole_mask( date=date, resolution=tb_data.resolution, - sat=platform, + ancillary_source=ancillary_source, + platform=platform, ) ecdr_ide_ds["pole_mask"] = pole_mask nt_params = get_cdr_nt_params( hemisphere=hemisphere, - platform=platform, + platform=platform.id, ) nt_coefs = NtCoefs( @@ -490,19 +659,22 @@ def compute_initial_daily_ecdr_dataset( }, ) + # NOTE: Strictly speaking, perhaps this should happen before computing + # the TB mask? But that does a very broad check (10 < TB < 350K) + # and so does not end up mattering # TODO: this mapping is in preparation for using separately-transformed # TBs in the Bootstrap algorithm. The better approach would be to # adjust the parameters for each platform so that this tranformation # would not be needed. That's...a fair bit of work though. + # NOTE: Notice that the bootstrap algorithms below will take these + # transformed *local* bt_??? vars, not he ecdr_ide_es arrays bt_v37 = ecdr_ide_ds["v37_day_si"].data[0, :, :] bt_h37 = ecdr_ide_ds["h37_day_si"].data[0, :, :] bt_v22 = ecdr_ide_ds["v22_day_si"].data[0, :, :] bt_v19 = ecdr_ide_ds["v19_day_si"].data[0, :, :] # Transform TBs for the bootstrap calculation - # TODO: DRY out this code some...These transformations also occur for the - # bootstrap SIC conc. We only want to transform Tbs for the bootstrap SIC - # calc and the weather filtering. + # TODO: Are there separate NT algorithm TB transformations? try: transformed = xfer_rss_tbs( tbs=dict( @@ -511,19 +683,23 @@ def compute_initial_daily_ecdr_dataset( v19=bt_v19, v22=bt_v22, ), - platform=platform.lower(), + platform=platform.id.lower(), ) bt_v37 = transformed["v37"] bt_h37 = transformed["h37"] bt_v19 = transformed["v19"] bt_v22 = transformed["v22"] except UnexpectedSatelliteError: - logger.warning( - f"No BT Tb transformation to F13 available for {platform=}. Using untransformed Tbs instead." - ) + logger.info(f"Using un-transformed TBs for {platform=}.") # Compute the BT weather mask - bt_weather_mask = bt.get_weather_mask( + + # Note: the variable returned is water_arr in cdralgos + # Note: in cdrv4, only part of the water_arr is labeled "BT weather" + # but I think that's because there are two parts to the + # BT weather filter + # bt_weather_mask = bt.get_weather_mask( + bt_water_mask = bt.get_water_mask( v37=bt_v37, h37=bt_h37, v22=bt_v22, @@ -532,16 +708,26 @@ def compute_initial_daily_ecdr_dataset( tb_mask=ecdr_ide_ds["invalid_tb_mask"].data[0, :, :], ln1=bt_coefs_init["vh37_lnline"], date=date, - # TODO: in the (distant?) future, we will want these to come + # TODO: in the future, we will want these to come # from a bt_coefs structure, not bt_coefs_init wintrc=bt_coefs_init["wintrc"], wslope=bt_coefs_init["wslope"], wxlimt=bt_coefs_init["wxlimt"], + is_smmr=platform_is_smmr(platform.id), ) - ecdr_ide_ds["bt_weather_mask"] = ( + # Note: + # Here, the "bt_weather_mask" is the almost "water_arr" variable in cdralgos + # However, it does not include the invalid_tb_mask (which here also + # includes the pole hole) nor does it include land, so: + # cdralgos_water_arr = bt_weather_mask | (ecdr_ide_ds["invalid_tb_mask"].data & ~ecdr_ide_ds["non_ocean_mask"].data) + # Note that this cdralgos_water_arr will differ because it will have + # zeros where there is no data because of the pole hole + + ecdr_ide_ds["bt_weather_mask"] = ( # note <-- name is weather_mask ("time", "y", "x"), - np.expand_dims(bt_weather_mask.data, axis=0), + # np.expand_dims(bt_weather_mask.data, axis=0), + np.expand_dims(bt_water_mask.data, axis=0), # note <-- var water_mask { "grid_mapping": "crs", "standard_name": "bt_weather_binary_mask", @@ -571,62 +757,115 @@ def compute_initial_daily_ecdr_dataset( for coef in bt_coefs_to_update: bt_coefs.pop(coef, None) - bt_coefs["vh37_lnline"] = bt.get_linfit( + """ + This is the fortran function: + call ret_linfit1( + > nc, nr, land, gdata, v37, h37, ln, lnchk, add1, water_arr, + > vh37) + """ + bt_coefs["vh37_iceline"] = bt.get_linfit( land_mask=ecdr_ide_ds["non_ocean_mask"].data, tb_mask=ecdr_ide_ds["invalid_tb_mask"].data[0, :, :], - tbx=ecdr_ide_ds["v37_day_si"].data[0, :, :], - tby=ecdr_ide_ds["h37_day_si"].data[0, :, :], + tbx=bt_v37, # Note: <-- not the ecdr_ide_ds key/value + tby=bt_h37, # Note: <-- not the ecdr_ide_ds key/value lnline=bt_coefs_init["vh37_lnline"], add=bt_coefs["add1"], - weather_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], + water_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], # note: water ) - bt_coefs["bt_wtp_v37"] = bt.calculate_water_tiepoint( - wtp_init=bt_coefs_init["bt_wtp_v37"], - weather_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], - tb=ecdr_ide_ds["v37_day_si"].data[0, :, :], - ) + # TODO: Keeping commented-out weather_mask kwarg calls to highlight + # the transition from weather_mask to water_mask in these function + # calls + if ancillary_source == "CDRv4": + logger.error("SKIPPING calculation of water tiepoint to match CDRv4") + bt_coefs["bt_wtp_v37"] = bt_coefs_init["bt_wtp_v37"] + else: + bt_coefs["bt_wtp_v37"] = bt.calculate_water_tiepoint( + wtp_init=bt_coefs_init["bt_wtp_v37"], + # weather_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], + water_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], + tb=bt_v37, + ) - bt_coefs["bt_wtp_h37"] = bt.calculate_water_tiepoint( - wtp_init=bt_coefs_init["bt_wtp_h37"], - weather_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], - tb=ecdr_ide_ds["h37_day_si"].data[0, :, :], - ) + if ancillary_source == "CDRv4": + logger.error("SKIPPING calculation of water tiepoint to match CDRv4") + bt_coefs["bt_wtp_h37"] = bt_coefs_init["bt_wtp_h37"] + else: + bt_coefs["bt_wtp_h37"] = bt.calculate_water_tiepoint( + wtp_init=bt_coefs_init["bt_wtp_h37"], + # weather_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], + water_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], + tb=bt_h37, + ) - bt_coefs["bt_wtp_v19"] = bt.calculate_water_tiepoint( - wtp_init=bt_coefs_init["bt_wtp_v19"], - weather_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], - tb=ecdr_ide_ds["v19_day_si"].data[0, :, :], - ) + if ancillary_source == "CDRv4": + logger.error("SKIPPING calculation of water tiepoint to match CDRv4") + bt_coefs["bt_wtp_v19"] = bt_coefs_init["bt_wtp_v19"] + else: + bt_coefs["bt_wtp_v19"] = bt.calculate_water_tiepoint( + wtp_init=bt_coefs_init["bt_wtp_v19"], + # weather_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], + water_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], + tb=bt_v19, + ) bt_coefs["ad_line_offset"] = bt.get_adj_ad_line_offset( wtp_x=bt_coefs["bt_wtp_v37"], wtp_y=bt_coefs["bt_wtp_h37"], - line_37v37h=bt_coefs["vh37_lnline"], + line_37v37h=bt_coefs["vh37_iceline"], ) - bt_coefs["v1937_lnline"] = bt.get_linfit( - land_mask=ecdr_ide_ds["non_ocean_mask"].data, - tb_mask=ecdr_ide_ds["invalid_tb_mask"].data[0, :, :], - tbx=ecdr_ide_ds["v37_day_si"].data[0, :, :], - tby=ecdr_ide_ds["v19_day_si"].data[0, :, :], - lnline=bt_coefs_init["v1937_lnline"], - add=bt_coefs["add2"], - weather_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], - tba=ecdr_ide_ds["h37_day_si"].data[0, :, :], - iceline=bt_coefs["vh37_lnline"], - ad_line_offset=bt_coefs["ad_line_offset"], - ) + # TODO: note that we are using bt_ not <_day_si> vars... + + # NOTE: cdralgos uses ret_linfit1() for NH sets 1,2 and SH set2 + # and uses ret_linfit2() for NH set 2 + pre_AMSR_platforms = ("n07", "F08", "F11", "F13", "F17") + if hemisphere == "south" and platform in pre_AMSR_platforms: + bt_coefs["v1937_iceline"] = bt.get_linfit( + land_mask=ecdr_ide_ds["non_ocean_mask"].data, + tb_mask=ecdr_ide_ds["invalid_tb_mask"].data[0, :, :], + tbx=bt_v37, + tby=bt_v19, + lnline=bt_coefs_init["v1937_lnline"], + add=bt_coefs["add2"], + water_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], + # these default to None; so using "ret_linfit1(), not ret_linfit2()" + # tba=bt_h37, + # iceline=bt_coefs["vh37_iceline"], + # ad_line_offset=bt_coefs["ad_line_offset"], + ) + else: + bt_coefs["v1937_iceline"] = bt.get_linfit( + land_mask=ecdr_ide_ds["non_ocean_mask"].data, + tb_mask=ecdr_ide_ds["invalid_tb_mask"].data[0, :, :], + tbx=bt_v37, + tby=bt_v19, + lnline=bt_coefs_init["v1937_lnline"], + add=bt_coefs["add2"], + water_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], + tba=bt_h37, + iceline=bt_coefs["vh37_iceline"], + ad_line_offset=bt_coefs["ad_line_offset"], + ) - # finally, compute the CDR. - bt_conc = cdr_bootstrap( - tb_v37=ecdr_ide_ds["v37_day_si"].data[0, :, :], - tb_h37=ecdr_ide_ds["h37_day_si"].data[0, :, :], - tb_v19=ecdr_ide_ds["v19_day_si"].data[0, :, :], + bt_conc = cdr_bootstrap_raw( + tb_v37=bt_v37, + tb_h37=bt_h37, + tb_v19=bt_v19, bt_coefs=bt_coefs, - platform=platform, + platform=platform.id, ) + # Set any bootstrap concentrations below 10% to 0. + # NOTE: This is probably necessary for land spillover algorithms + # to properly work with "exactly 0% siconc" calculations + # TODO: This 10% cutoff should be a configuration value + bt_conc[bt_conc < 10] = 0 + + # Remove bt_conc flags (e.g., missing) and set to NaN + bt_conc[bt_conc >= 250] = np.nan + + # Now, compute CDR version of NT estimate nt_conc = cdr_nasateam( tb_h19=ecdr_ide_ds["h19_day_si"].data[0, :, :], tb_v37=ecdr_ide_ds["v37_day_si"].data[0, :, :], @@ -634,6 +873,9 @@ def compute_initial_daily_ecdr_dataset( nt_tiepoints=nt_coefs["nt_tiepoints"], ) + # Need to set invalid ice to zero (note: this includes land) + nt_conc[ecdr_ide_ds["invalid_ice_mask"].data[0, :, :]] = 0 + cdr_conc_raw = calculate_cdr_conc( bt_conc=bt_conc, nt_conc=nt_conc, @@ -671,57 +913,62 @@ def compute_initial_daily_ecdr_dataset( cdr_conc = cdr_conc_raw.copy() cdr_conc[set_to_zero_sic] = 0 - # Will use spillover_applied with values: - # 1: NT2 - # 2: BT (not yet added) - # 4: NT (not yet added) - spillover_applied = np.zeros((ydim, xdim), dtype=np.uint8) + # TODO: Some minimum thresholding already occurs for bt_conc + # for bt_conc (and nt_conc?). I don't think the + # land_spillover algorithm should have to rely on this. cdr_conc_pre_spillover = cdr_conc.copy() - logger.debug("Applying NT2 land spillover technique...") - l90c = get_land90_conc_field( - hemisphere=hemisphere, - resolution=tb_data.resolution, - ) - adj123 = get_adj123_field( + + bt_asCDRv4_conc = bt_conc.copy() + bt_asCDRv4_conc[ecdr_ide_ds["bt_weather_mask"].data[0, :, :]] = 0 + bt_asCDRv4_conc[ecdr_ide_ds["invalid_ice_mask"].data[0, :, :]] = 0 + bt_asCDRv4_conc[ecdr_ide_ds["non_ocean_mask"].data] = 120.0 + bt_asCDRv4_conc[np.isnan(bt_asCDRv4_conc)] = 110.0 + # Here, difference from v4 is ~3E-05 (ie 4-byte floating point roundoff) + + nt_asCDRv4_conc = nt_conc.copy() + # Convert to 2-byte int + is_nt_nan = np.isnan(nt_asCDRv4_conc) + nt_asCDRv4_conc = (10 * nt_asCDRv4_conc).astype(np.int16) + + # In CDRv4, NT weather is only where weather condition removes NT conc + is_ntwx_CDRv4 = (nt_conc > 0) & ecdr_ide_ds["nt_weather_mask"].data[0, :, :] + nt_asCDRv4_conc[is_ntwx_CDRv4] = 0 + + nt_asCDRv4_conc[ecdr_ide_ds["invalid_ice_mask"].data[0, :, :]] = -10 + nt_asCDRv4_conc[is_nt_nan] = -10 + + # Note: the NT array here is np.int16 + nt_asCDRv4_conc = nt_asCDRv4_conc.copy() + is_negative_10 = nt_asCDRv4_conc == -10 + + nt_asCDRv4_conc = np.divide(nt_asCDRv4_conc, 10.0).astype(np.float32) + nt_asCDRv4_conc[is_negative_10] = -10 + + cdr_conc = land_spillover( + cdr_conc=cdr_conc, hemisphere=hemisphere, - resolution=tb_data.resolution, - ) - cdr_conc = apply_nt2_land_spillover( - conc=cdr_conc, - adj123=adj123.data, - l90c=l90c.data, - anchoring_siconc=50.0, - affect_dist3=True, + tb_data=tb_data, + algorithm=land_spillover_alg, + land_mask=non_ocean_mask.data, + ancillary_source=ancillary_source, + bt_conc=bt_asCDRv4_conc, + nt_conc=nt_asCDRv4_conc, + bt_wx=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], + fix_goddard_bt_error=True, ) - spillover_applied[cdr_conc_pre_spillover != cdr_conc.data] = 1 - - # Fill the NH pole hole - # TODO: Should check for NH and have grid-dependent filling scheme - # NOTE: Usually, the pole hole will be filled in pass 3 ("complete_daily"), - # along with melt onset calc. - if fill_the_pole_hole and hemisphere == NORTH: - cdr_conc_pre_polefill = cdr_conc.copy() - platform = get_platform_by_date(date) - near_pole_hole_mask = nh_polehole_mask( - date=date, - resolution=tb_data.resolution, - sat=platform, - ) - cdr_conc = fill_pole_hole( - conc=cdr_conc, - near_pole_hole_mask=near_pole_hole_mask.data, + # In case we used the BT-NT land spillover, set cdr_conc to zero + # where weather filtered...except that it's only the nt wx that gets + # applied...and only where NT wx removed NT conc + if land_spillover_alg == "BT_NT": + set_to_zero_sic = ( + ecdr_ide_ds["nt_weather_mask"].data[0, :, :] + | ecdr_ide_ds["invalid_ice_mask"].data[0, :, :] ) - logger.debug("Filled pole hole") - is_pole_filled = (cdr_conc != cdr_conc_pre_polefill) & (~np.isnan(cdr_conc)) - if "spatial_interpolation_bitmask" in ecdr_ide_ds.variables.keys(): - ecdr_ide_ds["spatial_interpolation_flag"] = ecdr_ide_ds[ - "spatial_interpolation_flag" - ].where( - ~is_pole_filled, - other=TB_SPATINT_BITMASK_MAP["pole_filled"], - ) - logger.debug("Updated spatial_interpolation with pole hole value") + cdr_conc[is_ntwx_CDRv4] = 0 + + spillover_applied = np.full((ydim, xdim), False, dtype=bool) + spillover_applied[cdr_conc_pre_spillover != cdr_conc.data] = True # Mask out non-ocean pixels and clamp conc to between 10-100%. # TODO: These values should be in a configuration file/structure @@ -729,6 +976,23 @@ def compute_initial_daily_ecdr_dataset( cdr_conc[cdr_conc < 10] = 0 cdr_conc[cdr_conc > 100] = 100 + # Set missing TBs to NaN + cdr_conc[ecdr_ide_ds["missing_tb_mask"].data[0, :, :]] = np.nan + bt_conc[ecdr_ide_ds["missing_tb_mask"].data[0, :, :]] = np.nan + nt_conc[ecdr_ide_ds["missing_tb_mask"].data[0, :, :]] = np.nan + + # TODO: Remove these CDRv4 flags? + # Apply the CDRv4 flags to the conc field for more direct comparison + flagmask = get_flagmask( + hemisphere=hemisphere, + resolution=tb_data.resolution, + ancillary_source=ancillary_source, + ) + + if flagmask is not None: + cdr_conc[flagmask > 250] = flagmask[flagmask > 250] + cdr_conc[np.isnan(cdr_conc)] = 255 + # Add the BT raw field to the dataset bt_conc = bt_conc / 100.0 # re-set range from 0 to 1 ecdr_ide_ds["raw_bt_seaice_conc"] = ( @@ -747,7 +1011,7 @@ def compute_initial_daily_ecdr_dataset( }, ) - # Add the BT coefficients to the raw_bt_seaice_conc DataArray + # Add the BT coefficients to the raw_bt_seaice_conc DataArray as attrs for attr in sorted(bt_coefs.keys()): if type(bt_coefs[attr]) in (float, int): ecdr_ide_ds.variables["raw_bt_seaice_conc"].attrs[attr] = bt_coefs[attr] @@ -811,7 +1075,7 @@ def compute_initial_daily_ecdr_dataset( # Add the QA bitmask field to the initial daily xarray dataset # 1: BT weather # 2: NT weather - # 4: NT2 spillover (or in general...any/all spillover corrections) + # 4: Land spillover applied (e.g,. NT2 land spillover) # 8: Missing TBs (exclusive of valid_ice mask) # 16: Invalid ice mask # 32: Spatial interpolation applied @@ -822,7 +1086,7 @@ def compute_initial_daily_ecdr_dataset( qa_bitmask = np.zeros((ydim, xdim), dtype=np.uint8) qa_bitmask[ecdr_ide_ds["bt_weather_mask"].data[0, :, :]] += 1 qa_bitmask[ecdr_ide_ds["nt_weather_mask"].data[0, :, :]] += 2 - qa_bitmask[spillover_applied == 1] += 4 + qa_bitmask[spillover_applied] += 4 qa_bitmask[invalid_tb_mask & ~ecdr_ide_ds["invalid_ice_mask"].data[0, :, :]] += 8 qa_bitmask[ecdr_ide_ds["invalid_ice_mask"].data[0, :, :]] += 16 qa_bitmask[ecdr_ide_ds["spatial_interpolation_flag"].data[0, :, :] != 0] += 32 @@ -876,16 +1140,32 @@ def initial_daily_ecdr_dataset( date: dt.date, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + land_spillover_alg: LAND_SPILL_ALGS, + ancillary_source: ANCILLARY_SOURCES, ) -> xr.Dataset: """Create xr dataset containing the first pass of daily enhanced CDR.""" - tb_data = get_ecdr_tb_data( - date=date, - hemisphere=hemisphere, - ) + # TODO: if/else should be temporary. It's just a way to clearly divide how a + # requestion for 12.5km or 25km ECDR is handled. + if resolution == "12.5": + # In the 12.5km case, we try to get 12.5km Tbs, but sometimes we get + # 25km (older, non-AMSR platforms) + tb_data = get_ecdr_tb_data( + date=date, + hemisphere=hemisphere, + ) + else: + # In the 25km case, we always expect 25km Tbs + tb_data = get_25km_ecdr_tb_data( + date=date, + hemisphere=hemisphere, + ) + initial_ecdr_ds = compute_initial_daily_ecdr_dataset( date=date, hemisphere=hemisphere, tb_data=tb_data, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) # If the computed ide_ds is not on the desired grid (ie resolution), @@ -918,6 +1198,7 @@ def write_ide_netcdf( "raw_bt_seaice_conc", ), tb_fields: Iterable[str] = ("h19_day_si", "h37_day_si"), + # tb_fields: Iterable[str] = ("h19_day_si", "h37_day_si", "v19_day_si", "v22_day_si", "v37_day_si"), ) -> Path: """Write the initial_ecdr_ds to a netCDF file and return the path.""" logger.info(f"Writing netCDF of initial_daily eCDR file to: {output_filepath}") @@ -970,7 +1251,7 @@ def get_idecdr_dir(*, intermediate_output_dir: Path) -> Path: def get_idecdr_filepath( *, date: dt.date, - platform, + platform_id: SUPPORTED_PLATFORM_ID, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, intermediate_output_dir: Path, @@ -980,7 +1261,7 @@ def get_idecdr_filepath( standard_fn = standard_daily_filename( hemisphere=hemisphere, date=date, - sat=platform, + platform_id=platform_id, resolution=resolution, ) idecdr_fn = "idecdr_" + standard_fn @@ -999,12 +1280,14 @@ def make_idecdr_netcdf( resolution: ECDR_SUPPORTED_RESOLUTIONS, intermediate_output_dir: Path, excluded_fields: Iterable[str], + land_spillover_alg: LAND_SPILL_ALGS, + ancillary_source: ANCILLARY_SOURCES, overwrite_ide: bool = False, ) -> None: - platform = get_platform_by_date(date) + platform = PLATFORM_CONFIG.get_platform_by_date(date) output_path = get_idecdr_filepath( date=date, - platform=platform, + platform_id=platform.id, hemisphere=hemisphere, intermediate_output_dir=intermediate_output_dir, resolution=resolution, @@ -1016,6 +1299,8 @@ def make_idecdr_netcdf( date=date, hemisphere=hemisphere, resolution=resolution, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) written_ide_ncfile = write_ide_netcdf( @@ -1036,6 +1321,8 @@ def create_idecdr_for_date( intermediate_output_dir: Path, overwrite_ide: bool = False, verbose_intermed_ncfile: bool = False, + land_spillover_alg: LAND_SPILL_ALGS, + ancillary_source: ANCILLARY_SOURCES, ) -> None: excluded_fields = [] if not verbose_intermed_ncfile: @@ -1046,10 +1333,10 @@ def create_idecdr_for_date( "h37_day", "v37_day", # "h19_day_si", # include this field for melt onset calculation - "v19_day_si", - "v22_day_si", + "v19_day_si", # comment out this line to get all TB fields + "v22_day_si", # comment out this line to get all TB fields # "h37_day_si", # include this field for melt onset calculation - "v37_day_si", + "v37_day_si", # comment out this line to get all TB fields "NT_icecon_min", "non_ocean_mask", "pole_mask", @@ -1065,6 +1352,8 @@ def create_idecdr_for_date( intermediate_output_dir=intermediate_output_dir, excluded_fields=excluded_fields, overwrite_ide=overwrite_ide, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) except Exception as e: @@ -1094,6 +1383,11 @@ def create_idecdr_for_date( required=True, type=click.Choice(get_args(Hemisphere)), ) +@click.option( + "--land-spillover-alg", + required=True, + type=click.Choice(get_args(LandSpilloverMethods)), +) @click.option( "--base-output-dir", required=True, @@ -1130,13 +1424,20 @@ def create_idecdr_for_date( default=False, type=bool, ) +@click.option( + "--ancillary-source", + required=True, + type=click.Choice(get_args(ANCILLARY_SOURCES)), +) def cli( *, date: dt.date, hemisphere: Hemisphere, base_output_dir: Path, resolution: ECDR_SUPPORTED_RESOLUTIONS, + land_spillover_alg: LAND_SPILL_ALGS, verbose_intermed_ncfile: bool, + ancillary_source: ANCILLARY_SOURCES, ) -> None: """Run the initial daily ECDR algorithm with AMSR2 data. @@ -1155,4 +1456,6 @@ def cli( resolution=resolution, intermediate_output_dir=intermediate_output_dir, verbose_intermed_ncfile=verbose_intermed_ncfile, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) diff --git a/seaice_ecdr/make_25km_cdr.py b/seaice_ecdr/make_25km_cdr.py new file mode 100644 index 00000000..15bcfcac --- /dev/null +++ b/seaice_ecdr/make_25km_cdr.py @@ -0,0 +1,62 @@ +"""Produce versions of the 25km CDR. + +F17 starts on 2008-01-01. +AMSR2 starts on 2012-07-02. + +So we should target 2012-07-02 as the start date for comparisons between the two +platforms. + +We want to be able to: +* Use both nasateam land spillover techniques. +* +""" + +from pathlib import Path +from typing import Literal + +import xarray as xr +from pm_tb_data._types import Hemisphere + +from seaice_ecdr.grid_id import get_grid_id +from seaice_ecdr.spillover import LAND_SPILL_ALGS +from seaice_ecdr.util import get_complete_output_dir + + +def get_25km_daily_cdr( + *, + alg: LAND_SPILL_ALGS, + hemisphere: Hemisphere, + platform: Literal["am2", "F17"], +) -> xr.Dataset: + """Return the 25km CDR for the given algorithm.""" + if alg == "BT_NT": + base_dir = Path("/share/apps/G02202_V5/25km/BT_NT") + elif alg == "NT2": + base_dir = Path("/share/apps/G02202_V5/25km/NT2") + else: + raise RuntimeError(f"Unrecognized alg {alg}") + + complete_dir = get_complete_output_dir( + base_output_dir=base_dir, hemisphere=hemisphere, is_nrt=False + ) + + grid_id = get_grid_id(hemisphere=hemisphere, resolution="25") + + glob_pattern = f"sic_{grid_id}_*_{platform}_*.nc" + matching_files = list(complete_dir.glob(f"**/{glob_pattern}")) + + if not matching_files: + raise RuntimeError(f"No files found matching {glob_pattern} in {base_dir}") + + xr_ds = xr.open_mfdataset(matching_files) + # xr_ds = xr.open_mfdataset(matching_files, engine="raterio") + # Convert `DatetimeGregorian` to native date. Not sure why this happens when engine is rasterio. + # xr_ds["time"] = [ + # dt.date(thing.year, thing.month, thing.day) for thing in xr_ds.time.values + # ] + + return xr_ds + + +if __name__ == "__main__": + f17_nt = get_25km_daily_cdr(alg="BT_NT", hemisphere="north", platform="F17") diff --git a/seaice_ecdr/monthly.py b/seaice_ecdr/monthly.py index bbc8d019..7e82509f 100644 --- a/seaice_ecdr/monthly.py +++ b/seaice_ecdr/monthly.py @@ -35,16 +35,17 @@ from loguru import logger from pm_tb_data._types import NORTH, Hemisphere -from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS, SUPPORTED_SAT -from seaice_ecdr.ancillary import flag_value_for_meaning +from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS +from seaice_ecdr.ancillary import ANCILLARY_SOURCES, flag_value_for_meaning from seaice_ecdr.checksum import write_checksum_file from seaice_ecdr.complete_daily_ecdr import get_ecdr_filepath from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR from seaice_ecdr.nc_attrs import get_global_attrs +from seaice_ecdr.platforms import SUPPORTED_PLATFORM_ID from seaice_ecdr.util import ( get_complete_output_dir, get_num_missing_pixels, - sat_from_filename, + platform_id_from_filename, standard_monthly_filename, ) @@ -52,10 +53,10 @@ def check_min_days_for_valid_month( *, daily_ds_for_month: xr.Dataset, - sat: SUPPORTED_SAT, + platform_id: SUPPORTED_PLATFORM_ID, ) -> None: days_in_ds = len(daily_ds_for_month.time) - if sat == "n07": + if platform_id == "n07": min_days = 10 else: min_days = 20 @@ -103,22 +104,24 @@ def _get_daily_complete_filepaths_for_month( return data_list -def _sat_for_month(*, sats: list[SUPPORTED_SAT]) -> SUPPORTED_SAT: - """Returns the satellite from this month given a list of input satellites. +def _platform_id_for_month( + *, platform_ids: list[SUPPORTED_PLATFORM_ID] +) -> SUPPORTED_PLATFORM_ID: + """Returns the platform ID from this month given a list of input platforms. - The sat for monthly files is based on which sat contributes most to the - month. If two sats contribute equally, use the latest sat in the series. + The platform for monthly files is based on which platform contributes most to the + month. If two platforms contribute equally, use the latest platform in the series. - Function assumes the list of satellites is already sorted (i.e., the latest - satellite is `sats[-1]`). + Function assumes the list of platform ids is already sorted (i.e., the latest + platform is `platform_ids[-1]`). """ - # More than one sat, we need to choose the most common/latest in the series. - # `Counter` returns a dict keyed by `sat` with counts as values: - count = Counter(sats) - most_common_sats = count.most_common() - most_common_and_latest_sat = most_common_sats[-1][0] + # More than one platform, we need to choose the most common/latest in the series. + # `Counter` returns a dict keyed by `platform` with counts as values: + count = Counter(platform_ids) + most_common_platform_ids = count.most_common() + most_common_and_latest_platform_id = most_common_platform_ids[-1][0] - return most_common_and_latest_sat + return most_common_and_latest_platform_id def get_daily_ds_for_month( @@ -156,19 +159,19 @@ def get_daily_ds_for_month( data=data_list, dims=("time",), coords=dict(time=ds.time) ) - # Extract `sat` from the filenames contributing to this + # Extract `platform_id` from the filenames contributing to this # dataset. Ideally, we would use a custom `combine_attrs` when reading the - # data with `xr.open_mfdataset` in order to get the sat/sensor from global + # data with `xr.open_mfdataset` in order to get the platform/sensor from global # attrs in each of the contributing files. Unfortunately this interface is # poorly documented and seems to have limited support. E.g., see # https://github.com/pydata/xarray/issues/6679 - sats = [] + platform_ids = [] for filepath in data_list: - sats.append(sat_from_filename(filepath.name)) + platform_ids.append(platform_id_from_filename(filepath.name)) - sat = _sat_for_month(sats=sats) + platform_id = _platform_id_for_month(platform_ids=platform_ids) - ds.attrs["sat"] = sat + ds.attrs["platform_id"] = platform_id return ds @@ -342,6 +345,7 @@ def calc_cdr_seaice_conc_monthly( daily_ds_for_month: xr.Dataset, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, ) -> xr.DataArray: """Create the `cdr_seaice_conc_monthly` variable.""" daily_conc_for_month = daily_ds_for_month.cdr_seaice_conc @@ -351,6 +355,7 @@ def calc_cdr_seaice_conc_monthly( seaice_conc_var=conc_monthly, hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) conc_monthly.name = "cdr_seaice_conc_monthly" @@ -502,9 +507,10 @@ def calc_surface_type_mask_monthly( def make_monthly_ds( *, daily_ds_for_month: xr.Dataset, - sat: SUPPORTED_SAT, + platform_id: SUPPORTED_PLATFORM_ID, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, ) -> xr.Dataset: """Create a monthly dataset from daily data. @@ -514,7 +520,7 @@ def make_monthly_ds( # Min-day check check_min_days_for_valid_month( daily_ds_for_month=daily_ds_for_month, - sat=sat, + platform_id=platform_id, ) # create `cdr_seaice_conc_monthly`. This is the combined monthly SIC. @@ -523,6 +529,7 @@ def make_monthly_ds( daily_ds_for_month=daily_ds_for_month, hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) # Create `stdev_of_cdr_seaice_conc_monthly`, the standard deviation of the @@ -576,11 +583,11 @@ def make_monthly_ds( temporality="monthly", aggregate=False, source=", ".join([fp.item().name for fp in daily_ds_for_month.filepaths]), - # TODO: consider providing all sats that went into month? This would be + # TODO: consider providing all platforms that went into month? This would be # consistent with how we handle the aggregate filenames. Is it - # misleading to indicate that a month is a single sat when it may not + # misleading to indicate that a month is a single platform when it may not # really be? - sats=[sat], + platform_ids=[platform_id], ) monthly_ds.attrs.update(monthly_ds_global_attrs) @@ -598,7 +605,7 @@ def get_monthly_filepath( *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, - sat: SUPPORTED_SAT, + platform_id: SUPPORTED_PLATFORM_ID, year: int, month: int, complete_output_dir: Path, @@ -610,7 +617,7 @@ def get_monthly_filepath( output_fn = standard_monthly_filename( hemisphere=hemisphere, resolution=resolution, - sat=sat, + platform_id=platform_id, year=year, month=month, ) @@ -627,6 +634,7 @@ def make_monthly_nc( hemisphere: Hemisphere, complete_output_dir: Path, resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, ) -> Path: daily_ds_for_month = get_daily_ds_for_month( year=year, @@ -636,12 +644,12 @@ def make_monthly_nc( resolution=resolution, ) - sat = daily_ds_for_month.sat + platform_id = daily_ds_for_month.platform_id output_path = get_monthly_filepath( hemisphere=hemisphere, resolution=resolution, - sat=sat, + platform_id=platform_id, year=year, month=month, complete_output_dir=complete_output_dir, @@ -649,9 +657,10 @@ def make_monthly_nc( monthly_ds = make_monthly_ds( daily_ds_for_month=daily_ds_for_month, - sat=sat, + platform_id=platform_id, hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) # Set `x` and `y` `_FillValue` to `None`. Although unset initially, `xarray` @@ -736,6 +745,11 @@ def make_monthly_nc( required=True, type=click.Choice(get_args(ECDR_SUPPORTED_RESOLUTIONS)), ) +@click.option( + "--ancillary-source", + required=True, + type=click.Choice(get_args(ANCILLARY_SOURCES)), +) def cli( *, year: int, @@ -745,6 +759,7 @@ def cli( hemisphere: Hemisphere, base_output_dir: Path, resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, ): if end_year is None: end_year = year @@ -769,6 +784,7 @@ def cli( complete_output_dir=complete_output_dir, hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) except Exception: error_periods.append(period) diff --git a/seaice_ecdr/monthly_aggregate.py b/seaice_ecdr/monthly_aggregate.py index 2387b5b7..ed64de86 100644 --- a/seaice_ecdr/monthly_aggregate.py +++ b/seaice_ecdr/monthly_aggregate.py @@ -20,7 +20,7 @@ from seaice_ecdr.nc_util import concatenate_nc_files from seaice_ecdr.util import ( get_complete_output_dir, - sat_from_filename, + platform_id_from_filename, standard_monthly_aggregate_filename, ) @@ -38,8 +38,8 @@ def _get_monthly_complete_filepaths( # TODO: the monthly filenames are encoded in the # `util.standard_monthly_filename` func. Can we adapt that to use wildcards # and return a glob-able string? - # North Monthly files: sic_psn12.5_YYYYMM_sat_v05r00.nc - # South Monthly files: sic_pss12.5_YYYYMM_sat_v05r00.nc + # North Monthly files: sic_psn12.5_{YYYYMM}_{platform_id}_v05r00.nc + # South Monthly files: sic_pss12.5_{YYYYMM}_{platform_id}_v05r00.nc filename_pattern = f"sic_ps{hemisphere[0]}{resolution}_*.nc" monthly_files = list(sorted(monthly_dir.glob(filename_pattern))) @@ -102,7 +102,7 @@ def _update_ncrcat_monthly_ds( temporality="monthly", aggregate=True, source=", ".join([fp.name for fp in monthly_filepaths]), - sats=[sat_from_filename(fp.name) for fp in monthly_filepaths], + platform_ids=[platform_id_from_filename(fp.name) for fp in monthly_filepaths], ) agg_ds.attrs = monthly_aggregate_ds_global_attrs diff --git a/seaice_ecdr/multiprocess_daily.py b/seaice_ecdr/multiprocess_daily.py index 6cafc250..fe578496 100644 --- a/seaice_ecdr/multiprocess_daily.py +++ b/seaice_ecdr/multiprocess_daily.py @@ -3,16 +3,19 @@ from itertools import chain from multiprocessing import Pool from pathlib import Path -from typing import Final, get_args +from typing import get_args import click from pm_tb_data._types import Hemisphere +from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS +from seaice_ecdr.ancillary import ANCILLARY_SOURCES from seaice_ecdr.cli.util import datetime_to_date from seaice_ecdr.complete_daily_ecdr import create_standard_ecdr_for_dates from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR from seaice_ecdr.initial_daily_ecdr import create_idecdr_for_date -from seaice_ecdr.platforms import get_first_platform_start_date +from seaice_ecdr.platforms import PLATFORM_CONFIG +from seaice_ecdr.spillover import LAND_SPILL_ALGS from seaice_ecdr.temporal_composite_daily import make_tiecdr_netcdf from seaice_ecdr.util import ( date_range, @@ -21,6 +24,10 @@ raise_error_for_dates, ) +# TODO: +# ancillary sources are given in seaice_ecdr.ancillary.ANCILLARY_SOURCES +# but are manually spelled out in the click options here + @click.command(name="multiprocess-daily") @click.option( @@ -80,12 +87,34 @@ "--overwrite", is_flag=True, ) +@click.option( + "--land-spillover-alg", + required=False, + # type=click.Choice(["BT_NT", "NT2", "ILS"]), + type=click.Choice(get_args(LAND_SPILL_ALGS)), + default="BT_NT", +) +@click.option( + "--ancillary-source", + required=True, + # type=click.Choice(["CDRv4", "CDRv5"]), + type=click.Choice(get_args(ANCILLARY_SOURCES)), + default="CDRv5", +) +@click.option( + "--resolution", + required=True, + type=click.Choice(get_args(ECDR_SUPPORTED_RESOLUTIONS)), +) def cli( start_date: dt.date, end_date: dt.date, hemisphere: Hemisphere, base_output_dir: Path, overwrite: bool, + land_spillover_alg: LAND_SPILL_ALGS, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, ): dates = list(date_range(start_date=start_date, end_date=end_date)) dates_by_year = get_dates_by_year(dates) @@ -93,7 +122,7 @@ def cli( # craete a range of dates that includes the interpolation range. This # ensures that data expected for temporal interpolation of the requested # dates has the data it needs. - earliest_date = get_first_platform_start_date() + earliest_date = PLATFORM_CONFIG.get_first_platform_start_date() initial_start_date = max(earliest_date, start_date - dt.timedelta(days=5)) initial_end_date = min(end_date + dt.timedelta(days=5), dt.date.today()) initial_file_dates = list( @@ -106,14 +135,14 @@ def cli( is_nrt=False, ) - resolution: Final = "12.5" - _create_idecdr_wrapper = partial( create_idecdr_for_date, hemisphere=hemisphere, resolution=resolution, intermediate_output_dir=intermediate_output_dir, overwrite_ide=overwrite, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) _create_tiecdr_wrapper = partial( @@ -122,6 +151,8 @@ def cli( resolution=resolution, intermediate_output_dir=intermediate_output_dir, overwrite_tie=overwrite, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) _complete_daily_wrapper = partial( @@ -130,6 +161,8 @@ def cli( resolution=resolution, base_output_dir=base_output_dir, overwrite_cde=overwrite, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) # Use 6 cores. This seems to perform well. Using the max number available diff --git a/seaice_ecdr/nc_attrs.py b/seaice_ecdr/nc_attrs.py index e67ff9ae..d078c8c6 100644 --- a/seaice_ecdr/nc_attrs.py +++ b/seaice_ecdr/nc_attrs.py @@ -8,8 +8,8 @@ import pandas as pd import xarray as xr -from seaice_ecdr._types import SUPPORTED_SAT from seaice_ecdr.constants import ECDR_PRODUCT_VERSION +from seaice_ecdr.platforms import PLATFORM_CONFIG, SUPPORTED_PLATFORM_ID # Datetime string format for date-related attributes. DATE_STR_FMT = "%Y-%m-%dT%H:%M:%SZ" @@ -123,65 +123,6 @@ def _get_time_coverage_attrs( return time_coverage_attrs -# Here’s what the GCMD platform long name should be based on sensor/platform short name: -PLATFORMS_FOR_SATS: dict[SUPPORTED_SAT, str] = dict( - am2="GCOM-W1 > Global Change Observation Mission 1st-Water", - ame="Aqua > Earth Observing System, Aqua", - F17="DMSP 5D-3/F17 > Defense Meteorological Satellite Program-F17", - F13="DMSP 5D-2/F13 > Defense Meteorological Satellite Program-F13", - F11="DMSP 5D-2/F11 > Defense Meteorological Satellite Program-F11", - F08="DMSP 5D-2/F8 > Defense Meteorological Satellite Program-F8", - n07="Nimbus-7", -) - - -def _unique_sats(sats: list[SUPPORTED_SAT]) -> list[SUPPORTED_SAT]: - """Return the unique set of satellites. - - Order is preserved. - """ - # `set` is unordered. This gets the unique list of `sats`. - unique_sats = list(dict.fromkeys(sats)) - - return unique_sats - - -def get_platforms_for_sats(sats: list[SUPPORTED_SAT]) -> list[str]: - """Get the unique set of platforms for the given list of sats. - - Assumes `sats` is ordered from oldest->newest. - """ - # `set` is unordered. This gets the unique list of `sats`. - unique_sats = _unique_sats(sats) - platforms_for_sat = [PLATFORMS_FOR_SATS[sat] for sat in unique_sats] - - return platforms_for_sat - - -# Here’s what the GCMD sensor name should be based on sensor short name: -SENSORS_FOR_SATS: dict[SUPPORTED_SAT, str] = dict( - am2="AMSR2 > Advanced Microwave Scanning Radiometer 2", - ame="AMSR-E > Advanced Microwave Scanning Radiometer-EOS", - F17="SSMIS > Special Sensor Microwave Imager/Sounder", - # TODO: de-dup SSM/I text? - F13="SSM/I > Special Sensor Microwave/Imager", - F11="SSM/I > Special Sensor Microwave/Imager", - F08="SSM/I > Special Sensor Microwave/Imager", - n07="SMMR > Scanning Multichannel Microwave Radiometer", -) - - -def get_sensors_for_sats(sats: list[SUPPORTED_SAT]) -> list[str]: - """Get the unique set of sensors for the given list of sats. - - Assumes `sats` is ordered from oldest->newest. - """ - unique_sats = _unique_sats(sats) - sensors_for_sat = [SENSORS_FOR_SATS[sat] for sat in unique_sats] - - return sensors_for_sat - - def get_global_attrs( *, time: xr.DataArray, @@ -196,7 +137,7 @@ def get_global_attrs( # of source filenames. source: str, # List of satellites that provided data for the given netcdf file. - sats: list[SUPPORTED_SAT], + platform_ids: list[SUPPORTED_PLATFORM_ID], ) -> dict[str, Any]: """Return a dictionary containing the global attributes for a standard ECDR NetCDF file. @@ -207,8 +148,11 @@ def get_global_attrs( # TODO: support different resolutions, platforms, and sensors! resolution: Final = "12.5" - platform = ", ".join(get_platforms_for_sats(sats)) - sensor = ", ".join(get_sensors_for_sats(sats)) + platforms = [ + PLATFORM_CONFIG.platform_for_id(platform_id) for platform_id in platform_ids + ] + platform = ", ".join([platform.name for platform in platforms]) + sensor = ", ".join([platform.sensor for platform in platforms]) time_coverage_attrs = _get_time_coverage_attrs( temporality=temporality, diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index aea205e7..f2eaf4c7 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -14,6 +14,7 @@ download_latest_lance_files, ) +from seaice_ecdr.ancillary import ANCILLARY_SOURCES from seaice_ecdr.cli.util import datetime_to_date from seaice_ecdr.complete_daily_ecdr import ( complete_daily_ecdr_ds, @@ -27,7 +28,7 @@ write_ide_netcdf, ) from seaice_ecdr.platforms import ( - get_platform_by_date, + PLATFORM_CONFIG, ) from seaice_ecdr.tb_data import EcdrTbData, map_tbs_to_ecdr_channels from seaice_ecdr.temporal_composite_daily import ( @@ -48,6 +49,7 @@ def compute_nrt_initial_daily_ecdr_dataset( *, date: dt.date, hemisphere: Hemisphere, + ancillary_source: ANCILLARY_SOURCES = "CDRv5", ): """Create an initial daily ECDR NetCDF using NRT LANCE AMSR2 data.""" # TODO: handle missing data case. @@ -57,7 +59,7 @@ def compute_nrt_initial_daily_ecdr_dataset( data_dir=LANCE_NRT_DATA_DIR, ) data_source: Final = "LANCE AU_SI12" - platform: Final = "am2" + platform_id: Final = "am2" ecdr_tbs = map_tbs_to_ecdr_channels( # TODO/Note: this mapping is the same as used for `am2`. @@ -79,13 +81,15 @@ def compute_nrt_initial_daily_ecdr_dataset( tbs=ecdr_tbs, resolution=LANCE_RESOLUTION, data_source=data_source, - platform=platform, + platform_id=platform_id, ) nrt_initial_ecdr_ds = compute_initial_daily_ecdr_dataset( date=date, hemisphere=hemisphere, tb_data=tb_data, + land_spillover_alg="NT2", + ancillary_source=ancillary_source, ) return nrt_initial_ecdr_ds @@ -98,11 +102,11 @@ def read_or_create_and_read_nrt_idecdr_ds( intermediate_output_dir: Path, overwrite: bool, ): - platform = get_platform_by_date(date) + platform = PLATFORM_CONFIG.get_platform_by_date(date) idecdr_filepath = get_idecdr_filepath( hemisphere=hemisphere, date=date, - platform=platform, + platform_id=platform.id, intermediate_output_dir=intermediate_output_dir, resolution="12.5", ) @@ -147,6 +151,7 @@ def temporally_interpolated_nrt_ecdr_dataset( intermediate_output_dir: Path, overwrite: bool, days_to_look_previously: int = 5, + ancillary_source: ANCILLARY_SOURCES = "CDRv5", ) -> xr.Dataset: init_datasets = [] for date in date_range( @@ -169,6 +174,7 @@ def temporally_interpolated_nrt_ecdr_dataset( data_stack=data_stack, interp_range=days_to_look_previously, one_sided_limit=days_to_look_previously, + ancillary_source=ancillary_source, ) return temporally_interpolated_ds @@ -213,6 +219,7 @@ def nrt_ecdr_for_day( hemisphere: Hemisphere, base_output_dir: Path, overwrite: bool, + ancillary_source: ANCILLARY_SOURCES = "CDRv5", ): """Create an initial daily ECDR NetCDF using NRT LANCE AMSR2 data.""" complete_output_dir = get_complete_output_dir( @@ -254,6 +261,7 @@ def nrt_ecdr_for_day( complete_output_dir=complete_output_dir, intermediate_output_dir=intermediate_output_dir, is_nrt=True, + ancillary_source=ancillary_source, ) written_cde_ncfile = write_cde_netcdf( diff --git a/seaice_ecdr/platforms.py b/seaice_ecdr/platforms.py deleted file mode 100644 index 2e3f1917..00000000 --- a/seaice_ecdr/platforms.py +++ /dev/null @@ -1,268 +0,0 @@ -"""platforms.py. - -Routines for dealing with the platform (satellite) and sensors -for CDRv5 - - -TODO: There are a couple of date ranges for which we do not want - to produce data files: - Aug 12-24, 1984 because there is no SMMR data - Dec 3, 1987 - Jan 12, 1988 because no F08 data - Also, anything prior to the start of the data record, - eg prior to Oct 25, 1978 -""" - -import datetime as dt -import os -from collections import OrderedDict -from functools import cache -from typing import cast, get_args - -import yaml -from loguru import logger - -from seaice_ecdr._types import SUPPORTED_SAT - -# TODO: De-dup with nc_attrs.py -# Here’s what the GCMD platform long name should be based on sensor/platform short name: -PLATFORMS_FOR_SATS: dict[SUPPORTED_SAT, str] = dict( - am2="GCOM-W1 > Global Change Observation Mission 1st-Water", - ame="Aqua > Earth Observing System, Aqua", - F17="DMSP 5D-3/F17 > Defense Meteorological Satellite Program-F17", - F13="DMSP 5D-2/F13 > Defense Meteorological Satellite Program-F13", - F11="DMSP 5D-2/F11 > Defense Meteorological Satellite Program-F11", - F08="DMSP 5D-2/F8 > Defense Meteorological Satellite Program-F8", - n07="Nimbus-7", -) - - -# TODO: De-dup with nc_attrs.py -# Here’s what the GCMD sensor name should be based on sensor short name: -SENSORS_FOR_SATS: dict[SUPPORTED_SAT, str] = dict( - am2="AMSR2 > Advanced Microwave Scanning Radiometer 2", - ame="AMSR-E > Advanced Microwave Scanning Radiometer-EOS", - F17="SSMIS > Special Sensor Microwave Imager/Sounder", - # TODO: de-dup SSM/I text? - F13="SSM/I > Special Sensor Microwave/Imager", - F11="SSM/I > Special Sensor Microwave/Imager", - F08="SSM/I > Special Sensor Microwave/Imager", - n07="SMMR > Scanning Multichannel Microwave Radiometer", -) - - -# These first and last dates were adapted from the cdrv4 file -# https://bitbucket.org/nsidc/seaice_cdr/src/master/source/config/cdr.yml -# of commit: -# https://bitbucket.org/nsidc/seaice_cdr/commits/c9c632e73530554d8acfac9090baeb1e35755897 -PLATFORM_AVAILABILITY: OrderedDict[SUPPORTED_SAT, dict] = OrderedDict( - n07={"first_date": dt.date(1978, 10, 25), "last_date": dt.date(1987, 7, 9)}, - F08={"first_date": dt.date(1987, 7, 10), "last_date": dt.date(1991, 12, 2)}, - F11={"first_date": dt.date(1991, 12, 3), "last_date": dt.date(1995, 9, 30)}, - F13={"first_date": dt.date(1995, 10, 1), "last_date": dt.date(2007, 12, 31)}, - F17={"first_date": dt.date(2008, 1, 1), "last_date": None}, - ame={"first_date": dt.date(2002, 6, 1), "last_date": dt.date(2011, 10, 3)}, - am2={"first_date": dt.date(2012, 7, 2), "last_date": None}, -) - - -def read_platform_start_dates_cfg_override( - start_dates_cfg_filename, -) -> OrderedDict[dt.date, SUPPORTED_SAT]: - """The "platform_start_dates" dictionary is an OrderedDict - of keys (dates) with corresponding platforms/sats (values) - - Note: It seems like yaml can't safe_load() an OrderedDict. - """ - try: - with open(start_dates_cfg_filename, "r") as config_file: - file_dict = yaml.safe_load(config_file) - except FileNotFoundError: - raise RuntimeError( - f"Could not find specified start_dates config file: {start_dates_cfg_filename}" - ) - - platform_start_dates = OrderedDict(file_dict) - - # Assert that the keys are ordered. - assert sorted(platform_start_dates.keys()) == list(platform_start_dates.keys()) - # Assert that the platforms are in our list of supported sats. - assert all( - [ - platform in get_args(SUPPORTED_SAT) - for platform in platform_start_dates.values() - ] - ) - - return platform_start_dates - - -@cache -def get_platform_start_dates() -> OrderedDict[dt.date, SUPPORTED_SAT]: - """Return dict of start dates for differnt platforms. - - Platform start dates can be overridden via a YAML override file specified by - the `PLATFORM_START_DATES_CFG_OVERRIDE_FILE` envvar. - """ - - if override_file := os.environ.get("PLATFORM_START_DATES_CFG_OVERRIDE_FILE"): - _platform_start_dates = read_platform_start_dates_cfg_override(override_file) - logger.info(f"Read platform start dates from {override_file}") - - else: - _platform_start_dates = OrderedDict( - { - dt.date(1978, 10, 25): "n07", - dt.date(1987, 7, 10): "F08", - dt.date(1991, 12, 3): "F11", - dt.date(1995, 10, 1): "F13", - dt.date(2002, 6, 1): "ame", # AMSR-E is first AMSR sat - # F17 starts while AMSR-E is up, on 2008-01-01. We don't use - # F17 until 2011-10-04. - dt.date(2011, 10, 4): "F17", - dt.date(2012, 7, 3): "am2", # AMSR2 - } - ) - - _platform_start_dates = cast( - OrderedDict[dt.date, SUPPORTED_SAT], _platform_start_dates - ) - - assert _platform_start_dates_are_consistent( - platform_start_dates=_platform_start_dates - ) - - return _platform_start_dates - - -def _platform_available_for_date( - *, - date: dt.date, - platform: SUPPORTED_SAT, - platform_availability: OrderedDict = PLATFORM_AVAILABILITY, -) -> bool: - """Determine if platform is available on this date.""" - # First, verify the values of the first listed platform - first_available_date = platform_availability[platform]["first_date"] - if date < first_available_date: - print( - f""" - Satellite {platform} is not available on date {date} - {date} is before first_available_date {first_available_date} - Date info: {platform_availability[platform]} - """ - ) - return False - - try: - last_available_date = platform_availability[platform]["last_date"] - try: - if date > last_available_date: - print( - f""" - Satellite {platform} is not available on date {date} - {date} is after last_available_date {last_available_date} - Date info: {platform_availability[platform]} - """ - ) - return False - except TypeError as e: - if last_available_date is None: - pass - else: - raise e - except IndexError as e: - # last_date is set to None if platform is still providing new data - if last_available_date is None: - pass - else: - raise e - - return True - - -def _platform_start_dates_are_consistent( - *, - platform_start_dates: OrderedDict[dt.date, SUPPORTED_SAT], -) -> bool: - """Return whether the provided start date structure is valid.""" - date_list = list(platform_start_dates.keys()) - platform_list = list(platform_start_dates.values()) - try: - date = date_list[0] - platform = platform_list[0] - assert _platform_available_for_date( - date=date, - platform=platform, - platform_availability=PLATFORM_AVAILABILITY, - ) - - for idx in range(1, len(date_list)): - date = date_list[idx] - platform = platform_list[idx] - - # Check the end of the prior platform's date range - prior_date = date - dt.timedelta(days=1) - prior_platform = platform_list[idx - 1] - assert _platform_available_for_date( - date=prior_date, - platform=prior_platform, - platform_availability=PLATFORM_AVAILABILITY, - ) - - # Check this platform's first available date - assert _platform_available_for_date( - date=date, - platform=platform, - platform_availability=PLATFORM_AVAILABILITY, - ) - except AssertionError: - raise RuntimeError( - f""" - platform start dates are not consistent - platform_start_dates: {platform_start_dates} - platform_availability: {PLATFORM_AVAILABILITY} - """ - ) - - return True - - -def get_platform_by_date( - date: dt.date, -) -> SUPPORTED_SAT: - """Return the platform for this date.""" - platform_start_dates = get_platform_start_dates() - - start_date_list = list(platform_start_dates.keys()) - platform_list = list(platform_start_dates.values()) - - if date < start_date_list[0]: - raise RuntimeError( - f""" - date {date} too early. - First start_date: {start_date_list[0]} - """ - ) - - return_platform = None - if date >= start_date_list[-1]: - return_platform = platform_list[-1] - - if return_platform is None: - return_platform = platform_list[0] - for start_date, latest_platform in zip(start_date_list[1:], platform_list[1:]): - if date >= start_date: - return_platform = latest_platform - continue - else: - break - - return return_platform - - -def get_first_platform_start_date() -> dt.date: - """Return the start date of the first platform.""" - platform_start_dates = get_platform_start_dates() - earliest_date = min(platform_start_dates.keys()) - - return earliest_date diff --git a/seaice_ecdr/platforms/__init__.py b/seaice_ecdr/platforms/__init__.py new file mode 100644 index 00000000..53553041 --- /dev/null +++ b/seaice_ecdr/platforms/__init__.py @@ -0,0 +1,8 @@ +from seaice_ecdr.platforms.config import PLATFORM_CONFIG +from seaice_ecdr.platforms.models import SUPPORTED_PLATFORM_ID, Platform + +__all__ = [ + "PLATFORM_CONFIG", + "SUPPORTED_PLATFORM_ID", + "Platform", +] diff --git a/seaice_ecdr/platforms/config.py b/seaice_ecdr/platforms/config.py new file mode 100644 index 00000000..521d98d3 --- /dev/null +++ b/seaice_ecdr/platforms/config.py @@ -0,0 +1,164 @@ +"""Platform config + +Contains configuration for platforms supported by this code (e.g., AMSR2, F17, etc.). + +Platform start dates are read from a yaml file in this directory +(`default_platform_start_dates.yml`) unless overridden by the +`PLATFORM_START_DATES_CONFIG_FILEPATH` envvar. +""" + +import datetime as dt +import os +from pathlib import Path +from typing import cast, get_args + +import yaml + +from seaice_ecdr.platforms.models import ( + SUPPORTED_PLATFORM_ID, + DateRange, + Platform, + PlatformConfig, + PlatformStartDate, + platform_for_id, +) + +_this_dir = Path(__file__).parent +_DEFAULT_PLATFORM_START_DATES_CONFIG_FILEPATH = Path( + _this_dir / "default_platform_start_dates.yml" +) + + +AM2_PLATFORM = Platform( + name="GCOM-W1 > Global Change Observation Mission 1st-Water", + sensor="AMSR2 > Advanced Microwave Scanning Radiometer 2", + id="am2", + date_range=DateRange( + first_date=dt.date(2012, 7, 2), + last_date=None, + ), +) + +AME_PLATFORM = Platform( + name="Aqua > Earth Observing System, Aqua", + sensor="AMSR-E > Advanced Microwave Scanning Radiometer-EOS", + id="ame", + date_range=DateRange( + first_date=dt.date(2002, 6, 1), + last_date=dt.date(2011, 10, 3), + ), +) + +F17_PLATFORM = Platform( + name="DMSP 5D-3/F17 > Defense Meteorological Satellite Program-F17", + sensor="SSMIS > Special Sensor Microwave Imager/Sounder", + id="F17", + date_range=DateRange( + first_date=dt.date(2008, 1, 1), + last_date=None, + ), +) + +F13_PLATFORM = Platform( + name="DMSP 5D-2/F13 > Defense Meteorological Satellite Program-F13", + sensor="SSM/I > Special Sensor Microwave/Imager", + id="F13", + date_range=DateRange( + first_date=dt.date(1995, 10, 1), + last_date=dt.date(2007, 12, 31), + ), +) +F11_PLATFORM = Platform( + name="DMSP 5D-2/F11 > Defense Meteorological Satellite Program-F11", + sensor="SSM/I > Special Sensor Microwave/Imager", + id="F11", + date_range=DateRange( + first_date=dt.date(1991, 12, 3), + last_date=dt.date(1995, 9, 30), + ), +) +F08_PLATFORM = Platform( + name="DMSP 5D-2/F8 > Defense Meteorological Satellite Program-F8", + sensor="SSM/I > Special Sensor Microwave/Imager", + id="F08", + date_range=DateRange( + first_date=dt.date(1987, 7, 10), + last_date=dt.date(1991, 12, 2), + ), +) + +N07_PLATFORM = Platform( + name="Nimbus-7", + sensor="SMMR > Scanning Multichannel Microwave Radiometer", + id="n07", + date_range=DateRange( + first_date=dt.date(1978, 10, 25), + last_date=dt.date(1987, 7, 9), + ), +) + +SUPPORTED_PLATFORMS = [ + AM2_PLATFORM, + AME_PLATFORM, + F17_PLATFORM, + F13_PLATFORM, + F11_PLATFORM, + F08_PLATFORM, + N07_PLATFORM, +] + + +def _get_platform_config() -> PlatformConfig: + """Gets the platform config given a start dates filepath. + + This function is not intended to be used outside of this module, as it sets + a global variable accessed from other parts of the code (`PLATFORM_CONFIG`). + """ + + if platform_override_filepath_str := os.environ.get( + "PLATFORM_START_DATES_CONFIG_FILEPATH" + ): + platform_start_dates_config_filepath = Path(platform_override_filepath_str) + else: + platform_start_dates_config_filepath = ( + _DEFAULT_PLATFORM_START_DATES_CONFIG_FILEPATH + ) + + if not platform_start_dates_config_filepath.is_file(): + raise RuntimeError( + f"Could not find platform config file: {platform_start_dates_config_filepath}" + ) + + # TODO: drop support for "FORCE_PLATFORM" in favor of a platform start dates + # config override file. + if forced_platform_id := os.environ.get("FORCE_PLATFORM"): + if forced_platform_id not in get_args(SUPPORTED_PLATFORM_ID): + raise RuntimeError( + f"The forced platform ({forced_platform_id}) is not a supported platform." + ) + + forced_platform_id = cast(SUPPORTED_PLATFORM_ID, forced_platform_id) + forced_platform = platform_for_id( + platforms=SUPPORTED_PLATFORMS, platform_id=forced_platform_id + ) + first_date_of_forced_platform = forced_platform.date_range.first_date + forced_cdr_platform_start_dates = [ + PlatformStartDate( + platform_id=forced_platform_id, + start_date=first_date_of_forced_platform, + ) + ] + + return PlatformConfig( + platforms=SUPPORTED_PLATFORMS, + cdr_platform_start_dates=forced_cdr_platform_start_dates, + ) + + with open(platform_start_dates_config_filepath, "r") as config_file: + start_dates_cfg = yaml.safe_load(config_file) + platform_cfg = PlatformConfig(platforms=SUPPORTED_PLATFORMS, **start_dates_cfg) + + return platform_cfg + + +PLATFORM_CONFIG = _get_platform_config() diff --git a/seaice_ecdr/platforms/default_platform_start_dates.yml b/seaice_ecdr/platforms/default_platform_start_dates.yml new file mode 100644 index 00000000..b128f207 --- /dev/null +++ b/seaice_ecdr/platforms/default_platform_start_dates.yml @@ -0,0 +1,18 @@ +cdr_platform_start_dates: + - platform_id: "n07" + start_date: "1978-10-25" + - platform_id: "F08" + start_date: "1987-07-10" + - platform_id: "F11" + start_date: "1991-12-03" + - platform_id: "F13" + start_date: "1995-10-01" + # TODO: we do not currently support AMSRE at the 25km resolution + # - platform_id: "ame" # AMSR-E is first AMSR sat + # start_date: "2002-06-01" + # F17 starts while AMSR-E is up, on 2008-01-01. We don't use F17 until + # 2011-10-04. + - platform_id: "F17" + start_date: "2011-10-04" + - platform_id: "am2" + start_date: "2012-07-02" diff --git a/seaice_ecdr/platforms/models.py b/seaice_ecdr/platforms/models.py new file mode 100644 index 00000000..ac813091 --- /dev/null +++ b/seaice_ecdr/platforms/models.py @@ -0,0 +1,191 @@ +"""Pydantic data models for platform configuration.""" + +import copy +import datetime as dt +from typing import Literal, cast + +from pydantic import BaseModel, root_validator, validator + +# TODO: ideally this is used sparingly. The code should accept any number of +# platform configurations, and those configurations should defined what's +# "supported". We could even include the import string for a fetch function that +# conforms to a spec for each platform, so that the e.g., `tb_data` module does +# not need to map specific IDs to functions. See: +# https://docs.pydantic.dev/2.3/usage/types/string_types/#importstring +SUPPORTED_PLATFORM_ID = Literal[ + "am2", # AMSR2 + "ame", # AMSRE + "F17", # SSMIS F17 + "F13", # SSMI F13 + "F11", # SSMI F11 + "F08", # SSMI F08 + "n07", # Nimbus-7 SMMR +] + + +class DateRange(BaseModel): + first_date: dt.date + # If the last_date is None, it indicates that the satellite is still + # operating and we do not have a "last date" yet. + last_date: dt.date | None + + @root_validator(skip_on_failure=True) + def validate_date_range( + cls, # noqa: F841 (`cls` is unused, but must be present for pydantic) + values, + ): + first_date: dt.date = values["first_date"] + last_date: dt.date | None = values["last_date"] + + # If the last date isn't given, it means date range extends from the + # first date into the future (satellite is still operating) + if (last_date is not None) and (first_date > last_date): + raise ValueError( + f"First date ({first_date}) is after last date {last_date} in date range." + ) + + return values + + +class Platform(BaseModel): + # E.g., "DMSP 5D-3/F17 > Defense Meteorological Satellite Program-F17" + name: str + # GCMD sensor name. E.g., SSMIS > Special Sensor Microwave Imager/Sounder + sensor: str + # E.g., "F17" + id: SUPPORTED_PLATFORM_ID + # The available date range for the platform, inclusive. + date_range: DateRange + + +def platform_for_id( + *, platforms: list[Platform], platform_id: SUPPORTED_PLATFORM_ID +) -> Platform: + for platform in platforms: + if platform_id == platform.id: + return platform + raise ValueError(f"Could not find platform with id {platform_id}.") + + +class PlatformStartDate(BaseModel): + platform_id: SUPPORTED_PLATFORM_ID + start_date: dt.date + + +class PlatformConfig(BaseModel): + platforms: list[Platform] + cdr_platform_start_dates: list[PlatformStartDate] + + @root_validator(skip_on_failure=True) + def validate_platform_start_dates_platform_in_platforms( + cls, # noqa: F841 (`cls` is unused, but must be present for pydantic) + values, + ): + """Validate that each platform start date corresponds with a defined platform.""" + platform_ids = [platform.id for platform in values["platforms"]] + for platform_start_date in values["cdr_platform_start_dates"]: + if platform_start_date.platform_id not in platform_ids: + raise ValueError( + f"Did not find {platform_start_date.platform_id} in platform list (must be one of {platform_ids})." + ) + + return values + + @root_validator(skip_on_failure=True) + def validate_platform_start_date_in_platform_date_range( + cls, # noqa: F841 (`cls` is unused, but must be present for pydantic) + values, + ): + """Validate that each platform start date is within the platform's date range.""" + for platform_start_date in values["cdr_platform_start_dates"]: + matching_platform = platform_for_id( + platforms=values["platforms"], + platform_id=platform_start_date.platform_id, + ) + start_date_before_first_date = ( + matching_platform.date_range.first_date > platform_start_date.start_date + ) + + last_date_is_not_none = matching_platform.date_range.last_date is not None + start_date_after_last_date = last_date_is_not_none and ( + matching_platform.date_range.last_date < platform_start_date.start_date + ) + + if start_date_before_first_date or start_date_after_last_date: + raise ValueError( + f"Platform start date of {platform_start_date.start_date}" + f" for {matching_platform.id}" + " is outside of the platform's date range:" + f" {matching_platform.date_range}" + ) + return values + + @validator("cdr_platform_start_dates") + def validate_platform_start_dates_in_order( + cls, # noqa: F841 (`cls` is unused, but must be present for pydantic) + values: list[PlatformStartDate], + ) -> list[PlatformStartDate]: + """Validate that platform start dates are defined in order from old -> new. + + E.g., 1979-10-25 should be listed before 1987-07-10. + """ + last_start_date = None + for platform_start_date in values: + if last_start_date is None: + last_start_date = copy.deepcopy(platform_start_date) + assert last_start_date is not None + last_start_date = cast(PlatformStartDate, last_start_date) + continue + + # NOTE: the `type: ignore` on the next line is because mypy thinks + # this line is unreachable, but it is reachable. In fact, there is a + # unit test (`test_platform_config_validation_error`) that ensures + # this is true. + if last_start_date.start_date >= platform_start_date.start_date: # type: ignore[unreachable] + raise ValueError( + f"Platform start dates are not sequentially increasing:" + f" {platform_start_date.platform_id} with start date {platform_start_date.start_date}" + " is given after" + f" {last_start_date.platform_id} with start date {last_start_date.start_date}." + ) + + last_start_date = copy.deepcopy(platform_start_date) + + return values + + def platform_for_id(self, platform_id: SUPPORTED_PLATFORM_ID) -> Platform: + """Return the Platform for the given platform ID.""" + return platform_for_id(platforms=self.platforms, platform_id=platform_id) + + def get_platform_by_date( + self, + date: dt.date, + ) -> Platform: + """Return the platform for this date.""" + first_start_date = self.get_first_platform_start_date() + if date < first_start_date: + raise RuntimeError( + f""" + date {date} too early. + First start_date: {first_start_date} + """ + ) + + return_platform_id = None + for cdr_platform_start_date in self.cdr_platform_start_dates: + if date >= cdr_platform_start_date.start_date: + return_platform_id = cdr_platform_start_date.platform_id + continue + else: + break + + if return_platform_id is None: + raise RuntimeError(f"Could not find platform for {date=}") + + return self.platform_for_id(return_platform_id) + + def get_first_platform_start_date(self) -> dt.date: + """Return the start date of the first platform.""" + earliest_date = self.cdr_platform_start_dates[0].start_date + + return earliest_date diff --git a/seaice_ecdr/regrid_25to12.py b/seaice_ecdr/regrid_25to12.py index 271ac2bc..5fe6f7b7 100644 --- a/seaice_ecdr/regrid_25to12.py +++ b/seaice_ecdr/regrid_25to12.py @@ -19,6 +19,7 @@ from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS from seaice_ecdr.ancillary import ( + ANCILLARY_SOURCES, get_adj123_field, get_empty_ds_with_time, get_non_ocean_mask, @@ -36,6 +37,7 @@ def _setup_ecdr_ds_replacement( xr_tbs: xr.Dataset, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES = "CDRv5", ) -> xr.Dataset: # Initialize geo-referenced xarray Dataset grid_id = get_grid_id( @@ -44,7 +46,10 @@ def _setup_ecdr_ds_replacement( ) ecdr_ide_ds = get_empty_ds_with_time( - hemisphere=hemisphere, resolution=resolution, date=date + hemisphere=hemisphere, + resolution=resolution, + date=date, + ancillary_source=ancillary_source, ) # Set initial global attributes @@ -72,6 +77,7 @@ def _setup_ecdr_ds_replacement( def get_reprojection_da_psn25to12( hemisphere: Hemisphere, + ancillary_source: ANCILLARY_SOURCES = "CDRv5", ) -> xr.DataArray: """Returns a mask with: 0: No values will be interpolated @@ -79,8 +85,12 @@ def get_reprojection_da_psn25to12( 2: block interpolation should be applied 3: nearest neighbor interpolation should be applied """ - is_ocean_25 = get_ocean_mask(hemisphere=hemisphere, resolution="25") - is_ocean_12 = get_ocean_mask(hemisphere=hemisphere, resolution="12.5") + is_ocean_25 = get_ocean_mask( + hemisphere=hemisphere, resolution="25", ancillary_source=ancillary_source + ) + is_ocean_12 = get_ocean_mask( + hemisphere=hemisphere, resolution="12.5", ancillary_source=ancillary_source + ) # Calculate where bilinear interpolation will compute properly resize25 = np.zeros(is_ocean_25.shape, dtype=np.float32) @@ -241,6 +251,7 @@ def adjust_reprojected_siconc_field( siconc_da: xr.DataArray, hemisphere: Hemisphere, adjustment_array: npt.NDArray, + ancillary_source: ANCILLARY_SOURCES = "CDRv5", min_siext: float = 0.10, n_adj: int = 3, ) -> xr.DataArray: @@ -250,6 +261,7 @@ def adjust_reprojected_siconc_field( coast_adj = get_adj123_field( hemisphere=hemisphere, resolution="12.5", + ancillary_source=ancillary_source, ).to_numpy() # Initialize ice_edge_adjacency to 255 @@ -299,6 +311,7 @@ def reproject_ideds_25to12( date, hemisphere, resolution, + ancillary_source: ANCILLARY_SOURCES = "CDRv5", ): # Determine reprojection_masks reprojection_da = get_reprojection_da_psn25to12(hemisphere=hemisphere) @@ -325,6 +338,7 @@ def reproject_ideds_25to12( xr_tbs=reprojected_tbs_ds, resolution=resolution, hemisphere=hemisphere, + ancillary_source=ancillary_source, ) # add data_source and platform to the dataset attrs. reprojected_ideds.attrs["data_source"] = initial_ecdr_ds.data_source @@ -334,6 +348,7 @@ def reproject_ideds_25to12( reprojected_ideds["non_ocean_mask"] = get_non_ocean_mask( hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) # Block-replace (=nearest-neighbor interp) diff --git a/seaice_ecdr/set_daily_ncattrs.py b/seaice_ecdr/set_daily_ncattrs.py index 67ad4aa5..2a7afdbe 100644 --- a/seaice_ecdr/set_daily_ncattrs.py +++ b/seaice_ecdr/set_daily_ncattrs.py @@ -5,6 +5,7 @@ from pm_tb_data._types import NORTH, Hemisphere from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS +from seaice_ecdr.ancillary import ANCILLARY_SOURCES from seaice_ecdr.nc_attrs import get_global_attrs from seaice_ecdr.util import get_num_missing_pixels @@ -30,6 +31,7 @@ def finalize_cdecdr_ds( ds_in: xr.Dataset, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, fields_to_drop: list = CDECDR_FIELDS_TO_DROP, fields_to_rename: dict = CDECDR_FIELDS_TO_RENAME, ) -> xr.Dataset: @@ -51,6 +53,7 @@ def finalize_cdecdr_ds( seaice_conc_var=ds["cdr_seaice_conc"], hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) ds["cdr_seaice_conc"] = ( ("time", "y", "x"), @@ -348,7 +351,7 @@ def finalize_cdecdr_ds( temporality="daily", aggregate=False, source=f"Generated from {ds_in.data_source}", - sats=[ds_in.platform], + platform_ids=[ds_in.platform], ) ds.attrs = new_global_attrs diff --git a/seaice_ecdr/spillover.py b/seaice_ecdr/spillover.py new file mode 100644 index 00000000..b9c14a58 --- /dev/null +++ b/seaice_ecdr/spillover.py @@ -0,0 +1,417 @@ +from pathlib import Path +from typing import Literal + +import numpy as np +import numpy.typing as npt +from pm_icecon.bt.compute_bt_ic import coastal_fix +from pm_icecon.land_spillover import apply_nt2_land_spillover +from pm_icecon.nt.compute_nt_ic import apply_nt_spillover +from pm_tb_data._types import Hemisphere +from scipy.ndimage import binary_dilation, generate_binary_structure, shift + +from seaice_ecdr.ancillary import ( + ANCILLARY_SOURCES, + get_adj123_field, + get_land90_conc_field, + get_non_ocean_mask, + get_nt_minic, + get_nt_shoremap, +) +from seaice_ecdr.tb_data import ( + EcdrTbData, +) +from seaice_ecdr.util import get_ecdr_grid_shape + +NT_MAPS_DIR = Path("/share/apps/G02202_V5/cdr_testdata/nt_datafiles/data36/maps") +LAND_SPILL_ALGS = Literal["BT_NT", "NT2", "ILS", "ILSb"] + + +def convert_nonocean_to_shoremap(*, is_nonocean: npt.NDArray): + """The shoremap is the an array that the original NT spillover algorithm + used to determine distance-from-shore. Then encoding is: + 0: Ocean far from coast + 1: land: land not adjacent to ocean + 2: shore: land 4-connected to ocean + 3: coast: ocean 8-connected to land + 4: near-coast: ocean 4-connected to "coast" + 5: far-coast: ocean 4-connected to "near-coast" + + All of this can be computed from the non-ocean mask + + Maddenly, there are logical inconsistencies in the CDRv4 fields that + make the calculations of coast slightly off + """ + conn4 = generate_binary_structure(2, 1) + conn8 = generate_binary_structure(2, 2) + + # Initialize to zero + shoremap = np.zeros(is_nonocean.shape, dtype=np.uint8) + + # Add the land + + # Add the shore (land adjacent to ocean) + is_ocean = ~is_nonocean + is_dilated_ocean = binary_dilation(is_ocean, structure=conn4) + is_shore = is_nonocean & is_dilated_ocean + is_land = is_nonocean & ~is_shore + shoremap[is_land] = 1 + shoremap[is_shore] = 2 + + # Add the coast + is_dilated_nonocean = binary_dilation(is_nonocean, structure=conn8) + is_coast = is_dilated_nonocean & is_ocean + shoremap[is_coast] = 3 + + # Add the nearcoast + is_dilated_coast = binary_dilation(is_coast, structure=conn4) + is_nearcoast = is_dilated_coast & is_ocean & ~is_coast + shoremap[is_nearcoast] = 4 + + # Add the farcoast + is_dilated_nearcoast = binary_dilation(is_nearcoast, structure=conn4) + is_farcoast = is_dilated_nearcoast & is_ocean & ~is_coast & ~is_nearcoast + shoremap[is_farcoast] = 5 + + +def _get_25km_shoremap(*, hemisphere: Hemisphere): + shoremap_fn = NT_MAPS_DIR / f"shoremap_{hemisphere}_25" + shoremap = np.fromfile(shoremap_fn, dtype=">i2")[150:].reshape( + get_ecdr_grid_shape(hemisphere=hemisphere, resolution="25") + ) + + return shoremap + + +def _get_25km_minic(*, hemisphere: Hemisphere): + if hemisphere == "north": + minic_fn = "SSMI8_monavg_min_con" + else: + minic_fn = "SSMI_monavg_min_con_s" + + minic_path = NT_MAPS_DIR / minic_fn + minic = np.fromfile(minic_path, dtype=">i2")[150:].reshape( + get_ecdr_grid_shape(hemisphere=hemisphere, resolution="25") + ) + + # Scale down by 10. The original alg. dealt w/ concentrations scaled by 10. + minic = minic / 10 + + return minic + + +def improved_land_spillover( + *, + ils_arr: npt.NDArray, + init_conc: npt.NDArray, + sie_min: float = 0.15, +) -> npt.NDArray: + """Improved land spillover algorithm dilates "anchor"ing siconc far + far-from-coast to coast and sets siconc values to zero if the dilated + siconc is less than a threshold [sie_min] + + The ils_arr is encoded as: + 0: missing information: siconc cannot be used to anchor/disanchore + 1: non-ocean (ie land or other grid cells without siconc (~lake)) + 2: ocean that may be removed by land spillover, if dilated sie low + 3: ocean whose concentration may anchor/disanchor coastal siconc + 4: ocean whose concentration is ignored for the ILS calcs + + Note: Because the algorithm removes near-coast siconc with nearby + low siconc, grid cells which could be used to disanchor siconc + but whose siconc is unknown are treated as "100% siconc" for + purposes of this calculation. In other words, missing (ils_arr==0) + values will be set to max siconc (1.0) before calculations are made + + The input conc and output conc arrays are expected to have: + min value of 0.0 + sie_min between 0.0 and 1.0 + max_considered siconc of 1.0 + """ + # Sanity check: ils_arr only contains the expected values + ils_set = {val for val in np.unique(ils_arr)} + assert ils_set.issubset(set(np.arange(5, dtype=np.uint8))) + + # Actual ILS algorithm begins here. + conc = init_conc.copy() + conc[ils_arr == 2] = -1.0 # potential ice removal grid cells to -1 + conc[conc > 1.0] = 1.0 # Clamp siconc to 1.0 max + + # Set any missing values to 100% siconc + conc[ils_arr == 0] = 1.0 + + # Remove vals from the fill_arr + fill_arr = conc.copy() + fill_arr[ils_arr == 1] = np.nan # Init to nan where land + fill_arr[ils_arr == 2] = -1 # Init to -1 where we need to fill + # fill_arr at ils_arr will be conc value + fill_arr[ils_arr == 4] = np.nan # Init to where won't use value + + # Start by dilating the far-from coast ice into coastal areas + is_fillable = (ils_arr == 2) | (ils_arr == 3) + + def _dilate_towards_shore(fill_arr, is_fillable): + sum_arr = np.zeros(fill_arr.shape, dtype=np.float32) + num_arr = np.zeros(fill_arr.shape, dtype=np.uint8) + ones_arr = np.ones(fill_arr.shape, dtype=np.uint8) + + needs_filling = (ils_arr == 2) & (fill_arr == -1.0) + n_needs_filling = np.sum(np.where(needs_filling, 1, 0)) + n_needs_filling_prior = n_needs_filling + 1 # Just needs to be different + + n_missing_values = np.sum(np.where((ils_arr == 2) & (fill_arr == -1.0), 1, 0)) + + while ( + (n_needs_filling != n_needs_filling_prior) + and (n_needs_filling > 0) + and (n_missing_values > 0) + ): + needs_val = is_fillable & (fill_arr < 0) + + # Note: convolved2d can't handle the missing values, + # so we need a shift-based approach + left = shift(fill_arr, (0, -1), order=0, mode="constant", cval=np.nan) + right = shift(fill_arr, (0, 1), order=0, mode="constant", cval=np.nan) + upward = shift(fill_arr, (-1, 0), order=0, mode="constant", cval=np.nan) + downward = shift(fill_arr, (1, 0), order=0, mode="constant", cval=np.nan) + + sum_arr[:] = 0.0 + num_arr[:] = 0 + + addable = needs_val & (left >= 0) + sum_arr[addable] += left[addable] + num_arr[addable] += ones_arr[addable] + + addable = needs_val & (right >= 0) + sum_arr[addable] += right[addable] + num_arr[addable] += ones_arr[addable] + + addable = needs_val & (upward >= 0) + sum_arr[addable] += upward[addable] + num_arr[addable] += ones_arr[addable] + + addable = needs_val & (downward >= 0) + sum_arr[addable] += downward[addable] + num_arr[addable] += ones_arr[addable] + + has_new_values = num_arr > 0 + + num_arr[num_arr == 0] = 1 # Prevent div by zero + new_values = np.divide(sum_arr, num_arr).astype(np.float32) + + fill_arr[has_new_values] = new_values[has_new_values] + + n_needs_filling_prior = n_needs_filling + + n_missing_values = np.sum( + np.where((ils_arr == 2) & (fill_arr == -1.0), 1, 0) + ) + needs_filling = is_fillable & (fill_arr == -1.0) + n_needs_filling = np.sum(np.where(needs_filling, 1, 0)) + + return fill_arr + + fill_arr = _dilate_towards_shore(fill_arr, is_fillable) + + # If there are still spots that need filling, then the process is repeated, + # but this time we allow dilation of conc values over land. + needs_filling = (ils_arr == 2) & (fill_arr == -1.0) + n_needs_filling = np.sum(np.where(needs_filling, 1, 0)) + if n_needs_filling > 0: + print("LOG: Grid has isolated pockets of seaice; dilating over land") + fill_arr[ils_arr == 1] = -1 + is_fillable[ils_arr == 1] = True + fill_arr = _dilate_towards_shore(fill_arr, is_fillable) + + # Remove siconc where fill_arr is lower than min_sie + filtered_conc = init_conc.copy() + filtered_conc[(ils_arr == 2) & (fill_arr < sie_min)] = 0 + + return filtered_conc + + +def land_spillover( + *, + cdr_conc: npt.NDArray, + hemisphere: Hemisphere, + tb_data: EcdrTbData, + algorithm: LAND_SPILL_ALGS, + land_mask: npt.NDArray, + ancillary_source: ANCILLARY_SOURCES, + bt_conc=None, # only needed if the BT or NT spillover are used + nt_conc=None, # only needed if the BT or NT spillover are used + bt_wx=None, # only needed if the BT or NT spillover are used + fix_goddard_bt_error: bool = False, # By default, don't fix Goddard bug +) -> npt.NDArray: + """Apply the land spillover technique to the CDR concentration field.""" + + # SS: Writing out the spillover anc fields... + if algorithm == "NT2": + l90c = get_land90_conc_field( + hemisphere=hemisphere, + resolution=tb_data.resolution, + ancillary_source=ancillary_source, + ) + adj123 = get_adj123_field( + hemisphere=hemisphere, + resolution=tb_data.resolution, + ancillary_source=ancillary_source, + ) + spillover_applied_nt2 = apply_nt2_land_spillover( + conc=cdr_conc, + adj123=adj123.data, + l90c=l90c.data, + # anchoring_siconc=50.0, + # affect_dist3=True, + anchoring_siconc=0.0, + affect_dist3=False, + ) + spillover_applied = spillover_applied_nt2 + elif algorithm == "BT_NT": + non_ocean_mask = get_non_ocean_mask( + hemisphere=hemisphere, + resolution=tb_data.resolution, + ancillary_source=ancillary_source, + ) + + # Bootstrap alg + bt_conc[bt_wx] = 0 + bt_conc[bt_conc == 110] = np.nan + spillover_applied_bt = coastal_fix( + conc=bt_conc, + missing_flag_value=np.nan, + land_mask=land_mask, + minic=10, + fix_goddard_bt_error=fix_goddard_bt_error, + ) + + # NT alg + # Apply the NT to the nt_conc field + # Only apply that to the cdr_conc field if nt_spilled > bt_conc + shoremap = get_nt_shoremap( + hemisphere=hemisphere, + resolution=tb_data.resolution, + ancillary_source=ancillary_source, + ) + + minic = get_nt_minic( + hemisphere=hemisphere, + resolution=tb_data.resolution, + ancillary_source=ancillary_source, + ) + nt_conc_for_ntspillover = nt_conc.copy() + is_negative_ntconc = nt_conc < 0 + nt_conc_for_ntspillover[is_negative_ntconc] = np.nan + + spillover_applied_nt = apply_nt_spillover( + conc=nt_conc_for_ntspillover, + shoremap=shoremap, + minic=minic, + match_CDRv4_cdralgos=True, + ) + nt_asCDRv4 = (10.0 * spillover_applied_nt).astype(np.int16) + nt_asCDRv4[is_negative_ntconc] = -10 + nt_asCDRv4[land_mask] = -100 + + # This section is for debugging; outputs bt and bt after spillover + # bt_asCDRv4 = spillover_applied_bt.copy() + # bt_asCDRv4[np.isnan(bt_asCDRv4)] = 110 + # bt_asCDRv4.tofile('bt_afterspill_v5.dat') + # nt_asCDRv4.tofile('nt_afterspill_v5.dat') + + # Note: be careful of ndarray vs xarray here! + is_nt_spillover = ( + (spillover_applied_nt != nt_conc) & (~non_ocean_mask.data) & (nt_conc > 0) + ) + use_nt_spillover = (spillover_applied_nt > bt_conc) & (spillover_applied_bt > 0) + + spillover_applied = spillover_applied_bt.copy() + spillover_applied[use_nt_spillover] = spillover_applied_nt[use_nt_spillover] + + # Here, we remove ice that the NT algorithm removed -- with conc + # < 15% -- regardless of the BT conc value + is_ntspill_removed = ( + is_nt_spillover + & (spillover_applied_nt >= 0) + & (spillover_applied_nt < 14.5) + ) + spillover_applied[is_ntspill_removed.data] = 0 + + elif algorithm == "ILS": + # Prepare the ILS array using the adj123 field to init ils_arr + ils_arr = np.zeros(cdr_conc.shape, dtype=np.uint8) + adj123 = get_adj123_field( + hemisphere=hemisphere, + resolution=tb_data.resolution, + ancillary_source=ancillary_source, + ) + ils_arr[adj123 == 0] = 1 # land -> land + ils_arr[adj123 == 1] = 2 # dist1 -> removable + ils_arr[adj123 == 2] = 2 # dist2 -> removable + ils_arr[adj123 == 3] = 3 # dist3 -> anchoring + ils_arr[adj123 > 3] = 4 # >dist3 -> ignored + + ils_arr[np.isnan(cdr_conc)] = 0 # NaN conc -> "missing + + # Rescale conc from 0-100 to 0-1 + ils_conc = cdr_conc / 100.0 + ils_conc[ils_conc > 1.0] = 1.0 + + spillover_applied_ils = improved_land_spillover( + ils_arr=ils_arr, + init_conc=ils_conc, + ) + + is_different = spillover_applied_ils != ils_conc + spillover_applied = cdr_conc.copy() + spillover_applied[is_different & ((adj123 == 1) | (adj123 == 2))] = 0 + + elif algorithm == "ILSb": + # Prepare the ILS array using shoremap to prep ils_arr + ils_arr = np.zeros(cdr_conc.shape, dtype=np.uint8) + shoremap = get_nt_shoremap( + hemisphere=hemisphere, + resolution=tb_data.resolution, + ancillary_source=ancillary_source, + ) + + # Need to use adj123 to get rid of shoremap's lakes + adj123 = get_adj123_field( + hemisphere=hemisphere, + resolution=tb_data.resolution, + ancillary_source=ancillary_source, + ) + shoremap[(shoremap == 2) & (adj123 == 0)] = 1 # lakeshore -> land + shoremap[(shoremap == 3) & (adj123 == 0)] = 1 # lake -> land + shoremap[(shoremap == 3) & (adj123 == 0)] = 1 # lake -> land + shoremap[(shoremap == 4) & (adj123 == 0)] = 1 # laked2 -> land + shoremap[(shoremap == 5) & (adj123 == 0)] = 1 # laked3 -> land + shoremap[(shoremap == 0) & (adj123 == 0)] = 1 # laked+ -> land + + ils_arr[shoremap == 1] = 1 # land -> land + ils_arr[shoremap == 2] = 1 # coast -> land + ils_arr[shoremap == 3] = 2 # dist1 -> removable + ils_arr[shoremap == 4] = 2 # dist2 -> removable + ils_arr[shoremap == 5] = 3 # dist3 -> anchoring + ils_arr[shoremap == 0] = 4 # >dist3 -> ignored + + ils_arr[np.isnan(cdr_conc)] = 0 # NaN conc -> "missing + + # Rescale conc from 0-100 to 0-1 + ils_conc = cdr_conc / 100.0 + ils_conc[ils_conc > 1.0] = 1.0 + + spillover_applied_ils = improved_land_spillover( + ils_arr=ils_arr, + init_conc=ils_conc, + ) + + is_different = spillover_applied_ils != ils_conc + spillover_applied = cdr_conc.copy() + spillover_applied[is_different & ((shoremap == 3) | (shoremap == 4))] = 0 + + else: + raise RuntimeError( + f"The requested land spillover algorithm ({algorithm=}) is not implemented." + ) + + return spillover_applied diff --git a/seaice_ecdr/tb_data.py b/seaice_ecdr/tb_data.py index 58ba19c5..481027ba 100644 --- a/seaice_ecdr/tb_data.py +++ b/seaice_ecdr/tb_data.py @@ -15,7 +15,7 @@ from pm_tb_data.fetch.nsidc_0007 import get_nsidc_0007_tbs_from_disk from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS -from seaice_ecdr.platforms import SUPPORTED_SAT, get_platform_by_date +from seaice_ecdr.platforms import PLATFORM_CONFIG, SUPPORTED_PLATFORM_ID from seaice_ecdr.util import get_ecdr_grid_shape EXPECTED_ECDR_TB_NAMES = ("h19", "v19", "v22", "h37", "v37") @@ -35,7 +35,7 @@ class EcdrTbData: tbs: EcdrTbs resolution: ECDR_SUPPORTED_RESOLUTIONS data_source: str - platform: SUPPORTED_SAT + platform_id: SUPPORTED_PLATFORM_ID def get_null_grid( @@ -107,14 +107,22 @@ def map_tbs_to_ecdr_channels( return ecdr_tbs -def _get_am2_tbs(*, date: dt.date, hemisphere: Hemisphere) -> EcdrTbData: - tb_resolution: Final = "12.5" - data_source: Final = "AU_SI12" +def _get_am2_tbs( + *, + date: dt.date, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, +) -> EcdrTbData: + data_source: Final = f"AU_SI{resolution}" + am2_resolution_str = { + "12.5": "12", + "25": "25", + }[resolution] try: xr_tbs = get_au_si_tbs( date=date, hemisphere=hemisphere, - resolution="12", + resolution=am2_resolution_str, # type: ignore[arg-type] ) ecdr_tbs = map_tbs_to_ecdr_channels( mapping=dict( @@ -126,23 +134,23 @@ def _get_am2_tbs(*, date: dt.date, hemisphere: Hemisphere) -> EcdrTbData: ), xr_tbs=xr_tbs, hemisphere=hemisphere, - resolution=tb_resolution, + resolution=resolution, date=date, data_source=data_source, ) except FileNotFoundError: - ecdr_tbs = get_null_ecdr_tbs(hemisphere=hemisphere, resolution=tb_resolution) + ecdr_tbs = get_null_ecdr_tbs(hemisphere=hemisphere, resolution=resolution) logger.warning( f"Used all-null TBS for date={date}," f" hemisphere={hemisphere}," - f" resolution={tb_resolution}" + f" resolution={resolution}" ) ecdr_tb_data = EcdrTbData( tbs=ecdr_tbs, - resolution=tb_resolution, + resolution=resolution, data_source=data_source, - platform="am2", + platform_id="am2", ) return ecdr_tb_data @@ -191,16 +199,16 @@ def _get_ame_tbs(*, date: dt.date, hemisphere: Hemisphere) -> EcdrTbData: tbs=ecdr_tbs, resolution=tb_resolution, data_source=data_source, - platform="ame", + platform_id="ame", ) return ecdr_tb_data def _get_nsidc_0001_tbs( - *, date: dt.date, hemisphere: Hemisphere, platform: NSIDC_0001_SATS + *, date: dt.date, hemisphere: Hemisphere, platform_id: NSIDC_0001_SATS ) -> EcdrTbData: - NSIDC0001_DATA_DIR = Path("/ecs/DP4/PM/NSIDC-0001.006/") + NSIDC0001_DATA_DIR = Path("/ecs/DP1/PM/NSIDC-0001.006/") # NSIDC-0001 TBs for siconc are all at 25km nsidc0001_resolution: Final = "25" tb_resolution: Final = nsidc0001_resolution @@ -211,7 +219,7 @@ def _get_nsidc_0001_tbs( hemisphere=hemisphere, data_dir=NSIDC0001_DATA_DIR, resolution=nsidc0001_resolution, - sat=platform, + sat=platform_id, ) ecdr_tbs = map_tbs_to_ecdr_channels( @@ -236,7 +244,7 @@ def _get_nsidc_0001_tbs( f"Used all-null TBS for date={date}," f" hemisphere={hemisphere}," f" resolution={tb_resolution}" - f" platform={platform}" + f" platform_id={platform_id}" ) # TODO: For debugging TBs, consider a print/log statement such as this: @@ -245,7 +253,7 @@ def _get_nsidc_0001_tbs( tbs=ecdr_tbs, resolution=tb_resolution, data_source=data_source, - platform=platform, # type: ignore[arg-type] + platform_id=platform_id, # type: ignore[arg-type] ) return ecdr_tb_data @@ -286,29 +294,61 @@ def _get_nsidc_0007_tbs(*, hemisphere: Hemisphere, date: dt.date) -> EcdrTbData: tbs=ecdr_tbs, resolution=SMMR_RESOLUTION, data_source=data_source, - platform="n07", + platform_id="n07", ) return ecdr_tb_data +# TODO: dedupe this function with `get_ecdr_tb_data` +def get_25km_ecdr_tb_data( + *, + date: dt.date, + hemisphere: Hemisphere, +) -> EcdrTbData: + """Get 25km ECDR Tb data for the given date and hemisphere.""" + platform = PLATFORM_CONFIG.get_platform_by_date(date) + if platform.id == "am2": + return _get_am2_tbs(date=date, hemisphere=hemisphere, resolution="25") + elif platform.id == "ame": + raise NotImplementedError("AME is not yet supported at 25km resolution") + elif platform.id in get_args(NSIDC_0001_SATS): + return _get_nsidc_0001_tbs( + platform_id=platform.id, # type: ignore[arg-type] + date=date, + hemisphere=hemisphere, + ) + elif platform.id == "n07": + # SMMR + return _get_nsidc_0007_tbs(date=date, hemisphere=hemisphere) + else: + raise RuntimeError(f"Platform not supported: {platform}") + + def get_ecdr_tb_data( *, date: dt.date, hemisphere: Hemisphere, ) -> EcdrTbData: - platform = get_platform_by_date(date) - if platform == "am2": - return _get_am2_tbs(date=date, hemisphere=hemisphere) - elif platform == "ame": + """Get ECDR TBs that are expected for a 12.5km ECDR. + + The datasets that have a 12.5km native resolution will be returned as 12.5km + data. The datasets w/ a 25km native resolution will be returned as 25km + data. It's up to the caller to decide how they want to handle resolution + differences between platforms. + """ + platform = PLATFORM_CONFIG.get_platform_by_date(date) + if platform.id == "am2": + return _get_am2_tbs(date=date, hemisphere=hemisphere, resolution="12.5") + elif platform.id == "ame": return _get_ame_tbs(date=date, hemisphere=hemisphere) - elif platform in get_args(NSIDC_0001_SATS): + elif platform.id in get_args(NSIDC_0001_SATS): return _get_nsidc_0001_tbs( - platform=platform, # type: ignore[arg-type] + platform_id=platform.id, # type: ignore[arg-type] date=date, hemisphere=hemisphere, ) - elif platform == "n07": + elif platform.id == "n07": # SMMR return _get_nsidc_0007_tbs(date=date, hemisphere=hemisphere) else: diff --git a/seaice_ecdr/temporal_composite_daily.py b/seaice_ecdr/temporal_composite_daily.py index eb1a4e88..ef534158 100644 --- a/seaice_ecdr/temporal_composite_daily.py +++ b/seaice_ecdr/temporal_composite_daily.py @@ -16,18 +16,20 @@ from pm_icecon.fill_polehole import fill_pole_hole from pm_tb_data._types import NORTH, Hemisphere -from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS, SUPPORTED_SAT -from seaice_ecdr.ancillary import get_non_ocean_mask, nh_polehole_mask +from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS +from seaice_ecdr.ancillary import ( + ANCILLARY_SOURCES, + get_non_ocean_mask, + nh_polehole_mask, +) from seaice_ecdr.cli.util import datetime_to_date from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR from seaice_ecdr.initial_daily_ecdr import ( create_idecdr_for_date, get_idecdr_filepath, ) -from seaice_ecdr.platforms import ( - get_first_platform_start_date, - get_platform_by_date, -) +from seaice_ecdr.platforms import PLATFORM_CONFIG +from seaice_ecdr.spillover import LAND_SPILL_ALGS from seaice_ecdr.util import ( date_range, get_intermediate_output_dir, @@ -131,13 +133,13 @@ def get_tie_filepath( ) -> Path: """Return the complete daily tie file path.""" - platform = get_platform_by_date(date) - sat = cast(SUPPORTED_SAT, platform) + platform = PLATFORM_CONFIG.get_platform_by_date(date) + platform_id = platform.id standard_fn = standard_daily_filename( hemisphere=hemisphere, date=date, - sat=sat, + platform_id=platform_id, resolution=resolution, ) # Add `tiecdr` to the beginning of the standard name to distinguish it as a @@ -170,7 +172,7 @@ def iter_dates_near_date( near-real-time use, because data from "the future" are not available. """ earliest_date = target_date - dt.timedelta(days=day_range) - beginning_of_platform_coverage = get_first_platform_start_date() + beginning_of_platform_coverage = PLATFORM_CONFIG.get_first_platform_start_date() if earliest_date < beginning_of_platform_coverage: logger.warning( f"Resetting temporal interpolation earliest date from {earliest_date} to {beginning_of_platform_coverage}" @@ -205,6 +207,7 @@ def temporally_composite_dataarray( one_sided_limit: int = 3, still_missing_flag: int = 255, non_ocean_mask: xr.DataArray, + daily_climatology_mask: None | npt.NDArray = None, ) -> tuple[xr.DataArray, npt.NDArray]: """Temporally composite a DataArray referenced to given reference date up to interp_range days. @@ -220,6 +223,7 @@ def temporally_composite_dataarray( logger.debug(f"Temporally compositing {da.name} dataarray around {target_date}") # Our flag system requires that the value be expressible by no more than # nine days in either direction + if interp_range > 9: interp_range_error_message = ( f"interp_range in {__name__} is > 9: {interp_range}." @@ -242,6 +246,18 @@ def temporally_composite_dataarray( temp_comp_2d = np.squeeze(temp_comp_da.data) assert temp_comp_2d.shape == (ydim, xdim) + # TODO: These lines are commented out in order to reproduce the + # CDRv4 ERROR where the "home" smmr day does NOT have daily_clim + # applied to it. + # TODO: Putting these lines back in, because now I *am* seeing its effect + # in the re-run CDRv4 fields(!) + if daily_climatology_mask is not None: + temp_comp_2d[daily_climatology_mask] = 0 + + if daily_climatology_mask is not None: + daily_climatology_mask.tofile("dailyclimmask.dat") + print("WROTE dailyclimmask.dat") + # Initialize arrays initial_missing_locs = np.isnan(temp_comp_2d.data) @@ -257,7 +273,9 @@ def temporally_composite_dataarray( pconc[need_values] = 0 pdist[need_values] = 0 nconc[need_values] = 0 - ndist[need_values] = 0 + # TODO: Fix this for v5 release (implemented to match v04f00) + # ndist[need_values] = 0 # Correct + pdist[need_values] = 0 # Error as CDRv04r00 error for time_offset in range(1, interp_range + 1): if n_missing == 0: @@ -268,6 +286,9 @@ def temporally_composite_dataarray( prior_field = np.squeeze(da.isel(time=da.time.dt.date == prior_date).to_numpy()) next_field = np.squeeze(da.isel(time=da.time.dt.date == next_date).to_numpy()) + if daily_climatology_mask is not None: + prior_field[daily_climatology_mask] = 0 + next_field[daily_climatology_mask] = 0 # update prior arrays n_prior = prior_field.size @@ -323,10 +344,12 @@ def temporally_composite_dataarray( # Update the temporal interp flag value temporal_flags[have_only_prior] = 10 * pdist[have_only_prior] - temp_comp_2d[have_only_next] = nconc[have_only_next] + # temp_comp_2d[have_only_next] = nconc[have_only_next] # Correct + temp_comp_2d[have_only_next] = pconc[have_only_next] # Error as CDRv04r00 # Update the temporal interp flag value - temporal_flags[have_only_next] = ndist[have_only_next] + # temporal_flags[have_only_next] = ndist[have_only_next] # Correct + temporal_flags[have_only_next] = pdist[have_only_next] # Error as CDRv04r00 # Ensure flag values do not occur over land temporal_flags[non_ocean_mask.data] = 0 @@ -343,16 +366,18 @@ def read_or_create_and_read_idecdr_ds( hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, intermediate_output_dir: Path, + land_spillover_alg: LAND_SPILL_ALGS, + ancillary_source: ANCILLARY_SOURCES, overwrite_ide: bool = False, ) -> xr.Dataset: """Read an idecdr netCDF file, creating it if it doesn't exist.""" - platform = get_platform_by_date( + platform = PLATFORM_CONFIG.get_platform_by_date( date, ) ide_filepath = get_idecdr_filepath( date=date, - platform=platform, + platform_id=platform.id, hemisphere=hemisphere, resolution=resolution, intermediate_output_dir=intermediate_output_dir, @@ -363,6 +388,8 @@ def read_or_create_and_read_idecdr_ds( hemisphere=hemisphere, resolution=resolution, intermediate_output_dir=intermediate_output_dir, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) logger.debug(f"Reading ideCDR file from: {ide_filepath}") ide_ds = xr.load_dataset(ide_filepath) @@ -465,6 +492,62 @@ def filter_field_via_bitmask( return output_da +def get_daily_climatology_mask( + date: dt.date, + hemisphere: Hemisphere, + resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, +) -> None | npt.NDArray: + """ + Given the date and ancillary source, return a mask where True values + indicate that the sea ice conc values should be set to zero + + NOTE: The date range for this is hard-coded to correspond to SMMR. + This should probably be an argument somehow? + + Because the day-of-year climatology includes the land mask as part of + the invalid ice mask, we get the non_ocean_mask to dis-convolve that. + (We don't necessarily want to set the land to sea-ice-conc=0.) + """ + from netCDF4 import Dataset + + # This date comes from PLATFORM_AVAILABILITY in platforms.py + # TODO: This should be refactored to have less hard-coding! + if date > dt.date(1987, 7, 9): + return None + + non_ocean_mask = get_non_ocean_mask( + hemisphere=hemisphere, + resolution=resolution, + ancillary_source=ancillary_source, + ) + + daily_ds = None + + if hemisphere == "north" and resolution == "25": + daily_ds = Dataset( + "/share/apps/G02202_V5/v05r01_ancillary/ecdr-ancillary-psn25-dailyclim.nc" + ) + elif hemisphere == "south" and resolution == "25": + daily_ds = Dataset( + "/share/apps/G02202_V5/v05r01_ancillary/ecdr-ancillary-pss25-dailyclim.nc" + ) + + if daily_ds is None: + raise RuntimeError( + f"Could not load daily_climatology mask for {date=} {hemisphere=} {resolution=} {ancillary_source=}" + ) + + # day-of-year index is doy - 1 + doy_index = int(date.strftime("%j")) - 1 + doy_mask_invalid = np.array(daily_ds.variables["invalid_ice_mask"])[doy_index, :, :] + + # Return mask of invalid seaice, excluding land + mask = (doy_mask_invalid != 0) & (~non_ocean_mask.data) + + return mask + + # TODO: better function name and docstring. This is first pass at refactor. def temporal_interpolation( *, @@ -472,6 +555,7 @@ def temporal_interpolation( hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, data_stack: xr.Dataset, + ancillary_source: ANCILLARY_SOURCES, fill_the_pole_hole: bool = True, interp_range: int = 5, one_sided_limit: int = 3, @@ -491,13 +575,25 @@ def temporal_interpolation( non_ocean_mask = get_non_ocean_mask( hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) + # daily_climatology_mask is True where historically no sea ice. + # It can be None if no daily_climatology mask is to be used + daily_climatology_mask = get_daily_climatology_mask( + date=date, + hemisphere=hemisphere, + resolution=resolution, + ancillary_source=ancillary_source, + ) + + # Actually compute the cdr_conc temporal composite ti_var, ti_flags = temporally_composite_dataarray( target_date=date, da=data_stack.conc, interp_range=interp_range, non_ocean_mask=non_ocean_mask, one_sided_limit=one_sided_limit, + daily_climatology_mask=daily_climatology_mask, ) tie_ds["cdr_conc_ti"] = ti_var @@ -538,7 +634,7 @@ def temporal_interpolation( cdr_conc = np.squeeze(tie_ds["cdr_conc_ti"].data) # TODO: May want to rename this field. Specifically, after this - # operation, this will be both temporally interpoalted and + # operation, this will be both temporally interpolated and # polehole-filled (if appropriate). For now, "cdr_conc" is okay tie_ds["cdr_conc"] = tie_ds["cdr_conc_ti"].copy() @@ -546,9 +642,10 @@ def temporal_interpolation( # grid is having its pole hole filled! if fill_the_pole_hole and hemisphere == NORTH: cdr_conc_pre_polefill = cdr_conc.copy() - platform = get_platform_by_date(date) near_pole_hole_mask = nh_polehole_mask( - date=date, resolution=resolution, sat=platform + date=date, + resolution=resolution, + ancillary_source=ancillary_source, ) cdr_conc_pole_filled = fill_pole_hole( conc=cdr_conc, @@ -592,6 +689,7 @@ def temporal_interpolation( # distinguish it from the cdr_conc_ti field pass + # NOTE: the bt_conc array does not have daily_climatology applied bt_conc, _ = temporally_composite_dataarray( target_date=date, da=data_stack.raw_bt_seaice_conc, @@ -600,9 +698,10 @@ def temporal_interpolation( one_sided_limit=one_sided_limit, ) + # NOTE: the nt_conc array does not have daily_climatology applied nt_conc, _ = temporally_composite_dataarray( target_date=date, - da=data_stack.raw_bt_seaice_conc, + da=data_stack.raw_nt_seaice_conc, interp_range=interp_range, non_ocean_mask=non_ocean_mask, one_sided_limit=one_sided_limit, @@ -616,9 +715,10 @@ def temporal_interpolation( # Fill pole hole of BT bt_conc_pre_polefill = bt_conc_2d.copy() - platform = get_platform_by_date(date) near_pole_hole_mask = nh_polehole_mask( - date=date, resolution=resolution, sat=platform + date=date, + resolution=resolution, + ancillary_source=ancillary_source, ) bt_conc_pole_filled = fill_pole_hole( conc=bt_conc_2d, @@ -728,8 +828,10 @@ def temporally_interpolated_ecdr_dataset( date: dt.date, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, - interp_range: int = 5, intermediate_output_dir: Path, + land_spillover_alg: LAND_SPILL_ALGS, + ancillary_source: ANCILLARY_SOURCES, + interp_range: int = 5, fill_the_pole_hole: bool = True, ) -> xr.Dataset: """Create xr dataset containing the second pass of daily enhanced CDR. @@ -747,6 +849,8 @@ def temporally_interpolated_ecdr_dataset( hemisphere=hemisphere, resolution=resolution, intermediate_output_dir=intermediate_output_dir, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) init_datasets.append(init_dataset) @@ -758,6 +862,7 @@ def temporally_interpolated_ecdr_dataset( date=date, data_stack=data_stack, fill_the_pole_hole=fill_the_pole_hole, + ancillary_source=ancillary_source, ) return tie_ds @@ -779,7 +884,9 @@ def write_tie_netcdf( ), ) -> Path: """Write the temporally interpolated ECDR to a netCDF file.""" - logger.info(f"Writing netCDF of initial_daily eCDR file to: {output_filepath}") + logger.info( + f"Writing netCDF of temporally_interpolated eCDR file to: {output_filepath}" + ) for excluded_field in excluded_fields: if excluded_field in tie_ds.variables.keys(): @@ -823,6 +930,8 @@ def make_tiecdr_netcdf( hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, intermediate_output_dir: Path, + land_spillover_alg: LAND_SPILL_ALGS, + ancillary_source: ANCILLARY_SOURCES, interp_range: int = 5, fill_the_pole_hole: bool = True, overwrite_tie: bool = False, @@ -844,6 +953,8 @@ def make_tiecdr_netcdf( interp_range=interp_range, intermediate_output_dir=intermediate_output_dir, fill_the_pole_hole=fill_the_pole_hole, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) written_tie_ncfile = write_tie_netcdf( @@ -869,6 +980,8 @@ def create_tiecdr_for_date_range( end_date: dt.date, resolution: ECDR_SUPPORTED_RESOLUTIONS, intermediate_output_dir: Path, + land_spillover_alg: LAND_SPILL_ALGS, + ancillary_source: ANCILLARY_SOURCES, overwrite_tie: bool, ) -> None: """Generate the temporally composited daily ecdr files for a range of dates.""" @@ -879,6 +992,8 @@ def create_tiecdr_for_date_range( resolution=resolution, intermediate_output_dir=intermediate_output_dir, overwrite_tie=overwrite_tie, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) @@ -954,6 +1069,8 @@ def cli( base_output_dir: Path, resolution: ECDR_SUPPORTED_RESOLUTIONS, overwrite: bool, + land_spillover_alg: LAND_SPILL_ALGS, + ancillary_source: ANCILLARY_SOURCES, ) -> None: """Run the temporal composite daily ECDR algorithm with AMSR2 data. @@ -980,4 +1097,6 @@ def cli( resolution=resolution, intermediate_output_dir=intermediate_output_dir, overwrite_tie=overwrite, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) diff --git a/seaice_ecdr/tests/integration/test_ancillary.py b/seaice_ecdr/tests/integration/test_ancillary.py index 0184ce90..a2120af6 100644 --- a/seaice_ecdr/tests/integration/test_ancillary.py +++ b/seaice_ecdr/tests/integration/test_ancillary.py @@ -9,7 +9,10 @@ def test_get_smmr_invalid_ice_mask(): for hemisphere in get_args(Hemisphere): icemask = get_smmr_invalid_ice_mask( - hemisphere=hemisphere, date=dt.date(2023, 1, 29) + date=dt.date(2023, 1, 29), + hemisphere=hemisphere, + resolution="25", + ancillary_source="CDRv4", ) assert icemask.dtype == bool diff --git a/seaice_ecdr/tests/integration/test_complete_daily.py b/seaice_ecdr/tests/integration/test_complete_daily.py index 9a2a5bfa..9b12bd04 100644 --- a/seaice_ecdr/tests/integration/test_complete_daily.py +++ b/seaice_ecdr/tests/integration/test_complete_daily.py @@ -1,18 +1,23 @@ import datetime as dt +from typing import Final from pm_tb_data._types import NORTH from seaice_ecdr.complete_daily_ecdr import make_standard_cdecdr_netcdf from seaice_ecdr.tests.integration import base_output_dir_test_path # noqa +ancillary_source: Final = "CDRv5" + def test_make_standard_cdecdr_netcdf(base_output_dir_test_path): # noqa for day in range(1, 5): output_path = make_standard_cdecdr_netcdf( date=dt.date(2022, 3, day), hemisphere=NORTH, - resolution="12.5", + resolution="25", base_output_dir=base_output_dir_test_path, + land_spillover_alg="NT2", + ancillary_source=ancillary_source, ) assert output_path.is_file() diff --git a/seaice_ecdr/tests/integration/test_initial_daily_ecdr_generation.py b/seaice_ecdr/tests/integration/test_initial_daily_ecdr_generation.py index 6c4cbd41..c7872093 100644 --- a/seaice_ecdr/tests/integration/test_initial_daily_ecdr_generation.py +++ b/seaice_ecdr/tests/integration/test_initial_daily_ecdr_generation.py @@ -15,6 +15,7 @@ make_idecdr_netcdf, write_ide_netcdf, ) +from seaice_ecdr.platforms import SUPPORTED_PLATFORM_ID cdr_conc_fieldname = "conc" @@ -26,12 +27,15 @@ def sample_idecdr_dataset_nh(): test_date = dt.datetime(2021, 4, 5).date() test_hemisphere = NORTH - test_resolution: Final = "12.5" + test_resolution: Final = "25" + ancillary_source: Final = "CDRv5" ide_conc_ds = initial_daily_ecdr_dataset( date=test_date, hemisphere=test_hemisphere, resolution=test_resolution, + land_spillover_alg="NT2", + ancillary_source=ancillary_source, ) return ide_conc_ds @@ -43,12 +47,15 @@ def sample_idecdr_dataset_sh(): test_date = dt.datetime(2021, 4, 5).date() test_hemisphere = NORTH - test_resolution: Final = "12.5" + test_resolution: Final = "25" + ancillary_source: Final = "CDRv5" ide_conc_ds = initial_daily_ecdr_dataset( date=test_date, hemisphere=test_hemisphere, resolution=test_resolution, + land_spillover_alg="NT2", + ancillary_source=ancillary_source, ) return ide_conc_ds @@ -127,8 +134,9 @@ def test_cli_idecdr_ncfile_creation(tmpdir): tmpdir_path = Path(tmpdir) test_date = dt.datetime(2021, 4, 5).date() test_hemisphere = NORTH - test_resolution: Final = "12.5" - test_platform = "am2" + test_resolution: Final = "25" + test_platform_id: SUPPORTED_PLATFORM_ID = "am2" + ancillary_source: Final = "CDRv5" make_idecdr_netcdf( date=test_date, @@ -136,11 +144,13 @@ def test_cli_idecdr_ncfile_creation(tmpdir): resolution=test_resolution, intermediate_output_dir=tmpdir_path, excluded_fields=[], + land_spillover_alg="NT2", + ancillary_source=ancillary_source, ) output_path = get_idecdr_filepath( hemisphere=test_hemisphere, date=test_date, - platform=test_platform, + platform_id=test_platform_id, resolution=test_resolution, intermediate_output_dir=tmpdir_path, ) @@ -159,8 +169,9 @@ def test_can_drop_fields_from_idecdr_netcdf( tmpdir_path = Path(tmpdir) test_date = dt.datetime(2021, 4, 5).date() test_hemisphere = NORTH - test_resolution: Final = "12.5" - test_platform = "am2" + test_resolution: Final = "25" + test_platform_id: SUPPORTED_PLATFORM_ID = "am2" + ancillary_source: Final = "CDRv5" make_idecdr_netcdf( date=test_date, @@ -168,11 +179,13 @@ def test_can_drop_fields_from_idecdr_netcdf( resolution=test_resolution, intermediate_output_dir=tmpdir_path, excluded_fields=(cdr_conc_fieldname,), + land_spillover_alg="NT2", + ancillary_source=ancillary_source, ) output_path = get_idecdr_filepath( hemisphere=test_hemisphere, date=test_date, - platform=test_platform, + platform_id=test_platform_id, resolution=test_resolution, intermediate_output_dir=tmpdir_path, ) diff --git a/seaice_ecdr/tests/integration/test_monthly.py b/seaice_ecdr/tests/integration/test_monthly.py index a324f761..79f1ced6 100644 --- a/seaice_ecdr/tests/integration/test_monthly.py +++ b/seaice_ecdr/tests/integration/test_monthly.py @@ -1,3 +1,5 @@ +from typing import Final + import pytest import xarray as xr from pm_tb_data._types import NORTH @@ -6,6 +8,8 @@ from seaice_ecdr.tests.integration import base_output_dir_test_path # noqa from seaice_ecdr.util import get_complete_output_dir +ancillary_source: Final = "CDRv5" + @pytest.mark.order(after="test_complete_daily.py::test_make_cdecdr_netcdf") def test_make_monthly_nc(base_output_dir_test_path, monkeypatch): # noqa @@ -26,8 +30,9 @@ def test_make_monthly_nc(base_output_dir_test_path, monkeypatch): # noqa year=2022, month=3, hemisphere=NORTH, - resolution="12.5", + resolution="25", complete_output_dir=complete_output_dir, + ancillary_source=ancillary_source, ) assert output_path.is_file() diff --git a/seaice_ecdr/tests/integration/test_tb_data.py b/seaice_ecdr/tests/integration/test_tb_data.py index 894b3a27..fcc9afdf 100644 --- a/seaice_ecdr/tests/integration/test_tb_data.py +++ b/seaice_ecdr/tests/integration/test_tb_data.py @@ -3,15 +3,16 @@ import numpy as np from pm_tb_data._types import NORTH, SOUTH -from seaice_ecdr.platforms import get_platform_start_dates -from seaice_ecdr.tb_data import get_ecdr_tb_data +from seaice_ecdr.platforms import PLATFORM_CONFIG +from seaice_ecdr.tb_data import get_25km_ecdr_tb_data def test_get_ecdr_tb_data(): - platform_start_dates = get_platform_start_dates() - for date, platform in platform_start_dates.items(): - ecdr_tb_data = get_ecdr_tb_data(date=date, hemisphere=NORTH) - assert ecdr_tb_data.platform == platform + for platform_start_date in PLATFORM_CONFIG.cdr_platform_start_dates: + ecdr_tb_data = get_25km_ecdr_tb_data( + date=platform_start_date.start_date, hemisphere=NORTH + ) + assert ecdr_tb_data.platform_id == platform_start_date.platform_id assert not np.all(np.isnan(ecdr_tb_data.tbs.v19)) assert not np.all(np.isnan(ecdr_tb_data.tbs.h19)) @@ -25,7 +26,7 @@ def test_get_ecdr_tb_data_missing_channel(): We know this happens at least once: on 10/10/1995 SH for 22v (from F13). """ - ecdr_tb_data = get_ecdr_tb_data(date=dt.date(1995, 10, 10), hemisphere=SOUTH) + ecdr_tb_data = get_25km_ecdr_tb_data(date=dt.date(1995, 10, 10), hemisphere=SOUTH) # v22 is known to be missing for this day and hemisphere. assert np.all(np.isnan(ecdr_tb_data.tbs.v22)) diff --git a/seaice_ecdr/tests/integration/test_temporal_composite_daily_integration.py b/seaice_ecdr/tests/integration/test_temporal_composite_daily_integration.py index 5e2ff65f..274f0f69 100644 --- a/seaice_ecdr/tests/integration/test_temporal_composite_daily_integration.py +++ b/seaice_ecdr/tests/integration/test_temporal_composite_daily_integration.py @@ -11,6 +11,7 @@ from pm_tb_data._types import NORTH from seaice_ecdr.initial_daily_ecdr import get_idecdr_filepath +from seaice_ecdr.platforms import SUPPORTED_PLATFORM_ID from seaice_ecdr.temporal_composite_daily import ( make_tiecdr_netcdf, read_or_create_and_read_idecdr_ds, @@ -18,8 +19,10 @@ date = dt.date(2021, 2, 19) hemisphere = NORTH -resolution: Final = "12.5" -platform = "am2" +resolution: Final = "25" +platform_id: SUPPORTED_PLATFORM_ID = "am2" +land_spillover_alg: Final = "NT2" +ancillary_source: Final = "CDRv5" def test_read_or_create_and_read_idecdr_ds(tmpdir): @@ -27,7 +30,7 @@ def test_read_or_create_and_read_idecdr_ds(tmpdir): sample_ide_filepath = get_idecdr_filepath( date=date, - platform=platform, + platform_id=platform_id, hemisphere=hemisphere, resolution=resolution, intermediate_output_dir=Path(tmpdir), @@ -38,6 +41,8 @@ def test_read_or_create_and_read_idecdr_ds(tmpdir): hemisphere=hemisphere, resolution=resolution, intermediate_output_dir=Path(tmpdir), + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) assert sample_ide_filepath.exists() @@ -46,6 +51,8 @@ def test_read_or_create_and_read_idecdr_ds(tmpdir): hemisphere=hemisphere, resolution=resolution, intermediate_output_dir=Path(tmpdir), + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) assert test_ide_ds_with_creation == test_ide_ds_with_reading @@ -69,6 +76,8 @@ def test_create_tiecdr_file(tmpdir): resolution=resolution, intermediate_output_dir=Path(tmpdir), interp_range=2, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) assert fp.is_file() diff --git a/seaice_ecdr/tests/integration/test_validation.py b/seaice_ecdr/tests/integration/test_validation.py index b7406546..73071077 100644 --- a/seaice_ecdr/tests/integration/test_validation.py +++ b/seaice_ecdr/tests/integration/test_validation.py @@ -9,6 +9,9 @@ from seaice_ecdr.validation import validate_outputs +@pytest.mark.skip( + reason="skipping because currently adding CDRv4 flags to conc fields in order to match CDRv4 results, but in CDRv5 we want all non-valid values to be the fill value" +) @pytest.mark.order(after="test_monthly.py::test_make_monthly_nc") def test_validate_outputs(base_output_dir_test_path): # noqa for product_type, hemisphere in itertools.product( @@ -36,4 +39,8 @@ def test_validate_outputs(base_output_dir_test_path): # noqa with open(outputs["error_filepath"], "r") as error_csv: reader = csv.DictReader(error_csv) for row in reader: - assert row["error_code"] == "0" + try: + assert row["error_code"] == "0" + except AssertionError as e: + print(f'nonzero error_code: {row["error_code"]}') + raise e diff --git a/seaice_ecdr/tests/regression/test_daily_aggregate.py b/seaice_ecdr/tests/regression/test_daily_aggregate.py index f4cc7293..eecdcfff 100644 --- a/seaice_ecdr/tests/regression/test_daily_aggregate.py +++ b/seaice_ecdr/tests/regression/test_daily_aggregate.py @@ -13,7 +13,7 @@ from seaice_ecdr.util import get_complete_output_dir -def test_daily_aggreagate_matches_daily_data(tmpdir): +def test_daily_aggregate_matches_daily_data(tmpdir): base_output_dir = Path(tmpdir) hemisphere: Final = NORTH complete_output_dir = get_complete_output_dir( @@ -23,7 +23,9 @@ def test_daily_aggreagate_matches_daily_data(tmpdir): ) year = 2022 - resolution: Final = "12.5" + resolution: Final = "25" + land_spillover_alg: Final = "NT2" + ancillary_source: Final = "CDRv5" # First, ensure some daily data is created. datasets = [] @@ -34,6 +36,8 @@ def test_daily_aggreagate_matches_daily_data(tmpdir): hemisphere=hemisphere, resolution=resolution, base_output_dir=base_output_dir, + land_spillover_alg=land_spillover_alg, + ancillary_source=ancillary_source, ) ds = read_cdecdr_ds( @@ -51,6 +55,7 @@ def test_daily_aggreagate_matches_daily_data(tmpdir): hemisphere=hemisphere, resolution=resolution, complete_output_dir=complete_output_dir, + ancillary_source=ancillary_source, ) # Read back in the data. diff --git a/seaice_ecdr/tests/unit/test_complete_daily_ecdr.py b/seaice_ecdr/tests/unit/test_complete_daily_ecdr.py index 62489ff7..5434b521 100644 --- a/seaice_ecdr/tests/unit/test_complete_daily_ecdr.py +++ b/seaice_ecdr/tests/unit/test_complete_daily_ecdr.py @@ -30,6 +30,7 @@ def test_no_melt_onset_for_southern_hemisphere(tmpdir): date=date, hemisphere=SOUTH, resolution="12.5", + ancillary_source="CDRv5", complete_output_dir=complete_output_dir, intermediate_output_dir=intermediate_output_dir, is_nrt=False, @@ -55,6 +56,7 @@ def test_melt_onset_field_outside_melt_season(tmpdir): date=date, hemisphere=hemisphere, resolution="12.5", + ancillary_source="CDRv5", complete_output_dir=complete_output_dir, intermediate_output_dir=intermediate_output_dir, is_nrt=False, diff --git a/seaice_ecdr/tests/unit/test_melt.py b/seaice_ecdr/tests/unit/test_melt.py index 7e1a27bc..85f0807a 100644 --- a/seaice_ecdr/tests/unit/test_melt.py +++ b/seaice_ecdr/tests/unit/test_melt.py @@ -1,7 +1,7 @@ import datetime as dt import numpy as np -import numpy.testing as npt +import numpy.testing as nptesting # Note: npt is numpy.typing from seaice_ecdr import melt @@ -14,7 +14,7 @@ def test_with_melting_everywhere(): actual = melt.melting(concentrations=concentrations, tb_h19=tb19, tb_h37=tb37) - npt.assert_array_equal(expected, actual) + nptesting.assert_array_equal(expected, actual) def test_with_some_non_melting_due_to_large_tb_deltas(): @@ -25,7 +25,7 @@ def test_with_some_non_melting_due_to_large_tb_deltas(): actual = melt.melting(concentrations=concentrations, tb_h19=tb19, tb_h37=tb37) - npt.assert_array_equal(expected, actual) + nptesting.assert_array_equal(expected, actual) def test_that_tb_threshold_is_2_kelvin(): @@ -37,7 +37,7 @@ def test_that_tb_threshold_is_2_kelvin(): actual = melt.melting(concentrations=concentrations, tb_h19=tb19, tb_h37=tb37) - npt.assert_array_equal(expected, actual) + nptesting.assert_array_equal(expected, actual) def test_with_some_non_melting_due_to_low_concentrations(): @@ -48,7 +48,7 @@ def test_with_some_non_melting_due_to_low_concentrations(): actual = melt.melting(concentrations=concentrations, tb_h19=tb19, tb_h37=tb37) - npt.assert_array_equal(expected, actual) + nptesting.assert_array_equal(expected, actual) def test_that_50_percent_concentration_is_in_range(): @@ -59,7 +59,7 @@ def test_that_50_percent_concentration_is_in_range(): actual = melt.melting(concentrations=concentrations, tb_h19=tb19, tb_h37=tb37) - npt.assert_array_equal(expected, actual) + nptesting.assert_array_equal(expected, actual) def test_with_missing_tb19_values(): @@ -70,7 +70,7 @@ def test_with_missing_tb19_values(): actual = melt.melting(concentrations=concentrations, tb_h19=tb19, tb_h37=tb37) - npt.assert_array_equal(expected, actual) + nptesting.assert_array_equal(expected, actual) def test_with_missing_tb37_values(): @@ -81,7 +81,7 @@ def test_with_missing_tb37_values(): actual = melt.melting(concentrations=concentrations, tb_h19=tb19, tb_h37=tb37) - npt.assert_array_equal(expected, actual) + nptesting.assert_array_equal(expected, actual) def test_with_both_missing_values(): @@ -92,7 +92,7 @@ def test_with_both_missing_values(): actual = melt.melting(concentrations=concentrations, tb_h19=tb19, tb_h37=tb37) - npt.assert_array_equal(expected, actual) + nptesting.assert_array_equal(expected, actual) def test_date_in_melt_season(): diff --git a/seaice_ecdr/tests/unit/test_monthly.py b/seaice_ecdr/tests/unit/test_monthly.py index 56857a1a..e7148417 100644 --- a/seaice_ecdr/tests/unit/test_monthly.py +++ b/seaice_ecdr/tests/unit/test_monthly.py @@ -2,7 +2,7 @@ from pathlib import Path import numpy as np -import numpy.testing as npt +import numpy.testing as nptesting # Note: npt is numpy.typing import pytest import xarray as xr from pm_tb_data._types import NORTH @@ -13,8 +13,8 @@ QA_OF_CDR_SEAICE_CONC_DAILY_BITMASKS, QA_OF_CDR_SEAICE_CONC_MONTHLY_BITMASKS, _get_daily_complete_filepaths_for_month, + _platform_id_for_month, _qa_field_has_flag, - _sat_for_month, calc_cdr_seaice_conc_monthly, calc_melt_onset_day_cdr_seaice_conc_monthly, calc_qa_of_cdr_seaice_conc_monthly, @@ -86,27 +86,27 @@ def _mock_daily_ds_for_month(num_days: int) -> xr.Dataset: # Check that no error is raised for AMSR2, full month's worth of data check_min_days_for_valid_month( daily_ds_for_month=_mock_daily_ds_for_month(31), - sat="am2", + platform_id="am2", ) # Check that an error is raised for AMSR2, not a full month's worth of data with pytest.raises(RuntimeError): check_min_days_for_valid_month( daily_ds_for_month=_mock_daily_ds_for_month(19), - sat="am2", + platform_id="am2", ) # Check that an error is not raised for n07, with modified min worth of data check_min_days_for_valid_month( daily_ds_for_month=_mock_daily_ds_for_month(10), - sat="n07", + platform_id="n07", ) # Check that an error is raised for n07, not a full month's worth of data with pytest.raises(RuntimeError): check_min_days_for_valid_month( daily_ds_for_month=_mock_daily_ds_for_month(9), - sat="n07", + platform_id="n07", ) @@ -124,7 +124,7 @@ def test__qa_field_has_flag(): qa_field=mock_qa_field, flag_value=flag_1, ) - npt.assert_array_equal(expected, actual) + nptesting.assert_array_equal(expected, actual) # flag 2 expected = xr.DataArray([False, True, True]) @@ -132,7 +132,7 @@ def test__qa_field_has_flag(): qa_field=mock_qa_field, flag_value=flag_2, ) - npt.assert_array_equal(expected, actual) + nptesting.assert_array_equal(expected, actual) # flag 3 expected = xr.DataArray([False, False, True]) @@ -140,7 +140,7 @@ def test__qa_field_has_flag(): qa_field=mock_qa_field, flag_value=flag_3, ) - npt.assert_array_equal(expected, actual) + nptesting.assert_array_equal(expected, actual) def _mock_daily_ds_for_month(): @@ -336,7 +336,7 @@ def test_calc_qa_of_cdr_seaice_conc_monthly(): cdr_seaice_conc_monthly=_mean_daily_conc, ) - npt.assert_array_equal(expected_flags, actual.values) + nptesting.assert_array_equal(expected_flags, actual.values) def test__calc_conc_monthly(monkeypatch): @@ -386,9 +386,10 @@ def test__calc_conc_monthly(monkeypatch): daily_ds_for_month=mock_daily_ds, hemisphere="north", resolution="12.5", + ancillary_source="CDRv5", ) - npt.assert_array_equal( + nptesting.assert_array_equal( actual.values, np.array( [ @@ -432,7 +433,7 @@ def test_calc_stdv_of_cdr_seaice_conc_monthly(): == "Passive Microwave Monthly Northern Hemisphere Sea Ice Concentration Source Estimated Standard Deviation" ) - npt.assert_array_equal( + nptesting.assert_array_equal( actual.values, np.array( [ @@ -474,7 +475,7 @@ def test_calc_melt_onset_day_cdr_seaice_conc_monthly(): ) assert actual.long_name == "Monthly Day of Snow Melt Onset Over Sea Ice" - npt.assert_array_equal( + nptesting.assert_array_equal( actual.values, np.array( [ @@ -513,9 +514,10 @@ def test_monthly_ds(monkeypatch, tmpdir): ) actual = make_monthly_ds( daily_ds_for_month=_mock_daily_ds, - sat="am2", + platform_id="am2", hemisphere=NORTH, resolution="12.5", + ancillary_source="CDRv5", ) # Test that the dataset only contains the variables we expect. @@ -545,14 +547,14 @@ def test_monthly_ds(monkeypatch, tmpdir): xr.testing.assert_allclose(actual, after_write, atol=0.009) -def test__sat_for_month(): - assert "am2" == _sat_for_month(sats=["am2", "am2", "am2", "am2"]) +def test__platform_id_for_month(): + assert "am2" == _platform_id_for_month(platform_ids=["am2", "am2", "am2", "am2"]) - assert "am2" == _sat_for_month(sats=["F17", "F17", "am2", "am2"]) + assert "am2" == _platform_id_for_month(platform_ids=["F17", "F17", "am2", "am2"]) - assert "F17" == _sat_for_month(sats=["F13", "F13", "F13", "F17"]) + assert "F17" == _platform_id_for_month(platform_ids=["F13", "F13", "F13", "F17"]) - assert "am2" == _sat_for_month(sats=["F13", "F17", "am2"]) + assert "am2" == _platform_id_for_month(platform_ids=["F13", "F17", "am2"]) def test_calc_surface_mask_monthly(): diff --git a/seaice_ecdr/tests/unit/test_nc_attrs.py b/seaice_ecdr/tests/unit/test_nc_attrs.py index f40ec5c2..c72a13ba 100644 --- a/seaice_ecdr/tests/unit/test_nc_attrs.py +++ b/seaice_ecdr/tests/unit/test_nc_attrs.py @@ -5,8 +5,6 @@ from seaice_ecdr.nc_attrs import ( _get_software_version_id, _get_time_coverage_attrs, - get_platforms_for_sats, - get_sensors_for_sats, ) @@ -123,45 +121,3 @@ def test__get_software_version_id(): # git@github.com:nsidc/seaice_ecdr.git@10fdd316452d0d69fbcf4e7915b66c227298b0ec assert "github" in software_ver_id assert "@" in software_ver_id - - -def test_get_platforms_for_sat(): - expected = [ - "DMSP 5D-2/F13 > Defense Meteorological Satellite Program-F13", - "DMSP 5D-3/F17 > Defense Meteorological Satellite Program-F17", - "GCOM-W1 > Global Change Observation Mission 1st-Water", - ] - - actual = get_platforms_for_sats( - [ - "F13", - "F17", - "F17", - "am2", - "am2", - "am2", - ] - ) - - assert expected == actual - - -def test_get_sensors_for_sats(): - expected = [ - "SSM/I > Special Sensor Microwave/Imager", - "SSMIS > Special Sensor Microwave Imager/Sounder", - "AMSR2 > Advanced Microwave Scanning Radiometer 2", - ] - - actual = get_sensors_for_sats( - [ - "F13", - "F17", - "F17", - "am2", - "am2", - "am2", - ] - ) - - assert expected == actual diff --git a/seaice_ecdr/tests/unit/test_platforms.py b/seaice_ecdr/tests/unit/test_platforms.py index 56ac27bd..c3decbf3 100644 --- a/seaice_ecdr/tests/unit/test_platforms.py +++ b/seaice_ecdr/tests/unit/test_platforms.py @@ -1,124 +1,197 @@ """Test the platforms.py routine for seaice_ecdr.""" import datetime as dt +from collections import OrderedDict +from pathlib import Path from typing import get_args +import pytest import yaml +from pydantic import ValidationError from seaice_ecdr.platforms import ( - PLATFORM_AVAILABILITY, - PLATFORMS_FOR_SATS, - SUPPORTED_SAT, - _platform_available_for_date, - _platform_start_dates_are_consistent, - get_platform_by_date, - get_platform_start_dates, + PLATFORM_CONFIG, + SUPPORTED_PLATFORM_ID, +) +from seaice_ecdr.platforms.config import SUPPORTED_PLATFORMS, _get_platform_config +from seaice_ecdr.platforms.models import ( + DateRange, + Platform, + PlatformConfig, + PlatformStartDate, ) -platform_test_dates = { - "n07": dt.date(1980, 1, 1), - "F08": dt.date(1990, 1, 1), - "F11": dt.date(1992, 6, 1), - "F13": dt.date(1998, 10, 1), - "F17": dt.date(2011, 12, 25), - "ame": dt.date(2005, 3, 15), - "am2": dt.date(2019, 2, 14), -} +platform_test_dates: OrderedDict[SUPPORTED_PLATFORM_ID, dt.date] = OrderedDict( + { + "n07": dt.date(1980, 1, 1), + "F08": dt.date(1990, 1, 1), + "F11": dt.date(1992, 6, 1), + "F13": dt.date(1998, 10, 1), + "F17": dt.date(2011, 12, 25), + "ame": dt.date(2005, 3, 15), + "am2": dt.date(2019, 2, 14), + } +) -def test_SUPPORTED_SAT(): - cdrv5_sats = ( +def test_SUPPORTED_PLATFORM_ID(): + cdrv5_platform_ids = ( "am2", "F17", ) - for sat in cdrv5_sats: - assert sat in get_args(SUPPORTED_SAT) + for platform_id in cdrv5_platform_ids: + assert platform_id in get_args(SUPPORTED_PLATFORM_ID) def test_platforms_for_sats(): - for key in PLATFORMS_FOR_SATS.keys(): - assert key in get_args(SUPPORTED_SAT) - + for platform in SUPPORTED_PLATFORMS: + assert platform.id in get_args(SUPPORTED_PLATFORM_ID) -def test_default_platform_availability(): - for key in PLATFORM_AVAILABILITY.keys(): - assert key in str(SUPPORTED_SAT) - pa_dict = PLATFORM_AVAILABILITY[key] - assert "first_date" in pa_dict - assert "last_date" in pa_dict +def test_get_platform_by_date(): + date_before_any_satellites = dt.date(1900, 1, 1) + with pytest.raises(RuntimeError): + PLATFORM_CONFIG.get_platform_by_date(date_before_any_satellites) + expected_f13_date = dt.date(1995, 11, 1) + platform = PLATFORM_CONFIG.get_platform_by_date(expected_f13_date) + assert platform.id == "F13" -def test_default_platform_start_dates_are_consistent(): - platform_start_dates = get_platform_start_dates() - assert _platform_start_dates_are_consistent( - platform_start_dates=platform_start_dates - ) +def test_override_platform_by_date(monkeypatch, tmpdir): + override_file = Path(tmpdir / "override_platform_dates.yaml") + expected_platform_dates = { + "cdr_platform_start_dates": [ + {"platform_id": "F08", "start_date": dt.date(1987, 8, 12)}, + {"platform_id": "F11", "start_date": dt.date(1992, 6, 15)}, + ], + } -def test_platform_availability_by_date(): - all_platforms = list(get_args(SUPPORTED_SAT)) + with open(override_file, "w") as yaml_file: + yaml.safe_dump(expected_platform_dates, yaml_file) - date_before_any_satellites = dt.date(1900, 1, 1) - for platform in all_platforms: - assert not _platform_available_for_date( - date=date_before_any_satellites, - platform=platform, - ) + monkeypatch.setenv("PLATFORM_START_DATES_CONFIG_FILEPATH", str(override_file)) + platform_config = _get_platform_config() - date_after_dead_satellites = dt.date(2100, 1, 1) - dead_satellites = ( - "n07", - "F08", - "F11", - "F13", - "ame", + assert len(platform_config.cdr_platform_start_dates) == 2 + assert platform_config.cdr_platform_start_dates[0].platform_id == "F08" + assert platform_config.cdr_platform_start_dates[0].start_date == dt.date( + 1987, 8, 12 + ) + assert platform_config.cdr_platform_start_dates[1].platform_id == "F11" + assert platform_config.cdr_platform_start_dates[1].start_date == dt.date( + 1992, 6, 15 ) - for platform in dead_satellites: - assert not _platform_available_for_date( - date=date_after_dead_satellites, - platform=platform, - ) - - for platform in platform_test_dates.keys(): - assert _platform_available_for_date( - date=platform_test_dates[platform], - platform=platform, - ) - - -def test_get_platform_by_date(): - platform_start_dates = get_platform_start_dates() - date_list = platform_start_dates.keys() - platform_list = platform_start_dates.values() - for date, expected_platform in zip(date_list, platform_list): - print(f"testing {date} -> {expected_platform}") - platform = get_platform_by_date( - date=date, +def test__get_platform_config(): + platform_cfg = _get_platform_config() + + assert len(platform_cfg.platforms) >= 1 + assert len(platform_cfg.cdr_platform_start_dates) >= 1 + assert PLATFORM_CONFIG == platform_cfg + + +def test_platform_config_validation_error(): + # Tests `validate_platform_start_dates_platform_in_platforms` + with pytest.raises(ValidationError, match=r"Did not find am2 in platform list.*"): + PlatformConfig( + platforms=[ + Platform( + id="ame", + name="fooname", + sensor="sensorname", + date_range=DateRange( + first_date=dt.date(2012, 7, 2), + last_date=None, + ), + ) + ], + cdr_platform_start_dates=[ + PlatformStartDate( + platform_id="am2", + start_date=dt.date(2022, 1, 1), + ) + ], ) - assert platform == expected_platform + # tests `validate_platform_start_dates_in_order` + with pytest.raises( + ValidationError, match=r"Platform start dates are not sequential.*" + ): + PlatformConfig( + platforms=[ + Platform( + id="F13", + name="fooname", + sensor="sensorname", + date_range=DateRange( + first_date=dt.date(1991, 1, 1), + last_date=dt.date(1992, 1, 1), + ), + ), + Platform( + id="am2", + name="fooname", + sensor="sensorname", + date_range=DateRange( + first_date=dt.date(2012, 7, 2), + last_date=None, + ), + ), + ], + cdr_platform_start_dates=[ + PlatformStartDate(platform_id="am2", start_date=dt.date(2022, 1, 1)), + PlatformStartDate(platform_id="F13", start_date=dt.date(1991, 1, 1)), + ], + ) -def test_override_platform_by_date(monkeypatch, tmpdir): - override_file = tmpdir / "override_platform_dates.yaml" - expected_platform_dates = { - dt.date(1987, 7, 10): "F08", - dt.date(1991, 12, 3): "F11", - dt.date(1995, 10, 1): "F13", - dt.date(2002, 6, 1): "ame", - } + # tests `validate_platform_start_date_in_platform_date_range` + # First error date is before the date range, and the second is after. + for err_start_date in (dt.date(2000, 1, 1), dt.date(2015, 1, 1)): + with pytest.raises( + ValidationError, match=r".*is outside of the platform's date range.*" + ): + PlatformConfig( + platforms=[ + Platform( + id="ame", + name="fooname", + sensor="sensorname", + date_range=DateRange( + first_date=dt.date(2012, 7, 2), + last_date=dt.date(2012, 12, 31), + ), + ) + ], + cdr_platform_start_dates=[ + PlatformStartDate( + platform_id="ame", + # Start date is before the first available date + start_date=err_start_date, + ) + ], + ) + + +def test_platform_date_range_validation_error(): + # tests `validate_date_range` + with pytest.raises(ValidationError, match=r".*First date.*is after last date.*"): + DateRange( + first_date=dt.date(2021, 1, 1), + last_date=dt.date(2020, 1, 1), + ) - with open(override_file, "w") as yaml_file: - yaml.safe_dump(expected_platform_dates, yaml_file) - monkeypatch.setenv("PLATFORM_START_DATES_CFG_OVERRIDE_FILE", str(override_file)) +def test_get_first_platform_start_date(): + actual = PLATFORM_CONFIG.get_first_platform_start_date() - # This is a cached function. Calls from other tests may interfere, so clear - # the cache here. - get_platform_start_dates.cache_clear() - platform_dates = get_platform_start_dates() + min_date = min( + [ + platform_start_date.start_date + for platform_start_date in PLATFORM_CONFIG.cdr_platform_start_dates + ] + ) - assert platform_dates == expected_platform_dates + assert actual == min_date diff --git a/seaice_ecdr/tests/unit/test_spillover.py b/seaice_ecdr/tests/unit/test_spillover.py new file mode 100644 index 00000000..acabed98 --- /dev/null +++ b/seaice_ecdr/tests/unit/test_spillover.py @@ -0,0 +1,74 @@ +"""Tests of the land spillover routines coded in seaice_ecdr.py. """ + +import numpy as np +import numpy.testing as nptesting + +from seaice_ecdr import spillover + +test_ils_array = np.array( + [ + [4, 3, 3, 2, 2, 2, 1, 1, 1], + [4, 3, 2, 2, 2, 1, 1, 1, 1], + [4, 3, 2, 2, 1, 1, 1, 2, 1], # includes isolated "coast" cell + [4, 3, 2, 2, 2, 2, 1, 1, 1], + [4, 3, 2, 2, 1, 2, 2, 2, 2], + [4, 3, 2, 2, 1, 1, 1, 2, 1], + [4, 3, 2, 2, 1, 1, 1, 1, 2], # includes diag-conn inlet cell + ], + dtype=np.uint8, +) + + +def test_ils_algorithm_requires_valid_arrvalues(tmpdir): + """ILS algorithm should check for valid ils_array.""" + ils_array = test_ils_array.copy() + ils_array[:, 0] = 5 + + init_conc = test_ils_array.copy().astype(np.float32) + init_conc[test_ils_array > 1] = 1.0 + expected_conc = init_conc.copy() + + try: + filtered_conc = spillover.improved_land_spillover( + ils_arr=ils_array, + init_conc=init_conc, + ) + except AssertionError: + # We expect an AssertionError + return + + # Satisfy vulture... + assert expected_conc is not None + assert filtered_conc is not None + + raise ValueError("We expected ils_array to fail with values of 5") + + +def test_ils_algorithm_keeps_anchored_ice(tmpdir): + """ILS algorithm should not delete anything if all values are high conc.""" + init_conc = test_ils_array.copy().astype(np.float32) + init_conc[test_ils_array > 1] = 1.0 + expected_conc = init_conc.copy() + + filtered_conc = spillover.improved_land_spillover( + ils_arr=test_ils_array, + init_conc=init_conc, + ) + + nptesting.assert_array_equal(filtered_conc, expected_conc) + + +def test_ils_algorithm_removes_unanchored_ice(tmpdir): + """ILS algorithm should not delete anything if all values are high conc.""" + init_conc = test_ils_array.copy().astype(np.float32) + init_conc[test_ils_array > 1] = 1.0 + init_conc[test_ils_array == 3] = 0.0 + expected_conc = init_conc.copy() + expected_conc[test_ils_array == 2] = 0.0 + + filtered_conc = spillover.improved_land_spillover( + ils_arr=test_ils_array, + init_conc=init_conc, + ) + + nptesting.assert_array_equal(filtered_conc, expected_conc) diff --git a/seaice_ecdr/tests/unit/test_temporal_composite_daily.py b/seaice_ecdr/tests/unit/test_temporal_composite_daily.py index 189f9dcb..f250cc60 100644 --- a/seaice_ecdr/tests/unit/test_temporal_composite_daily.py +++ b/seaice_ecdr/tests/unit/test_temporal_composite_daily.py @@ -12,6 +12,7 @@ from seaice_ecdr.constants import ECDR_PRODUCT_VERSION from seaice_ecdr.initial_daily_ecdr import get_idecdr_dir, get_idecdr_filepath +from seaice_ecdr.platforms import SUPPORTED_PLATFORM_ID from seaice_ecdr.temporal_composite_daily import ( iter_dates_near_date, temporally_composite_dataarray, @@ -81,12 +82,12 @@ def test_access_to_standard_output_filename(tmpdir): """Verify that standard output file names can be generated.""" date = dt.date(2021, 2, 19) resolution: Final = "12.5" - sat = "am2" + platform_id: SUPPORTED_PLATFORM_ID = "am2" intermediate_output_dir = Path(tmpdir) sample_ide_filepath = get_idecdr_filepath( date=date, - platform=sat, + platform_id=platform_id, hemisphere=NORTH, resolution=resolution, intermediate_output_dir=intermediate_output_dir, @@ -156,6 +157,9 @@ def test_temporal_composite_da_oneday(): assert np.array_equal(temporal_composite, initial_data_array, equal_nan=True) +@pytest.mark.skip( + reason="Skipping because of intentionally-introduced temporal-interpolation error intended to reproduce CDRv4 results" +) def test_temporal_composite_da_multiday(): """Verify that temporal composite over multiple days. diff --git a/seaice_ecdr/tests/unit/test_util.py b/seaice_ecdr/tests/unit/test_util.py index 9ff74269..10f3fbec 100644 --- a/seaice_ecdr/tests/unit/test_util.py +++ b/seaice_ecdr/tests/unit/test_util.py @@ -13,8 +13,8 @@ date_range, get_num_missing_pixels, nrt_daily_filename, + platform_id_from_filename, raise_error_for_dates, - sat_from_filename, standard_daily_aggregate_filename, standard_daily_filename, standard_monthly_aggregate_filename, @@ -26,7 +26,7 @@ def test_daily_filename_north(): expected = f"sic_psn12.5_20210101_am2_{ECDR_PRODUCT_VERSION}.nc" actual = standard_daily_filename( - hemisphere=NORTH, resolution="12.5", sat="am2", date=dt.date(2021, 1, 1) + hemisphere=NORTH, resolution="12.5", platform_id="am2", date=dt.date(2021, 1, 1) ) assert actual == expected @@ -36,7 +36,7 @@ def test_daily_filename_south(): expected = f"sic_pss12.5_20210101_am2_{ECDR_PRODUCT_VERSION}.nc" actual = standard_daily_filename( - hemisphere=SOUTH, resolution="12.5", sat="am2", date=dt.date(2021, 1, 1) + hemisphere=SOUTH, resolution="12.5", platform_id="am2", date=dt.date(2021, 1, 1) ) assert actual == expected @@ -46,7 +46,7 @@ def test_nrt_daily_filename(): expected = f"sic_psn12.5_20210101_am2_{ECDR_PRODUCT_VERSION}_P.nc" actual = nrt_daily_filename( - hemisphere=NORTH, resolution="12.5", sat="am2", date=dt.date(2021, 1, 1) + hemisphere=NORTH, resolution="12.5", platform_id="am2", date=dt.date(2021, 1, 1) ) assert actual == expected @@ -71,7 +71,7 @@ def test_monthly_filename_north(): actual = standard_monthly_filename( hemisphere=NORTH, resolution="12.5", - sat="am2", + platform_id="am2", year=2021, month=1, ) @@ -85,7 +85,7 @@ def test_monthly_filename_south(): actual = standard_monthly_filename( hemisphere=SOUTH, resolution="12.5", - sat="am2", + platform_id="am2", year=2021, month=1, ) @@ -108,30 +108,33 @@ def test_monthly_aggregate_filename(): assert actual == expected -def test_daily_sat_from_filename(): - expected_sat: Final = "am2" +def test_daily_platform_id_from_filename(): + expected_platform_id: Final = "am2" fn = standard_daily_filename( - hemisphere=NORTH, resolution="12.5", sat=expected_sat, date=dt.date(2021, 1, 1) + hemisphere=NORTH, + resolution="12.5", + platform_id=expected_platform_id, + date=dt.date(2021, 1, 1), ) - actual_sat = sat_from_filename(fn) + actual_platform_id = platform_id_from_filename(fn) - assert expected_sat == actual_sat + assert expected_platform_id == actual_platform_id -def test_monthly_sat_from_filename(): - expected_sat: Final = "F17" +def test_monthly_platform_id_from_filename(): + expected_platform_id: Final = "F17" fn = standard_monthly_filename( hemisphere=SOUTH, resolution="12.5", - sat=expected_sat, + platform_id=expected_platform_id, year=2021, month=1, ) - actual_sat = sat_from_filename(fn) + actual_platform_id = platform_id_from_filename(fn) - assert expected_sat == actual_sat + assert expected_platform_id == actual_platform_id def test_date_range(): @@ -208,6 +211,7 @@ def test_get_num_missing_pixels(monkeypatch): seaice_conc_var=_mock_sic, hemisphere="north", resolution="12.5", + ancillary_source="CDRv5", ) assert detected_missing == 1 diff --git a/seaice_ecdr/util.py b/seaice_ecdr/util.py index e2d9cbd2..18e9b8f7 100644 --- a/seaice_ecdr/util.py +++ b/seaice_ecdr/util.py @@ -8,29 +8,30 @@ import xarray as xr from pm_tb_data._types import Hemisphere -from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS, SUPPORTED_SAT -from seaice_ecdr.ancillary import get_ocean_mask +from seaice_ecdr._types import ECDR_SUPPORTED_RESOLUTIONS +from seaice_ecdr.ancillary import ANCILLARY_SOURCES, get_ocean_mask from seaice_ecdr.constants import ECDR_PRODUCT_VERSION from seaice_ecdr.grid_id import get_grid_id +from seaice_ecdr.platforms import SUPPORTED_PLATFORM_ID def standard_daily_filename( *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, - sat: SUPPORTED_SAT, + platform_id: SUPPORTED_PLATFORM_ID, date: dt.date, ) -> str: """Return standard daily NetCDF filename. - North Daily files: sic_psn12.5_YYYYMMDD_sat_v05r01.nc - South Daily files: sic_pss12.5_YYYYMMDD_sat_v05r01.nc + North Daily files: sic_psn12.5_{YYYYMMDD}_{platform_id}_v05r01.nc + South Daily files: sic_pss12.5_{YYYYMMDD}_{platform_id}_v05r01.nc """ grid_id = get_grid_id( hemisphere=hemisphere, resolution=resolution, ) - fn = f"sic_{grid_id}_{date:%Y%m%d}_{sat}_{ECDR_PRODUCT_VERSION}.nc" + fn = f"sic_{grid_id}_{date:%Y%m%d}_{platform_id}_{ECDR_PRODUCT_VERSION}.nc" return fn @@ -39,13 +40,13 @@ def nrt_daily_filename( *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, - sat: SUPPORTED_SAT, + platform_id: SUPPORTED_PLATFORM_ID, date: dt.date, ) -> str: standard_fn = standard_daily_filename( hemisphere=hemisphere, resolution=resolution, - sat=sat, + platform_id=platform_id, date=date, ) standard_fn_path = Path(standard_fn) @@ -84,20 +85,20 @@ def standard_monthly_filename( *, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, - sat: SUPPORTED_SAT, + platform_id: SUPPORTED_PLATFORM_ID, year: int, month: int, ) -> str: """Return standard monthly NetCDF filename. - North Monthly files: sic_psn12.5_YYYYMM_sat_v05r01.nc - South Monthly files: sic_pss12.5_YYYYMM_sat_v05r01.nc + North Monthly files: sic_psn12.5_{YYYYMM}_{platform_id}_v05r01.nc + South Monthly files: sic_pss12.5_{YYYYMM}_{platform_id}_v05r01.nc """ grid_id = get_grid_id( hemisphere=hemisphere, resolution=resolution, ) - fn = f"sic_{grid_id}_{year}{month:02}_{sat}_{ECDR_PRODUCT_VERSION}.nc" + fn = f"sic_{grid_id}_{year}{month:02}_{platform_id}_{ECDR_PRODUCT_VERSION}.nc" return fn @@ -129,22 +130,22 @@ def standard_monthly_aggregate_filename( # This regex works for both daily and monthly filenames. -STANDARD_FN_REGEX = re.compile(r"sic_ps.*_.*_(?P.*)_.*.nc") +STANDARD_FN_REGEX = re.compile(r"sic_ps.*_.*_(?P.*)_.*.nc") -def sat_from_filename(filename: str) -> SUPPORTED_SAT: +def platform_id_from_filename(filename: str) -> SUPPORTED_PLATFORM_ID: match = STANDARD_FN_REGEX.match(filename) if not match: - raise RuntimeError(f"Failed to parse satellite from {filename}") + raise RuntimeError(f"Failed to parse platform from {filename}") - sat = match.group("sat") + platform_id = match.group("platform_id") - # Ensure the sat is expected. - assert sat in get_args(SUPPORTED_SAT) - sat = cast(SUPPORTED_SAT, sat) + # Ensure the platform is expected. + assert platform_id in get_args(SUPPORTED_PLATFORM_ID) + platform_id = cast(SUPPORTED_PLATFORM_ID, platform_id) - return sat + return platform_id def date_range(*, start_date: dt.date, end_date: dt.date) -> Iterator[dt.date]: @@ -185,11 +186,13 @@ def get_num_missing_pixels( seaice_conc_var: xr.DataArray, hemisphere: Hemisphere, resolution: ECDR_SUPPORTED_RESOLUTIONS, + ancillary_source: ANCILLARY_SOURCES, ) -> int: """The number of missing pixels is anywhere that there are nans over ocean.""" ocean_mask = get_ocean_mask( hemisphere=hemisphere, resolution=resolution, + ancillary_source=ancillary_source, ) num_missing_pixels = int((seaice_conc_var.isnull() & ocean_mask).astype(int).sum()) diff --git a/seaice_ecdr/validation.py b/seaice_ecdr/validation.py index 6dbaabad..47004309 100644 --- a/seaice_ecdr/validation.py +++ b/seaice_ecdr/validation.py @@ -53,6 +53,7 @@ from pm_tb_data._types import Hemisphere from seaice_ecdr.ancillary import ( + ANCILLARY_SOURCES, bitmask_value_for_meaning, flag_value_for_meaning, ) @@ -62,7 +63,7 @@ from seaice_ecdr.monthly import get_monthly_dir from seaice_ecdr.util import date_range, get_complete_output_dir, get_num_missing_pixels -VALIDATION_RESOLUTION: Final = "12.5" +VALIDATION_RESOLUTION: Final = "25" ERROR_FILE_BITMASK = dict( missing_file=-9999, @@ -216,6 +217,7 @@ def get_pixel_counts( ds: xr.Dataset, product: Product, hemisphere: Hemisphere, + ancillary_source: ANCILLARY_SOURCES = "CDRv5", ) -> dict[str, int]: """Return pixel counts from the daily or monthly ds. @@ -267,12 +269,15 @@ def get_pixel_counts( seaice_conc_var=seaice_conc_var, hemisphere=hemisphere, resolution=VALIDATION_RESOLUTION, + ancillary_source=ancillary_source, ) # Per CDR v4, "bad" ice pixels are outside the expected range. # Note: we use 0.0999 instead of 0.1 because SIC values of 10% are # decoded from the integer value of 10 to 0.1, which is represented # as 0.099999 as a floating point data. + # Note: xarray .sum() is similar to numpy.nansum() in that it will + # ignore NaNs in the summation operation gt_100_sic = int((seaice_conc_var > 1).sum()) if product == "daily": less_than_10_sic = int( @@ -438,8 +443,8 @@ def validate_outputs( ) # monthly filepaths should have the form - # "sic_ps{n|s}12.5_{YYYYMM}_{sat}_v05r00.nc" - expected_fn_glob = f"sic_ps{hemisphere[0]}12.5_{year}{month:02}_*_{ECDR_PRODUCT_VERSION}.nc" + # "sic_ps{n|s}25_{YYYYMM}_{sat}_v05r00.nc" + expected_fn_glob = f"sic_ps{hemisphere[0]}25_{year}{month:02}_*_{ECDR_PRODUCT_VERSION}.nc" results = list(monthly_dir.glob(expected_fn_glob)) if not results: validation_dict = make_validation_dict_for_missing_file() diff --git a/tasks/test.py b/tasks/test.py index 63594167..7744f037 100644 --- a/tasks/test.py +++ b/tasks/test.py @@ -20,7 +20,7 @@ def typecheck(ctx): def unit(ctx): """Run unit tests.""" print_and_run( - f"pytest --cov=seaice_ecdr --cov-fail-under 50 -s {PROJECT_DIR}/seaice_ecdr/tests/unit", + f"pytest --cov=seaice_ecdr --cov-fail-under 40 -s {PROJECT_DIR}/seaice_ecdr/tests/unit", pty=True, ) @@ -50,7 +50,8 @@ def pytest(ctx): Includes a code-coverage check. """ print_and_run( - "pytest --cov=seaice_ecdr --cov-fail-under 80 -s", + # "pytest --cov=seaice_ecdr --cov-fail-under 80 -s", + "pytest --cov=seaice_ecdr --cov-fail-under 70 -s", pty=True, ) diff --git a/viz.ipynb b/viz.ipynb new file mode 100644 index 00000000..0e40a42f --- /dev/null +++ b/viz.ipynb @@ -0,0 +1,190 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "a5e89821-62c2-4684-99a6-ef4d42f48be7", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import leafmap\n", + "import xarray as xr\n", + "import hvplot.xarray\n", + "import rioxarray as rxr\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from seaice_ecdr.util import get_complete_output_dir\n", + "from seaice_ecdr.make_25km_cdr import get_25km_daily_cdr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa2bd5f0", + "metadata": {}, + "outputs": [], + "source": [ + "HEMISPHERE = \"north\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2bc9edf", + "metadata": {}, + "outputs": [], + "source": [ + "f17_bt_nt = get_25km_daily_cdr(alg=\"BT_NT\", hemisphere=HEMISPHERE, platform=\"F17\")\n", + "am2_bt_nt = get_25km_daily_cdr(alg=\"BT_NT\", hemisphere=HEMISPHERE, platform=\"am2\")\n", + "\n", + "f17_bt_nt_conc = f17_bt_nt.cdr_seaice_conc\n", + "am2_bt_nt_conc = am2_bt_nt.cdr_seaice_conc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a3b17c6", + "metadata": {}, + "outputs": [], + "source": [ + "f17_nt2 = get_25km_daily_cdr(alg=\"NT2\", hemisphere=HEMISPHERE, platform=\"F17\")\n", + "am2_nt2 = get_25km_daily_cdr(alg=\"NT2\", hemisphere=HEMISPHERE, platform=\"am2\")\n", + "\n", + "f17_nt2_conc = f17_nt2.cdr_seaice_conc\n", + "am2_nt2_conc = am2_nt2.cdr_seaice_conc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43b9fdf5", + "metadata": {}, + "outputs": [], + "source": [ + "# Get average difference between the two platforms\n", + "nt_bt_mean_diff = abs((f17_bt_nt_conc - am2_bt_nt_conc)).mean(dim=(\"y\", \"x\"))\n", + "nt2_mean_diff = abs((f17_nt2_conc - am2_nt2_conc)).mean(dim=(\"y\", \"x\"))\n", + "\n", + "nt_bt_mean_diff" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c88697a6", + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(nt_bt_mean_diff.time, nt_bt_mean_diff, label=\"BT_NT\")\n", + "plt.plot(nt2_mean_diff.time, nt2_mean_diff, label=\"NT2\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08fc6a2b", + "metadata": {}, + "outputs": [], + "source": [ + "import holoviews as hv\n", + "f17_bt_nt = f17_bt_nt_conc.hvplot.image(\n", + " cmap=\"viridis\",\n", + " title=\"F17 BT_NT\",\n", + " min_height=len(f17_bt_nt_conc.y),\n", + " min_width=len(f17_bt_nt_conc.x),\n", + " xlabel=\"\",\n", + " ylabel=\"\",\n", + " colorbar=False,\n", + ")\n", + "am2_bt_nt = am2_bt_nt_conc.hvplot.image(\n", + " cmap=\"viridis\",\n", + " title=\"AMSR2 BT_NT\",\n", + " min_height=len(f17_bt_nt_conc.y),\n", + " min_width=len(f17_bt_nt_conc.x),\n", + " xlabel=\"\",\n", + " ylabel=\"\",\n", + " xticks=None,\n", + " yticks=None,\n", + " colorbar=False,\n", + ")\n", + "\n", + "f17_nt2 = f17_nt2_conc.hvplot.image(\n", + " cmap=\"viridis\",\n", + " title=\"F17 NT2\",\n", + " min_height=len(f17_bt_nt_conc.y),\n", + " min_width=len(f17_bt_nt_conc.x),\n", + " xlabel=\"\",\n", + " ylabel=\"\",\n", + " xticks=None,\n", + " yticks=None,\n", + " colorbar=False,\n", + ")\n", + "am2_nt2 = am2_nt2_conc.hvplot.image(\n", + " cmap=\"viridis\",\n", + " title=\"AMSR2 NT2\",\n", + " min_height=len(f17_bt_nt_conc.y),\n", + " min_width=len(f17_bt_nt_conc.x),\n", + " xlabel=\"\",\n", + " ylabel=\"\",\n", + " xticks=None,\n", + " yticks=None,\n", + " colorbar=True,\n", + ")\n", + "\n", + "ls = hv.link_selections.instance()\n", + "ls(f17_bt_nt + am2_bt_nt + f17_nt2 + am2_nt2).cols(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4098c30-3e76-4fa0-bab9-121835d6060c", + "metadata": {}, + "outputs": [], + "source": [ + "bt_nt_diff = (f17_bt_nt_conc - am2_bt_nt_conc).hvplot.image(\n", + " cmap=\"coolwarm\",\n", + " title=\"F17 - AM2 BT_NT\",\n", + " min_height=len(f17_bt_nt_conc.y),\n", + " min_width=len(f17_bt_nt_conc.x),\n", + " colorbar=False,\n", + ")\n", + "nt2_diff = (f17_nt2_conc - am2_nt2_conc).hvplot.image(\n", + " cmap=\"coolwarm\",\n", + " title=\"F17 - AM2 NT2\",\n", + " min_height=len(f17_bt_nt_conc.y),\n", + " min_width=len(f17_bt_nt_conc.x),\n", + " colorbar=False,\n", + ")\n", + "ls = hv.link_selections.instance()\n", + "ls(bt_nt_diff + nt2_diff)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}