From 4fd8ce6b9721255ccbcdb212988da5290502ac16 Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Thu, 26 Sep 2024 12:57:10 -0600 Subject: [PATCH 01/17] fix F17 start date --- seaice_ecdr/config/default_platform_start_dates.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seaice_ecdr/config/default_platform_start_dates.yml b/seaice_ecdr/config/default_platform_start_dates.yml index 9e72e1bd..43c9b323 100644 --- a/seaice_ecdr/config/default_platform_start_dates.yml +++ b/seaice_ecdr/config/default_platform_start_dates.yml @@ -8,4 +8,4 @@ cdr_platform_start_dates: - platform_id: "F13" start_date: "1995-10-01" - platform_id: "F17" - start_date: "2011-10-04" + start_date: "2008-01-01" From ccb56a603abb7b521736f7d3e01bb5d4b599a81d Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Thu, 26 Sep 2024 19:20:32 -0600 Subject: [PATCH 02/17] remove specific code to support BT_NT land spillover algorithm --- seaice_ecdr/initial_daily_ecdr.py | 234 ++++++++++++++++++++---------- seaice_ecdr/spillover.py | 9 +- 2 files changed, 165 insertions(+), 78 deletions(-) diff --git a/seaice_ecdr/initial_daily_ecdr.py b/seaice_ecdr/initial_daily_ecdr.py index 1f2cd971..86a697a8 100644 --- a/seaice_ecdr/initial_daily_ecdr.py +++ b/seaice_ecdr/initial_daily_ecdr.py @@ -73,6 +73,7 @@ def cdr_bootstrap_raw( """Generate the raw bootstrap concentration field. Note: tb fields should already be transformed before being passed to this function. + Note: bt_conc output values will range from 0.0 to maybe 150.0 """ wtp_37v = bt_coefs["bt_wtp_v37"] wtp_37h = bt_coefs["bt_wtp_h37"] @@ -82,6 +83,11 @@ def cdr_bootstrap_raw( itp_37h = bt_coefs["bt_itp_h37"] itp_19v = bt_coefs["bt_itp_v19"] + # Preserve bt_conc values up to 200% siconc + # NOTE: Don't want to get close to 250+ which tend to be flag values + # in original Goddard code + bt_coefs["maxic"] = 2.0 + bt_conc = bt.calc_bootstrap_conc( tb_v37=tb_v37, tb_h37=tb_h37, @@ -167,15 +173,43 @@ def calculate_cdr_conc( *, bt_conc: npt.NDArray, nt_conc: npt.NDArray, + cdr_conc_min_fraction: float, + cdr_conc_max_fraction: float, + max_valid_siconc=200.0, ) -> npt.NDArray: - """Run the CDR algorithm.""" - # Now calculate CDR SIC - is_bt_seaice = (bt_conc > 0) & (bt_conc <= 100) - use_nt_values = (nt_conc > bt_conc) & is_bt_seaice + """ + Run the CDR algorithm + Here, bt_conc and nt_conc are expected to have values in percent, + though the upper range of values might be great than 100%. + This is permitted for the bt_conc and nt_conc fields, but + not for the cdr_conc field. + Note: range of output values will be 0 to 100.0 and np.nan (=missing) + """ + cdr_conc_min_percent = cdr_conc_min_fraction * 100.0 + cdr_conc_max_percent = cdr_conc_max_fraction * 100.0 + + # It's possible that conc fields might have flagged values > max_valid_siconc + is_bt_seaice = (bt_conc > cdr_conc_min_percent) & (bt_conc <= max_valid_siconc) + is_cdr_seaice = is_bt_seaice + is_nt_seaice = (nt_conc > cdr_conc_min_percent) & (nt_conc <= max_valid_siconc) + + use_nt_values = is_nt_seaice & is_bt_seaice & (nt_conc > bt_conc) + # Note: Here, values without sea ice (because no TBs) have val np.nan cdr_conc = bt_conc.copy() cdr_conc[use_nt_values] = nt_conc[use_nt_values] + # Clamp cdr_conc to min/max + is_low_siconc = is_cdr_seaice & (cdr_conc < cdr_conc_min_percent) + cdr_conc[is_low_siconc] = 0 + + is_high_siconc = ( + is_cdr_seaice + & (cdr_conc > cdr_conc_max_percent) + & (cdr_conc <= max_valid_siconc) + ) + cdr_conc[is_high_siconc] = cdr_conc_max_percent + return cdr_conc @@ -364,6 +398,70 @@ def is_pre_AMSR_platform(platform_id: SUPPORTED_PLATFORM_ID): return platform_id in pre_AMSR_platforms +def compute_bt_asCDRv4_conc(bt_conc, bt_weather_mask, invalid_ice_mask, non_ocean_mask): + """ + This routine was created to match CDRv4 code. + The only reason not to delete is is for hints of how to compare + CDR concentration fields with legacy Goddard results. + + this would have been called before the land spillover algorithm with: + + bt_asCDRv4_conc = compute_bt_asCDRv4_conc( + bt_conc, + ecdr_ide_ds["bt_weather_mask"].data[0, :, :], + ecdr_ide_ds["invalid_ice_mask"].data[0, :, :], + ecdr_ide_ds["non_ocean_mask"].data, + ) + + # By inspection the difference between these values and CDRv4 values + # was ~3E-05 (ie 4-byte floating point roundoff). + """ + + bt_asCDRv4_conc = bt_conc.copy() + bt_asCDRv4_conc[bt_weather_mask] = 0 + bt_asCDRv4_conc[invalid_ice_mask] = 0 + bt_asCDRv4_conc[non_ocean_mask] = 120.0 + bt_asCDRv4_conc[np.isnan(bt_asCDRv4_conc)] = 110.0 + + return bt_asCDRv4_conc + + +def compute_nt_asCDRv4_conc(nt_conc, weather_mask, invalid_ice_mask): + """ + This routine was created to match CDRv4 code. + The only reason not to delete is is for hints of how to compare + CDR concentration fields with legacy Goddard results. + + this would have been called before the land spillover algorithm with: + + nt_asCDRv4_conc = compute_nt_asCDRv4_conc(nt_conc, + weather_mask=ecdr_ide_ds["nt_weather_mask"].data[0, :, :], + invalid_ice_mask=ecdr_ide_ds["invalid_ice_mask"].data[0, :, :], + ) + + """ + nt_asCDRv4_conc = nt_conc.copy() + # Convert to 2-byte int + is_nt_nan = np.isnan(nt_asCDRv4_conc) + nt_asCDRv4_conc = (10 * nt_asCDRv4_conc).astype(np.int16) + + # In CDRv4, NT weather is only where weather condition removes NT conc + is_ntwx_CDRv4 = (nt_conc > 0) & weather_mask + nt_asCDRv4_conc[is_ntwx_CDRv4] = 0 + + nt_asCDRv4_conc[invalid_ice_mask] = -10 + nt_asCDRv4_conc[is_nt_nan] = -10 + + # Note: the NT array here is np.int16 + nt_asCDRv4_conc = nt_asCDRv4_conc.copy() + is_negative_10 = nt_asCDRv4_conc == -10 + + nt_asCDRv4_conc = np.divide(nt_asCDRv4_conc, 10.0).astype(np.float32) + nt_asCDRv4_conc[is_negative_10] = -10 + + return nt_asCDRv4_conc + + def compute_initial_daily_ecdr_dataset( *, date: dt.date, @@ -426,6 +524,7 @@ def compute_initial_daily_ecdr_dataset( ) # Set the TB values to zero at (monthly?) invalid ice mask + # TODO: Only set TB values to zero over the *non-ocean* grid cells tb_si_data[invalid_ice_mask] = 0 ecdr_ide_ds[tb_si_varname] = ( @@ -482,6 +581,11 @@ def compute_initial_daily_ecdr_dataset( "h37": 16, "pole_filled": 32, } + non_ocean_mask = get_non_ocean_mask( + hemisphere=hemisphere, + resolution=tb_data.resolution, + ancillary_source=ancillary_source, + ) for tbname in EXPECTED_ECDR_TB_NAMES: tb_varname = f"{tbname}_day" si_varname = f"{tbname}_day_si" @@ -490,11 +594,6 @@ def compute_initial_daily_ecdr_dataset( != ecdr_ide_ds[si_varname].data[0, :, :] ) & (~np.isnan(ecdr_ide_ds[si_varname].data[0, :, :])) spatint_bitmask_arr[is_tb_si_diff] += TB_SPATINT_BITMASK_MAP[tbname] - non_ocean_mask = get_non_ocean_mask( - hemisphere=hemisphere, - resolution=tb_data.resolution, - ancillary_source=ancillary_source, - ) spatint_bitmask_arr[non_ocean_mask.data] = 0 # Drop the un-spatially interpolated TB field to save space and compute @@ -874,10 +973,10 @@ def compute_initial_daily_ecdr_dataset( # NOTE: This is probably necessary for land spillover algorithms # to properly work with "exactly 0% siconc" calculations # TODO: This 10% cutoff should be a configuration value - bt_conc[bt_conc < 10] = 0 # Remove bt_conc flags (e.g., missing) and set to NaN - bt_conc[bt_conc >= 250] = np.nan + # NOTE: This...is probably not necessary now? + # bt_conc[bt_conc >= 250] = np.nan # Now, compute CDR version of NT estimate nt_conc = cdr_nasateam( @@ -887,12 +986,11 @@ def compute_initial_daily_ecdr_dataset( nt_tiepoints=nt_coefs["nt_tiepoints"], ) - # Need to set invalid ice to zero (note: this includes land) - nt_conc[ecdr_ide_ds["invalid_ice_mask"].data[0, :, :]] = 0 - cdr_conc_raw = calculate_cdr_conc( bt_conc=bt_conc, nt_conc=nt_conc, + cdr_conc_min_fraction=0.1, + cdr_conc_max_fraction=1.0, ) # Apply masks @@ -924,6 +1022,14 @@ def compute_initial_daily_ecdr_dataset( | ecdr_ide_ds["invalid_ice_mask"].data[0, :, :] ) + # If all possible ocean grid cells have NaN siconc, + # then this field has no valid data + # NOTE: invalid_ice_mask includes non-ocean grid cells + + # TODO: Consider skipping to all-missing if all are missing + # is_potential_seaice = ~set_to_zero_sic + # is_all_missing = np.all(np.isnan(cdr_conc_raw[is_potential_seaice])) + cdr_conc = cdr_conc_raw.copy() cdr_conc[set_to_zero_sic] = 0 @@ -932,81 +1038,55 @@ def compute_initial_daily_ecdr_dataset( # land_spillover algorithm should have to rely on this. cdr_conc_pre_spillover = cdr_conc.copy() - bt_asCDRv4_conc = bt_conc.copy() - bt_asCDRv4_conc[ecdr_ide_ds["bt_weather_mask"].data[0, :, :]] = 0 - bt_asCDRv4_conc[ecdr_ide_ds["invalid_ice_mask"].data[0, :, :]] = 0 - bt_asCDRv4_conc[ecdr_ide_ds["non_ocean_mask"].data] = 120.0 - bt_asCDRv4_conc[np.isnan(bt_asCDRv4_conc)] = 110.0 - # Here, difference from v4 is ~3E-05 (ie 4-byte floating point roundoff) - - nt_asCDRv4_conc = nt_conc.copy() - # Convert to 2-byte int - is_nt_nan = np.isnan(nt_asCDRv4_conc) - nt_asCDRv4_conc = (10 * nt_asCDRv4_conc).astype(np.int16) - - # In CDRv4, NT weather is only where weather condition removes NT conc - is_ntwx_CDRv4 = (nt_conc > 0) & ecdr_ide_ds["nt_weather_mask"].data[0, :, :] - nt_asCDRv4_conc[is_ntwx_CDRv4] = 0 - - nt_asCDRv4_conc[ecdr_ide_ds["invalid_ice_mask"].data[0, :, :]] = -10 - nt_asCDRv4_conc[is_nt_nan] = -10 - - # Note: the NT array here is np.int16 - nt_asCDRv4_conc = nt_asCDRv4_conc.copy() - is_negative_10 = nt_asCDRv4_conc == -10 - - nt_asCDRv4_conc = np.divide(nt_asCDRv4_conc, 10.0).astype(np.float32) - nt_asCDRv4_conc[is_negative_10] = -10 - - cdr_conc = land_spillover( - cdr_conc=cdr_conc, - hemisphere=hemisphere, - tb_data=tb_data, - algorithm=land_spillover_alg, - land_mask=non_ocean_mask.data, - ancillary_source=ancillary_source, - bt_conc=bt_asCDRv4_conc, - nt_conc=nt_asCDRv4_conc, - bt_wx=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], - fix_goddard_bt_error=True, - ) - - # In case we used the BT-NT land spillover, set cdr_conc to zero - # where weather filtered...except that it's only the nt wx that gets - # applied...and only where NT wx removed NT conc if land_spillover_alg == "BT_NT": - set_to_zero_sic = ( - ecdr_ide_ds["nt_weather_mask"].data[0, :, :] - | ecdr_ide_ds["invalid_ice_mask"].data[0, :, :] + # The BT_NT algorithm requires separate consideration of the + # boostrap and NASATeam fields + bt_asCDRv4_conc = compute_bt_asCDRv4_conc( + bt_conc, + ecdr_ide_ds["bt_weather_mask"].data[0, :, :], + ecdr_ide_ds["invalid_ice_mask"].data[0, :, :], + ecdr_ide_ds["non_ocean_mask"].data, + ) + nt_asCDRv4_conc = compute_nt_asCDRv4_conc( + nt_conc, + weather_mask=ecdr_ide_ds["nt_weather_mask"].data[0, :, :], + invalid_ice_mask=ecdr_ide_ds["invalid_ice_mask"].data[0, :, :], + ) + cdr_conc = land_spillover( + cdr_conc=cdr_conc, + hemisphere=hemisphere, + tb_data=tb_data, + algorithm=land_spillover_alg, + land_mask=non_ocean_mask.data, + ancillary_source=ancillary_source, + bt_conc=bt_asCDRv4_conc, + nt_conc=nt_asCDRv4_conc, + bt_wx=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], + fix_goddard_bt_error=True, ) + is_ntwx_CDRv4 = (nt_conc > 0) & ecdr_ide_ds["nt_weather_mask"].data[0, :, :] cdr_conc[is_ntwx_CDRv4] = 0 + else: + # Not doing BT_NT + cdr_conc = land_spillover( + cdr_conc=cdr_conc, + hemisphere=hemisphere, + tb_data=tb_data, + algorithm=land_spillover_alg, + land_mask=non_ocean_mask.data, + ancillary_source=ancillary_source, + fix_goddard_bt_error=True, + ) + spillover_applied = np.full((ydim, xdim), False, dtype=bool) spillover_applied[cdr_conc_pre_spillover != cdr_conc.data] = True - # Mask out non-ocean pixels and clamp conc to between 10-100%. - # TODO: These values should be in a configuration file/structure - cdr_conc[non_ocean_mask.data] = np.nan - cdr_conc[cdr_conc < 10] = 0 - cdr_conc[cdr_conc > 100] = 100 - # Set missing TBs to NaN cdr_conc[ecdr_ide_ds["missing_tb_mask"].data[0, :, :]] = np.nan bt_conc[ecdr_ide_ds["missing_tb_mask"].data[0, :, :]] = np.nan nt_conc[ecdr_ide_ds["missing_tb_mask"].data[0, :, :]] = np.nan - # TODO: Remove these CDRv4 flags? - # Apply the CDRv4 flags to the conc field for more direct comparison - flagmask = get_flagmask( - hemisphere=hemisphere, - resolution=tb_data.resolution, - ancillary_source=ancillary_source, - ) - - if flagmask is not None: - cdr_conc[flagmask > 250] = flagmask[flagmask > 250] - cdr_conc[np.isnan(cdr_conc)] = 255 - # Add the BT raw field to the dataset bt_conc = bt_conc / 100.0 # re-set range from 0 to 1 ecdr_ide_ds["raw_bt_seaice_conc"] = ( diff --git a/seaice_ecdr/spillover.py b/seaice_ecdr/spillover.py index e90edb16..e31bf8a8 100644 --- a/seaice_ecdr/spillover.py +++ b/seaice_ecdr/spillover.py @@ -294,16 +294,23 @@ def land_spillover( ) spillover_applied_nt2_bt = coastal_fix( - conc=spillover_applied_nt2, + conc=spillover_applied_nt2.copy(), missing_flag_value=np.nan, land_mask=land_mask, minic=10, fix_goddard_bt_error=fix_goddard_bt_error, ) + # Return NaN for missing and for land + spillover_applied_nt2_bt[non_ocean_mask] = np.nan + spillover_applied = spillover_applied_nt2_bt elif algorithm == "BT_NT": + # TODO: This algorithm is complicated by the NT algorithm + # and should not be supported. It remains here... + # because otherwise it is hard to understand how it + # was implemented. non_ocean_mask = get_non_ocean_mask( hemisphere=hemisphere, resolution=tb_data.resolution, From 9235dcff19614c953393424c466125f75feaed5d Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Thu, 26 Sep 2024 19:36:17 -0600 Subject: [PATCH 03/17] allow raw BT and NT conc of 0 to 200% --- seaice_ecdr/set_daily_ncattrs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/seaice_ecdr/set_daily_ncattrs.py b/seaice_ecdr/set_daily_ncattrs.py index 05539e5b..ffae030a 100644 --- a/seaice_ecdr/set_daily_ncattrs.py +++ b/seaice_ecdr/set_daily_ncattrs.py @@ -307,6 +307,7 @@ def finalize_cdecdr_ds( # TODO: conversion to ubyte should be done with DataArray encoding dict # NOTE: We are overwriting the attrs of the original conc field # TODO: scale_factor and add_offset might get set during encoding + # NOTE: We allow raw siconc up to 200% for potential validation measures ds["raw_bt_seaice_conc"] = ( ("time", "y", "x"), ds["raw_bt_seaice_conc"].data, @@ -319,7 +320,7 @@ def finalize_cdecdr_ds( " raw field with no masking or filtering" ), "grid_mapping": "crs", - "valid_range": np.array((0, 100), dtype=np.uint8), + "valid_range": np.array((0, 200), dtype=np.uint8), }, ) @@ -327,6 +328,7 @@ def finalize_cdecdr_ds( # TODO: adding time dimension should probably happen earlier # TODO: conversion to ubyte should be done with DataArray encoding dict # TODO: scale_factor and add_offset might get set during encoding + # NOTE: We allow raw siconc up to 200% for potential validation measures ds["raw_nt_seaice_conc"] = ( ("time", "y", "x"), ds["raw_nt_seaice_conc"].data, @@ -339,9 +341,7 @@ def finalize_cdecdr_ds( " raw field with no masking or filtering" ), "grid_mapping": "crs", - # We set a `valid_min` of 0 because we allow nasateam raw - # concentrations >100%. We do not set an upper limit. - "valid_min": 0, + "valid_range": np.array((0, 200), dtype=np.uint8), }, ) From 57cc450f3eaa285b345d9929921a73f48de80bfe Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Thu, 26 Sep 2024 20:10:14 -0600 Subject: [PATCH 04/17] fix QA flag values --- seaice_ecdr/initial_daily_ecdr.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/seaice_ecdr/initial_daily_ecdr.py b/seaice_ecdr/initial_daily_ecdr.py index 86a697a8..fa6137c6 100644 --- a/seaice_ecdr/initial_daily_ecdr.py +++ b/seaice_ecdr/initial_daily_ecdr.py @@ -511,6 +511,7 @@ def compute_initial_daily_ecdr_dataset( tb_si_longname = f"Spatially interpolated {ecdr_ide_ds[tb_day_name].long_name}" tb_units = "K" + """ if ancillary_source == "CDRv4": # The CDRv4 calculation causes TB to be zero/missing where # no sea ice can occur because of invalid region or land @@ -526,6 +527,7 @@ def compute_initial_daily_ecdr_dataset( # Set the TB values to zero at (monthly?) invalid ice mask # TODO: Only set TB values to zero over the *non-ocean* grid cells tb_si_data[invalid_ice_mask] = 0 + """ ecdr_ide_ds[tb_si_varname] = ( ("time", "y", "x"), @@ -1079,8 +1081,9 @@ def compute_initial_daily_ecdr_dataset( fix_goddard_bt_error=True, ) - spillover_applied = np.full((ydim, xdim), False, dtype=bool) - spillover_applied[cdr_conc_pre_spillover != cdr_conc.data] = True + had_spillover_applied = (cdr_conc_pre_spillover != cdr_conc.data) & np.isfinite( + cdr_conc.data + ) # Set missing TBs to NaN cdr_conc[ecdr_ide_ds["missing_tb_mask"].data[0, :, :]] = np.nan @@ -1180,12 +1183,17 @@ def compute_initial_daily_ecdr_dataset( qa_bitmask = np.zeros((ydim, xdim), dtype=np.uint8) qa_bitmask[ecdr_ide_ds["bt_weather_mask"].data[0, :, :]] += 1 qa_bitmask[ecdr_ide_ds["nt_weather_mask"].data[0, :, :]] += 2 - qa_bitmask[spillover_applied] += 4 + qa_bitmask[had_spillover_applied] += 4 qa_bitmask[invalid_tb_mask & ~ecdr_ide_ds["invalid_ice_mask"].data[0, :, :]] += 8 - qa_bitmask[ecdr_ide_ds["invalid_ice_mask"].data[0, :, :]] += 16 qa_bitmask[ ecdr_ide_ds["cdr_seaice_conc_interp_spatial_flag"].data[0, :, :] != 0 ] += 32 + + # Explicitly set climatologically masked ocean cells to 16 + invalid_ice_ocean_mask = invalid_ice_mask.data & ~non_ocean_mask.data + qa_bitmask[invalid_ice_ocean_mask] = 16 + + # Explicitly set land to zero qa_bitmask[non_ocean_mask] = 0 ecdr_ide_ds["cdr_seaice_conc_qa_flag"] = ( From 2dcc36b6a028725d055e13c12e66365cc40fd400 Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Thu, 26 Sep 2024 20:11:03 -0600 Subject: [PATCH 05/17] fix ncattr typos --- seaice_ecdr/set_daily_ncattrs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seaice_ecdr/set_daily_ncattrs.py b/seaice_ecdr/set_daily_ncattrs.py index ffae030a..17239a6b 100644 --- a/seaice_ecdr/set_daily_ncattrs.py +++ b/seaice_ecdr/set_daily_ncattrs.py @@ -316,7 +316,7 @@ def finalize_cdecdr_ds( "coverage_content_type": "image", "units": "1", "long_name": ( - "Bootstrap sea ice concntration;" + "Bootstrap sea ice concentration;" " raw field with no masking or filtering" ), "grid_mapping": "crs", @@ -337,7 +337,7 @@ def finalize_cdecdr_ds( "coverage_content_type": "image", "units": "1", "long_name": ( - "NASA Team sea ice concntration;" + "NASA Team sea ice concentration;" " raw field with no masking or filtering" ), "grid_mapping": "crs", From d34537356df5383961d8830cbb9fdee3e2a7b559 Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Thu, 26 Sep 2024 20:17:57 -0600 Subject: [PATCH 06/17] comment out rounding statements used to match v4 results --- seaice_ecdr/initial_daily_ecdr.py | 35 ++++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/seaice_ecdr/initial_daily_ecdr.py b/seaice_ecdr/initial_daily_ecdr.py index fa6137c6..1d7b70ac 100644 --- a/seaice_ecdr/initial_daily_ecdr.py +++ b/seaice_ecdr/initial_daily_ecdr.py @@ -303,7 +303,7 @@ def get_nasateam_weather_mask( ecdr_ide_ds["v19_day_si"].data[0, :, :], ) # Round off for better match with CDRv4 results - nt_gr_3719 = np.round(nt_gr_3719, 4) + # nt_gr_3719 = np.round(nt_gr_3719, 4) nt_3719_weather_mask = nt_gr_3719 > nt_coefs["nt_gradient_thresholds"]["3719"] @@ -317,7 +317,7 @@ def get_nasateam_weather_mask( ecdr_ide_ds["v19_day_si"].data[0, :, :], ) # Round off for better match with CDRv4 results - nt_gr_2219 = np.round(nt_gr_2219, 4) + # nt_gr_2219 = np.round(nt_gr_2219, 4) nt_2219_weather_mask = nt_gr_2219 > nt_coefs["nt_gradient_thresholds"]["2219"] weather_mask = nt_3719_weather_mask | nt_2219_weather_mask @@ -611,21 +611,22 @@ def compute_initial_daily_ecdr_dataset( # See: https://docs.scipy.org/doc//numpy-1.9.0/reference/generated/numpy.around.html#numpy.around # In order to reproduce the v4 rounding values, need to first round to # two decimal places, then round to 1 - ecdr_ide_ds["v37_day_si"].data = np.round( - np.round(ecdr_ide_ds["v37_day_si"].data, 2), 1 - ) - ecdr_ide_ds["h37_day_si"].data = np.round( - np.round(ecdr_ide_ds["h37_day_si"].data, 2), 1 - ) - ecdr_ide_ds["v19_day_si"].data = np.round( - np.round(ecdr_ide_ds["v19_day_si"].data, 2), 1 - ) - ecdr_ide_ds["h19_day_si"].data = np.round( - np.round(ecdr_ide_ds["h19_day_si"].data, 2), 1 - ) - ecdr_ide_ds["v22_day_si"].data = np.round( - np.round(ecdr_ide_ds["v22_day_si"].data, 2), 1 - ) + ## NOTE: Remove these once we are ready to add CDRv5 integration tests + # ecdr_ide_ds["v37_day_si"].data = np.round( + # np.round(ecdr_ide_ds["v37_day_si"].data, 2), 1 + # ) + # ecdr_ide_ds["h37_day_si"].data = np.round( + # np.round(ecdr_ide_ds["h37_day_si"].data, 2), 1 + # ) + # ecdr_ide_ds["v19_day_si"].data = np.round( + # np.round(ecdr_ide_ds["v19_day_si"].data, 2), 1 + # ) + # ecdr_ide_ds["h19_day_si"].data = np.round( + # np.round(ecdr_ide_ds["h19_day_si"].data, 2), 1 + # ) + # ecdr_ide_ds["v22_day_si"].data = np.round( + # np.round(ecdr_ide_ds["v22_day_si"].data, 2), 1 + # ) # Set a missing data mask using 19V TB field # Note: this will not include missing TBs in the day's invalid_ice field From c711455453dfac27320f4e4c827c1e6ef1fb2e49 Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Thu, 26 Sep 2024 20:25:54 -0600 Subject: [PATCH 07/17] seaice_ecdr/temporal_composite_daily.py --- seaice_ecdr/temporal_composite_daily.py | 28 +++++++++++++------------ 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/seaice_ecdr/temporal_composite_daily.py b/seaice_ecdr/temporal_composite_daily.py index f351aebd..82dc43b7 100644 --- a/seaice_ecdr/temporal_composite_daily.py +++ b/seaice_ecdr/temporal_composite_daily.py @@ -244,13 +244,13 @@ def temporally_composite_dataarray( raise RuntimeError(f" the target_date was not in the dataarray: {target_date}") temp_comp_2d = np.squeeze(temp_comp_da.data) - assert temp_comp_2d.shape == (ydim, xdim) + try: + assert temp_comp_2d.shape == (ydim, xdim) + except AssertionError as e: + raise RuntimeError( + f"Error asserting that squeezed data array is two dimensional with shape ({ydim}, {xdim}); got {temp_comp_2d.shape} instead; Assertion error was {e}" + ) - # TODO: These lines are commented out in order to reproduce the - # CDRv4 ERROR where the "home" smmr day does NOT have daily_clim - # applied to it. - # TODO: Putting these lines back in, because now I *am* seeing its effect - # in the re-run CDRv4 fields(!) if daily_climatology_mask is not None: temp_comp_2d[daily_climatology_mask] = 0 @@ -269,9 +269,9 @@ def temporally_composite_dataarray( pconc[need_values] = 0 pdist[need_values] = 0 nconc[need_values] = 0 - # TODO: Fix this for v5 release (implemented to match v04f00) - # ndist[need_values] = 0 # Correct - pdist[need_values] = 0 # Error as CDRv04r00 error + # DONE: Fixed this for v5 release (implemented to match v04f00) + # pdist[need_values] = 0 # Error as CDRv04r00 error + ndist[need_values] = 0 # Correct for time_offset in range(1, interp_range + 1): if n_missing == 0: @@ -340,12 +340,14 @@ def temporally_composite_dataarray( # Update the temporal interp flag value temporal_flags[have_only_prior] = 10 * pdist[have_only_prior] - # temp_comp_2d[have_only_next] = nconc[have_only_next] # Correct - temp_comp_2d[have_only_next] = pconc[have_only_next] # Error as CDRv04r00 + # DONE: Corrected CDRv04r00 error + # temp_comp_2d[have_only_next] = pconc[have_only_next] # CDRv04r00 error + temp_comp_2d[have_only_next] = nconc[have_only_next] # Correct # Update the temporal interp flag value - # temporal_flags[have_only_next] = ndist[have_only_next] # Correct - temporal_flags[have_only_next] = pdist[have_only_next] # Error as CDRv04r00 + # DONE: Corrected CDRv04r00 error + # temporal_flags[have_only_next] = pdist[have_only_next] # CDRv04r00 error + temporal_flags[have_only_next] = ndist[have_only_next] # Correct # Ensure flag values do not occur over land temporal_flags[non_ocean_mask.data] = 0 From 07433e8da97be4d2dba16ea2830d16f3665e7912 Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Thu, 26 Sep 2024 20:32:23 -0600 Subject: [PATCH 08/17] comment out CDRv4 spatial interpolation method --- seaice_ecdr/initial_daily_ecdr.py | 35 ++++++++++++++++++------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/seaice_ecdr/initial_daily_ecdr.py b/seaice_ecdr/initial_daily_ecdr.py index 1d7b70ac..95a7a593 100644 --- a/seaice_ecdr/initial_daily_ecdr.py +++ b/seaice_ecdr/initial_daily_ecdr.py @@ -492,21 +492,26 @@ def compute_initial_daily_ecdr_dataset( tb_si_varname = f"{tb_day_name}_si" - if ancillary_source == "CDRv5": - # The CDRv5 spatint requires min of two adj grid cells - # and allows corner grid cells with weighting of 0.707 - tb_si_data = spatial_interp_tbs( - ecdr_ide_ds[tb_day_name].data[0, :, :], - ) - elif ancillary_source == "CDRv4": - # The CDRv4 calculation does not use diagonal grid cells - # and requires a min of 3 adjacent grid cells - tb_si_data = spatial_interp_tbs( - ecdr_ide_ds[tb_day_name].data[0, :, :], - corner_weight=0, - min_weightsum=3, - image_shift_mode="grid-wrap", # CDRv4 wraps tb field for interp - ) + # TODO: Remove the old (CDRv4) spatial interp method with v5 publishing + # if ancillary_source == "CDRv5": + # # The CDRv5 spatint requires min of two adj grid cells + # # and allows corner grid cells with weighting of 0.707 + # tb_si_data = spatial_interp_tbs( + # ecdr_ide_ds[tb_day_name].data[0, :, :], + # ) + # elif ancillary_source == "CDRv4": + # # The CDRv4 calculation does not use diagonal grid cells + # # and requires a min of 3 adjacent grid cells + # tb_si_data = spatial_interp_tbs( + # ecdr_ide_ds[tb_day_name].data[0, :, :], + # corner_weight=0, + # min_weightsum=3, + # image_shift_mode="grid-wrap", # CDRv4 wraps tb field for interp + # ) + + tb_si_data = spatial_interp_tbs( + ecdr_ide_ds[tb_day_name].data[0, :, :], + ) tb_si_longname = f"Spatially interpolated {ecdr_ide_ds[tb_day_name].long_name}" tb_units = "K" From 885625e9a373fb0f3573ec349573f693f3564b75 Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Thu, 26 Sep 2024 22:53:59 -0600 Subject: [PATCH 09/17] fix daily melt onset fields --- seaice_ecdr/intermediate_daily.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/seaice_ecdr/intermediate_daily.py b/seaice_ecdr/intermediate_daily.py index 487f70ac..d9986f73 100644 --- a/seaice_ecdr/intermediate_daily.py +++ b/seaice_ecdr/intermediate_daily.py @@ -243,8 +243,13 @@ def update_melt_onset_for_day( # Apply non-ocean mask is_melted_today[0, non_ocean_mask] = False + # Note: zero counts as a prior value have_prior_melt_values = prior_melt_onset_field != MELT_ONSET_FILL_VALUE - is_missing_prior = prior_melt_onset_field == MELT_ONSET_FILL_VALUE + + # Note: zero is also a "missing prior" day that can be updated + is_missing_prior = (prior_melt_onset_field == MELT_ONSET_FILL_VALUE) | ( + prior_melt_onset_field == 0 + ) has_new_melt = is_missing_prior & is_melted_today melt_onset_field = np.zeros(prior_melt_onset_field.shape, dtype=np.uint8) @@ -253,6 +258,7 @@ def update_melt_onset_for_day( have_prior_melt_values ] + # Note: this can replace zeros with a value day_of_year = int(date.strftime("%j")) melt_onset_field[has_new_melt] = day_of_year @@ -305,13 +311,13 @@ def create_melt_onset_field( is_first_day_of_melt = day_of_year == MELT_SEASON_FIRST_DOY if is_first_day_of_melt: - # The first day of the melt seasion should start with an empty melt - # onset field. + # The first day of the melt seasion should have a prior_melt field + # with fill value where there is siconc and zeros in non-siconc ocean + # The fill values are set here. The zeros are set later in this routine prior_melt_onset_field = _empty_melt_onset_field( hemisphere=hemisphere, resolution=resolution, ) - logger.debug(f"using empty melt_onset_field for prior for {day_of_year}") else: # During the melt season, try to read the previous day's input as a # starting point. Use an empty melt onset field if no data for the @@ -357,6 +363,19 @@ def create_melt_onset_field( date=date, ) + # On first day of melt, set to 0 if siconc < 50% and didn't melt today + # Note: values of 0 can be replaced later if they melt in the melt season + if is_first_day_of_melt: + is_nosiconc_ti = ( + np.isfinite(cdr_conc_ti) + & (cdr_conc_ti < 0.5) + & (updated_melt_onset[0, :, :] == MELT_ONSET_FILL_VALUE) + ) + updated_melt_onset[0, is_nosiconc_ti] = 0 + logger.debug( + f"using zeros and fill values in ocean for first melt_onset field on day {day_of_year}" + ) + return updated_melt_onset From e2dbc827a190e98eff100e4792ede51368c295a1 Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Fri, 27 Sep 2024 14:00:27 -0600 Subject: [PATCH 10/17] for monthly stdev field, use np.std() instead of xr.DataArray.std() to avoid div by zero error/warning --- seaice_ecdr/intermediate_monthly.py | 31 +++++++++++++++++++---------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/seaice_ecdr/intermediate_monthly.py b/seaice_ecdr/intermediate_monthly.py index 1192f779..17c88acf 100644 --- a/seaice_ecdr/intermediate_monthly.py +++ b/seaice_ecdr/intermediate_monthly.py @@ -394,19 +394,29 @@ def calc_cdr_seaice_conc_monthly_stdev( daily_cdr_seaice_conc: xr.DataArray, ) -> xr.DataArray: """Create the `cdr_seaice_conc_monthly_stdev` variable.""" - cdr_seaice_conc_monthly_stdev = daily_cdr_seaice_conc.std( - dim="time", + # Using np.std() instead of DataArray.std() eliminates + # a div by zero warning from but means that the DataArray + # must be set up explicitly. + cdr_seaice_conc_monthly_stdev_np = np.std( + np.array(daily_cdr_seaice_conc), + axis=0, ddof=1, ) - cdr_seaice_conc_monthly_stdev.name = "cdr_seaice_conc_monthly_stdev" - - cdr_seaice_conc_monthly_stdev = cdr_seaice_conc_monthly_stdev.assign_attrs( - long_name="Passive Microwave Monthly Northern Hemisphere Sea Ice Concentration Source Estimated Standard Deviation", - valid_range=(np.float32(0.0), np.float32(1.0)), - grid_mapping="crs", - units="1", + cdr_seaice_conc_monthly_stdev = xr.DataArray( + data=cdr_seaice_conc_monthly_stdev_np, + name="cdr_seaice_conc_monthly_stdev", + coords=[ + daily_cdr_seaice_conc.y, + daily_cdr_seaice_conc.x, + ], + dims=["y", "x"], + attrs=dict( + long_name="Passive Microwave Monthly Northern Hemisphere Sea Ice Concentration Source Estimated Standard Deviation", + valid_range=(np.float32(0.0), np.float32(1.0)), + grid_mapping="crs", + units="1", + ), ) - cdr_seaice_conc_monthly_stdev.encoding = dict( _FillValue=-1, zlib=True, @@ -556,7 +566,6 @@ def make_intermediate_monthly_ds( cdr_seaice_conc_monthly_qa_flag=cdr_seaice_conc_monthly_qa_flag, surface_type_mask=surface_type_mask_monthly, ) - # Add monthly melt onset if the hemisphere is north. We don't detect melt in # the southern hemisphere. if hemisphere == NORTH: From d25212d5f2051fa3ee96501f83b7b594212a7ee0 Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Fri, 27 Sep 2024 15:52:15 -0600 Subject: [PATCH 11/17] update comments and valid range for melt onset fields --- seaice_ecdr/intermediate_monthly.py | 23 ++++++++++++++++++----- seaice_ecdr/set_daily_ncattrs.py | 15 ++++++++++----- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/seaice_ecdr/intermediate_monthly.py b/seaice_ecdr/intermediate_monthly.py index 17c88acf..8d27c957 100644 --- a/seaice_ecdr/intermediate_monthly.py +++ b/seaice_ecdr/intermediate_monthly.py @@ -393,10 +393,18 @@ def calc_cdr_seaice_conc_monthly_stdev( *, daily_cdr_seaice_conc: xr.DataArray, ) -> xr.DataArray: - """Create the `cdr_seaice_conc_monthly_stdev` variable.""" - # Using np.std() instead of DataArray.std() eliminates - # a div by zero warning from but means that the DataArray - # must be set up explicitly. + """ + Create the `cdr_seaice_conc_monthly_stdev` variable. + + Note: Using np.std() instead of DataArray.std() eliminates + a div by zero warning from but means that the DataArray + must be set up explicitly instead of resulting from + the DataArray.std() operation. Attributes and encoding + are explicitly specified here just as they need to be + when using functions on a DataArray. + Note: In numpy array terms, "axis=0" refers to the time axis + because the dimensions of the DataArray are ("time", "y", "x"). + """ cdr_seaice_conc_monthly_stdev_np = np.std( np.array(daily_cdr_seaice_conc), axis=0, @@ -441,8 +449,13 @@ def calc_cdr_melt_onset_day_monthly( cdr_melt_onset_day_monthly = cdr_melt_onset_day_monthly.assign_attrs( long_name="Monthly Day of Snow Melt Onset Over Sea Ice", units="1", - valid_range=(np.ubyte(60), np.ubyte(255)), + valid_range=(np.ubyte(0), np.ubyte(255)), grid_mapping="crs", + comment="Value of 0 indicates sea ice concentration less than 50%" + " at start of melt season; values of 60-244 indicate day" + " of year of snow melt onset on sea ice detected during" + " melt season; value of 255 indicates no melt detected" + " during melt season, including non-ocean grid cells.", ) cdr_melt_onset_day_monthly.encoding = dict( _FillValue=None, diff --git a/seaice_ecdr/set_daily_ncattrs.py b/seaice_ecdr/set_daily_ncattrs.py index 17239a6b..7ebc333e 100644 --- a/seaice_ecdr/set_daily_ncattrs.py +++ b/seaice_ecdr/set_daily_ncattrs.py @@ -139,6 +139,10 @@ def finalize_cdecdr_ds( ) # Note: this is NH only, hence the try/except block + # Note: valid range allows values: + # 0: conc < 50% at start of melt season + # 60-244: day-of-year melt detected during melt season + # 255: no melt detected during melt season try: ds["cdr_melt_onset_day"] = ( ("time", "y", "x"), @@ -148,12 +152,13 @@ def finalize_cdecdr_ds( "long_name": "Day Of Year of NH Snow Melt Onset On Sea Ice", "units": "1", "grid_mapping": "crs", - "valid_range": np.array((60, 255), dtype=np.uint8), + "valid_range": np.array((0, 255), dtype=np.uint8), "comment": ( - "Value of 255 means no melt detected yet or the date" - " is outside the melt season. Other values indicate" - " the day of year when melt was first detected at" - " this location." + "Value of 0 indicates sea ice concentration less than 50%" + " at start of melt season; values of 60-244 indicate day" + " of year of snow melt onset on sea ice detected during" + " melt season; value of 255 indicates no melt detected" + " during melt season, including non-ocean grid cells." ), }, { From 616d6b2fdb5c758937029201fa8961807668819d Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Fri, 27 Sep 2024 19:06:41 -0600 Subject: [PATCH 12/17] use axis identification of 'time' axis to do monthly stdev calculation --- seaice_ecdr/intermediate_monthly.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/seaice_ecdr/intermediate_monthly.py b/seaice_ecdr/intermediate_monthly.py index 8d27c957..a2ddc545 100644 --- a/seaice_ecdr/intermediate_monthly.py +++ b/seaice_ecdr/intermediate_monthly.py @@ -405,19 +405,26 @@ def calc_cdr_seaice_conc_monthly_stdev( Note: In numpy array terms, "axis=0" refers to the time axis because the dimensions of the DataArray are ("time", "y", "x"). """ - cdr_seaice_conc_monthly_stdev_np = np.std( + cdr_seaice_conc_monthly_stdev_np = np.nanstd( np.array(daily_cdr_seaice_conc), - axis=0, + axis=daily_cdr_seaice_conc.get_axis_num("time"), ddof=1, ) + + # Extract non-'time' dims (and coords) from DataArray + dims = [dim for dim in daily_cdr_seaice_conc.dims if dim != "time"] + + # Note: list comprehension does not work using iterator as key + coords = [] + for dim in dims: + if dim != "time": + coords.append(daily_cdr_seaice_conc[dim]) + cdr_seaice_conc_monthly_stdev = xr.DataArray( data=cdr_seaice_conc_monthly_stdev_np, name="cdr_seaice_conc_monthly_stdev", - coords=[ - daily_cdr_seaice_conc.y, - daily_cdr_seaice_conc.x, - ], - dims=["y", "x"], + coords=coords, + dims=dims, attrs=dict( long_name="Passive Microwave Monthly Northern Hemisphere Sea Ice Concentration Source Estimated Standard Deviation", valid_range=(np.float32(0.0), np.float32(1.0)), From ccd3bf0283073b64c77f4fada7c95a882378ab03 Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Fri, 27 Sep 2024 19:35:43 -0600 Subject: [PATCH 13/17] clamp lower bound of monthly cdr_seaice_conc to 10% --- seaice_ecdr/intermediate_monthly.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/seaice_ecdr/intermediate_monthly.py b/seaice_ecdr/intermediate_monthly.py index a2ddc545..c335f36c 100644 --- a/seaice_ecdr/intermediate_monthly.py +++ b/seaice_ecdr/intermediate_monthly.py @@ -359,6 +359,12 @@ def calc_cdr_seaice_conc_monthly( daily_conc_for_month = daily_ds_for_month.cdr_seaice_conc conc_monthly = daily_conc_for_month.mean(dim="time") + # NOTE: Clamp lower bound to 10% minic + conc_monthly = conc_monthly.where( + np.isnan(conc_monthly) | (conc_monthly >= 0.1), + other=0, + ) + num_missing_conc_pixels = get_num_missing_pixels( seaice_conc_var=conc_monthly, hemisphere=hemisphere, From 18a22a01e0cb9173d7705bf479f8c5c6bb2a9b42 Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Mon, 30 Sep 2024 12:40:21 -0600 Subject: [PATCH 14/17] clean up comments --- seaice_ecdr/initial_daily_ecdr.py | 92 ++++++++----------------------- 1 file changed, 24 insertions(+), 68 deletions(-) diff --git a/seaice_ecdr/initial_daily_ecdr.py b/seaice_ecdr/initial_daily_ecdr.py index 95a7a593..12ce6869 100644 --- a/seaice_ecdr/initial_daily_ecdr.py +++ b/seaice_ecdr/initial_daily_ecdr.py @@ -73,7 +73,10 @@ def cdr_bootstrap_raw( """Generate the raw bootstrap concentration field. Note: tb fields should already be transformed before being passed to this function. - Note: bt_conc output values will range from 0.0 to maybe 150.0 + Note: bt_conc output values will range from 0.0 to maybe 150.0% + We will default to having the maximum value be 200%, which + should cover all reasonable cases. + This is the "maxic" in bt_coefs. """ wtp_37v = bt_coefs["bt_wtp_v37"] wtp_37h = bt_coefs["bt_wtp_h37"] @@ -102,7 +105,6 @@ def cdr_bootstrap_raw( line_37v19v=bt_coefs["v1937_iceline"], ad_line_offset=bt_coefs["ad_line_offset"], maxic_frac=bt_coefs["maxic"], - # Note: the missing value of 255 ends up getting set to `nan` below. missing_flag_value=255, ) @@ -169,7 +171,7 @@ class NtCoefs(TypedDict): nt_gradient_thresholds: NasateamGradientRatioThresholds -def calculate_cdr_conc( +def calc_cdr_conc( *, bt_conc: npt.NDArray, nt_conc: npt.NDArray, @@ -188,7 +190,9 @@ def calculate_cdr_conc( cdr_conc_min_percent = cdr_conc_min_fraction * 100.0 cdr_conc_max_percent = cdr_conc_max_fraction * 100.0 - # It's possible that conc fields might have flagged values > max_valid_siconc + # It's possible that conc fields might have + # flagged values > max_valid_siconc + # so check for that here. is_bt_seaice = (bt_conc > cdr_conc_min_percent) & (bt_conc <= max_valid_siconc) is_cdr_seaice = is_bt_seaice is_nt_seaice = (nt_conc > cdr_conc_min_percent) & (nt_conc <= max_valid_siconc) @@ -235,7 +239,7 @@ def _setup_ecdr_ds( # Set initial global attributes - # Note: these attributes should probably go with + # TODO: these attributes should probably go with # a variable named "CDR_parameters" or similar ecdr_ide_ds.attrs["grid_id"] = grid_id @@ -275,7 +279,8 @@ def _setup_ecdr_ds( }, { "zlib": True, - # TODO: these TB fields can be expressly packed as uint16 + # TODO: These TB fields can be expressly packed as uint16 + # because that is how they are initially provided. }, ) @@ -302,9 +307,6 @@ def get_nasateam_weather_mask( ecdr_ide_ds["v37_day_si"].data[0, :, :], ecdr_ide_ds["v19_day_si"].data[0, :, :], ) - # Round off for better match with CDRv4 results - # nt_gr_3719 = np.round(nt_gr_3719, 4) - nt_3719_weather_mask = nt_gr_3719 > nt_coefs["nt_gradient_thresholds"]["3719"] # SMMR does not have a 22v channel that's suitable for the nt weather @@ -316,9 +318,6 @@ def get_nasateam_weather_mask( ecdr_ide_ds["v22_day_si"].data[0, :, :], ecdr_ide_ds["v19_day_si"].data[0, :, :], ) - # Round off for better match with CDRv4 results - # nt_gr_2219 = np.round(nt_gr_2219, 4) - nt_2219_weather_mask = nt_gr_2219 > nt_coefs["nt_gradient_thresholds"]["2219"] weather_mask = nt_3719_weather_mask | nt_2219_weather_mask @@ -344,8 +343,11 @@ def get_flagmask( # NOTE: This could be better organized and name-conventioned? - # TODO: Put these flagmasks in the ancillary files - # (or at least a better location!) + # TODO: Put these flagmasks in the ancillary files because they + # are a convenience to the user for creating plots of the + # siconc fields with sentinel values defined. But, for + # stricter compliance with CF-standards, we do not want + # these sentinel values in the actual data arrays. flagmask = None @@ -441,6 +443,7 @@ def compute_nt_asCDRv4_conc(nt_conc, weather_mask, invalid_ice_mask): """ nt_asCDRv4_conc = nt_conc.copy() + # Convert to 2-byte int is_nt_nan = np.isnan(nt_asCDRv4_conc) nt_asCDRv4_conc = (10 * nt_asCDRv4_conc).astype(np.int16) @@ -516,24 +519,6 @@ def compute_initial_daily_ecdr_dataset( tb_si_longname = f"Spatially interpolated {ecdr_ide_ds[tb_day_name].long_name}" tb_units = "K" - """ - if ancillary_source == "CDRv4": - # The CDRv4 calculation causes TB to be zero/missing where - # no sea ice can occur because of invalid region or land - logger.debug(f"Applying invalid ice mask to TB field: {tb_si_varname}") - invalid_ice_mask = get_invalid_ice_mask( - hemisphere=hemisphere, - date=date, - resolution=tb_data.resolution, - platform=PLATFORM_CONFIG.platform_for_id(tb_data.platform_id), - ancillary_source=ancillary_source, - ) - - # Set the TB values to zero at (monthly?) invalid ice mask - # TODO: Only set TB values to zero over the *non-ocean* grid cells - tb_si_data[invalid_ice_mask] = 0 - """ - ecdr_ide_ds[tb_si_varname] = ( ("time", "y", "x"), np.expand_dims(tb_si_data, axis=0), @@ -606,33 +591,6 @@ def compute_initial_daily_ecdr_dataset( # Drop the un-spatially interpolated TB field to save space and compute ecdr_ide_ds = ecdr_ide_ds.drop_vars(tb_varname) - # Attempt to get exact duplicate of v4 - # v4 uses np.round() to average the TB arrays as integers. - # this code uses floats (not integers) for the TB values. - # Because np.round() rounds to the nearest even value (not the nearest - # value!), the results are not the same as adding 0.5 and then cropping - # eg np.round(1834.5) -> 1834 - # eg np.round(1835.5) -> 1836 - # See: https://docs.scipy.org/doc//numpy-1.9.0/reference/generated/numpy.around.html#numpy.around - # In order to reproduce the v4 rounding values, need to first round to - # two decimal places, then round to 1 - ## NOTE: Remove these once we are ready to add CDRv5 integration tests - # ecdr_ide_ds["v37_day_si"].data = np.round( - # np.round(ecdr_ide_ds["v37_day_si"].data, 2), 1 - # ) - # ecdr_ide_ds["h37_day_si"].data = np.round( - # np.round(ecdr_ide_ds["h37_day_si"].data, 2), 1 - # ) - # ecdr_ide_ds["v19_day_si"].data = np.round( - # np.round(ecdr_ide_ds["v19_day_si"].data, 2), 1 - # ) - # ecdr_ide_ds["h19_day_si"].data = np.round( - # np.round(ecdr_ide_ds["h19_day_si"].data, 2), 1 - # ) - # ecdr_ide_ds["v22_day_si"].data = np.round( - # np.round(ecdr_ide_ds["v22_day_si"].data, 2), 1 - # ) - # Set a missing data mask using 19V TB field # Note: this will not include missing TBs in the day's invalid_ice field ecdr_ide_ds["missing_tb_mask"] = ( @@ -840,13 +798,15 @@ def compute_initial_daily_ecdr_dataset( # Here, the "bt_weather_mask" is the almost "water_arr" variable in cdralgos # However, it does not include the invalid_tb_mask (which here also # includes the pole hole) nor does it include land, so: - # cdralgos_water_arr = bt_weather_mask | (ecdr_ide_ds["invalid_tb_mask"].data & ~ecdr_ide_ds["non_ocean_mask"].data) + # cdralgos_water_arr = \ + # bt_weather_mask \ + # | (ecdr_ide_ds["invalid_tb_mask"].data \ + # & ~ecdr_ide_ds["non_ocean_mask"].data) # Note that this cdralgos_water_arr will differ because it will have # zeros where there is no data because of the pole hole ecdr_ide_ds["bt_weather_mask"] = ( # note <-- name is weather_mask ("time", "y", "x"), - # np.expand_dims(bt_weather_mask.data, axis=0), np.expand_dims(bt_water_mask.data, axis=0), # note <-- var water_mask { "grid_mapping": "crs", @@ -902,7 +862,6 @@ def compute_initial_daily_ecdr_dataset( else: bt_coefs["bt_wtp_v37"] = bt.calculate_water_tiepoint( wtp_init=bt_coefs_init["bt_wtp_v37"], - # weather_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], water_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], tb=bt_v37, ) @@ -913,7 +872,6 @@ def compute_initial_daily_ecdr_dataset( else: bt_coefs["bt_wtp_h37"] = bt.calculate_water_tiepoint( wtp_init=bt_coefs_init["bt_wtp_h37"], - # weather_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], water_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], tb=bt_h37, ) @@ -924,7 +882,6 @@ def compute_initial_daily_ecdr_dataset( else: bt_coefs["bt_wtp_v19"] = bt.calculate_water_tiepoint( wtp_init=bt_coefs_init["bt_wtp_v19"], - # weather_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], water_mask=ecdr_ide_ds["bt_weather_mask"].data[0, :, :], tb=bt_v19, ) @@ -994,14 +951,13 @@ def compute_initial_daily_ecdr_dataset( nt_tiepoints=nt_coefs["nt_tiepoints"], ) - cdr_conc_raw = calculate_cdr_conc( + cdr_conc_raw = calc_cdr_conc( bt_conc=bt_conc, nt_conc=nt_conc, cdr_conc_min_fraction=0.1, cdr_conc_max_fraction=1.0, ) - # Apply masks nt_weather_mask = get_nasateam_weather_mask( ecdr_ide_ds=ecdr_ide_ds, nt_coefs=nt_coefs ) @@ -1024,6 +980,7 @@ def compute_initial_daily_ecdr_dataset( }, ) + # Apply weather and invalid ice masks set_to_zero_sic = ( ecdr_ide_ds["nt_weather_mask"].data[0, :, :] | ecdr_ide_ds["bt_weather_mask"].data[0, :, :] @@ -1156,7 +1113,7 @@ def compute_initial_daily_ecdr_dataset( ecdr_ide_ds.variables["raw_nt_seaice_conc"].attrs[attr] = str(nt_coefs[attr]) # type: ignore[literal-required] # noqa # Add the final cdr_conc value to the xarray dataset - # Rescale conc values to 0-1 + # Rescale conc values fraction (i.e. 0 - 1 instead of 0.0 - 100.0 cdr_conc = cdr_conc / 100.0 # TODO: rename this variable from "conc" to "cdr_conc_init" or similar ecdr_ide_ds["conc"] = ( @@ -1308,7 +1265,6 @@ def write_ide_netcdf( "raw_bt_seaice_conc", ), tb_fields: Iterable[str] = ("h19_day_si", "h37_day_si"), - # tb_fields: Iterable[str] = ("h19_day_si", "h37_day_si", "v19_day_si", "v22_day_si", "v37_day_si"), ) -> Path: """Write the initial_ecdr_ds to a netCDF file and return the path.""" logger.info(f"Writing netCDF of initial_daily eCDR file to: {output_filepath}") From 36e382e44da9a07248e6fbf2ca00d1add89326f3 Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Wed, 2 Oct 2024 09:56:55 -0600 Subject: [PATCH 15/17] tweaks per PR review --- seaice_ecdr/initial_daily_ecdr.py | 24 +++++++++--------------- seaice_ecdr/intermediate_monthly.py | 17 +++++++---------- seaice_ecdr/set_daily_ncattrs.py | 12 ++++++++---- seaice_ecdr/temporal_composite_daily.py | 6 ++---- 4 files changed, 26 insertions(+), 33 deletions(-) diff --git a/seaice_ecdr/initial_daily_ecdr.py b/seaice_ecdr/initial_daily_ecdr.py index 12ce6869..3c0b4201 100644 --- a/seaice_ecdr/initial_daily_ecdr.py +++ b/seaice_ecdr/initial_daily_ecdr.py @@ -86,10 +86,11 @@ def cdr_bootstrap_raw( itp_37h = bt_coefs["bt_itp_h37"] itp_19v = bt_coefs["bt_itp_v19"] - # Preserve bt_conc values up to 200% siconc - # NOTE: Don't want to get close to 250+ which tend to be flag values - # in original Goddard code - bt_coefs["maxic"] = 2.0 + # Preserve bt_conc values up to 254% siconc. + # We need a value here because the pm_icecon function requires it. + # But we are only setting this particular value so that any calculated + # conc value can be represented by an unsigned byte (non-flag 255) value + bt_coefs["maxic"] = 2.54 bt_conc = bt.calc_bootstrap_conc( tb_v37=tb_v37, @@ -177,7 +178,6 @@ def calc_cdr_conc( nt_conc: npt.NDArray, cdr_conc_min_fraction: float, cdr_conc_max_fraction: float, - max_valid_siconc=200.0, ) -> npt.NDArray: """ Run the CDR algorithm @@ -185,17 +185,15 @@ def calc_cdr_conc( though the upper range of values might be great than 100%. This is permitted for the bt_conc and nt_conc fields, but not for the cdr_conc field. + Note: bt_conc and nt_conc values are expected to be >= 0.0 or np.nan Note: range of output values will be 0 to 100.0 and np.nan (=missing) """ cdr_conc_min_percent = cdr_conc_min_fraction * 100.0 cdr_conc_max_percent = cdr_conc_max_fraction * 100.0 - # It's possible that conc fields might have - # flagged values > max_valid_siconc - # so check for that here. - is_bt_seaice = (bt_conc > cdr_conc_min_percent) & (bt_conc <= max_valid_siconc) + is_bt_seaice = (bt_conc > cdr_conc_min_percent) & np.isfinite(bt_conc) is_cdr_seaice = is_bt_seaice - is_nt_seaice = (nt_conc > cdr_conc_min_percent) & (nt_conc <= max_valid_siconc) + is_nt_seaice = (nt_conc > cdr_conc_min_percent) & np.isfinite(nt_conc) use_nt_values = is_nt_seaice & is_bt_seaice & (nt_conc > bt_conc) @@ -207,11 +205,7 @@ def calc_cdr_conc( is_low_siconc = is_cdr_seaice & (cdr_conc < cdr_conc_min_percent) cdr_conc[is_low_siconc] = 0 - is_high_siconc = ( - is_cdr_seaice - & (cdr_conc > cdr_conc_max_percent) - & (cdr_conc <= max_valid_siconc) - ) + is_high_siconc = is_cdr_seaice & (cdr_conc > cdr_conc_max_percent) cdr_conc[is_high_siconc] = cdr_conc_max_percent return cdr_conc diff --git a/seaice_ecdr/intermediate_monthly.py b/seaice_ecdr/intermediate_monthly.py index c335f36c..e7bd24c9 100644 --- a/seaice_ecdr/intermediate_monthly.py +++ b/seaice_ecdr/intermediate_monthly.py @@ -417,20 +417,17 @@ def calc_cdr_seaice_conc_monthly_stdev( ddof=1, ) - # Extract non-'time' dims (and coords) from DataArray - dims = [dim for dim in daily_cdr_seaice_conc.dims if dim != "time"] - - # Note: list comprehension does not work using iterator as key - coords = [] - for dim in dims: - if dim != "time": - coords.append(daily_cdr_seaice_conc[dim]) + # Extract non-'time' dims and coords from DataArray + dims_without_time = [dim for dim in daily_cdr_seaice_conc.dims if dim != "time"] + coords_without_time = [ + daily_cdr_seaice_conc[dim] for dim in dims_without_time if dim != "time" + ] cdr_seaice_conc_monthly_stdev = xr.DataArray( data=cdr_seaice_conc_monthly_stdev_np, name="cdr_seaice_conc_monthly_stdev", - coords=coords, - dims=dims, + coords=coords_without_time, + dims=dims_without_time, attrs=dict( long_name="Passive Microwave Monthly Northern Hemisphere Sea Ice Concentration Source Estimated Standard Deviation", valid_range=(np.float32(0.0), np.float32(1.0)), diff --git a/seaice_ecdr/set_daily_ncattrs.py b/seaice_ecdr/set_daily_ncattrs.py index 7ebc333e..c4513e05 100644 --- a/seaice_ecdr/set_daily_ncattrs.py +++ b/seaice_ecdr/set_daily_ncattrs.py @@ -312,7 +312,9 @@ def finalize_cdecdr_ds( # TODO: conversion to ubyte should be done with DataArray encoding dict # NOTE: We are overwriting the attrs of the original conc field # TODO: scale_factor and add_offset might get set during encoding - # NOTE: We allow raw siconc up to 200% for potential validation measures + # NOTE: We allow raw siconc up to 254% because (1) that is the maximum + # representable value for a non-negative conc with a _FlagValue + # for missing of 255, and (2) for potential validation measures. ds["raw_bt_seaice_conc"] = ( ("time", "y", "x"), ds["raw_bt_seaice_conc"].data, @@ -325,7 +327,7 @@ def finalize_cdecdr_ds( " raw field with no masking or filtering" ), "grid_mapping": "crs", - "valid_range": np.array((0, 200), dtype=np.uint8), + "valid_range": np.array((0, 254), dtype=np.uint8), }, ) @@ -333,7 +335,9 @@ def finalize_cdecdr_ds( # TODO: adding time dimension should probably happen earlier # TODO: conversion to ubyte should be done with DataArray encoding dict # TODO: scale_factor and add_offset might get set during encoding - # NOTE: We allow raw siconc up to 200% for potential validation measures + # NOTE: We allow raw siconc up to 254% because (1) that is the maximum + # representable value for a non-negative conc with a _FlagValue + # for missing of 255, and (2) for potential validation measures. ds["raw_nt_seaice_conc"] = ( ("time", "y", "x"), ds["raw_nt_seaice_conc"].data, @@ -346,7 +350,7 @@ def finalize_cdecdr_ds( " raw field with no masking or filtering" ), "grid_mapping": "crs", - "valid_range": np.array((0, 200), dtype=np.uint8), + "valid_range": np.array((0, 254), dtype=np.uint8), }, ) diff --git a/seaice_ecdr/temporal_composite_daily.py b/seaice_ecdr/temporal_composite_daily.py index 82dc43b7..dd9b6fe1 100644 --- a/seaice_ecdr/temporal_composite_daily.py +++ b/seaice_ecdr/temporal_composite_daily.py @@ -244,11 +244,9 @@ def temporally_composite_dataarray( raise RuntimeError(f" the target_date was not in the dataarray: {target_date}") temp_comp_2d = np.squeeze(temp_comp_da.data) - try: - assert temp_comp_2d.shape == (ydim, xdim) - except AssertionError as e: + if temp_comp_2d.shape != (ydim, xdim): raise RuntimeError( - f"Error asserting that squeezed data array is two dimensional with shape ({ydim}, {xdim}); got {temp_comp_2d.shape} instead; Assertion error was {e}" + f"Error asserting that squeezed data array is two dimensional; shape is {temp_comp_2d.shape} instead of ({ydim}, {xdim})" ) if daily_climatology_mask is not None: From 97f52a7e8fabf69ab3ee8149d9d65c1a3aafa232 Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Wed, 2 Oct 2024 10:11:55 -0600 Subject: [PATCH 16/17] set default log levels to SUCCESS for both dev and users --- seaice_ecdr/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/seaice_ecdr/__init__.py b/seaice_ecdr/__init__.py index b0146856..6bef7178 100644 --- a/seaice_ecdr/__init__.py +++ b/seaice_ecdr/__init__.py @@ -8,11 +8,14 @@ __version__ = "v0.2.0" -DEFAULT_LOG_LEVEL = "INFO" +# The standard loguru log levels, in increasing order of severity, are: +# TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL +DEFAULT_LOG_LEVEL = "SUCCESS" +DEFAULT_DEV_LOG_LEVEL = "SUCCESS" # If we're in dev, DEBUG level logs are appropriate, otherwise use INFO. env = os.environ.get("ENVIRONMENT") -_default_log_level = "DEBUG" if env == "dev" else DEFAULT_LOG_LEVEL +_default_log_level = DEFAULT_DEV_LOG_LEVEL if env == "dev" else DEFAULT_LOG_LEVEL # Configure the default logger, which prints to stderr. logger.configure( From 6b63eaf50fed23287c243137e30c29410243a1a5 Mon Sep 17 00:00:00 2001 From: Scott Stewart Date: Wed, 2 Oct 2024 10:33:12 -0600 Subject: [PATCH 17/17] ensure no siconc between 0 and 10% after temporal interpolation --- seaice_ecdr/temporal_composite_daily.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/seaice_ecdr/temporal_composite_daily.py b/seaice_ecdr/temporal_composite_daily.py index dd9b6fe1..514d3061 100644 --- a/seaice_ecdr/temporal_composite_daily.py +++ b/seaice_ecdr/temporal_composite_daily.py @@ -347,6 +347,10 @@ def temporally_composite_dataarray( # temporal_flags[have_only_next] = pdist[have_only_next] # CDRv04r00 error temporal_flags[have_only_next] = ndist[have_only_next] # Correct + # Ensure that no conc values are between 0 and 10% after temporal interp + is_conc_too_low = (temp_comp_2d > 0) & (temp_comp_2d < 0.1) + temp_comp_2d[is_conc_too_low] = 0 + # Ensure flag values do not occur over land temporal_flags[non_ocean_mask.data] = 0