Skip to content

Commit

Permalink
Merge pull request #163 from nsidc/nrt-like-v4
Browse files Browse the repository at this point in the history
NRT Processing updates for G10016
  • Loading branch information
trey-stafford authored Sep 26, 2024
2 parents ff19054 + 01ab4e4 commit 3b6bd6a
Show file tree
Hide file tree
Showing 19 changed files with 404 additions and 242 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ jobs:
- name: "Install Conda environment"
uses: "mamba-org/setup-micromamba@v1"
with:
micromamba-version: '1.5.10-0' # any version from https://github.com/mamba-org/micromamba-releases
environment-file: "conda-lock.yml"
# When using a lock-file, we have to set an environment name.
environment-name: "seaice_ecdr-ci"
Expand Down
4 changes: 2 additions & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ dependencies:
- nco ~=5.1.9
- pandas ~=1.4.4
- opencv ~=4.8.0
- pm_tb_data ~=0.4.0
- pm_icecon ~=0.4.0
- pm_tb_data ~=0.5.0
- pm_icecon ~=0.5.0
- leafmap
- rioxarray
- hvplot
Expand Down
14 changes: 12 additions & 2 deletions seaice_ecdr/ancillary.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,12 @@ def get_surfacetype_da(
if "polehole_bitmask" in ancillary_ds.data_vars.keys():
polehole_bitmask = ancillary_ds.polehole_bitmask
platform = PLATFORM_CONFIG.get_platform_by_date(date)
polehole_bitlabel = f"{platform.id}_polemask"
platform_id = platform.id
# TODO: add an F18 polemask to the ancillary file. Until one is defined,
# fall back to the F17 polemask when F18 is the platform.
if platform.id == "F18":
platform_id = "F17"
polehole_bitlabel = f"{platform_id}_polemask"
polehole_bitvalue = bitmask_value_for_meaning(
var=polehole_bitmask,
meaning=polehole_bitlabel,
Expand Down Expand Up @@ -192,7 +197,12 @@ def nh_polehole_mask(
date=date,
)

polehole_bitlabel = f"{platform.id}_polemask"
platform_id = platform.id
# TODO: add an F18 polemask to the ancillary file. Until one is defined,
# fall back to the F17 polemask when F18 is the platform.
if platform.id == "F18":
platform_id = "F17"
polehole_bitlabel = f"{platform_id}_polemask"
polehole_bitvalue = bitmask_value_for_meaning(
var=polehole_bitmask,
meaning=polehole_bitlabel,
Expand Down
8 changes: 6 additions & 2 deletions seaice_ecdr/cli/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from seaice_ecdr.cli.daily import cli as daily_cli
from seaice_ecdr.cli.monthly import cli as monthly_cli
from seaice_ecdr.cli.nrt import cli as nrt_cli
from seaice_ecdr.daily_aggregate import cli as daily_aggregate_cli
from seaice_ecdr.initial_daily_ecdr import cli as ecdr_cli
from seaice_ecdr.intermediate_daily import cli as intermediate_daily_cli
Expand All @@ -12,7 +13,7 @@
from seaice_ecdr.multiprocess_intermediate_daily import (
cli as multiprocess_intermediate_daily_cli,
)
from seaice_ecdr.nrt import nrt_cli
from seaice_ecdr.nrt import nrt_ecdr_for_dates
from seaice_ecdr.publish_daily import cli as publish_daily_cli
from seaice_ecdr.temporal_composite_daily import cli as tiecdr_cli
from seaice_ecdr.validation import cli as validation_cli
Expand All @@ -26,12 +27,12 @@ def cli():

cli.add_command(ecdr_cli)
cli.add_command(tiecdr_cli)
cli.add_command(nrt_cli)
cli.add_command(intermediate_daily_cli)
cli.add_command(intermediate_monthly_cli)
cli.add_command(validation_cli)
cli.add_command(multiprocess_intermediate_daily_cli)
cli.add_command(publish_daily_cli)
cli.add_command(nrt_ecdr_for_dates)

# CLIs that ops will use below:
# Generate standard daily files ready for publication:
Expand All @@ -42,6 +43,9 @@ def cli():
cli.add_command(monthly_cli)
# Generate monthly aggregate file (one per hemisphere)
cli.add_command(monthly_aggregate_cli)
# Wraps the `nrt_ecdr_for_dates` CLI with the correct platform start date
# configuration chosen.
cli.add_command(nrt_cli)

if __name__ == "__main__":
cli()
93 changes: 93 additions & 0 deletions seaice_ecdr/cli/nrt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""Wrapper around the nrt CLI to override platform start dates config
This is a hack, and should be unnecessary once the code is refactored to make it
easier to configure the platform start dates.
"""

import copy
import datetime as dt
import shlex
from pathlib import Path
from typing import get_args

import click
from pm_tb_data._types import Hemisphere

from seaice_ecdr.cli.util import CLI_EXE_PATH, datetime_to_date, run_cmd
from seaice_ecdr.constants import DEFAULT_BASE_NRT_OUTPUT_DIR
from seaice_ecdr.platforms.config import NRT_PLATFORM_START_DATES_CONFIG_FILEPATH


@click.command(name="daily-nrt")
@click.option(
    "-d",
    "--date",
    required=True,
    type=click.DateTime(formats=("%Y-%m-%d", "%Y%m%d", "%Y.%m.%d")),
    callback=datetime_to_date,
)
@click.option(
    "--end-date",
    required=False,
    type=click.DateTime(
        formats=(
            "%Y-%m-%d",
            "%Y%m%d",
            "%Y.%m.%d",
        )
    ),
    # Like `datetime_to_date` but allows `None`.
    callback=lambda _ctx, _param, value: value if value is None else value.date(),
    default=None,
    help="If given, run temporal composite for `--date` through this end date.",
)
@click.option(
    "-h",
    "--hemisphere",
    required=True,
    type=click.Choice(get_args(Hemisphere)),
)
@click.option(
    "--base-output-dir",
    required=True,
    type=click.Path(
        exists=True,
        file_okay=False,
        dir_okay=True,
        writable=True,
        resolve_path=True,
        path_type=Path,
    ),
    default=DEFAULT_BASE_NRT_OUTPUT_DIR,
    help=(
        "Base output directory for NRT ECDR outputs."
        " Subdirectories are created for outputs of"
        " different stages of processing."
    ),
    show_default=True,
)
@click.option(
    "--overwrite",
    is_flag=True,
    help=("Overwrite intermediate and final outputs."),
)
def cli(
    *,
    date: dt.date,
    end_date: dt.date | None,
    hemisphere: Hemisphere,
    base_output_dir: Path,
    overwrite: bool,
):
    """Run the `nrt` subcommand with the NRT platform start dates config.

    Exports PLATFORM_START_DATES_CONFIG_FILEPATH for the child process and then
    invokes the `nrt` subcommand with the given options. `--end-date` defaults
    to `--date` when it is not given.
    """
    if end_date is None:
        end_date = copy.copy(date)

    overwrite_str = " --overwrite" if overwrite else ""

    # The command is handed to `run_cmd` as a single shell string, so any path
    # that is interpolated must be quoted: otherwise a directory name with
    # spaces (or shell metacharacters) would be split or interpreted by the
    # shell. `shlex.quote` leaves plain paths unchanged.
    config_filepath = shlex.quote(str(NRT_PLATFORM_START_DATES_CONFIG_FILEPATH))
    output_dir = shlex.quote(str(base_output_dir))

    # NOTE(review): CLI_EXE_PATH is interpolated as-is because its exact form
    # (single path vs. a command with arguments) is not visible from here.
    run_cmd(
        f"export PLATFORM_START_DATES_CONFIG_FILEPATH={config_filepath} && "
        f"{CLI_EXE_PATH} nrt"
        f" --hemisphere {hemisphere}"
        f" --base-output-dir {output_dir}"
        f" --date {date:%Y-%m-%d}"
        f" --end-date {end_date:%Y-%m-%d}" + overwrite_str
    )
5 changes: 5 additions & 0 deletions seaice_ecdr/config/nrt_platform_start_dates.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
cdr_platform_start_dates:
# We only use F18 for NRT.
- platform_id: "F18"
# This appears to be the first date of data in NSIDC0080.
start_date: "2021-11-01"
40 changes: 35 additions & 5 deletions seaice_ecdr/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,26 @@
import subprocess
from pathlib import Path

from pydantic import BaseModel


class ProductVersion(BaseModel):
    """Product version made of a major version number and a revision number.

    Renders as ``v<major>r<revision>`` with each number zero-padded to at
    least two digits (e.g. major 5, revision 0 gives ``v05r00``).
    """

    # Major version of the product (the number after the "v").
    major_version_number: int
    # Revision within the major version (the number after the "r").
    revision_number: int

    @property
    def version_str(self) -> str:
        """Return the version formatted as ``vNNrNN``."""
        major = format(self.major_version_number, "02")
        revision = format(self.revision_number, "02")
        return f"v{major}r{revision}"

    def __str__(self) -> str:
        # Lets the object be dropped straight into f-strings and paths.
        return self.version_str


# This is the version string for the ECDR product.
ECDR_PRODUCT_VERSION = "v05r00"
ECDR_PRODUCT_VERSION = ProductVersion(
major_version_number=5,
revision_number=0,
)

# NSIDC infrastructure-specific paths:
NSIDC_NFS_SHARE_DIR = Path("/share/apps/G02202_V5")
Expand Down Expand Up @@ -65,10 +83,22 @@ def _get_env_subdir_str() -> str:
LOGS_DIR = NSIDC_NFS_SHARE_DIR / f"{ECDR_PRODUCT_VERSION}_logs" / _env_subdir
LOGS_DIR.mkdir(parents=True, exist_ok=True)

# Location of LANCE AMSR2 NRT data files:
# TODO: nest the subdir under an `ecdr_inputs` or similar?
LANCE_NRT_DATA_DIR = NSIDC_NFS_SHARE_DIR / "lance_amsr2_nrt_data"

# Location of surface mask & geo-information files.
# TODO: we should consider moving the ancillary files to a different
# location. Currently, ancillary files are stored in the G02202 V5 specific dir,
# but the NRT product is G10016 and it uses the same ancillary data.
CDR_ANCILLARY_DIR = NSIDC_NFS_SHARE_DIR / f"{ECDR_PRODUCT_VERSION}_ancillary"
CDRv4_ANCILLARY_DIR = NSIDC_NFS_SHARE_DIR / "cdrv4_equiv_ancillary"

# NRT outputs
ECDR_NRT_PRODUCT_VERSION = ProductVersion(
    major_version_number=3,
    revision_number=0,
)
NSIDC_NFS_NRT_SHARE_DIR = Path("/share/apps/G10016_V3")
# Fail fast when the NRT share itself is missing. This must check the NRT
# directory (the one named in the error message), not the G02202 share dir.
if not NSIDC_NFS_NRT_SHARE_DIR.is_dir():
    raise RuntimeError(f"Expected {NSIDC_NFS_NRT_SHARE_DIR} to exist, but it does not.")
# Default NRT output location: <NRT share>/<NRT version string>/<env subdir>.
DEFAULT_BASE_NRT_OUTPUT_DIR = (
    NSIDC_NFS_NRT_SHARE_DIR / ECDR_NRT_PRODUCT_VERSION.version_str / _env_subdir
)
# Created at import time so the CLI default always points at an existing dir.
DEFAULT_BASE_NRT_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
2 changes: 2 additions & 0 deletions seaice_ecdr/daily_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,14 @@ def _get_daily_complete_filepaths_for_year(
dt.date(year, 1, 1), PLATFORM_CONFIG.get_first_platform_start_date()
)
for period in pd.period_range(start=start_date, end=dt.date(year, 12, 31)):
platform = PLATFORM_CONFIG.get_platform_by_date(period.to_timestamp().date())
expected_fp = get_complete_daily_filepath(
date=period.to_timestamp().date(),
hemisphere=hemisphere,
resolution=resolution,
complete_output_dir=complete_output_dir,
is_nrt=False,
platform_id=platform.id,
)
if expected_fp.is_file():
data_list.append(expected_fp)
Expand Down
20 changes: 14 additions & 6 deletions seaice_ecdr/initial_daily_ecdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def get_flagmask(
if ancillary_source == "CDRv4":
version_string = "v04r00"
elif ancillary_source == "CDRv5":
version_string = ECDR_PRODUCT_VERSION
version_string = ECDR_PRODUCT_VERSION.version_str

flagmask_fn = CDR_ANCILLARY_DIR / f"flagmask_{gridid}_{version_string}.dat"
try:
Expand Down Expand Up @@ -417,12 +417,11 @@ def compute_initial_daily_ecdr_dataset(
# The CDRv4 calculation causes TB to be zero/missing where
# no sea ice can occur because of invalid region or land
logger.debug(f"Applying invalid ice mask to TB field: {tb_si_varname}")
platform = PLATFORM_CONFIG.get_platform_by_date(date)
invalid_ice_mask = get_invalid_ice_mask(
hemisphere=hemisphere,
date=date,
resolution=tb_data.resolution,
platform=platform,
platform=PLATFORM_CONFIG.platform_for_id(tb_data.platform_id),
ancillary_source=ancillary_source,
)

Expand Down Expand Up @@ -573,7 +572,7 @@ def compute_initial_daily_ecdr_dataset(
"Initialized cdr_seaice_conc_interp_spatial_flag with TB fill locations"
)

platform = PLATFORM_CONFIG.get_platform_by_date(date)
platform = PLATFORM_CONFIG.platform_for_id(tb_data.platform_id)
if platform.id == "am2":
bt_coefs_init = pmi_bt_params_amsr2.get_ausi_amsr2_bootstrap_params(
date=date,
Expand All @@ -586,6 +585,8 @@ def compute_initial_daily_ecdr_dataset(
satellite="amsre",
gridid=ecdr_ide_ds.grid_id,
)
# NOTE/TODO: we get F18 data from NSIDC-0080 for NRT processing, but we are
# using the NSIDC-0001 specific BT params for F18.
elif platform.id in get_args(NSIDC_0001_SATS):
bt_coefs_init = pmi_bt_params_0001.get_nsidc0001_bootstrap_params(
date=date,
Expand Down Expand Up @@ -1298,11 +1299,11 @@ def make_idecdr_netcdf(
land_spillover_alg: LAND_SPILL_ALGS,
ancillary_source: ANCILLARY_SOURCES,
overwrite_ide: bool = False,
platform_id: SUPPORTED_PLATFORM_ID,
) -> None:
platform = PLATFORM_CONFIG.get_platform_by_date(date)
output_path = get_idecdr_filepath(
date=date,
platform_id=platform.id,
platform_id=platform_id,
hemisphere=hemisphere,
intermediate_output_dir=intermediate_output_dir,
resolution=resolution,
Expand Down Expand Up @@ -1339,6 +1340,11 @@ def create_idecdr_for_date(
land_spillover_alg: LAND_SPILL_ALGS,
ancillary_source: ANCILLARY_SOURCES,
) -> None:
"""Create a standard IDECDR file for the given date.
Uses the default platform start date config to determine the correct
platform to use.
"""
excluded_fields = []
if not verbose_intermed_ncfile:
excluded_fields = [
Expand All @@ -1361,6 +1367,7 @@ def create_idecdr_for_date(
"missing_tb_mask",
]
try:
platform = PLATFORM_CONFIG.get_platform_by_date(date)
make_idecdr_netcdf(
date=date,
hemisphere=hemisphere,
Expand All @@ -1370,6 +1377,7 @@ def create_idecdr_for_date(
overwrite_ide=overwrite_ide,
land_spillover_alg=land_spillover_alg,
ancillary_source=ancillary_source,
platform_id=platform.id,
)

except Exception as e:
Expand Down
Loading

0 comments on commit 3b6bd6a

Please sign in to comment.