Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Monthly update for NetCDF groups #155

Merged
merged 7 commits into from
Sep 19, 2024
17 changes: 3 additions & 14 deletions seaice_ecdr/cli/daily.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,18 @@
import copy
import datetime as dt
import subprocess
from pathlib import Path
from typing import Final, get_args

import click
from pm_tb_data._types import Hemisphere

from seaice_ecdr.cli.util import datetime_to_date
from seaice_ecdr.cli.util import CLI_EXE_PATH, datetime_to_date, run_cmd
from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR
from seaice_ecdr.platforms.config import PROTOTYPE_PLATFORM_START_DATES_CONFIG_FILEPATH
from seaice_ecdr.publish_daily import publish_daily_nc_for_dates

_THIS_DIR = Path(__file__).parent

CLI_EXE_PATH = _THIS_DIR / "../../scripts/cli.sh"


def _run_cmd(cmd: str):
subprocess.run(
cmd,
shell=True,
check=True,
)


def make_25km_ecdr(
start_date: dt.date,
Expand All @@ -44,7 +33,7 @@ def make_25km_ecdr(
daily_intermediate_cmd = "intermediate-daily"
else:
daily_intermediate_cmd = "multiprocess-intermediate-daily"
_run_cmd(
run_cmd(
f"{CLI_EXE_PATH} {daily_intermediate_cmd}"
f" --start-date {start_date:%Y-%m-%d} --end-date {end_date:%Y-%m-%d}"
f" --hemisphere {hemisphere}"
Expand All @@ -57,7 +46,7 @@ def make_25km_ecdr(
# If the given start & end date intersect with the AMSR2 period, run that
# separately:
if start_date >= AM2_START_DATE:
_run_cmd(
run_cmd(
f"export PLATFORM_START_DATES_CONFIG_FILEPATH={PROTOTYPE_PLATFORM_START_DATES_CONFIG_FILEPATH} &&"
f" {CLI_EXE_PATH} {daily_intermediate_cmd}"
f" --start-date {start_date:%Y-%m-%d} --end-date {end_date:%Y-%m-%d}"
Expand Down
17 changes: 7 additions & 10 deletions seaice_ecdr/cli/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,11 @@
import click

from seaice_ecdr.cli.daily import cli as daily_cli

# TODO: The daily-aggregate processing is very parallelizable because
# each year is independent of every other year. It could be
# implemented with multi-processing to speed up production
# on a multi-core machine. Perhaps as a cmdline arg to this
# CLI API?
from seaice_ecdr.cli.monthly import cli as monthly_cli
from seaice_ecdr.daily_aggregate import cli as daily_aggregate_cli
from seaice_ecdr.initial_daily_ecdr import cli as ecdr_cli
from seaice_ecdr.intermediate_daily import cli as intermediate_daily_cli
from seaice_ecdr.monthly import cli as monthly_cli
from seaice_ecdr.intermediate_monthly import cli as intermediate_monthly_cli
from seaice_ecdr.monthly_aggregate import cli as monthly_aggregate_cli
from seaice_ecdr.multiprocess_intermediate_daily import (
cli as multiprocess_intermediate_daily_cli,
Expand All @@ -33,17 +28,19 @@ def cli():
cli.add_command(tiecdr_cli)
cli.add_command(nrt_cli)
cli.add_command(intermediate_daily_cli)
cli.add_command(monthly_cli)
cli.add_command(intermediate_monthly_cli)
cli.add_command(monthly_aggregate_cli)
cli.add_command(validation_cli)
cli.add_command(multiprocess_intermediate_daily_cli)
cli.add_command(publish_daily_cli)

# CLIs that ops will use below:
# This is the CLI ops will interact with to produce standard daily files.
# Generate standard daily files ready for publication:
cli.add_command(daily_cli)
# This is the CLI that ops will use to generate daily aggregate files by year:
# Generate daily aggregate files by year ready for publication:
cli.add_command(daily_aggregate_cli)
# Generate standard monthly files ready for publication:
cli.add_command(monthly_cli)

if __name__ == "__main__":
cli()
150 changes: 150 additions & 0 deletions seaice_ecdr/cli/monthly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import datetime as dt
from pathlib import Path
from typing import Final, get_args

import click
import pandas as pd
from pm_tb_data._types import Hemisphere

from seaice_ecdr.cli.util import CLI_EXE_PATH, run_cmd
from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR
from seaice_ecdr.platforms.config import (
DEFAULT_PLATFORM_START_DATES_CONFIG_FILEPATH,
PROTOTYPE_PLATFORM_START_DATES_CONFIG_FILEPATH,
)
from seaice_ecdr.publish_monthly import prepare_monthly_nc_for_publication


def _intermediate_monthly_cmd(
    *,
    platform_start_dates_config_filepath: Path | str,
    year: int,
    month: int,
    end_year: int,
    end_month: int,
    hemisphere: Hemisphere,
    base_output_dir: Path,
    resolution: str,
    ancillary_source: str,
) -> str:
    """Build the shell command that runs the `intermediate-monthly` CLI.

    The command first exports `PLATFORM_START_DATES_CONFIG_FILEPATH` and then
    invokes `CLI_EXE_PATH intermediate-monthly` with the given options.
    Filesystem paths are shell-quoted so that paths containing spaces or
    shell metacharacters are passed through intact.
    """
    import shlex

    config_filepath = shlex.quote(str(platform_start_dates_config_filepath))
    cli_exe = shlex.quote(str(CLI_EXE_PATH))
    output_dir = shlex.quote(str(base_output_dir))

    return (
        f"export PLATFORM_START_DATES_CONFIG_FILEPATH={config_filepath} &&"
        f" {cli_exe} intermediate-monthly"
        f" --year {year} --month {month}"
        f" --end-year {end_year} --end-month {end_month}"
        f" --hemisphere {hemisphere}"
        f" --base-output-dir {output_dir}"
        f" --resolution {resolution}"
        f" --ancillary-source {ancillary_source}"
    )


def make_monthly_25km_ecdr(
    year: int,
    month: int,
    end_year: int | None,
    end_month: int | None,
    hemisphere: Hemisphere,
    base_output_dir: Path,
):
    """Create monthly ECDR files for a range of months and prepare them for publication.

    Runs the `intermediate-monthly` CLI in a subprocess with the default
    platform start dates config, runs it a second time with the prototype
    platform start dates config when the first requested month is on or after
    2013-01-01, and finally calls `prepare_monthly_nc_for_publication` for
    every month from (`year`, `month`) through (`end_year`, `end_month`),
    inclusive.

    Args:
        year: First year to process.
        month: First month to process.
        end_year: Last year to process. Defaults to `year` when `None`.
        end_month: Last month to process. Defaults to `month` when `None`.
        hemisphere: Hemisphere to process.
        base_output_dir: Base directory passed to the CLI and to the
            publication step.

    Raises:
        subprocess.CalledProcessError: If an `intermediate-monthly` run exits
            with a non-zero status (`run_cmd` uses `check=True`).
    """
    if end_year is None:
        end_year = year
    if end_month is None:
        end_month = month

    # TODO: consider extracting these to CLI options that default to these values.
    RESOLUTION: Final = "25"
    ANCILLARY_SOURCE: Final = "CDRv4"
    # TODO: the amsr2 start date should ideally be read from the platform start
    # date config.
    PROTOTYPE_PLATFORM_START_DATE = dt.date(2013, 1, 1)

    def _run_intermediate_monthly(config_filepath: Path | str) -> None:
        # Both runs differ only in the platform start dates config they use.
        run_cmd(
            _intermediate_monthly_cmd(
                platform_start_dates_config_filepath=config_filepath,
                year=year,
                month=month,
                end_year=end_year,
                end_month=end_month,
                hemisphere=hemisphere,
                base_output_dir=base_output_dir,
                resolution=RESOLUTION,
                ancillary_source=ANCILLARY_SOURCE,
            )
        )

    # Use the default platform dates, which excludes AMSR2
    _run_intermediate_monthly(DEFAULT_PLATFORM_START_DATES_CONFIG_FILEPATH)

    # If the given start & end date intersect with the AMSR2 period, run that
    # separately:
    # NOTE(review): only the start of the range is checked here, so a range
    # that begins before the prototype start date and ends after it does not
    # trigger the prototype run -- confirm this is intended.
    if dt.date(year, month, 1) >= PROTOTYPE_PLATFORM_START_DATE:
        _run_intermediate_monthly(PROTOTYPE_PLATFORM_START_DATES_CONFIG_FILEPATH)

    # Prepare the monthly data for publication
    for period in pd.period_range(
        start=pd.Period(year=year, month=month, freq="M"),
        end=pd.Period(year=end_year, month=end_month, freq="M"),
        freq="M",
    ):
        prepare_monthly_nc_for_publication(
            year=period.year,
            month=period.month,
            base_output_dir=base_output_dir,
            hemisphere=hemisphere,
            resolution=RESOLUTION,
        )


@click.command(name="monthly")
@click.option(
    "--year",
    required=True,
    type=int,
    help="Year for which to create the monthly file.",
)
@click.option(
    "--month",
    required=True,
    # Reject out-of-range months up front instead of failing part-way through
    # processing.
    type=click.IntRange(1, 12),
    help="Month for which to create the monthly file.",
)
@click.option(
    "--end-year",
    required=False,
    default=None,
    type=int,
    help="If given, the end year for which to create monthly files.",
)
@click.option(
    "--end-month",
    required=False,
    default=None,
    type=click.IntRange(1, 12),
    help="If given, the end month for which to create monthly files.",
)
@click.option(
    "-h",
    "--hemisphere",
    required=True,
    type=click.Choice(get_args(Hemisphere)),
)
@click.option(
    "--base-output-dir",
    required=True,
    type=click.Path(
        exists=True,
        file_okay=False,
        dir_okay=True,
        writable=True,
        resolve_path=True,
        path_type=Path,
    ),
    default=DEFAULT_BASE_OUTPUT_DIR,
    help=(
        "Base output directory for standard ECDR outputs."
        " Subdirectories are created for outputs of"
        " different stages of processing."
    ),
    show_default=True,
)
def cli(
    *,
    year: int,
    month: int,
    end_year: int | None,
    end_month: int | None,
    hemisphere: Hemisphere,
    base_output_dir: Path,
) -> None:
    """Generate standard monthly files ready for publication.

    Processes the month given by --year/--month. When --end-year and/or
    --end-month are given, every month through that end month is processed;
    an omitted end value defaults to the corresponding start value.
    """
    make_monthly_25km_ecdr(
        year=year,
        month=month,
        end_year=end_year,
        end_month=end_month,
        hemisphere=hemisphere,
        base_output_dir=base_output_dir,
    )


# Invoke the `monthly` click command when this module is executed directly
# as a script (rather than imported).
if __name__ == "__main__":
    cli()
15 changes: 15 additions & 0 deletions seaice_ecdr/cli/util.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
"""util.py cli routines common to seaice_ecdr."""

import datetime as dt
import subprocess
from pathlib import Path

# Directory that contains this module.
_THIS_DIR = Path(__file__).parent

# Absolute, normalized path to `scripts/cli.sh`, located two directory levels
# above this module's directory.
CLI_EXE_PATH = _THIS_DIR.joinpath("..", "..", "scripts", "cli.sh").resolve()


def datetime_to_date(_ctx, _param, value: dt.datetime) -> dt.date:
    """Click option callback: drop the time portion of a parsed datetime.

    The first two arguments (`_ctx` and `_param`) are accepted so the
    function matches click's callback signature; they are not used.
    """
    date_only = value.date()
    return date_only


def run_cmd(cmd: str) -> None:
    """Execute `cmd` through the shell and fail loudly on a non-zero exit.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    completed = subprocess.run(cmd, shell=True, check=False)
    # Equivalent to passing `check=True`: raises if the exit status is non-zero.
    completed.check_returncode()
7 changes: 7 additions & 0 deletions seaice_ecdr/daily_aggregate.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
"""Code to produce daily aggregate files from daily complete data.


TODO: The daily-aggregate processing is very parallelizable because
each year is independent of every other year. It could be
implemented with multi-processing to speed up production
on a multi-core machine. Perhaps as a cmdline arg to this
CLI API?
"""

import datetime as dt
Expand Down
Loading