Skip to content

Commit

Permalink
Merge pull request #177 from nsidc/fixups-for-prod
Browse files Browse the repository at this point in the history
Fixups for prod
  • Loading branch information
trey-stafford authored Dec 6, 2024
2 parents 69ccc89 + bf45530 commit 2daa616
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 39 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# v1.0.2

* Fix for subprocess calls for `git` operations (e.g., to get the current
version) with potentially incorrect working dir.
* Remove OBE assertion
* Fix monthly validation code to process all months in date range.
* Update validation code to process data in final, publication-ready state.

# v1.0.1

* Monthly melt onset day uses DOY 244 as the last day in the melt season unless
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[project]
name = "seaice_ecdr"
version = "1.0.1"
version = "1.0.2"

[tool.bumpversion]
current_version = "1.0.1"
current_version = "1.0.2"
commit = false
tag = false

Expand Down
2 changes: 1 addition & 1 deletion seaice_ecdr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from seaice_ecdr.constants import LOGS_DIR

__version__ = "v1.0.1"
__version__ = "v1.0.2"

# The standard loguru log levels, in increasing order of severity, are:
# TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL
Expand Down
10 changes: 8 additions & 2 deletions seaice_ecdr/nc_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import subprocess
from collections import OrderedDict
from functools import cache
from pathlib import Path
from typing import Any, Literal, get_args

import pandas as pd
Expand Down Expand Up @@ -32,14 +33,19 @@ def _get_software_version_id():
Takes the form <git_repo>@<git_hash>. E.g.,:
    "git@github.com:nsidc/seaice_ecdr.git@10fdd316452d0d69fbcf4e7915b66c227298b0ec"
"""
_this_dir = Path(__file__).parent
software_git_hash_result = subprocess.run(
["git", "rev-parse", "HEAD"], capture_output=True
["git", "rev-parse", "HEAD"],
capture_output=True,
cwd=_this_dir,
)
software_git_hash_result.check_returncode()
software_git_hash = software_git_hash_result.stdout.decode("utf8").strip()

software_git_url_result = subprocess.run(
["git", "config", "--get", "remote.origin.url"], capture_output=True
["git", "config", "--get", "remote.origin.url"],
capture_output=True,
cwd=_this_dir,
)
software_git_url_result.check_returncode()
software_git_url = software_git_url_result.stdout.decode("utf8").strip()
Expand Down
3 changes: 0 additions & 3 deletions seaice_ecdr/nrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,6 @@ def override_attrs_for_nrt(
f"This data set provides a near-real-time (NRT) passive microwave sea ice concentration climate data record (CDR) based on gridded brightness temperatures (TBs) from the Global Change Observation Mission 1st-Water (GCOM-W1) passive microwave radiometer: Advanced Microwave Scanning Radiometer 2 (AMSR2). The sea ice concentration CDR is an estimate of sea ice concentration that is produced by combining concentration estimates from two algorithms developed at the NASA Goddard Space Flight Center (GSFC): the NASA Team algorithm and the Bootstrap algorithm. The individual algorithms are used to process and combine brightness temperature data at NSIDC. This product is designed to provide an NRT time series of sea ice concentrations (the fraction, or percentage, of ocean area covered by sea ice). The data are gridded on the NSIDC polar stereographic grid with {resolution} x {resolution} km grid cells and are available in NetCDF file format. Each file contains a variable with the CDR concentration values as well as variables that hold the NASA Team and Bootstrap processed concentrations for reference. Variables containing standard deviation, quality flags, and projection information are also included."
)

# NOTE: this NRT summary is specific to either F17 or AMSR2
assert platform_id in override_for_nrt.attrs["summary"]

override_for_nrt.attrs["id"] = "https://doi.org/10.7265/j0z0-4h87"
link_to_dataproduct = f"https://nsidc.org/data/g10016/versions/{ECDR_NRT_PRODUCT_VERSION.major_version_number}"
override_for_nrt.attrs["metadata_link"] = link_to_dataproduct
Expand Down
63 changes: 32 additions & 31 deletions seaice_ecdr/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@

import csv
import datetime as dt
import itertools
from collections import defaultdict
from pathlib import Path
from typing import Final, Literal, cast, get_args

import click
import xarray as xr
import datatree
import pandas as pd
from loguru import logger
from pm_tb_data._types import Hemisphere

Expand All @@ -59,13 +59,13 @@
)
from seaice_ecdr.cli.util import datetime_to_date
from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR
from seaice_ecdr.intermediate_daily import get_ecdr_filepath
from seaice_ecdr.intermediate_monthly import get_intermediate_monthly_dir
from seaice_ecdr.platforms import PLATFORM_CONFIG
from seaice_ecdr.publish_daily import get_complete_daily_filepath
from seaice_ecdr.publish_monthly import get_complete_monthly_dir
from seaice_ecdr.util import (
date_range,
find_standard_monthly_netcdf_files,
get_intermediate_output_dir,
get_complete_output_dir,
get_num_missing_pixels,
)

Expand Down Expand Up @@ -220,7 +220,7 @@ def get_error_code(

def get_pixel_counts(
*,
ds: xr.Dataset,
ds: datatree.DataTree,
product: Product,
hemisphere: Hemisphere,
ancillary_source: ANCILLARY_SOURCES = "CDRv5",
Expand All @@ -246,7 +246,7 @@ def get_pixel_counts(
# total ice land coast lake pole oceanmask ice-free missing bad melt
total_num_pixels = len(ds.x) * len(ds.y)
# Areas where there is a concentration detected.
num_ice_pixels = int(((seaice_conc_var > 0) & (seaice_conc_var <= 1)).sum())
num_ice_pixels = int(((seaice_conc_var > 0) & (seaice_conc_var <= 1)).sum()) # type: ignore[union-attr, operator]

# Surface value counts. These should be the same for every day.
surf_value_counts = {}
Expand All @@ -256,26 +256,28 @@ def get_pixel_counts(
continue

flag_value = flag_value_for_meaning(
var=ds.surface_type_mask,
var=ds.cdr_supplementary.surface_type_mask,
meaning=flag,
)
num_flag_pixels = int((ds.surface_type_mask == flag_value).sum())
num_flag_pixels = int(
(ds.cdr_supplementary.surface_type_mask == flag_value).sum()
)
surf_value_counts[flag] = num_flag_pixels

# Number of oceanmask (invalid ice mask) pixels
invalid_ice_bitmask_value = bitmask_value_for_meaning(
var=qa_var,
var=qa_var, # type: ignore[arg-type]
meaning="invalid_ice_mask_applied",
)
invalid_ice_mask = (qa_var & invalid_ice_bitmask_value) > 0
num_oceanmask_pixels = int(invalid_ice_mask.sum())

# Ice-free pixels (conc == 0)
num_ice_free_pixels = int((seaice_conc_var == 0).sum())
num_ice_free_pixels = int((seaice_conc_var == 0).sum()) # type: ignore[union-attr]

# Get the number of missing pixels in the cdr conc field.
num_missing_pixels = get_num_missing_pixels(
seaice_conc_var=seaice_conc_var,
seaice_conc_var=seaice_conc_var, # type: ignore[arg-type]
hemisphere=hemisphere,
resolution=VALIDATION_RESOLUTION,
ancillary_source=ancillary_source,
Expand All @@ -287,10 +289,10 @@ def get_pixel_counts(
# as 0.099999 as a floating point data.
# Note: xarray .sum() is similar to numpy.nansum() in that it will
# ignore NaNs in the summation operation
gt_100_sic = int((seaice_conc_var > 1).sum())
gt_100_sic = int((seaice_conc_var > 1).sum()) # type: ignore[union-attr, operator]
if product == "daily":
less_than_10_sic = int(
((seaice_conc_var > 0) & (seaice_conc_var <= 0.0999)).sum()
((seaice_conc_var > 0) & (seaice_conc_var <= 0.0999)).sum() # type: ignore[union-attr, operator]
)
num_bad_pixels = less_than_10_sic + gt_100_sic
else:
Expand All @@ -303,7 +305,7 @@ def get_pixel_counts(
if product == "monthly":
_melt_start_meaning = "at_least_one_day_during_month_has_melt_detected"
melt_start_detected_bitmask_value = bitmask_value_for_meaning(
var=qa_var,
var=qa_var, # type: ignore[arg-type]
meaning=_melt_start_meaning,
)
melt_start_detected_mask = (qa_var & melt_start_detected_bitmask_value) > 0
Expand Down Expand Up @@ -335,7 +337,7 @@ def make_validation_dict(
date: dt.date,
hemisphere: Hemisphere,
) -> dict:
ds = xr.open_dataset(data_fp)
ds = datatree.open_datatree(data_fp)

pixel_counts = get_pixel_counts(
ds=ds,
Expand Down Expand Up @@ -378,7 +380,7 @@ def validate_outputs(
* error_seaice_{n|s}_daily_{start_year}_{end_year}.csv. Contains the
following fields: [year, month, day, error_code]
"""
intermediate_output_dir = get_intermediate_output_dir(
complete_output_dir = get_complete_output_dir(
base_output_dir=base_output_dir,
hemisphere=hemisphere,
)
Expand All @@ -404,12 +406,12 @@ def validate_outputs(
if product == "daily":
for date in date_range(start_date=start_date, end_date=end_date):
platform = PLATFORM_CONFIG.get_platform_by_date(date)
data_fp = get_ecdr_filepath(
data_fp = get_complete_daily_filepath(
date=date,
platform_id=platform.id,
hemisphere=hemisphere,
resolution=VALIDATION_RESOLUTION,
intermediate_output_dir=intermediate_output_dir,
complete_output_dir=complete_output_dir,
is_nrt=False,
)

Expand Down Expand Up @@ -445,19 +447,18 @@ def validate_outputs(
),
)
else:
years = range(start_date.year, end_date.year + 1)
months = range(start_date.month, end_date.month + 1)
for year, month in itertools.product(years, months):
monthly_dir = get_intermediate_monthly_dir(
intermediate_output_dir=intermediate_output_dir,
periods = pd.period_range(start=start_date, end=end_date, freq="M")
for period in periods:
monthly_dir = get_complete_monthly_dir(
complete_output_dir=complete_output_dir,
)

results = find_standard_monthly_netcdf_files(
search_dir=monthly_dir,
hemisphere=hemisphere,
resolution=VALIDATION_RESOLUTION,
year=year,
month=month,
year=period.year,
month=period.month,
platform_id="*",
)
if not results:
Expand All @@ -466,24 +467,24 @@ def validate_outputs(
validation_dict = make_validation_dict(
data_fp=results[0],
product=product,
date=dt.date(year, month, 1),
date=dt.date(period.year, period.month, 1),
hemisphere=hemisphere,
)
write_error_entry(
product=product,
csv_writer=error_writer,
entry=dict(
year=year,
month=month,
year=period.year,
month=period.month,
**validation_dict["error"],
),
)
write_log_entry(
product=product,
csv_writer=log_writer,
entry=dict(
year=year,
month=month,
year=period.year,
month=period.month,
**validation_dict["log"],
),
)
Expand Down

0 comments on commit 2daa616

Please sign in to comment.