From 5d131bf45c72333af4b77b4bf22522cccff58486 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Thu, 7 Nov 2024 16:26:12 -0700 Subject: [PATCH 1/7] Fixup subproc cwd --- seaice_ecdr/nc_attrs.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/seaice_ecdr/nc_attrs.py b/seaice_ecdr/nc_attrs.py index be0e48d0..23e18ad6 100644 --- a/seaice_ecdr/nc_attrs.py +++ b/seaice_ecdr/nc_attrs.py @@ -3,6 +3,7 @@ import subprocess from collections import OrderedDict from functools import cache +from pathlib import Path from typing import Any, Literal, get_args import pandas as pd @@ -32,14 +33,19 @@ def _get_software_version_id(): Takes the form <git_repo_url>@<git_hash>. E.g.,: "git@github.com:nsidc/seaice_ecdr.git@10fdd316452d0d69fbcf4e7915b66c227298b0ec" """ + _this_dir = Path(__file__).parent software_git_hash_result = subprocess.run( - ["git", "rev-parse", "HEAD"], capture_output=True + ["git", "rev-parse", "HEAD"], + capture_output=True, + cwd=_this_dir, ) software_git_hash_result.check_returncode() software_git_hash = software_git_hash_result.stdout.decode("utf8").strip() software_git_url_result = subprocess.run( - ["git", "config", "--get", "remote.origin.url"], capture_output=True + ["git", "config", "--get", "remote.origin.url"], + capture_output=True, + cwd=_this_dir, ) software_git_url_result.check_returncode() software_git_url = software_git_url_result.stdout.decode("utf8").strip() From 093d056580028f123c3a90654c04abf3edea0300 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Fri, 8 Nov 2024 09:33:10 -0700 Subject: [PATCH 2/7] Remove no-longer-valid assertion --- seaice_ecdr/nrt.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/seaice_ecdr/nrt.py b/seaice_ecdr/nrt.py index 57efbd5b..a1195544 100644 --- a/seaice_ecdr/nrt.py +++ b/seaice_ecdr/nrt.py @@ -329,9 +329,6 @@ def override_attrs_for_nrt( f"This data set provides a near-real-time (NRT) passive microwave sea ice concentration climate data record (CDR) based on gridded brightness temperatures (TBs) from 
the Global Change Observation Mission 1st-Water (GCOM-W1) passive microwave radiometer: Advanced Microwave Scanning Radiometer 2 (AMSR2). The sea ice concentration CDR is an estimate of sea ice concentration that is produced by combining concentration estimates from two algorithms developed at the NASA Goddard Space Flight Center (GSFC): the NASA Team algorithm and the Bootstrap algorithm. The individual algorithms are used to process and combine brightness temperature data at NSIDC. This product is designed to provide an NRT time series of sea ice concentrations (the fraction, or percentage, of ocean area covered by sea ice). The data are gridded on the NSIDC polar stereographic grid with {resolution} x {resolution} km grid cells and are available in NetCDF file format. Each file contains a variable with the CDR concentration values as well as variables that hold the NASA Team and Bootstrap processed concentrations for reference. Variables containing standard deviation, quality flags, and projection information are also included." 
) - # NOTE: this NRT summary is specific to either F17 or AMSR2 - assert platform_id in override_for_nrt.attrs["summary"] - override_for_nrt.attrs["id"] = "https://doi.org/10.7265/j0z0-4h87" link_to_dataproduct = f"https://nsidc.org/data/g10016/versions/{ECDR_NRT_PRODUCT_VERSION.major_version_number}" override_for_nrt.attrs["metadata_link"] = link_to_dataproduct From ad25cb58f0210eaaebc846e3334c390db6c623de Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Mon, 18 Nov 2024 10:34:39 -0700 Subject: [PATCH 3/7] Fixup monthly validation code to process all months in date range --- seaice_ecdr/validation.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/seaice_ecdr/validation.py b/seaice_ecdr/validation.py index 6c6e5ede..16ae9aa8 100644 --- a/seaice_ecdr/validation.py +++ b/seaice_ecdr/validation.py @@ -42,12 +42,12 @@ import csv import datetime as dt -import itertools from collections import defaultdict from pathlib import Path from typing import Final, Literal, cast, get_args import click +import pandas as pd import xarray as xr from loguru import logger from pm_tb_data._types import Hemisphere @@ -445,9 +445,8 @@ def validate_outputs( ), ) else: - years = range(start_date.year, end_date.year + 1) - months = range(start_date.month, end_date.month + 1) - for year, month in itertools.product(years, months): + periods = pd.period_range(start=start_date, end=end_date, freq="M") + for period in periods: monthly_dir = get_intermediate_monthly_dir( intermediate_output_dir=intermediate_output_dir, ) @@ -456,8 +455,8 @@ def validate_outputs( search_dir=monthly_dir, hemisphere=hemisphere, resolution=VALIDATION_RESOLUTION, - year=year, - month=month, + year=period.year, + month=period.month, platform_id="*", ) if not results: @@ -466,15 +465,15 @@ def validate_outputs( validation_dict = make_validation_dict( data_fp=results[0], product=product, - date=dt.date(year, month, 1), + date=dt.date(period.year, period.month, 1), 
hemisphere=hemisphere, ) write_error_entry( product=product, csv_writer=error_writer, entry=dict( - year=year, - month=month, + year=period.year, + month=period.month, **validation_dict["error"], ), ) @@ -482,8 +481,8 @@ def validate_outputs( product=product, csv_writer=log_writer, entry=dict( - year=year, - month=month, + year=period.year, + month=period.month, **validation_dict["log"], ), ) From 98d4753765b6ab959f8a394d0df3f5b259de136c Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Mon, 18 Nov 2024 11:24:40 -0700 Subject: [PATCH 4/7] Update validation code for new final outputs Note that this does not include any validation of the amsr2 data. --- seaice_ecdr/validation.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/seaice_ecdr/validation.py b/seaice_ecdr/validation.py index 16ae9aa8..9a8132ec 100644 --- a/seaice_ecdr/validation.py +++ b/seaice_ecdr/validation.py @@ -47,8 +47,8 @@ from typing import Final, Literal, cast, get_args import click +import datatree import pandas as pd -import xarray as xr from loguru import logger from pm_tb_data._types import Hemisphere @@ -59,13 +59,13 @@ ) from seaice_ecdr.cli.util import datetime_to_date from seaice_ecdr.constants import DEFAULT_BASE_OUTPUT_DIR -from seaice_ecdr.intermediate_daily import get_ecdr_filepath -from seaice_ecdr.intermediate_monthly import get_intermediate_monthly_dir from seaice_ecdr.platforms import PLATFORM_CONFIG +from seaice_ecdr.publish_daily import get_complete_daily_filepath +from seaice_ecdr.publish_monthly import get_complete_monthly_dir from seaice_ecdr.util import ( date_range, find_standard_monthly_netcdf_files, - get_intermediate_output_dir, + get_complete_output_dir, get_num_missing_pixels, ) @@ -220,7 +220,7 @@ def get_error_code( def get_pixel_counts( *, - ds: xr.Dataset, + ds: datatree.DataTree, product: Product, hemisphere: Hemisphere, ancillary_source: ANCILLARY_SOURCES = "CDRv5", @@ -256,10 +256,12 @@ def get_pixel_counts( 
continue flag_value = flag_value_for_meaning( - var=ds.surface_type_mask, + var=ds.cdr_supplementary.surface_type_mask, meaning=flag, ) - num_flag_pixels = int((ds.surface_type_mask == flag_value).sum()) + num_flag_pixels = int( + (ds.cdr_supplementary.surface_type_mask == flag_value).sum() + ) surf_value_counts[flag] = num_flag_pixels # Number of oceanmask (invalid ice mask) pixels @@ -335,7 +337,7 @@ def make_validation_dict( date: dt.date, hemisphere: Hemisphere, ) -> dict: - ds = xr.open_dataset(data_fp) + ds = datatree.open_datatree(data_fp) pixel_counts = get_pixel_counts( ds=ds, @@ -378,7 +380,7 @@ def validate_outputs( * error_seaice_{n|s}_daily_{start_year}_{end_year}.csv. Contains the following fields: [year, month, day, error_code] """ - intermediate_output_dir = get_intermediate_output_dir( + complete_output_dir = get_complete_output_dir( base_output_dir=base_output_dir, hemisphere=hemisphere, ) @@ -404,12 +406,12 @@ def validate_outputs( if product == "daily": for date in date_range(start_date=start_date, end_date=end_date): platform = PLATFORM_CONFIG.get_platform_by_date(date) - data_fp = get_ecdr_filepath( + data_fp = get_complete_daily_filepath( date=date, platform_id=platform.id, hemisphere=hemisphere, resolution=VALIDATION_RESOLUTION, - intermediate_output_dir=intermediate_output_dir, + complete_output_dir=complete_output_dir, is_nrt=False, ) @@ -447,8 +449,8 @@ def validate_outputs( else: periods = pd.period_range(start=start_date, end=end_date, freq="M") for period in periods: - monthly_dir = get_intermediate_monthly_dir( - intermediate_output_dir=intermediate_output_dir, + monthly_dir = get_complete_monthly_dir( + complete_output_dir=complete_output_dir, ) results = find_standard_monthly_netcdf_files( From 8edb08f2dbfddfa673cc3b7a76404e47a8859a7b Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Mon, 18 Nov 2024 11:31:39 -0700 Subject: [PATCH 5/7] Ignore datatree-related type errors --- seaice_ecdr/validation.py | 14 +++++++------- 1 file 
changed, 7 insertions(+), 7 deletions(-) diff --git a/seaice_ecdr/validation.py b/seaice_ecdr/validation.py index 9a8132ec..fe86f32f 100644 --- a/seaice_ecdr/validation.py +++ b/seaice_ecdr/validation.py @@ -246,7 +246,7 @@ def get_pixel_counts( # total ice land coast lake pole oceanmask ice-free missing bad melt total_num_pixels = len(ds.x) * len(ds.y) # Areas where there is a concentration detected. - num_ice_pixels = int(((seaice_conc_var > 0) & (seaice_conc_var <= 1)).sum()) + num_ice_pixels = int(((seaice_conc_var > 0) & (seaice_conc_var <= 1)).sum()) # type: ignore[union-attr, operator] # Surface value counts. These should be the same for every day. surf_value_counts = {} @@ -266,18 +266,18 @@ def get_pixel_counts( # Number of oceanmask (invalid ice mask) pixels invalid_ice_bitmask_value = bitmask_value_for_meaning( - var=qa_var, + var=qa_var, # type: ignore[arg-type] meaning="invalid_ice_mask_applied", ) invalid_ice_mask = (qa_var & invalid_ice_bitmask_value) > 0 num_oceanmask_pixels = int(invalid_ice_mask.sum()) # Ice-free pixels (conc == 0) - num_ice_free_pixels = int((seaice_conc_var == 0).sum()) + num_ice_free_pixels = int((seaice_conc_var == 0).sum()) # type: ignore[union-attr] # Get the number of missing pixels in the cdr conc field. num_missing_pixels = get_num_missing_pixels( - seaice_conc_var=seaice_conc_var, + seaice_conc_var=seaice_conc_var, # type: ignore[arg-type] hemisphere=hemisphere, resolution=VALIDATION_RESOLUTION, ancillary_source=ancillary_source, @@ -289,10 +289,10 @@ def get_pixel_counts( # as 0.099999 as a floating point data. 
# Note: xarray .sum() is similar to numpy.nansum() in that it will # ignore NaNs in the summation operation - gt_100_sic = int((seaice_conc_var > 1).sum()) + gt_100_sic = int((seaice_conc_var > 1).sum()) # type: ignore[union-attr, operator] if product == "daily": less_than_10_sic = int( - ((seaice_conc_var > 0) & (seaice_conc_var <= 0.0999)).sum() + ((seaice_conc_var > 0) & (seaice_conc_var <= 0.0999)).sum() # type: ignore[union-attr, operator] ) num_bad_pixels = less_than_10_sic + gt_100_sic else: @@ -305,7 +305,7 @@ def get_pixel_counts( if product == "monthly": _melt_start_meaning = "at_least_one_day_during_month_has_melt_detected" melt_start_detected_bitmask_value = bitmask_value_for_meaning( - var=qa_var, + var=qa_var, # type: ignore[arg-type] meaning=_melt_start_meaning, ) melt_start_detected_mask = (qa_var & melt_start_detected_bitmask_value) > 0 From 463139a8b3ef3eaab41657faa093a168636f35a8 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Thu, 5 Dec 2024 17:10:35 -0700 Subject: [PATCH 6/7] CHANGELOG --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06c0d2d4..baaa2bba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +# v1.0.2 + +* Fix for subprocess calls for `git` operations (e.g., to get the current + version) with potentially incorrect working dir. +* Remove no-longer-valid (OBE) assertion from `override_attrs_for_nrt`. +* Fix monthly validation code to process all months in date range. +* Update validation code to process data in final, publication-ready state. 
+ # v1.0.1 * Monthly melt onset day uses DOY 244 as the last day in the melt season unless From bf45530351a58cd7e2373d6c84165717df110812 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Thu, 5 Dec 2024 17:11:16 -0700 Subject: [PATCH 7/7] Bumpversion v1.0.1 -> v1.0.2 --- pyproject.toml | 4 ++-- seaice_ecdr/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a2808e35..e3dd5627 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,9 @@ [project] name = "seaice_ecdr" -version = "1.0.1" +version = "1.0.2" [tool.bumpversion] -current_version = "1.0.1" +current_version = "1.0.2" commit = false tag = false diff --git a/seaice_ecdr/__init__.py b/seaice_ecdr/__init__.py index e88f979f..b499c0d3 100644 --- a/seaice_ecdr/__init__.py +++ b/seaice_ecdr/__init__.py @@ -6,7 +6,7 @@ from seaice_ecdr.constants import LOGS_DIR -__version__ = "v1.0.1" +__version__ = "v1.0.2" # The standard loguru log levels, in increasing order of severity, are: # TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL