From 8b00838be62a9adb2334ad7a46e9a468429294e8 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Wed, 27 Sep 2023 20:40:32 -0400
Subject: [PATCH] Separate `opera_utils.py` (#132)

* separate more opera code from main config

* fix tests and references to `opera_utils`, add changelog entry
---
 CHANGELOG.md                           |   6 +
 src/dolphin/opera_utils.py             | 224 +++++++++++++++++++++++++
 src/dolphin/workflows/_cli_config.py   |  13 +-
 src/dolphin/workflows/_utils.py        | 202 +---------------------
 src/dolphin/workflows/config.py        |  46 +----
 src/dolphin/workflows/s1_disp.py       |   3 +-
 src/dolphin/workflows/wrapped_phase.py |   5 +-
 tests/test_workflows_config.py         |   8 +-
 tests/test_workflows_opera_utils.py    |  47 ++++++
 tests/test_workflows_s1_disp.py        |   5 +-
 tests/test_workflows_utils.py          |  45 -----
 11 files changed, 302 insertions(+), 302 deletions(-)
 create mode 100644 src/dolphin/opera_utils.py
 create mode 100644 tests/test_workflows_opera_utils.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b55f52e5..3e82e80e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Unreleased
 
+**Changed**
+
+- Moved all `OPERA_` variables to a new module `dolphin.opera_utils`.
+  - Other OPERA-specific quirks have been moved to the separate `disp-s1` repo,
+    but the remaining functions are the ones most broadly useful to `sweets`
+    and other users working with burst SLCs.
 
 # [0.4.1](https://github.com/isce-framework/dolphin/compare/v0.4.0...v0.4.1) - 2023-09-08
 
diff --git a/src/dolphin/opera_utils.py b/src/dolphin/opera_utils.py
new file mode 100644
index 00000000..f8f1876d
--- /dev/null
+++ b/src/dolphin/opera_utils.py
@@ -0,0 +1,224 @@
+from __future__ import annotations
+
+import itertools
+import json
+import re
+import subprocess
+import tempfile
+from pathlib import Path
+from typing import Pattern, Sequence, Union
+
+import h5py
+from shapely import geometry, ops, wkt
+
+from dolphin._log import get_log
+from dolphin._types import Filename
+
+logger = get_log(__name__)
+
+
+# Specific to OPERA CSLC products:
+OPERA_DATASET_ROOT = "/"
+OPERA_DATASET_NAME = f"{OPERA_DATASET_ROOT}/data/VV"
+OPERA_IDENTIFICATION = f"{OPERA_DATASET_ROOT}/identification"
+
+# It should match either of these within a filename:
+# t087_185684_iw2 (which comes from COMPASS)
+# T087-165495-IW3 (which is the official product naming scheme)
+# e.g.
+# OPERA_L2_CSLC-S1_T078-165495-IW3_20190906T232711Z_20230101T100506Z_S1A_VV_v1.0.h5
+
+OPERA_BURST_RE = re.compile(
+    r"(?P<prefix>.*?)(?P<track>\d{3})[-_](?P<burst_id>\d{6})[-_](?P<subswath>iw[1-3])",
+    re.IGNORECASE,
+)
+
+
+def group_by_burst(
+    file_list: Sequence[Filename],
+    burst_id_fmt: Union[str, Pattern[str]] = OPERA_BURST_RE,
+    minimum_images: int = 2,
+) -> dict[str, list[Path]]:
+    """Group Sentinel CSLC files by burst.
+
+    Parameters
+    ----------
+    file_list: list[Filename]
+        list of paths of CSLC files
+    burst_id_fmt: str
+        format of the burst id in the filename.
+        Default is [`OPERA_BURST_RE`][dolphin.opera_utils.OPERA_BURST_RE]
+    minimum_images: int
+        Minimum number of SLCs needed to run the workflow for each burst.
+        If there are fewer SLCs in a burst, it will be skipped and
+        a warning will be logged.
+
+    Returns
+    -------
+    dict
+        Key is the burst id of the SLC acquisition.
+        Value is a list of Paths for that burst:
+        {
+            't087_185678_iw2': [Path(...), Path(...),],
+            't087_185678_iw3': [Path(...),... ],
+        }
+    """
+
+    def get_burst_id(filename):
+        if not (m := re.search(burst_id_fmt, str(filename))):
+            raise ValueError(f"Could not parse burst id from {filename}")
+        return m.group()
+
+    def sort_by_burst_id(file_list):
+        """Sort files by burst id."""
+        burst_ids = [get_burst_id(f) for f in file_list]
+        file_burst_tups = sorted(
+            [(Path(f), d) for f, d in zip(file_list, burst_ids)],
+            # use the date or dates as the key
+            key=lambda f_d_tuple: f_d_tuple[1],  # type: ignore
+        )
+        # Unpack the sorted pairs with new sorted values
+        file_list, burst_ids = zip(*file_burst_tups)  # type: ignore
+        return file_list
+
+    if not file_list:
+        return {}
+
+    sorted_file_list = sort_by_burst_id(file_list)
+    # Now collapse into groups, sorted by the burst_id
+    grouped_images = {
+        burst_id: list(g)
+        for burst_id, g in itertools.groupby(
+            sorted_file_list, key=lambda x: get_burst_id(x)
+        )
+    }
+    # Make sure that each burst has at least the minimum number of SLCs
+    out = {}
+    for burst_id, slc_list in grouped_images.items():
+        if len(slc_list) < minimum_images:
+            logger.warning(
+                f"Skipping burst {burst_id} because it has only {len(slc_list)} SLCs."
+                f" Minimum number of SLCs is {minimum_images}"
+            )
+        else:
+            out[burst_id] = slc_list
+    return out
+
+
+def get_cslc_polygon(
+    opera_file: Filename, buffer_degrees: float = 0.0
+) -> Union[geometry.Polygon, None]:
+    """Get the bounding polygon of a single CSLC file.
+
+    Parameters
+    ----------
+    opera_file : Filename
+        Path to one COMPASS/OPERA CSLC file.
+    buffer_degrees : float, optional
+        Buffer the polygon by this many degrees, by default 0.0
+    """
+    dset_name = f"{OPERA_IDENTIFICATION}/bounding_polygon"
+    with h5py.File(opera_file) as hf:
+        if dset_name not in hf:
+            logger.debug(f"Could not find {dset_name} in {opera_file}")
+            return None
+        wkt_str = hf[dset_name][()].decode("utf-8")
+    return wkt.loads(wkt_str).buffer(buffer_degrees)
+
+
+def get_union_polygon(
+    opera_file_list: Sequence[Filename], buffer_degrees: float = 0.0
+) -> geometry.Polygon:
+    """Get the union of the bounding polygons of the given files.
+
+    Parameters
+    ----------
+    opera_file_list : list[Filename]
+        list of COMPASS SLC filenames.
+    buffer_degrees : float, optional
+        Buffer the polygons by this many degrees, by default 0.0
+    """
+    polygons = [get_cslc_polygon(f, buffer_degrees) for f in opera_file_list]
+    polygons = [p for p in polygons if p is not None]
+
+    if len(polygons) == 0:
+        raise ValueError("No polygons found in the given file list.")
+    # Union all the polygons
+    return ops.unary_union(polygons)
+
+
+def make_nodata_mask(
+    opera_file_list: Sequence[Filename],
+    out_file: Filename,
+    buffer_pixels: int = 0,
+    overwrite: bool = False,
+):
+    """Make a boolean raster mask from the union of nodata polygons.
+
+    Parameters
+    ----------
+    opera_file_list : list[Filename]
+        list of COMPASS SLC filenames.
+    out_file : Filename
+        Output filename.
+    buffer_pixels : int, optional
+        Number of pixels to buffer the union polygon by, by default 0.
+        Note that buffering will *decrease* the number of pixels marked as nodata.
+        This is to be more conservative and avoid masking possibly valid pixels.
+ overwrite : bool, optional + Overwrite the output file if it already exists, by default False + """ + from dolphin import io + + if Path(out_file).exists(): + if not overwrite: + logger.debug(f"Skipping {out_file} since it already exists.") + return + else: + logger.info(f"Overwriting {out_file} since overwrite=True.") + Path(out_file).unlink() + + # Check these are the right format to get nodata polygons + try: + test_f = f"NETCDF:{opera_file_list[0]}:{OPERA_DATASET_NAME}" + # convert pixels to degrees lat/lon + gt = io.get_raster_gt(test_f) + # TODO: more robust way to get the pixel size... this is a hack + # maybe just use pyproj to warp lat/lon to meters and back? + dx_meters = gt[1] + dx_degrees = dx_meters / 111000 + buffer_degrees = buffer_pixels * dx_degrees + except RuntimeError: + raise ValueError(f"Unable to open {test_f}") + + # Get the union of all the polygons and convert to a temp geojson + union_poly = get_union_polygon(opera_file_list, buffer_degrees=buffer_degrees) + # convert shapely polygon to geojson + + # Make a dummy raster from the first file with all 0s + # This will get filled in with the polygon rasterization + cmd = ( + f"gdal_calc.py --quiet --outfile {out_file} --type Byte -A" + f" NETCDF:{opera_file_list[0]}:{OPERA_DATASET_NAME} --calc 'numpy.nan_to_num(A)" + " * 0' --creation-option COMPRESS=LZW --creation-option TILED=YES" + " --creation-option BLOCKXSIZE=256 --creation-option BLOCKYSIZE=256" + ) + logger.info(cmd) + subprocess.check_call(cmd, shell=True) + + with tempfile.TemporaryDirectory() as tmpdir: + temp_vector_file = Path(tmpdir) / "temp.geojson" + with open(temp_vector_file, "w") as f: + f.write( + json.dumps( + { + "geometry": geometry.mapping(union_poly), + "properties": {"id": 1}, + } + ) + ) + + # Now burn in the union of all polygons + cmd = f"gdal_rasterize -q -burn 1 {temp_vector_file} {out_file}" + logger.info(cmd) + subprocess.check_call(cmd, shell=True) diff --git a/src/dolphin/workflows/_cli_config.py b/src/dolphin/workflows/_cli_config.py index c5ba8019..2f1565ec 100644 --- a/src/dolphin/workflows/_cli_config.py +++ b/src/dolphin/workflows/_cli_config.py @@ -7,13 +7,7 @@ from pathlib import Path from typing import Optional, Union -from .config import ( - OPERA_DATASET_NAME, - InterferogramNetworkType, - ShpMethod, - Workflow, - WorkflowName, -) +from .config import InterferogramNetworkType, ShpMethod, Workflow, WorkflowName def create_config( @@ -126,10 +120,7 @@ def get_parser(subparser=None, subcommand_name="run"): inputs.add_argument( "-sds", "--subdataset", - help=( - "Subdataset to use from HDF5/NetCDF files. For OPERA CSLC NetCDF files, if" - f" None is passed, the default is {OPERA_DATASET_NAME}." 
- ), + help="Subdataset to use from HDF5/NetCDF files.", ) inputs.add_argument( "--amplitude-mean-files", diff --git a/src/dolphin/workflows/_utils.py b/src/dolphin/workflows/_utils.py index ea2d9d09..dd9444cb 100644 --- a/src/dolphin/workflows/_utils.py +++ b/src/dolphin/workflows/_utils.py @@ -1,214 +1,14 @@ from __future__ import annotations -import itertools -import json -import re -import subprocess -import tempfile from pathlib import Path -from typing import Pattern, Sequence, Union -import h5py -from shapely import geometry, ops, wkt - -from dolphin import io from dolphin._log import get_log -from dolphin._types import Filename -from .config import OPERA_BURST_RE, OPERA_DATASET_NAME, OPERA_IDENTIFICATION, Workflow +from .config import Workflow logger = get_log(__name__) -def group_by_burst( - file_list: Sequence[Filename], - burst_id_fmt: Union[str, Pattern[str]] = OPERA_BURST_RE, - minimum_images: int = 2, -) -> dict[str, list[Path]]: - """Group Sentinel CSLC files by burst. - - Parameters - ---------- - file_list: list[Filename] - list of paths of CSLC files - burst_id_fmt: str - format of the burst id in the filename. - Default is [`OPERA_BURST_RE`][dolphin.workflows.config.OPERA_BURST_RE] - minimum_images: int - Minimum number of SLCs needed to run the workflow for each burst. - If there are fewer SLCs in a burst, it will be skipped and - a warning will be logged. - - Returns - ------- - dict - key is the burst id of the SLC acquisition - Value is a list of Paths on that burst: - { - 't087_185678_iw2': [Path(...), Path(...),], - 't087_185678_iw3': [Path(...),... ], - } - """ - - def get_burst_id(filename): - m = re.search(burst_id_fmt, str(filename)) - if not m: - raise ValueError(f"Could not parse burst id from {filename}") - return m.group() - - def sort_by_burst_id(file_list): - """Sort files by burst id.""" - burst_ids = [get_burst_id(f) for f in file_list] - file_burst_tups = sorted( - [(Path(f), d) for f, d in zip(file_list, burst_ids)], - # use the date or dates as the key - key=lambda f_d_tuple: f_d_tuple[1], # type: ignore - ) - # Unpack the sorted pairs with new sorted values - file_list, burst_ids = zip(*file_burst_tups) # type: ignore - return file_list - - if not file_list: - return {} - - sorted_file_list = sort_by_burst_id(file_list) - # Now collapse into groups, sorted by the burst_id - grouped_images = { - burst_id: list(g) - for burst_id, g in itertools.groupby( - sorted_file_list, key=lambda x: get_burst_id(x) - ) - } - # Make sure that each burst has at least the minimum number of SLCs - out = {} - for burst_id, slc_list in grouped_images.items(): - if len(slc_list) < minimum_images: - logger.warning( - f"Skipping burst {burst_id} because it has only {len(slc_list)} SLCs." - f"Minimum number of SLCs is {minimum_images}" - ) - else: - out[burst_id] = slc_list - return out - - -def get_cslc_polygon( - opera_file: Filename, buffer_degrees: float = 0.0 -) -> Union[geometry.Polygon, None]: - """Get the union of the bounding polygons of the given files. - - Parameters - ---------- - opera_file : list[Filename] - list of COMPASS SLC filenames. 
- buffer_degrees : float, optional - Buffer the polygons by this many degrees, by default 0.0 - """ - dset_name = f"{OPERA_IDENTIFICATION}/bounding_polygon" - with h5py.File(opera_file) as hf: - if dset_name not in hf: - logger.debug(f"Could not find {dset_name} in {opera_file}") - return None - wkt_str = hf[dset_name][()].decode("utf-8") - return wkt.loads(wkt_str).buffer(buffer_degrees) - - -def get_union_polygon( - opera_file_list: Sequence[Filename], buffer_degrees: float = 0.0 -) -> geometry.Polygon: - """Get the union of the bounding polygons of the given files. - - Parameters - ---------- - opera_file_list : list[Filename] - list of COMPASS SLC filenames. - buffer_degrees : float, optional - Buffer the polygons by this many degrees, by default 0.0 - """ - polygons = [get_cslc_polygon(f, buffer_degrees) for f in opera_file_list] - polygons = [p for p in polygons if p is not None] - - if len(polygons) == 0: - raise ValueError("No polygons found in the given file list.") - # Union all the polygons - return ops.unary_union(polygons) - - -def make_nodata_mask( - opera_file_list: Sequence[Filename], - out_file: Filename, - buffer_pixels: int = 0, - overwrite: bool = False, -): - """Make a boolean raster mask from the union of nodata polygons. - - Parameters - ---------- - opera_file_list : list[Filename] - list of COMPASS SLC filenames. - out_file : Filename - Output filename. - buffer_pixels : int, optional - Number of pixels to buffer the union polygon by, by default 0. - Note that buffering will *decrease* the numba of pixels marked as nodata. - This is to be more conservative to not mask possible valid pixels. - overwrite : bool, optional - Overwrite the output file if it already exists, by default False - """ - if Path(out_file).exists(): - if not overwrite: - logger.debug(f"Skipping {out_file} since it already exists.") - return - else: - logger.info(f"Overwriting {out_file} since overwrite=True.") - Path(out_file).unlink() - - # Check these are the right format to get nodata polygons - try: - test_f = f"NETCDF:{opera_file_list[0]}:{OPERA_DATASET_NAME}" - # convert pixels to degrees lat/lon - gt = io.get_raster_gt(test_f) - # TODO: more robust way to get the pixel size... this is a hack - # maybe just use pyproj to warp lat/lon to meters and back? 
- dx_meters = gt[1] - dx_degrees = dx_meters / 111000 - buffer_degrees = buffer_pixels * dx_degrees - except RuntimeError: - raise ValueError(f"Unable to open {test_f}") - - # Get the union of all the polygons and convert to a temp geojson - union_poly = get_union_polygon(opera_file_list, buffer_degrees=buffer_degrees) - # convert shapely polygon to geojson - - # Make a dummy raster from the first file with all 0s - # This will get filled in with the polygon rasterization - cmd = ( - f"gdal_calc.py --quiet --outfile {out_file} --type Byte -A" - f" NETCDF:{opera_file_list[0]}:{OPERA_DATASET_NAME} --calc 'numpy.nan_to_num(A)" - " * 0' --creation-option COMPRESS=LZW --creation-option TILED=YES" - " --creation-option BLOCKXSIZE=256 --creation-option BLOCKYSIZE=256" - ) - logger.info(cmd) - subprocess.check_call(cmd, shell=True) - - with tempfile.TemporaryDirectory() as tmpdir: - temp_vector_file = Path(tmpdir) / "temp.geojson" - with open(temp_vector_file, "w") as f: - f.write( - json.dumps( - { - "geometry": geometry.mapping(union_poly), - "properties": {"id": 1}, - } - ) - ) - - # Now burn in the union of all polygons - cmd = f"gdal_rasterize -q -burn 1 {temp_vector_file} {out_file}" - logger.info(cmd) - subprocess.check_call(cmd, shell=True) - - def _create_burst_cfg( cfg: Workflow, burst_id: str, diff --git a/src/dolphin/workflows/config.py b/src/dolphin/workflows/config.py index 7ee22acf..e795377a 100644 --- a/src/dolphin/workflows/config.py +++ b/src/dolphin/workflows/config.py @@ -1,6 +1,5 @@ from __future__ import annotations -import re from datetime import datetime from glob import glob from pathlib import Path @@ -30,16 +29,6 @@ logger = get_log(__name__) -# Specific to OPERA CSLC products: -OPERA_DATASET_ROOT = "/" -OPERA_DATASET_NAME = f"{OPERA_DATASET_ROOT}/data/VV" -OPERA_IDENTIFICATION = f"{OPERA_DATASET_ROOT}/identification" - -# for example, t087_185684_iw2 -OPERA_BURST_RE = re.compile( - r"t(?P\d{3})_(?P\d{6})_(?Piw[1-3])" -) - class PsOptions(BaseModel, extra="forbid"): """Options for the PS pixel selection portion of the workflow.""" @@ -229,11 +218,7 @@ class InputOptions(BaseModel, extra="forbid"): subdataset: Optional[str] = Field( None, - description=( - "If passing HDF5/NetCDF files, subdataset to use from CSLC files. " - f"If not specified, but all `cslc_file_list` looks like {OPERA_BURST_RE}, " - f" will use {OPERA_DATASET_NAME} as the subdataset." - ), + description="If passing HDF5/NetCDF files, subdataset to use from CSLC files. ", ) cslc_date_fmt: str = Field( "%Y%m%d", @@ -420,12 +405,6 @@ def _check_input_file_list(cls, v): return list(v) - @staticmethod - def _is_opera_file_list(cslc_file_list): - return all( - re.search(OPERA_BURST_RE, str(f)) is not None for f in cslc_file_list - ) - @model_validator(mode="after") def _check_slc_files_exist(self) -> "Workflow": file_list = self.cslc_file_list @@ -435,11 +414,11 @@ def _check_slc_files_exist(self) -> "Workflow": input_options = self.input_options date_fmt = input_options.cslc_date_fmt # Filter out files that don't have dates in the filename - file_matching_date = [Path(f) for f in file_list if get_dates(f, fmt=date_fmt)] - if len(file_matching_date) < len(file_list): + files_matching_date = [Path(f) for f in file_list if get_dates(f, fmt=date_fmt)] + if len(files_matching_date) < len(file_list): raise ValueError( - f"Found {len(file_matching_date)} files with dates in the filename" - f" out of {len(file_list)} files." 
+ f"Found {len(files_matching_date)} files with dates like {date_fmt} in" + f" the filename out of {len(file_list)} files." ) ext = file_list[0].suffix @@ -447,18 +426,9 @@ def _check_slc_files_exist(self) -> "Workflow": if ext in [".h5", ".nc"]: subdataset = input_options.subdataset if subdataset is None: - if self._is_opera_file_list(file_list): - # Assume that the user forgot to set the subdataset, and set it to the - # default OPERA dataset name - logger.info( - "CSLC files look like OPERA files, setting subdataset to" - f" {OPERA_DATASET_NAME}." - ) - subdataset = input_options.subdataset = OPERA_DATASET_NAME - else: - raise ValueError( - "Must provide subdataset name for input HDF5 files." - ) + raise ValueError( + "Must provide subdataset name for input NetCDF/HDF5 files." + ) # Coerce the file_list to a sorted list of Path objects self.cslc_file_list = [ diff --git a/src/dolphin/workflows/s1_disp.py b/src/dolphin/workflows/s1_disp.py index 0d11b559..ff95e883 100755 --- a/src/dolphin/workflows/s1_disp.py +++ b/src/dolphin/workflows/s1_disp.py @@ -9,10 +9,11 @@ from dolphin import __version__ from dolphin._background import DummyProcessPoolExecutor from dolphin._log import get_log, log_runtime +from dolphin.opera_utils import group_by_burst from dolphin.utils import get_max_memory_usage, set_num_threads from . import stitch_and_unwrap, wrapped_phase -from ._utils import _create_burst_cfg, _remove_dir_if_empty, group_by_burst +from ._utils import _create_burst_cfg, _remove_dir_if_empty from .config import Workflow diff --git a/src/dolphin/workflows/wrapped_phase.py b/src/dolphin/workflows/wrapped_phase.py index cdcbeaf4..c4844b1c 100644 --- a/src/dolphin/workflows/wrapped_phase.py +++ b/src/dolphin/workflows/wrapped_phase.py @@ -7,8 +7,9 @@ from dolphin._background import NvidiaMemoryWatcher from dolphin._log import get_log, log_runtime from dolphin.interferogram import Network +from dolphin.opera_utils import make_nodata_mask -from . import _utils, sequential, single +from . 
import sequential, single from .config import Workflow @@ -54,7 +55,7 @@ def run(cfg: Workflow, debug: bool = False) -> tuple[list[Path], Path, Path, Pat # Make the nodata mask from the polygons, if we're using OPERA CSLCs try: nodata_mask_file = cfg.work_directory / "nodata_mask.tif" - _utils.make_nodata_mask( + make_nodata_mask( vrt_stack.file_list, out_file=nodata_mask_file, buffer_pixels=200 ) except Exception as e: diff --git a/tests/test_workflows_config.py b/tests/test_workflows_config.py index fc67f855..9f25bf9e 100644 --- a/tests/test_workflows_config.py +++ b/tests/test_workflows_config.py @@ -240,7 +240,7 @@ def test_input_date_sort(dir_with_2_slcs): def test_input_opera_cslc(tmp_path, slc_stack): - """Check that we recognize the OPERA filename format and don't need a subdataset.""" + """Check that we recognize the OPERA filename format.""" # Make a file with the OPERA name like OPERA_BURST_RE # r"t(?P\d{3})_(?P\d{6})_(?Piw[1-3])" start_date = 20220101 @@ -260,9 +260,11 @@ def test_input_opera_cslc(tmp_path, slc_stack): ) file_list.append(Path(fname)) - opts = config.Workflow(cslc_file_list=file_list) + opts = config.Workflow( + cslc_file_list=file_list, input_options=dict(subdataset="/data/VV") + ) assert opts.cslc_file_list == file_list - assert opts.input_options.subdataset == config.OPERA_DATASET_NAME + assert opts.input_options.subdataset == "/data/VV" def test_input_cslc_empty(): diff --git a/tests/test_workflows_opera_utils.py b/tests/test_workflows_opera_utils.py new file mode 100644 index 00000000..2d6686af --- /dev/null +++ b/tests/test_workflows_opera_utils.py @@ -0,0 +1,47 @@ +import random +from itertools import chain +from pathlib import Path + +import pytest + +from dolphin.opera_utils import group_by_burst + + +def test_group_by_burst(): + expected = { + "t087_185678_iw2": [ + Path("t087_185678_iw2/20180210/t087_185678_iw2_20180210_VV.h5"), + Path("t087_185678_iw2/20180318/t087_185678_iw2_20180318_VV.h5"), + Path("t087_185678_iw2/20180423/t087_185678_iw2_20180423_VV.h5"), + ], + "t087_185678_iw3": [ + Path("t087_185678_iw3/20180210/t087_185678_iw3_20180210_VV.h5"), + Path("t087_185678_iw3/20180318/t087_185678_iw3_20180318_VV.h5"), + Path("t087_185678_iw3/20180517/t087_185678_iw3_20180517_VV.h5"), + ], + "t087_185679_iw1": [ + Path("t087_185679_iw1/20180210/t087_185679_iw1_20180210_VV.h5"), + Path("t087_185679_iw1/20180318/t087_185679_iw1_20180318_VV.h5"), + ], + } + in_files = list(chain.from_iterable(expected.values())) + + assert group_by_burst(in_files) == expected + + # Any order should work + random.shuffle(in_files) + # but the order of the lists of each key may be different + for burst, file_list in group_by_burst(in_files).items(): + assert sorted(file_list) == sorted(expected[burst]) + + +def test_group_by_burst_non_opera(): + with pytest.raises(ValueError, match="Could not parse burst id"): + group_by_burst(["20200101.slc", "20200202.slc"]) + # A combination should still error + group_by_burst( + [ + "20200101.slc", + Path("t087_185679_iw1/20180210/t087_185679_iw1_20180210_VV.h5"), + ] + ) diff --git a/tests/test_workflows_s1_disp.py b/tests/test_workflows_s1_disp.py index bf6f0b1e..5accd07e 100644 --- a/tests/test_workflows_s1_disp.py +++ b/tests/test_workflows_s1_disp.py @@ -7,6 +7,7 @@ import pytest from make_netcdf import create_test_nc +from dolphin.opera_utils import OPERA_DATASET_NAME from dolphin.workflows import config, s1_disp # 'Grid size 49 will likely result in GPU under-utilization due to low occupancy.' 
@@ -28,7 +29,7 @@ def opera_slc_files(tmp_path, slc_stack) -> list[Path]: d.mkdir() file_list = [] - *group_parts, ds_name = config.OPERA_DATASET_NAME.split("/") + *group_parts, ds_name = OPERA_DATASET_NAME.split("/") group = "/".join(group_parts) for burst_id in ["t087_185683_iw2", "t087_185684_iw2"]: for i in range(len(slc_stack)): @@ -54,6 +55,7 @@ def test_s1_disp_run_single(opera_slc_files: list[Path], tmpdir): cfg = config.Workflow( workflow_name=config.WorkflowName.SINGLE, cslc_file_list=opera_slc_files, + input_options=dict(subdataset="/data/VV"), interferogram_network=dict( network_type=config.InterferogramNetworkType.MANUAL_INDEX, indexes=[(0, -1)], @@ -73,6 +75,7 @@ def test_s1_disp_run_stack(opera_slc_files: list[Path], tmpdir): cfg = config.Workflow( workflow_name=config.WorkflowName.STACK, cslc_file_list=opera_slc_files, + input_options=dict(subdataset="/data/VV"), phase_linking=dict( ministack_size=500, ), diff --git a/tests/test_workflows_utils.py b/tests/test_workflows_utils.py index 73d7f6aa..4818e88e 100644 --- a/tests/test_workflows_utils.py +++ b/tests/test_workflows_utils.py @@ -1,56 +1,11 @@ -import random -from itertools import chain -from pathlib import Path - import numpy as np import pytest from osgeo import gdal from dolphin import stack -from dolphin.workflows import group_by_burst from dolphin.workflows.single import setup_output_folder -def test_group_by_burst(): - expected = { - "t087_185678_iw2": [ - Path("t087_185678_iw2/20180210/t087_185678_iw2_20180210_VV.h5"), - Path("t087_185678_iw2/20180318/t087_185678_iw2_20180318_VV.h5"), - Path("t087_185678_iw2/20180423/t087_185678_iw2_20180423_VV.h5"), - ], - "t087_185678_iw3": [ - Path("t087_185678_iw3/20180210/t087_185678_iw3_20180210_VV.h5"), - Path("t087_185678_iw3/20180318/t087_185678_iw3_20180318_VV.h5"), - Path("t087_185678_iw3/20180517/t087_185678_iw3_20180517_VV.h5"), - ], - "t087_185679_iw1": [ - Path("t087_185679_iw1/20180210/t087_185679_iw1_20180210_VV.h5"), - Path("t087_185679_iw1/20180318/t087_185679_iw1_20180318_VV.h5"), - ], - } - in_files = list(chain.from_iterable(expected.values())) - - assert group_by_burst(in_files) == expected - - # Any order should work - random.shuffle(in_files) - # but the order of the lists of each key may be different - for burst, file_list in group_by_burst(in_files).items(): - assert sorted(file_list) == sorted(expected[burst]) - - -def test_group_by_burst_non_opera(): - with pytest.raises(ValueError, match="Could not parse burst id"): - group_by_burst(["20200101.slc", "20200202.slc"]) - # A combination should still error - group_by_burst( - [ - "20200101.slc", - Path("t087_185679_iw1/20180210/t087_185679_iw1_20180210_VV.h5"), - ] - ) - - def test_setup_output_folder(tmpdir, tiled_file_list): vrt_stack = stack.VRTStack(tiled_file_list, outfile=tmpdir / "stack.vrt") out_file_list = setup_output_folder(vrt_stack, driver="GTiff", dtype=np.complex64)
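
Note on downstream usage (illustration only, not part of the patch): after this refactor the only change most callers need is the import path, e.g. `from dolphin.opera_utils import group_by_burst` instead of pulling it from `dolphin.workflows._utils`, plus an explicit `subdataset`, since OPERA auto-detection was removed from `Workflow`. The sketch below is a minimal, hypothetical example: the `cslc_dir` glob and output path are made up, while the function signatures, the `buffer_pixels=200` call, and the `/data/VV` subdataset come from the diff and tests above.

    from pathlib import Path

    from dolphin.opera_utils import group_by_burst, make_nodata_mask
    from dolphin.workflows import config

    # Hypothetical stack of COMPASS/OPERA CSLC files (burst id in each filename).
    cslc_files = sorted(Path("cslc_dir").glob("t087_*_iw*/**/*.h5"))

    # Group by burst id; bursts with fewer than `minimum_images` SLCs are
    # skipped with a warning.
    files_by_burst = group_by_burst(cslc_files, minimum_images=2)

    # Rasterize the union of the CSLC bounding polygons into a byte mask,
    # matching the call wrapped_phase.py now makes through dolphin.opera_utils.
    make_nodata_mask(cslc_files, out_file="nodata_mask.tif", buffer_pixels=200)

    # The subdataset must now be passed explicitly for NetCDF/HDF5 inputs,
    # as the updated tests do.
    cfg = config.Workflow(
        cslc_file_list=cslc_files,
        input_options=dict(subdataset="/data/VV"),
    )

`group_by_burst` returns a dict mapping burst id to the list of Paths for that burst, so per-burst configs can be built the same way `s1_disp.py` does with `_create_burst_cfg`.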