Skip to content

Commit

Permalink
Add metadata to compressed SLCs to track input files (#142)
Browse files Browse the repository at this point in the history
* `sort_files_by_date`: iterate to not exhaust generators

otherwise there was an empty error raised from `Path().glob()`

* single.py: save the filenames from the compressed SLCs into geotiff metadata

* fix backwards sorting

* fix metadata join for Paths and gdal strings
  • Loading branch information
scottstanie authored Oct 5, 2023
1 parent 0ad6ef2 commit 255cd4b
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 23 deletions.
20 changes: 20 additions & 0 deletions src/dolphin/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,26 @@ def get_raster_bounds(
return (left, bottom, right, top)


def get_raster_metadata(filename: Filename, domain: str = ""):
"""Get metadata from a raster file.
Parameters
----------
filename : Filename
Path to the file to load.
domain : str, optional
Domain to get metadata for. Default is "" (all domains).
Returns
-------
dict
Dictionary of metadata.
"""
ds = gdal.Open(fspath(filename))
md = ds.GetMetadata(domain)
return md


def rowcol_to_xy(
row: int,
col: int,
Expand Down
4 changes: 2 additions & 2 deletions src/dolphin/stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ class VRTStack:
Attributes
----------
file_list : list[pathlib.Path]
Names of files to stack
file_list : list[Filename]
Paths or GDAL-compatible strings (NETCDF:...) for paths to files.
outfile : pathlib.Path, optional (default = Path("slc_stack.vrt"))
Name of output file to write
dates : list[list[datetime.date]]
Expand Down
4 changes: 2 additions & 2 deletions src/dolphin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@ def sort_key(file_date_tuple):
except TypeError:
return (-1, dates)

date_lists = [get_dates(f, fmt=file_date_fmt) for f in files]
file_dates = sorted([fd_tuple for fd_tuple in zip(files, date_lists)], key=sort_key)
file_date_tuples = [(f, get_dates(f, fmt=file_date_fmt)) for f in files]
file_dates = sorted([fd_tuple for fd_tuple in file_date_tuples], key=sort_key)

# Unpack the sorted pairs with new sorted values
file_list, dates = zip(*file_dates) # type: ignore
Expand Down
86 changes: 67 additions & 19 deletions src/dolphin/workflows/single.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@

import numpy as np
from numpy.typing import DTypeLike
from osgeo import gdal

from dolphin import io, shp
from dolphin import io, shp, utils
from dolphin._blocks import BlockManager
from dolphin._log import get_log
from dolphin._types import Filename
Expand Down Expand Up @@ -61,13 +62,13 @@ def run_wrapped_phase_single(
# TODO: extract common stuff between here and sequential
output_folder = Path(output_folder)
vrt = VRTStack.from_vrt_file(slc_vrt_file)
file_list_all = vrt.file_list
date_list_all = vrt.dates
input_slc_files = vrt.file_list
input_slc_dates = vrt.dates

# If we are using a different number of SLCs for the amplitude data,
# we should note that for the SHP finding algorithms
if shp_nslc is None:
shp_nslc = len(file_list_all)
shp_nslc = len(input_slc_files)

logger.info(f"{vrt}: from {vrt.file_list[0]} to {vrt.file_list[-1]}")

Expand Down Expand Up @@ -104,22 +105,22 @@ def run_wrapped_phase_single(

xhalf, yhalf = half_window["x"], half_window["y"]

# If we were passed any compressed SLCs in `file_list_all`,
# If we were passed any compressed SLCs in `input_slc_files`,
# then we want that index for when we create new compressed SLCs.
# We skip the old compressed SLCs to create new ones
first_non_comp_idx = 0
for filename in file_list_all:
for filename in input_slc_files:
if not Path(filename).name.startswith("compressed"):
break
first_non_comp_idx += 1

# Make the output folder using the start/end dates
d0 = date_list_all[first_non_comp_idx][0]
d1 = date_list_all[-1][0]
d0 = input_slc_dates[first_non_comp_idx][0]
d1 = input_slc_dates[-1][0]
start_end = io._format_date_pair(d0, d1)

msg = (
f"Processing {len(file_list_all) - first_non_comp_idx} SLCs +"
f"Processing {len(input_slc_files) - first_non_comp_idx} SLCs +"
f" {first_non_comp_idx} compressed SLCs. "
)
logger.info(msg)
Expand All @@ -133,7 +134,7 @@ def run_wrapped_phase_single(
)
logger.info(f"Total stack size (in pixels): {vrt.shape}")
# Set up the output folder with empty files to write into
output_slc_files = setup_output_folder(
phase_linked_slc_files = setup_output_folder(
vrt,
driver="GTiff",
start_idx=first_non_comp_idx,
Expand Down Expand Up @@ -204,7 +205,9 @@ def run_wrapped_phase_single(
method=shp_method,
)
# # Run the phase linking process on the current ministack
# # TESTING TODO
# TODO: Currently trying to use the latest-in-time compressed SLC as
# the reference (and we're assuming all compressed SLCs have the
# same "base phase")
reference_idx = max(0, first_non_comp_idx - 1)
try:
cur_mle_stack, tcorr, avg_coh = run_mle(
Expand Down Expand Up @@ -240,11 +243,11 @@ def run_wrapped_phase_single(

# Save each of the MLE estimates (ignoring those corresponding to
# compressed SLCs indexes)
assert len(cur_mle_stack[first_non_comp_idx:]) == len(output_slc_files)
assert len(cur_mle_stack[first_non_comp_idx:]) == len(phase_linked_slc_files)

for img, f in zip(
cur_mle_stack[first_non_comp_idx:, trimmed_rows, trimmed_cols],
output_slc_files,
phase_linked_slc_files,
):
writer.queue_write(img, f, out_rows.start, out_cols.start)

Expand Down Expand Up @@ -291,9 +294,54 @@ def run_wrapped_phase_single(
writer.notify_finished()
logger.info(f"Finished ministack of size {vrt.shape}.")

comp_slc_file, tcorr_file = output_files[0:2]

input_compressed_files = input_slc_files[:first_non_comp_idx]
input_real_slc_files = input_slc_files[first_non_comp_idx:]
_save_compressed_metadata(
comp_slc_file.filename, input_real_slc_files, input_compressed_files
)
# TODO: what would make most sense to return here?
# return output_slc_files, comp_slc_file, tcorr_file
return output_slc_files, output_files[0].filename, output_files[1].filename
# return phase_linked_slc_files, comp_slc_file, tcorr_file
return phase_linked_slc_files, comp_slc_file.filename, tcorr_file.filename


def _save_compressed_metadata(
comp_slc_file: Path,
input_real_slc_files: list[Filename],
input_compressed_files: list[Filename],
):
"""Save metadata about the compressed SLCs.
The filenames will be stored in GDAL metadata as a list of strings, separated
by commas, saved into the "DOLPHIN" domain.
Parameters
----------
comp_slc_file : Path
Path to the compressed SLC file
input_real_slc_files : list[Path]
List of input SLC files that were used to create the compressed SLC
input_compressed_files : list[Path]
List of input compressed SLC files that were used in the ministack
which produced the phase linking result used the compressed SLCs.
Note that these were not included in the dot product, but were used
by the phase linking optimization.
"""
comp_ds = gdal.Open(str(comp_slc_file), gdal.GA_Update)
real_names = [utils._get_path_from_gdal_str(p).name for p in input_real_slc_files]
comp_ds.SetMetadataItem(
"input_real_slc_files",
",".join(map(str, real_names)),
"DOLPHIN",
)
comp_names = [utils._get_path_from_gdal_str(p).name for p in input_compressed_files]
comp_ds.SetMetadataItem(
"input_compressed_slc_files",
",".join(map(str, comp_names)),
"DOLPHIN",
)
comp_ds.FlushCache()


def setup_output_folder(
Expand Down Expand Up @@ -335,7 +383,7 @@ def setup_output_folder(
Returns
-------
list[Path]
list of saved empty files.
list of saved empty files for the outputs of phase linking
"""
if output_folder is None:
output_folder = vrt_stack.outfile.parent
Expand All @@ -350,7 +398,7 @@ def setup_output_folder(
s = io._format_date_pair(d[0], d[1])
date_strs.append(s)

output_files = []
phase_linked_slc_files = []
for filename in date_strs:
slc_name = Path(filename).stem
output_path = output_folder / f"{slc_name}.slc.tif"
Expand All @@ -366,5 +414,5 @@ def setup_output_folder(
nodata=nodata,
)

output_files.append(output_path)
return output_files
phase_linked_slc_files.append(output_path)
return phase_linked_slc_files

0 comments on commit 255cd4b

Please sign in to comment.