Skip to content

Commit

Permalink
ENH: Support timedata as list of dates (#885)
Browse files Browse the repository at this point in the history
  • Loading branch information
tnatt authored Nov 28, 2024
1 parent 9859fb0 commit 3297dce
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 34 deletions.
6 changes: 6 additions & 0 deletions src/fmu/dataio/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import shlex
import uuid
from datetime import datetime
from io import BufferedIOBase, BytesIO
from pathlib import Path
from tempfile import NamedTemporaryFile
Expand Down Expand Up @@ -395,3 +396,8 @@ def _load_config_from_path(config_path: Path) -> dict[str, Any]:
raise FileNotFoundError(f"Cannot find file for global config: {config_path}")

return ut.yaml_load(config_path)


def convert_datestr_to_isoformat(value: str, format: str = "%Y%m%d") -> str:
    """Convert a date string to an ISO 8601 formatted string.

    Args:
        value: The date to convert, e.g. "20200101". Non-string values such
            as the integer 20200101 are converted with ``str()`` before
            parsing.
        format: The ``strptime`` format that ``value`` is written in.

    Returns:
        The parsed date as returned by ``datetime.isoformat()``, e.g.
        "2020-01-01T00:00:00".

    Raises:
        ValueError: If ``value`` does not match ``format``.
    """
    # str() keeps integer dates from failing with a TypeError inside strptime.
    return datetime.strptime(str(value), format).isoformat()
33 changes: 6 additions & 27 deletions src/fmu/dataio/dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,9 +304,11 @@ class ExportData:
tagname: This is a short tag description which will be a part of the file name.
timedata: If given, a list of lists with dates, .e.g.
[[20200101, "monitor"], [20180101, "base"]] or just [[2021010]]. The output
to metadata will from version 0.9 be different (API change)
timedata: Optional. List of dates, where the dates are strings on the form
'YYYYMMDD', for example ['20200101']. A maximum of two dates can be given;
the oldest date will be set as t0 in the metadata and the latest date will
be t1. Note that it is also possible to attach a label to each date by using
a list of lists, e.g. [[20200101, "monitor"], [20180101, "base"]].
vertical_domain: Optional. String with vertical domain either "time" or "depth"
(default). It is also possible to provide a reference for the vertical
Expand All @@ -317,29 +319,6 @@ class ExportData:
undef_is_zero: Flags that nans should be considered as zero in aggregations
.. note:: Comment on time formats
If two dates are present (i.e. the element represents a difference, the input
time format is on the form::
timedata: [[20200101, "monitor"], [20180101, "base"]]
Hence the last data (monitor) usually comes first.
In the new version this will shown in metadata files as where the oldest date is
shown as t0::
data:
t0:
value: 2018010T00:00:00
description: base
t1:
value: 202020101T00:00:00
description: monitor
The output files will be on the form: somename--t1_t0.ext
"""

# ----------------------------------------------------------------------------------
Expand Down Expand Up @@ -400,7 +379,7 @@ class ExportData:
runpath: Optional[Union[str, Path]] = None
subfolder: str = ""
tagname: str = ""
timedata: Optional[List[list]] = None
timedata: Optional[Union[List[str], List[List[str]]]] = None
unit: Optional[str] = ""
verbosity: str = "DEPRECATED" # remove in version 2
vertical_domain: Union[str, dict] = "depth" # dict input is deprecated
Expand Down
20 changes: 14 additions & 6 deletions src/fmu/dataio/providers/objectdata/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,16 +208,21 @@ def _get_stratigraphy_element(self) -> StratigraphyElement:

return StratigraphyElement(name=name)

def _get_fmu_time_object(self, timedata_item: str | list[str]) -> Timestamp:
    """
    Returns a Timestamp from a single timedata item.

    The item is either a bare date, e.g. "20200101", or a list such as
    ["20200101", "monitor"] where the first element is the date and the
    second element, if present, is used as label. Dates are converted with
    str() before parsing and must be on the form YYYYMMDD.

    Raises:
        ValueError: If a list item is empty or the date cannot be parsed.
    """
    date_format = "%Y%m%d"

    if not isinstance(timedata_item, list):
        # Bare date: the label argument is deliberately not passed.
        return Timestamp(
            value=datetime.strptime(str(timedata_item), date_format),
        )

    if not timedata_item:
        # Unpacking below would fail with a far less helpful ValueError.
        raise ValueError("A 'timedata' item on list format must contain a date")

    value, *label = timedata_item
    return Timestamp(
        value=datetime.strptime(str(value), date_format),
        label=label[0] if label else None,
    )

def _get_timedata(self) -> Time | None:
Expand All @@ -234,6 +239,9 @@ def _get_timedata(self) -> Time | None:
if not self.dataio.timedata:
return None

if not isinstance(self.dataio.timedata, list):
raise ValueError("The 'timedata' argument should be a list")

if len(self.dataio.timedata) > 2:
raise ValueError("The 'timedata' argument can maximum contain two dates")

Expand Down
95 changes: 94 additions & 1 deletion tests/test_units/test_dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import yaml

from fmu.dataio._model.enums import FMUContext
from fmu.dataio._utils import prettyprint_dict
from fmu.dataio._utils import convert_datestr_to_isoformat, prettyprint_dict
from fmu.dataio.dataio import ExportData, read_metadata
from fmu.dataio.providers._fmu import FmuEnv

Expand Down Expand Up @@ -1244,3 +1244,96 @@ def test_top_base_as_strings_from_config(globalconfig1, regsurf):
meta = edata.generate_metadata(regsurf)
assert meta["data"]["top"]["name"] == "TheTopHorizon"
assert meta["data"]["base"]["name"] == "TheBaseHorizon"


def test_timedata_single_date(globalconfig1, regsurf):
    """A lone date is exported as t0 and no t1 entry is present."""

    date = "20230101"
    expected = convert_datestr_to_isoformat(date)

    # first the flat list syntax, then the double list syntax
    for timedata in ([date], [[date]]):
        exporter = ExportData(
            config=globalconfig1,
            content="depth",
            name="TopWhatever",
            timedata=timedata,
        )
        time_meta = exporter.generate_metadata(regsurf)["data"]["time"]

        assert time_meta["t0"]["value"] == expected
        assert "t1" not in time_meta


def test_timedata_multiple_date(globalconfig1, regsurf):
    """Two dates are exported as t0 and t1."""

    first = "20230101"
    second = "20240101"
    expected_t0 = convert_datestr_to_isoformat(first)
    expected_t1 = convert_datestr_to_isoformat(second)

    # first the flat list syntax, then the double list syntax
    for timedata in ([first, second], [[first], [second]]):
        exporter = ExportData(
            config=globalconfig1,
            content="depth",
            name="TopWhatever",
            timedata=timedata,
        )
        time_meta = exporter.generate_metadata(regsurf)["data"]["time"]

        assert time_meta["t0"]["value"] == expected_t0
        assert time_meta["t1"]["value"] == expected_t1


def test_timedata_multiple_date_sorting(globalconfig1, regsurf):
    """The oldest date becomes t0 regardless of the input order."""

    oldest = "20230101"
    newest = "20240101"

    exporter = ExportData(
        config=globalconfig1,
        content="depth",
        name="TopWhatever",
        timedata=[newest, oldest],  # newest date is given first
    )
    time_meta = exporter.generate_metadata(regsurf)["data"]["time"]

    # the oldest date must be t0 and the newest t1
    assert time_meta["t0"]["value"] == convert_datestr_to_isoformat(oldest)
    assert time_meta["t1"]["value"] == convert_datestr_to_isoformat(newest)


def test_timedata_wrong_format(globalconfig1, regsurf):
    """Invalid timedata input raises a ValueError."""

    common = {
        "config": globalconfig1,
        "content": "depth",
        "name": "TopWhatever",
    }

    # a bare string instead of a list
    with pytest.raises(ValueError, match="should be a list"):
        ExportData(timedata="20230101", **common).generate_metadata(regsurf)

    # three dates given
    too_many_dates = ["20230101", "20240101", "20250101"]
    with pytest.raises(ValueError, match="two dates"):
        ExportData(timedata=too_many_dates, **common).generate_metadata(regsurf)

0 comments on commit 3297dce

Please sign in to comment.