Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Support timedata as list of dates #885

Merged
merged 1 commit into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/fmu/dataio/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import shlex
import uuid
from datetime import datetime
from io import BufferedIOBase, BytesIO
from pathlib import Path
from tempfile import NamedTemporaryFile
Expand Down Expand Up @@ -395,3 +396,8 @@ def _load_config_from_path(config_path: Path) -> dict[str, Any]:
raise FileNotFoundError(f"Cannot find file for global config: {config_path}")

return ut.yaml_load(config_path)


def convert_datestr_to_isoformat(value: str, format: str = "%Y%m%d") -> str:
    """Return ``value`` re-expressed as an ISO formatted datetime string.

    ``value`` is parsed with ``datetime.strptime`` using ``format`` (default
    ``"%Y%m%d"``) and the parsed datetime is rendered with ``isoformat()``.
    """
    parsed = datetime.strptime(value, format)
    return parsed.isoformat()
33 changes: 6 additions & 27 deletions src/fmu/dataio/dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,9 +304,11 @@ class ExportData:

tagname: This is a short tag description which will be a part of the file name.

timedata: If given, a list of lists with dates, .e.g.
[[20200101, "monitor"], [20180101, "base"]] or just [[2021010]]. The output
to metadata will from version 0.9 be different (API change)
timedata: Optional. List of dates, where the dates are strings on the form
'YYYYMMDD', for example ['20200101']. A maximum of two dates can be given;
the oldest date will be set as t0 in the metadata and the latest date will
be t1. Note that it is also possible to provide a label for each date by
using a list of lists, e.g. [["20200101", "monitor"], ["20180101", "base"]].

vertical_domain: Optional. String with vertical domain either "time" or "depth"
(default). It is also possible to provide a reference for the vertical
Expand All @@ -317,29 +319,6 @@ class ExportData:

undef_is_zero: Flags that nans should be considered as zero in aggregations


.. note:: Comment on time formats

If two dates are present (i.e. the element represents a difference, the input
time format is on the form::

timedata: [[20200101, "monitor"], [20180101, "base"]]

Hence the last data (monitor) usually comes first.

In the new version this will shown in metadata files as where the oldest date is
shown as t0::

data:
t0:
value: 2018010T00:00:00
description: base
t1:
value: 202020101T00:00:00
description: monitor

The output files will be on the form: somename--t1_t0.ext

"""

# ----------------------------------------------------------------------------------
Expand Down Expand Up @@ -400,7 +379,7 @@ class ExportData:
runpath: Optional[Union[str, Path]] = None
subfolder: str = ""
tagname: str = ""
timedata: Optional[List[list]] = None
timedata: Optional[Union[List[str], List[List[str]]]] = None
unit: Optional[str] = ""
verbosity: str = "DEPRECATED" # remove in version 2
vertical_domain: Union[str, dict] = "depth" # dict input is deprecated
Expand Down
20 changes: 14 additions & 6 deletions src/fmu/dataio/providers/objectdata/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,16 +208,21 @@ def _get_stratigraphy_element(self) -> StratigraphyElement:

return StratigraphyElement(name=name)

def _get_fmu_time_object(self, timedata_item: str | list[str]) -> Timestamp:
    """
    Returns a Timestamp from a single timedata item.

    The item is either a plain date, e.g. "20200101", or a list such as
    ["20200101", "monitor"] where the first element is the date and the
    second element is an optional label. The date is converted with
    ``str()`` and parsed using the "%Y%m%d" format.

    Raises:
        ValueError: If a list item is empty, or if the date cannot be
            parsed with the "%Y%m%d" format.
    """
    if isinstance(timedata_item, list):
        if not timedata_item:
            # Fail with a readable message rather than an unpacking error.
            raise ValueError(
                "Each item in the 'timedata' argument must contain a date"
            )
        value, *label = timedata_item
        return Timestamp(
            value=datetime.strptime(str(value), "%Y%m%d"),
            label=label[0] if label else None,
        )

    # Plain date without a label.
    return Timestamp(
        value=datetime.strptime(str(timedata_item), "%Y%m%d"),
    )

def _get_timedata(self) -> Time | None:
Expand All @@ -234,6 +239,9 @@ def _get_timedata(self) -> Time | None:
if not self.dataio.timedata:
return None

if not isinstance(self.dataio.timedata, list):
raise ValueError("The 'timedata' argument should be a list")

if len(self.dataio.timedata) > 2:
raise ValueError("The 'timedata' argument can maximum contain two dates")

Expand Down
95 changes: 94 additions & 1 deletion tests/test_units/test_dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import yaml

from fmu.dataio._model.enums import FMUContext
from fmu.dataio._utils import prettyprint_dict
from fmu.dataio._utils import convert_datestr_to_isoformat, prettyprint_dict
from fmu.dataio.dataio import ExportData, read_metadata
from fmu.dataio.providers._fmu import FmuEnv

Expand Down Expand Up @@ -1244,3 +1244,96 @@ def test_top_base_as_strings_from_config(globalconfig1, regsurf):
meta = edata.generate_metadata(regsurf)
assert meta["data"]["top"]["name"] == "TheTopHorizon"
assert meta["data"]["base"]["name"] == "TheBaseHorizon"


def test_timedata_single_date(globalconfig1, regsurf):
    """A single date is exported as t0 and no t1 entry is present."""

    date = "20230101"

    # first the flat list form, then the equivalent double list syntax
    for timedata in ([date], [[date]]):
        exporter = ExportData(
            config=globalconfig1,
            content="depth",
            name="TopWhatever",
            timedata=timedata,
        )
        time_meta = exporter.generate_metadata(regsurf)["data"]["time"]

        assert time_meta["t0"]["value"] == convert_datestr_to_isoformat(date)
        assert "t1" not in time_meta


def test_timedata_multiple_date(globalconfig1, regsurf):
    """Two dates are exported as t0 and t1 respectively."""

    first = "20230101"
    second = "20240101"

    # first the flat list form, then the equivalent double list syntax
    for timedata in ([first, second], [[first], [second]]):
        exporter = ExportData(
            config=globalconfig1,
            content="depth",
            name="TopWhatever",
            timedata=timedata,
        )
        time_meta = exporter.generate_metadata(regsurf)["data"]["time"]

        assert time_meta["t0"]["value"] == convert_datestr_to_isoformat(first)
        assert time_meta["t1"]["value"] == convert_datestr_to_isoformat(second)


def test_timedata_multiple_date_sorting(globalconfig1, regsurf):
    """Test that dates are sorted no matter the input order"""

    t0 = "20230101"
    t1 = "20240101"

    meta = ExportData(
        config=globalconfig1,
        content="depth",
        name="TopWhatever",
        timedata=[t1, t0],  # latest date deliberately given first
    ).generate_metadata(regsurf)

    # the oldest date must end up as t0 and the latest as t1
    assert meta["data"]["time"]["t0"]["value"] == convert_datestr_to_isoformat(t0)
    assert meta["data"]["time"]["t1"]["value"] == convert_datestr_to_isoformat(t1)


def test_timedata_wrong_format(globalconfig1, regsurf):
    """Invalid 'timedata' input is rejected with a ValueError."""

    invalid_cases = (
        # a bare string instead of a list
        ("20230101", "should be a list"),
        # more than two dates
        (["20230101", "20240101", "20250101"], "two dates"),
    )

    for timedata, expected_message in invalid_cases:
        with pytest.raises(ValueError, match=expected_message):
            ExportData(
                config=globalconfig1,
                content="depth",
                name="TopWhatever",
                timedata=timedata,
            ).generate_metadata(regsurf)