From 364ceb375a040bac05ca2ddf53d36295f4012595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Berland?= Date: Sat, 9 Dec 2023 19:54:45 +0100 Subject: [PATCH] Add support for summary files beyond year 2262 (#238) Copied fallback code from res2df --- src/fmu/ensemble/util/dates.py | 106 +++++++++++++++++-- tests/test_dates.py | 182 +++++++++++++++++++++++++++++++++ tests/test_realization.py | 44 ++++++-- 3 files changed, 319 insertions(+), 13 deletions(-) create mode 100644 tests/test_dates.py diff --git a/src/fmu/ensemble/util/dates.py b/src/fmu/ensemble/util/dates.py index b90b563a..ba6e2ea9 100644 --- a/src/fmu/ensemble/util/dates.py +++ b/src/fmu/ensemble/util/dates.py @@ -5,6 +5,7 @@ import dateutil import pandas as pd import logging +from typing import List, Tuple logger = logging.getLogger(__name__) @@ -33,8 +34,12 @@ def date_range(start_date, end_date, freq): Returns: list of datetimes """ - freq = PD_FREQ_MNEMONICS.get(freq, freq) - return pd.date_range(start_date, end_date, freq=freq) + try: + return pd.date_range( + start_date, end_date, freq=PD_FREQ_MNEMONICS.get(freq, freq) + ) + except pd.errors.OutOfBoundsDatetime: + return _fallback_date_range(start_date, end_date, freq) def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date=None): @@ -125,7 +130,9 @@ def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date return datetimes -def normalize_dates(start_date, end_date, freq): +def normalize_dates( + start_date: datetime.date, end_date: datetime.date, freq: str +) -> Tuple[datetime.date, datetime.date]: """ Normalize start and end date according to frequency by extending the time range. 
@@ -145,6 +152,93 @@ def normalize_dates(start_date, end_date, freq): Return: Tuple of normalized (start_date, end_date) """ - freq = PD_FREQ_MNEMONICS.get(freq, freq) - offset = pd.tseries.frequencies.to_offset(freq) - return (offset.rollback(start_date).date(), offset.rollforward(end_date).date()) + offset = pd.tseries.frequencies.to_offset(PD_FREQ_MNEMONICS.get(freq, freq)) + try: + start_normalized = offset.rollback(start_date).date() + except pd.errors.OutOfBoundsDatetime: + # Pandas only supports datetime up to year 2262 + start_normalized = _fallback_date_roll( + datetime.datetime.combine(start_date, datetime.time()), "back", freq + ).date() + try: + end_normalized = offset.rollforward(end_date).date() + except pd.errors.OutOfBoundsDatetime: + # Pandas only supports datetime up to year 2262 + end_normalized = _fallback_date_roll( + datetime.datetime.combine(end_date, datetime.time()), "forward", freq + ).date() + + return (start_normalized, end_normalized) + + +def _fallback_date_roll( + rollme: datetime.datetime, direction: str, freq: str +) -> datetime.datetime: + """Fallback function for rolling dates forward or backward onto a + date frequency boundary. + + This function reimplements pandas' DateOffset.rollforward() and rollback() + only for monthly and yearly frequency. This is necessary as Pandas does not + support datetimes beyond year 2262 due to all datetimes in Pandas being + represented with nanosecond accuracy. 
+ + This function is a fallback only, to keep support for using all Pandas timeoffsets + in situations where years beyond 2262 are not an issue.""" + if direction not in ["back", "forward"]: + raise ValueError(f"Unknown direction {direction}") + + if freq == "yearly": + if direction == "forward": + if rollme <= datetime.datetime(year=rollme.year, month=1, day=1): + return datetime.datetime(year=rollme.year, month=1, day=1) + return datetime.datetime(year=rollme.year + 1, month=1, day=1) + return datetime.datetime(year=rollme.year, month=1, day=1) + + if freq == "monthly": + if direction == "forward": + if rollme <= datetime.datetime(year=rollme.year, month=rollme.month, day=1): + return datetime.datetime(year=rollme.year, month=rollme.month, day=1) + return datetime.datetime( + year=rollme.year, month=rollme.month, day=1 + ) + dateutil.relativedelta.relativedelta( # type: ignore + months=1 + ) + return datetime.datetime(year=rollme.year, month=rollme.month, day=1) + + raise ValueError( + "Only yearly or monthly frequencies are " + "supported for simulations beyond year 2262" + ) + + +def _fallback_date_range( + start: datetime.date, end: datetime.date, freq: str +) -> List[datetime.datetime]: + """Fallback routine for generating date ranges beyond Pandas datetime64[ns] + year-2262 limit. + + Assumes that the start and end times already fall on a frequency boundary. 
+ """ + if start == end: + return [datetime.datetime.combine(start, datetime.datetime.min.time())] + if end < start: + return [] + if freq == "yearly": + dates = [datetime.datetime.combine(start, datetime.datetime.min.time())] + [ + datetime.datetime(year=year, month=1, day=1) + for year in range(start.year + 1, end.year + 1) + ] + if datetime.datetime.combine(end, datetime.datetime.min.time()) != dates[-1]: + dates = dates + [ + datetime.datetime.combine(end, datetime.datetime.min.time()) + ] + return dates + if freq == "monthly": + dates = [] + date = datetime.datetime.combine(start, datetime.datetime.min.time()) + enddatetime = datetime.datetime.combine(end, datetime.datetime.min.time()) + while date <= enddatetime: + dates.append(date) + date = date + dateutil.relativedelta.relativedelta(months=1) # type: ignore + return dates + raise ValueError("Unsupported frequency for datetimes beyond year 2262") diff --git a/tests/test_dates.py b/tests/test_dates.py new file mode 100644 index 00000000..57dfcf13 --- /dev/null +++ b/tests/test_dates.py @@ -0,0 +1,182 @@ +from datetime import datetime as dt + +import pytest + +from fmu.ensemble.util.dates import _fallback_date_roll, date_range + +# These tests are duplicated from https://github.com/equinor/res2df/blob/master/tests/test_summary.py + + +@pytest.mark.parametrize( + "rollme, direction, freq, expected", + [ + ( + dt(3000, 1, 1), + "forward", + "yearly", + dt(3000, 1, 1), + ), + ( + dt(3000, 1, 1), + "forward", + "monthly", + dt(3000, 1, 1), + ), + ( + dt(3000, 1, 2), + "forward", + "yearly", + dt(3001, 1, 1), + ), + ( + dt(3000, 1, 2), + "forward", + "monthly", + dt(3000, 2, 1), + ), + ( + dt(3000, 1, 1), + "back", + "yearly", + dt(3000, 1, 1), + ), + ( + dt(3000, 1, 1), + "back", + "monthly", + dt(3000, 1, 1), + ), + ( + dt(3000, 12, 31), + "back", + "yearly", + dt(3000, 1, 1), + ), + ( + dt(3000, 2, 2), + "back", + "monthly", + dt(3000, 2, 1), + ), + pytest.param( + dt(3000, 2, 2), + "forward", + "daily", + 
None, + marks=pytest.mark.xfail(raises=ValueError), + ), + pytest.param( + dt(3000, 2, 2), + "upwards", + "yearly", + None, + marks=pytest.mark.xfail(raises=ValueError), + ), + ], +) +def test_fallback_date_roll(rollme, direction, freq, expected): + """The pandas date rolling does not always work for years beyond 2262. The + code should fall back automatically to hide that Pandas limitation""" + assert _fallback_date_roll(rollme, direction, freq) == expected + + +@pytest.mark.parametrize( + "start, end, freq, expected", + [ + ( + dt(3000, 1, 1), + dt(3002, 1, 1), + "yearly", + [ + dt(3000, 1, 1), + dt(3001, 1, 1), + dt(3002, 1, 1), + ], + ), + ( + dt(2999, 11, 1), + dt(3000, 2, 1), + "monthly", + [ + dt(2999, 11, 1), + dt(2999, 12, 1), + dt(3000, 1, 1), + dt(3000, 2, 1), + ], + ), + pytest.param( + dt(3000, 1, 1), + dt(3000, 2, 1), + "weekly", + None, + marks=pytest.mark.xfail(raises=ValueError), + ), + ( + # Crossing the problematic time boundary: + dt(2260, 1, 1), + dt(2263, 1, 1), + "yearly", + [ + dt(2260, 1, 1), + dt(2261, 1, 1), + dt(2262, 1, 1), + dt(2263, 1, 1), + ], + ), + ( + dt(3000, 1, 1), + dt(3000, 1, 1), + "yearly", + [ + dt(3000, 1, 1), + ], + ), + ( + dt(2000, 1, 1), + dt(2000, 1, 1), + "yearly", + [ + dt(2000, 1, 1), + ], + ), + ( + dt(2000, 1, 1), + dt(1000, 1, 1), + "yearly", + [], + ), + ( + dt(3000, 1, 1), + dt(2000, 1, 1), + "yearly", + [], + ), + ( + dt(2300, 5, 6), + dt(2302, 3, 1), + "yearly", + [ + dt(2300, 5, 6), + dt(2301, 1, 1), + dt(2302, 1, 1), + dt(2302, 3, 1), + ], + ), + ( + dt(2304, 5, 6), + dt(2302, 3, 1), + "yearly", + [], + ), + ( + dt(2302, 3, 1), + dt(2302, 3, 1), + "yearly", + [dt(2302, 3, 1)], + ), + ], +) +def test_date_range(start, end, freq, expected): + """When dates are beyond year 2262, + the function _fallback_date_range() is triggered.""" + assert date_range(start, end, freq) == expected diff --git a/tests/test_realization.py b/tests/test_realization.py index f9a6a9c9..0b1d9b0d 100644 --- 
a/tests/test_realization.py +++ b/tests/test_realization.py @@ -1,22 +1,22 @@ """Testing fmu-ensemble.""" # pylint: disable=protected-access -import os import datetime -import shutil import logging +import os +import shutil +from pathlib import Path + +import numpy as np import pandas as pd +import pytest import yaml from dateutil.relativedelta import relativedelta - -import pytest from resdata.summary import Summary -import numpy as np - -from .test_ensembleset import symlink_iter from fmu import ensemble +from .test_ensembleset import symlink_iter try: SKIP_FMU_TOOLS = False @@ -615,6 +615,36 @@ def test_singlereal_ecl(tmp="TMP"): "FOPT" in real["unsmry--yearly"] +def test_can_import_summary_files_beyond_2262(tmpdir, monkeypatch): + """Pandas is/has been eager to use datetime64[ns] which overflows in year 2262, + ensure this limitation is sufficiently worked around.""" + monkeypatch.chdir(tmpdir) + res_sum = Summary.from_pandas( + "TESTCASE", + pd.DataFrame( + [ + {"DATE": datetime.date(2000, 1, 1), "FPR": 200}, + {"DATE": datetime.date(2263, 1, 1), "FPR": 1}, + ] + ).set_index("DATE"), + ) + runpath = "realization-0/iter-0" + Path(runpath).mkdir(parents=True) + os.chdir(runpath) + # fwrite() can only write to cwd + Summary.fwrite(res_sum) + os.chdir(tmpdir) + + real = ensemble.ScratchRealization(runpath) + real.find_files("TESTCASE.UNSMRY") + for time_index in ["raw", "monthly", "yearly"]: + assert "2263-01-01" in str( + real.get_smry(column_keys="*", time_index=time_index) + ) + with pytest.raises(ValueError): + real.get_smry(column_keys="*", time_index="weekly") + + def test_independent_realization(tmp="TMP"): """Test what we are able to load a single Eclipse run that might have nothing to do with FMU"""