Skip to content

Commit

Permalink
Add support for summary files beyond year 2262 (#238)
Browse files Browse the repository at this point in the history
Copied fallback code from res2df
  • Loading branch information
berland authored Dec 9, 2023
1 parent 392218d commit 364ceb3
Show file tree
Hide file tree
Showing 3 changed files with 319 additions and 13 deletions.
106 changes: 100 additions & 6 deletions src/fmu/ensemble/util/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import dateutil
import pandas as pd
import logging
from typing import List, Tuple

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -33,8 +34,12 @@ def date_range(start_date, end_date, freq):
Returns:
list of datetimes
"""
freq = PD_FREQ_MNEMONICS.get(freq, freq)
return pd.date_range(start_date, end_date, freq=freq)
try:
return pd.date_range(
start_date, end_date, freq=PD_FREQ_MNEMONICS.get(freq, freq)
)
except pd.errors.OutOfBoundsDatetime:
return _fallback_date_range(start_date, end_date, freq)


def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date=None):
Expand Down Expand Up @@ -125,7 +130,9 @@ def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date
return datetimes


def normalize_dates(start_date, end_date, freq):
def normalize_dates(
start_date: datetime.date, end_date: datetime.date, freq: str
) -> Tuple[datetime.date, datetime.date]:
"""
Normalize start and end date according to frequency
by extending the time range.
Expand All @@ -145,6 +152,93 @@ def normalize_dates(start_date, end_date, freq):
Return:
Tuple of normalized (start_date, end_date)
"""
freq = PD_FREQ_MNEMONICS.get(freq, freq)
offset = pd.tseries.frequencies.to_offset(freq)
return (offset.rollback(start_date).date(), offset.rollforward(end_date).date())
offset = pd.tseries.frequencies.to_offset(PD_FREQ_MNEMONICS.get(freq, freq))
try:
start_normalized = offset.rollback(start_date).date()
except pd.errors.OutOfBoundsDatetime:
# Pandas only supports datetime up to year 2262
start_normalized = _fallback_date_roll(
datetime.datetime.combine(start_date, datetime.time()), "back", freq
).date()
try:
end_normalized = offset.rollforward(end_date).date()
except pd.errors.OutOfBoundsDatetime:
# Pandas only supports datetime up to year 2262
end_normalized = _fallback_date_roll(
datetime.datetime.combine(end_date, datetime.time()), "forward", freq
).date()

return (start_normalized, end_normalized)


def _fallback_date_roll(
rollme: datetime.datetime, direction: str, freq: str
) -> datetime.datetime:
"""Fallback function for rolling dates forward or backward onto a
date frequency boundary.
This function reimplements pandas' DateOffset.roll_forward() and backward()
only for monthly and yearly frequency. This is necessary as Pandas does not
support datetimes beyond year 2262 due to all datetimes in Pandas being
represented by nanosecond accuracy.
This function is a fallback only, to keep support for using all Pandas timeoffsets
in situations where years beyond 2262 is not a issue."""
if direction not in ["back", "forward"]:
raise ValueError(f"Unknown direction {direction}")

if freq == "yearly":
if direction == "forward":
if rollme <= datetime.datetime(year=rollme.year, month=1, day=1):
return datetime.datetime(year=rollme.year, month=1, day=1)
return datetime.datetime(year=rollme.year + 1, month=1, day=1)
return datetime.datetime(year=rollme.year, month=1, day=1)

if freq == "monthly":
if direction == "forward":
if rollme <= datetime.datetime(year=rollme.year, month=rollme.month, day=1):
return datetime.datetime(year=rollme.year, month=rollme.month, day=1)
return datetime.datetime(
year=rollme.year, month=rollme.month, day=1
) + dateutil.relativedelta.relativedelta( # type: ignore
months=1
)
return datetime.datetime(year=rollme.year, month=rollme.month, day=1)

raise ValueError(
"Only yearly or monthly frequencies are "
"supported for simulations beyond year 2262"
)


def _fallback_date_range(
start: datetime.date, end: datetime.date, freq: str
) -> List[datetime.datetime]:
"""Fallback routine for generating date ranges beyond Pandas datetime64[ns]
year-2262 limit.
Assumes that the start and end times already fall on a frequency boundary.
"""
if start == end:
return [datetime.datetime.combine(start, datetime.datetime.min.time())]
if end < start:
return []
if freq == "yearly":
dates = [datetime.datetime.combine(start, datetime.datetime.min.time())] + [
datetime.datetime(year=year, month=1, day=1)
for year in range(start.year + 1, end.year + 1)
]
if datetime.datetime.combine(end, datetime.datetime.min.time()) != dates[-1]:
dates = dates + [
datetime.datetime.combine(end, datetime.datetime.min.time())
]
return dates
if freq == "monthly":
dates = []
date = datetime.datetime.combine(start, datetime.datetime.min.time())
enddatetime = datetime.datetime.combine(end, datetime.datetime.min.time())
while date <= enddatetime:
dates.append(date)
date = date + dateutil.relativedelta.relativedelta(months=1) # type: ignore
return dates
raise ValueError("Unsupported frequency for datetimes beyond year 2262")
182 changes: 182 additions & 0 deletions tests/test_dates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
from datetime import datetime as dt

import pytest

from fmu.ensemble.util.dates import _fallback_date_roll, date_range

# These tests are duplicated from https://github.com/equinor/res2df/blob/master/tests/test_summary.py


@pytest.mark.parametrize(
    "rollme, direction, freq, expected",
    [
        # Already on a boundary: rolling forward is a no-op
        (dt(3000, 1, 1), "forward", "yearly", dt(3000, 1, 1)),
        (dt(3000, 1, 1), "forward", "monthly", dt(3000, 1, 1)),
        # Off boundary: rolling forward goes to the next boundary
        (dt(3000, 1, 2), "forward", "yearly", dt(3001, 1, 1)),
        (dt(3000, 1, 2), "forward", "monthly", dt(3000, 2, 1)),
        # Already on a boundary: rolling back is a no-op
        (dt(3000, 1, 1), "back", "yearly", dt(3000, 1, 1)),
        (dt(3000, 1, 1), "back", "monthly", dt(3000, 1, 1)),
        # Off boundary: rolling back goes to the previous boundary
        (dt(3000, 12, 31), "back", "yearly", dt(3000, 1, 1)),
        (dt(3000, 2, 2), "back", "monthly", dt(3000, 2, 1)),
        # Unsupported frequency
        pytest.param(
            dt(3000, 2, 2),
            "forward",
            "daily",
            None,
            marks=pytest.mark.xfail(raises=ValueError),
        ),
        # Unsupported direction
        pytest.param(
            dt(3000, 2, 2),
            "upwards",
            "yearly",
            None,
            marks=pytest.mark.xfail(raises=ValueError),
        ),
    ],
)
def test_fallback_date_roll(rollme, direction, freq, expected):
    """Check the date rolling used when pandas cannot handle the dates.

    Pandas' date rolling does not always work for years beyond 2262, so the
    code falls back to _fallback_date_roll() to hide that limitation.
    """
    rolled = _fallback_date_roll(rollme, direction, freq)
    assert rolled == expected


@pytest.mark.parametrize(
    "start, end, freq, expected",
    [
        (
            dt(3000, 1, 1),
            dt(3002, 1, 1),
            "yearly",
            [dt(3000, 1, 1), dt(3001, 1, 1), dt(3002, 1, 1)],
        ),
        (
            dt(2999, 11, 1),
            dt(3000, 2, 1),
            "monthly",
            [dt(2999, 11, 1), dt(2999, 12, 1), dt(3000, 1, 1), dt(3000, 2, 1)],
        ),
        pytest.param(
            dt(3000, 1, 1),
            dt(3000, 2, 1),
            "weekly",
            None,
            marks=pytest.mark.xfail(raises=ValueError),
        ),
        (
            # Crossing the problematic time boundary:
            dt(2260, 1, 1),
            dt(2263, 1, 1),
            "yearly",
            [dt(2260, 1, 1), dt(2261, 1, 1), dt(2262, 1, 1), dt(2263, 1, 1)],
        ),
        (
            dt(3000, 1, 1),
            dt(3000, 1, 1),
            "yearly",
            [dt(3000, 1, 1)],
        ),
        (
            # Entirely within what pandas supports, no fallback involved:
            dt(2000, 1, 1),
            dt(2000, 1, 1),
            "yearly",
            [dt(2000, 1, 1)],
        ),
        (
            dt(2000, 1, 1),
            dt(1000, 1, 1),
            "yearly",
            [],
        ),
        (
            dt(3000, 1, 1),
            dt(2000, 1, 1),
            "yearly",
            [],
        ),
        (
            # Start and end off the frequency boundary are kept as endpoints:
            dt(2300, 5, 6),
            dt(2302, 3, 1),
            "yearly",
            [dt(2300, 5, 6), dt(2301, 1, 1), dt(2302, 1, 1), dt(2302, 3, 1)],
        ),
        (
            dt(2304, 5, 6),
            dt(2302, 3, 1),
            "yearly",
            [],
        ),
        (
            dt(2302, 3, 1),
            dt(2302, 3, 1),
            "yearly",
            [dt(2302, 3, 1)],
        ),
    ],
)
def test_date_range(start, end, freq, expected):
    """Check date_range() for dates both inside and outside pandas' limits.

    When pandas cannot represent the dates (beyond year 2262),
    date_range() falls back to _fallback_date_range().
    """
    # date_range() hands back whatever pd.date_range() returns when pandas
    # copes with the dates, and a plain list from the fallback otherwise.
    # Normalise to a list so that the comparison below is an ordinary list
    # equality yielding a single bool, not an elementwise array.
    assert list(date_range(start, end, freq)) == expected
44 changes: 37 additions & 7 deletions tests/test_realization.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
"""Testing fmu-ensemble."""
# pylint: disable=protected-access

import os
import datetime
import shutil
import logging
import os
import shutil
from pathlib import Path

import numpy as np
import pandas as pd
import pytest
import yaml
from dateutil.relativedelta import relativedelta

import pytest
from resdata.summary import Summary

import numpy as np

from .test_ensembleset import symlink_iter
from fmu import ensemble

from .test_ensembleset import symlink_iter

try:
SKIP_FMU_TOOLS = False
Expand Down Expand Up @@ -615,6 +615,36 @@ def test_singlereal_ecl(tmp="TMP"):
"FOPT" in real["unsmry--yearly"]


def test_can_import_summary_files_beyond_2262(tmpdir, monkeypatch):
    """Load a summary file whose last date is past the datetime64[ns] limit.

    Pandas is/has been eager to use datetime64[ns], which overflows in year
    2262. A two-row summary spanning 2000-01-01 to 2263-01-01 is written to
    disk, and the 2263 date must show up for the raw, monthly and yearly
    time indices. A weekly time index is expected to raise ValueError.
    """
    monkeypatch.chdir(tmpdir)

    smry_frame = pd.DataFrame(
        [
            {"DATE": datetime.date(2000, 1, 1), "FPR": 200},
            {"DATE": datetime.date(2263, 1, 1), "FPR": 1},
        ]
    ).set_index("DATE")
    res_sum = Summary.from_pandas("TESTCASE", smry_frame)

    runpath = "realization-0/iter-0"
    Path(runpath).mkdir(parents=True)

    # fwrite() can only write to cwd, so step into the runpath and back out
    os.chdir(runpath)
    Summary.fwrite(res_sum)
    os.chdir(tmpdir)

    real = ensemble.ScratchRealization(runpath)
    real.find_files("TESTCASE.UNSMRY")

    for time_index in ["raw", "monthly", "yearly"]:
        smry = real.get_smry(column_keys="*", time_index=time_index)
        assert "2263-01-01" in str(smry)

    with pytest.raises(ValueError):
        real.get_smry(column_keys="*", time_index="weekly")


def test_independent_realization(tmp="TMP"):
"""Test what we are able to load a single Eclipse run
that might have nothing to do with FMU"""
Expand Down

0 comments on commit 364ceb3

Please sign in to comment.