Skip to content

Commit

Permalink
fix for new data_freq formatting (ME and MS) and hack for allowing y…
Browse files Browse the repository at this point in the history
…early data when asking for data aggr.
  • Loading branch information
semvijverberg committed Jan 31, 2024
1 parent 417223d commit f9cde74
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 2 deletions.
10 changes: 8 additions & 2 deletions lilio/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ def infer_input_data_freq(
if data_freq is None: # Manually infer the frequency
data_freq = (data.time.values[1:] - data.time.values[:-1]).min()

# Annoying switch from the "2M" to the "2ME" frequency format in pandas >= 2.2.
# We will need to adapt to this in the future.
if len(data_freq) in [3, 4] and data_freq[1:] in ["ME", "MS"]:
data_freq = data_freq.replace(data_freq[1:], "M")

if isinstance(data_freq, str):
data_freq.replace("-", "") # Get the absolute frequency

Expand Down Expand Up @@ -170,8 +175,9 @@ def check_input_frequency(
data_freq = infer_input_data_freq(data)
calendar_freq = get_smallest_calendar_freq(calendar)

# if "label" in data.coords:
# return
if data_freq == pd.Timedelta("365.25d") and calendar_freq == pd.Timedelta("1d"):
# Allow yearly (one-datapoint-per-year) data to be resampled to daily data.
return None

if calendar_freq < data_freq:
raise ValueError(
Expand Down
21 changes: 21 additions & 0 deletions tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,27 @@ def test_resample_with_year_freq(
resampled = resample(cal, series)
assert all(np.equal(test_data, resampled.data.values)), "Data not equal."

def test_resample_with_one_datapoint_per_year(
    self,
):
    """Resample a series that holds exactly one datapoint per year.

    The input has one value on 1 February of 2019, 2020 and 2021. The calendar
    is anchored on "02-01" and has a single one-day target interval, so each
    resampled value is expected to equal the corresponding input value.
    """
    years = list(range(2019, 2022))
    time_index = pd.to_datetime([f"{year}-02-01" for year in years])
    test_data = np.random.random(len(time_index))
    initseries = pd.Series(test_data, index=time_index, name="data1")
    # The calendar will skip the last timestep because of how pd.Interval
    # objects are defined (with left and right bounds). This is not a problem
    # for resampling itself, but users should be aware of it. An extra NaN
    # timestep is therefore added after the real data (presumably so that the
    # skipped timestep is the padding one and not real data).
    padding = pd.Series([np.nan], index=[pd.to_datetime("2022-02-01")])
    # Use the public pd.concat rather than the private Series._append.
    series = pd.concat([initseries, padding])
    cal = Calendar(anchor="02-01")
    cal.add_intervals("target", length="1d")
    cal.map_to_data(series)
    cal.get_intervals()
    resampled = resample(cal, series)
    assert all(np.equal(test_data, resampled.data.values)), "Data not equal."


# Regex-style message patterns (raw strings with ``.*`` wildcards).
# NOTE(review): presumably matched against the error / warning messages raised
# when the input data frequency is too low for the calendar -- confirm against
# the tests that use these constants.
TOO_LOW_FREQ_ERR = r".*lower time resolution than the calendar.*"
TOO_LOW_FREQ_WARN = r".*input data frequency is very close to the Calendar's freq.*"
Expand Down

0 comments on commit f9cde74

Please sign in to comment.