Skip to content

Commit

Permalink
coerce all parsed timestamps to UTC (#316)
Browse files Browse the repository at this point in the history
  • Loading branch information
atmorling authored Nov 15, 2024
1 parent 8aa05b5 commit 13158f4
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 1 deletion.
2 changes: 1 addition & 1 deletion ecoscope/io/earthranger_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def clean_time_cols(df):
for col in time_cols:
if col in df.columns:
# convert x is not None to pd.isna(x) is False
df[col] = df[col].apply(lambda x: pd.to_datetime(parser.parse(x)) if not pd.isna(x) else None)
df[col] = df[col].apply(lambda x: pd.to_datetime(parser.parse(x), utc=True) if not pd.isna(x) else None)
return df


Expand Down
53 changes: 53 additions & 0 deletions tests/test_earthranger_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import pytest

import numpy as np
import pandas as pd

from ecoscope.io.earthranger_utils import clean_time_cols


@pytest.fixture
def df_with_times():
    """Build a single-column frame of raw, unparsed timestamp values.

    The "time" column mixes string timestamps with explicit UTC offsets,
    one string without any offset, and a trailing ``pd.NA`` entry. Rows
    are labelled "A" through "H" so individual entries can be looked up
    by label.
    """
    raw_times = [
        "2023-01-30 11:26:13.805829-08:00",
        "2023-09-27T06:16:46.158966",  # no offset given
        "2023-09-27T06:16:46.23-07:00",
        "2023-09-27T06:16:46.1589-07:00",
        "2023-09-27T22:00:01.23-11:00",
        "2023-09-27T06:16:46.00-07:00",
        "2023-09-27T22:00:00.00-02:00",
        pd.NA,  # missing value, labelled "H"
    ]
    row_labels = list("ABCDEFGH")
    return pd.DataFrame(data={"time": raw_times}, index=row_labels)


def test_clean_time_cols(df_with_times):
    """Check that ``clean_time_cols`` parses the "time" column and coerces it to UTC.

    Covers three things:
    * the raw column has no ``.dt`` accessor before cleaning, and the
      cleaned column does;
    * the cleaned column is timezone-aware with a UTC timezone;
    * every offset in the input is shifted to its UTC equivalent, the
      offset-less input keeps its wall-clock time labelled as UTC, and
      the missing entry stays null.
    """
    # The raw column holds strings and pd.NA, so the datetime accessor
    # is not available yet.
    with pytest.raises(AttributeError):
        df_with_times["time"].dt

    cleaned = clean_time_cols(df_with_times)
    time_col = cleaned["time"]

    assert pd.api.types.is_datetime64_ns_dtype(time_col)
    # The ns-dtype check above also passes for other timezones, so pin
    # UTC explicitly. Accessing .dt on the returned frame also proves
    # the accessor exists on the cleaned result.
    assert isinstance(time_col.dtype, pd.DatetimeTZDtype)
    assert str(time_col.dt.tz) == "UTC"

    # Built through the public pd.array constructor rather than the
    # private DatetimeArray._from_sequence.
    expected_times = pd.array(
        [
            "2023-01-30 19:26:13.805829+00:00",
            "2023-09-27 06:16:46.158966+00:00",
            "2023-09-27 13:16:46.230000+00:00",
            "2023-09-27 13:16:46.158900+00:00",
            "2023-09-28 09:00:01.230000+00:00",
            "2023-09-27 13:16:46+00:00",
            "2023-09-28 00:00:00+00:00",
        ],
        dtype="datetime64[ns, UTC]",
    )

    # pd.NaT != pd.NaT, so the missing entry (row "H", the last one) is
    # checked separately from the element-wise array comparison.
    assert np.array_equal(expected_times, time_col.array[:-1])
    assert pd.isnull(time_col["H"])

0 comments on commit 13158f4

Please sign in to comment.