diff --git a/python/arcticdb/version_store/processing.py b/python/arcticdb/version_store/processing.py index b6631f8524..dba4a507ae 100644 --- a/python/arcticdb/version_store/processing.py +++ b/python/arcticdb/version_store/processing.py @@ -242,6 +242,18 @@ def is_supported_sequence(obj): return isinstance(obj, (list, set, frozenset, tuple, np.ndarray)) +def nanoseconds_from_utc(time): + # We convert every time type to a pandas Timestamp because it: + # - is consistent with how we normalize time types when normalizing + # - has nanosecond precision + # - if tzinfo is set will give nanoseconds since UTC + return int(pd.Timestamp(time).value) + + +def nanoseconds_timedelta(timedelta): + return int(pd.Timedelta(timedelta).value) + + def value_list_from_args(*args): if len(args) == 1 and is_supported_sequence(args[0]): collection = args[0] @@ -255,7 +267,7 @@ def value_list_from_args(*args): if value not in value_set: value_set.add(value) if isinstance(value, supported_time_types): - value = int(value.timestamp() * 1_000_000_000) + value = nanoseconds_from_utc(value) elem = np.array([value]) if isinstance(value, (int, np.integer)) else np.full(1, value, dtype=None) array_list.append(elem) contains_integer = contains_integer or isinstance(value, (int, np.integer)) @@ -708,15 +720,11 @@ def create_value(value): elif isinstance(value, np.integer): min_scalar_type = np.min_scalar_type(value) f = CONSTRUCTOR_MAP.get(min_scalar_type.kind).get(min_scalar_type.itemsize) - elif isinstance(value, (pd.Timestamp, pd.Timedelta)): - # pd.Timestamp is in supported_time_types, but its timestamp() method can't provide ns precision - value = value.value - f = ValueInt64 elif isinstance(value, supported_time_types): - value = int(value.timestamp() * 1_000_000_000) + value = nanoseconds_from_utc(value) f = ValueInt64 - elif isinstance(value, datetime.timedelta): - value = int(value.total_seconds() * 1_000_000_000) + elif isinstance(value, (datetime.timedelta, pd.Timedelta)): + value = nanoseconds_timedelta(value) f = ValueInt64 elif isinstance(value, bool): f = ValueBool diff --git a/python/tests/unit/arcticdb/version_store/test_query_builder.py b/python/tests/unit/arcticdb/version_store/test_query_builder.py index 2f32ca2411..56c15b90f9 100644 --- a/python/tests/unit/arcticdb/version_store/test_query_builder.py +++ b/python/tests/unit/arcticdb/version_store/test_query_builder.py @@ -8,6 +8,8 @@ import numpy as np import pandas as pd import pytest +import datetime +import dateutil from arcticdb.version_store.processing import QueryBuilder from arcticdb.util.test import assert_frame_equal @@ -60,6 +62,39 @@ def test_querybuilder_date_range_then_filter(lmdb_version_store_tiny_segment, us assert_frame_equal(expected, received) +def test_querybuilder_filter_datetime_with_timezone(lmdb_version_store_tiny_segment): + lib = lmdb_version_store_tiny_segment + symbol = "symbol" + def can_read_back(write_with_time, filter_with_time): + df = pd.DataFrame({"col": [write_with_time]}) + lib.delete(symbol) + lib.write(symbol, df) + + q = QueryBuilder() + q = q[q["col"] == filter_with_time] + read_df = lib.read(symbol, query_builder=q).data + + return len(read_df) == 1 + + notz_winter_time = datetime.datetime(2024, 1, 1) + notz_summer_time = datetime.datetime(2024, 6, 1) + utc_time = datetime.datetime(2024, 6, 1, tzinfo=dateutil.tz.tzutc()) + us_time = datetime.datetime(2024, 6, 1, tzinfo=dateutil.tz.gettz('America/New_York')) + + # Reading back the same time should always succeed + assert can_read_back(notz_winter_time, notz_winter_time) + assert can_read_back(notz_summer_time, notz_summer_time) + assert can_read_back(utc_time, utc_time) + assert can_read_back(us_time, us_time) + + # If tzinfo is not specified we assume UTC + assert can_read_back(notz_summer_time, utc_time) + assert can_read_back(utc_time, notz_summer_time) + assert not can_read_back(notz_summer_time, us_time) + assert not can_read_back(us_time, notz_summer_time) + + + @pytest.mark.parametrize("use_date_range_clause", [True, False]) def test_querybuilder_date_range_then_project(lmdb_version_store_tiny_segment, use_date_range_clause): lib = lmdb_version_store_tiny_segment