Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: disallow parsing datetimes with mixed time zones unless utc=True #57275

Merged
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions asv_bench/benchmarks/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def time_same_offset(self):
to_datetime(self.same_offset)

def time_different_offset(self):
to_datetime(self.diff_offset)
to_datetime(self.diff_offset, utc=True)


class ToDatetimeFormatQuarters:
Expand Down Expand Up @@ -231,9 +231,6 @@ def time_no_exact(self):
def time_same_offset(self):
to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z")

def time_different_offset(self):
to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z")

def time_same_offset_to_utc(self):
to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True)

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ Removal of prior version deprecations/changes
- All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`)
- All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`)
- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless the user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)
- Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`)
Expand Down
10 changes: 1 addition & 9 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -744,15 +744,7 @@ cdef _array_to_datetime_object(

cnp.PyArray_MultiIter_NEXT(mi)

warnings.warn(
"In a future version of pandas, parsing datetimes with mixed time "
"zones will raise an error unless `utc=True`. "
"Please specify `utc=True` to opt in to the new behaviour "
"and silence this warning. To create a `Series` with mixed offsets and "
"`object` dtype, please use `apply` and `datetime.datetime.strptime`",
FutureWarning,
stacklevel=find_stack_level(),
)
raise ValueError("cannot parse datetimes with mixed time zones unless `utc=True`")
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
return oresult_nd, None


Expand Down
13 changes: 1 addition & 12 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -923,18 +923,7 @@ def _array_strptime_object_fallback(
else:
raise

import warnings

from pandas.util._exceptions import find_stack_level
warnings.warn(
"In a future version of pandas, parsing datetimes with mixed time "
"zones will raise an error unless `utc=True`. Please specify `utc=True` "
"to opt in to the new behaviour and silence this warning. "
"To create a `Series` with mixed offsets and `object` dtype, "
"please use `apply` and `datetime.datetime.strptime`",
FutureWarning,
stacklevel=find_stack_level(),
)
raise ValueError("cannot parse datetimes with mixed time zones unless `utc=True`")
mroeschke marked this conversation as resolved.
Show resolved Hide resolved

return result

Expand Down
47 changes: 19 additions & 28 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,14 +723,6 @@ def to_datetime(
offsets (typically, daylight savings), see :ref:`Examples
<to_datetime_tz_examples>` section for details.

.. warning::

In a future version of pandas, parsing datetimes with mixed time
zones will raise an error unless `utc=True`.
Please specify `utc=True` to opt in to the new behaviour
and silence this warning. To create a `Series` with mixed offsets and
`object` dtype, please use `apply` and `datetime.datetime.strptime`.

See also: pandas general documentation about `timezone conversion and
localization
<https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
Expand Down Expand Up @@ -812,7 +804,9 @@ def to_datetime(
When another datetime conversion error happens. For example when one
of 'year', 'month', 'day' columns is missing in a :class:`DataFrame`, or
when a Timezone-aware :class:`datetime.datetime` is found in an array-like
of mixed time offsets, and ``utc=False``.
of mixed time offsets, and ``utc=False``, or when parsing datetimes
with mixed time zones unless ``utc=True``. If parsing datetimes with mixed
time zones, please specify ``utc=True``.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there's a word missing in this sentence? Like, "If parsing"? Also, no need to say "to opt in to the new behaviour" - this is the new behaviour :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the comment. I corrected the wording as you suggested.


See Also
--------
Expand Down Expand Up @@ -932,35 +926,32 @@ def to_datetime(
- However, timezone-aware inputs *with mixed time offsets* (for example
issued from a timezone with daylight savings, such as Europe/Paris)
are **not successfully converted** to a :class:`DatetimeIndex`.
Parsing datetimes with mixed time zones will show a warning unless
`utc=True`. If you specify `utc=False` the warning below will be shown
and a simple :class:`Index` containing :class:`datetime.datetime`
objects will be returned:
Parsing datetimes with mixed time zones will raise a ValueError unless
``utc=True``:

>>> pd.to_datetime(
... ["2020-10-25 02:00 +0200", "2020-10-25 04:00 +0100"]
... ) # doctest: +SKIP
FutureWarning: In a future version of pandas, parsing datetimes with mixed
time zones will raise an error unless `utc=True`. Please specify `utc=True`
to opt in to the new behaviour and silence this warning. To create a `Series`
with mixed offsets and `object` dtype, please use `apply` and
`datetime.datetime.strptime`.
Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00],
dtype='object')
ValueError: cannot parse datetimes with mixed time zones unless `utc=True`

- To create a :class:`Series` with mixed offsets and ``object`` dtype, please use
:meth:`Series.apply` and :func:`datetime.datetime.strptime`:

>>> import datetime as dt
>>> ser = pd.Series(["2020-10-25 02:00 +0200", "2020-10-25 04:00 +0100"])
>>> ser.apply(lambda x: dt.datetime.strptime(x, "%Y-%m-%d %H:%M %z"))
0 2020-10-25 02:00:00+02:00
1 2020-10-25 04:00:00+01:00
dtype: object

- A mix of timezone-aware and timezone-naive inputs is also converted to
a simple :class:`Index` containing :class:`datetime.datetime` objects:
- A mix of timezone-aware and timezone-naive inputs will also raise a ValueError
unless ``utc=True``:

>>> from datetime import datetime
>>> pd.to_datetime(
... ["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)]
... ) # doctest: +SKIP
FutureWarning: In a future version of pandas, parsing datetimes with mixed
time zones will raise an error unless `utc=True`. Please specify `utc=True`
to opt in to the new behaviour and silence this warning. To create a `Series`
with mixed offsets and `object` dtype, please use `apply` and
`datetime.datetime.strptime`.
Index([2020-01-01 01:00:00-01:00, 2020-01-01 03:00:00], dtype='object')
ValueError: cannot parse datetimes with mixed time zones unless `utc=True`

|

Expand Down
9 changes: 1 addition & 8 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -1292,14 +1292,7 @@ def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]:
date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS
for date_unit in date_units:
try:
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
".*parsing datetimes with mixed time "
"zones will raise an error",
category=FutureWarning,
)
new_data = to_datetime(new_data, errors="raise", unit=date_unit)
new_data = to_datetime(new_data, errors="raise", unit=date_unit)
except (ValueError, OverflowError, TypeError):
continue
return new_data, True
Expand Down
80 changes: 30 additions & 50 deletions pandas/io/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1158,24 +1158,18 @@ def converter(*date_cols, col: Hashable):
date_format.get(col) if isinstance(date_format, dict) else date_format
)

with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
".*parsing datetimes with mixed time zones will raise an error",
category=FutureWarning,
str_objs = ensure_object(strs)
try:
result = tools.to_datetime(
str_objs,
format=date_fmt,
utc=False,
dayfirst=dayfirst,
cache=cache_dates,
)
str_objs = ensure_object(strs)
try:
result = tools.to_datetime(
str_objs,
format=date_fmt,
utc=False,
dayfirst=dayfirst,
cache=cache_dates,
)
except (ValueError, TypeError):
# test_usecols_with_parse_dates4
return str_objs
except (ValueError, TypeError):
# test_usecols_with_parse_dates4
return str_objs

if isinstance(result, DatetimeIndex):
arr = result.to_numpy()
Expand All @@ -1184,45 +1178,31 @@ def converter(*date_cols, col: Hashable):
return result._values
else:
try:
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
".*parsing datetimes with mixed time zones "
"will raise an error",
category=FutureWarning,
)
pre_parsed = date_parser(
*(unpack_if_single_element(arg) for arg in date_cols)
pre_parsed = date_parser(
*(unpack_if_single_element(arg) for arg in date_cols)
)
try:
result = tools.to_datetime(
pre_parsed,
cache=cache_dates,
)
try:
result = tools.to_datetime(
pre_parsed,
cache=cache_dates,
)
except (ValueError, TypeError):
# test_read_csv_with_custom_date_parser
result = pre_parsed
except (ValueError, TypeError):
# test_read_csv_with_custom_date_parser
result = pre_parsed
if isinstance(result, datetime.datetime):
raise Exception("scalar parser")
return result
except Exception:
# e.g. test_datetime_fractional_seconds
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
".*parsing datetimes with mixed time zones "
"will raise an error",
category=FutureWarning,
)
pre_parsed = parsing.try_parse_dates(
parsing.concat_date_cols(date_cols),
parser=date_parser,
)
try:
return tools.to_datetime(pre_parsed)
except (ValueError, TypeError):
# TODO: not reached in tests 2023-10-27; needed?
return pre_parsed
pre_parsed = parsing.try_parse_dates(
parsing.concat_date_cols(date_cols),
parser=date_parser,
)
try:
return tools.to_datetime(pre_parsed)
except (ValueError, TypeError):
# TODO: not reached in tests 2023-10-27; needed?
return pre_parsed

return converter

Expand Down
8 changes: 3 additions & 5 deletions pandas/tests/indexes/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,11 +296,9 @@ def test_construction_index_with_mixed_timezones(self):
tm.assert_index_equal(result, exp, exact=True)
assert not isinstance(result, DatetimeIndex)

msg = "DatetimeIndex has mixed timezones"
msg_depr = "parsing datetimes with mixed time zones will raise an error"
with pytest.raises(TypeError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=msg_depr):
DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"])
msg = "cannot parse datetimes with mixed time zones unless `utc=True`"
with pytest.raises(ValueError, match=msg):
DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"])

# length = 1
result = Index([Timestamp("2011-01-01")], name="idx")
Expand Down
11 changes: 7 additions & 4 deletions pandas/tests/io/parser/common/test_read_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
ParserWarning,
)

from pandas import DataFrame
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
Expand Down Expand Up @@ -161,10 +164,10 @@ def test_suppress_error_output(all_parsers):
# see gh-15925
parser = all_parsers
data = "a\n1\n1,2,3\n4\n5,6,7"
expected = DataFrame({"a": [1, 4]})
expected = Series([1, 4], name="a", index=[0, 1])
MarcoGorelli marked this conversation as resolved.
Show resolved Hide resolved

result = parser.read_csv(StringIO(data), on_bad_lines="skip")
tm.assert_frame_equal(result, expected)
result = parser.read_csv(StringIO(data), on_bad_lines="skip")["a"]
tm.assert_series_equal(result, expected)


def test_error_bad_lines(all_parsers):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -2330,8 +2330,8 @@ def test_from_csv_with_mixed_offsets(all_parsers):
result = parser.read_csv(StringIO(data), parse_dates=["a"])["a"]
expected = Series(
[
Timestamp("2020-01-01 00:00:00+01:00"),
Timestamp("2020-01-01 00:00:00+00:00"),
"2020-01-01T00:00:00+01:00",
"2020-01-01T00:00:00+00:00",
],
name="a",
index=[0, 1],
Expand Down
Loading