From 7539b80cd9b038584ea3a94c3bf864d4555c2295 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 5 Feb 2024 23:59:30 +0100 Subject: [PATCH 01/14] correct def _array_to_datetime_object, _array_strptime_object_fallback, fix tests --- pandas/_libs/tslib.pyx | 10 +- pandas/_libs/tslibs/strptime.pyx | 13 +- pandas/io/json/_json.py | 9 +- pandas/io/parsers/base_parser.py | 80 +++++------ .../indexes/datetimes/test_constructors.py | 8 +- pandas/tests/tools/test_to_datetime.py | 126 +++++------------- pandas/tests/tslibs/test_array_to_datetime.py | 16 +-- 7 files changed, 75 insertions(+), 187 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 017fdc4bc834f..b601683bbd926 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -752,15 +752,7 @@ cdef _array_to_datetime_object( cnp.PyArray_MultiIter_NEXT(mi) - warnings.warn( - "In a future version of pandas, parsing datetimes with mixed time " - "zones will raise an error unless `utc=True`. " - "Please specify `utc=True` to opt in to the new behaviour " - "and silence this warning. To create a `Series` with mixed offsets and " - "`object` dtype, please use `apply` and `datetime.datetime.strptime`", - FutureWarning, - stacklevel=find_stack_level(), - ) + raise ValueError("cannot parse datetimes with mixed time zones unless `utc=True`") return oresult_nd, None diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index c09835c9661f3..82f4eaee34711 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -926,18 +926,7 @@ def _array_strptime_object_fallback( raise return values - import warnings - - from pandas.util._exceptions import find_stack_level - warnings.warn( - "In a future version of pandas, parsing datetimes with mixed time " - "zones will raise an error unless `utc=True`. Please specify `utc=True` " - "to opt in to the new behaviour and silence this warning. " - "To create a `Series` with mixed offsets and `object` dtype, " - "please use `apply` and `datetime.datetime.strptime`", - FutureWarning, - stacklevel=find_stack_level(), - ) + raise ValueError("cannot parse datetimes with mixed time zones unless `utc=True`") return result diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index cea34cdfb0b9d..27e0f1c856e86 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1345,14 +1345,7 @@ def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]: date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS for date_unit in date_units: try: - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - ".*parsing datetimes with mixed time " - "zones will raise an error", - category=FutureWarning, - ) - new_data = to_datetime(new_data, errors="raise", unit=date_unit) + new_data = to_datetime(new_data, errors="raise", unit=date_unit) except (ValueError, OverflowError, TypeError): continue return new_data, True diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 3aef0692d5f59..67062b5418eeb 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1158,24 +1158,18 @@ def converter(*date_cols, col: Hashable): date_format.get(col) if isinstance(date_format, dict) else date_format ) - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - ".*parsing datetimes with mixed time zones will raise an error", - category=FutureWarning, + str_objs = ensure_object(strs) + try: + result = tools.to_datetime( + str_objs, + format=date_fmt, + utc=False, + dayfirst=dayfirst, + cache=cache_dates, ) - str_objs = ensure_object(strs) - try: - result = tools.to_datetime( - str_objs, - format=date_fmt, - utc=False, - dayfirst=dayfirst, - cache=cache_dates, - ) - except (ValueError, TypeError): - # test_usecols_with_parse_dates4 - return str_objs + except (ValueError, TypeError): + # test_usecols_with_parse_dates4 + return str_objs if isinstance(result, DatetimeIndex): arr = result.to_numpy() @@ -1184,45 +1178,31 @@ def converter(*date_cols, col: Hashable): return result._values else: try: - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - ".*parsing datetimes with mixed time zones " - "will raise an error", - category=FutureWarning, - ) - pre_parsed = date_parser( - *(unpack_if_single_element(arg) for arg in date_cols) + pre_parsed = date_parser( + *(unpack_if_single_element(arg) for arg in date_cols) + ) + try: + result = tools.to_datetime( + pre_parsed, + cache=cache_dates, ) - try: - result = tools.to_datetime( - pre_parsed, - cache=cache_dates, - ) - except (ValueError, TypeError): - # test_read_csv_with_custom_date_parser - result = pre_parsed + except (ValueError, TypeError): + # test_read_csv_with_custom_date_parser + result = pre_parsed if isinstance(result, datetime.datetime): raise Exception("scalar parser") return result except Exception: # e.g. test_datetime_fractional_seconds - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - ".*parsing datetimes with mixed time zones " - "will raise an error", - category=FutureWarning, - ) - pre_parsed = parsing.try_parse_dates( - parsing.concat_date_cols(date_cols), - parser=date_parser, - ) - try: - return tools.to_datetime(pre_parsed) - except (ValueError, TypeError): - # TODO: not reached in tests 2023-10-27; needed? - return pre_parsed + pre_parsed = parsing.try_parse_dates( + parsing.concat_date_cols(date_cols), + parser=date_parser, + ) + try: + return tools.to_datetime(pre_parsed) + except (ValueError, TypeError): + # TODO: not reached in tests 2023-10-27; needed? + return pre_parsed return converter diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 97e768b348d55..0756d490cd4fa 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -296,11 +296,9 @@ def test_construction_index_with_mixed_timezones(self): tm.assert_index_equal(result, exp, exact=True) assert not isinstance(result, DatetimeIndex) - msg = "DatetimeIndex has mixed timezones" - msg_depr = "parsing datetimes with mixed time zones will raise an error" - with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=msg_depr): - DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"]) + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"]) # length = 1 result = Index([Timestamp("2011-01-01")], name="idx") diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 9d364c2f86ac5..5d524562fb706 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -12,7 +12,6 @@ import locale from dateutil.parser import parse -from dateutil.tz.tz import tzoffset import numpy as np import pytest import pytz @@ -508,15 +507,13 @@ def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates): ], ], ) - def test_to_datetime_parse_tzname_or_tzoffset_utc_false_deprecated( + def test_to_datetime_parse_tzname_or_tzoffset_utc_false_removed( self, fmt, dates, expected_dates ): # GH 13486, 50887 - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_datetime(dates, format=fmt) - expected = Index(expected_dates) - tm.assert_equal(result, expected) + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + to_datetime(dates, format=fmt) def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self): # GH 32792 @@ -691,7 +688,7 @@ def test_to_datetime_mixed_datetime_and_string_with_format( "constructor", [Timestamp, lambda x: Timestamp(x).to_pydatetime()], ) - def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utc_false( + def test_to_datetime_mixed_dt_and_str_with_format_mixed_offsets_utc_false_removed( self, fmt, constructor ): # https://github.com/pandas-dev/pandas/issues/49298 @@ -701,17 +698,10 @@ def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utc_fal args = ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"] ts1 = constructor(args[0]) ts2 = args[1] - msg = "parsing datetimes with mixed time zones will raise an error" + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" - expected = Index( - [ - Timestamp("2000-01-01 01:00:00"), - Timestamp("2000-01-01 02:00:00+0000", tz="UTC"), - ], - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_datetime([ts1, ts2], format=fmt, utc=False) - tm.assert_index_equal(result, expected) + with pytest.raises(ValueError, match=msg): + to_datetime([ts1, ts2], format=fmt, utc=False) @pytest.mark.parametrize( "fmt, expected", @@ -736,18 +726,18 @@ def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utc_fal ), ], ) - def test_to_datetime_mixed_offsets_with_none_tz(self, fmt, expected): + def test_to_datetime_mixed_offsets_with_none_tz_utc_false_removed( + self, fmt, expected + ): # https://github.com/pandas-dev/pandas/issues/50071 - msg = "parsing datetimes with mixed time zones will raise an error" + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_datetime( + with pytest.raises(ValueError, match=msg): + to_datetime( ["2000-01-01 09:00:00+01:00", "2000-01-02 02:00:00+02:00", None], format=fmt, utc=False, ) - expected = Index(expected) - tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "fmt, expected", @@ -1253,11 +1243,9 @@ def test_to_datetime_different_offsets(self, cache): ts_string_1 = "March 1, 2018 12:00:00+0400" ts_string_2 = "March 1, 2018 12:00:00+0500" arr = [ts_string_1] * 5 + [ts_string_2] * 5 - expected = Index([parse(x) for x in arr]) - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_datetime(arr, cache=cache) - tm.assert_index_equal(result, expected) + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + to_datetime(arr, cache=cache) def test_to_datetime_tz_pytz(self, cache): # see gh-8260 @@ -1624,17 +1612,9 @@ def test_to_datetime_coerce(self): "March 1, 2018 12:00:00+0500", "20100240", ] - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_datetime(ts_strings, errors="coerce") - expected = Index( - [ - datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 14400)), - datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 18000)), - NaT, - ] - ) - tm.assert_index_equal(result, expected) + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + to_datetime(ts_strings, errors="coerce") @pytest.mark.parametrize( "string_arg, format", @@ -1711,20 +1691,9 @@ def test_iso_8601_strings_with_same_offset(self): def test_iso_8601_strings_with_different_offsets(self): # GH 17697, 11736, 50887 ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT] - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_datetime(ts_strings) - expected = np.array( - [ - datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), - datetime(2015, 11, 18, 16, 30, tzinfo=tzoffset(None, 23400)), - NaT, - ], - dtype=object, - ) - # GH 21864 - expected = Index(expected) - tm.assert_index_equal(result, expected) + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + to_datetime(ts_strings) def test_iso_8601_strings_with_different_offsets_utc(self): ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT] @@ -1734,7 +1703,7 @@ def test_iso_8601_strings_with_different_offsets_utc(self): ) tm.assert_index_equal(result, expected) - def test_mixed_offsets_with_native_datetime_raises(self): + def test_mixed_offsets_with_native_datetime_utc_false_raises(self): # GH 25978 vals = [ @@ -1749,29 +1718,9 @@ def test_mixed_offsets_with_native_datetime_raises(self): ser = Series(vals) assert all(ser[i] is vals[i] for i in range(len(vals))) # GH#40111 - now = Timestamp("now") - today = Timestamp("today") - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): - mixed = to_datetime(ser) - expected = Series( - [ - "NaT", - Timestamp("1990-01-01"), - Timestamp("2015-03-14T16:15:14.123-08:00").to_pydatetime(), - Timestamp("2019-03-04T21:56:32.620-07:00").to_pydatetime(), - None, - ], - dtype=object, - ) - tm.assert_series_equal(mixed[:-2], expected) - # we'll check mixed[-1] and mixed[-2] match now and today to within - # call-timing tolerances - assert (now - mixed.iloc[-1]).total_seconds() <= 0.1 - assert (today - mixed.iloc[-2]).total_seconds() <= 0.1 - - with pytest.raises(ValueError, match="Tz-aware datetime.datetime"): - to_datetime(mixed) + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + to_datetime(ser) def test_non_iso_strings_with_tz_offset(self): result = to_datetime(["March 1, 2018 12:00:00+0400"] * 2) @@ -1834,8 +1783,8 @@ def test_to_datetime_fixed_offset(self): ) def test_to_datetime_mixed_offsets_with_utc_false_deprecated(self, date): # GH 50887 - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): to_datetime(date, utc=False) @@ -3713,9 +3662,9 @@ def test_to_datetime_with_empty_str_utc_false_format_mixed(): def test_to_datetime_with_empty_str_utc_false_offsets_and_format_mixed(): # GH 50887 - msg = "parsing datetimes with mixed time zones will raise an error" + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" - with tm.assert_produces_warning(FutureWarning, match=msg): + with pytest.raises(ValueError, match=msg): to_datetime( ["2020-01-01 00:00+00:00", "2020-01-01 00:00+02:00", ""], format="mixed" ) @@ -3788,8 +3737,6 @@ def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_fir both_strs = isinstance(aware_val, str) and isinstance(naive_val, str) has_numeric = isinstance(naive_val, (int, float)) - depr_msg = "In a future version of pandas, parsing datetimes with mixed time zones" - first_non_null = next(x for x in vec if x != "") # if first_non_null is a not a string, _guess_datetime_format_for_array # doesn't guess a format so we don't go through array_strptime @@ -3830,19 +3777,18 @@ def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_fir to_datetime(vec, utc=True) else: - with tm.assert_produces_warning(FutureWarning, match=depr_msg): + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): to_datetime(vec) # No warning/error with utc=True to_datetime(vec, utc=True) if both_strs: - with tm.assert_produces_warning(FutureWarning, match=depr_msg): + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): to_datetime(vec, format="mixed") - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - msg = "DatetimeIndex has mixed timezones" - with pytest.raises(TypeError, match=msg): - DatetimeIndex(vec) + DatetimeIndex(vec) else: msg = "Cannot mix tz-aware with tz-naive values" if naive_first and isinstance(aware_val, Timestamp): diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 3798e68b3780b..45c8f3a2d73a6 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -200,19 +200,9 @@ def test_parsing_different_timezone_offsets(): data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] data = np.array(data, dtype=object) - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): - result, result_tz = tslib.array_to_datetime(data) - expected = np.array( - [ - datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), - datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 23400)), - ], - dtype=object, - ) - - tm.assert_numpy_array_equal(result, expected) - assert result_tz is None + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + tslib.array_to_datetime(data) @pytest.mark.parametrize( From 4bd4969404eacd683994ed19541cfeff9eb768a2 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 8 Feb 2024 12:09:33 +0100 Subject: [PATCH 02/14] fix tests --- pandas/tests/io/parser/common/test_read_errors.py | 12 +++++++----- pandas/tests/io/parser/test_parse_dates.py | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index f5a724bad4fa2..b6a3b46ffb004 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -18,7 +18,9 @@ ParserWarning, ) -from pandas import DataFrame +from pandas import ( + DataFrame, Series, +) import pandas._testing as tm xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") @@ -161,10 +163,10 @@ def test_suppress_error_output(all_parsers): # see gh-15925 parser = all_parsers data = "a\n1\n1,2,3\n4\n5,6,7" - expected = DataFrame({"a": [1, 4]}) - - result = parser.read_csv(StringIO(data), on_bad_lines="skip") - tm.assert_frame_equal(result, expected) + expected = Series([1, 4], name="a", index=[0, 1]) + + result = parser.read_csv(StringIO(data), on_bad_lines="skip")["a"] + tm.assert_series_equal(result, expected) def test_error_bad_lines(all_parsers): diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index f02dd997e62d0..0bc0c3e744db7 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -2330,8 +2330,8 @@ def test_from_csv_with_mixed_offsets(all_parsers): result = parser.read_csv(StringIO(data), parse_dates=["a"])["a"] expected = Series( [ - Timestamp("2020-01-01 00:00:00+01:00"), - Timestamp("2020-01-01 00:00:00+00:00"), + "2020-01-01T00:00:00+01:00", + "2020-01-01T00:00:00+00:00", ], name="a", index=[0, 1], From 7bcde0242c79f96eb420b21751919d79a15ae689 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 8 Feb 2024 15:56:43 +0100 Subject: [PATCH 03/14] correct to_datetime docs, add a note to v3.0.0 --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/tools/datetimes.py | 37 +++++++++++++++------------------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 81c3f88f7e8ad..63638e1f9e5bf 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -102,6 +102,7 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) - Removed :meth:`DataFrame.first` and :meth:`DataFrame.last` (:issue:`53710`) - Removed :meth:`DataFrameGroupby.fillna` and :meth:`SeriesGroupBy.fillna` (:issue:`55719`) - Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index e937d5c399820..b71001284b22c 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -737,14 +737,6 @@ def to_datetime( offsets (typically, daylight savings), see :ref:`Examples ` section for details. - .. warning:: - - In a future version of pandas, parsing datetimes with mixed time - zones will raise an error unless `utc=True`. - Please specify `utc=True` to opt in to the new behaviour - and silence this warning. To create a `Series` with mixed offsets and - `object` dtype, please use `apply` and `datetime.datetime.strptime`. - See also: pandas general documentation about `timezone conversion and localization >> pd.to_datetime(['2020-10-25 02:00 +0200', ... '2020-10-25 04:00 +0100']) # doctest: +SKIP - FutureWarning: In a future version of pandas, parsing datetimes with mixed - time zones will raise an error unless `utc=True`. Please specify `utc=True` - to opt in to the new behaviour and silence this warning. To create a `Series` - with mixed offsets and `object` dtype, please use `apply` and - `datetime.datetime.strptime`. - Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00], - dtype='object') + ValueError: cannot parse datetimes with mixed time zones unless `utc=True` + + To create a `Series` with mixed offsets and `object` dtype, please use `apply` + and `datetime.datetime.strptime`. + + >>> import datetime as dt + >>> ser = pd.Series(['2020-10-25 02:00 +0200', '2020-10-25 04:00 +0100']) + >>> ser.apply(lambda x: dt.datetime.strptime(x, "%Y-%m-%d %H:%M %z")) + 0 2020-10-25 02:00:00+02:00 + 1 2020-10-25 04:00:00+01:00 + dtype: object - A mix of timezone-aware and timezone-naive inputs is also converted to a simple :class:`Index` containing :class:`datetime.datetime` objects: @@ -981,12 +981,7 @@ def to_datetime( >>> from datetime import datetime >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", ... datetime(2020, 1, 1, 3, 0)]) # doctest: +SKIP - FutureWarning: In a future version of pandas, parsing datetimes with mixed - time zones will raise an error unless `utc=True`. Please specify `utc=True` - to opt in to the new behaviour and silence this warning. To create a `Series` - with mixed offsets and `object` dtype, please use `apply` and - `datetime.datetime.strptime`. - Index([2020-01-01 01:00:00-01:00, 2020-01-01 03:00:00], dtype='object') + ValueError: cannot parse datetimes with mixed time zones unless `utc=True` | From a55d861cab6a753e24bc10b740a05114099f1fef Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 8 Feb 2024 16:34:09 +0100 Subject: [PATCH 04/14] correct to_datetime docs --- pandas/core/tools/datetimes.py | 23 +++++++++---------- .../io/parser/common/test_read_errors.py | 5 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index b71001284b22c..734abb5dbfb63 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -829,10 +829,11 @@ def to_datetime( of 'year', 'month', day' columns is missing in a :class:`DataFrame`, or when a Timezone-aware :class:`datetime.datetime` is found in an array-like of mixed time offsets, and ``utc=False``. - When parsing datetimes with mixed time zones unless `utc=True`. - Please specify `utc=True` to opt in to the new behaviour. - To create a `Series` with mixed offsets and `object` dtype, - please use `apply` and `datetime.datetime.strptime`. + + When parsing datetimes with mixed time zones unless ``utc=True``. + Please specify ``utc=True`` to opt in to the new behaviour. + To create a :class:`Series` with mixed offsets and ``object`` dtype, + please use :meth:`Series.apply` and :func:`datetime.datetime.strptime`. See Also -------- @@ -956,17 +957,15 @@ def to_datetime( - However, timezone-aware inputs *with mixed time offsets* (for example issued from a timezone with daylight savings, such as Europe/Paris) are **not successfully converted** to a :class:`DatetimeIndex`. - Parsing datetimes with mixed time zones will show a warning unless - `utc=True`. If you specify `utc=False` the warning below will be shown - and a simple :class:`Index` containing :class:`datetime.datetime` - objects will be returned: + Parsing datetimes with mixed time zones will raise a ValueError unless + ``utc=True``: >>> pd.to_datetime(['2020-10-25 02:00 +0200', ... '2020-10-25 04:00 +0100']) # doctest: +SKIP ValueError: cannot parse datetimes with mixed time zones unless `utc=True` - To create a `Series` with mixed offsets and `object` dtype, please use `apply` - and `datetime.datetime.strptime`. + To create a :class:`Series` with mixed offsets and ``object`` dtype, please use + :meth:`Series.apply` and :func:`datetime.datetime.strptime`: >>> import datetime as dt >>> ser = pd.Series(['2020-10-25 02:00 +0200', '2020-10-25 04:00 +0100']) @@ -975,8 +974,8 @@ def to_datetime( 1 2020-10-25 04:00:00+01:00 dtype: object - - A mix of timezone-aware and timezone-naive inputs is also converted to - a simple :class:`Index` containing :class:`datetime.datetime` objects: + - A mix of timezone-aware and timezone-naive inputs will also raise a ValueError + unless ``utc=True``: >>> from datetime import datetime >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index b6a3b46ffb004..28cfcf913235b 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -19,7 +19,8 @@ ) from pandas import ( - DataFrame, Series, + DataFrame, + Series, ) import pandas._testing as tm @@ -164,7 +165,7 @@ def test_suppress_error_output(all_parsers): parser = all_parsers data = "a\n1\n1,2,3\n4\n5,6,7" expected = Series([1, 4], name="a", index=[0, 1]) - + result = parser.read_csv(StringIO(data), on_bad_lines="skip")["a"] tm.assert_series_equal(result, expected) From c2e09766ab3ccf9018cf241f55fad1a65d3ad842 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 8 Feb 2024 17:48:29 +0100 Subject: [PATCH 05/14] fix failures in benchmarks/inference.py --- asv_bench/benchmarks/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index d5c58033c1157..eeeefd6dca9b1 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -203,7 +203,7 @@ def time_same_offset(self): to_datetime(self.same_offset) def time_different_offset(self): - to_datetime(self.diff_offset) + to_datetime(self.diff_offset, utc=True) class ToDatetimeFormatQuarters: @@ -235,7 +235,7 @@ def time_same_offset(self): to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z") def time_different_offset(self): - to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z") + to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True) def time_same_offset_to_utc(self): to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True) From b0b970b355bc2b6d9fbe65b8cba5b4399f530736 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 8 Feb 2024 17:59:58 +0100 Subject: [PATCH 06/14] fix pre-commit error --- pandas/core/tools/datetimes.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 98de46aed4057..07c9c9df1c422 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -956,15 +956,16 @@ def to_datetime( Parsing datetimes with mixed time zones will raise a ValueError unless ``utc=True``: - >>> pd.to_datetime(['2020-10-25 02:00 +0200', - ... '2020-10-25 04:00 +0100']) # doctest: +SKIP + >>> pd.to_datetime( + ... ["2020-10-25 02:00 +0200", "2020-10-25 04:00 +0100"] + ... ) # doctest: +SKIP ValueError: cannot parse datetimes with mixed time zones unless `utc=True` To create a :class:`Series` with mixed offsets and ``object`` dtype, please use :meth:`Series.apply` and :func:`datetime.datetime.strptime`: >>> import datetime as dt - >>> ser = pd.Series(['2020-10-25 02:00 +0200', '2020-10-25 04:00 +0100']) + >>> ser = pd.Series(["2020-10-25 02:00 +0200", "2020-10-25 04:00 +0100"]) >>> ser.apply(lambda x: dt.datetime.strptime(x, "%Y-%m-%d %H:%M %z")) 0 2020-10-25 02:00:00+02:00 1 2020-10-25 04:00:00+01:00 @@ -974,8 +975,9 @@ def to_datetime( unless ``utc=True``: >>> from datetime import datetime - >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", - ... datetime(2020, 1, 1, 3, 0)]) # doctest: +SKIP + >>> pd.to_datetime( + ... ["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)] + ... ) # doctest: +SKIP ValueError: cannot parse datetimes with mixed time zones unless `utc=True` | From 95203cd0b9f461c75965f567bceeb4a26534d75c Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 8 Feb 2024 19:52:07 +0100 Subject: [PATCH 07/14] correct examples in to_datetime docs --- pandas/core/tools/datetimes.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 07c9c9df1c422..628fd6dd55907 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -956,10 +956,12 @@ def to_datetime( Parsing datetimes with mixed time zones will raise a ValueError unless ``utc=True``: - >>> pd.to_datetime( - ... ["2020-10-25 02:00 +0200", "2020-10-25 04:00 +0100"] - ... ) # doctest: +SKIP - ValueError: cannot parse datetimes with mixed time zones unless `utc=True` + .. code-block:: python + + >>> pd.to_datetime( + ... ["2020-10-25 02:00 +0200", "2020-10-25 04:00 +0100"] + ... ) # doctest: +SKIP + ValueError: cannot parse datetimes with mixed time zones unless `utc=True` To create a :class:`Series` with mixed offsets and ``object`` dtype, please use :meth:`Series.apply` and :func:`datetime.datetime.strptime`: @@ -974,11 +976,13 @@ def to_datetime( - A mix of timezone-aware and timezone-naive inputs will also raise a ValueError unless ``utc=True``: - >>> from datetime import datetime - >>> pd.to_datetime( - ... ["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)] - ... ) # doctest: +SKIP - ValueError: cannot parse datetimes with mixed time zones unless `utc=True` + .. code-block:: python + + >>> from datetime import datetime + >>> pd.to_datetime( + ... ["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)] + ... ) # doctest: +SKIP + ValueError: cannot parse datetimes with mixed time zones unless `utc=True` | From 6a5cc06a66f26cea5b14be2db2e2c853a9f78367 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 8 Feb 2024 22:32:49 +0100 Subject: [PATCH 08/14] correct to_datetime docs --- pandas/core/tools/datetimes.py | 37 ++++++++++++++-------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 628fd6dd55907..f4b4c3dc3db9d 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -828,12 +828,9 @@ def to_datetime( When another datetime conversion error happens. For example when one of 'year', 'month', day' columns is missing in a :class:`DataFrame`, or when a Timezone-aware :class:`datetime.datetime` is found in an array-like - of mixed time offsets, and ``utc=False``. - - When parsing datetimes with mixed time zones unless ``utc=True``. - Please specify ``utc=True`` to opt in to the new behaviour. - To create a :class:`Series` with mixed offsets and ``object`` dtype, - please use :meth:`Series.apply` and :func:`datetime.datetime.strptime`. + of mixed time offsets, and ``utc=False``, or when parsing datetimes + with mixed time zones unless ``utc=True``. Parsing datetimes with mixed + time zones, please specify ``utc=True`` to opt in to the new behaviour. See Also -------- @@ -956,15 +953,13 @@ def to_datetime( Parsing datetimes with mixed time zones will raise a ValueError unless ``utc=True``: - .. code-block:: python - - >>> pd.to_datetime( - ... ["2020-10-25 02:00 +0200", "2020-10-25 04:00 +0100"] - ... ) # doctest: +SKIP - ValueError: cannot parse datetimes with mixed time zones unless `utc=True` + >>> pd.to_datetime( + ... ["2020-10-25 02:00 +0200", "2020-10-25 04:00 +0100"] + ... ) # doctest: +SKIP + ValueError: cannot parse datetimes with mixed time zones unless `utc=True` - To create a :class:`Series` with mixed offsets and ``object`` dtype, please use - :meth:`Series.apply` and :func:`datetime.datetime.strptime`: + - To create a :class:`Series` with mixed offsets and ``object`` dtype, please use + :meth:`Series.apply` and :func:`datetime.datetime.strptime`: >>> import datetime as dt >>> ser = pd.Series(["2020-10-25 02:00 +0200", "2020-10-25 04:00 +0100"]) @@ -974,15 +969,13 @@ def to_datetime( dtype: object - A mix of timezone-aware and timezone-naive inputs will also raise a ValueError - unless ``utc=True``: - - .. code-block:: python + unless ``utc=True``: - >>> from datetime import datetime - >>> pd.to_datetime( - ... ["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)] - ... ) # doctest: +SKIP - ValueError: cannot parse datetimes with mixed time zones unless `utc=True` + >>> from datetime import datetime + >>> pd.to_datetime( + ... ["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)] + ... ) # doctest: +SKIP + ValueError: cannot parse datetimes with mixed time zones unless `utc=True` | From fdd22db57eb5a49412cdf70f5f236b0392543526 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 9 Feb 2024 00:29:51 +0100 Subject: [PATCH 09/14] delete time_different_offset from benchmarks/inference.py as redundant --- asv_bench/benchmarks/inference.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index eeeefd6dca9b1..a7ab45aaea2e9 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -234,9 +234,6 @@ def time_no_exact(self): def time_same_offset(self): to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z") - def time_different_offset(self): - to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True) - def time_same_offset_to_utc(self): to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True) From 3783ea7b53fd6514edea09d2ad857c1a0d48b14b Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 9 Feb 2024 10:43:03 +0100 Subject: [PATCH 10/14] correct v3.0.0 --- doc/source/whatsnew/v3.0.0.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 19dd88a60c74d..dd76f082b83d1 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -105,8 +105,6 @@ Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`) - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) -- Removed :meth:`DataFrame.first` and :meth:`DataFrame.last` (:issue:`53710`) -- Removed :meth:`DataFrameGroupby.fillna` and :meth:`SeriesGroupBy.fillna` (:issue:`55719`) - Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`) - Removed ``DataFrameGroupby.fillna`` and ``SeriesGroupBy.fillna``` (:issue:`55719`) - Removed ``axis`` argument from :meth:`DataFrame.groupby`, :meth:`Series.groupby`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.resample`, and :meth:`Series.resample` (:issue:`51203`) From 00483ef049da20f5dc59924b5a3016f5d460dcda Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 15 Feb 2024 01:39:10 +0100 Subject: [PATCH 11/14] removed _array_to_datetime_object and _array_strptime_object_fallback --- pandas/_libs/tslib.pyx | 106 +---------------- pandas/_libs/tslibs/strptime.pyx | 156 ++----------------------- pandas/tests/tools/test_to_datetime.py | 23 ++-- 3 files changed, 23 insertions(+), 262 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 67131c6ac3e0c..023d5b786753e 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -9,7 +9,6 @@ from datetime import timezone from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, - datetime, import_datetime, timedelta, tzinfo, @@ -598,7 +597,9 @@ cpdef array_to_datetime( # (with individual dateutil.tzoffsets) are returned is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets: - return _array_to_datetime_object(values, errors, dayfirst, yearfirst) + raise ValueError( + "cannot parse datetimes with mixed time zones unless `utc=True`" + ) elif state.found_naive or state.found_other: # e.g. test_to_datetime_mixed_awareness_mixed_types raise ValueError("Cannot mix tz-aware with tz-naive values") @@ -647,107 +648,6 @@ cpdef array_to_datetime( return result, tz_out -@cython.wraparound(False) -@cython.boundscheck(False) -cdef _array_to_datetime_object( - ndarray[object] values, - str errors, - bint dayfirst=False, - bint yearfirst=False, -): - """ - Fall back function for array_to_datetime - - Attempts to parse datetime strings with dateutil to return an array - of datetime objects - - Parameters - ---------- - values : ndarray[object] - date-like objects to convert - errors : str - error behavior when parsing - dayfirst : bool, default False - dayfirst parsing behavior when encountering datetime strings - yearfirst : bool, default False - yearfirst parsing behavior when encountering datetime strings - - Returns - ------- - np.ndarray[object] - Literal[None] - """ - cdef: - Py_ssize_t i, n = values.size - object val - bint is_coerce = errors == "coerce" - bint is_raise = errors == "raise" - ndarray oresult_nd - ndarray[object] oresult - npy_datetimestruct dts - cnp.broadcast mi - _TSObject tsobj - - assert is_raise or is_coerce - - oresult_nd = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0) - mi = cnp.PyArray_MultiIterNew2(oresult_nd, values) - oresult = oresult_nd.ravel() - - # We return an object array and only attempt to parse: - # 1) NaT or NaT-like values - # 2) datetime strings, which we return as datetime.datetime - # 3) special strings - "now" & "today" - for i in range(n): - # Analogous to: val = values[i] - val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - - if checknull_with_nat_and_na(val) or PyDateTime_Check(val): - # GH 25978. No need to parse NaT-like or datetime-like vals - oresult[i] = val - elif isinstance(val, str): - if type(val) is not str: - # GH#32264 np.str_ objects - val = str(val) - - if len(val) == 0 or val in nat_strings: - oresult[i] = "NaT" - cnp.PyArray_MultiIter_NEXT(mi) - continue - - try: - tsobj = convert_str_to_tsobject( - val, None, dayfirst=dayfirst, yearfirst=yearfirst - ) - tsobj.ensure_reso(NPY_FR_ns, val) - - dts = tsobj.dts - oresult[i] = datetime( - dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, - tzinfo=tsobj.tzinfo, - fold=tsobj.fold, - ) - - except (ValueError, OverflowError) as ex: - ex.args = (f"{ex}, at position {i}", ) - if is_coerce: - oresult[i] = NaT - cnp.PyArray_MultiIter_NEXT(mi) - continue - if is_raise: - raise - return values, None - else: - if is_raise: - raise - return values, None - - cnp.PyArray_MultiIter_NEXT(mi) - - raise ValueError("cannot parse datetimes with mixed time zones unless `utc=True`") - return oresult_nd, None - - def array_to_datetime_with_tz( ndarray values, tzinfo tz, bint dayfirst, bint yearfirst, NPY_DATETIMEUNIT creso ): diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 0ee2c9cf42e92..cd2475830b013 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -58,7 +58,6 @@ from pandas._libs.tslibs.dtypes cimport ( ) from pandas._libs.tslibs.nattype cimport ( NPY_NAT, - c_NaT as NaT, c_nat_strings as nat_strings, ) from pandas._libs.tslibs.np_datetime cimport ( @@ -503,20 +502,18 @@ def array_strptime( if seen_datetime_offset and not utc: is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets or (state.found_naive or state.found_other): - result2 = _array_strptime_object_fallback( - values, fmt=fmt, exact=exact, errors=errors, utc=utc + raise ValueError( + "cannot parse datetimes with mixed time zones unless `utc=True`" ) - return result2, None elif tz_out is not None: # GH#55693 tz_offset = out_tzoffset_vals.pop() tz_out2 = timezone(timedelta(seconds=tz_offset)) if not tz_compare(tz_out, tz_out2): - # e.g. test_to_datetime_mixed_offsets_with_utc_false_deprecated - result2 = _array_strptime_object_fallback( - values, fmt=fmt, exact=exact, errors=errors, utc=utc + # e.g. test_to_datetime_mixed_offsets_with_utc_false_removed + raise ValueError( + "cannot parse datetimes with mixed time zones unless `utc=True`" ) - return result2, None # e.g. test_guess_datetime_format_with_parseable_formats else: # e.g. test_to_datetime_iso8601_with_timezone_valid @@ -525,10 +522,9 @@ def array_strptime( elif not utc: if tz_out and (state.found_other or state.found_naive_str): # found_other indicates a tz-naive int, float, dt64, or date - result2 = _array_strptime_object_fallback( - values, fmt=fmt, exact=exact, errors=errors, utc=utc + raise ValueError( + "cannot parse datetimes with mixed time zones unless `utc=True`" ) - return result2, None if infer_reso: if state.creso_ever_changed: @@ -790,144 +786,6 @@ cdef tzinfo _parse_with_format( return tz -def _array_strptime_object_fallback( - ndarray[object] values, - str fmt, - bint exact=True, - errors="raise", - bint utc=False, -): - - cdef: - Py_ssize_t i, n = len(values) - npy_datetimestruct dts - int64_t iresult - object val - tzinfo tz - bint is_raise = errors=="raise" - bint is_coerce = errors=="coerce" - bint iso_format = format_is_iso(fmt) - NPY_DATETIMEUNIT creso, out_bestunit, item_reso - int out_local = 0, out_tzoffset = 0 - bint string_to_dts_succeeded = 0 - - assert is_raise or is_coerce - - item_reso = NPY_DATETIMEUNIT.NPY_FR_GENERIC - format_regex, locale_time = _get_format_regex(fmt) - - result = np.empty(n, dtype=object) - - dts.us = dts.ps = dts.as = 0 - - for i in range(n): - val = values[i] - try: - if isinstance(val, str): - if len(val) == 0 or val in nat_strings: - result[i] = NaT - continue - elif checknull_with_nat_and_na(val): - result[i] = NaT - continue - elif PyDateTime_Check(val): - result[i] = Timestamp(val) - continue - elif PyDate_Check(val): - result[i] = Timestamp(val) - continue - elif cnp.is_datetime64_object(val): - result[i] = Timestamp(val) - continue - elif ( - (is_integer_object(val) or is_float_object(val)) - and (val != val or val == NPY_NAT) - ): - result[i] = NaT - continue - else: - val = str(val) - - if fmt == "ISO8601": - string_to_dts_succeeded = not string_to_dts( - val, &dts, &out_bestunit, &out_local, - &out_tzoffset, False, None, False - ) - elif iso_format: - string_to_dts_succeeded = not string_to_dts( - val, &dts, &out_bestunit, &out_local, - &out_tzoffset, False, fmt, exact - ) - if string_to_dts_succeeded: - # No error reported by string_to_dts, pick back up - # where we left off - creso = get_supported_reso(out_bestunit) - try: - value = npy_datetimestruct_to_datetime(creso, &dts) - except OverflowError as err: - raise OutOfBoundsDatetime( - f"Out of bounds nanosecond timestamp: {val}" - ) from err - if out_local == 1: - tz = timezone(timedelta(minutes=out_tzoffset)) - value = tz_localize_to_utc_single( - value, tz, ambiguous="raise", nonexistent=None, creso=creso - ) - else: - tz = None - ts = Timestamp._from_value_and_reso(value, creso, tz) - result[i] = ts - continue - - if parse_today_now(val, &iresult, utc, NPY_FR_ns): - result[i] = Timestamp(val) - continue - - # Some ISO formats can't be parsed by string_to_dts - # For example, 6-digit YYYYMD. So, if there's an error, and a format - # was specified, then try the string-matching code below. If the format - # specified was 'ISO8601', then we need to error, because - # only string_to_dts handles mixed ISO8601 formats. - if not string_to_dts_succeeded and fmt == "ISO8601": - raise ValueError(f"Time data {val} is not ISO8601 format") - - tz = _parse_with_format( - val, fmt, exact, format_regex, locale_time, &dts, &item_reso - ) - try: - iresult = npy_datetimestruct_to_datetime(item_reso, &dts) - except OverflowError as err: - raise OutOfBoundsDatetime( - f"Out of bounds nanosecond timestamp: {val}" - ) from err - if tz is not None: - iresult = tz_localize_to_utc_single( - iresult, tz, ambiguous="raise", nonexistent=None, creso=item_reso - ) - ts = Timestamp._from_value_and_reso(iresult, item_reso, tz) - result[i] = ts - - except (ValueError, OutOfBoundsDatetime) as ex: - ex.args = ( - f"{str(ex)}, at position {i}. You might want to try:\n" - " - passing `format` if your strings have a consistent format;\n" - " - passing `format='ISO8601'` if your strings are " - "all ISO8601 but not necessarily in exactly the same format;\n" - " - passing `format='mixed'`, and the format will be " - "inferred for each element individually. " - "You might want to use `dayfirst` alongside this.", - ) - if is_coerce: - result[i] = NaT - continue - else: - raise - - raise ValueError("cannot parse datetimes with mixed time zones unless `utc=True`") - - return result - - class TimeRE(_TimeRE): """ Handle conversion from format directives to regexes. diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 9a4b5e806bb1e..afa22160aff7d 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -462,7 +462,7 @@ def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates): def test_to_datetime_parse_tzname_or_tzoffset_utc_false_removed( self, fmt, dates, expected_dates ): - # GH 13486, 50887 + # GH#13486, GH#50887, GH#57275 msg = "cannot parse datetimes with mixed time zones unless `utc=True`" with pytest.raises(ValueError, match=msg): to_datetime(dates, format=fmt) @@ -640,6 +640,7 @@ def test_to_datetime_mixed_dt_and_str_with_format_mixed_offsets_utc_false_remove ): # https://github.com/pandas-dev/pandas/issues/49298 # https://github.com/pandas-dev/pandas/issues/50254 + # GH#57275 # note: ISO8601 formats go down a fastpath, so we need to check both # a ISO8601 format and a non-ISO8601 one args = ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"] @@ -677,6 +678,7 @@ def test_to_datetime_mixed_offsets_with_none_tz_utc_false_removed( self, fmt, expected ): # https://github.com/pandas-dev/pandas/issues/50071 + # GH#57275 msg = "cannot parse datetimes with mixed time zones unless `utc=True`" with pytest.raises(ValueError, match=msg): @@ -1143,9 +1145,10 @@ def test_to_datetime_tz_mixed(self, cache): ) tm.assert_index_equal(result, expected) - def test_to_datetime_different_offsets(self, cache): + def test_to_datetime_different_offsets_removed(self, cache): # inspired by asv timeseries.ToDatetimeNONISO8601 benchmark # see GH-26097 for more + # GH#57275 ts_string_1 = "March 1, 2018 12:00:00+0400" ts_string_2 = "March 1, 2018 12:00:00+0500" arr = [ts_string_1] * 5 + [ts_string_2] * 5 @@ -1500,7 +1503,7 @@ def test_week_without_day_and_calendar_year(self, date, format): to_datetime(date, format=format) def test_to_datetime_coerce(self): - # GH 26122 + # GH#26122, GH#57275 ts_strings = [ "March 1, 2018 12:00:00+0400", "March 1, 2018 12:00:00+0500", @@ -1575,8 +1578,8 @@ def test_iso_8601_strings_with_same_offset(self): result = DatetimeIndex([ts_str] * 2) tm.assert_index_equal(result, expected) - def test_iso_8601_strings_with_different_offsets(self): - # GH 17697, 11736, 50887 + def test_iso_8601_strings_with_different_offsets_removed(self): + # GH#17697, GH#11736, GH#50887, GH#57275 ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT] msg = "cannot parse datetimes with mixed time zones unless `utc=True`" with pytest.raises(ValueError, match=msg): @@ -1591,7 +1594,7 @@ def test_iso_8601_strings_with_different_offsets_utc(self): tm.assert_index_equal(result, expected) def test_mixed_offsets_with_native_datetime_utc_false_raises(self): - # GH 25978 + # GH#25978, GH#57275 vals = [ "nan", @@ -1668,8 +1671,8 @@ def test_to_datetime_fixed_offset(self): ], ], ) - def test_to_datetime_mixed_offsets_with_utc_false_deprecated(self, date): - # GH 50887 + def test_to_datetime_mixed_offsets_with_utc_false_removed(self, date): + # GH#50887, GH#57275 msg = "cannot parse datetimes with mixed time zones unless `utc=True`" with pytest.raises(ValueError, match=msg): to_datetime(date, utc=False) @@ -3459,7 +3462,7 @@ def test_to_datetime_with_empty_str_utc_false_format_mixed(): def test_to_datetime_with_empty_str_utc_false_offsets_and_format_mixed(): - # GH 50887 + # GH#50887, GH#57275 msg = "cannot parse datetimes with mixed time zones unless `utc=True`" with pytest.raises(ValueError, match=msg): @@ -3521,7 +3524,7 @@ def test_to_datetime_mixed_types_matching_tzs(): ) @pytest.mark.parametrize("naive_first", [True, False]) def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_first): - # GH#55793, GH#55693 + # GH#55793, GH#55693, GH#57275 # Empty string parses to NaT vals = [aware_val, naive_val, ""] From 7410a9b07b79f8cf0c7d30b2aa4f8e0a01d35fde Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 15 Feb 2024 12:02:24 +0100 Subject: [PATCH 12/14] correct to_datetime docstring, roll back changes in test_suppress_error_output --- pandas/core/tools/datetimes.py | 4 ++-- pandas/tests/io/parser/common/test_read_errors.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 617e66cc02bda..c785f0c3a6985 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -805,8 +805,8 @@ def to_datetime( of 'year', 'month', day' columns is missing in a :class:`DataFrame`, or when a Timezone-aware :class:`datetime.datetime` is found in an array-like of mixed time offsets, and ``utc=False``, or when parsing datetimes - with mixed time zones unless ``utc=True``. Parsing datetimes with mixed - time zones, please specify ``utc=True`` to opt in to the new behaviour. + with mixed time zones unless ``utc=True``. If parsing datetimes with mixed + time zones, please specify ``utc=True``. See Also -------- diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index 28cfcf913235b..322cb7fb7cda4 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -164,10 +164,10 @@ def test_suppress_error_output(all_parsers): # see gh-15925 parser = all_parsers data = "a\n1\n1,2,3\n4\n5,6,7" - expected = Series([1, 4], name="a", index=[0, 1]) + expected = DataFrame({"a": [1, 4]}) - result = parser.read_csv(StringIO(data), on_bad_lines="skip")["a"] - tm.assert_series_equal(result, expected) + result = parser.read_csv(StringIO(data), on_bad_lines="skip") + tm.assert_frame_equal(result, expected) def test_error_bad_lines(all_parsers): From 5bc7ae8cb33598127f19dd60ae8519a6f3eced5b Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 15 Feb 2024 12:08:57 +0100 Subject: [PATCH 13/14] fix pre-commit error --- pandas/tests/io/parser/common/test_read_errors.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index 322cb7fb7cda4..f5a724bad4fa2 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -18,10 +18,7 @@ ParserWarning, ) -from pandas import ( - DataFrame, - Series, -) +from pandas import DataFrame import pandas._testing as tm xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") From 848edd9a556de2e19c72e0fd9b6b3b83a65fba56 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 15 Feb 2024 23:11:54 +0100 Subject: [PATCH 14/14] correct test_to_datetime_mixed_awareness_mixed_types, and a comment in array_to_datetime --- pandas/_libs/tslib.pyx | 8 ++++---- pandas/tests/tools/test_to_datetime.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 023d5b786753e..a09f4321c0d3c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -589,12 +589,12 @@ cpdef array_to_datetime( return values, None if seen_datetime_offset and not utc_convert: - # GH#17697 + # GH#17697, GH#57275 # 1) If all the offsets are equal, return one offset for # the parsed dates to (maybe) pass to DatetimeIndex - # 2) If the offsets are different, then force the parsing down the - # object path where an array of datetimes - # (with individual dateutil.tzoffsets) are returned + # 2) If the offsets are different, then do not force the parsing + # and raise a ValueError: "cannot parse datetimes with + # mixed time zones unless `utc=True`" instead is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets: raise ValueError( diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index afa22160aff7d..3e617138c4a6a 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3589,6 +3589,7 @@ def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_fir msg = "cannot parse datetimes with mixed time zones unless `utc=True`" with pytest.raises(ValueError, match=msg): to_datetime(vec, format="mixed") + with pytest.raises(ValueError, match=msg): DatetimeIndex(vec) else: msg = "Cannot mix tz-aware with tz-naive values"