From bcbe2ac76f8f2deb44114eef602920682935e83e Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Wed, 5 Jul 2023 15:06:49 +0200 Subject: [PATCH 01/25] add raising FutureWarning in _return_parsed_timezone_results and in array_to_datetime, fix tests --- pandas/_libs/tslib.pyx | 8 ++++++++ pandas/core/tools/datetimes.py | 11 ++++++++++- pandas/tests/indexes/datetimes/test_constructors.py | 5 ++++- pandas/tests/io/json/test_readlines.py | 5 ++++- pandas/tests/tools/test_to_datetime.py | 5 ++++- pandas/tests/tslibs/test_array_to_datetime.py | 5 ++++- 6 files changed, 34 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 106f203a16855..a35cc574575ba 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -561,6 +561,14 @@ cpdef array_to_datetime( # (with individual dateutil.tzoffsets) are returned is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets: + warnings.warn( + "In a future version of pandas, parsing datetimes with mixed time " + "zones will raise a warning unless `utc=True`. " + "Please specify `utc=True to opt in to the new behaviour " + "and silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) return _array_to_datetime_object(values, errors, dayfirst, yearfirst) else: tz_offset = out_tzoffset_vals.pop() diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 13a434812db3b..bffc64bede815 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -337,7 +337,16 @@ def _return_parsed_timezone_results( tz_result : Index-like of parsed dates with timezone """ tz_results = np.empty(len(result), dtype=object) - for zone in unique(timezones): + unique_timezones = unique(timezones) + if len(unique_timezones) > 1 and not utc and name is not None: + warnings.warn( + "In a future version of pandas, parsing datetimes with mixed time " + "zones will raise a warning unless `utc=True`. 
Please specify `utc=True " + "to opt in to the new behaviour and silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + for zone in unique_timezones: mask = timezones == zone dta = DatetimeArray(result[mask]).tz_localize(zone) if utc: diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 6d18a292061b9..00995b32bff8a 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -301,7 +301,10 @@ def test_construction_index_with_mixed_timezones(self): msg = "DatetimeIndex has mixed timezones" with pytest.raises(TypeError, match=msg): - DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"]) + msg = "In a future version of pandas, parsing datetimes with mixed " + "time zones will raise a warning unless `utc=True`." + with tm.assert_produces_warning(FutureWarning, match=msg): + DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"]) # length = 1 result = Index([Timestamp("2011-01-01")], name="idx") diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 54f4980b1e4e3..abde17d68026e 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -54,7 +54,10 @@ def test_read_datetime(request, engine): if engine == "pyarrow": result = read_json(StringIO(json_line), engine=engine) else: - result = read_json(StringIO(json_line), engine=engine) + msg = "In a future version of pandas, parsing datetimes with mixed time zones " + "will raise a warning unless `utc=True`." 
+ with tm.assert_produces_warning(FutureWarning, match=msg): + result = read_json(StringIO(json_line), engine=engine) expected = DataFrame( [[1, "2020-03-05", "hector"], [2, "2020-04-08T09:58:49+00:00", "hector"]], columns=["accounts", "date", "name"], diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 4466ee96235f5..dab3541eeac37 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1649,7 +1649,10 @@ def test_mixed_offsets_with_native_datetime_raises(self): now = Timestamp("now") today = Timestamp("today") - mixed = to_datetime(ser) + msg = "In a future version of pandas, parsing datetimes with mixed time zones " + "will raise a warning unless `utc=True`." + with tm.assert_produces_warning(FutureWarning, match=msg): + mixed = to_datetime(ser) expected = Series( [ "NaT", diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index ba188c3182f57..1eb6c69893a75 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -85,7 +85,10 @@ def test_parsing_different_timezone_offsets(): data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] data = np.array(data, dtype=object) - result, result_tz = tslib.array_to_datetime(data) + msg = "In a future version of pandas, parsing datetimes with mixed time zones " + "will raise a warning unless `utc=True`." 
+ with tm.assert_produces_warning(FutureWarning, match=msg): + result, result_tz = tslib.array_to_datetime(data) expected = np.array( [ datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), From ba99e93dc661ac0c07ab3e96253dfe7ee9b0a115 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Wed, 5 Jul 2023 23:42:38 +0200 Subject: [PATCH 02/25] correct the definition of _return_parsed_timezone_results, added a test for FutureWarning, fix tests --- pandas/core/tools/datetimes.py | 2 +- pandas/tests/frame/methods/test_to_csv.py | 7 +- .../tests/groupby/aggregate/test_aggregate.py | 23 +- pandas/tests/tools/test_to_datetime.py | 202 +++++++++++++----- 4 files changed, 172 insertions(+), 62 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index bffc64bede815..b0736549c6854 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -338,7 +338,7 @@ def _return_parsed_timezone_results( """ tz_results = np.empty(len(result), dtype=object) unique_timezones = unique(timezones) - if len(unique_timezones) > 1 and not utc and name is not None: + if len(unique_timezones) > 1 and not utc: warnings.warn( "In a future version of pandas, parsing datetimes with mixed time " "zones will raise a warning unless `utc=True`. Please specify `utc=True " diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index ee9c4f05991a0..41951afd28a6e 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -140,8 +140,11 @@ def test_to_csv_from_csv5(self, timezone_frame): .dt.tz_convert("UTC") .dt.tz_convert(timezone_frame[c].dt.tz) ) - result["B"] = converter("B") - result["C"] = converter("C") + msg = "In a future version of pandas, parsing datetimes with mixed " + "time zones will raise a warning unless `utc=True`." 
+ with tm.assert_produces_warning(FutureWarning, match=msg): + result["B"] = converter("B") + result["C"] = converter("C") tm.assert_frame_equal(result, timezone_frame) def test_to_csv_cols_reordering(self): diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3558377907931..5270cd0df0f99 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1451,15 +1451,32 @@ def test_group_mean_timedelta_nat(): ["2021-01-01T00:00", "NaT", "2021-01-01T02:00"], ["2021-01-01T01:00"], ), + ], +) +def test_group_mean_datetime64_nat_notz(input_data, expected_output): + # GH43132 + data = to_datetime(Series(input_data)) + expected = to_datetime(Series(expected_output, index=np.array([0]))) + + result = data.groupby([0, 0, 0]).mean() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "input_data, expected_output", + [ ( # timezone ["2021-01-01T00:00-0100", "NaT", "2021-01-01T02:00-0100"], ["2021-01-01T01:00-0100"], ), ], ) -def test_group_mean_datetime64_nat(input_data, expected_output): - # GH43132 - data = to_datetime(Series(input_data)) +def test_group_mean_datetime64_nat_tz(input_data, expected_output): + # GH43132, 50887 + msg = "In a future version of pandas, parsing datetimes with mixed " + "time zones will raise a warning unless `utc=True`." 
+ with tm.assert_produces_warning(FutureWarning, match=msg): + data = to_datetime(Series(input_data)) expected = to_datetime(Series(expected_output, index=np.array([0]))) result = data.groupby([0, 0, 0]).mean() diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index dab3541eeac37..5333150ae649b 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -446,19 +446,6 @@ def test_to_datetime_format_weeks(self, value, fmt, expected, cache): ["2010-01-01 12:00:00 UTC"] * 2, [Timestamp("2010-01-01 12:00:00", tz="UTC")] * 2, ], - [ - "%Y-%m-%d %H:%M:%S %Z", - [ - "2010-01-01 12:00:00 UTC", - "2010-01-01 12:00:00 GMT", - "2010-01-01 12:00:00 US/Pacific", - ], - [ - Timestamp("2010-01-01 12:00:00", tz="UTC"), - Timestamp("2010-01-01 12:00:00", tz="GMT"), - Timestamp("2010-01-01 12:00:00", tz="US/Pacific"), - ], - ], [ "%Y-%m-%d %H:%M:%S%z", ["2010-01-01 12:00:00+0100"] * 2, @@ -479,18 +466,6 @@ def test_to_datetime_format_weeks(self, value, fmt, expected, cache): ] * 2, ], - [ - "%Y-%m-%d %H:%M:%S %z", - ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"], - [ - Timestamp( - "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60)) - ), - Timestamp( - "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=-60)) - ), - ], - ], [ "%Y-%m-%d %H:%M:%S %z", ["2010-01-01 12:00:00 Z", "2010-01-01 12:00:00 Z"], @@ -509,6 +484,47 @@ def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates): expected = Index(expected_dates) tm.assert_equal(result, expected) + @pytest.mark.parametrize( + "fmt,dates,expected_dates", + [ + [ + "%Y-%m-%d %H:%M:%S %Z", + [ + "2010-01-01 12:00:00 UTC", + "2010-01-01 12:00:00 GMT", + "2010-01-01 12:00:00 US/Pacific", + ], + [ + Timestamp("2010-01-01 12:00:00", tz="UTC"), + Timestamp("2010-01-01 12:00:00", tz="GMT"), + Timestamp("2010-01-01 12:00:00", tz="US/Pacific"), + ], + ], + [ + "%Y-%m-%d %H:%M:%S %z", + ["2010-01-01 12:00:00 +0100", 
"2010-01-01 12:00:00 -0100"], + [ + Timestamp( + "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60)) + ), + Timestamp( + "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=-60)) + ), + ], + ], + ], + ) + def test_to_datetime_parse_tzname_or_tzoffset_utcFalse_deprecated( + self, fmt, dates, expected_dates + ): + # GH 13486 + msg = "In a future version of pandas, parsing datetimes with mixed " + "time zones will raise a warning unless `utc=True`." + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_datetime(dates, format=fmt) + expected = Index(expected_dates) + tm.assert_equal(result, expected) + def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self): # GH 32792 dates = [ @@ -632,17 +648,6 @@ def test_to_datetime_mixed_date_and_string(self, format): ), id="all tz-aware, mixed offsets, with utc", ), - pytest.param( - False, - ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"], - Index( - [ - Timestamp("2000-01-01 01:00:00"), - Timestamp("2000-01-01 02:00:00+0000", tz="UTC"), - ], - ), - id="tz-aware string, naive pydatetime, without utc", - ), pytest.param( True, ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"], @@ -671,17 +676,48 @@ def test_to_datetime_mixed_datetime_and_string_with_format( tm.assert_index_equal(result, expected) @pytest.mark.parametrize( - "fmt, utc, expected", + "fmt", + ["%Y-%d-%m %H:%M:%S%z", "%Y-%m-%d %H:%M:%S%z"], + ids=["non-ISO8601 format", "ISO8601 format"], + ) + @pytest.mark.parametrize( + "utc, args, expected", [ pytest.param( - "%Y-%m-%d %H:%M:%S%z", - True, - DatetimeIndex( - ["2000-01-01 08:00:00+00:00", "2000-01-02 00:00:00+00:00", "NaT"], - dtype="datetime64[ns, UTC]", + False, + ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"], + Index( + [ + Timestamp("2000-01-01 01:00:00"), + Timestamp("2000-01-01 02:00:00+0000", tz="UTC"), + ], ), - id="ISO8601, UTC", + id="tz-aware string, naive pydatetime, without utc", ), + ], + ) + @pytest.mark.parametrize( + "constructor", + 
[Timestamp, lambda x: Timestamp(x).to_pydatetime()], + ) + def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utcFalse( + self, fmt, utc, args, expected, constructor + ): + # https://github.com/pandas-dev/pandas/issues/49298 + # https://github.com/pandas-dev/pandas/issues/50254 + # note: ISO8601 formats go down a fastpath, so we need to check both + # a ISO8601 format and a non-ISO8601 one + ts1 = constructor(args[0]) + ts2 = args[1] + msg = "In a future version of pandas, parsing datetimes with mixed " + "time zones will raise a warning unless `utc=True`." + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_datetime([ts1, ts2], format=fmt, utc=utc) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "fmt, utc, expected", + [ pytest.param( "%Y-%m-%d %H:%M:%S%z", False, @@ -694,15 +730,6 @@ def test_to_datetime_mixed_datetime_and_string_with_format( ), id="ISO8601, non-UTC", ), - pytest.param( - "%Y-%d-%m %H:%M:%S%z", - True, - DatetimeIndex( - ["2000-01-01 08:00:00+00:00", "2000-02-01 00:00:00+00:00", "NaT"], - dtype="datetime64[ns, UTC]", - ), - id="non-ISO8601, UTC", - ), pytest.param( "%Y-%d-%m %H:%M:%S%z", False, @@ -717,6 +744,42 @@ def test_to_datetime_mixed_datetime_and_string_with_format( ), ], ) + def test_to_datetime_mixed_offsets_with_none_tz(self, fmt, utc, expected): + # https://github.com/pandas-dev/pandas/issues/50071 + msg = "In a future version of pandas, parsing datetimes with mixed " + "time zones will raise a warning unless `utc=True`." 
+ + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_datetime( + ["2000-01-01 09:00:00+01:00", "2000-01-02 02:00:00+02:00", None], + format=fmt, + utc=utc, + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "fmt, utc, expected", + [ + pytest.param( + "%Y-%m-%d %H:%M:%S%z", + True, + DatetimeIndex( + ["2000-01-01 08:00:00+00:00", "2000-01-02 00:00:00+00:00", "NaT"], + dtype="datetime64[ns, UTC]", + ), + id="ISO8601, UTC", + ), + pytest.param( + "%Y-%d-%m %H:%M:%S%z", + True, + DatetimeIndex( + ["2000-01-01 08:00:00+00:00", "2000-02-01 00:00:00+00:00", "NaT"], + dtype="datetime64[ns, UTC]", + ), + id="non-ISO8601, UTC", + ), + ], + ) def test_to_datetime_mixed_offsets_with_none(self, fmt, utc, expected): # https://github.com/pandas-dev/pandas/issues/50071 result = to_datetime( @@ -1162,7 +1225,10 @@ def test_to_datetime_different_offsets(self, cache): ts_string_2 = "March 1, 2018 12:00:00+0500" arr = [ts_string_1] * 5 + [ts_string_2] * 5 expected = Index([parse(x) for x in arr]) - result = to_datetime(arr, cache=cache) + msg = "In a future version of pandas, parsing datetimes with mixed " + "time zones will raise a warning unless `utc=True`." + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_datetime(arr, cache=cache) tm.assert_index_equal(result, expected) def test_to_datetime_tz_pytz(self, cache): @@ -1528,7 +1594,10 @@ def test_to_datetime_coerce(self): "March 1, 2018 12:00:00+0500", "20100240", ] - result = to_datetime(ts_strings, errors="coerce") + msg = "In a future version of pandas, parsing datetimes with mixed " + "time zones will raise a warning unless `utc=True`." 
+ with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_datetime(ts_strings, errors="coerce") expected = Index( [ datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 14400)), @@ -1609,9 +1678,12 @@ def test_iso_8601_strings_with_same_offset(self): tm.assert_index_equal(result, expected) def test_iso_8601_strings_with_different_offsets(self): - # GH 17697, 11736 + # GH 17697, 11736, 50887 ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT] - result = to_datetime(ts_strings) + msg = "In a future version of pandas, parsing datetimes with mixed " + "time zones will raise a warning unless `utc=True`." + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_datetime(ts_strings) expected = np.array( [ datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), @@ -1718,6 +1790,24 @@ def test_to_datetime_fixed_offset(self): result = to_datetime(dates) assert result.tz == fixed_off + @pytest.mark.parametrize( + "date", + [ + ["2020-10-26 00:00:00+06:00", "2020-10-26 00:00:00+01:00"], + ["2020-10-26 00:00:00+06:00", Timestamp("2018-01-01", tz="US/Pacific")], + [ + "2020-10-26 00:00:00+06:00", + datetime(2020, 1, 1, 18, tzinfo=pytz.timezone("Australia/Melbourne")), + ], + ], + ) + def test_to_datetime_mixed_offsets_with_utcFalse_deprecated(self, date): + # GH 50887 + msg = "In a future version of pandas, parsing datetimes with mixed time zones " + "will raise a warning unless `utc=True`." 
+ with tm.assert_produces_warning(FutureWarning, match=msg): + to_datetime(date, utc=False) + class TestToDatetimeUnit: @pytest.mark.parametrize("unit", ["Y", "M"]) From 441e17f93e8f5f39471bc3f23da641376ae641ae Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 6 Jul 2023 10:46:06 +0200 Subject: [PATCH 03/25] fix tests in pandas/tests/extension/test_arrow.py --- pandas/tests/extension/test_arrow.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 56e35d30ad83c..8354e355087cf 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -24,6 +24,7 @@ import operator import pickle import re +import warnings import numpy as np import pytest @@ -323,11 +324,15 @@ def test_from_sequence_of_strings_pa_array(self, data, request): ) ) pa_array = data._pa_array.cast(pa.string()) - result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype) + with tm.assert_produces_warning(None): + warnings.simplefilter("ignore", FutureWarning) + result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype) tm.assert_extension_array_equal(result, data) pa_array = pa_array.combine_chunks() - result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype) + with tm.assert_produces_warning(None): + warnings.simplefilter("ignore", FutureWarning) + result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype) tm.assert_extension_array_equal(result, data) @@ -747,12 +752,14 @@ def test_EA_types(self, engine, data, dtype_backend, request): csv_output = BytesIO(csv_output) else: csv_output = StringIO(csv_output) - result = pd.read_csv( - csv_output, - dtype={"with_dtype": str(data.dtype)}, - engine=engine, - dtype_backend=dtype_backend, - ) + with tm.assert_produces_warning(None): + warnings.simplefilter("ignore", FutureWarning) + result = pd.read_csv( + csv_output, + dtype={"with_dtype": 
str(data.dtype)}, + engine=engine, + dtype_backend=dtype_backend, + ) expected = df self.assert_frame_equal(result, expected) From 5e568bd7965c673fcb02894fe478668003813ee7 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 7 Jul 2023 09:10:32 +0200 Subject: [PATCH 04/25] correct the definition of _return_parsed_timezone_results --- pandas/core/tools/datetimes.py | 27 ++++++++++--------- pandas/tests/extension/test_arrow.py | 23 ++++++---------- pandas/tests/frame/methods/test_to_csv.py | 7 ++--- .../tests/groupby/aggregate/test_aggregate.py | 23 +++------------- 4 files changed, 28 insertions(+), 52 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index b0736549c6854..583ccd27d1c2d 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -337,16 +337,9 @@ def _return_parsed_timezone_results( tz_result : Index-like of parsed dates with timezone """ tz_results = np.empty(len(result), dtype=object) - unique_timezones = unique(timezones) - if len(unique_timezones) > 1 and not utc: - warnings.warn( - "In a future version of pandas, parsing datetimes with mixed time " - "zones will raise a warning unless `utc=True`. Please specify `utc=True " - "to opt in to the new behaviour and silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - for zone in unique_timezones: + unique(timezones) + non_na_timezones = set() + for zone in unique(timezones): mask = timezones == zone dta = DatetimeArray(result[mask]).tz_localize(zone) if utc: @@ -354,8 +347,18 @@ def _return_parsed_timezone_results( dta = dta.tz_localize("utc") else: dta = dta.tz_convert("utc") + else: + if not dta.isna().all(): + non_na_timezones.add(zone) tz_results[mask] = dta - + if len(non_na_timezones) > 1: + warnings.warn( + "In a future version of pandas, parsing datetimes with mixed time " + "zones will raise a warning unless `utc=True`. 
Please specify `utc=True " + "to opt in to the new behaviour and silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) return Index(tz_results, name=name) @@ -457,7 +460,7 @@ def _convert_listlike_datetimes( if format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) - # `format` could be inferred, or user didn't ask for mixed-format parsing. + # `format` could be inferred, or user didn't ask for mixed-format parsing. if format is not None and format != "mixed": return _array_strptime_with_fallback(arg, name, utc, format, exact, errors) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 8354e355087cf..56e35d30ad83c 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -24,7 +24,6 @@ import operator import pickle import re -import warnings import numpy as np import pytest @@ -324,15 +323,11 @@ def test_from_sequence_of_strings_pa_array(self, data, request): ) ) pa_array = data._pa_array.cast(pa.string()) - with tm.assert_produces_warning(None): - warnings.simplefilter("ignore", FutureWarning) - result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype) + result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype) tm.assert_extension_array_equal(result, data) pa_array = pa_array.combine_chunks() - with tm.assert_produces_warning(None): - warnings.simplefilter("ignore", FutureWarning) - result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype) + result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype) tm.assert_extension_array_equal(result, data) @@ -752,14 +747,12 @@ def test_EA_types(self, engine, data, dtype_backend, request): csv_output = BytesIO(csv_output) else: csv_output = StringIO(csv_output) - with tm.assert_produces_warning(None): - warnings.simplefilter("ignore", FutureWarning) - result = pd.read_csv( - csv_output, - dtype={"with_dtype": str(data.dtype)}, - 
engine=engine, - dtype_backend=dtype_backend, - ) + result = pd.read_csv( + csv_output, + dtype={"with_dtype": str(data.dtype)}, + engine=engine, + dtype_backend=dtype_backend, + ) expected = df self.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 41951afd28a6e..ee9c4f05991a0 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -140,11 +140,8 @@ def test_to_csv_from_csv5(self, timezone_frame): .dt.tz_convert("UTC") .dt.tz_convert(timezone_frame[c].dt.tz) ) - msg = "In a future version of pandas, parsing datetimes with mixed " - "time zones will raise a warning unless `utc=True`." - with tm.assert_produces_warning(FutureWarning, match=msg): - result["B"] = converter("B") - result["C"] = converter("C") + result["B"] = converter("B") + result["C"] = converter("C") tm.assert_frame_equal(result, timezone_frame) def test_to_csv_cols_reordering(self): diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 5270cd0df0f99..3558377907931 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1451,32 +1451,15 @@ def test_group_mean_timedelta_nat(): ["2021-01-01T00:00", "NaT", "2021-01-01T02:00"], ["2021-01-01T01:00"], ), - ], -) -def test_group_mean_datetime64_nat_notz(input_data, expected_output): - # GH43132 - data = to_datetime(Series(input_data)) - expected = to_datetime(Series(expected_output, index=np.array([0]))) - - result = data.groupby([0, 0, 0]).mean() - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize( - "input_data, expected_output", - [ ( # timezone ["2021-01-01T00:00-0100", "NaT", "2021-01-01T02:00-0100"], ["2021-01-01T01:00-0100"], ), ], ) -def test_group_mean_datetime64_nat_tz(input_data, expected_output): - # GH43132, 50887 - msg = "In a future version of pandas, 
parsing datetimes with mixed " - "time zones will raise a warning unless `utc=True`." - with tm.assert_produces_warning(FutureWarning, match=msg): - data = to_datetime(Series(input_data)) +def test_group_mean_datetime64_nat(input_data, expected_output): + # GH43132 + data = to_datetime(Series(input_data)) expected = to_datetime(Series(expected_output, index=np.array([0]))) result = data.groupby([0, 0, 0]).mean() From 2aa5c103cdb7ce102ba97955475011652801dcff Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 7 Jul 2023 10:04:03 +0200 Subject: [PATCH 05/25] fix an example in docs: Parsing a CSV with mixed timezones --- doc/source/user_guide/io.rst | 10 ++-------- pandas/core/tools/datetimes.py | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 0084e885db2b5..c3f4ca370f6ff 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -935,6 +935,8 @@ Parsing a CSV with mixed timezones pandas cannot natively represent a column or index with mixed timezones. If your CSV file contains columns with a mixture of timezones, the default result will be an object-dtype column with strings, even with ``parse_dates``. +To parse the mixed-timezone values as a datetime column, read in as ``object`` dtype and +then call :func:`to_datetime` with ``utc=True``. .. ipython:: python @@ -943,14 +945,6 @@ an object-dtype column with strings, even with ``parse_dates``. a 2000-01-01T00:00:00+05:00 2000-01-01T00:00:00+06:00""" - df = pd.read_csv(StringIO(content), parse_dates=["a"]) - df["a"] - -To parse the mixed-timezone values as a datetime column, read in as ``object`` dtype and -then call :func:`to_datetime` with ``utc=True``. - -..
ipython:: python - df = pd.read_csv(StringIO(content)) df["a"] = pd.to_datetime(df["a"], utc=True) df["a"] diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 583ccd27d1c2d..603efa40176e7 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -460,7 +460,7 @@ def _convert_listlike_datetimes( if format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) - # `format` could be inferred, or user didn't ask for mixed-format parsing. + # `format` could be inferred, or user didn't ask for mixed-format parsing. if format is not None and format != "mixed": return _array_strptime_with_fallback(arg, name, utc, format, exact, errors) From 8197005fb11e15fec5450d360806416f3475e0ef Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 7 Jul 2023 13:30:00 +0200 Subject: [PATCH 06/25] correct def _array_to_datetime_object, add a test for mixed format, fix errors in docs --- doc/source/whatsnew/v0.24.0.rst | 2 +- pandas/_libs/tslib.pyx | 19 +++++++++++-------- pandas/tests/tools/test_to_datetime.py | 24 ++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 7f5025e6ce60b..58c3534e4e130 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -643,7 +643,7 @@ Parsing datetime strings with different UTC offsets will now create an Index of Passing ``utc=True`` will mimic the previous behavior but will correctly indicate that the dates have been converted to UTC -.. ipython:: python +.. 
code-block:: ipython pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a35cc574575ba..aff5be0004dfa 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -561,14 +561,6 @@ cpdef array_to_datetime( # (with individual dateutil.tzoffsets) are returned is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets: - warnings.warn( - "In a future version of pandas, parsing datetimes with mixed time " - "zones will raise a warning unless `utc=True`. " - "Please specify `utc=True to opt in to the new behaviour " - "and silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) return _array_to_datetime_object(values, errors, dayfirst, yearfirst) else: tz_offset = out_tzoffset_vals.pop() @@ -628,6 +620,7 @@ cdef _array_to_datetime_object( # 1) NaT or NaT-like values # 2) datetime strings, which we return as datetime.datetime # 3) special strings - "now" & "today" + unique_timezones = set() for i in range(n): # Analogous to: val = values[i] val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] @@ -657,6 +650,7 @@ cdef _array_to_datetime_object( tzinfo=tsobj.tzinfo, fold=tsobj.fold, ) + unique_timezones.add(tsobj.tzinfo) except (ValueError, OverflowError) as ex: ex.args = (f"{ex}, at position {i}", ) @@ -674,6 +668,15 @@ cdef _array_to_datetime_object( cnp.PyArray_MultiIter_NEXT(mi) + if len(unique_timezones) > 1: + warnings.warn( + "In a future version of pandas, parsing datetimes with mixed time " + "zones will raise a warning unless `utc=True`. 
" + "Please specify `utc=True to opt in to the new behaviour " + "and silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) return oresult_nd, None diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 5333150ae649b..9d9a1fdf11deb 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3680,3 +3680,27 @@ def test_from_numeric_arrow_dtype(any_numeric_ea_dtype): result = to_datetime(ser) expected = Series([1, 2], dtype="datetime64[ns]") tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "date, date_expected, warning", + [ + (["2020-01-01 00:00+00:00", "2020-01-01 00:00+02:00", ""], None, FutureWarning), + ( + ["2020-01-01 00:00+00:00", ""], + [Timestamp("2020-01-01 00:00+00:00"), "NaT"], + None, + ), + ], +) +def test_to_datetime_with_empty_str_utcFalse_format_mixed(date, date_expected, warning): + msg = "In a future version of pandas, parsing datetimes with mixed time zones " + "will raise a warning unless `utc=True`." 
+ + if warning is not None: + with tm.assert_produces_warning(warning, match=msg): + to_datetime(date, format="mixed") + else: + result = to_datetime(date, format="mixed") + expected = Index(date_expected, dtype=object) + tm.assert_index_equal(result, expected) From ca4b2141d612498c12a7b3745d0f7817ff2ee002 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 7 Jul 2023 15:23:49 +0200 Subject: [PATCH 07/25] correct example in whatsnew/v0.24.0.rst and fix pylint failures --- doc/source/whatsnew/v0.24.0.rst | 20 +++++++++----- .../indexes/datetimes/test_constructors.py | 5 ++-- pandas/tests/io/json/test_readlines.py | 3 +-- pandas/tests/tools/test_to_datetime.py | 27 +++++++------------ pandas/tests/tslibs/test_array_to_datetime.py | 3 +-- 5 files changed, 27 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 58c3534e4e130..d617dfd53d014 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -632,18 +632,24 @@ Parsing datetime strings with the same UTC offset will preserve the UTC offset i Parsing datetime strings with different UTC offsets will now create an Index of ``datetime.datetime`` objects with different UTC offsets -.. ipython:: python +.. code-block:: ipython + + In [59]: idx = pd.to_datetime(["2015-11-18 15:30:00+05:30", + "2015-11-18 16:30:00+06:30"]) + + In[60]: idx + Out[60]: Index([2015-11-18 15:30:00+05:30, 2015-11-18 16:30:00+06:30], dtype='object') - idx = pd.to_datetime(["2015-11-18 15:30:00+05:30", - "2015-11-18 16:30:00+06:30"]) - idx - idx[0] - idx[1] + In[61]: idx[0] + Out[61]: Timestamp('2015-11-18 15:30:00+0530', tz='UTC+05:30') + + In[62]: idx[1] + Out[62]: Timestamp('2015-11-18 16:30:00+0630', tz='UTC+06:30') Passing ``utc=True`` will mimic the previous behavior but will correctly indicate that the dates have been converted to UTC -.. code-block:: ipython +.. 
ipython:: python pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 00995b32bff8a..733c14f33567a 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -300,10 +300,9 @@ def test_construction_index_with_mixed_timezones(self): assert not isinstance(result, DatetimeIndex) msg = "DatetimeIndex has mixed timezones" + msg_depr = "parsing datetimes with mixed time zones will raise a warning" with pytest.raises(TypeError, match=msg): - msg = "In a future version of pandas, parsing datetimes with mixed " - "time zones will raise a warning unless `utc=True`." - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(FutureWarning, match=msg_depr): DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"]) # length = 1 diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index abde17d68026e..546e62fd5b2ff 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -54,8 +54,7 @@ def test_read_datetime(request, engine): if engine == "pyarrow": result = read_json(StringIO(json_line), engine=engine) else: - msg = "In a future version of pandas, parsing datetimes with mixed time zones " - "will raise a warning unless `utc=True`." 
+ msg = "parsing datetimes with mixed time zones will raise a warning" with tm.assert_produces_warning(FutureWarning, match=msg): result = read_json(StringIO(json_line), engine=engine) expected = DataFrame( diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 9d9a1fdf11deb..62a496cc7a021 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -517,9 +517,8 @@ def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates): def test_to_datetime_parse_tzname_or_tzoffset_utcFalse_deprecated( self, fmt, dates, expected_dates ): - # GH 13486 - msg = "In a future version of pandas, parsing datetimes with mixed " - "time zones will raise a warning unless `utc=True`." + # GH 13486, 50887 + msg = "parsing datetimes with mixed time zones will raise a warning" with tm.assert_produces_warning(FutureWarning, match=msg): result = to_datetime(dates, format=fmt) expected = Index(expected_dates) @@ -709,8 +708,7 @@ def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utcFals # a ISO8601 format and a non-ISO8601 one ts1 = constructor(args[0]) ts2 = args[1] - msg = "In a future version of pandas, parsing datetimes with mixed " - "time zones will raise a warning unless `utc=True`." + msg = "parsing datetimes with mixed time zones will raise a warning" with tm.assert_produces_warning(FutureWarning, match=msg): result = to_datetime([ts1, ts2], format=fmt, utc=utc) tm.assert_index_equal(result, expected) @@ -746,8 +744,7 @@ def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utcFals ) def test_to_datetime_mixed_offsets_with_none_tz(self, fmt, utc, expected): # https://github.com/pandas-dev/pandas/issues/50071 - msg = "In a future version of pandas, parsing datetimes with mixed " - "time zones will raise a warning unless `utc=True`." 
+ msg = "parsing datetimes with mixed time zones will raise a warning" with tm.assert_produces_warning(FutureWarning, match=msg): result = to_datetime( @@ -1225,8 +1222,7 @@ def test_to_datetime_different_offsets(self, cache): ts_string_2 = "March 1, 2018 12:00:00+0500" arr = [ts_string_1] * 5 + [ts_string_2] * 5 expected = Index([parse(x) for x in arr]) - msg = "In a future version of pandas, parsing datetimes with mixed " - "time zones will raise a warning unless `utc=True`." + msg = "parsing datetimes with mixed time zones will raise a warning" with tm.assert_produces_warning(FutureWarning, match=msg): result = to_datetime(arr, cache=cache) tm.assert_index_equal(result, expected) @@ -1594,8 +1590,7 @@ def test_to_datetime_coerce(self): "March 1, 2018 12:00:00+0500", "20100240", ] - msg = "In a future version of pandas, parsing datetimes with mixed " - "time zones will raise a warning unless `utc=True`." + msg = "parsing datetimes with mixed time zones will raise a warning" with tm.assert_produces_warning(FutureWarning, match=msg): result = to_datetime(ts_strings, errors="coerce") expected = Index( @@ -1680,8 +1675,7 @@ def test_iso_8601_strings_with_same_offset(self): def test_iso_8601_strings_with_different_offsets(self): # GH 17697, 11736, 50887 ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT] - msg = "In a future version of pandas, parsing datetimes with mixed " - "time zones will raise a warning unless `utc=True`." + msg = "parsing datetimes with mixed time zones will raise a warning" with tm.assert_produces_warning(FutureWarning, match=msg): result = to_datetime(ts_strings) expected = np.array( @@ -1721,8 +1715,7 @@ def test_mixed_offsets_with_native_datetime_raises(self): now = Timestamp("now") today = Timestamp("today") - msg = "In a future version of pandas, parsing datetimes with mixed time zones " - "will raise a warning unless `utc=True`." 
+ msg = "parsing datetimes with mixed time zones will raise a warning" with tm.assert_produces_warning(FutureWarning, match=msg): mixed = to_datetime(ser) expected = Series( @@ -1803,8 +1796,7 @@ def test_to_datetime_fixed_offset(self): ) def test_to_datetime_mixed_offsets_with_utcFalse_deprecated(self, date): # GH 50887 - msg = "In a future version of pandas, parsing datetimes with mixed time zones " - "will raise a warning unless `utc=True`." + msg = "parsing datetimes with mixed time zones will raise a warning" with tm.assert_produces_warning(FutureWarning, match=msg): to_datetime(date, utc=False) @@ -3694,6 +3686,7 @@ def test_from_numeric_arrow_dtype(any_numeric_ea_dtype): ], ) def test_to_datetime_with_empty_str_utcFalse_format_mixed(date, date_expected, warning): + # GH 50887 msg = "In a future version of pandas, parsing datetimes with mixed time zones " "will raise a warning unless `utc=True`." diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 1eb6c69893a75..435fe5f4b90d8 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -85,8 +85,7 @@ def test_parsing_different_timezone_offsets(): data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] data = np.array(data, dtype=object) - msg = "In a future version of pandas, parsing datetimes with mixed time zones " - "will raise a warning unless `utc=True`." 
+ msg = "parsing datetimes with mixed time zones will raise a warning" with tm.assert_produces_warning(FutureWarning, match=msg): result, result_tz = tslib.array_to_datetime(data) expected = np.array( From a0e970a3c67f8a75560310d3c011e2a3fd405457 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 7 Jul 2023 15:37:42 +0200 Subject: [PATCH 08/25] correct str for message in FutureWarning in the test with format mixed --- pandas/tests/tools/test_to_datetime.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 62a496cc7a021..1d6cb95469416 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3687,8 +3687,7 @@ def test_from_numeric_arrow_dtype(any_numeric_ea_dtype): ) def test_to_datetime_with_empty_str_utcFalse_format_mixed(date, date_expected, warning): # GH 50887 - msg = "In a future version of pandas, parsing datetimes with mixed time zones " - "will raise a warning unless `utc=True`." 
+ msg = "parsing datetimes with mixed time zones will raise a warning" if warning is not None: with tm.assert_produces_warning(warning, match=msg): From 156ea8a7d0d66a52e94b98ebe53a74e54372c4d8 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 7 Jul 2023 16:24:20 +0200 Subject: [PATCH 09/25] fix an error in an example in whatsnew/v0.24.0.rst --- doc/source/whatsnew/v0.24.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index d617dfd53d014..a7918cf98c93f 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -686,7 +686,8 @@ Parsing mixed-timezones with :func:`read_csv` a 2000-01-01T00:00:00+05:00 2000-01-01T00:00:00+06:00""" - df = pd.read_csv(io.StringIO(content), parse_dates=['a']) + df = pd.read_csv(StringIO(content)) + df.a = pd.to_datetime(df['a'], utc=True) df.a As can be seen, the ``dtype`` is object; each value in the column is a string. From 643d3d6c46652ad0e0557e6c81f7db1d605952c7 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sat, 8 Jul 2023 13:46:40 +0200 Subject: [PATCH 10/25] correct examples and the description of the param utc in docstring of to_datetime, correct an example in whatsnew/v1.1.0.rst --- doc/source/whatsnew/v1.1.0.rst | 9 ++++++++- pandas/_libs/tslib.pyx | 2 +- pandas/core/tools/datetimes.py | 31 ++++++++++++++++++++++++------- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index f31ab02725394..dd566eaab1e75 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -208,7 +208,14 @@ For example: tz_strs = ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100", "2010-01-01 12:00:00 +0300", "2010-01-01 12:00:00 +0400"] pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z', utc=True) - pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z') + +.. 
code-block:: ipython + + In[37]: pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z') + Out[37]: + Index([2010-01-01 12:00:00+01:00, 2010-01-01 12:00:00-01:00, + 2010-01-01 12:00:00+03:00, 2010-01-01 12:00:00+04:00], + dtype='object') .. _whatsnew_110.grouper_resample_origin: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index aff5be0004dfa..4b06686368ffb 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -672,7 +672,7 @@ cdef _array_to_datetime_object( warnings.warn( "In a future version of pandas, parsing datetimes with mixed time " "zones will raise a warning unless `utc=True`. " - "Please specify `utc=True to opt in to the new behaviour " + "Please specify `utc=True` to opt in to the new behaviour " "and silence this warning.", FutureWarning, stacklevel=find_stack_level(), diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 603efa40176e7..6a3a7369aad28 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -354,7 +354,7 @@ def _return_parsed_timezone_results( if len(non_na_timezones) > 1: warnings.warn( "In a future version of pandas, parsing datetimes with mixed time " - "zones will raise a warning unless `utc=True`. Please specify `utc=True " + "zones will raise a warning unless `utc=True`. Please specify `utc=True` " "to opt in to the new behaviour and silence this warning.", FutureWarning, stacklevel=find_stack_level(), @@ -761,6 +761,13 @@ def to_datetime( offsets (typically, daylight savings), see :ref:`Examples ` section for details. + .. warning:: + + In a future version of pandas, parsing datetimes with mixed time + zones will raise a warning unless `utc=True`. + Please specify `utc=True` to opt in to the new behaviour + and silence this warning. 
+ See also: pandas general documentation about `timezone conversion and localization >> pd.to_datetime(['2020-10-25 02:00 +0200', '2020-10-25 04:00 +0100']) + are **not successfully converted** to a :class:`DatetimeIndex`. + Parsing datetimes with mixed time zones will raise a warning unless + `utc=True`. If you specify `utc=False` the warning below will be raised + and a simple :class:`Index` containing :class:`datetime.datetime` + objects will be returned: + + >>> pd.to_datetime(['2020-10-25 02:00 +0200', + ... '2020-10-25 04:00 +0100']) # doctest: +SKIP + FutureWarning: In a future version of pandas, parsing datetimes with mixed + time zones will raise a warning unless `utc=True`. Please specify `utc=True` + to opt in to the new behaviour and silence this warning. Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00], dtype='object') @@ -989,7 +1002,11 @@ def to_datetime( a simple :class:`Index` containing :class:`datetime.datetime` objects: >>> from datetime import datetime - >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)]) + >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", + ... datetime(2020, 1, 1, 3, 0)]) # doctest: +SKIP + FutureWarning: In a future version of pandas, parsing datetimes with mixed + time zones will raise a warning unless `utc=True`. Please specify `utc=True` + to opt in to the new behaviour and silence this warning. 
Index([2020-01-01 01:00:00-01:00, 2020-01-01 03:00:00], dtype='object') | From 61d4debf4e62bd855a14cb669568b0ec750f992e Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sun, 9 Jul 2023 16:54:16 +0200 Subject: [PATCH 11/25] update whatsnew/v2.1.0.rst --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6390fbeed8548..d901b89bd1170 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -304,6 +304,7 @@ Deprecations - Deprecated literal string/bytes input to :func:`read_html`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead. (:issue:`53767`) - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`) - Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`) +- Deprecated parsing datetimes with mixed time zones unless user pass ``utc=True`` to :func:`to_datetime`, in a future version this will raise a warning and will advise to use ``utc=True`` (:issue:`50887`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) - Deprecated strings ``T``, ``t``, ``L`` and ``l`` denoting units in :func:`to_timedelta` (:issue:`52536`) - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`) From f2bbedf0a8bd485acb7c279fd14d3794b8007f7d Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 10 Jul 2023 12:37:03 +0200 Subject: [PATCH 12/25] correct docstring for to_datetime, example in whatsnew/v0.24.0.rst, rename test functions --- doc/source/whatsnew/v0.24.0.rst | 24 +++++++++++++++--------- 
doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/tools/datetimes.py | 5 ++--- pandas/tests/tools/test_to_datetime.py | 10 ++++++---- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index a7918cf98c93f..73a523b14f9f7 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -679,16 +679,22 @@ Parsing mixed-timezones with :func:`read_csv` *New behavior* -.. ipython:: python +.. code-block:: ipython + + In[64]: import io + + In[65]: content = """\ + ...: a + ...: 2000-01-01T00:00:00+05:00 + ...: 2000-01-01T00:00:00+06:00""" + + In[66]: df = pd.read_csv(io.StringIO(content), parse_dates=['a']) - import io - content = """\ - a - 2000-01-01T00:00:00+05:00 - 2000-01-01T00:00:00+06:00""" - df = pd.read_csv(StringIO(content)) - df.a = pd.to_datetime(df['a'], utc=True) - df.a + In[67]: df.a + Out[67]: + 0 2000-01-01 00:00:00+05:00 + 1 2000-01-01 00:00:00+06:00 + Name: a, Length: 2, dtype: object As can be seen, the ``dtype`` is object; each value in the column is a string. To convert the strings to an array of datetimes, the ``date_parser`` argument diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index d901b89bd1170..6af722bbe8618 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -304,7 +304,7 @@ Deprecations - Deprecated literal string/bytes input to :func:`read_html`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead. 
(:issue:`53767`) - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`) - Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`) -- Deprecated parsing datetimes with mixed time zones unless user pass ``utc=True`` to :func:`to_datetime`, in a future version this will raise a warning and will advise to use ``utc=True`` (:issue:`50887`) +- Deprecated parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`50887`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) - Deprecated strings ``T``, ``t``, ``L`` and ``l`` denoting units in :func:`to_timedelta` (:issue:`52536`) - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 6a3a7369aad28..173f8ef1d627d 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -337,7 +337,6 @@ def _return_parsed_timezone_results( tz_result : Index-like of parsed dates with timezone """ tz_results = np.empty(len(result), dtype=object) - unique(timezones) non_na_timezones = set() for zone in unique(timezones): mask = timezones == zone @@ -985,8 +984,8 @@ def to_datetime( - However, timezone-aware inputs *with mixed time offsets* (for example issued from a timezone with daylight savings, such as Europe/Paris) are **not successfully converted** to a :class:`DatetimeIndex`. - Parsing datetimes with mixed time zones will raise a warning unless - `utc=True`. 
If you specify `utc=False` the warning below will be raised + Parsing datetimes with mixed time zones will show a warning unless + `utc=True`. If you specify `utc=False` the warning below will be shown and a simple :class:`Index` containing :class:`datetime.datetime` objects will be returned: diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 1d6cb95469416..3c77bfa6d31a2 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -514,7 +514,7 @@ def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates): ], ], ) - def test_to_datetime_parse_tzname_or_tzoffset_utcFalse_deprecated( + def test_to_datetime_parse_tzname_or_tzoffset_utc_false_deprecated( self, fmt, dates, expected_dates ): # GH 13486, 50887 @@ -699,7 +699,7 @@ def test_to_datetime_mixed_datetime_and_string_with_format( "constructor", [Timestamp, lambda x: Timestamp(x).to_pydatetime()], ) - def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utcFalse( + def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utc_false( self, fmt, utc, args, expected, constructor ): # https://github.com/pandas-dev/pandas/issues/49298 @@ -1794,7 +1794,7 @@ def test_to_datetime_fixed_offset(self): ], ], ) - def test_to_datetime_mixed_offsets_with_utcFalse_deprecated(self, date): + def test_to_datetime_mixed_offsets_with_utc_false_deprecated(self, date): # GH 50887 msg = "parsing datetimes with mixed time zones will raise a warning" with tm.assert_produces_warning(FutureWarning, match=msg): @@ -3685,7 +3685,9 @@ def test_from_numeric_arrow_dtype(any_numeric_ea_dtype): ), ], ) -def test_to_datetime_with_empty_str_utcFalse_format_mixed(date, date_expected, warning): +def test_to_datetime_with_empty_str_utc_false_format_mixed( + date, date_expected, warning +): # GH 50887 msg = "parsing datetimes with mixed time zones will raise a warning" From 
5c4904a8dcc7605bc7349a0f081d25fb3e4a179f Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 13 Jul 2023 23:00:51 +0200 Subject: [PATCH 13/25] refactor tests for to_datetime --- pandas/tests/tools/test_to_datetime.py | 69 ++++++++++---------------- 1 file changed, 25 insertions(+), 44 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 3c77bfa6d31a2..c12d36fa8e949 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -679,38 +679,30 @@ def test_to_datetime_mixed_datetime_and_string_with_format( ["%Y-%d-%m %H:%M:%S%z", "%Y-%m-%d %H:%M:%S%z"], ids=["non-ISO8601 format", "ISO8601 format"], ) - @pytest.mark.parametrize( - "utc, args, expected", - [ - pytest.param( - False, - ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"], - Index( - [ - Timestamp("2000-01-01 01:00:00"), - Timestamp("2000-01-01 02:00:00+0000", tz="UTC"), - ], - ), - id="tz-aware string, naive pydatetime, without utc", - ), - ], - ) @pytest.mark.parametrize( "constructor", [Timestamp, lambda x: Timestamp(x).to_pydatetime()], ) def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utc_false( - self, fmt, utc, args, expected, constructor + self, fmt, constructor ): # https://github.com/pandas-dev/pandas/issues/49298 # https://github.com/pandas-dev/pandas/issues/50254 # note: ISO8601 formats go down a fastpath, so we need to check both # a ISO8601 format and a non-ISO8601 one + args = ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"] ts1 = constructor(args[0]) ts2 = args[1] msg = "parsing datetimes with mixed time zones will raise a warning" + + expected = Index( + [ + Timestamp("2000-01-01 01:00:00"), + Timestamp("2000-01-01 02:00:00+0000", tz="UTC"), + ], + ) with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_datetime([ts1, ts2], format=fmt, utc=utc) + result = to_datetime([ts1, ts2], format=fmt, utc=False) tm.assert_index_equal(result, expected) 
@pytest.mark.parametrize( @@ -755,11 +747,10 @@ def test_to_datetime_mixed_offsets_with_none_tz(self, fmt, utc, expected): tm.assert_index_equal(result, expected) @pytest.mark.parametrize( - "fmt, utc, expected", + "fmt, expected", [ pytest.param( "%Y-%m-%d %H:%M:%S%z", - True, DatetimeIndex( ["2000-01-01 08:00:00+00:00", "2000-01-02 00:00:00+00:00", "NaT"], dtype="datetime64[ns, UTC]", @@ -768,7 +759,6 @@ def test_to_datetime_mixed_offsets_with_none_tz(self, fmt, utc, expected): ), pytest.param( "%Y-%d-%m %H:%M:%S%z", - True, DatetimeIndex( ["2000-01-01 08:00:00+00:00", "2000-02-01 00:00:00+00:00", "NaT"], dtype="datetime64[ns, UTC]", @@ -777,12 +767,12 @@ def test_to_datetime_mixed_offsets_with_none_tz(self, fmt, utc, expected): ), ], ) - def test_to_datetime_mixed_offsets_with_none(self, fmt, utc, expected): + def test_to_datetime_mixed_offsets_with_none(self, fmt, expected): # https://github.com/pandas-dev/pandas/issues/50071 result = to_datetime( ["2000-01-01 09:00:00+01:00", "2000-01-02 02:00:00+02:00", None], format=fmt, - utc=utc, + utc=True, ) tm.assert_index_equal(result, expected) @@ -3674,27 +3664,18 @@ def test_from_numeric_arrow_dtype(any_numeric_ea_dtype): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize( - "date, date_expected, warning", - [ - (["2020-01-01 00:00+00:00", "2020-01-01 00:00+02:00", ""], None, FutureWarning), - ( - ["2020-01-01 00:00+00:00", ""], - [Timestamp("2020-01-01 00:00+00:00"), "NaT"], - None, - ), - ], -) -def test_to_datetime_with_empty_str_utc_false_format_mixed( - date, date_expected, warning -): +def test_to_datetime_with_empty_str_utc_false_format_mixed(): + # GH 50887 + result = to_datetime(["2020-01-01 00:00+00:00", ""], format="mixed") + expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype=object) + tm.assert_index_equal(result, expected) + + +def test_to_datetime_with_empty_str_utc_false_offsets_and_format_mixed(): # GH 50887 msg = "parsing datetimes with mixed time zones will raise a 
warning" - if warning is not None: - with tm.assert_produces_warning(warning, match=msg): - to_datetime(date, format="mixed") - else: - result = to_datetime(date, format="mixed") - expected = Index(date_expected, dtype=object) - tm.assert_index_equal(result, expected) + with tm.assert_produces_warning(FutureWarning, match=msg): + to_datetime( + ["2020-01-01 00:00+00:00", "2020-01-01 00:00+02:00", ""], format="mixed" + ) From f5e5e1e371af4175403983467fca9ce45fb5a080 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 14 Jul 2023 10:27:26 +0200 Subject: [PATCH 14/25] refactor test for to_datetime --- pandas/tests/tools/test_to_datetime.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index c12d36fa8e949..df06a0db7481a 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -706,11 +706,10 @@ def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utc_fal tm.assert_index_equal(result, expected) @pytest.mark.parametrize( - "fmt, utc, expected", + "fmt, expected", [ pytest.param( "%Y-%m-%d %H:%M:%S%z", - False, Index( [ Timestamp("2000-01-01 09:00:00+0100", tz="UTC+01:00"), @@ -722,7 +721,6 @@ def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utc_fal ), pytest.param( "%Y-%d-%m %H:%M:%S%z", - False, Index( [ Timestamp("2000-01-01 09:00:00+0100", tz="UTC+01:00"), @@ -734,7 +732,7 @@ def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utc_fal ), ], ) - def test_to_datetime_mixed_offsets_with_none_tz(self, fmt, utc, expected): + def test_to_datetime_mixed_offsets_with_none_tz(self, fmt, expected): # https://github.com/pandas-dev/pandas/issues/50071 msg = "parsing datetimes with mixed time zones will raise a warning" @@ -742,7 +740,7 @@ def test_to_datetime_mixed_offsets_with_none_tz(self, fmt, utc, expected): result = to_datetime( ["2000-01-01 
09:00:00+01:00", "2000-01-02 02:00:00+02:00", None], format=fmt, - utc=utc, + utc=False, ) tm.assert_index_equal(result, expected) From 88f3845e338ca1a2159fe2a2d4cad652d31cd8a3 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 17 Jul 2023 23:11:08 +0200 Subject: [PATCH 15/25] add example to whatsnew/v2.1.0.rst --- doc/source/whatsnew/v2.1.0.rst | 38 +++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6af722bbe8618..a1537167f78fc 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -238,8 +238,41 @@ Other API changes .. --------------------------------------------------------------------------- .. _whatsnew_210.deprecations: -Deprecations -~~~~~~~~~~~~ +Deprecate parsing datetimes with mixed time zones +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Parsing datetimes with mixed time zones is deprecated and will show a warning unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`50887`) + +*Previous behavior*: + +.. code-block:: ipython + + In [7]: l = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] + + In [8]: pd.to_datetime(l, utc=False) + Out[8]: + Index([2020-01-01 00:00:00+06:00, 2020-01-01 00:00:00+01:00], dtype='object') + +*New behavior*: + +.. code-block:: ipython + + In [9]: pd.to_datetime(l, utc=False) + FutureWarning: + In a future version of pandas, parsing datetimes with mixed time zones will raise + a warning unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour + and silence this warning. + Index([2020-01-01 00:00:00+06:00, 2020-01-01 00:00:00+01:00], dtype='object') + +In order to opt in to the new behaviour and silence this warning please specify ``utc=True``: + +.. 
ipython:: python + + l = ["2020-10-26 00:00:00+06:00", "2020-10-26 00:00:00+01:00"] + pd.to_datetime(l, utc=True) + +Other Deprecations +~~~~~~~~~~~~~~~~~~ - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) - Deprecated 'fill_method' and 'limit' keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`, explicitly call ``ffill`` or ``bfill`` before calling ``pct_change`` instead (:issue:`53491`) - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`) @@ -304,7 +337,6 @@ Deprecations - Deprecated literal string/bytes input to :func:`read_html`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead. 
(:issue:`53767`) - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`) - Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`) -- Deprecated parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`50887`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) - Deprecated strings ``T``, ``t``, ``L`` and ``l`` denoting units in :func:`to_timedelta` (:issue:`52536`) - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`) From 3d749727d033cbfa0661ab1554148ebee0e0e149 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 17 Jul 2023 23:19:46 +0200 Subject: [PATCH 16/25] correct the example --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index a1537167f78fc..a627bc0d21396 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -268,7 +268,7 @@ In order to opt in to the new behaviour and silence this warning please specify .. 
ipython:: python - l = ["2020-10-26 00:00:00+06:00", "2020-10-26 00:00:00+01:00"] + l = l = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] pd.to_datetime(l, utc=True) Other Deprecations From 88ed6c1e3db0bab439c5720a40b790f1620db788 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Wed, 19 Jul 2023 00:12:36 +0200 Subject: [PATCH 17/25] correct the example in whatsnew/v2.1.0.rst --- doc/source/whatsnew/v2.1.0.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index a627bc0d21396..1f436862b2afa 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -247,9 +247,9 @@ Parsing datetimes with mixed time zones is deprecated and will show a warning un .. code-block:: ipython - In [7]: l = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] + In [7]: data = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] - In [8]: pd.to_datetime(l, utc=False) + In [8]: pd.to_datetime(data, utc=False) Out[8]: Index([2020-01-01 00:00:00+06:00, 2020-01-01 00:00:00+01:00], dtype='object') @@ -257,7 +257,7 @@ Parsing datetimes with mixed time zones is deprecated and will show a warning un .. code-block:: ipython - In [9]: pd.to_datetime(l, utc=False) + In [9]: pd.to_datetime(data, utc=False) FutureWarning: In a future version of pandas, parsing datetimes with mixed time zones will raise a warning unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour @@ -268,8 +268,8 @@ In order to opt in to the new behaviour and silence this warning please specify .. 
ipython:: python - l = l = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] - pd.to_datetime(l, utc=True) + data = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] + pd.to_datetime(data, utc=True) Other Deprecations ~~~~~~~~~~~~~~~~~~ From 6ecd997e1140deafec7d27a2edfc0b730ef268c0 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 21 Jul 2023 00:38:42 +0200 Subject: [PATCH 18/25] correct def _array_to_datetime_object and fix test for read_json --- pandas/_libs/tslib.pyx | 3 ++- pandas/tests/io/json/test_readlines.py | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 4b06686368ffb..3dc5be92c1518 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -650,7 +650,8 @@ cdef _array_to_datetime_object( tzinfo=tsobj.tzinfo, fold=tsobj.fold, ) - unique_timezones.add(tsobj.tzinfo) + if tsobj.tzinfo is not None: + unique_timezones.add(tsobj.tzinfo) except (ValueError, OverflowError) as ex: ex.args = (f"{ex}, at position {i}", ) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 080e1f4c68265..c2f915e33df8a 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -54,9 +54,7 @@ def test_read_datetime(request, engine): if engine == "pyarrow": result = read_json(StringIO(json_line), engine=engine) else: - msg = "parsing datetimes with mixed time zones will raise a warning" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = read_json(StringIO(json_line), engine=engine) + result = read_json(StringIO(json_line), engine=engine) expected = DataFrame( [[1, "2020-03-05", "hector"], [2, "2020-04-08T09:58:49+00:00", "hector"]], columns=["accounts", "date", "name"], From dc7c54d0310f6579c52e71c6591d2fb37dc5312b Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 24 Jul 2023 08:42:22 +0200 Subject: [PATCH 19/25] add catch_warnings to filter the warning in 
test_read_datetime --- pandas/_libs/tslib.pyx | 3 +-- pandas/io/json/_json.py | 8 +++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 3dc5be92c1518..4b06686368ffb 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -650,8 +650,7 @@ cdef _array_to_datetime_object( tzinfo=tsobj.tzinfo, fold=tsobj.fold, ) - if tsobj.tzinfo is not None: - unique_timezones.add(tsobj.tzinfo) + unique_timezones.add(tsobj.tzinfo) except (ValueError, OverflowError) as ex: ex.args = (f"{ex}, at position {i}", ) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index ec0469a393873..16009a43f9329 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1312,7 +1312,13 @@ def _try_convert_to_date(self, data): date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS for date_unit in date_units: try: - new_data = to_datetime(new_data, errors="raise", unit=date_unit) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "parsing datetimes with mixed time zones will raise a warning", + category=FutureWarning, + ) + new_data = to_datetime(new_data, errors="raise", unit=date_unit) except (ValueError, OverflowError, TypeError): continue return new_data, True From b5bbd2b4ab21619e529dd2147097aad9d21d437e Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 24 Jul 2023 11:27:55 +0200 Subject: [PATCH 20/25] correct msg in catch_warnings to filter the warning in test_read_datetime --- pandas/io/json/_json.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 16009a43f9329..facddfe3d934c 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1315,7 +1315,8 @@ def _try_convert_to_date(self, data): with warnings.catch_warnings(): warnings.filterwarnings( "ignore", - "parsing datetimes with mixed time zones will raise a warning", + ".*parsing datetimes with mixed time 
" + "zones will raise a warning", category=FutureWarning, ) new_data = to_datetime(new_data, errors="raise", unit=date_unit) From b01c3eff519375e0117a506810fb24ef28a3bc30 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 25 Jul 2023 21:58:34 +0200 Subject: [PATCH 21/25] catch the warning in test_from_csv_with_mixed_offsets --- pandas/io/parsers/base_parser.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 0a90deedf7ad2..5bdb831e923cb 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1144,14 +1144,20 @@ def converter(*date_cols, col: Hashable): date_format.get(col) if isinstance(date_format, dict) else date_format ) - result = tools.to_datetime( - ensure_object(strs), - format=date_fmt, - utc=False, - dayfirst=dayfirst, - errors="ignore", - cache=cache_dates, - ) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + ".*parsing datetimes with mixed time zones will raise a warning", + category=FutureWarning, + ) + result = tools.to_datetime( + ensure_object(strs), + format=date_fmt, + utc=False, + dayfirst=dayfirst, + errors="ignore", + cache=cache_dates, + ) if isinstance(result, DatetimeIndex): arr = result.to_numpy() arr.flags.writeable = True From 04ef0367189a724cc65205d3cbd5d0882fa4da45 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 26 Jul 2023 09:27:11 +0100 Subject: [PATCH 22/25] reword whatsnew --- doc/source/whatsnew/v2.1.0.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index c310d52110f52..e794a16ea93a5 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -298,7 +298,7 @@ Other API changes Deprecate parsing datetimes with mixed time zones ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Parsing 
datetimes with mixed time zones is deprecated and will show a warning unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`50887`) +Parsing datetimes with mixed time zones is deprecated and shows a warning unless the user passes ``utc=True`` to :func:`to_datetime` (:issue:`50887`) *Previous behavior*: @@ -321,7 +321,8 @@ Parsing datetimes with mixed time zones is deprecated and will show a warning un and silence this warning. Index([2020-01-01 00:00:00+06:00, 2020-01-01 00:00:00+01:00], dtype='object') -In order to opt in to the new behaviour and silence this warning please specify ``utc=True``: +In order to silence this warning and avoid an error in a future version of pandas, +please specify ``utc=True``: .. ipython:: python From c42f143e2018bcb3bb4aada7246ec48c6a8bbff5 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Wed, 26 Jul 2023 11:40:00 +0200 Subject: [PATCH 23/25] add catch_warnings to converter --- pandas/io/parsers/base_parser.py | 40 ++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 5bdb831e923cb..60996a7d42187 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1165,22 +1165,38 @@ def converter(*date_cols, col: Hashable): return result._values else: try: - result = tools.to_datetime( - date_parser(*(unpack_if_single_element(arg) for arg in date_cols)), - errors="ignore", - cache=cache_dates, - ) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + ".*parsing datetimes with mixed time zones " + "will raise a warning", + category=FutureWarning, + ) + result = tools.to_datetime( + date_parser( + *(unpack_if_single_element(arg) for arg in date_cols) + ), + errors="ignore", + cache=cache_dates, + ) if isinstance(result, datetime.datetime): raise Exception("scalar parser") return result except Exception: - return tools.to_datetime( - parsing.try_parse_dates( - 
parsing.concat_date_cols(date_cols), - parser=date_parser, - ), - errors="ignore", - ) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + ".*parsing datetimes with mixed time zones " + "will raise a warning", + category=FutureWarning, + ) + return tools.to_datetime( + parsing.try_parse_dates( + parsing.concat_date_cols(date_cols), + parser=date_parser, + ), + errors="ignore", + ) return converter From acee8dee3a83d16e646fccf7226ae29abffe2723 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 27 Jul 2023 10:47:51 +0200 Subject: [PATCH 24/25] describe how to maintain the old behavior --- doc/source/whatsnew/v2.1.0.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 7f9d4dba1c43c..0b0b211824ca3 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -329,6 +329,9 @@ please specify ``utc=True``: data = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] pd.to_datetime(data, utc=True) +To create a ``Series`` with mixed offsets and ``object`` dtype, please use ``apply`` +and ``datetime.datetime.strptime``. 
+ Other Deprecations ~~~~~~~~~~~~~~~~~~ - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) From b7a220731e901777f9d0a8da05c770764477c336 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 27 Jul 2023 12:22:55 +0200 Subject: [PATCH 25/25] add an example how to get the old behavior and : correct the warning message --- doc/source/whatsnew/v2.1.0.rst | 16 ++++++++++++---- pandas/_libs/tslib.pyx | 3 ++- pandas/core/tools/datetimes.py | 15 +++++++++++---- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 0b0b211824ca3..91efcfd590c01 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -318,7 +318,8 @@ Parsing datetimes with mixed time zones is deprecated and shows a warning unless FutureWarning: In a future version of pandas, parsing datetimes with mixed time zones will raise a warning unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour - and silence this warning. + and silence this warning. To create a `Series` with mixed offsets and `object` dtype, + please use `apply` and `datetime.datetime.strptime`. Index([2020-01-01 00:00:00+06:00, 2020-01-01 00:00:00+01:00], dtype='object') In order to silence this warning and avoid an error in a future version of pandas, @@ -326,11 +327,18 @@ please specify ``utc=True``: .. ipython:: python - data = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] - pd.to_datetime(data, utc=True) + data = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] + pd.to_datetime(data, utc=True) To create a ``Series`` with mixed offsets and ``object`` dtype, please use ``apply`` -and ``datetime.datetime.strptime``. +and ``datetime.datetime.strptime``: + +.. 
ipython:: python + + import datetime as dt + + data = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] + pd.Series(data).apply(lambda x: dt.datetime.strptime(x, '%Y-%m-%d %H:%M:%S%z')) Other Deprecations ~~~~~~~~~~~~~~~~~~ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 4b06686368ffb..20a18cf56779f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -673,7 +673,8 @@ cdef _array_to_datetime_object( "In a future version of pandas, parsing datetimes with mixed time " "zones will raise a warning unless `utc=True`. " "Please specify `utc=True` to opt in to the new behaviour " - "and silence this warning.", + "and silence this warning. To create a `Series` with mixed offsets and " + "`object` dtype, please use `apply` and `datetime.datetime.strptime`", FutureWarning, stacklevel=find_stack_level(), ) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 9e5f329d669da..95faea468fb5d 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -357,7 +357,9 @@ def _return_parsed_timezone_results( warnings.warn( "In a future version of pandas, parsing datetimes with mixed time " "zones will raise a warning unless `utc=True`. Please specify `utc=True` " - "to opt in to the new behaviour and silence this warning.", + "to opt in to the new behaviour and silence this warning. " + "To create a `Series` with mixed offsets and `object` dtype, " + "please use `apply` and `datetime.datetime.strptime`", FutureWarning, stacklevel=find_stack_level(), ) @@ -788,7 +790,8 @@ def to_datetime( In a future version of pandas, parsing datetimes with mixed time zones will raise a warning unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour - and silence this warning. + and silence this warning. To create a `Series` with mixed offsets and + `object` dtype, please use `apply` and `datetime.datetime.strptime`. 
See also: pandas general documentation about `timezone conversion and localization @@ -1021,7 +1024,9 @@ def to_datetime( ... '2020-10-25 04:00 +0100']) # doctest: +SKIP FutureWarning: In a future version of pandas, parsing datetimes with mixed time zones will raise a warning unless `utc=True`. Please specify `utc=True` - to opt in to the new behaviour and silence this warning. + to opt in to the new behaviour and silence this warning. To create a `Series` + with mixed offsets and `object` dtype, please use `apply` and + `datetime.datetime.strptime`. Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00], dtype='object') @@ -1033,7 +1038,9 @@ def to_datetime( ... datetime(2020, 1, 1, 3, 0)]) # doctest: +SKIP FutureWarning: In a future version of pandas, parsing datetimes with mixed time zones will raise a warning unless `utc=True`. Please specify `utc=True` - to opt in to the new behaviour and silence this warning. + to opt in to the new behaviour and silence this warning. To create a `Series` + with mixed offsets and `object` dtype, please use `apply` and + `datetime.datetime.strptime`. Index([2020-01-01 01:00:00-01:00, 2020-01-01 03:00:00], dtype='object') |