diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index ff19ef4eae179..4cb716e5f8a49 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -284,11 +284,13 @@ Other Deprecations - Deprecated strings ``H``, ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`) - Deprecated strings ``H``, ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`) - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`) +- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`) - Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`) - Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`) - Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`) - Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`) - Deprecating downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`) +- .. --------------------------------------------------------------------------- .. _whatsnew_220.performance: diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 387d43f47fe9b..03423a85db853 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -498,7 +498,11 @@ def melt_stub(df, stub: str, i, j, value_vars, sep: str): newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "", regex=True) # GH17627 Cast numerics suffixes to int/float - newdf[j] = to_numeric(newdf[j], errors="ignore") + try: + newdf[j] = to_numeric(newdf[j]) + except (TypeError, ValueError, OverflowError): + # TODO: anything else to catch? + pass return newdf.set_index(i + [j]) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 3d8b043a90bd1..231d3a27bc6f5 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1076,6 +1076,16 @@ def to_datetime( "You can safely remove this argument.", stacklevel=find_stack_level(), ) + if errors == "ignore": + # GH#54467 + warnings.warn( + "errors='ignore' is deprecated and will raise in a future version. " + "Use to_datetime without passing `errors` and catch exceptions " + "explicitly instead", + FutureWarning, + stacklevel=find_stack_level(), + ) + if arg is None: return None diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 7a445ad7ac2b2..66994b4474935 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -4,10 +4,12 @@ TYPE_CHECKING, Literal, ) +import warnings import numpy as np from pandas._libs import lib +from pandas.util._exceptions import find_stack_level from pandas.util._validators import check_dtype_backend from pandas.core.dtypes.cast import maybe_downcast_numeric @@ -167,6 +169,15 @@ def to_numeric( if errors not in ("ignore", "raise", "coerce"): raise ValueError("invalid error value specified") + if errors == "ignore": + # GH#54467 + warnings.warn( + "errors='ignore' is deprecated and will raise in a future version. " + "Use to_numeric without passing `errors` and catch exceptions " + "explicitly instead", + FutureWarning, + stacklevel=find_stack_level(), + ) check_dtype_backend(dtype_backend) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 587946aba5041..8909776d91369 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -7,6 +7,7 @@ TYPE_CHECKING, overload, ) +import warnings import numpy as np @@ -20,6 +21,7 @@ disallow_ambiguous_unit, parse_timedelta_unit, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.dtypes import ArrowDtype @@ -183,6 +185,15 @@ def to_timedelta( if errors not in ("ignore", "raise", "coerce"): raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'.") + if errors == "ignore": + # GH#54467 + warnings.warn( + "errors='ignore' is deprecated and will raise in a future version. " + "Use to_timedelta without passing `errors` and catch exceptions " + "explicitly instead", + FutureWarning, + stacklevel=find_stack_level(), + ) if arg is None: return arg diff --git a/pandas/core/tools/times.py b/pandas/core/tools/times.py index 1b3a3ae1be5f0..d77bcc91df709 100644 --- a/pandas/core/tools/times.py +++ b/pandas/core/tools/times.py @@ -5,10 +5,12 @@ time, ) from typing import TYPE_CHECKING +import warnings import numpy as np from pandas._libs.lib import is_list_like +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.generic import ( ABCIndex, @@ -52,6 +54,15 @@ def to_time( ------- datetime.time """ + if errors == "ignore": + # GH#54467 + warnings.warn( + "errors='ignore' is deprecated and will raise in a future version. " + "Use to_time without passing `errors` and catch exceptions " + "explicitly instead", + FutureWarning, + stacklevel=find_stack_level(), + ) def _convert_listlike(arg, format): if isinstance(arg, (list, tuple)): diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 6b1daa96782a0..86ec62d2b19b6 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1150,14 +1150,19 @@ def converter(*date_cols, col: Hashable): ".*parsing datetimes with mixed time zones will raise an error", category=FutureWarning, ) - result = tools.to_datetime( - ensure_object(strs), - format=date_fmt, - utc=False, - dayfirst=dayfirst, - errors="ignore", - cache=cache_dates, - ) + str_objs = ensure_object(strs) + try: + result = tools.to_datetime( + str_objs, + format=date_fmt, + utc=False, + dayfirst=dayfirst, + cache=cache_dates, + ) + except (ValueError, TypeError): + # test_usecols_with_parse_dates4 + return str_objs + if isinstance(result, DatetimeIndex): arr = result.to_numpy() arr.flags.writeable = True @@ -1172,17 +1177,22 @@ def converter(*date_cols, col: Hashable): "will raise an error", category=FutureWarning, ) - result = tools.to_datetime( - date_parser( - *(unpack_if_single_element(arg) for arg in date_cols) - ), - errors="ignore", - cache=cache_dates, + pre_parsed = date_parser( + *(unpack_if_single_element(arg) for arg in date_cols) ) + try: + result = tools.to_datetime( + pre_parsed, + cache=cache_dates, + ) + except (ValueError, TypeError): + # test_read_csv_with_custom_date_parser + result = pre_parsed if isinstance(result, datetime.datetime): raise Exception("scalar parser") return result except Exception: + # e.g. test_datetime_fractional_seconds with warnings.catch_warnings(): warnings.filterwarnings( "ignore", @@ -1190,13 +1200,15 @@ def converter(*date_cols, col: Hashable): "will raise an error", category=FutureWarning, ) - return tools.to_datetime( - parsing.try_parse_dates( - parsing.concat_date_cols(date_cols), - parser=date_parser, - ), - errors="ignore", + pre_parsed = parsing.try_parse_dates( + parsing.concat_date_cols(date_cols), + parser=date_parser, ) + try: + return tools.to_datetime(pre_parsed) + except (ValueError, TypeError): + # TODO: not reached in tests 2023-10-27; needed? + return pre_parsed return converter diff --git a/pandas/io/sql.py b/pandas/io/sql.py index b4675513a99c2..c77567214a692 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -105,6 +105,12 @@ def _handle_date_column( # Format can take on custom to_datetime argument values such as # {"errors": "coerce"} or {"dayfirst": True} error: DateTimeErrorChoices = format.pop("errors", None) or "ignore" + if error == "ignore": + try: + return to_datetime(col, **format) + except (TypeError, ValueError): + # TODO: not reached 2023-10-27; needed? + return col return to_datetime(col, errors=error, **format) else: # Allow passing of formatting string for integers diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 918353c9c7181..fd5d92dc35249 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1278,7 +1278,9 @@ def test_value_counts_datetime_outofbounds(self): tm.assert_series_equal(res, exp) # GH 12424 # TODO: belongs elsewhere - res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore") + msg = "errors='ignore' is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore") exp = Series(["2362-01-01", np.nan], dtype=object) tm.assert_series_equal(res, exp) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index b41257b19686d..f9094e20a7f6a 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -57,6 +57,10 @@ r"alongside this." ) +pytestmark = pytest.mark.filterwarnings( + "ignore:errors='ignore' is deprecated:FutureWarning" +) + @pytest.fixture(params=[True, False]) def cache(request): @@ -1214,7 +1218,9 @@ def test_to_datetime_tz_mixed(self, cache): with pytest.raises(ValueError, match=msg): to_datetime(arr, cache=cache) - result = to_datetime(arr, cache=cache, errors="ignore") + depr_msg = "errors='ignore' is deprecated" + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + result = to_datetime(arr, cache=cache, errors="ignore") expected = Index( [ Timestamp("2013-01-01 13:00:00-08:00"), @@ -1460,11 +1466,15 @@ def test_datetime_invalid_index(self, values, format): warn = UserWarning else: warn = None - with tm.assert_produces_warning(warn, match="Could not infer format"): + with tm.assert_produces_warning( + warn, match="Could not infer format", raise_on_extra_warnings=False + ): res = to_datetime(values, errors="ignore", format=format) tm.assert_index_equal(res, Index(values)) - with tm.assert_produces_warning(warn, match="Could not infer format"): + with tm.assert_produces_warning( + warn, match="Could not infer format", raise_on_extra_warnings=False + ): res = to_datetime(values, errors="coerce", format=format) tm.assert_index_equal(res, DatetimeIndex([NaT] * len(values))) @@ -1479,7 +1489,9 @@ def test_datetime_invalid_index(self, values, format): ] ) with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(warn, match="Could not infer format"): + with tm.assert_produces_warning( + warn, match="Could not infer format", raise_on_extra_warnings=False + ): to_datetime(values, errors="raise", format=format) @pytest.mark.parametrize("utc", [True, None]) @@ -1648,7 +1660,9 @@ def test_to_datetime_malformed_no_raise(self, errors, expected): # GH 28299 # GH 48633 ts_strings = ["200622-12-31", "111111-24-11"] - with tm.assert_produces_warning(UserWarning, match="Could not infer format"): + with tm.assert_produces_warning( + UserWarning, match="Could not infer format", raise_on_extra_warnings=False + ): result = to_datetime(ts_strings, errors=errors) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index da8e2fe9abc16..d6b085b7954db 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -112,6 +112,7 @@ def test_error(data, msg): @pytest.mark.parametrize( "errors,exp_data", [("ignore", [1, -3.14, "apple"]), ("coerce", [1, -3.14, np.nan])] ) +@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_ignore_error(errors, exp_data): ser = Series([1, -3.14, "apple"]) result = to_numeric(ser, errors=errors) @@ -129,6 +130,7 @@ def test_ignore_error(errors, exp_data): ("coerce", [1.0, 0.0, np.nan]), ], ) +@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_bool_handling(errors, exp): ser = Series([True, False, "apple"]) @@ -229,6 +231,7 @@ def test_all_nan(): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_type_check(errors): # see gh-11776 df = DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]}) @@ -243,6 +246,7 @@ def test_scalar(val, signed, transform): assert to_numeric(transform(val)) == float(val) +@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_really_large_scalar(large_val, signed, transform, errors): # see gh-24910 kwargs = {"errors": errors} if errors is not None else {} @@ -260,6 +264,7 @@ def test_really_large_scalar(large_val, signed, transform, errors): tm.assert_almost_equal(to_numeric(val, **kwargs), expected) +@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_really_large_in_arr(large_val, signed, transform, multiple_elts, errors): # see gh-24910 kwargs = {"errors": errors} if errors is not None else {} @@ -299,6 +304,7 @@ def test_really_large_in_arr(large_val, signed, transform, multiple_elts, errors tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype)) +@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_really_large_in_arr_consistent(large_val, signed, multiple_elts, errors): # see gh-24910 # @@ -337,6 +343,7 @@ def test_really_large_in_arr_consistent(large_val, signed, multiple_elts, errors ("coerce", lambda x: np.isnan(x)), ], ) +@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_scalar_fail(errors, checker): scalar = "fail" @@ -412,6 +419,7 @@ def test_period(request, transform_assert_equal): ("coerce", Series([np.nan, 1.0, np.nan])), ], ) +@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_non_hashable(errors, expected): # see gh-13324 ser = Series([[10.0, 2], 1.0, "apple"]) @@ -496,7 +504,9 @@ def test_ignore_downcast_invalid_data(): data = ["foo", 2, 3] expected = np.array(data, dtype=object) - res = to_numeric(data, errors="ignore", downcast="unsigned") + msg = "errors='ignore' is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = to_numeric(data, errors="ignore", downcast="unsigned") tm.assert_numpy_array_equal(res, expected) @@ -629,6 +639,7 @@ def test_coerce_uint64_conflict(data, exp_data): ("raise", "Unable to parse string"), ], ) +@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_non_coerce_uint64_conflict(errors, exp): # see gh-17007 and gh-17125 # @@ -755,7 +766,9 @@ def test_to_numeric_from_nullable_string_ignore(nullable_string_dtype): values = ["a", "1"] ser = Series(values, dtype=nullable_string_dtype) expected = ser.copy() - result = to_numeric(ser, errors="ignore") + msg = "errors='ignore' is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_numeric(ser, errors="ignore") tm.assert_series_equal(result, expected) @@ -925,7 +938,9 @@ def test_to_numeric_dtype_backend_error(dtype_backend): with pytest.raises(ValueError, match="Unable to parse string"): to_numeric(ser, dtype_backend=dtype_backend) - result = to_numeric(ser, dtype_backend=dtype_backend, errors="ignore") + msg = "errors='ignore' is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_numeric(ser, dtype_backend=dtype_backend, errors="ignore") tm.assert_series_equal(result, expected) result = to_numeric(ser, dtype_backend=dtype_backend, errors="coerce") diff --git a/pandas/tests/tools/test_to_time.py b/pandas/tests/tools/test_to_time.py index 5046fd9d0edc1..b673bd9c2ec71 100644 --- a/pandas/tests/tools/test_to_time.py +++ b/pandas/tests/tools/test_to_time.py @@ -54,7 +54,9 @@ def test_arraylike(self): assert to_time(arg, infer_time_format=True) == expected_arr assert to_time(arg, format="%I:%M%p", errors="coerce") == [None, None] - res = to_time(arg, format="%I:%M%p", errors="ignore") + msg = "errors='ignore' is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = to_time(arg, format="%I:%M%p", errors="ignore") tm.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) msg = "Cannot convert.+to a time with given format" diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 120b5322adf3e..c4c9b41c218a0 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -92,6 +92,7 @@ def test_to_timedelta_oob_non_nano(self): "arg", [np.arange(10).reshape(2, 5), pd.DataFrame(np.arange(10).reshape(2, 5))] ) @pytest.mark.parametrize("errors", ["ignore", "raise", "coerce"]) + @pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_to_timedelta_dataframe(self, arg, errors): # GH 11776 with pytest.raises(TypeError, match="1-d array"): @@ -137,22 +138,29 @@ def test_to_timedelta_bad_value_coerce(self): def test_to_timedelta_invalid_errors_ignore(self): # gh-13613: these should not error because errors='ignore' + msg = "errors='ignore' is deprecated" invalid_data = "apple" - assert invalid_data == to_timedelta(invalid_data, errors="ignore") + + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_timedelta(invalid_data, errors="ignore") + assert invalid_data == result invalid_data = ["apple", "1 days"] - tm.assert_numpy_array_equal( - np.array(invalid_data, dtype=object), - to_timedelta(invalid_data, errors="ignore"), - ) + expected = np.array(invalid_data, dtype=object) + + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_timedelta(invalid_data, errors="ignore") + tm.assert_numpy_array_equal(expected, result) invalid_data = pd.Index(["apple", "1 days"]) - tm.assert_index_equal(invalid_data, to_timedelta(invalid_data, errors="ignore")) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_timedelta(invalid_data, errors="ignore") + tm.assert_index_equal(invalid_data, result) invalid_data = Series(["apple", "1 days"]) - tm.assert_series_equal( - invalid_data, to_timedelta(invalid_data, errors="ignore") - ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_timedelta(invalid_data, errors="ignore") + tm.assert_series_equal(invalid_data, result) @pytest.mark.parametrize( "val, errors", @@ -239,7 +247,9 @@ def test_to_timedelta_coerce_strings_unit(self): def test_to_timedelta_ignore_strings_unit(self): arr = np.array([1, 2, "error"], dtype=object) - result = to_timedelta(arr, unit="ns", errors="ignore") + msg = "errors='ignore' is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_timedelta(arr, unit="ns", errors="ignore") tm.assert_numpy_array_equal(result, arr) @pytest.mark.parametrize(