Merge branch 'main' into main
dominiquegarmier authored Nov 3, 2023
2 parents 2520356 + 0cdb37c commit 03b39d6
Showing 17 changed files with 183 additions and 103 deletions.
17 changes: 0 additions & 17 deletions doc/source/user_guide/basics.rst
@@ -2261,23 +2261,6 @@ non-conforming elements intermixed that you want to represent as missing:
m = ["apple", pd.Timedelta("1day")]
pd.to_timedelta(m, errors="coerce")
The ``errors`` parameter has a third option, ``errors='ignore'``, which simply returns the passed-in data if it
encounters any errors while converting to the desired data type:

.. ipython:: python
:okwarning:
import datetime
m = ["apple", datetime.datetime(2016, 3, 2)]
pd.to_datetime(m, errors="ignore")
m = ["apple", 2, 3]
pd.to_numeric(m, errors="ignore")
m = ["apple", pd.Timedelta("1day")]
pd.to_timedelta(m, errors="ignore")
In addition to object conversion, :meth:`~pandas.to_numeric` provides another argument ``downcast``, which gives the
option of downcasting the newly (or already) numeric data to a smaller dtype, which can conserve memory:
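A minimal sketch of that ``downcast`` option (standard pandas usage, not part of this diff):

    import pandas as pd

    s = pd.Series(["1", "2", "-3"])
    pd.to_numeric(s)                      # int64 by default
    pd.to_numeric(s, downcast="integer")  # int8, the smallest integer dtype that fits
    pd.to_numeric(s, downcast="float")    # float32 instead of the default float64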

6 changes: 0 additions & 6 deletions doc/source/user_guide/timeseries.rst
@@ -294,12 +294,6 @@ The default behavior, ``errors='raise'``, is to raise when unparsable:
pd.to_datetime(['2009/07/31', 'asd'], errors='raise')
Pass ``errors='ignore'`` to return the original input when unparsable:

.. ipython:: python
pd.to_datetime(["2009/07/31", "asd"], errors="ignore")
Pass ``errors='coerce'`` to convert unparsable data to ``NaT`` (not a time):

.. ipython:: python
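    # The example body is truncated in this view; it presumably mirrors the
    # cases above, with unparsable entries coerced to NaT:
    pd.to_datetime(["2009/07/31", "asd"], errors="coerce")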
5 changes: 3 additions & 2 deletions doc/source/whatsnew/v0.17.0.rst
@@ -632,9 +632,10 @@ Of course you can coerce this as well.
To keep the previous behavior, you can use ``errors='ignore'``:

.. ipython:: python
.. code-block:: ipython
pd.to_datetime(["2009-07-31", "asd"], errors="ignore")
In [4]: pd.to_datetime(["2009-07-31", "asd"], errors="ignore")
Out[4]: Index(['2009-07-31', 'asd'], dtype='object')
Furthermore, ``pd.to_timedelta`` has gained a similar ``errors='raise'|'ignore'|'coerce'`` API, and the ``coerce`` keyword
has been deprecated in favor of ``errors='coerce'``.
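A quick sketch of those ``errors`` choices for ``pd.to_timedelta`` (illustrative only, not part of this diff):

    import pandas as pd

    pd.to_timedelta(["1 day", "apple"], errors="coerce")
    # TimedeltaIndex(['1 days', NaT], dtype='timedelta64[ns]', freq=None)
    # errors='raise' (the default) would raise a ValueError for 'apple' instead.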
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.2.0.rst
@@ -284,11 +284,13 @@ Other Deprecations
- Deprecated strings ``H``, ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
- Deprecated strings ``H``, ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`)
- Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`)
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
- Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)
- Deprecated downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)
-
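The ``errors='ignore'`` deprecation above points users toward explicit exception handling; a minimal sketch of that replacement pattern (it mirrors the ``melt.py`` change later in this commit):

    import pandas as pd

    data = ["2016-03-02", "not-a-date"]

    # Before: pd.to_datetime(data, errors="ignore") silently returned the input.
    # After: parse and fall back explicitly.
    try:
        result = pd.to_datetime(data)
    except (ValueError, TypeError):
        result = data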

.. ---------------------------------------------------------------------------
.. _whatsnew_220.performance:
6 changes: 5 additions & 1 deletion pandas/core/reshape/melt.py
@@ -498,7 +498,11 @@ def melt_stub(df, stub: str, i, j, value_vars, sep: str):
newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "", regex=True)

# GH17627 Cast numerics suffixes to int/float
newdf[j] = to_numeric(newdf[j], errors="ignore")
try:
newdf[j] = to_numeric(newdf[j])
except (TypeError, ValueError, OverflowError):
# TODO: anything else to catch?
pass

return newdf.set_index(i + [j])

17 changes: 10 additions & 7 deletions pandas/core/tools/datetimes.py
@@ -980,16 +980,9 @@ def to_datetime(
**Non-convertible date/times**
If a date does not meet the `timestamp limitations
<https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
#timeseries-timestamp-limits>`_, passing ``errors='ignore'``
will return the original input instead of raising any exception.
Passing ``errors='coerce'`` will force an out-of-bounds date to :const:`NaT`,
in addition to forcing non-dates (or non-parseable dates) to :const:`NaT`.
>>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore')
'13000101'
>>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')
NaT
@@ -1079,6 +1072,16 @@ def to_datetime(
"You can safely remove this argument.",
stacklevel=find_stack_level(),
)
if errors == "ignore":
# GH#54467
warnings.warn(
"errors='ignore' is deprecated and will raise in a future version. "
"Use to_datetime without passing `errors` and catch exceptions "
"explicitly instead",
FutureWarning,
stacklevel=find_stack_level(),
)

if arg is None:
return None

22 changes: 16 additions & 6 deletions pandas/core/tools/numeric.py
@@ -4,10 +4,12 @@
TYPE_CHECKING,
Literal,
)
import warnings

import numpy as np

from pandas._libs import lib
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.cast import maybe_downcast_numeric
@@ -68,6 +70,11 @@ def to_numeric(
- If 'raise', then invalid parsing will raise an exception.
- If 'coerce', then invalid parsing will be set as NaN.
- If 'ignore', then invalid parsing will return the input.
.. versionchanged:: 2.2
"ignore" is deprecated. Catch exceptions explicitly instead.
downcast : str, default None
Can be 'integer', 'signed', 'unsigned', or 'float'.
If not None, and if the data has been successfully cast to a
@@ -134,12 +141,6 @@
2 -3
dtype: int8
>>> s = pd.Series(['apple', '1.0', '2', -3])
>>> pd.to_numeric(s, errors='ignore')
0 apple
1 1.0
2 2
3 -3
dtype: object
>>> pd.to_numeric(s, errors='coerce')
0 NaN
1 1.0
@@ -167,6 +168,15 @@

if errors not in ("ignore", "raise", "coerce"):
raise ValueError("invalid error value specified")
if errors == "ignore":
# GH#54467
warnings.warn(
"errors='ignore' is deprecated and will raise in a future version. "
"Use to_numeric without passing `errors` and catch exceptions "
"explicitly instead",
FutureWarning,
stacklevel=find_stack_level(),
)

check_dtype_backend(dtype_backend)

11 changes: 11 additions & 0 deletions pandas/core/tools/timedeltas.py
@@ -7,6 +7,7 @@
TYPE_CHECKING,
overload,
)
import warnings

import numpy as np

disallow_ambiguous_unit,
parse_timedelta_unit,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.dtypes import ArrowDtype
@@ -183,6 +185,15 @@ def to_timedelta(

if errors not in ("ignore", "raise", "coerce"):
raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'.")
if errors == "ignore":
# GH#54467
warnings.warn(
"errors='ignore' is deprecated and will raise in a future version. "
"Use to_timedelta without passing `errors` and catch exceptions "
"explicitly instead",
FutureWarning,
stacklevel=find_stack_level(),
)

if arg is None:
return arg
11 changes: 11 additions & 0 deletions pandas/core/tools/times.py
@@ -5,10 +5,12 @@
time,
)
from typing import TYPE_CHECKING
import warnings

import numpy as np

from pandas._libs.lib import is_list_like
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.generic import (
ABCIndex,
@@ -52,6 +54,15 @@ def to_time(
-------
datetime.time
"""
if errors == "ignore":
# GH#54467
warnings.warn(
"errors='ignore' is deprecated and will raise in a future version. "
"Use to_time without passing `errors` and catch exceptions "
"explicitly instead",
FutureWarning,
stacklevel=find_stack_level(),
)

def _convert_listlike(arg, format):
if isinstance(arg, (list, tuple)):
52 changes: 32 additions & 20 deletions pandas/io/parsers/base_parser.py
@@ -1150,14 +1150,19 @@ def converter(*date_cols, col: Hashable):
".*parsing datetimes with mixed time zones will raise an error",
category=FutureWarning,
)
result = tools.to_datetime(
ensure_object(strs),
format=date_fmt,
utc=False,
dayfirst=dayfirst,
errors="ignore",
cache=cache_dates,
)
str_objs = ensure_object(strs)
try:
result = tools.to_datetime(
str_objs,
format=date_fmt,
utc=False,
dayfirst=dayfirst,
cache=cache_dates,
)
except (ValueError, TypeError):
# test_usecols_with_parse_dates4
return str_objs

if isinstance(result, DatetimeIndex):
arr = result.to_numpy()
arr.flags.writeable = True
@@ -1172,31 +1177,38 @@ def converter(*date_cols, col: Hashable):
"will raise an error",
category=FutureWarning,
)
result = tools.to_datetime(
date_parser(
*(unpack_if_single_element(arg) for arg in date_cols)
),
errors="ignore",
cache=cache_dates,
pre_parsed = date_parser(
*(unpack_if_single_element(arg) for arg in date_cols)
)
try:
result = tools.to_datetime(
pre_parsed,
cache=cache_dates,
)
except (ValueError, TypeError):
# test_read_csv_with_custom_date_parser
result = pre_parsed
if isinstance(result, datetime.datetime):
raise Exception("scalar parser")
return result
except Exception:
# e.g. test_datetime_fractional_seconds
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
".*parsing datetimes with mixed time zones "
"will raise an error",
category=FutureWarning,
)
return tools.to_datetime(
parsing.try_parse_dates(
parsing.concat_date_cols(date_cols),
parser=date_parser,
),
errors="ignore",
pre_parsed = parsing.try_parse_dates(
parsing.concat_date_cols(date_cols),
parser=date_parser,
)
try:
return tools.to_datetime(pre_parsed)
except (ValueError, TypeError):
# TODO: not reached in tests 2023-10-27; needed?
return pre_parsed

return converter

6 changes: 6 additions & 0 deletions pandas/io/sql.py
@@ -105,6 +105,12 @@ def _handle_date_column(
# Format can take on custom to_datetime argument values such as
# {"errors": "coerce"} or {"dayfirst": True}
error: DateTimeErrorChoices = format.pop("errors", None) or "ignore"
if error == "ignore":
try:
return to_datetime(col, **format)
except (TypeError, ValueError):
# TODO: not reached 2023-10-27; needed?
return col
return to_datetime(col, errors=error, **format)
else:
# Allow passing of formatting string for integers
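For context on the ``_handle_date_column`` change above: ``read_sql`` forwards per-column ``parse_dates`` keyword arguments straight to ``to_datetime``. A small runnable sketch (the table and column names are made up for illustration):

    import sqlite3

    import pandas as pd

    con = sqlite3.connect(":memory:")
    con.execute("CREATE TABLE events (id INTEGER, created_at TEXT)")
    con.execute("INSERT INTO events VALUES (1, '2023-11-03'), (2, 'not-a-date')")

    # 'not-a-date' becomes NaT because errors='coerce' is forwarded to to_datetime.
    df = pd.read_sql(
        "SELECT * FROM events",
        con,
        parse_dates={"created_at": {"errors": "coerce"}},
    )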
48 changes: 24 additions & 24 deletions pandas/tests/indexes/datetimes/test_constructors.py
@@ -1013,35 +1013,35 @@ def test_dti_convert_datetime_list(self, tzstr):
dr2 = DatetimeIndex(list(dr), name="foo", freq="D")
tm.assert_index_equal(dr, dr2)

def test_dti_ambiguous_matches_timestamp(self):
@pytest.mark.parametrize(
"tz",
[
pytz.timezone("US/Eastern"),
gettz("US/Eastern"),
],
)
@pytest.mark.parametrize("use_str", [True, False])
@pytest.mark.parametrize("box_cls", [Timestamp, DatetimeIndex])
def test_dti_ambiguous_matches_timestamp(self, tz, use_str, box_cls, request):
# GH#47471 check that we get the same raising behavior in the DTI
# constructor and Timestamp constructor
dtstr = "2013-11-03 01:59:59.999999"
dtobj = Timestamp(dtstr).to_pydatetime()

tz = pytz.timezone("US/Eastern")
with pytest.raises(pytz.AmbiguousTimeError, match=dtstr):
Timestamp(dtstr, tz=tz)
with pytest.raises(pytz.AmbiguousTimeError, match=dtstr):
Timestamp(dtobj, tz=tz)
with pytest.raises(pytz.AmbiguousTimeError, match=dtstr):
DatetimeIndex([dtstr], tz=tz)
with pytest.raises(pytz.AmbiguousTimeError, match=dtstr):
DatetimeIndex([dtobj], tz=tz)
item = dtstr
if not use_str:
item = Timestamp(dtstr).to_pydatetime()
if box_cls is not Timestamp:
item = [item]

if not use_str and isinstance(tz, dateutil.tz.tzfile):
# FIXME: The Timestamp constructor here behaves differently than all
# the other cases bc with dateutil/zoneinfo tzinfos we implicitly
# get fold=0. Having this raise is not important, but having the
# behavior be consistent across cases is.
mark = pytest.mark.xfail(reason="We implicitly get fold=0.")
request.applymarker(mark)

tz2 = gettz("US/Eastern")
with pytest.raises(pytz.AmbiguousTimeError, match=dtstr):
Timestamp(dtstr, tz=tz2)
# FIXME: The Timestamp constructor here behaves differently than all
# the other cases bc with dateutil/zoneinfo tzinfos we implicitly
# get fold=0. Having this raise is not important, but having the
# behavior be consistent across cases is.
# with pytest.raises(pytz.AmbiguousTimeError, match=dtstr):
# Timestamp(dtobj, tz=tz2)
with pytest.raises(pytz.AmbiguousTimeError, match=dtstr):
DatetimeIndex([dtstr], tz=tz2)
with pytest.raises(pytz.AmbiguousTimeError, match=dtstr):
DatetimeIndex([dtobj], tz=tz2)
box_cls(item, tz=tz)

@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
def test_dti_constructor_with_non_nano_dtype(self, tz):
4 changes: 3 additions & 1 deletion pandas/tests/test_algos.py
@@ -1278,7 +1278,9 @@ def test_value_counts_datetime_outofbounds(self):
tm.assert_series_equal(res, exp)

# GH 12424 # TODO: belongs elsewhere
res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore")
msg = "errors='ignore' is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore")
exp = Series(["2362-01-01", np.nan], dtype=object)
tm.assert_series_equal(res, exp)
