Merge branch 'main' into quickfix/fix-to-replace-docs
jpgianfaldoni authored Nov 3, 2023
2 parents c6a99f5 + 0cdb37c commit fb4299d
Showing 57 changed files with 1,114 additions and 358 deletions.
17 changes: 0 additions & 17 deletions doc/source/user_guide/basics.rst
@@ -2261,23 +2261,6 @@ non-conforming elements intermixed that you want to represent as missing:

   m = ["apple", pd.Timedelta("1day")]
   pd.to_timedelta(m, errors="coerce")

-The ``errors`` parameter has a third option of ``errors='ignore'``, which will simply return the passed in data if it
-encounters any errors with the conversion to a desired data type:
-
-.. ipython:: python
-   :okwarning:
-
-   import datetime
-   m = ["apple", datetime.datetime(2016, 3, 2)]
-   pd.to_datetime(m, errors="ignore")
-   m = ["apple", 2, 3]
-   pd.to_numeric(m, errors="ignore")
-   m = ["apple", pd.Timedelta("1day")]
-   pd.to_timedelta(m, errors="ignore")

In addition to object conversion, :meth:`~pandas.to_numeric` provides another argument ``downcast``, which gives the
option of downcasting the newly (or already) numeric data to a smaller dtype, which can conserve memory:
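For context (not part of the diff), a minimal sketch of the ``downcast`` behavior described above; the dtypes shown are what current pandas produces on a 64-bit platform:

.. code-block:: python

    import pandas as pd

    pd.to_numeric(["1", "2", "3"])                      # int64 by default
    pd.to_numeric(["1", "2", "3"], downcast="integer")  # int8, smallest fit
    pd.to_numeric(["1.0", "2.5"], downcast="float")     # float32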
6 changes: 0 additions & 6 deletions doc/source/user_guide/timeseries.rst
@@ -294,12 +294,6 @@ The default behavior, ``errors='raise'``, is to raise when unparsable:

   pd.to_datetime(['2009/07/31', 'asd'], errors='raise')

-Pass ``errors='ignore'`` to return the original input when unparsable:
-
-.. ipython:: python
-
-   pd.to_datetime(["2009/07/31", "asd"], errors="ignore")
-
Pass ``errors='coerce'`` to convert unparsable data to ``NaT`` (not a time):

.. ipython:: python
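The ``errors='coerce'`` block above is truncated in this view; a hedged sketch of what that call does (output from current pandas):

.. code-block:: python

    import pandas as pd

    # Unparsable entries become NaT rather than raising
    pd.to_datetime(["2009/07/31", "asd"], errors="coerce")
    # DatetimeIndex(['2009-07-31', 'NaT'], dtype='datetime64[ns]', freq=None)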
5 changes: 3 additions & 2 deletions doc/source/whatsnew/v0.17.0.rst
@@ -632,9 +632,10 @@ Of course you can coerce this as well.

To keep the previous behavior, you can use ``errors='ignore'``:

-.. ipython:: python
+.. code-block:: ipython

-   pd.to_datetime(["2009-07-31", "asd"], errors="ignore")
+   In [4]: pd.to_datetime(["2009-07-31", "asd"], errors="ignore")
+   Out[4]: Index(['2009-07-31', 'asd'], dtype='object')

Furthermore, ``pd.to_timedelta`` has gained a similar API, of ``errors='raise'|'ignore'|'coerce'``, and the ``coerce`` keyword
has been deprecated in favor of ``errors='coerce'``.
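As an aside, the ``errors`` API mentioned above works the same way for ``to_timedelta``; a hedged sketch, not part of the diff:

.. code-block:: python

    import pandas as pd

    pd.to_timedelta(["1 day", "apple"], errors="coerce")
    # TimedeltaIndex(['1 days', NaT], dtype='timedelta64[ns]', freq=None)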
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v2.2.0.rst
@@ -284,11 +284,13 @@ Other Deprecations
- Deprecated strings ``H``, ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
- Deprecated strings ``H``, ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
+- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead, as sketched after this list (:issue:`54467`)
- Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`)
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
- Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)
- Deprecated downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)
-
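
The replacement pattern for the deprecated ``errors='ignore'`` is an explicit ``try``/``except``, mirroring the ``melt.py`` change later in this commit; the helper name below is illustrative:

.. code-block:: python

    import pandas as pd

    def to_numeric_or_passthrough(values):
        # Equivalent of the old errors="ignore": return input unchanged on failure
        try:
            return pd.to_numeric(values)
        except (TypeError, ValueError, OverflowError):
            return values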

.. ---------------------------------------------------------------------------
.. _whatsnew_220.performance:
@@ -321,7 +323,9 @@ Categorical

Datetimelike
^^^^^^^^^^^^
+- Bug in :class:`DatetimeIndex` when passing an object-dtype ndarray of float objects and a ``tz`` incorrectly localizing the result (:issue:`55780`)
- Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame (:issue:`52093`)
+- Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing a list of mixed-string-and-numeric types incorrectly raising (:issue:`55780`)
- Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`)
- Bug in :meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` always caching :meth:`Index.is_unique` as ``True`` when first value in index is ``NaT`` (:issue:`55755`)
- Bug in :meth:`Index.view` to a datetime64 dtype with non-supported resolution incorrectly raising (:issue:`55710`)
12 changes: 6 additions & 6 deletions pandas/_libs/tslib.pyx
@@ -700,15 +700,15 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso)
            ival = NPY_NAT

        else:
-           ts = Timestamp(item)
+           if PyDateTime_Check(item) and item.tzinfo is not None:
+               # We can't call Timestamp constructor with a tz arg, have to
+               # do 2-step
+               ts = Timestamp(item).tz_convert(tz)
+           else:
+               ts = Timestamp(item, tz=tz)
            if ts is NaT:
                ival = NPY_NAT
            else:
-               if ts.tzinfo is not None:
-                   ts = ts.tz_convert(tz)
-               else:
-                   # datetime64, tznaive pydatetime, int, float
-                   ts = ts.tz_localize(tz)
                ts = (<_Timestamp>ts)._as_creso(creso)
                ival = ts._value

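The 2-step construction above reflects a user-visible ``Timestamp`` constraint; a sketch of the equivalent Python-level behavior:

.. code-block:: python

    import datetime

    import pandas as pd

    aware = datetime.datetime(2023, 1, 1, tzinfo=datetime.timezone.utc)
    # Passing a tz-aware datetime together with tz= raises ValueError,
    # hence the two-step convert in the Cython branch above:
    ts = pd.Timestamp(aware).tz_convert("US/Pacific")
    # A naive value can take tz= directly:
    naive_ts = pd.Timestamp(datetime.datetime(2023, 1, 1), tz="US/Pacific")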
3 changes: 3 additions & 0 deletions pandas/_libs/tslibs/strptime.pxd
@@ -14,5 +14,8 @@ cdef class DatetimeParseState:
    cdef:
        bint found_tz
        bint found_naive
+       bint creso_ever_changed
+       NPY_DATETIMEUNIT creso

    cdef tzinfo process_datetime(self, datetime dt, tzinfo tz, bint utc_convert)
+   cdef bint update_creso(self, NPY_DATETIMEUNIT item_reso) noexcept
103 changes: 88 additions & 15 deletions pandas/_libs/tslibs/strptime.pyx
@@ -49,6 +49,10 @@ from numpy cimport (

from pandas._libs.missing cimport checknull_with_nat_and_na
from pandas._libs.tslibs.conversion cimport get_datetime64_nanos
+from pandas._libs.tslibs.dtypes cimport (
+    get_supported_reso,
+    npy_unit_to_abbrev,
+)
from pandas._libs.tslibs.nattype cimport (
    NPY_NAT,
    c_nat_strings as nat_strings,
@@ -57,6 +61,7 @@ from pandas._libs.tslibs.np_datetime cimport (
    NPY_DATETIMEUNIT,
    NPY_FR_ns,
    check_dts_bounds,
+   get_datetime64_unit,
    import_pandas_datetime,
    npy_datetimestruct,
    npy_datetimestruct_to_datetime,
@@ -232,9 +237,21 @@ cdef _get_format_regex(str fmt):


cdef class DatetimeParseState:
-   def __cinit__(self):
+   def __cinit__(self, NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns):
        self.found_tz = False
        self.found_naive = False
+       self.creso = creso
+       self.creso_ever_changed = False
+
+   cdef bint update_creso(self, NPY_DATETIMEUNIT item_reso) noexcept:
+       # Return a bool indicating whether we bumped to a higher resolution
+       if self.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
+           self.creso = item_reso
+       elif item_reso > self.creso:
+           self.creso = item_reso
+           self.creso_ever_changed = True
+           return True
+       return False

cdef tzinfo process_datetime(self, datetime dt, tzinfo tz, bint utc_convert):
if dt.tzinfo is not None:
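
A pure-Python sketch of what ``update_creso`` implements (names and the dict encoding are illustrative; in the Cython code resolutions are ``NPY_DATETIMEUNIT`` values that compare coarse-to-fine):

.. code-block:: python

    RESOLUTIONS = {"s": 0, "ms": 1, "us": 2, "ns": 3}  # coarse -> fine

    class ParseStateSketch:
        def __init__(self, creso=None):
            self.creso = creso               # None stands in for NPY_FR_GENERIC
            self.creso_ever_changed = False

        def update_creso(self, item_reso):
            # Return True if we bumped to a finer resolution
            if self.creso is None:
                self.creso = item_reso
            elif RESOLUTIONS[item_reso] > RESOLUTIONS[self.creso]:
                self.creso = item_reso
                # this flag forces array_strptime to re-parse in a second pass
                self.creso_ever_changed = True
                return True
            return False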
@@ -268,6 +285,7 @@ def array_strptime(
    bint exact=True,
    errors="raise",
    bint utc=False,
+   NPY_DATETIMEUNIT creso=NPY_FR_ns,
):
"""
Calculates the datetime structs represented by the passed array of strings
@@ -278,6 +296,8 @@
    fmt : string-like regex
    exact : matches must be exact if True, search if False
    errors : string specifying error handling, {'raise', 'ignore', 'coerce'}
+   creso : NPY_DATETIMEUNIT, default NPY_FR_ns
+       Set to NPY_FR_GENERIC to infer a resolution.
    """

cdef:
@@ -291,17 +311,22 @@
        bint is_coerce = errors=="coerce"
        tzinfo tz_out = None
        bint iso_format = format_is_iso(fmt)
-       NPY_DATETIMEUNIT out_bestunit
+       NPY_DATETIMEUNIT out_bestunit, item_reso
        int out_local = 0, out_tzoffset = 0
        bint string_to_dts_succeeded = 0
-       DatetimeParseState state = DatetimeParseState()
+       bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
+       DatetimeParseState state = DatetimeParseState(creso)

    assert is_raise or is_ignore or is_coerce

    _validate_fmt(fmt)
    format_regex, locale_time = _get_format_regex(fmt)

-   result = np.empty(n, dtype="M8[ns]")
+   if infer_reso:
+       abbrev = "ns"
+   else:
+       abbrev = npy_unit_to_abbrev(creso)
+   result = np.empty(n, dtype=f"M8[{abbrev}]")
    iresult = result.view("i8")
    result_timezone = np.empty(n, dtype="object")

@@ -318,20 +343,32 @@
            iresult[i] = NPY_NAT
            continue
        elif PyDateTime_Check(val):
+           if isinstance(val, _Timestamp):
+               item_reso = val._creso
+           else:
+               item_reso = NPY_DATETIMEUNIT.NPY_FR_us
+           state.update_creso(item_reso)
            tz_out = state.process_datetime(val, tz_out, utc)
            if isinstance(val, _Timestamp):
-               iresult[i] = val.tz_localize(None).as_unit("ns")._value
+               val = (<_Timestamp>val)._as_creso(state.creso)
+               iresult[i] = val.tz_localize(None)._value
            else:
-               iresult[i] = pydatetime_to_dt64(val.replace(tzinfo=None), &dts)
-               check_dts_bounds(&dts)
+               iresult[i] = pydatetime_to_dt64(
+                   val.replace(tzinfo=None), &dts, reso=state.creso
+               )
+               check_dts_bounds(&dts, state.creso)
            result_timezone[i] = val.tzinfo
            continue
        elif PyDate_Check(val):
-           iresult[i] = pydate_to_dt64(val, &dts)
-           check_dts_bounds(&dts)
+           item_reso = NPY_DATETIMEUNIT.NPY_FR_s
+           state.update_creso(item_reso)
+           iresult[i] = pydate_to_dt64(val, &dts, reso=state.creso)
+           check_dts_bounds(&dts, state.creso)
            continue
        elif is_datetime64_object(val):
-           iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
+           item_reso = get_supported_reso(get_datetime64_unit(val))
+           state.update_creso(item_reso)
+           iresult[i] = get_datetime64_nanos(val, state.creso)
            continue
elif (
(is_integer_object(val) or is_float_object(val))
@@ -355,7 +392,9 @@
        if string_to_dts_succeeded:
            # No error reported by string_to_dts, pick back up
            # where we left off
-           value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
+           item_reso = get_supported_reso(out_bestunit)
+           state.update_creso(item_reso)
+           value = npy_datetimestruct_to_datetime(state.creso, &dts)
            if out_local == 1:
                # Store the out_tzoffset in seconds
                # since we store the total_seconds of
@@ -368,7 +407,9 @@
            check_dts_bounds(&dts)
            continue

-       if parse_today_now(val, &iresult[i], utc, NPY_FR_ns):
+       if parse_today_now(val, &iresult[i], utc, state.creso):
+           item_reso = NPY_DATETIMEUNIT.NPY_FR_us
+           state.update_creso(item_reso)
            continue

# Some ISO formats can't be parsed by string_to_dts
@@ -380,9 +421,10 @@
raise ValueError(f"Time data {val} is not ISO8601 format")

        tz = _parse_with_format(
-           val, fmt, exact, format_regex, locale_time, &dts
+           val, fmt, exact, format_regex, locale_time, &dts, &item_reso
        )
-       iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
+       state.update_creso(item_reso)
+       iresult[i] = npy_datetimestruct_to_datetime(state.creso, &dts)
check_dts_bounds(&dts)
result_timezone[i] = tz

@@ -403,11 +445,34 @@
            raise
        return values, []

+   if infer_reso:
+       if state.creso_ever_changed:
+           # We encountered mismatched resolutions, need to re-parse with
+           # the correct one.
+           return array_strptime(
+               values,
+               fmt=fmt,
+               exact=exact,
+               errors=errors,
+               utc=utc,
+               creso=state.creso,
+           )
+
+       # Otherwise we can use the single reso that we encountered and avoid
+       # a second pass.
+       abbrev = npy_unit_to_abbrev(state.creso)
+       result = iresult.base.view(f"M8[{abbrev}]")
    return result, result_timezone.base


cdef tzinfo _parse_with_format(
-   str val, str fmt, bint exact, format_regex, locale_time, npy_datetimestruct* dts
+   str val,
+   str fmt,
+   bint exact,
+   format_regex,
+   locale_time,
+   npy_datetimestruct* dts,
+   NPY_DATETIMEUNIT* item_reso,
):
# Based on https://github.com/python/cpython/blob/main/Lib/_strptime.py#L293
cdef:
@@ -441,6 +506,8 @@
f"time data \"{val}\" doesn't match format \"{fmt}\""
)

+   item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_s
+
iso_year = -1
year = 1900
month = day = 1
@@ -527,6 +594,12 @@
        elif parse_code == 10:
            # e.g. val='10:10:10.100'; fmt='%H:%M:%S.%f'
            s = found_dict["f"]
+           if len(s) <= 3:
+               item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_ms
+           elif len(s) <= 6:
+               item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_us
+           else:
+               item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_ns
            # Pad to always return nanoseconds
            s += "0" * (9 - len(s))
            us = long(s)
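A worked sketch of the new ``%f`` resolution mapping above (hypothetical helper, same thresholds as the Cython branch):

.. code-block:: python

    def fractional_reso(s: str) -> str:
        # up to 3 digits -> ms; up to 6 -> us; 7-9 -> ns
        if len(s) <= 3:
            return "ms"
        elif len(s) <= 6:
            return "us"
        return "ns"

    s = "100"                         # from '10:10:10.100'
    fractional_reso(s)                # 'ms'
    ns = int(s + "0" * (9 - len(s)))  # value still padded to 9 digits: 100000000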
17 changes: 10 additions & 7 deletions pandas/_libs/tslibs/tzconversion.pyx
@@ -332,13 +332,16 @@ timedelta-like}
        # Shift the delta_idx by if the UTC offset of
        # the target tz is greater than 0 and we're moving forward
        # or vice versa
-       first_delta = info.deltas[0]
-       if (shift_forward or shift_delta > 0) and first_delta > 0:
-           delta_idx_offset = 1
-       elif (shift_backward or shift_delta < 0) and first_delta < 0:
-           delta_idx_offset = 1
-       else:
-           delta_idx_offset = 0
+       # TODO: delta_idx_offset and info.deltas are needed for zoneinfo timezones,
+       # but are not applicable for all timezones. Setting the former to 0 and
+       # length checking the latter avoids UB, but this could use a larger refactor
+       delta_idx_offset = 0
+       if len(info.deltas):
+           first_delta = info.deltas[0]
+           if (shift_forward or shift_delta > 0) and first_delta > 0:
+               delta_idx_offset = 1
+           elif (shift_backward or shift_delta < 0) and first_delta < 0:
+               delta_idx_offset = 1

for i in range(n):
val = vals[i]
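The length guard above matters because ``zoneinfo`` timezones can present an empty ``deltas`` array; a hedged example of the affected user-level code path (a DST gap with ``nonexistent="shift_forward"``):

.. code-block:: python

    from zoneinfo import ZoneInfo  # Python >= 3.9

    import pandas as pd

    # 02:30 does not exist on 2023-03-26 in Berlin (clocks jump 02:00 -> 03:00)
    ts = pd.Timestamp("2023-03-26 02:30:00")
    ts.tz_localize(ZoneInfo("Europe/Berlin"), nonexistent="shift_forward")
    # Timestamp('2023-03-26 03:00:00+0200', tz='Europe/Berlin')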
4 changes: 3 additions & 1 deletion pandas/core/arrays/datetimelike.py
@@ -81,6 +81,7 @@
)
from pandas.util._exceptions import find_stack_level

+from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import (
is_all_strings,
is_integer_dtype,
@@ -2358,7 +2359,8 @@ def ensure_arraylike_for_datetimelike(data, copy: bool, cls_name: str):
        if not isinstance(data, (list, tuple)) and np.ndim(data) == 0:
            # i.e. generator
            data = list(data)
-       data = np.asarray(data)
+
+       data = construct_1d_object_array_from_listlike(data)
        copy = False
    elif isinstance(data, ABCMultiIndex):
        raise TypeError(f"Cannot create a {cls_name} from a MultiIndex.")
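Why the swap away from ``np.asarray``: on mixed string/numeric input NumPy coerces everything to strings, which is what broke the mixed-type case (:issue:`55780`); a sketch:

.. code-block:: python

    import numpy as np

    data = ["2023-01-01", 20230101]
    np.asarray(data)
    # array(['2023-01-01', '20230101'], dtype='<U10') -- int silently stringified

    # An object array preserves the original types, as the helper does:
    arr = np.empty(len(data), dtype=object)
    arr[:] = data
    arr
    # array(['2023-01-01', 20230101], dtype=object)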
6 changes: 5 additions & 1 deletion pandas/core/reshape/melt.py
@@ -498,7 +498,11 @@ def melt_stub(df, stub: str, i, j, value_vars, sep: str):
    newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "", regex=True)

    # GH17627 Cast numerics suffixes to int/float
-   newdf[j] = to_numeric(newdf[j], errors="ignore")
+   try:
+       newdf[j] = to_numeric(newdf[j])
+   except (TypeError, ValueError, OverflowError):
+       # TODO: anything else to catch?
+       pass

    return newdf.set_index(i + [j])

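For context on GH17627: the ``j`` suffixes that ``melt_stub`` extracts are numeric text that ``wide_to_long`` users expect back as numbers; a usage sketch:

.. code-block:: python

    import pandas as pd

    df = pd.DataFrame({"A1970": [1, 2], "A1980": [3, 4], "id": [0, 1]})
    long_df = pd.wide_to_long(df, stubnames="A", i="id", j="year")
    long_df.index.get_level_values("year")
    # e.g. Index([1970, 1970, 1980, 1980], dtype='int64') -- ints, not strings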