Merge remote-tracking branch 'upstream/main' into doc-rolling-depr

pandas-dev · Sep 18, 2023 · 226db2b · 226db2b
2 parents eac48c0 + 1496630
commit 226db2b
Show file tree

Hide file tree

Showing 46 changed files with 886 additions and 249 deletions.
diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst
@@ -525,6 +525,29 @@ Sparse-dtype specific methods and attributes are provided under the
    Series.sparse.from_coo
    Series.sparse.to_coo
 
+
+.. _api.series.struct:
+
+Struct accessor
+~~~~~~~~~~~~~~~
+
+Arrow struct-dtype specific methods and attributes are provided under the
+``Series.struct`` accessor.
+
+.. autosummary::
+   :toctree: api/
+   :template: autosummary/accessor_attribute.rst
+
+   Series.struct.dtypes
+
+.. autosummary::
+   :toctree: api/
+   :template: autosummary/accessor_method.rst
+
+   Series.struct.field
+   Series.struct.explode
+
+
 .. _api.series.flags:
 
 Flags

diff --git a/doc/source/user_guide/gotchas.rst b/doc/source/user_guide/gotchas.rst
@@ -379,7 +379,7 @@ constructors using something similar to the following:
 .. ipython:: python
 
    x = np.array(list(range(10)), ">i4")  # big endian
-   newx = x.byteswap().newbyteorder()  # force native byteorder
+   newx = x.byteswap().view(x.dtype.newbyteorder())  # force native byteorder
    s = pd.Series(newx)
 
 See `the NumPy documentation on byte order

diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst
@@ -401,7 +401,7 @@ Limit the number of NA values filled
 
    df.ffill(limit=1)
 
-NA values can be replaced with corresponding value from a :class:`Series`` or :class:`DataFrame``
+NA values can be replaced with the corresponding value from a :class:`Series` or :class:`DataFrame`
 where the index and column aligns between the original object and the filled object.
 
 .. ipython:: python
@@ -660,7 +660,7 @@ Pass a list of regular expressions that will replace matches with a scalar.
 
 .. ipython:: python
 
-   df.replace([r"\s*\.\s*", r"a|b"], np.nan, regex=True)
+   df.replace([r"\s*\.\s*", r"a|b"], "placeholder", regex=True)
 
 All of the regular expression examples can also be passed with the
 ``to_replace`` argument as the ``regex`` argument. In this case the ``value``
@@ -669,7 +669,7 @@ dictionary.
 
 .. ipython:: python
 
-   df.replace(regex=[r"\s*\.\s*", r"a|b"], value=np.nan)
+   df.replace(regex=[r"\s*\.\s*", r"a|b"], value="placeholder")
 
 .. note::
 

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
@@ -36,6 +36,34 @@ There are two advantages of this engine:
 
 For more, see :ref:`io.calamine` in the user guide on IO tools.
 
+.. _whatsnew_220.enhancements.struct_accessor:
+
+Series.struct accessor for PyArrow structured data
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``Series.struct`` accessor provides attributes and methods for processing
+data with ``struct[pyarrow]`` dtype Series. For example,
+:meth:`Series.struct.explode` converts PyArrow structured data to a pandas
+DataFrame. (:issue:`54938`)
+
+.. ipython:: python
+
+    import pyarrow as pa
+    series = pd.Series(
+        [
+            {"project": "pandas", "version": "2.2.0"},
+            {"project": "numpy", "version": "1.25.2"},
+            {"project": "pyarrow", "version": "13.0.0"},
+        ],
+        dtype=pd.ArrowDtype(
+            pa.struct([
+                ("project", pa.string()),
+                ("version", pa.string()),
+            ])
+        ),
+    )
+    series.struct.explode()
+
 .. _whatsnew_220.enhancements.enhancement2:
 
 enhancement2
@@ -162,13 +190,15 @@ Deprecations
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_parquet` except ``path``. (:issue:`54229`)
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
-- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.downcasting", True)`` (:issue:`53656`)
+- Deprecated automatic downcasting of object-dtype results in :meth:`Series.replace` and :meth:`DataFrame.replace`; explicitly call ``result = result.infer_objects(copy=False)`` instead. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54710`)
+- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
 - Deprecated including the groups in computations when using :meth:`DataFrameGroupBy.apply` and :meth:`DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
 - Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
 - Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
 - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
 - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
 - Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
+- Deprecated downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_220.performance:
@@ -229,12 +259,12 @@ Strings
 
 Interval
 ^^^^^^^^
--
+- Bug in :class:`Interval` ``__repr__`` not displaying UTC offsets for :class:`Timestamp` bounds. Additionally the hour, minute and second components will now be shown. (:issue:`55015`)
 -
 
 Indexing
 ^^^^^^^^
--
+- Bug in :meth:`Index.difference` not returning a unique set of values when ``other`` is empty or ``other`` is considered non-comparable (:issue:`55113`)
 -
 
 Missing
@@ -260,7 +290,7 @@ Period
 
 Plotting
 ^^^^^^^^
--
+- Bug in :meth:`DataFrame.plot.box` with ``vert=False`` and a matplotlib ``Axes`` created with ``sharey=True`` (:issue:`54941`)
 -
 
 Groupby/resample/rolling

diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx
@@ -478,31 +478,16 @@ cdef class Interval(IntervalMixin):
         args = (self.left, self.right, self.closed)
         return (type(self), args)
 
-    def _repr_base(self):
-        left = self.left
-        right = self.right
-
-        # TODO: need more general formatting methodology here
-        if isinstance(left, _Timestamp) and isinstance(right, _Timestamp):
-            left = left._short_repr
-            right = right._short_repr
-
-        return left, right
-
     def __repr__(self) -> str:
-
-        left, right = self._repr_base()
-        disp = str if isinstance(left, np.generic) else repr
+        disp = str if isinstance(self.left, (np.generic, _Timestamp)) else repr
         name = type(self).__name__
-        repr_str = f"{name}({disp(left)}, {disp(right)}, closed={repr(self.closed)})"
+        repr_str = f"{name}({disp(self.left)}, {disp(self.right)}, closed={repr(self.closed)})"  # noqa: E501
         return repr_str
 
     def __str__(self) -> str:
-
-        left, right = self._repr_base()
         start_symbol = "[" if self.closed_left else "("
         end_symbol = "]" if self.closed_right else ")"
-        return f"{start_symbol}{left}, {right}{end_symbol}"
+        return f"{start_symbol}{self.left}, {self.right}{end_symbol}"
 
     def __add__(self, y):
         if (

diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx
@@ -460,7 +460,6 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) noexcept nogil:
         return NPY_DATETIMEUNIT.NPY_FR_D
 
 
-# TODO: use in _matplotlib.converter?
 cpdef int64_t periods_per_day(
     NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns
 ) except? -1:

diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -1942,6 +1942,7 @@ class Timedelta(_Timedelta):
         ----------
         freq : str
             Frequency string indicating the rounding resolution.
+            It uses the same units as the class constructor :class:`~pandas.Timedelta`.
 
         Returns
         -------
@@ -1969,6 +1970,7 @@ class Timedelta(_Timedelta):
         ----------
         freq : str
             Frequency string indicating the flooring resolution.
+            It uses the same units as the class constructor :class:`~pandas.Timedelta`.
 
         Examples
         --------
@@ -1988,6 +1990,7 @@ class Timedelta(_Timedelta):
         ----------
         freq : str
             Frequency string indicating the ceiling resolution.
+            It uses the same units as the class constructor :class:`~pandas.Timedelta`.
 
         Examples
         --------

diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
@@ -1078,18 +1078,6 @@ cdef class _Timestamp(ABCTimestamp):
 
         return result
 
-    @property
-    def _short_repr(self) -> str:
-        # format a Timestamp with only _date_repr if possible
-        # otherwise _repr_base
-        if (self.hour == 0 and
-                self.minute == 0 and
-                self.second == 0 and
-                self.microsecond == 0 and
-                self.nanosecond == 0):
-            return self._date_repr
-        return self._repr_base
-
     # -----------------------------------------------------------------
     # Conversion Methods
 

diff --git a/pandas/core/arrays/arrow/__init__.py b/pandas/core/arrays/arrow/__init__.py
@@ -1,3 +1,4 @@
+from pandas.core.arrays.arrow.accessors import StructAccessor
 from pandas.core.arrays.arrow.array import ArrowExtensionArray
 
-__all__ = ["ArrowExtensionArray"]
+__all__ = ["ArrowExtensionArray", "StructAccessor"]