Merge branch 'main' of https://github.com/pandas-dev/pandas into json…

…_overflow
pandas-dev · Oct 12, 2023 · 99da098 · 99da098
2 parents dd3cc48 + c9a98f0
commit 99da098
Show file tree

Hide file tree

Showing 31 changed files with 181 additions and 127 deletions.
diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
@@ -896,9 +896,9 @@ into ``freq`` keyword arguments. The available date offsets and associated frequ
     :class:`~pandas.tseries.offsets.BQuarterBegin`, ``'BQS'``, "business quarter begin"
     :class:`~pandas.tseries.offsets.FY5253Quarter`, ``'REQ'``, "retail (aka 52-53 week) quarter"
     :class:`~pandas.tseries.offsets.YearEnd`, ``'Y'``, "calendar year end"
-    :class:`~pandas.tseries.offsets.YearBegin`, ``'AS'`` or ``'BYS'``,"calendar year begin"
-    :class:`~pandas.tseries.offsets.BYearEnd`, ``'BA'``, "business year end"
-    :class:`~pandas.tseries.offsets.BYearBegin`, ``'BAS'``, "business year begin"
+    :class:`~pandas.tseries.offsets.YearBegin`, ``'YS'`` or ``'BYS'``,"calendar year begin"
+    :class:`~pandas.tseries.offsets.BYearEnd`, ``'BY'``, "business year end"
+    :class:`~pandas.tseries.offsets.BYearBegin`, ``'BYS'``, "business year begin"
     :class:`~pandas.tseries.offsets.FY5253`, ``'RE'``, "retail (aka 52-53 week) year"
     :class:`~pandas.tseries.offsets.Easter`, None, "Easter holiday"
     :class:`~pandas.tseries.offsets.BusinessHour`, ``'bh'``, "business hour"
@@ -1259,9 +1259,9 @@ frequencies. We will refer to these aliases as *offset aliases*.
     "QS", "quarter start frequency"
     "BQS", "business quarter start frequency"
     "Y", "year end frequency"
-    "BA, BY", "business year end frequency"
-    "AS, YS", "year start frequency"
-    "BAS, BYS", "business year start frequency"
+    "BY", "business year end frequency"
+    "YS", "year start frequency"
+    "BYS", "business year start frequency"
     "h", "hourly frequency"
     "bh", "business hour frequency"
     "cbh", "custom business hour frequency"
@@ -1692,7 +1692,7 @@ the end of the interval.
 .. warning::
 
     The default values for ``label`` and ``closed`` is '**left**' for all
-    frequency offsets except for 'ME', 'Y', 'Q', 'BM', 'BA', 'BQ', and 'W'
+    frequency offsets except for 'ME', 'Y', 'Q', 'BM', 'BY', 'BQ', and 'W'
     which all have a default of 'right'.
 
     This might unintendedly lead to looking ahead, where the value for a later

diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst
@@ -886,11 +886,23 @@ This would happen with a ``lexsorted``, but non-monotonic levels. (:issue:`15622
 
 This is *unchanged* from prior versions, but shown for illustration purposes:
 
-.. ipython:: python
+.. code-block:: python
 
-   df = pd.DataFrame(np.arange(6), columns=['value'],
-                     index=pd.MultiIndex.from_product([list('BA'), range(3)]))
-   df
+   In [81]: df = pd.DataFrame(np.arange(6), columns=['value'],
+      ....:                   index=pd.MultiIndex.from_product([list('BA'), range(3)]))
+      ....:
+   In [82]: df
+
+   Out[82]:
+        value
+   B 0      0
+     1      1
+     2      2
+   A 0      3
+     1      4
+     2      5
+
+   [6 rows x 1 columns]
 
 .. code-block:: python
 

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
@@ -395,6 +395,7 @@ Other
 ^^^^^
 - Bug in :func:`cut` incorrectly allowing cutting of timezone-aware datetimes with timezone-naive bins (:issue:`54964`)
 - Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`)
+- Bug in rendering ``inf`` values inside a a :class:`DataFrame` with the ``use_inf_as_na`` option enabled (:issue:`55483`)
 - Bug in rendering a :class:`Series` with a :class:`MultiIndex` when one of the index level's names is 0 not having that name displayed (:issue:`55415`)
 -
 

diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi
@@ -14,4 +14,3 @@ def isneginf_scalar(val: object) -> bool: ...
 def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
 def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
 def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
-def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
@@ -255,31 +255,6 @@ cdef bint checknull_with_nat_and_na(object obj):
     return checknull_with_nat(obj) or obj is C_NA
 
 
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def is_float_nan(values: ndarray) -> ndarray:
-    """
-    True for elements which correspond to a float nan
-
-    Returns
-    -------
-    ndarray[bool]
-    """
-    cdef:
-        ndarray[uint8_t] result
-        Py_ssize_t i, N
-        object val
-
-    N = len(values)
-    result = np.zeros(N, dtype=np.uint8)
-
-    for i in range(N):
-        val = values[i]
-        if util.is_nan(val):
-            result[i] = True
-    return result.view(bool)
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def is_numeric_na(values: ndarray) -> ndarray:

diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx
@@ -192,9 +192,6 @@ OFFSET_TO_PERIOD_FREQSTR: dict = {
     "BQS": "Q",
     "QS": "Q",
     "BQ": "Q",
-    "BA": "Y",
-    "AS": "Y",
-    "BAS": "Y",
     "MS": "M",
     "D": "D",
     "B": "B",
@@ -205,9 +202,9 @@ OFFSET_TO_PERIOD_FREQSTR: dict = {
     "ns": "ns",
     "h": "h",
     "Q": "Q",
-    "Y": "Y",
     "W": "W",
     "ME": "M",
+    "Y": "Y",
     "BY": "Y",
     "YS": "Y",
     "BYS": "Y",
@@ -244,6 +241,45 @@ DEPR_ABBREVS: dict[str, str]= {
     "A-SEP": "Y-SEP",
     "A-OCT": "Y-OCT",
     "A-NOV": "Y-NOV",
+    "BA": "BY",
+    "BA-DEC": "BY-DEC",
+    "BA-JAN": "BY-JAN",
+    "BA-FEB": "BY-FEB",
+    "BA-MAR": "BY-MAR",
+    "BA-APR": "BY-APR",
+    "BA-MAY": "BY-MAY",
+    "BA-JUN": "BY-JUN",
+    "BA-JUL": "BY-JUL",
+    "BA-AUG": "BY-AUG",
+    "BA-SEP": "BY-SEP",
+    "BA-OCT": "BY-OCT",
+    "BA-NOV": "BY-NOV",
+    "AS": "YS",
+    "AS-DEC": "YS-DEC",
+    "AS-JAN": "YS-JAN",
+    "AS-FEB": "YS-FEB",
+    "AS-MAR": "YS-MAR",
+    "AS-APR": "YS-APR",
+    "AS-MAY": "YS-MAY",
+    "AS-JUN": "YS-JUN",
+    "AS-JUL": "YS-JUL",
+    "AS-AUG": "YS-AUG",
+    "AS-SEP": "YS-SEP",
+    "AS-OCT": "YS-OCT",
+    "AS-NOV": "YS-NOV",
+    "BAS": "BYS",
+    "BAS-DEC": "BYS-DEC",
+    "BAS-JAN": "BYS-JAN",
+    "BAS-FEB": "BYS-FEB",
+    "BAS-MAR": "BYS-MAR",
+    "BAS-APR": "BYS-APR",
+    "BAS-MAY": "BYS-MAY",
+    "BAS-JUN": "BYS-JUN",
+    "BAS-JUL": "BYS-JUL",
+    "BAS-AUG": "BYS-AUG",
+    "BAS-SEP": "BYS-SEP",
+    "BAS-OCT": "BYS-OCT",
+    "BAS-NOV": "BYS-NOV",
     "H": "h",
     "BH": "bh",
     "CBH": "cbh",

diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx
@@ -253,8 +253,8 @@ def get_start_end_field(
         # month of year. Other offsets use month, startingMonth as ending
         # month of year.
 
-        if (freqstr[0:2] in ["MS", "QS", "AS"]) or (
-                freqstr[1:3] in ["MS", "QS", "AS"]):
+        if (freqstr[0:2] in ["MS", "QS", "YS"]) or (
+                freqstr[1:3] in ["MS", "QS", "YS"]):
             end_month = 12 if month_kw == 1 else month_kw - 1
             start_month = month_kw
         else:

diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
@@ -2414,7 +2414,7 @@ cdef class BYearEnd(YearOffset):
 
     _outputName = "BusinessYearEnd"
     _default_month = 12
-    _prefix = "BA"
+    _prefix = "BY"
     _day_opt = "business_end"
 
 
@@ -2453,7 +2453,7 @@ cdef class BYearBegin(YearOffset):
 
     _outputName = "BusinessYearBegin"
     _default_month = 1
-    _prefix = "BAS"
+    _prefix = "BYS"
     _day_opt = "business_start"
 
 
@@ -2552,7 +2552,7 @@ cdef class YearBegin(YearOffset):
     """
 
     _default_month = 1
-    _prefix = "AS"
+    _prefix = "YS"
     _day_opt = "start"
 
 
@@ -4540,10 +4540,10 @@ CDay = CustomBusinessDay
 prefix_mapping = {
     offset._prefix: offset
     for offset in [
-        YearBegin,  # 'AS'
+        YearBegin,  # 'YS'
         YearEnd,  # 'Y'
-        BYearBegin,  # 'BAS'
-        BYearEnd,  # 'BA'
+        BYearBegin,  # 'BYS'
+        BYearEnd,  # 'BY'
         BusinessDay,  # 'B'
         BusinessMonthBegin,  # 'BMS'
         BusinessMonthEnd,  # 'BM'
@@ -4584,12 +4584,9 @@ _lite_rule_alias = {
     "Q": "Q-DEC",
 
     "Y": "Y-DEC",      # YearEnd(month=12),
-    "AS": "AS-JAN",    # YearBegin(month=1),
-    "YS": "AS-JAN",
-    "BA": "BA-DEC",    # BYearEnd(month=12),
-    "BY": "BA-DEC",
-    "BAS": "BAS-JAN",  # BYearBegin(month=1),
-    "BYS": "BAS-JAN",
+    "YS": "YS-JAN",    # YearBegin(month=1),
+    "BY": "BY-DEC",    # BYearEnd(month=12),
+    "BYS": "BYS-JAN",  # BYearBegin(month=1),
 
     "Min": "min",
     "min": "min",

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -2526,7 +2526,7 @@ def _round_temporally(
             raise ValueError(f"Must specify a valid frequency: {freq}")
         pa_supported_unit = {
             "Y": "year",
-            "AS": "year",
+            "YS": "year",
             "Q": "quarter",
             "QS": "quarter",
             "M": "month",

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -1663,7 +1663,14 @@ def __repr__(self) -> str:
             self, self._formatter(), indent_for_name=False
         ).rstrip(", \n")
         class_name = f"<{type(self).__name__}>\n"
-        return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}"
+        footer = self._get_repr_footer()
+        return f"{class_name}{data}\n{footer}"
+
+    def _get_repr_footer(self) -> str:
+        # GH#24278
+        if self.ndim > 1:
+            return f"Shape: {self.shape}, dtype: {self.dtype}"
+        return f"Length: {len(self)}, dtype: {self.dtype}"
 
     def _repr_2d(self) -> str:
         from pandas.io.formats.printing import format_object_summary
@@ -1679,7 +1686,8 @@ def _repr_2d(self) -> str:
         ]
         data = ",\n".join(lines)
         class_name = f"<{type(self).__name__}>"
-        return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}"
+        footer = self._get_repr_footer()
+        return f"{class_name}\n[\n{data}\n]\n{footer}"
 
     def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
         """

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -2177,7 +2177,7 @@ def _repr_categories(self) -> list[str]:
         category_strs = [x.strip() for x in category_strs]
         return category_strs
 
-    def _repr_categories_info(self) -> str:
+    def _get_repr_footer(self) -> str:
         """
         Returns a string representation of the footer.
         """
@@ -2229,7 +2229,7 @@ def __repr__(self) -> str:
         """
         String representation.
         """
-        footer = self._repr_categories_info()
+        footer = self._get_repr_footer()
         length = len(self)
         max_len = 10
         if length > max_len:

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -37,7 +37,6 @@
     is_datetime_array,
     no_default,
 )
-from pandas._libs.missing import is_float_nan
 from pandas._libs.tslibs import (
     IncompatibleFrequency,
     OutOfBoundsDatetime,
@@ -1390,16 +1389,8 @@ def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str
 
         if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
             values = np.asarray(values)
-            values = lib.maybe_convert_objects(values, safe=True)
-
-            result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]
-
-            # could have nans
-            mask = is_float_nan(values)
-            if mask.any():
-                result_arr = np.array(result)
-                result_arr[mask] = na_rep
-                result = result_arr.tolist()
+            # TODO: why do we need different justify for these cases?
+            result = trim_front(format_array(values, None, justify="all"))
         else:
             result = trim_front(format_array(values, None, justify="left"))
         return header + result

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -992,11 +992,11 @@ def date_range(
 
     **Specify a unit**
 
-    >>> pd.date_range(start="2017-01-01", periods=10, freq="100AS", unit="s")
+    >>> pd.date_range(start="2017-01-01", periods=10, freq="100YS", unit="s")
     DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01',
                    '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01',
                    '2817-01-01', '2917-01-01'],
-                  dtype='datetime64[s]', freq='100AS-JAN')
+                  dtype='datetime64[s]', freq='100YS-JAN')
     """
     if freq is None and com.any_none(periods, start, end):
         freq = "D"

diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -2101,7 +2101,7 @@ def __init__(
         else:
             freq = to_offset(freq)
 
-        end_types = {"ME", "Y", "Q", "BM", "BA", "BQ", "W"}
+        end_types = {"ME", "Y", "Q", "BM", "BY", "BQ", "W"}
         rule = freq.rule_code
         if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types):
             if closed is None:
@@ -2299,7 +2299,7 @@ def _adjust_bin_edges(
 
         if self.freq.name in ("BM", "ME", "W") or self.freq.name.split("-")[0] in (
             "BQ",
-            "BA",
+            "BY",
             "Q",
             "Y",
             "W",

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -5729,7 +5729,7 @@ def to_timestamp(
         2023-01-01    1
         2024-01-01    2
         2025-01-01    3
-        Freq: AS-JAN, dtype: int64
+        Freq: YS-JAN, dtype: int64
 
         Using `freq` which is the offset that the Timestamps will have
-Original file line number
+Diff line change
@@ Expand Up / @@ -5729,7 +5729,7 @@ def to_timestamp( @@
 -01-01    1
 -01-01    2
 -01-01    3
-            Freq: AS-JAN, dtype: int64
+            Freq: YS-JAN, dtype: int64
             Using `freq` which is the offset that the Timestamps will have
@@ Expand Down @@