From 943c3cb38d23e60905891724f878b91483597c83 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 11 Oct 2023 10:54:57 -0700 Subject: [PATCH] BUG: repr of inf values with use_inf_as_na (#55483) * BUG: repr of inf values with use_inf_as_na * GH ref --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/_libs/missing.pyi | 1 - pandas/_libs/missing.pyx | 25 ------------------------- pandas/core/indexes/base.py | 13 ++----------- pandas/io/formats/format.py | 22 ++++++++++------------ pandas/tests/frame/test_repr_info.py | 2 +- 6 files changed, 14 insertions(+), 50 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 29a2d5c0b5877..eec82ae26afcc 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -394,6 +394,7 @@ Other ^^^^^ - Bug in :func:`cut` incorrectly allowing cutting of timezone-aware datetimes with timezone-naive bins (:issue:`54964`) - Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`) +- Bug in rendering ``inf`` values inside a a :class:`DataFrame` with the ``use_inf_as_na`` option enabled (:issue:`55483`) - Bug in rendering a :class:`Series` with a :class:`MultiIndex` when one of the index level's names is 0 not having that name displayed (:issue:`55415`) - diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi index d5c9f1342a089..282dcee3ed6cf 100644 --- a/pandas/_libs/missing.pyi +++ b/pandas/_libs/missing.pyi @@ -14,4 +14,3 @@ def isneginf_scalar(val: object) -> bool: ... def checknull(val: object, inf_as_na: bool = ...) -> bool: ... def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ... def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ... -def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ... diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index e3e7d8daa03e1..8ef59b46ca25f 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -255,31 +255,6 @@ cdef bint checknull_with_nat_and_na(object obj): return checknull_with_nat(obj) or obj is C_NA -@cython.wraparound(False) -@cython.boundscheck(False) -def is_float_nan(values: ndarray) -> ndarray: - """ - True for elements which correspond to a float nan - - Returns - ------- - ndarray[bool] - """ - cdef: - ndarray[uint8_t] result - Py_ssize_t i, N - object val - - N = len(values) - result = np.zeros(N, dtype=np.uint8) - - for i in range(N): - val = values[i] - if util.is_nan(val): - result[i] = True - return result.view(bool) - - @cython.wraparound(False) @cython.boundscheck(False) def is_numeric_na(values: ndarray) -> ndarray: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c3cab965041e0..d749235e2cd2c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -37,7 +37,6 @@ is_datetime_array, no_default, ) -from pandas._libs.missing import is_float_nan from pandas._libs.tslibs import ( IncompatibleFrequency, OutOfBoundsDatetime, @@ -1390,16 +1389,8 @@ def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str if is_object_dtype(values.dtype) or is_string_dtype(values.dtype): values = np.asarray(values) - values = lib.maybe_convert_objects(values, safe=True) - - result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values] - - # could have nans - mask = is_float_nan(values) - if mask.any(): - result_arr = np.array(result) - result_arr[mask] = na_rep - result = result_arr.tolist() + # TODO: why do we need different justify for these cases? + result = trim_front(format_array(values, None, justify="all")) else: result = trim_front(format_array(values, None, justify="left")) return header + result diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 322f7f88494de..bb976b3a0208e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1216,18 +1216,16 @@ def _format_strings(self) -> list[str]: def _format(x): if self.na_rep is not None and is_scalar(x) and isna(x): - try: - # try block for np.isnat specifically - # determine na_rep if x is None or NaT-like - if x is None: - return "None" - elif x is NA: - return str(NA) - elif x is NaT or np.isnat(x): - return "NaT" - except (TypeError, ValueError): - # np.isnat only handles datetime or timedelta objects - pass + if x is None: + return "None" + elif x is NA: + return str(NA) + elif lib.is_float(x) and np.isinf(x): + # TODO(3.0): this will be unreachable when use_inf_as_na + # deprecation is enforced + return str(x) + elif x is NaT or isinstance(x, (np.datetime64, np.timedelta64)): + return "NaT" return self.na_rep elif isinstance(x, PandasObject): return str(x) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 0634b8268c04c..63ecdfa5e001b 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -411,7 +411,7 @@ def test_to_records_with_na_record(self): def test_to_records_with_inf_as_na_record(self): # GH 48526 expected = """ NaN inf record -0 NaN b [0, inf, b] +0 inf b [0, inf, b] 1 NaN NaN [1, nan, nan] 2 e f [2, e, f]""" msg = "use_inf_as_na option is deprecated"