From e28b4016f7594df9f1a57af55a3f17ab30c0fc89 Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 10 Jan 2024 23:40:55 +0100 Subject: [PATCH] Backport PR #56788 on branch 2.2.x (Bug: Interchange protocol implementation does not allow for empty string columns) (#56816) Backport PR #56788: Bug: Interchange protocol implementation does not allow for empty string columns Co-authored-by: yashb <74137864+roadrollerdafjorst@users.noreply.github.com> --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/core/interchange/column.py | 2 +- pandas/tests/interchange/test_impl.py | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index b5df0a319bc18..3a4c9438dbc21 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -936,6 +936,7 @@ Other - Bug in :func:`cut` and :func:`qcut` with ``datetime64`` dtype values with non-nanosecond units incorrectly returning nanosecond-unit bins (:issue:`56101`) - Bug in :func:`cut` incorrectly allowing cutting of timezone-aware datetimes with timezone-naive bins (:issue:`54964`) - Bug in :func:`infer_freq` and :meth:`DatetimeIndex.inferred_freq` with weekly frequencies and non-nanosecond resolutions (:issue:`55609`) +- Bug in :func:`pd.api.interchange.from_dataframe` where it raised ``NotImplementedError`` when handling empty string columns (:issue:`56703`) - Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`) - Bug in :meth:`DataFrame.from_dict` which would always sort the rows of the created :class:`DataFrame`. (:issue:`55683`) - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` raising a ``ValueError`` (:issue:`56478`) @@ -944,7 +945,6 @@ Other - Bug in the error message when assigning an empty :class:`DataFrame` to a column (:issue:`55956`) - Bug when time-like strings were being cast to :class:`ArrowDtype` with ``pyarrow.time64`` type (:issue:`56463`) - .. --------------------------------------------------------------------------- .. _whatsnew_220.contributors: diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py index 7f524d6823f30..ee1b5cd34a7f7 100644 --- a/pandas/core/interchange/column.py +++ b/pandas/core/interchange/column.py @@ -116,7 +116,7 @@ def dtype(self) -> tuple[DtypeKind, int, str, str]: Endianness.NATIVE, ) elif is_string_dtype(dtype): - if infer_dtype(self._col) == "string": + if infer_dtype(self._col) in ("string", "empty"): return ( DtypeKind.STRING, 8, diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 27ea8ccdd17b1..c7b13f9fd7b2d 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -364,6 +364,14 @@ def test_interchange_from_corrected_buffer_dtypes(monkeypatch) -> None: pd.api.interchange.from_dataframe(df) +def test_empty_string_column(): + # https://github.com/pandas-dev/pandas/issues/56703 + df = pd.DataFrame({"a": []}, dtype=str) + df2 = df.__dataframe__() + result = pd.api.interchange.from_dataframe(df2) + tm.assert_frame_equal(df, result) + + def test_large_string(): # GH#56702 pytest.importorskip("pyarrow")