Skip to content

Commit

Permalink
following up on comments from rhshadrach
Browse files Browse the repository at this point in the history
  • Loading branch information
veljanin committed Oct 3, 2024
1 parent ba7e83d commit ef256bf
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 2 deletions.
3 changes: 1 addition & 2 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -544,8 +544,7 @@ Bug fixes

Categorical
^^^^^^^^^^^
-
-
- Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` with ``dtype_backend="pyarrow"`` where an all-null categorical :class:`Series` raised an error or was converted to ``null[pyarrow]`` (:issue:`59934`)

Datetimelike
^^^^^^^^^^^^
Expand Down
1 change: 1 addition & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,7 @@ def convert_dtypes(
base_dtype.kind == "O" # type: ignore[union-attr]
and input_array.size > 0
and isna(input_array).all()
and not isinstance(input_array.dtype, CategoricalDtype)
):
import pyarrow as pa

Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/frame/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,26 @@ def test_convert_empty(self):
# Empty DataFrame can pass convert_dtypes, see GH#40393
empty_df = pd.DataFrame()
tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())

def test_convert_empty_categorical_to_pyarrow(self):
    """All-null categorical columns survive ``convert_dtypes`` unchanged.

    Regression test for GH#59934: with ``dtype_backend="pyarrow"`` the
    converted frame must equal the input, both for a categorical column
    without categories ("A") and for one with declared categories ("B").
    """
    # GH#59934
    # NOTE(review): presumably needs pyarrow installed; confirm whether a
    # pytest.importorskip("pyarrow") guard is required in this module.
    df = pd.DataFrame(
        {
            "A": pd.Categorical([None] * 5),
            "B": pd.Categorical([None] * 5, categories=["B1", "B2"]),
        }
    )
    # Snapshot the input before converting so that ``expected`` is not the
    # very object handed to ``convert_dtypes``; an accidental in-place
    # change would otherwise go unnoticed.
    expected = df.copy()
    converted = df.convert_dtypes(dtype_backend="pyarrow")
    tm.assert_frame_equal(converted, expected)

    assert converted.A.dtype == "category", "Dtype in column A is not 'category'"
    assert converted.B.dtype == "category", "Dtype in column B is not 'category'"
    assert converted.A.cat.categories.empty, "Categories in column A are not empty"

    # Use the ``in`` operator rather than calling ``__contains__`` directly.
    categories_b = converted.B.cat.categories
    assert (
        "B1" in categories_b and "B2" in categories_b
    ), "Categories in column B doesn't contain adequate categories"

def test_convert_dtypes_retain_column_names(self):
# GH#41435
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/series/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,20 @@ def test_convert_dtypes_pyarrow_null(self):
result = ser.convert_dtypes(dtype_backend="pyarrow")
expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
tm.assert_series_equal(result, expected)

def test_convert_empty_categorical_to_pyarrow(self):
    """All-null categorical Series survive ``convert_dtypes`` unchanged.

    Regression test for GH#59934: with ``dtype_backend="pyarrow"`` the
    result must equal the input, both when the categorical has no
    categories and when categories are declared but unused.
    """
    # GH#59934
    # NOTE(review): presumably needs pyarrow installed; confirm whether a
    # pytest.importorskip("pyarrow") guard is required in this module.

    # Case 1: no categories at all.
    ser1 = pd.Series(pd.Categorical([None] * 5))
    # Copy first so ``expected`` is independent of the object being converted.
    expected1 = ser1.copy()
    converted1 = ser1.convert_dtypes(dtype_backend="pyarrow")

    tm.assert_series_equal(converted1, expected1)
    assert converted1.dtype == "category", "Series dtype is not 'category'"
    assert converted1.cat.categories.empty, "Series categories are not empty"

    # Case 2: categories declared, every value missing.
    ser2 = pd.Series(pd.Categorical([None] * 5, categories=["S1", "S2"]))
    expected2 = ser2.copy()
    converted2 = ser2.convert_dtypes(dtype_backend="pyarrow")

    # The original only inspected the categories; also compare the whole
    # Series so the dtype and values of this case are checked.
    tm.assert_series_equal(converted2, expected2)

    # Use the ``in`` operator rather than calling ``__contains__`` directly.
    categories2 = converted2.cat.categories
    assert (
        "S1" in categories2 and "S2" in categories2
    ), "Categories in ser2 doesn't contain adequate categories"

0 comments on commit ef256bf

Please sign in to comment.