Skip to content

Commit

Permalink
BUG: setitem casting object Index to arrow strings (#55639)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Nov 21, 2023
1 parent e8d9a32 commit 639bd66
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 2 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.1.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ Fixed regressions
Bug fixes
~~~~~~~~~
- Bug in :class:`Series` constructor raising DeprecationWarning when ``index`` is a list of :class:`Series` (:issue:`55228`)
- Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
- Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
-

.. ---------------------------------------------------------------------------
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
from pandas.core.dtypes.common import (
is_list_like,
is_object_dtype,
is_string_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import NumpyEADtype
Expand Down Expand Up @@ -548,6 +549,10 @@ def sanitize_array(
# Avoid ending up with a NumpyExtensionArray
dtype = dtype.numpy_dtype

object_index = False
if isinstance(data, ABCIndex) and data.dtype == object and dtype is None:
object_index = True

# extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray
data = extract_array(data, extract_numpy=True, extract_range=True)

Expand Down Expand Up @@ -601,6 +606,13 @@ def sanitize_array(
subarr = data
if data.dtype == object:
subarr = maybe_infer_to_datetimelike(data)
if (
object_index
and using_pyarrow_string_dtype()
and is_string_dtype(subarr)
):
# Avoid inference when string option is set
subarr = data
elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
from pandas.core.arrays.string_ import StringDtype

Expand Down
10 changes: 9 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pandas._config import (
get_option,
using_copy_on_write,
using_pyarrow_string_dtype,
)

from pandas._libs import (
Expand Down Expand Up @@ -6919,7 +6920,14 @@ def insert(self, loc: int, item) -> Index:
loc = loc if loc >= 0 else loc - 1
new_values[loc] = item

return Index._with_infer(new_values, name=self.name)
idx = Index._with_infer(new_values, name=self.name)
if (
using_pyarrow_string_dtype()
and is_string_dtype(idx.dtype)
and new_values.dtype == object
):
idx = idx.astype(new_values.dtype)
return idx

def drop(
self,
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1930,7 +1930,7 @@ def test_add_new_column_infer_string():
df.loc[df["x"] == 1, "y"] = "1"
expected = DataFrame(
{"x": [1], "y": Series(["1"], dtype="string[pyarrow_numpy]")},
columns=Index(["x", "y"], dtype="string[pyarrow_numpy]"),
columns=Index(["x", "y"], dtype=object),
)
tm.assert_frame_equal(df, expected)

Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,15 @@ def test_setitem_frame_overwrite_with_ea_dtype(self, any_numeric_ea_dtype):
)
tm.assert_frame_equal(df, expected)

def test_setitem_string_option_object_index(self):
    """Assigning an object-dtype Index as a new column keeps object dtype.

    Regression test for GH#55638: with the ``future.infer_string`` option
    enabled, ``DataFrame.__setitem__`` must not cast an object-dtype
    ``Index`` to PyArrow-backed strings.
    """
    # GH#55638
    pytest.importorskip("pyarrow")
    result = DataFrame({"a": [1, 2]})
    # Only the assignment runs with string inference switched on; the
    # expected frame below is built outside the option context.
    with pd.option_context("future.infer_string", True):
        new_column = Index(["a", "b"], dtype=object)
        result["b"] = new_column
    object_values = Series(["a", "b"], dtype=object)
    expected = DataFrame({"a": [1, 2], "b": object_values})
    tm.assert_frame_equal(result, expected)

def test_setitem_frame_midx_columns(self):
# GH#49121
df = DataFrame({("a", "b"): [10]})
Expand Down

0 comments on commit 639bd66

Please sign in to comment.