From 57cb8f43a9942199b6f4d512ecf22baedce754c2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 3 Nov 2023 21:09:40 -0400 Subject: [PATCH 1/4] TST: Fix shares_memory for arrow string dtype --- pandas/_testing/__init__.py | 13 +++++++++++-- pandas/tests/util/test_shares_memory.py | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 81cd504119c38..ee173c6892743 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -30,6 +30,7 @@ is_float_dtype, is_sequence, is_signed_integer_dtype, + is_string_dtype, is_unsigned_integer_dtype, pandas_dtype, ) @@ -1044,10 +1045,18 @@ def shares_memory(left, right) -> bool: if isinstance(left, pd.core.arrays.IntervalArray): return shares_memory(left._left, right) or shares_memory(left._right, right) - if isinstance(left, ExtensionArray) and left.dtype == "string[pyarrow]": + if ( + isinstance(left, ExtensionArray) + and is_string_dtype(left.dtype) + and left.dtype.storage in ("pyarrow", "pyarrow_numpy") + ): # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669 left = cast("ArrowExtensionArray", left) - if isinstance(right, ExtensionArray) and right.dtype == "string[pyarrow]": + if ( + isinstance(right, ExtensionArray) + and is_string_dtype(right.dtype) + and right.dtype.storage in ("pyarrow", "pyarrow_numpy") + ): right = cast("ArrowExtensionArray", right) left_pa_data = left._pa_array right_pa_data = right._pa_array diff --git a/pandas/tests/util/test_shares_memory.py b/pandas/tests/util/test_shares_memory.py index ed8227a5c4307..e4dca0f36b111 100644 --- a/pandas/tests/util/test_shares_memory.py +++ b/pandas/tests/util/test_shares_memory.py @@ -1,3 +1,5 @@ +import pandas.util._test_decorators as td + import pandas as pd import pandas._testing as tm @@ -11,3 +13,17 @@ def test_shares_memory_interval(): assert tm.shares_memory(obj, obj[:2]) assert not tm.shares_memory(obj, obj._data.copy()) + + +@td.skip_if_no("pyarrow") +def test_shares_memory_string(): + import pyarrow as pa + + obj = pd.array(["a", "b"], dtype="string[pyarrow]") + assert tm.shares_memory(obj, obj) + + obj = pd.array(["a", "b"], dtype="string[pyarrow_numpy]") + assert tm.shares_memory(obj, obj) + + obj = pd.array(["a", "b"], dtype=pd.ArrowDtype(pa.string())) + assert tm.shares_memory(obj, obj) From 7697059e5b653f74ae5f3962ec02f5e1b715aa2d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 3 Nov 2023 21:09:51 -0400 Subject: [PATCH 2/4] TST: Fix shares_memory for arrow string dtype --- pandas/tests/util/test_shares_memory.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/util/test_shares_memory.py b/pandas/tests/util/test_shares_memory.py index e4dca0f36b111..090b04d5b6b01 100644 --- a/pandas/tests/util/test_shares_memory.py +++ b/pandas/tests/util/test_shares_memory.py @@ -17,6 +17,7 @@ def test_shares_memory_interval(): @td.skip_if_no("pyarrow") def test_shares_memory_string(): + # GH#55822 import pyarrow as pa obj = pd.array(["a", "b"], dtype="string[pyarrow]") From 285f7bbf1436cff70fc7701bd6b172bf905e2d66 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 3 Nov 2023 21:10:22 -0400 Subject: [PATCH 3/4] TST: Fix shares_memory for arrow string dtype --- pandas/tests/util/test_shares_memory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/util/test_shares_memory.py b/pandas/tests/util/test_shares_memory.py index 090b04d5b6b01..00a897d574a07 100644 --- a/pandas/tests/util/test_shares_memory.py +++ b/pandas/tests/util/test_shares_memory.py @@ -17,7 +17,7 @@ def test_shares_memory_interval(): @td.skip_if_no("pyarrow") def test_shares_memory_string(): - # GH#55822 + # GH#55823 import pyarrow as pa obj = pd.array(["a", "b"], dtype="string[pyarrow]") From dfc83f9351a0cf6df0d4153007e5b381c6d77ffc Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 4 Nov 2023 22:51:03 -0400 Subject: [PATCH 4/4] Fix mypy --- pandas/_testing/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index ee173c6892743..d9f6bb8454d93 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -1048,14 +1048,14 @@ def shares_memory(left, right) -> bool: if ( isinstance(left, ExtensionArray) and is_string_dtype(left.dtype) - and left.dtype.storage in ("pyarrow", "pyarrow_numpy") + and left.dtype.storage in ("pyarrow", "pyarrow_numpy") # type: ignore[attr-defined] # noqa: E501 ): # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669 left = cast("ArrowExtensionArray", left) if ( isinstance(right, ExtensionArray) and is_string_dtype(right.dtype) - and right.dtype.storage in ("pyarrow", "pyarrow_numpy") + and right.dtype.storage in ("pyarrow", "pyarrow_numpy") # type: ignore[attr-defined] # noqa: E501 ): right = cast("ArrowExtensionArray", right) left_pa_data = left._pa_array