From e5f6d1dc7da42fd029e6e721d04e923c150efe5a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 4 Nov 2024 02:38:45 -0800 Subject: [PATCH 1/3] TST (string dtype): un-xfail string tests specific to object dtype (#59433) Co-authored-by: Joris Van den Bossche (cherry picked from commit dbeeb1f05bca199b3c1aed979e6ae72074a82243) --- pandas/tests/copy_view/test_interp_fillna.py | 11 +++------ pandas/tests/copy_view/test_replace.py | 3 +-- pandas/tests/test_algos.py | 26 ++++++++++++++------ 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index 338b76cbf1e7a..e15509c5140dd 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas import ( NA, ArrowDtype, @@ -137,10 +135,9 @@ def test_interp_fill_functions_inplace( assert np.shares_memory(arr, get_array(df, "a")) is (dtype == "float64") -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") -def test_interpolate_cleaned_fill_method(using_copy_on_write): - # Check that "method is set to None" case works correctly +def test_interpolate_cannot_with_object_dtype(using_copy_on_write): df = DataFrame({"a": ["a", np.nan, "c"], "b": 1}) + df["a"] = df["a"].astype(object) df_orig = df.copy() msg = "DataFrame.interpolate with object dtype" @@ -159,9 +156,9 @@ def test_interpolate_cleaned_fill_method(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") -def test_interpolate_object_convert_no_op(using_copy_on_write): +def test_interpolate_object_convert_no_op(using_copy_on_write, using_infer_string): df = DataFrame({"a": ["a", "b", "c"], "b": 1}) + df["a"] = df["a"].astype(object) arr_a = get_array(df, "a") msg = "DataFrame.interpolate with method=pad is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index bc3edb1f72214..9e24ce319e3bf 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -356,10 +356,9 @@ def test_replace_empty_list(using_copy_on_write): assert not df2._mgr._has_no_reference(0) -@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") @pytest.mark.parametrize("value", ["d", None]) def test_replace_object_list_inplace(using_copy_on_write, value): - df = DataFrame({"a": ["a", "b", "c"]}) + df = DataFrame({"a": ["a", "b", "c"]}, dtype=object) arr = get_array(df, "a") df.replace(["c"], value, inplace=True) if using_copy_on_write or value is None: diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 97d6415e0de05..3d686d730d872 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1704,12 +1704,17 @@ def test_unique_complex_numbers(self, array, expected): class TestHashTable: - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "htable, data", [ - (ht.PyObjectHashTable, [f"foo_{i}" for i in range(1000)]), - (ht.StringHashTable, [f"foo_{i}" for i in range(1000)]), + ( + ht.PyObjectHashTable, + np.array([f"foo_{i}" for i in range(1000)], dtype=object), + ), + ( + ht.StringHashTable, + np.array([f"foo_{i}" for i in range(1000)], dtype=object), + ), (ht.Float64HashTable, np.arange(1000, dtype=np.float64)), (ht.Int64HashTable, np.arange(1000, dtype=np.int64)), (ht.UInt64HashTable, np.arange(1000, dtype=np.uint64)), @@ -1717,7 +1722,7 @@ class TestHashTable: ) def test_hashtable_unique(self, htable, data, writable): # output of maker has guaranteed unique elements - s = Series(data) + s = Series(data, dtype=data.dtype) if htable == ht.Float64HashTable: # add NaN for float column s.loc[500] = np.nan @@ -1744,12 +1749,17 @@ def test_hashtable_unique(self, htable, data, writable): reconstr = result_unique[result_inverse] tm.assert_numpy_array_equal(reconstr, s_duplicated.values) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "htable, data", [ - (ht.PyObjectHashTable, [f"foo_{i}" for i in range(1000)]), - (ht.StringHashTable, [f"foo_{i}" for i in range(1000)]), + ( + ht.PyObjectHashTable, + np.array([f"foo_{i}" for i in range(1000)], dtype=object), + ), + ( + ht.StringHashTable, + np.array([f"foo_{i}" for i in range(1000)], dtype=object), + ), (ht.Float64HashTable, np.arange(1000, dtype=np.float64)), (ht.Int64HashTable, np.arange(1000, dtype=np.int64)), (ht.UInt64HashTable, np.arange(1000, dtype=np.uint64)), @@ -1757,7 +1767,7 @@ def test_hashtable_unique(self, htable, data, writable): ) def test_hashtable_factorize(self, htable, writable, data): # output of maker has guaranteed unique elements - s = Series(data) + s = Series(data, dtype=data.dtype) if htable == ht.Float64HashTable: # add NaN for float column s.loc[500] = np.nan From 712d19f8b397d391083a39f2a58517f5ccdbd632 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 4 Nov 2024 13:52:23 +0100 Subject: [PATCH 2/3] fixup tests for 2.3 --- pandas/tests/copy_view/test_interp_fillna.py | 2 +- pandas/tests/copy_view/test_replace.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index e15509c5140dd..d0c4fa53faab9 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -165,7 +165,7 @@ def test_interpolate_object_convert_no_op(using_copy_on_write, using_infer_strin df.interpolate(method="pad", inplace=True) # Now CoW makes a copy, it should not! - if using_copy_on_write: + if using_copy_on_write and not using_infer_string: assert df._mgr._has_no_reference(0) assert np.shares_memory(arr_a, get_array(df, "a")) diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index 9e24ce319e3bf..c260ad9604616 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -357,11 +357,14 @@ def test_replace_empty_list(using_copy_on_write): @pytest.mark.parametrize("value", ["d", None]) -def test_replace_object_list_inplace(using_copy_on_write, value): +def test_replace_object_list_inplace(using_copy_on_write, using_infer_string, value): df = DataFrame({"a": ["a", "b", "c"]}, dtype=object) arr = get_array(df, "a") - df.replace(["c"], value, inplace=True) - if using_copy_on_write or value is None: + # with future.infer_string we get warning about object dtype getting cast + warning = FutureWarning if using_infer_string and value is not None else None + with tm.assert_produces_warning(warning): + df.replace(["c"], value, inplace=True) + if (using_copy_on_write or value is None) and not warning: assert np.shares_memory(arr, get_array(df, "a")) else: # This could be inplace From 3cf8d5f8c442556fbd81328ce635ab90a486c222 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 18 Dec 2024 19:24:12 +0100 Subject: [PATCH 3/3] simplify test --- pandas/tests/copy_view/test_replace.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index c260ad9604616..9e24ce319e3bf 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -357,14 +357,11 @@ def test_replace_empty_list(using_copy_on_write): @pytest.mark.parametrize("value", ["d", None]) -def test_replace_object_list_inplace(using_copy_on_write, using_infer_string, value): +def test_replace_object_list_inplace(using_copy_on_write, value): df = DataFrame({"a": ["a", "b", "c"]}, dtype=object) arr = get_array(df, "a") - # with future.infer_string we get warning about object dtype getting cast - warning = FutureWarning if using_infer_string and value is not None else None - with tm.assert_produces_warning(warning): - df.replace(["c"], value, inplace=True) - if (using_copy_on_write or value is None) and not warning: + df.replace(["c"], value, inplace=True) + if using_copy_on_write or value is None: assert np.shares_memory(arr, get_array(df, "a")) else: # This could be inplace