From e559a1b35cf9ed3c70e7416414452d09d744a2a2 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 16 Nov 2024 10:25:45 -0500 Subject: [PATCH] TST (string dtype): resolve xfails for frame methods --- pandas/core/frame.py | 4 ++++ pandas/core/internals/blocks.py | 3 ++- pandas/tests/frame/methods/test_astype.py | 11 ++--------- pandas/tests/frame/methods/test_combine_first.py | 7 +------ pandas/tests/frame/methods/test_cov_corr.py | 7 +------ pandas/tests/frame/methods/test_dropna.py | 6 +----- pandas/tests/frame/methods/test_dtypes.py | 8 +------- pandas/tests/frame/methods/test_interpolate.py | 1 - pandas/tests/frame/methods/test_reset_index.py | 3 --- pandas/tests/frame/methods/test_to_dict_of_blocks.py | 3 --- 10 files changed, 12 insertions(+), 41 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 34b448a0d8d1c..bef474b949094 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6273,6 +6273,10 @@ class max type else: to_insert = ((self.index, None),) + if len(new_obj.columns) == 0 and names: + target_dtype = Index(names).dtype + new_obj.columns = new_obj.columns.astype(target_dtype) + multi_col = isinstance(self.columns, MultiIndex) for j, (lev, lab) in enumerate(to_insert, start=1): i = self.index.nlevels - j diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f44ad926dda5c..639d5a410213f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2361,5 +2361,6 @@ def external_values(values: ArrayLike) -> ArrayLike: values.flags.writeable = False # TODO(CoW) we should also mark our ExtensionArrays as read-only - + if isinstance(values, ExtensionArray): + ... # this is why test_to_dict_of_blocks_item_cache fails return values diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index ab3743283ea13..b3fe7460d2da9 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas.util._test_decorators as td import pandas as pd @@ -745,7 +743,6 @@ def test_astype_tz_object_conversion(self, tz): result = result.astype({"tz": "datetime64[ns, Europe/London]"}) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_astype_dt64_to_string( self, frame_or_series, tz_naive_fixture, using_infer_string ): @@ -767,13 +764,9 @@ def test_astype_dt64_to_string( if frame_or_series is DataFrame: item = item.iloc[0] if using_infer_string: - assert item is np.nan - else: assert item is pd.NA - - # For non-NA values, we should match what we get for non-EA str - alt = obj.astype(str) - assert np.all(alt.iloc[1:] == result.iloc[1:]) + else: + assert item is np.nan def test_astype_td64_to_string(self, frame_or_series): # GH#41409 diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 87b7d5052a345..210394e33bd3b 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import is_dtype_equal @@ -32,7 +30,6 @@ def test_combine_first_mixed(self): combined = f.combine_first(g) tm.assert_frame_equal(combined, exp) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_combine_first(self, float_frame, using_infer_string): # disjoint head, tail = float_frame[:5], float_frame[5:] @@ -79,9 +76,7 @@ def test_combine_first(self, float_frame, using_infer_string): tm.assert_series_equal(combined["A"].reindex(g.index), g["A"]) # corner cases - warning = FutureWarning if using_infer_string else None - with tm.assert_produces_warning(warning, match="empty entries"): - comb = float_frame.combine_first(DataFrame()) + comb = float_frame.combine_first(DataFrame()) tm.assert_frame_equal(comb, float_frame) comb = DataFrame().combine_first(float_frame) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index c15952339ef18..02e0932eaaf82 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas.util._test_decorators as td import pandas as pd @@ -320,7 +318,6 @@ def test_corrwith_non_timeseries_data(self): for row in index[:4]: tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row])) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_corrwith_with_objects(self, using_infer_string): df1 = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), @@ -334,9 +331,7 @@ def test_corrwith_with_objects(self, using_infer_string): df2["obj"] = "bar" if using_infer_string: - import pyarrow as pa - - with pytest.raises(pa.lib.ArrowNotImplementedError, match="has no kernel"): + with pytest.raises(TypeError, match="Cannot perform reduction"): df1.corrwith(df2) else: with pytest.raises(TypeError, match="Could not convert"): diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index 4a60dc09cfe07..7da4149bd4ca8 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -4,8 +4,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( DataFrame, @@ -184,13 +182,11 @@ def test_dropna_multiple_axes(self): with pytest.raises(TypeError, match="supplying multiple axes"): inp.dropna(how="all", axis=(0, 1), inplace=True) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_dropna_tz_aware_datetime(self): # GH13407 - df = DataFrame() dt1 = datetime.datetime(2015, 1, 1, tzinfo=dateutil.tz.tzutc()) dt2 = datetime.datetime(2015, 2, 2, tzinfo=dateutil.tz.tzutc()) - df["Time"] = [dt1] + df = DataFrame({"Time": [dt1]}) result = df.dropna(axis=0) expected = DataFrame({"Time": [dt1]}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py index 1685f9ee331f5..dd8afccf2e25c 100644 --- a/pandas/tests/frame/methods/test_dtypes.py +++ b/pandas/tests/frame/methods/test_dtypes.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd @@ -135,13 +133,9 @@ def test_dtypes_timedeltas(self): ) tm.assert_series_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_frame_apply_np_array_return_type(self, using_infer_string): # GH 35517 df = DataFrame([["foo"]]) result = df.apply(lambda col: np.array("bar")) - if using_infer_string: - expected = Series([np.array(["bar"])]) - else: - expected = Series(["bar"]) + expected = Series(np.array(["bar"]), dtype=object) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index b8a34d5eaa226..7b206cc67d40d 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -64,7 +64,6 @@ def test_interpolate_inplace(self, frame_or_series, request): assert np.shares_memory(orig, obj.values) assert orig.squeeze()[1] == 1.5 - # TODO(infer_string) raise proper TypeError in case of string dtype @pytest.mark.xfail( using_string_dtype(), reason="interpolate doesn't work for string" ) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 88e43b678a7e4..80da849cc59d4 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -4,8 +4,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.core.dtypes.common import ( is_float_dtype, is_integer_dtype, @@ -644,7 +642,6 @@ def test_rest_index_multiindex_categorical_with_missing_values(self, codes): tm.assert_frame_equal(res, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.parametrize( "array, dtype", [ diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index 4f621b4643b70..0f1f643209db0 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas import ( DataFrame, MultiIndex, @@ -27,7 +25,6 @@ def test_no_copy_blocks(self, float_frame): assert _last_df is not None and not _last_df[column].equals(df[column]) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_to_dict_of_blocks_item_cache(): # Calling to_dict_of_blocks should not poison item_cache df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})