diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 885adb3543b46..2c441a6ed91c1 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -10,8 +10,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas._libs.tslibs.timezones import maybe_get_tz from pandas.core.dtypes.common import ( @@ -556,7 +554,6 @@ def test_strftime(self): ) tm.assert_series_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_strftime_dt64_days(self): ser = Series(date_range("20130101", periods=5)) ser.iloc[0] = pd.NaT @@ -571,7 +568,6 @@ def test_strftime_dt64_days(self): expected = Index( ["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"], - dtype=np.object_, ) # dtype may be S10 or U10 depending on python version tm.assert_index_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 9f310d8c8ab5f..d3556b644c4bf 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -6,8 +6,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.errors import IndexingError from pandas import ( @@ -251,18 +249,29 @@ def test_slice(string_series, object_series): tm.assert_series_equal(string_series, original) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_timedelta_assignment(): # GH 8209 s = Series([], dtype=object) s.loc["B"] = timedelta(1) - tm.assert_series_equal(s, Series(Timedelta("1 days"), index=["B"])) + expected = Series( + Timedelta("1 days"), dtype="timedelta64[ns]", index=Index(["B"], dtype=object) + ) + tm.assert_series_equal(s, expected) s = s.reindex(s.index.insert(0, "A")) - tm.assert_series_equal(s, Series([np.nan, Timedelta("1 days")], index=["A", "B"])) + expected = Series( + [np.nan, Timedelta("1 days")], + dtype="timedelta64[ns]", + index=Index(["A", "B"], dtype=object), + ) + tm.assert_series_equal(s, expected) s.loc["A"] = timedelta(1) - expected = Series(Timedelta("1 days"), index=["A", "B"]) + expected = Series( + Timedelta("1 days"), + dtype="timedelta64[ns]", + index=Index(["A", "B"], dtype=object), + ) tm.assert_series_equal(s, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index ed5cb5a8d1237..82c616132456b 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -9,12 +9,7 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - -from pandas.compat import ( - HAS_PYARROW, - WASM, -) +from pandas.compat import WASM from pandas.compat.numpy import np_version_gte1p24 from pandas.errors import IndexingError @@ -32,6 +27,7 @@ NaT, Period, Series, + StringDtype, Timedelta, Timestamp, array, @@ -535,14 +531,16 @@ def test_append_timedelta_does_not_cast(self, td, using_infer_string, request): tm.assert_series_equal(ser, expected) assert isinstance(ser["td"], Timedelta) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_with_expansion_type_promotion(self): # GH#12599 ser = Series(dtype=object) ser["a"] = Timestamp("2016-01-01") ser["b"] = 3.0 ser["c"] = "foo" - expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"]) + expected = Series( + [Timestamp("2016-01-01"), 3.0, "foo"], + index=Index(["a", "b", "c"], dtype=object), + ) tm.assert_series_equal(ser, expected) def test_setitem_not_contained(self, string_series): @@ -826,11 +824,6 @@ def test_mask_key(self, obj, key, expected, raises, val, indexer_sli): else: indexer_sli(obj)[mask] = val - @pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, - reason="TODO(infer_string)", - strict=False, - ) def test_series_where(self, obj, key, expected, raises, val, is_inplace): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True @@ -846,6 +839,11 @@ def test_series_where(self, obj, key, expected, raises, val, is_inplace): obj = obj.copy() arr = obj._values + if raises and obj.dtype == "string": + with pytest.raises(TypeError, match="Invalid value"): + obj.where(~mask, val) + return + res = obj.where(~mask, val) if val is NA and res.dtype == object: @@ -858,12 +856,11 @@ def test_series_where(self, obj, key, expected, raises, val, is_inplace): self._check_inplace(is_inplace, orig, arr, obj) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) - def test_index_where(self, obj, key, expected, raises, val, using_infer_string): + def test_index_where(self, obj, key, expected, raises, val): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True - if using_infer_string and obj.dtype == object: + if raises and obj.dtype == "string": with pytest.raises(TypeError, match="Invalid value"): Index(obj).where(~mask, val) else: @@ -871,12 +868,11 @@ def test_index_where(self, obj, key, expected, raises, val, using_infer_string): expected_idx = Index(expected, dtype=expected.dtype) tm.assert_index_equal(res, expected_idx) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) - def test_index_putmask(self, obj, key, expected, raises, val, using_infer_string): + def test_index_putmask(self, obj, key, expected, raises, val): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True - if using_infer_string and obj.dtype == object: + if raises and obj.dtype == "string": with pytest.raises(TypeError, match="Invalid value"): Index(obj).putmask(mask, val) else: @@ -1372,6 +1368,19 @@ def raises(self): return False +@pytest.mark.parametrize( + "val,exp_dtype,raises", + [ + (1, object, True), + ("e", StringDtype(na_value=np.nan), False), + ], +) +class TestCoercionString(CoercionTest): + @pytest.fixture + def obj(self): + return Series(["a", "b", "c", "d"], dtype=StringDtype(na_value=np.nan)) + + @pytest.mark.parametrize( "val,exp_dtype,raises", [ diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index 053c290999f2f..663ee8ad0ee38 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.core.dtypes.common import is_integer import pandas as pd @@ -231,7 +229,6 @@ def test_where_ndframe_align(): tm.assert_series_equal(out, expected) -@pytest.mark.xfail(using_string_dtype(), reason="can't set ints into string") def test_where_setitem_invalid(): # GH 2702 # make sure correct exceptions are raised on invalid list assignment @@ -241,7 +238,7 @@ def test_where_setitem_invalid(): "different length than the value" ) # slice - s = Series(list("abc")) + s = Series(list("abc"), dtype=object) with pytest.raises(ValueError, match=msg("slice")): s[0:3] = list(range(27)) @@ -251,18 +248,18 @@ def test_where_setitem_invalid(): tm.assert_series_equal(s.astype(np.int64), expected) # slice with step - s = Series(list("abcdef")) + s = Series(list("abcdef"), dtype=object) with pytest.raises(ValueError, match=msg("slice")): s[0:4:2] = list(range(27)) - s = Series(list("abcdef")) + s = Series(list("abcdef"), dtype=object) s[0:4:2] = list(range(2)) expected = Series([0, "b", 1, "d", "e", "f"]) tm.assert_series_equal(s, expected) # neg slices - s = Series(list("abcdef")) + s = Series(list("abcdef"), dtype=object) with pytest.raises(ValueError, match=msg("slice")): s[:-1] = list(range(27)) @@ -272,18 +269,18 @@ def test_where_setitem_invalid(): tm.assert_series_equal(s, expected) # list - s = Series(list("abc")) + s = Series(list("abc"), dtype=object) with pytest.raises(ValueError, match=msg("list-like")): s[[0, 1, 2]] = list(range(27)) - s = Series(list("abc")) + s = Series(list("abc"), dtype=object) with pytest.raises(ValueError, match=msg("list-like")): s[[0, 1, 2]] = list(range(2)) # scalar - s = Series(list("abc")) + s = Series(list("abc"), dtype=object) s[0] = list(range(10)) expected = Series([list(range(10)), "b", "c"]) tm.assert_series_equal(s, expected) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 611fcc114db6c..1ebef333f054a 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd import pandas._testing as tm from pandas.core.arrays import IntervalArray @@ -628,15 +626,23 @@ def test_replace_nullable_numeric(self): with pytest.raises(TypeError, match="Invalid value"): ints.replace(1, 9.5) - @pytest.mark.xfail(using_string_dtype(), reason="can't fill 1 in string") @pytest.mark.parametrize("regex", [False, True]) def test_replace_regex_dtype_series(self, regex): # GH-48644 - series = pd.Series(["0"]) + series = pd.Series(["0"], dtype=object) expected = pd.Series([1], dtype=object) result = series.replace(to_replace="0", value=1, regex=regex) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("regex", [False, True]) + def test_replace_regex_dtype_series_string(self, regex, using_infer_string): + if not using_infer_string: + # then this is object dtype which is already tested above + return + series = pd.Series(["0"], dtype="str") + with pytest.raises(TypeError, match="Invalid value"): + series.replace(to_replace="0", value=1, regex=regex) + def test_replace_different_int_types(self, any_int_numpy_dtype): # GH#45311 labs = pd.Series([1, 1, 1, 0, 0, 2, 2, 2], dtype=any_int_numpy_dtype) @@ -656,21 +662,18 @@ def test_replace_value_none_dtype_numeric(self, val): expected = pd.Series([1, None], dtype=object) tm.assert_series_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") - def test_replace_change_dtype_series(self, using_infer_string): + def test_replace_change_dtype_series(self): # GH#25797 - df = pd.DataFrame.from_dict({"Test": ["0.5", True, "0.6"]}) - warn = FutureWarning if using_infer_string else None - with tm.assert_produces_warning(warn, match="Downcasting"): - df["Test"] = df["Test"].replace([True], [np.nan]) - expected = pd.DataFrame.from_dict({"Test": ["0.5", np.nan, "0.6"]}) + df = pd.DataFrame({"Test": ["0.5", True, "0.6"]}, dtype=object) + df["Test"] = df["Test"].replace([True], [np.nan]) + expected = pd.DataFrame({"Test": ["0.5", np.nan, "0.6"]}, dtype=object) tm.assert_frame_equal(df, expected) - df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]}) + df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object) df["Test"] = df["Test"].replace([None], [np.nan]) tm.assert_frame_equal(df, expected) - df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]}) + df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object) df["Test"] = df["Test"].fillna(np.nan) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/series/methods/test_unstack.py b/pandas/tests/series/methods/test_unstack.py index 8c4f0ff3eaea7..f61e20c43657d 100644 --- a/pandas/tests/series/methods/test_unstack.py +++ b/pandas/tests/series/methods/test_unstack.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( DataFrame, @@ -136,11 +134,10 @@ def test_unstack_mixed_type_name_in_multiindex( tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_unstack_multi_index_categorical_values(): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), - columns=Index(list("ABCD"), dtype=object), + columns=Index(list("ABCD")), index=date_range("2000-01-01", periods=10, freq="B"), ) mi = df.stack().index.rename(["major", "minor"]) diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index 8516018e8aa93..8f63819b09238 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -413,6 +413,7 @@ def test_logical_ops_label_based(self, using_infer_string): for e in [Series(["z"])]: if using_infer_string: # TODO(infer_string) should this behave differently? + # -> https://github.com/pandas-dev/pandas/issues/60234 with pytest.raises( TypeError, match="not supported for dtype|unsupported operand type" ):