Skip to content

Commit

Permalink
TST (string dtype): update all tests in tests/frame/indexing (#60193)
Browse files: browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored Nov 5, 2024
1 parent cf52dec commit bec2dbc
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 43 deletions.
10 changes: 6 additions & 4 deletions pandas/tests/frame/indexing/test_coercion.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,8 +8,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -84,14 +82,18 @@ def test_6942(indexer_al):
assert df.iloc[0, 0] == t2


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_26395(indexer_al):
# .at case fixed by GH#45121 (best guess)
df = DataFrame(index=["A", "B", "C"])
df["D"] = 0

indexer_al(df)["C", "D"] = 2
expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
expected = DataFrame(
{"D": [0, 0, 2]},
index=["A", "B", "C"],
columns=pd.Index(["D"], dtype=object),
dtype=np.int64,
)
tm.assert_frame_equal(df, expected)

with pytest.raises(TypeError, match="Invalid value"):
Expand Down
16 changes: 5 additions & 11 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,7 +12,6 @@
from pandas._config import using_string_dtype

from pandas._libs import iNaT
from pandas.compat import HAS_PYARROW
from pandas.errors import InvalidIndexError

from pandas.core.dtypes.common import is_integer
Expand Down Expand Up @@ -505,17 +504,16 @@ def test_setitem_ambig(self, using_infer_string):
assert dm[2].dtype == np.object_

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_None(self, float_frame, using_infer_string):
def test_setitem_None(self, float_frame):
# GH #766
float_frame[None] = float_frame["A"]
key = None if not using_infer_string else np.nan
tm.assert_series_equal(
float_frame.iloc[:, -1], float_frame["A"], check_names=False
)
tm.assert_series_equal(
float_frame.loc[:, key], float_frame["A"], check_names=False
float_frame.loc[:, None], float_frame["A"], check_names=False
)
tm.assert_series_equal(float_frame[key], float_frame["A"], check_names=False)
tm.assert_series_equal(float_frame[None], float_frame["A"], check_names=False)

def test_loc_setitem_boolean_mask_allfalse(self):
# GH 9596
Expand Down Expand Up @@ -1125,7 +1123,6 @@ def test_setitem_with_unaligned_tz_aware_datetime_column(self):
df.loc[[0, 1, 2], "dates"] = column[[1, 0, 2]]
tm.assert_series_equal(df["dates"], column)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_loc_setitem_datetimelike_with_inference(self):
# GH 7592
# assignment of timedeltas with NaT
Expand All @@ -1144,13 +1141,10 @@ def test_loc_setitem_datetimelike_with_inference(self):
result = df.dtypes
expected = Series(
[np.dtype("timedelta64[ns]")] * 6 + [np.dtype("datetime64[ns]")] * 2,
index=list("ABCDEFGH"),
index=Index(list("ABCDEFGH"), dtype=object),
)
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(
using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)"
)
def test_getitem_boolean_indexing_mixed(self):
df = DataFrame(
{
Expand Down Expand Up @@ -1192,7 +1186,7 @@ def test_getitem_boolean_indexing_mixed(self):
tm.assert_frame_equal(df2, expected)

df["foo"] = "test"
msg = "not supported between instances|unorderable types"
msg = "not supported between instances|unorderable types|Invalid comparison"

with pytest.raises(TypeError, match=msg):
df[df > 0.3] = 1
Expand Down
6 changes: 2 additions & 4 deletions pandas/tests/frame/indexing/test_insert.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,8 +7,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.errors import PerformanceWarning

from pandas import (
Expand Down Expand Up @@ -63,15 +61,15 @@ def test_insert_column_bug_4032(self):
expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"])
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_insert_with_columns_dups(self):
# GH#14291
df = DataFrame()
df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True)
df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True)
df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True)
exp = DataFrame(
[["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"]
[["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]],
columns=Index(["A", "A", "A"], dtype=object),
)
tm.assert_frame_equal(df, exp)

Expand Down
26 changes: 12 additions & 14 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.core.dtypes.base import _registry as ea_registry
from pandas.core.dtypes.common import is_object_dtype
from pandas.core.dtypes.dtypes import (
Expand Down Expand Up @@ -146,13 +144,16 @@ def test_setitem_different_dtype(self):
)
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_empty_columns(self):
# GH 13522
df = DataFrame(index=["A", "B", "C"])
df["X"] = df.index
df["X"] = ["x", "y", "z"]
exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"])
exp = DataFrame(
data={"X": ["x", "y", "z"]},
index=["A", "B", "C"],
columns=Index(["X"], dtype=object),
)
tm.assert_frame_equal(df, exp)

def test_setitem_dt64_index_empty_columns(self):
Expand All @@ -162,14 +163,15 @@ def test_setitem_dt64_index_empty_columns(self):
df["A"] = rng
assert df["A"].dtype == np.dtype("M8[ns]")

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_timestamp_empty_columns(self):
# GH#19843
df = DataFrame(index=range(3))
df["now"] = Timestamp("20130101", tz="UTC")

expected = DataFrame(
[[Timestamp("20130101", tz="UTC")]] * 3, index=range(3), columns=["now"]
[[Timestamp("20130101", tz="UTC")]] * 3,
index=range(3),
columns=Index(["now"], dtype=object),
)
tm.assert_frame_equal(df, expected)

Expand Down Expand Up @@ -202,14 +204,13 @@ def test_setitem_with_unaligned_sparse_value(self):
expected = Series(SparseArray([1, 0, 0]), name="new_column")
tm.assert_series_equal(df["new_column"], expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_period_preserves_dtype(self):
# GH: 26861
data = [Period("2003-12", "D")]
result = DataFrame([])
result["a"] = data

expected = DataFrame({"a": data})
expected = DataFrame({"a": data}, columns=Index(["a"], dtype=object))

tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -672,11 +673,10 @@ def test_setitem_iloc_two_dimensional_generator(self):
expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]})
tm.assert_frame_equal(df, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_dtypes_bytes_type_to_object(self):
# GH 20734
index = Series(name="id", dtype="S24")
df = DataFrame(index=index)
df = DataFrame(index=index, columns=Index([], dtype="str"))
df["a"] = Series(name="a", index=index, dtype=np.uint32)
df["b"] = Series(name="b", index=index, dtype="S64")
df["c"] = Series(name="c", index=index, dtype="S64")
Expand Down Expand Up @@ -705,7 +705,6 @@ def test_setitem_ea_dtype_rhs_series(self):
expected = DataFrame({"a": [1, 2]}, dtype="Int64")
tm.assert_frame_equal(df, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_npmatrix_2d(self):
# GH#42376
# for use-case df["x"] = sparse.random((10, 10)).mean(axis=1)
Expand All @@ -714,7 +713,7 @@ def test_setitem_npmatrix_2d(self):
)

a = np.ones((10, 1))
df = DataFrame(index=np.arange(10))
df = DataFrame(index=np.arange(10), columns=Index([], dtype="str"))
df["np-array"] = a

# Instantiation of `np.matrix` gives PendingDeprecationWarning
Expand Down Expand Up @@ -927,12 +926,11 @@ def test_setitem_with_expansion_categorical_dtype(self):
ser.name = "E"
tm.assert_series_equal(result2.sort_index(), ser.sort_index())

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_scalars_no_index(self):
# GH#16823 / GH#17894
df = DataFrame()
df["foo"] = 1
expected = DataFrame(columns=["foo"]).astype(np.int64)
expected = DataFrame(columns=Index(["foo"], dtype=object)).astype(np.int64)
tm.assert_frame_equal(df, expected)

def test_setitem_newcol_tuple_key(self, float_frame):
Expand Down
18 changes: 12 additions & 6 deletions pandas/tests/frame/indexing/test_where.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,7 +48,6 @@ def is_ok(s):


class TestDataFrameIndexingWhere:
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_where_get(self, where_frame, float_string_frame):
def _check_get(df, cond, check_dtypes=True):
other1 = _safe_add(df)
Expand All @@ -66,7 +65,10 @@ def _check_get(df, cond, check_dtypes=True):
# check getting
df = where_frame
if df is float_string_frame:
msg = "'>' not supported between instances of 'str' and 'int'"
msg = (
"'>' not supported between instances of 'str' and 'int'"
"|Invalid comparison"
)
with pytest.raises(TypeError, match=msg):
df > 0
return
Expand Down Expand Up @@ -99,7 +101,6 @@ def test_where_upcasting(self):

tm.assert_series_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_where_alignment(self, where_frame, float_string_frame):
# aligning
def _check_align(df, cond, other, check_dtypes=True):
Expand Down Expand Up @@ -131,7 +132,10 @@ def _check_align(df, cond, other, check_dtypes=True):

df = where_frame
if df is float_string_frame:
msg = "'>' not supported between instances of 'str' and 'int'"
msg = (
"'>' not supported between instances of 'str' and 'int'"
"|Invalid comparison"
)
with pytest.raises(TypeError, match=msg):
df > 0
return
Expand Down Expand Up @@ -174,7 +178,6 @@ def test_where_invalid(self):
with pytest.raises(ValueError, match=msg):
df.mask(0)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_where_set(self, where_frame, float_string_frame, mixed_int_frame):
# where inplace

Expand All @@ -196,7 +199,10 @@ def _check_set(df, cond, check_dtypes=True):

df = where_frame
if df is float_string_frame:
msg = "'>' not supported between instances of 'str' and 'int'"
msg = (
"'>' not supported between instances of 'str' and 'int'"
"|Invalid comparison"
)
with pytest.raises(TypeError, match=msg):
df > 0
return
Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/frame/indexing/test_xs.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
DataFrame,
Index,
Expand Down Expand Up @@ -74,10 +72,9 @@ def test_xs_other(self, float_frame):
tm.assert_series_equal(float_frame["A"], float_frame_orig["A"])
assert not (expected == 5).all()

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_xs_corner(self):
# pathological mixed-type reordering case
df = DataFrame(index=[0])
df = DataFrame(index=[0], columns=Index([], dtype="str"))
df["A"] = 1.0
df["B"] = "foo"
df["C"] = 2.0
Expand Down

0 comments on commit bec2dbc

Please sign in to comment.