Skip to content

Commit

Permalink
Merge branch 'main' into issue-37210-to-sql-truncate
Browse files — browse the repository at this point in the history
  • Loading branch information
gmcrocetti authored Dec 29, 2024
2 parents 2eb19e7 + d81882b commit 7a06949
Show file tree
Hide file tree
Showing 9 changed files with 99 additions and 72 deletions.
2 changes: 1 addition & 1 deletion pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -1644,7 +1644,7 @@ def _update_ctx_header(self, attrs: DataFrame, axis: AxisInt) -> None:
for j in attrs.columns:
ser = attrs[j]
for i, c in ser.items():
if not c:
if not c or pd.isna(c):
continue
css_list = maybe_convert_css_to_tuples(c)
if axis == 0:
Expand Down
2 changes: 2 additions & 0 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5297,6 +5297,8 @@ def _dtype_to_kind(dtype_str: str) -> str:
kind = "integer"
elif dtype_str == "object":
kind = "object"
elif dtype_str == "str":
kind = "str"
else:
raise ValueError(f"cannot interpret dtype of [{dtype_str}]")

Expand Down
56 changes: 32 additions & 24 deletions pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,19 +255,19 @@ def test_apply_with_mixed_dtype():
"foo2": ["one", "two", "two", "three", "one", "two"],
}
)
result = df.apply(lambda x: x, axis=1).dtypes
expected = df.dtypes
tm.assert_series_equal(result, expected)
result = df.apply(lambda x: x, axis=1)
expected = df
tm.assert_frame_equal(result, expected)

# GH 3610 incorrect dtype conversion with as_index=False
df = DataFrame({"c1": [1, 2, 6, 6, 8]})
df["c2"] = df.c1 / 2.0
result1 = df.groupby("c2").mean().reset_index().c2
result2 = df.groupby("c2", as_index=False).mean().c2
tm.assert_series_equal(result1, result2)
result1 = df.groupby("c2").mean().reset_index()
result2 = df.groupby("c2", as_index=False).mean()
tm.assert_frame_equal(result1, result2)


def test_groupby_as_index_apply():
def test_groupby_as_index_apply(as_index):
# GH #4648 and #3417
df = DataFrame(
{
Expand All @@ -276,27 +276,35 @@ def test_groupby_as_index_apply():
"time": range(6),
}
)
gb = df.groupby("user_id", as_index=as_index)

g_as = df.groupby("user_id", as_index=True)
g_not_as = df.groupby("user_id", as_index=False)

res_as = g_as.head(2).index
res_not_as = g_not_as.head(2).index
exp = Index([0, 1, 2, 4])
tm.assert_index_equal(res_as, exp)
tm.assert_index_equal(res_not_as, exp)

res_as_apply = g_as.apply(lambda x: x.head(2)).index
res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
expected = DataFrame(
{
"item_id": ["b", "b", "a", "a"],
"user_id": [1, 2, 1, 3],
"time": [0, 1, 2, 4],
},
index=[0, 1, 2, 4],
)
result = gb.head(2)
tm.assert_frame_equal(result, expected)

# apply doesn't maintain the original ordering
# changed in GH5610 as the as_index=False returns a MI here
exp_not_as_apply = Index([0, 2, 1, 4])
tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
exp_as_apply = MultiIndex.from_tuples(tp, names=["user_id", None])

tm.assert_index_equal(res_as_apply, exp_as_apply)
tm.assert_index_equal(res_not_as_apply, exp_not_as_apply)
if as_index:
tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
index = MultiIndex.from_tuples(tp, names=["user_id", None])
else:
index = Index([0, 2, 1, 4])
expected = DataFrame(
{
"item_id": list("baba"),
"time": [0, 2, 1, 4],
},
index=index,
)
result = gb.apply(lambda x: x.head(2))
tm.assert_frame_equal(result, expected)


def test_groupby_as_index_apply_str():
Expand Down
37 changes: 10 additions & 27 deletions pandas/tests/groupby/test_apply_mutate.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,20 @@ def test_mutate_groups():
}
)

def f_copy(x):
def f(x):
x = x.copy()
x["rank"] = x.val.rank(method="min")
return x.groupby("cat2")["rank"].min()

def f_no_copy(x):
x["rank"] = x.val.rank(method="min")
return x.groupby("cat2")["rank"].min()

grpby_copy = df.groupby("cat1").apply(f_copy)
grpby_no_copy = df.groupby("cat1").apply(f_no_copy)
tm.assert_series_equal(grpby_copy, grpby_no_copy)
expected = pd.DataFrame(
{
"cat1": list("aaaabbb"),
"cat2": list("cdefcde"),
"rank": [3.0, 2.0, 5.0, 1.0, 2.0, 4.0, 1.0],
}
).set_index(["cat1", "cat2"])["rank"]
result = df.groupby("cat1").apply(f)
tm.assert_series_equal(result, expected)


def test_no_mutate_but_looks_like():
Expand All @@ -61,22 +63,3 @@ def test_no_mutate_but_looks_like():
result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].value)
result2 = df.groupby("key", group_keys=True).apply(lambda x: x.value)
tm.assert_series_equal(result1, result2)


def test_apply_function_with_indexing():
# GH: 33058
df = pd.DataFrame(
{"col1": ["A", "A", "A", "B", "B", "B"], "col2": [1, 2, 3, 4, 5, 6]}
)

def fn(x):
x.loc[x.index[-1], "col2"] = 0
return x.col2

result = df.groupby(["col1"], as_index=False).apply(fn)
expected = pd.Series(
[1, 2, 0, 4, 5, 0],
index=range(6),
name="col2",
)
tm.assert_series_equal(result, expected)
20 changes: 20 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3297,3 +3297,23 @@ def test_loc_reindexing_of_empty_index(self):
df.loc[Series([False] * 4, index=df.index, name=0), 0] = df[0]
expected = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"])
tm.assert_frame_equal(df, expected)

def test_loc_setitem_matching_index(self):
# GH 25548
s = Series(0.0, index=list("abcd"))
s1 = Series(1.0, index=list("ab"))
s2 = Series(2.0, index=list("xy"))

# Test matching indices
s.loc[["a", "b"]] = s1

result = s[["a", "b"]]
expected = s1
tm.assert_series_equal(result, expected)

# Test unmatched indices
s.loc[["a", "b"]] = s2

result = s[["a", "b"]]
expected = Series([np.nan, np.nan], index=["a", "b"])
tm.assert_series_equal(result, expected)
3 changes: 0 additions & 3 deletions pandas/tests/io/formats/style/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
DataFrame,
MultiIndex,
Expand Down Expand Up @@ -731,7 +729,6 @@ def test_longtable_caption_label(styler, caption, cap_exp, label, lab_exp):
)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize(
"columns, siunitx",
Expand Down
45 changes: 34 additions & 11 deletions pandas/tests/io/pytables/test_file_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,11 @@

pytestmark = [
pytest.mark.single_cpu,
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
]


@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
def test_mode(setup_path, tmp_path, mode):
def test_mode(setup_path, tmp_path, mode, using_infer_string):
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
Expand Down Expand Up @@ -91,10 +90,12 @@ def test_mode(setup_path, tmp_path, mode):
read_hdf(path, "df", mode=mode)
else:
result = read_hdf(path, "df", mode=mode)
if using_infer_string:
df.columns = df.columns.astype("str")
tm.assert_frame_equal(result, df)


def test_default_mode(tmp_path, setup_path):
def test_default_mode(tmp_path, setup_path, using_infer_string):
# read_hdf uses default mode
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
Expand All @@ -104,7 +105,10 @@ def test_default_mode(tmp_path, setup_path):
path = tmp_path / setup_path
df.to_hdf(path, key="df", mode="w")
result = read_hdf(path, "df")
tm.assert_frame_equal(result, df)
expected = df.copy()
if using_infer_string:
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)


def test_reopen_handle(tmp_path, setup_path):
Expand Down Expand Up @@ -163,7 +167,7 @@ def test_reopen_handle(tmp_path, setup_path):
assert not store.is_open


def test_open_args(setup_path):
def test_open_args(setup_path, using_infer_string):
with tm.ensure_clean(setup_path) as path:
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
Expand All @@ -178,8 +182,13 @@ def test_open_args(setup_path):
store["df"] = df
store.append("df2", df)

tm.assert_frame_equal(store["df"], df)
tm.assert_frame_equal(store["df2"], df)
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")

tm.assert_frame_equal(store["df"], expected)
tm.assert_frame_equal(store["df2"], expected)

store.close()

Expand All @@ -194,7 +203,7 @@ def test_flush(setup_path):
store.flush(fsync=True)


def test_complibs_default_settings(tmp_path, setup_path):
def test_complibs_default_settings(tmp_path, setup_path, using_infer_string):
# GH15943
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
Expand All @@ -207,7 +216,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, key="df", complevel=9)
result = read_hdf(tmpfile, "df")
tm.assert_frame_equal(result, df)
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)

with tables.open_file(tmpfile, mode="r") as h5file:
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
Expand All @@ -218,7 +231,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, key="df", complib="zlib")
result = read_hdf(tmpfile, "df")
tm.assert_frame_equal(result, df)
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)

with tables.open_file(tmpfile, mode="r") as h5file:
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
Expand All @@ -229,7 +246,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, key="df")
result = read_hdf(tmpfile, "df")
tm.assert_frame_equal(result, df)
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)

with tables.open_file(tmpfile, mode="r") as h5file:
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
Expand Down Expand Up @@ -308,6 +329,7 @@ def test_complibs(tmp_path, lvl, lib, request):
assert node.filters.complib == lib


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.skipif(
not is_platform_little_endian(), reason="reason platform is not little endian"
)
Expand All @@ -325,6 +347,7 @@ def test_encoding(setup_path):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize(
"val",
[
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/io/pytables/test_subclass.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
DataFrame,
Series,
Expand All @@ -19,7 +17,6 @@

class TestHDFStoreSubclass:
# GH 33748
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_supported_for_subclass_dataframe(self, tmp_path):
data = {"a": [1, 2], "b": [3, 4]}
sdf = tm.SubclassedDataFrame(data, dtype=np.intp)
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import (
WASM,
is_platform_windows,
Expand Down Expand Up @@ -365,7 +363,6 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
expected = f_path.read()
assert result == expected

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) hdf support")
def test_write_fspath_hdf5(self):
# Same test as write_fspath_all, except HDF5 files aren't
# necessarily byte-for-byte identical for a given dataframe, so we'll
Expand Down

0 comments on commit 7a06949

Please sign in to comment.