From 7c400e7be5233c6e4a2a5f78b75a6f56f20a5312 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Tue, 24 Oct 2023 00:18:43 +0200
Subject: [PATCH] Fix groupby tests for using_infer_string and pa_version_under10p1

---
 pandas/tests/groupby/methods/test_nth.py          | 12 +++++++++++-
 pandas/tests/groupby/methods/test_value_counts.py | 10 +++++++---
 pandas/tests/groupby/test_apply.py                |  4 +---
 pandas/tests/groupby/test_categorical.py          |  6 +++++-
 pandas/tests/groupby/test_groupby.py              | 10 +++++++---
 pandas/tests/groupby/test_grouping.py             |  4 ++--
 pandas/tests/groupby/test_reductions.py           |  6 ++++--
 7 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py
index 1cf4a90e25f1b..86212a621427a 100644
--- a/pandas/tests/groupby/methods/test_nth.py
+++ b/pandas/tests/groupby/methods/test_nth.py
@@ -1,3 +1,5 @@
+from decimal import Decimal
+
 import numpy as np
 import pytest
 
@@ -682,7 +684,15 @@ def test_first_multi_key_groupby_categorical():
 @pytest.mark.parametrize("method", ["first", "last", "nth"])
 def test_groupby_last_first_nth_with_none(method, nulls_fixture):
     # GH29645
-    expected = Series(["y"])
+    if nulls_fixture is not pd.NA and (
+        nulls_fixture is pd.NaT
+        or isinstance(nulls_fixture, Decimal)
+        and Decimal.is_nan(nulls_fixture)
+    ):
+        dtype = object
+    else:
+        dtype = None
+    expected = Series(["y"], dtype=dtype)
     data = Series(
         [nulls_fixture, nulls_fixture, nulls_fixture, "y", nulls_fixture],
         index=[0, 0, 0, 0, 0],
diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py
index c5c2eca813e86..403ec9aad92e7 100644
--- a/pandas/tests/groupby/methods/test_value_counts.py
+++ b/pandas/tests/groupby/methods/test_value_counts.py
@@ -9,7 +9,7 @@
 import numpy as np
 import pytest
 
-from pandas.compat import pa_version_under7p0
+from pandas.compat import pa_version_under10p1
 
 from pandas import (
     Categorical,
@@ -377,11 +377,15 @@ def test_against_frame_and_seriesgroupby(
         object,
         pytest.param(
             "string[pyarrow_numpy]",
-            marks=pytest.mark.skipif(pa_version_under7p0, reason="arrow not installed"),
+            marks=pytest.mark.skipif(
+                pa_version_under10p1, reason="arrow not installed"
+            ),
         ),
         pytest.param(
             "string[pyarrow]",
-            marks=pytest.mark.skipif(pa_version_under7p0, reason="arrow not installed"),
+            marks=pytest.mark.skipif(
+                pa_version_under10p1, reason="arrow not installed"
+            ),
         ),
     ],
 )
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index a2d195c0e2ce8..1f679eafbbb36 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1265,9 +1265,7 @@ def test_apply_dropna_with_indexed_same(dropna):
     [
         [
             False,
-            DataFrame(
-                [[1, 1, 1], [2, 2, 1]], columns=Index(["a", "b", None], dtype=object)
-            ),
+            DataFrame([[1, 1, 1], [2, 2, 1]], columns=Index(["a", "b", None])),
         ],
         [
             True,
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 3528ebf6b18a3..ec0427875d44a 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -338,7 +338,7 @@ def test_apply(ordered):
     tm.assert_series_equal(result, expected)
 
 
-def test_observed(observed):
+def test_observed(observed, using_infer_string, request):
     # multiple groupers, don't re-expand the output space
     # of the grouper
     # gh-14942 (implement)
@@ -346,6 +346,10 @@ def test_observed(observed):
     # gh-8138 (back-compat)
     # gh-8869
 
+    if not observed and using_infer_string:
+        mark = pytest.mark.xfail(reason="fill_value=0 invalid for string dtype")
+        request.applymarker(mark)
+
     cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True)
     cat2 = Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True)
     df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 216bb1d1b2efc..5687e4cb5d136 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -5,7 +5,7 @@
 import numpy as np
 import pytest
 
-from pandas.compat import pa_version_under7p0
+from pandas.compat import pa_version_under10p1
 from pandas.errors import (
     PerformanceWarning,
     SpecificationError,
@@ -2557,7 +2557,9 @@ def test_groupby_column_index_name_lost(func):
         False,
         pytest.param(
             True,
-            marks=pytest.mark.skipif(pa_version_under7p0, reason="arrow not installed"),
+            marks=pytest.mark.skipif(
+                pa_version_under10p1, reason="arrow not installed"
+            ),
         ),
     ],
 )
@@ -2800,7 +2802,9 @@ def test_rolling_wrong_param_min_period():
         object,
         pytest.param(
             "string[pyarrow_numpy]",
-            marks=pytest.mark.skipif(pa_version_under7p0, reason="arrow not installed"),
+            marks=pytest.mark.skipif(
+                pa_version_under10p1, reason="arrow not installed"
+            ),
         ),
     ],
 )
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 45b9a3675eaf4..d22fa16fff26c 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -803,7 +803,7 @@ def test_groupby_empty(self):
         # check name
         assert s.groupby(s).grouper.names == ["name"]
 
-    def test_groupby_level_index_value_all_na(self):
+    def test_groupby_level_index_value_all_na(self, using_infer_string):
         # issue 20519
         df = DataFrame(
             [["x", np.nan, 10], [None, np.nan, 20]], columns=["A", "B", "C"]
@@ -819,7 +819,7 @@ def test_groupby_level_index_value_all_na(self):
             columns=["C"],
             dtype="int64",
         )
-        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected, check_index_type=not using_infer_string)
 
     def test_groupby_multiindex_level_empty(self):
         # https://github.com/pandas-dev/pandas/issues/31670
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 35ad8e3f5dc61..31fb18d06fd0d 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -332,7 +332,7 @@ def test_max_min_non_numeric():
     assert "ss" in result
 
 
-def test_max_min_object_multiple_columns(using_array_manager):
+def test_max_min_object_multiple_columns(using_infer_string):
     # GH#41111 case where the aggregation is valid for some columns but not
     # others; we split object blocks column-wise, consistent with
     # DataFrame._reduce
@@ -345,7 +345,9 @@ def test_max_min_object_multiple_columns(using_array_manager):
         }
     )
     df._consolidate_inplace()  # should already be consolidate, but double-check
-    if not using_array_manager:
+    if using_infer_string:
+        assert len(df._mgr.blocks) == 3
+    else:
         assert len(df._mgr.blocks) == 2
 
     gb = df.groupby("A")