From b526058e9b4bad15f8096539afdf3ef3613631aa Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Fri, 24 Nov 2023 15:20:40 +0900 Subject: [PATCH 01/14] add test_groupby_aggregation_empty_group --- pandas/tests/groupby/aggregate/test_aggregate.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 45884a4b3c20f..245c137330718 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -881,6 +881,17 @@ def test_missing_raises(self): match = re.escape("Column(s) ['C'] do not exist") with pytest.raises(KeyError, match=match): df.groupby("A").agg(c=("C", "sum")) + + def test_groupby_aggregation_empty_group(): + # https://github.com/pandas-dev/pandas/issues/18869 + def f(x): + if len(x) == 0: + sys.exit(1) + return len(x) + + df = DataFrame({"A": pd.Categorical(['a', 'a'], categories=['a', 'b', 'c']), "B": [1, 1]}) + with pytest.raises(SystemExit): + df.groupby('A').agg(f) def test_agg_namedtuple(self): df = DataFrame({"A": [0, 1], "B": [1, 2]}) From a1a33995231e91a503b48ffc7b295e155de2a5ef Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Fri, 24 Nov 2023 16:01:17 +0900 Subject: [PATCH 02/14] Addressed Code Checks errors --- pandas/tests/groupby/aggregate/test_aggregate.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 245c137330718..ff3b577c0d60f 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -886,11 +886,12 @@ def test_groupby_aggregation_empty_group(): # https://github.com/pandas-dev/pandas/issues/18869 def f(x): if len(x) == 0: - sys.exit(1) + raise ValueError("length must not be 0") return len(x) df = DataFrame({"A": pd.Categorical(['a', 'a'], categories=['a', 'b', 'c']), "B": [1, 1]}) - with pytest.raises(SystemExit): + msg = 'length must not be 0' + with pytest.raises(ValueError, match=msg): df.groupby('A').agg(f) def test_agg_namedtuple(self): From 8a6de1a486524fd66104f224826f07c9cd0d6f5a Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Sun, 26 Nov 2023 19:43:58 +0900 Subject: [PATCH 03/14] changed indent --- pandas/tests/groupby/aggregate/test_aggregate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index ff3b577c0d60f..c326c375fd0b1 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -883,7 +883,7 @@ def test_missing_raises(self): df.groupby("A").agg(c=("C", "sum")) def test_groupby_aggregation_empty_group(): - # https://github.com/pandas-dev/pandas/issues/18869 + # https://github.com/pandas-dev/pandas/issues/18869 def f(x): if len(x) == 0: raise ValueError("length must not be 0") From 26b8ee722a74ce92a973184b71f7179f412ec9ae Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Sun, 26 Nov 2023 19:51:07 +0900 Subject: [PATCH 04/14] flake8 --- pandas/tests/groupby/aggregate/test_aggregate.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index c326c375fd0b1..488ef8d48915b 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -881,7 +881,7 @@ def test_missing_raises(self): match = re.escape("Column(s) ['C'] do not exist") with pytest.raises(KeyError, match=match): df.groupby("A").agg(c=("C", "sum")) - + def test_groupby_aggregation_empty_group(): # https://github.com/pandas-dev/pandas/issues/18869 def f(x): @@ -889,7 +889,8 @@ def f(x): raise ValueError("length must not be 0") return len(x) - df = DataFrame({"A": pd.Categorical(['a', 'a'], categories=['a', 'b', 'c']), "B": [1, 1]}) + df = DataFrame({"A": pd.Categorical(['a', 'a'], + categories=['a', 'b', 'c']), "B": [1, 1]}) msg = 'length must not be 0' with pytest.raises(ValueError, match=msg): df.groupby('A').agg(f) From 8ce3e2ffc4bbb4710c0fc5442010b2d50ae2478a Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Mon, 27 Nov 2023 00:32:29 +0900 Subject: [PATCH 05/14] move function --- .../tests/groupby/aggregate/test_aggregate.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 488ef8d48915b..3cbaa9a21f023 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -882,19 +882,6 @@ def test_missing_raises(self): with pytest.raises(KeyError, match=match): df.groupby("A").agg(c=("C", "sum")) - def test_groupby_aggregation_empty_group(): - # https://github.com/pandas-dev/pandas/issues/18869 - def f(x): - if len(x) == 0: - raise ValueError("length must not be 0") - return len(x) - - df = DataFrame({"A": pd.Categorical(['a', 'a'], - categories=['a', 'b', 'c']), "B": [1, 1]}) - msg = 'length must not be 0' - with pytest.raises(ValueError, match=msg): - df.groupby('A').agg(f) - def test_agg_namedtuple(self): df = DataFrame({"A": [0, 1], "B": [1, 2]}) result = df.groupby("A").agg( @@ -1664,3 +1651,16 @@ def test_groupby_agg_extension_timedelta_cumsum_with_named_aggregation(): gb = df.groupby("grps") result = gb.agg(td=("td", "cumsum")) tm.assert_frame_equal(result, expected) + +def test_groupby_aggregation_empty_group(): + # https://github.com/pandas-dev/pandas/issues/18869 + def f(x): + if len(x) == 0: + raise ValueError("length must not be 0") + return len(x) + + df = DataFrame({"A": pd.Categorical(['a', 'a'], + categories=['a', 'b', 'c']), "B": [1, 1]}) + msg = 'length must not be 0' + with pytest.raises(ValueError, match=msg): + df.groupby('A').agg(f) \ No newline at end of file From 74f23de7eb905167c776f30eef03f204e8985657 Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Mon, 27 Nov 2023 00:43:52 +0900 Subject: [PATCH 06/14] initialize --- pandas/tests/groupby/aggregate/test_aggregate.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3cbaa9a21f023..45884a4b3c20f 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1651,16 +1651,3 @@ def test_groupby_agg_extension_timedelta_cumsum_with_named_aggregation(): gb = df.groupby("grps") result = gb.agg(td=("td", "cumsum")) tm.assert_frame_equal(result, expected) - -def test_groupby_aggregation_empty_group(): - # https://github.com/pandas-dev/pandas/issues/18869 - def f(x): - if len(x) == 0: - raise ValueError("length must not be 0") - return len(x) - - df = DataFrame({"A": pd.Categorical(['a', 'a'], - categories=['a', 'b', 'c']), "B": [1, 1]}) - msg = 'length must not be 0' - with pytest.raises(ValueError, match=msg): - df.groupby('A').agg(f) \ No newline at end of file From e4fc5d0691f8282d41b0da822df22942f31d4da3 Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Mon, 27 Nov 2023 02:39:16 +0900 Subject: [PATCH 07/14] change format --- .../tests/groupby/aggregate/test_aggregate.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 45884a4b3c20f..77a2df042e999 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1651,3 +1651,20 @@ def test_groupby_agg_extension_timedelta_cumsum_with_named_aggregation(): gb = df.groupby("grps") result = gb.agg(td=("td", "cumsum")) tm.assert_frame_equal(result, expected) + +def test_groupby_aggregation_empty_group(): + # https://github.com/pandas-dev/pandas/issues/18869 + def f(x): + if len(x) == 0: + raise ValueError("length must not be 0") + return len(x) + + df = DataFrame( + {"A": pd.Categorical(['a', 'a'], + categories=['a', 'b', 'c']), + "B": [1, 1] + } + ) + msg = 'length must not be 0' + with pytest.raises(ValueError, match=msg): + df.groupby('A').agg(f) \ No newline at end of file From 6f72d85f9e48b5086ffcafb9e2dd23cd3b77b90c Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Mon, 27 Nov 2023 02:40:27 +0900 Subject: [PATCH 08/14] add blank --- pandas/tests/groupby/aggregate/test_aggregate.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 77a2df042e999..68211591b6750 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1652,6 +1652,7 @@ def test_groupby_agg_extension_timedelta_cumsum_with_named_aggregation(): result = gb.agg(td=("td", "cumsum")) tm.assert_frame_equal(result, expected) + def test_groupby_aggregation_empty_group(): # https://github.com/pandas-dev/pandas/issues/18869 def f(x): From e32c7084e1c208069e0c175bac03fd1132f4fdbb Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Mon, 27 Nov 2023 02:46:01 +0900 Subject: [PATCH 09/14] format --- pandas/tests/groupby/aggregate/test_aggregate.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 68211591b6750..a98f58f4ec8ce 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1661,9 +1661,10 @@ def f(x): return len(x) df = DataFrame( - {"A": pd.Categorical(['a', 'a'], - categories=['a', 'b', 'c']), - "B": [1, 1] + { + "A": pd.Categorical(['a', 'a'], + categories=['a', 'b', 'c']), + "B": [1, 1] } ) msg = 'length must not be 0' From 6871b72a9db7b58774526b1efd25a0ab0716dc25 Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Mon, 27 Nov 2023 02:52:00 +0900 Subject: [PATCH 10/14] format --- pandas/tests/groupby/aggregate/test_aggregate.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index a98f58f4ec8ce..cb47893f65d63 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1662,9 +1662,11 @@ def f(x): df = DataFrame( { - "A": pd.Categorical(['a', 'a'], - categories=['a', 'b', 'c']), - "B": [1, 1] + "A": pd.Categorical( + ['a', 'a'], + categories=['a', 'b', 'c'], + ), + "B": [1, 1], } ) msg = 'length must not be 0' From 4169988d62228734baa86a960d82309402539c8e Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Mon, 27 Nov 2023 02:57:39 +0900 Subject: [PATCH 11/14] format --- pandas/tests/groupby/aggregate/test_aggregate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index cb47893f65d63..9b1f24c4d8e49 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1663,12 +1663,12 @@ def f(x): df = DataFrame( { "A": pd.Categorical( - ['a', 'a'], - categories=['a', 'b', 'c'], + ["a", "a"], + categories=["a", "b", "c"], ), "B": [1, 1], } ) - msg = 'length must not be 0' + msg = "length must not be 0" with pytest.raises(ValueError, match=msg): - df.groupby('A').agg(f) \ No newline at end of file + df.groupby("A").agg(f) \ No newline at end of file From a01c5286c645ce25b5eed28be0fc678c14b16128 Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Mon, 27 Nov 2023 23:23:44 +0900 Subject: [PATCH 12/14] format --- pandas/tests/groupby/aggregate/test_aggregate.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 9b1f24c4d8e49..dd908b36f8b17 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1655,20 +1655,14 @@ def test_groupby_agg_extension_timedelta_cumsum_with_named_aggregation(): def test_groupby_aggregation_empty_group(): # https://github.com/pandas-dev/pandas/issues/18869 - def f(x): + def func(x): if len(x) == 0: raise ValueError("length must not be 0") return len(x) df = DataFrame( - { - "A": pd.Categorical( - ["a", "a"], - categories=["a", "b", "c"], - ), - "B": [1, 1], - } + {"A": pd.Categorical(["a", "a"], categories=["a", "b", "c"]), "B": [1, 1]} ) msg = "length must not be 0" with pytest.raises(ValueError, match=msg): - df.groupby("A").agg(f) \ No newline at end of file + df.groupby("A").agg(func) \ No newline at end of file From cb3634964bb19a10dbd4d8cc569996155d8d237e Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Mon, 27 Nov 2023 23:38:04 +0900 Subject: [PATCH 13/14] remove last --- pandas/tests/groupby/aggregate/test_aggregate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index dd908b36f8b17..c16a16dd6aea3 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1665,4 +1665,4 @@ def func(x): ) msg = "length must not be 0" with pytest.raises(ValueError, match=msg): - df.groupby("A").agg(func) \ No newline at end of file + df.groupby("A").agg(func) From 64fd69bc6ef9d04bba00829103e5be83784ae8b7 Mon Sep 17 00:00:00 2001 From: HaruguchiKazuto Date: Wed, 29 Nov 2023 02:05:45 +0900 Subject: [PATCH 14/14] add observed=False --- pandas/tests/groupby/aggregate/test_aggregate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index c16a16dd6aea3..3ba1510cc6b1d 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1665,4 +1665,4 @@ def func(x): ) msg = "length must not be 0" with pytest.raises(ValueError, match=msg): - df.groupby("A").agg(func) + df.groupby("A", observed=False).agg(func)