From 364c9cb65acf47119dc251b25226ab415cc9a100 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Wed, 4 Oct 2023 19:07:26 +0200 Subject: [PATCH] BUG: all not ignoring na when skipna=True (#55367) * BUG: all not ignoring na when skipna=True * Update v2.1.2.rst --- doc/source/whatsnew/v2.1.2.rst | 1 + pandas/core/arrays/string_arrow.py | 8 +++++--- pandas/tests/reductions/test_reductions.py | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst index df19b01610a7a..87b71eac76d1a 100644 --- a/doc/source/whatsnew/v2.1.2.rst +++ b/doc/source/whatsnew/v2.1.2.rst @@ -29,6 +29,7 @@ Bug fixes - Fixed bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`) - Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`) - Fixed bug in :meth:`Index.insert` raising when inserting ``None`` into :class:`Index` with ``dtype="string[pyarrow_numpy]"`` (:issue:`55365`) +- Fixed bug in :meth:`Series.all` and :meth:`Series.any` not treating missing values correctly for ``dtype="string[pyarrow_numpy]"`` (:issue:`55367`) - Fixed bug in :meth:`Series.rank` for ``string[pyarrow_numpy]`` dtype (:issue:`55362`) - Silence ``Period[B]`` warnings introduced by :issue:`53446` during normal plotting activity (:issue:`55138`) - diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index db11e74951ee5..24b99b5d4852e 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -631,9 +631,11 @@ def _reduce( self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs ): if name in ["any", "all"]: - arr = pc.and_kleene( - pc.invert(pc.is_null(self._pa_array)), pc.not_equal(self._pa_array, "") - ) + if not skipna and name == "all": + nas = pc.invert(pc.is_null(self._pa_array)) + arr = pc.and_kleene(nas, pc.not_equal(self._pa_array, "")) + else: + arr = pc.not_equal(self._pa_array, "") return ArrowExtensionArray(arr)._reduce( name, skipna=skipna, keepdims=keepdims, **kwargs ) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 021252500e814..560b2377ada70 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1087,7 +1087,8 @@ def test_any_all_pyarrow_string(self): ser = Series([None, "a"], dtype="string[pyarrow_numpy]") assert ser.any() - assert not ser.all() + assert ser.all() + assert not ser.all(skipna=False) ser = Series([None, ""], dtype="string[pyarrow_numpy]") assert not ser.any()