Commit

Merge branch 'main' into Test_issue_57930

MCRE-BE authored Nov 13, 2024
2 parents ae61f89 + 73da90c commit e09ab5f

Showing 19 changed files with 113 additions and 80 deletions.
3 changes: 2 additions & 1 deletion .gitattributes
@@ -85,4 +85,5 @@ pandas/tests/io/parser/data export-ignore

# Include cibw script in sdist since it's needed for building wheels
scripts/cibw_before_build.sh -export-ignore
scripts/cibw_before_test.sh -export-ignore
scripts/cibw_before_build_windows.sh -export-ignore
scripts/cibw_before_test_windows.sh -export-ignore
4 changes: 2 additions & 2 deletions .github/workflows/unit-tests.yml
@@ -387,8 +387,8 @@ jobs:
- name: Build Environment
run: |
python --version
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy cython
python -m pip install --upgrade pip setuptools wheel numpy meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython
python -m pip install versioneer[toml]
python -m pip install python-dateutil pytz tzdata hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov
python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror"
18 changes: 0 additions & 18 deletions .github/workflows/wheels.yml
@@ -111,10 +111,6 @@ jobs:
- buildplat: [ubuntu-22.04, pyodide_wasm32]
python: ["cp312", "3.12"]
cibw_build_frontend: 'build'
# TODO: Build free-threaded wheels for Windows
exclude:
- buildplat: [windows-2022, win_amd64]
python: ["cp313t", "3.13"]

env:
IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
@@ -181,20 +177,6 @@ jobs:
shell: bash -el {0}
run: for whl in $(ls wheelhouse); do wheel unpack wheelhouse/$whl -d /tmp; done

# Testing on windowsservercore instead of GHA runner to fail on missing DLLs
- name: Test Windows Wheels
if: ${{ matrix.buildplat[1] == 'win_amd64' }}
shell: pwsh
run: |
$TST_CMD = @"
python -m pip install hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0;
python -m pip install `$(Get-Item pandas\wheelhouse\*.whl);
python -c `'import pandas as pd; pd.test(extra_args=[`\"--no-strict-data-files`\", `\"-m not clipboard and not single_cpu and not slow and not network and not db`\"])`';
"@
# add rc to the end of the image name if the Python version is unreleased
docker pull python:${{ matrix.python[1] == '3.13' && '3.13-rc' || format('{0}-windowsservercore', matrix.python[1]) }}
docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] == '3.13' && '3.13-rc' || format('{0}-windowsservercore', matrix.python[1]) }} powershell -Command $TST_CMD
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
2 changes: 2 additions & 0 deletions MANIFEST.in
@@ -65,3 +65,5 @@ graft pandas/_libs/include

# Include cibw script in sdist since it's needed for building wheels
include scripts/cibw_before_build.sh
include scripts/cibw_before_build_windows.sh
include scripts/cibw_before_test_windows.sh
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.3.0.rst
@@ -106,10 +106,10 @@ Conversion
Strings
^^^^^^^
- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results in case of ``method="average"`` and raising an error if it would truncate results (:issue:`59768`)
- Bug in :meth:`Series.replace` with :class:`StringDtype` when replacing with a non-string value was not upcasting to ``object`` dtype (:issue:`60282`)
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` and :class:`StringDtype` with ``storage="pyarrow"`` giving incorrect results (:issue:`59710`)
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
-
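
As a rough illustration of the :meth:`Series.replace` entry above (GH 60282): replacing a string with a non-string value on string-dtype data now upcasts to ``object`` instead of raising. This mirrors the updated test in pandas/tests/series/methods/test_replace.py further down; the sample values are illustrative and assume the new string dtype is enabled.

import pandas as pd

ser = pd.Series(["0", "a"], dtype="str")
result = ser.replace("0", 1)
# result holds [1, "a"] with object dtype rather than raising a TypeError
print(result.dtype)  # object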

Interval
^^^^^^^^
43 changes: 25 additions & 18 deletions pandas/core/arrays/string_.py
@@ -730,20 +730,9 @@ def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]:

return arr, self.dtype.na_value

def __setitem__(self, key, value) -> None:
value = extract_array(value, extract_numpy=True)
if isinstance(value, type(self)):
# extract_array doesn't extract NumpyExtensionArray subclasses
value = value._ndarray

key = check_array_indexer(self, key)
scalar_key = lib.is_scalar(key)
scalar_value = lib.is_scalar(value)
if scalar_key and not scalar_value:
raise ValueError("setting an array element with a sequence.")

# validate new items
if scalar_value:
def _maybe_convert_setitem_value(self, value):
"""Maybe convert value to be pyarrow compatible."""
if lib.is_scalar(value):
if isna(value):
value = self.dtype.na_value
elif not isinstance(value, str):
@@ -753,8 +742,11 @@ def __setitem__(self, key, value) -> None:
"instead."
)
else:
value = extract_array(value, extract_numpy=True)
if not is_array_like(value):
value = np.asarray(value, dtype=object)
elif isinstance(value.dtype, type(self.dtype)):
return value
else:
# cast categories and friends to arrays to see if values are
# compatible, compatibility with arrow backed strings
@@ -764,11 +756,26 @@
"Invalid value for dtype 'str'. Value should be a "
"string or missing value (or array of those)."
)
return value

mask = isna(value)
if mask.any():
value = value.copy()
value[isna(value)] = self.dtype.na_value
def __setitem__(self, key, value) -> None:
value = self._maybe_convert_setitem_value(value)

key = check_array_indexer(self, key)
scalar_key = lib.is_scalar(key)
scalar_value = lib.is_scalar(value)
if scalar_key and not scalar_value:
raise ValueError("setting an array element with a sequence.")

if not scalar_value:
if value.dtype == self.dtype:
value = value._ndarray
else:
value = np.asarray(value)
mask = isna(value)
if mask.any():
value = value.copy()
value[isna(value)] = self.dtype.na_value

super().__setitem__(key, value)
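
Taken together, ``_maybe_convert_setitem_value`` and ``__setitem__`` give the string array a single place to validate incoming values. A minimal sketch of the user-facing effect (assuming the new string dtype is available; the sample values are illustrative):

import pandas as pd

arr = pd.array(["a", "b"], dtype="str")
arr[0] = pd.NA    # missing values are mapped to the dtype's NA sentinel
arr[1] = "c"      # plain strings pass through unchanged
# arr[1] = 2      # would raise TypeError: non-string scalars are rejected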

7 changes: 7 additions & 0 deletions pandas/core/dtypes/cast.py
@@ -1749,6 +1749,13 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
except (ValueError, TypeError):
return False

if dtype == "string":
try:
arr._maybe_convert_setitem_value(element) # type: ignore[union-attr]
return True
except (ValueError, TypeError):
return False

# This is technically incorrect, but maintains the behavior of
# ExtensionBlock._can_hold_element
return True
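
With this branch in place, ``can_hold_element`` defers to the string array's own validation instead of unconditionally returning ``True`` for these arrays. A hedged sketch of the effect, calling the internal helper directly (values illustrative):

import pandas as pd
from pandas.core.dtypes.cast import can_hold_element

arr = pd.array(["a", "b"], dtype="str")
can_hold_element(arr, "c")   # True: strings are compatible with the dtype
can_hold_element(arr, 1.5)   # False: callers fall back to upcasting to object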
3 changes: 3 additions & 0 deletions pandas/core/dtypes/missing.py
@@ -19,6 +19,7 @@
NaT,
iNaT,
)
from pandas.util._decorators import set_module

from pandas.core.dtypes.common import (
DT64NS_DTYPE,
@@ -93,6 +94,7 @@ def isna(
def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: ...


@set_module("pandas")
def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
"""
Detect missing values for an array-like object.
@@ -307,6 +309,7 @@ def notna(
def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: ...


@set_module("pandas")
def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
"""
Detect non-missing values for an array-like object.
8 changes: 6 additions & 2 deletions pandas/core/generic.py
@@ -7668,8 +7668,12 @@ def interpolate(
* 'linear': Ignore the index and treat the values as equally
spaced. This is the only method supported on MultiIndexes.
* 'time': Works on daily and higher resolution data to interpolate
given length of interval.
* 'index', 'values': use the actual numerical values of the index.
given length of interval. This interpolates values based on
time interval between observations.
* 'index': The interpolation uses the numerical values
of the DataFrame's index to linearly calculate missing values.
* 'values': Interpolation based on the numerical values
in the DataFrame, treating them as equally spaced along the index.
* 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
'barycentric', 'polynomial': Passed to
`scipy.interpolate.interp1d`, whereas 'spline' is passed to
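
The expanded wording above is easiest to see with a non-uniform index, where ``'linear'`` and ``'index'`` give different answers (small illustrative example):

import pandas as pd

ser = pd.Series([1.0, None, 3.0], index=[0, 1, 10])
ser.interpolate(method="linear")  # fills 2.0: points treated as equally spaced
ser.interpolate(method="index")   # fills 1.2: weighted by the index values (1 is 1/10 of the way from 0 to 10)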
23 changes: 18 additions & 5 deletions pandas/core/internals/blocks.py
@@ -77,6 +77,7 @@
ABCNumpyExtensionArray,
ABCSeries,
)
from pandas.core.dtypes.inference import is_re
from pandas.core.dtypes.missing import (
is_valid_na_for_dtype,
isna,
@@ -706,7 +707,7 @@ def replace(
# bc _can_hold_element is incorrect.
return [self.copy(deep=False)]

elif self._can_hold_element(value):
elif self._can_hold_element(value) or (self.dtype == "string" and is_re(value)):
# TODO(CoW): Maybe split here as well into columns where mask has True
# and rest?
blk = self._maybe_copy(inplace)
@@ -766,14 +767,24 @@ def _replace_regex(
-------
List[Block]
"""
if not self._can_hold_element(to_replace):
if not is_re(to_replace) and not self._can_hold_element(to_replace):
# i.e. only if self.is_object is True, but could in principle include a
# String ExtensionBlock
return [self.copy(deep=False)]

rx = re.compile(to_replace)
if is_re(to_replace) and self.dtype not in [object, "string"]:
# only object or string dtype can hold strings, and a regex object
# will only match strings
return [self.copy(deep=False)]

block = self._maybe_copy(inplace)
if not (
self._can_hold_element(value) or (self.dtype == "string" and is_re(value))
):
block = self.astype(np.dtype(object))
else:
block = self._maybe_copy(inplace)

rx = re.compile(to_replace)

replace_regex(block.values, rx, value, mask)
return [block]
@@ -793,7 +804,9 @@ def replace_list(

# Exclude anything that we know we won't contain
pairs = [
(x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
(x, y)
for x, y in zip(src_list, dest_list)
if (self._can_hold_element(x) or (self.dtype == "string" and is_re(x)))
]
if not len(pairs):
return [self.copy(deep=False)]
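
The net effect of these block-level changes for users: regex-based replacement on string-dtype data with a non-string replacement value now upcasts to ``object`` instead of raising, matching the updated Series test further down. A rough sketch (assumes the new string dtype; pattern and values are illustrative):

import pandas as pd

ser = pd.Series(["ab", "cd"], dtype="str")
ser.replace(r"^a.*", 1, regex=True)
# -> [1, "cd"] with object dtype: the block is cast to object before the regex replacement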
2 changes: 2 additions & 0 deletions pandas/tests/api/test_api.py
@@ -417,6 +417,8 @@ def test_set_module():
assert pd.Period.__module__ == "pandas"
assert pd.Timestamp.__module__ == "pandas"
assert pd.Timedelta.__module__ == "pandas"
assert pd.isna.__module__ == "pandas"
assert pd.notna.__module__ == "pandas"
assert pd.merge.__module__ == "pandas"
assert pd.merge_ordered.__module__ == "pandas"
assert pd.merge_asof.__module__ == "pandas"
3 changes: 0 additions & 3 deletions pandas/tests/frame/methods/test_replace.py
@@ -889,7 +889,6 @@ def test_replace_input_formats_listlike(self):
with pytest.raises(ValueError, match=msg):
df.replace(to_rep, values[1:])

@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
def test_replace_input_formats_scalar(self):
df = DataFrame(
{"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]}
@@ -940,7 +939,6 @@ def test_replace_dict_no_regex(self):
result = answer.replace(weights)
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
def test_replace_series_no_regex(self):
answer = Series(
{
@@ -1176,7 +1174,6 @@ def test_replace_commutative(self, df, to_replace, exp):
result = df.replace(to_replace)
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
@pytest.mark.parametrize(
"replacer",
[
8 changes: 8 additions & 0 deletions pandas/tests/plotting/frame/test_frame.py
@@ -2589,6 +2589,14 @@ def test_plot_period_index_makes_no_right_shift(self, freq):
result = ax.get_lines()[0].get_xdata()
assert all(str(result[i]) == str(expected[i]) for i in range(4))

def test_plot_display_xlabel_and_xticks(self):
# GH#44050
df = DataFrame(np.random.default_rng(2).random((10, 2)), columns=["a", "b"])
ax = df.plot.hexbin(x="a", y="b")

_check_visible([ax.xaxis.get_label()], visible=True)
_check_visible(ax.get_xticklabels(), visible=True)


def _generate_4_axes_via_gridspec():
gs = mpl.gridspec.GridSpec(2, 2)
18 changes: 5 additions & 13 deletions pandas/tests/series/indexing/test_setitem.py
@@ -860,24 +860,16 @@ def test_index_where(self, obj, key, expected, raises, val):
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

if raises and obj.dtype == "string":
with pytest.raises(TypeError, match="Invalid value"):
Index(obj).where(~mask, val)
else:
res = Index(obj).where(~mask, val)
expected_idx = Index(expected, dtype=expected.dtype)
tm.assert_index_equal(res, expected_idx)
res = Index(obj).where(~mask, val)
expected_idx = Index(expected, dtype=expected.dtype)
tm.assert_index_equal(res, expected_idx)

def test_index_putmask(self, obj, key, expected, raises, val):
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

if raises and obj.dtype == "string":
with pytest.raises(TypeError, match="Invalid value"):
Index(obj).putmask(mask, val)
else:
res = Index(obj).putmask(mask, val)
tm.assert_index_equal(res, Index(expected, dtype=expected.dtype))
res = Index(obj).putmask(mask, val)
tm.assert_index_equal(res, Index(expected, dtype=expected.dtype))
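
These two tests now exercise the fallback path rather than expecting a TypeError: with ``can_hold_element`` returning False for incompatible values, ``Index.where`` and ``Index.putmask`` upcast instead of raising. A small sketch of the new behavior (assumes the new string dtype; values are illustrative):

import numpy as np
import pandas as pd

idx = pd.Index(["a", "b", "c"], dtype="str")
mask = np.array([True, False, False])
idx.putmask(mask, 2.5)
# -> Index([2.5, 'b', 'c'], dtype=object): the index is upcast so the float can be held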


@pytest.mark.parametrize(
10 changes: 4 additions & 6 deletions pandas/tests/series/methods/test_replace.py
@@ -635,13 +635,11 @@ def test_replace_regex_dtype_series(self, regex):
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("regex", [False, True])
def test_replace_regex_dtype_series_string(self, regex, using_infer_string):
if not using_infer_string:
# then this is object dtype which is already tested above
return
def test_replace_regex_dtype_series_string(self, regex):
series = pd.Series(["0"], dtype="str")
with pytest.raises(TypeError, match="Invalid value"):
series.replace(to_replace="0", value=1, regex=regex)
expected = pd.Series([1], dtype=object)
result = series.replace(to_replace="0", value=1, regex=regex)
tm.assert_series_equal(result, expected)

def test_replace_different_int_types(self, any_int_numpy_dtype):
# GH#45311
15 changes: 7 additions & 8 deletions pyproject.toml
@@ -160,7 +160,13 @@ free-threaded-support = true
before-build = "PACKAGE_DIR={package} bash {package}/scripts/cibw_before_build.sh"

[tool.cibuildwheel.windows]
before-build = "pip install delvewheel && bash {package}/scripts/cibw_before_build.sh"
before-build = "pip install delvewheel && bash {package}/scripts/cibw_before_build_windows.sh"
before-test = "bash {package}/scripts/cibw_before_test_windows.sh"
test-command = """
set PANDAS_CI='1' && \
python -c "import pandas as pd; \
pd.test(extra_args=['--no-strict-data-files', '-m not clipboard and not single_cpu and not slow and not network and not db']);" \
"""
repair-wheel-command = "delvewheel repair -w {dest_dir} {wheel}"
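
The ``test-command`` above drives pandas' built-in test runner against the installed wheel; roughly the same check can be run by hand from Python once hypothesis, pytest and pytest-xdist are installed (sketch, mirroring the command above):

import pandas as pd

# Runs the test suite shipped with the installed pandas wheel, skipping
# markers that need extra services or are slow.
pd.test(extra_args=[
    "--no-strict-data-files",
    "-m not clipboard and not single_cpu and not slow and not network and not db",
])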

[[tool.cibuildwheel.overrides]]
@@ -175,13 +181,6 @@
select = "*-musllinux*"
before-test = "apk update && apk add musl-locales"

[[tool.cibuildwheel.overrides]]
select = "*-win*"
# We test separately for Windows, since we use
# the windowsservercore docker image to check if any dlls are
# missing from the wheel
test-command = ""

[[tool.cibuildwheel.overrides]]
# Don't strip wheels on macOS.
# macOS doesn't support stripping wheels with linker
6 changes: 3 additions & 3 deletions scripts/cibw_before_build.sh
@@ -5,8 +5,8 @@ done

# TODO: Delete when there's a PyPI Cython release that supports free-threaded Python 3.13.
FREE_THREADED_BUILD="$(python -c"import sysconfig; print(bool(sysconfig.get_config_var('Py_GIL_DISABLED')))")"
if [[ $FREE_THREADED_BUILD == "True" ]]; then
if [[ $FREE_THREADED_BUILD == "True" ]]; then
python -m pip install -U pip
python -m pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy cython
python -m pip install ninja meson-python versioneer[toml]
python -m pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython
python -m pip install numpy ninja meson-python versioneer[toml]
fi
13 changes: 13 additions & 0 deletions scripts/cibw_before_build_windows.sh
@@ -0,0 +1,13 @@
# Add 3rd party licenses, like numpy does
for file in $PACKAGE_DIR/LICENSES/*; do
cat $file >> $PACKAGE_DIR/LICENSE
done

# TODO: Delete when there's a PyPI Cython release that supports free-threaded Python 3.13
# and a NumPy Windows wheel for the free-threaded build on PyPI.
FREE_THREADED_BUILD="$(python -c"import sysconfig; print(bool(sysconfig.get_config_var('Py_GIL_DISABLED')))")"
if [[ $FREE_THREADED_BUILD == "True" ]]; then
python -m pip install -U pip
python -m pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy cython
python -m pip install ninja meson-python versioneer[toml]
fi