fjangda7 · fjangda7 · Nov 30, 2023 · Nov 29, 2023 · Nov 29, 2023 · Nov 29, 2023
diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py
@@ -6,12 +6,12 @@
 import pandas as pd
 from pandas import (
     DataFrame,
+    Index,
     Series,
     Timestamp,
     date_range,
     to_timedelta,
 )
-import pandas._testing as tm
 from pandas.core.algorithms import checked_add_with_arr
 
 from .pandas_vb_common import numeric_dtypes
@@ -323,8 +323,10 @@ class IndexArithmetic:
 
     def setup(self, dtype):
         N = 10**6
-        indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"}
-        self.index = getattr(tm, indexes[dtype])(N)
+        if dtype == "float":
+            self.index = Index(np.arange(N), dtype=np.float64)
+        elif dtype == "int":
+            self.index = Index(np.arange(N), dtype=np.int64)
 
     def time_add(self, dtype):
         self.index + 2

diff --git a/doc/source/development/maintaining.rst b/doc/source/development/maintaining.rst
@@ -449,9 +449,13 @@ which will be triggered when the tag is pushed.
     git tag -a v1.5.0.dev0 -m "DEV: Start 1.5.0"
     git push upstream main --follow-tags
 
-3. Build the source distribution (git must be in the tag commit)::
+3. Download the source distribution and wheels from the `wheel staging area <https://anaconda.org/scientific-python-nightly-wheels/pandas>`_.
+   Be careful to make sure that no wheels are missing (e.g. due to failed builds).
 
-    ./setup.py sdist --formats=gztar --quiet
+   Running ``scripts/download_wheels.sh`` with the version whose wheels and sdist you want to download should do the trick.
+   This script will make a ``dist`` folder inside your clone of pandas and put the downloaded wheels and sdist there::
+
+    scripts/download_wheels.sh <VERSION>
 
 4. Create a `new GitHub release <https://github.com/pandas-dev/pandas/releases/new>`_:
 
@@ -463,23 +467,19 @@ which will be triggered when the tag is pushed.
    - Set as the latest release: Leave checked, unless releasing a patch release for an older version
      (e.g. releasing 1.4.5 after 1.5 has been released)
 
-5. The GitHub release will after some hours trigger an
+5. Upload the wheels and the sdist to PyPI::
+
+    twine upload pandas/dist/pandas-<version>*.{whl,tar.gz} --skip-existing
+
+6. The GitHub release will after some hours trigger an
    `automated conda-forge PR <https://github.com/conda-forge/pandas-feedstock/pulls>`_.
+   (If you don't want to wait, you can open an issue titled ``@conda-forge-admin, please update version`` to trigger the bot.)
    Merge it once the CI is green, and it will generate the conda-forge packages.
+
    In case a manual PR needs to be done, the version, sha256 and build fields are the
    ones that usually need to be changed. If anything else in the recipe has changed since
    the last release, those changes should be available in ``ci/meta.yaml``.
 
-6. Packages for supported versions in PyPI are built automatically from our CI.
-   Once all packages are build download all wheels from the
-   `Anaconda repository <https://anaconda.org/multibuild-wheels-staging/pandas/files?version=\<version\>>`_
-   where our CI published them to the ``dist/`` directory in your local pandas copy.
-   You can use the script ``scripts/download_wheels.sh`` to download all wheels at once.
-
-7. Upload wheels to PyPI::
-
-    twine upload pandas/dist/pandas-<version>*.{whl,tar.gz} --skip-existing
-
 Post-Release
 ````````````
 

diff --git a/doc/source/user_guide/copy_on_write.rst b/doc/source/user_guide/copy_on_write.rst
@@ -26,7 +26,7 @@ Previous behavior
 -----------------
 
 pandas indexing behavior is tricky to understand. Some operations return views while
-other return copies. Depending on the result of the operation, mutation one object
+other return copies. Depending on the result of the operation, mutating one object
 might accidentally mutate another:
 
 .. ipython:: python

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
@@ -428,6 +428,7 @@ Performance improvements
 - Performance improvement in :meth:`Series.str.get_dummies` when dtype is ``"string[pyarrow]"`` or ``"string[pyarrow_numpy]"`` (:issue:`56110`)
 - Performance improvement in :meth:`Series.str` methods (:issue:`55736`)
 - Performance improvement in :meth:`Series.value_counts` and :meth:`Series.mode` for masked dtypes (:issue:`54984`, :issue:`55340`)
+- Performance improvement in :meth:`DataFrameGroupBy.nunique` and :meth:`SeriesGroupBy.nunique` (:issue:`55972`)
 - Performance improvement in :meth:`SeriesGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`DataFrameGroupBy.idxmin` (:issue:`54234`)
 - Performance improvement when indexing into a non-unique index (:issue:`55816`)
 - Performance improvement when indexing with more than 4 keys (:issue:`54550`)
@@ -520,7 +521,7 @@ Indexing
 
 Missing
 ^^^^^^^
--
+- Bug in :meth:`DataFrame.update` not updating in-place for tz-aware datetime64 dtypes (:issue:`56227`)
 -
 
 MultiIndex

diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
@@ -27,12 +27,8 @@
 from pandas.compat import pa_version_under10p1
 
 from pandas.core.dtypes.common import (
-    is_float_dtype,
     is_sequence,
-    is_signed_integer_dtype,
     is_string_dtype,
-    is_unsigned_integer_dtype,
-    pandas_dtype,
 )
 
 import pandas as pd
@@ -46,6 +42,8 @@
     RangeIndex,
     Series,
     bdate_range,
+    date_range,
+    period_range,
     timedelta_range,
 )
 from pandas._testing._io import (
@@ -111,7 +109,6 @@
         NpDtype,
     )
 
-    from pandas import PeriodIndex
     from pandas.core.arrays import ArrowExtensionArray
 
 _N = 30
@@ -351,38 +348,6 @@ def getCols(k) -> str:
     return string.ascii_uppercase[:k]
 
 
-def makeNumericIndex(k: int = 10, *, name=None, dtype: Dtype | None) -> Index:
-    dtype = pandas_dtype(dtype)
-    assert isinstance(dtype, np.dtype)
-
-    if dtype.kind in "iu":
-        values = np.arange(k, dtype=dtype)
-        if is_unsigned_integer_dtype(dtype):
-            values += 2 ** (dtype.itemsize * 8 - 1)
-    elif dtype.kind == "f":
-        values = np.random.default_rng(2).random(k) - np.random.default_rng(2).random(1)
-        values.sort()
-        values = values * (10 ** np.random.default_rng(2).integers(0, 9))
-    else:
-        raise NotImplementedError(f"wrong dtype {dtype}")
-
-    return Index(values, dtype=dtype, name=name)
-
-
-def makeIntIndex(k: int = 10, *, name=None, dtype: Dtype = "int64") -> Index:
-    dtype = pandas_dtype(dtype)
-    if not is_signed_integer_dtype(dtype):
-        raise TypeError(f"Wrong dtype {dtype}")
-    return makeNumericIndex(k, name=name, dtype=dtype)
-
-
-def makeFloatIndex(k: int = 10, *, name=None, dtype: Dtype = "float64") -> Index:
-    dtype = pandas_dtype(dtype)
-    if not is_float_dtype(dtype):
-        raise TypeError(f"Wrong dtype {dtype}")
-    return makeNumericIndex(k, name=name, dtype=dtype)
-
-
 def makeDateIndex(
     k: int = 10, freq: Frequency = "B", name=None, **kwargs
 ) -> DatetimeIndex:
@@ -391,12 +356,6 @@ def makeDateIndex(
     return DatetimeIndex(dr, name=name, **kwargs)
 
 
-def makePeriodIndex(k: int = 10, name=None, **kwargs) -> PeriodIndex:
-    dt = datetime(2000, 1, 1)
-    pi = pd.period_range(start=dt, periods=k, freq="D", name=name, **kwargs)
-    return pi
-
-
 def makeObjectSeries(name=None) -> Series:
     data = [f"foo_{i}" for i in range(_N)]
     index = Index([f"bar_{i}" for i in range(_N)])
@@ -487,12 +446,12 @@ def makeCustomIndex(
 
     # specific 1D index type requested?
     idx_func_dict: dict[str, Callable[..., Index]] = {
-        "i": makeIntIndex,
-        "f": makeFloatIndex,
+        "i": lambda n: Index(np.arange(n), dtype=np.int64),
+        "f": lambda n: Index(np.arange(n), dtype=np.float64),
         "s": lambda n: Index([f"{i}_{chr(i)}" for i in range(97, 97 + n)]),
-        "dt": makeDateIndex,
+        "dt": lambda n: date_range("2020-01-01", periods=n),
         "td": lambda n: timedelta_range("1 day", periods=n),
-        "p": makePeriodIndex,
+        "p": lambda n: period_range("2020-01-01", periods=n, freq="D"),
     }
     idx_func = idx_func_dict.get(idx_type)
     if idx_func:
@@ -975,11 +934,7 @@ def shares_memory(left, right) -> bool:
     "makeCustomIndex",
     "makeDataFrame",
     "makeDateIndex",
-    "makeFloatIndex",
-    "makeIntIndex",
-    "makeNumericIndex",
     "makeObjectSeries",
-    "makePeriodIndex",
     "makeTimeDataFrame",
     "makeTimeSeries",
     "maybe_produces_warning",

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -68,6 +68,7 @@
     Series,
     Timedelta,
     Timestamp,
+    period_range,
     timedelta_range,
 )
 import pandas._testing as tm
@@ -616,23 +617,27 @@ def _create_mi_with_dt64tz_level():
     "string": Index([f"pandas_{i}" for i in range(100)]),
     "datetime": tm.makeDateIndex(100),
     "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"),
-    "period": tm.makePeriodIndex(100),
+    "period": period_range("2020-01-01", periods=100, freq="D"),
     "timedelta": timedelta_range(start="1 day", periods=100, freq="D"),
     "range": RangeIndex(100),
-    "int8": tm.makeIntIndex(100, dtype="int8"),
-    "int16": tm.makeIntIndex(100, dtype="int16"),
-    "int32": tm.makeIntIndex(100, dtype="int32"),
-    "int64": tm.makeIntIndex(100, dtype="int64"),
+    "int8": Index(np.arange(100), dtype="int8"),
+    "int16": Index(np.arange(100), dtype="int16"),
+    "int32": Index(np.arange(100), dtype="int32"),
+    "int64": Index(np.arange(100), dtype="int64"),
     "uint8": Index(np.arange(100), dtype="uint8"),
     "uint16": Index(np.arange(100), dtype="uint16"),
     "uint32": Index(np.arange(100), dtype="uint32"),
     "uint64": Index(np.arange(100), dtype="uint64"),
-    "float32": tm.makeFloatIndex(100, dtype="float32"),
-    "float64": tm.makeFloatIndex(100, dtype="float64"),
+    "float32": Index(np.arange(100), dtype="float32"),
+    "float64": Index(np.arange(100), dtype="float64"),
     "bool-object": Index([True, False] * 5, dtype=object),
     "bool-dtype": Index(np.random.default_rng(2).standard_normal(10) < 0),
-    "complex64": tm.makeNumericIndex(100, dtype="float64").astype("complex64"),
-    "complex128": tm.makeNumericIndex(100, dtype="float64").astype("complex128"),
+    "complex64": Index(
+        np.arange(100, dtype="complex64") + 1.0j * np.arange(100, dtype="complex64")
+    ),
+    "complex128": Index(
+        np.arange(100, dtype="complex128") + 1.0j * np.arange(100, dtype="complex128")
+    ),
     "categorical": CategoricalIndex(list("abcd") * 25),
     "interval": IntervalIndex.from_breaks(np.linspace(0, 100, num=101)),
     "empty": Index([]),

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -8826,39 +8826,40 @@ def update(
         1  b  e
         2  c  f
 
-        For Series, its name attribute must be set.
-
         >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
         ...                    'B': ['x', 'y', 'z']})
-        >>> new_column = pd.Series(['d', 'e'], name='B', index=[0, 2])
-        >>> df.update(new_column)
+        >>> new_df = pd.DataFrame({'B': ['d', 'f']}, index=[0, 2])
+        >>> df.update(new_df)
         >>> df
            A  B
         0  a  d
         1  b  y
-        2  c  e
+        2  c  f
+
+        For Series, its name attribute must be set.
+
         >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
         ...                    'B': ['x', 'y', 'z']})
-        >>> new_df = pd.DataFrame({'B': ['d', 'e']}, index=[1, 2])
-        >>> df.update(new_df)
+        >>> new_column = pd.Series(['d', 'e', 'f'], name='B')
+        >>> df.update(new_column)
         >>> df
            A  B
-        0  a  x
-        1  b  d
-        2  c  e
+        0  a  d
+        1  b  e
+        2  c  f
 
         If `other` contains NaNs the corresponding values are not updated
         in the original dataframe.
 
         >>> df = pd.DataFrame({'A': [1, 2, 3],
-        ...                    'B': [400, 500, 600]})
+        ...                    'B': [400., 500., 600.]})
         >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]})
         >>> df.update(new_df)
         >>> df
-           A    B
-        0  1    4
-        1  2  500
-        2  3    6
+           A      B
+        0  1    4.0
+        1  2  500.0
+        2  3    6.0
         """
         if not PYPY and using_copy_on_write():
             if sys.getrefcount(self) <= REF_COUNT:
@@ -8875,8 +8876,6 @@ def update(
                     stacklevel=2,
                 )
 
-        from pandas.core.computation import expressions
-
         # TODO: Support other joins
         if join != "left":  # pragma: no cover
             raise NotImplementedError("Only left join is supported")
@@ -8910,7 +8909,7 @@ def update(
             if mask.all():
                 continue
 
-            self.loc[:, col] = expressions.where(mask, this, that)
+            self.loc[:, col] = self[col].where(mask, that)
 
     # ----------------------------------------------------------------------
     # Data reshaping