From f8bd9886e0eba6142e89db7af5a91deb50497b75 Mon Sep 17 00:00:00 2001
From: abonte <6319051+abonte@users.noreply.github.com>
Date: Tue, 16 Apr 2024 19:12:19 +0200
Subject: [PATCH 1/5] DOC: replace deprecated frequency alias (#58256)

replace deprecated alias
---
 pandas/core/arrays/datetimelike.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index f4f076103d8c3b..8ada9d88e08bc0 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1787,7 +1787,7 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]:
     ----------
     freq : str or Offset
         The frequency level to {op} the index to. Must be a fixed
-        frequency like 'S' (second) not 'ME' (month end). See
+        frequency like 's' (second) not 'ME' (month end). See
         :ref:`frequency aliases <timeseries.offset_aliases>` for
         a list of possible `freq` values.
     ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'

From 53bd1a83a987cad854f53db48a4d472dfeffdced Mon Sep 17 00:00:00 2001
From: Thomas H <tehunter@users.noreply.github.com>
Date: Tue, 16 Apr 2024 13:16:02 -0400
Subject: [PATCH 2/5] BUG: DataFrame slice selection treated as hashable in
 Python 3.12 #57500 (#58043)

* Reorder slice and hashable in __getitem__

* Add unit test

* Fix test and formatting

* Update whatsnew

* Restore original flow ordering

* Move whatsnew entry to 3.0.0

* Move whatsnew entry to Indexing

* Update doc/source/whatsnew/v3.0.0.rst

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 doc/source/whatsnew/v3.0.0.rst               |  2 +-
 pandas/core/frame.py                         |  4 +++-
 pandas/tests/frame/indexing/test_indexing.py | 10 ++++++++++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 17328e6084cb48..0992142f563638 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -386,7 +386,7 @@ Interval
 
 Indexing
 ^^^^^^^^
--
+- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
 -
 
 Missing
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 0b386efb5a867d..cd4812c3f78ae7 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3855,8 +3855,10 @@ def __getitem__(self, key):
         key = lib.item_from_zerodim(key)
         key = com.apply_if_callable(key, self)
 
-        if is_hashable(key) and not is_iterator(key):
+        if is_hashable(key) and not is_iterator(key) and not isinstance(key, slice):
             # is_iterator to exclude generator e.g. test_getitem_listlike
+            # As of Python 3.12, slice objects are hashable; exclude them from the
+            # column-key shortcut below, which breaks with MultiIndex (GH#57500)
             # shortcut if the key is in columns
             is_mi = isinstance(self.columns, MultiIndex)
             # GH#45316 Return view if key is not duplicated
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 49e5c4aff5afe6..5a6fe07aa007b0 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -524,6 +524,16 @@ def test_loc_setitem_boolean_mask_allfalse(self):
         result.loc[result.b.isna(), "a"] = result.a.copy()
         tm.assert_frame_equal(result, df)
 
+    def test_getitem_slice_empty(self):
+        df = DataFrame([[1]], columns=MultiIndex.from_product([["A"], ["a"]]))
+        result = df[:]
+
+        expected = DataFrame([[1]], columns=MultiIndex.from_product([["A"], ["a"]]))
+
+        tm.assert_frame_equal(result, expected)
+        # Ensure df[:] returns a view of df, not the same object
+        assert result is not df
+
     def test_getitem_fancy_slice_integers_step(self):
         df = DataFrame(np.random.default_rng(2).standard_normal((10, 5)))
 

From b1dbd3bc1744d148fcc67aa807917bf6825470d3 Mon Sep 17 00:00:00 2001
From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com>
Date: Tue, 16 Apr 2024 20:17:01 +0300
Subject: [PATCH 3/5] GH: PDEP vote issue template (#58204)

* Create pdep_vote.yaml

* Unindent validations

* PDEP voting issue template

* Update pdeps path

* Minor changes

* Update label name

* Better wording

* Remove placeholder

* ignore::

* Remove wrong files

---------

Co-authored-by: Abdulaziz Aloqeely <52792999+DAzVise@users.noreply.github.com>
---
 .github/ISSUE_TEMPLATE/pdep_vote.yaml         | 74 +++++++++++++++++++
 .../pdeps/0001-purpose-and-guidelines.md      |  4 +-
 2 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 .github/ISSUE_TEMPLATE/pdep_vote.yaml

diff --git a/.github/ISSUE_TEMPLATE/pdep_vote.yaml b/.github/ISSUE_TEMPLATE/pdep_vote.yaml
new file mode 100644
index 00000000000000..6dcbd76eb0f74b
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/pdep_vote.yaml
@@ -0,0 +1,74 @@
+name: PDEP Vote
+description: Call for a vote on a PDEP
+title: "VOTE: "
+labels: [Vote]
+
+body:
+  - type: markdown
+    attributes:
+      value: >
+        As per [PDEP-1](https://pandas.pydata.org/pdeps/0001-purpose-and-guidelines.html), the following issue template should be used when a
+        maintainer has opened a PDEP discussion and is ready to call for a vote.
+  - type: checkboxes
+    attributes:
+      label: Locked issue
+      options:
+        - label: >
+            I locked this voting issue so that only voting members are able to cast their votes or
+            comment on this issue.
+          required: true
+  - type: input
+    id: PDEP-name
+    attributes:
+      label: PDEP number and title
+      placeholder: >
+        PDEP-1: Purpose and guidelines
+    validations:
+      required: true
+  - type: input
+    id: PDEP-link
+    attributes:
+      label: Pull request with discussion
+      description: e.g. https://github.com/pandas-dev/pandas/pull/47444
+    validations:
+      required: true
+  - type: input
+    id: PDEP-rendered-link
+    attributes:
+      label: Rendered PDEP for easy reading
+      description: e.g. https://github.com/pandas-dev/pandas/pull/47444/files?short_path=7c449e6#diff-7c449e698132205b235c501f7e47ebba38da4d2b7f9492c98f16745dba787041
+    validations:
+      required: true
+  - type: input
+    id: PDEP-number-of-discussion-participants
+    attributes:
+      label: Discussion participants
+      description: >
+        You may find it useful to list or total the number of participating members in the
+        PDEP discussion PR. This would be the maximum possible number of disapprove votes.
+      placeholder: >
+        14 voting members participated in the PR discussion thus far.
+  - type: input
+    id: PDEP-vote-end
+    attributes:
+      label: Voting will close in 15 days.
+      description: The voting period end date. ('Voting will close in 15 days.' will be automatically written)
+  - type: markdown
+    attributes:
+      value: ---
+  - type: textarea
+    id: Vote
+    attributes:
+      label: Vote
+      value: |
+        Cast your vote in a comment below.
+        * +1: approve.
+        * 0: abstain.
+            * Reason: A one sentence reason is required.
+        * -1: disapprove.
+            * Reason: A one sentence reason is required.
+        A disapprove vote requires prior participation in the linked discussion PR.
+
+        @pandas-dev/pandas-core
+    validations:
+      required: true
diff --git a/web/pandas/pdeps/0001-purpose-and-guidelines.md b/web/pandas/pdeps/0001-purpose-and-guidelines.md
index 49a3bc4c871cdd..bb15b8f997b110 100644
--- a/web/pandas/pdeps/0001-purpose-and-guidelines.md
+++ b/web/pandas/pdeps/0001-purpose-and-guidelines.md
@@ -79,8 +79,8 @@ Next is described the workflow that PDEPs can follow.
 
 #### Submitting a PDEP
 
-Proposing a PDEP is done by creating a PR adding a new file to `web/pdeps/`.
-The file is a markdown file, you can use `web/pdeps/0001.md` as a reference
+Proposing a PDEP is done by creating a PR adding a new file to `web/pandas/pdeps/`.
+The file is a markdown file; you can use `web/pandas/pdeps/0001-purpose-and-guidelines.md` as a reference
 for the expected format.
 
 The initial status of a PDEP will be `Status: Draft`. This will be changed to

From 8131381c9eb6264d7abb6fe66ef8b892933af5c4 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 16 Apr 2024 08:28:30 -1000
Subject: [PATCH 4/5] REF: Clean up some iterator usages (#58267)

* Use better data structures

* Use generator and set

* Move sorted to exception block, use set instead of list

* Another iterator, use iter

* another set

* Don't use iterator protocol
---
 pandas/_libs/tslibs/offsets.pyx       | 12 +++++-------
 pandas/core/frame.py                  | 27 ++++++++++++++-------------
 pandas/core/generic.py                |  2 +-
 pandas/core/internals/construction.py | 15 +++++++--------
 pandas/core/tools/datetimes.py        |  8 ++++----
 5 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
index e36abdf0ad9713..107608ec9f6060 100644
--- a/pandas/_libs/tslibs/offsets.pyx
+++ b/pandas/_libs/tslibs/offsets.pyx
@@ -219,8 +219,7 @@ cdef _get_calendar(weekmask, holidays, calendar):
         holidays = holidays + calendar.holidays().tolist()
     except AttributeError:
         pass
-    holidays = [_to_dt64D(dt) for dt in holidays]
-    holidays = tuple(sorted(holidays))
+    holidays = tuple(sorted(_to_dt64D(dt) for dt in holidays))
 
     kwargs = {"weekmask": weekmask}
     if holidays:
@@ -419,11 +418,10 @@ cdef class BaseOffset:
 
         if "holidays" in all_paras and not all_paras["holidays"]:
             all_paras.pop("holidays")
-        exclude = ["kwds", "name", "calendar"]
-        attrs = [(k, v) for k, v in all_paras.items()
-                 if (k not in exclude) and (k[0] != "_")]
-        attrs = sorted(set(attrs))
-        params = tuple([str(type(self))] + attrs)
+        exclude = {"kwds", "name", "calendar"}
+        attrs = {(k, v) for k, v in all_paras.items()
+                 if (k not in exclude) and (k[0] != "_")}
+        params = tuple([str(type(self))] + sorted(attrs))
         return params
 
     @property
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index cd4812c3f78ae7..b65a00db7d7df8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2301,8 +2301,8 @@ def maybe_reorder(
                     exclude.update(index)
 
         if any(exclude):
-            arr_exclude = [x for x in exclude if x in arr_columns]
-            to_remove = [arr_columns.get_loc(col) for col in arr_exclude]
+            arr_exclude = (x for x in exclude if x in arr_columns)
+            to_remove = {arr_columns.get_loc(col) for col in arr_exclude}
             arrays = [v for i, v in enumerate(arrays) if i not in to_remove]
 
             columns = columns.drop(exclude)
@@ -3705,7 +3705,7 @@ def transpose(
         nv.validate_transpose(args, {})
         # construct the args
 
-        dtypes = list(self.dtypes)
+        first_dtype = self.dtypes.iloc[0] if len(self.columns) else None
 
         if self._can_fast_transpose:
             # Note: tests pass without this, but this improves perf quite a bit.
@@ -3723,11 +3723,11 @@ def transpose(
 
         elif (
             self._is_homogeneous_type
-            and dtypes
-            and isinstance(dtypes[0], ExtensionDtype)
+            and first_dtype is not None
+            and isinstance(first_dtype, ExtensionDtype)
         ):
             new_values: list
-            if isinstance(dtypes[0], BaseMaskedDtype):
+            if isinstance(first_dtype, BaseMaskedDtype):
                 # We have masked arrays with the same dtype. We can transpose faster.
                 from pandas.core.arrays.masked import (
                     transpose_homogeneous_masked_arrays,
@@ -3736,7 +3736,7 @@ def transpose(
                 new_values = transpose_homogeneous_masked_arrays(
                     cast(Sequence[BaseMaskedArray], self._iter_column_arrays())
                 )
-            elif isinstance(dtypes[0], ArrowDtype):
+            elif isinstance(first_dtype, ArrowDtype):
                 # We have arrow EAs with the same dtype. We can transpose faster.
                 from pandas.core.arrays.arrow.array import (
                     ArrowExtensionArray,
@@ -3748,10 +3748,11 @@ def transpose(
                 )
             else:
                 # We have other EAs with the same dtype. We preserve dtype in transpose.
-                dtyp = dtypes[0]
-                arr_typ = dtyp.construct_array_type()
+                arr_typ = first_dtype.construct_array_type()
                 values = self.values
-                new_values = [arr_typ._from_sequence(row, dtype=dtyp) for row in values]
+                new_values = [
+                    arr_typ._from_sequence(row, dtype=first_dtype) for row in values
+                ]
 
             result = type(self)._from_arrays(
                 new_values,
@@ -5882,7 +5883,7 @@ def set_index(
             else:
                 arrays.append(self.index)
 
-        to_remove: list[Hashable] = []
+        to_remove: set[Hashable] = set()
         for col in keys:
             if isinstance(col, MultiIndex):
                 arrays.extend(col._get_level_values(n) for n in range(col.nlevels))
@@ -5909,7 +5910,7 @@ def set_index(
                 arrays.append(frame[col])
                 names.append(col)
                 if drop:
-                    to_remove.append(col)
+                    to_remove.add(col)
 
             if len(arrays[-1]) != len(self):
                 # check newest element against length of calling frame, since
@@ -5926,7 +5927,7 @@ def set_index(
             raise ValueError(f"Index has duplicate keys: {duplicates}")
 
         # use set to handle duplicate column names gracefully in case of drop
-        for c in set(to_remove):
+        for c in to_remove:
             del frame[c]
 
         # clear up memory usage
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 523ca9de201bf3..9686c081b5fb38 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2045,7 +2045,7 @@ def __setstate__(self, state) -> None:
                 # e.g. say fill_value needing _mgr to be
                 # defined
                 meta = set(self._internal_names + self._metadata)
-                for k in list(meta):
+                for k in meta:
                     if k in state and k != "_flags":
                         v = state[k]
                         object.__setattr__(self, k, v)
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 73b93110c9018d..cea52bf8c91b27 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -567,7 +567,7 @@ def _extract_index(data) -> Index:
     if len(data) == 0:
         return default_index(0)
 
-    raw_lengths = []
+    raw_lengths = set()
     indexes: list[list[Hashable] | Index] = []
 
     have_raw_arrays = False
@@ -583,7 +583,7 @@ def _extract_index(data) -> Index:
             indexes.append(list(val.keys()))
         elif is_list_like(val) and getattr(val, "ndim", 1) == 1:
             have_raw_arrays = True
-            raw_lengths.append(len(val))
+            raw_lengths.add(len(val))
         elif isinstance(val, np.ndarray) and val.ndim > 1:
             raise ValueError("Per-column arrays must each be 1-dimensional")
 
@@ -596,24 +596,23 @@ def _extract_index(data) -> Index:
         index = union_indexes(indexes, sort=False)
 
     if have_raw_arrays:
-        lengths = list(set(raw_lengths))
-        if len(lengths) > 1:
+        if len(raw_lengths) > 1:
             raise ValueError("All arrays must be of the same length")
 
         if have_dicts:
             raise ValueError(
                 "Mixing dicts with non-Series may lead to ambiguous ordering."
             )
-
+        raw_length = raw_lengths.pop()
         if have_series:
-            if lengths[0] != len(index):
+            if raw_length != len(index):
                 msg = (
-                    f"array length {lengths[0]} does not match index "
+                    f"array length {raw_length} does not match index "
                     f"length {len(index)}"
                 )
                 raise ValueError(msg)
         else:
-            index = default_index(lengths[0])
+            index = default_index(raw_length)
 
     return ensure_index(index)
 
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 2aeb1aff07a54f..df7a6cdb1ea52d 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -1124,18 +1124,18 @@ def f(value):
 
     # we require at least Ymd
     required = ["year", "month", "day"]
-    req = sorted(set(required) - set(unit_rev.keys()))
+    req = set(required) - set(unit_rev.keys())
     if len(req):
-        _required = ",".join(req)
+        _required = ",".join(sorted(req))
         raise ValueError(
             "to assemble mappings requires at least that "
             f"[year, month, day] be specified: [{_required}] is missing"
         )
 
     # keys we don't recognize
-    excess = sorted(set(unit_rev.keys()) - set(_unit_map.values()))
+    excess = set(unit_rev.keys()) - set(_unit_map.values())
     if len(excess):
-        _excess = ",".join(excess)
+        _excess = ",".join(sorted(excess))
         raise ValueError(
             f"extra keys have been passed to the datetime assemblage: [{_excess}]"
         )

From bb0fcc23eed9f6a1a6506c6e27b98fb397ce747e Mon Sep 17 00:00:00 2001
From: Antonio Valentino <antonio.valentino@tiscali.it>
Date: Tue, 16 Apr 2024 20:49:46 +0200
Subject: [PATCH 5/5] Avoid unnecessary re-opening of HDF5 files (Closes:
 #58248) (#58275)

* Avoid unnecessary re-opening of HDF5 files

* Update the whatsnew file

* Move the changelog entry for #58248 to the correct section
---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 pandas/io/pytables.py          | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 0992142f563638..7a4f709e56104f 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -331,6 +331,7 @@ Performance improvements
 - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
 - Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
+- Performance improvement in :meth:`to_hdf` by avoiding unnecessary reopenings of the HDF5 file, which speeds up data addition to files with a very large number of groups. (:issue:`58248`)
 - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
 - Performance improvement in indexing operations for string dtypes (:issue:`56997`)
 - Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`)
@@ -406,7 +407,6 @@ I/O
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 
-
 Period
 ^^^^^^
 -
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 5ecf7e287ea58b..3cfd740a513041 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -292,14 +292,14 @@ def to_hdf(
             dropna=dropna,
         )
 
-    path_or_buf = stringify_path(path_or_buf)
-    if isinstance(path_or_buf, str):
+    if isinstance(path_or_buf, HDFStore):
+        f(path_or_buf)
+    else:
+        path_or_buf = stringify_path(path_or_buf)
         with HDFStore(
             path_or_buf, mode=mode, complevel=complevel, complib=complib
         ) as store:
             f(store)
-    else:
-        f(path_or_buf)
 
 
 def read_hdf(