pandas-dev · lithomas1 · Jul 25, 2023 · Jun 20, 2023 · Jun 20, 2023 · Jun 20, 2023
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -47,8 +47,8 @@ jobs:
       - run:
           name: Build aarch64 wheels
           command: |
-            pip3 install cibuildwheel==2.12.1
-            cibuildwheel --output-dir wheelhouse
+            pip3 install cibuildwheel==2.13.1
+            cibuildwheel --prerelease-pythons --output-dir wheelhouse
           environment:
             CIBW_BUILD: << parameters.cibw-build >>
 
@@ -91,4 +91,5 @@ workflows:
               only: /^v.*/
           matrix:
             parameters:
-              cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64"]
+              # TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
+              cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64"]#, "cp312-manylinux_aarch64"]
@@ -311,12 +311,16 @@ jobs:
     #    To freeze this file, uncomment out the ``if: false`` condition, and migrate the jobs
     #    to the corresponding posix/windows-macos/sdist etc. workflows.
     # Feel free to modify this comment as necessary.
-    if: false # Uncomment this to freeze the workflow, comment it to unfreeze
+    #if: false # Uncomment this to freeze the workflow, comment it to unfreeze
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-22.04, macOS-latest, windows-latest]
+        # TODO: Disable macOS for now, Github Actions bug where python is not
+        # symlinked correctly to 3.12
+        # xref https://github.com/actions/setup-python/issues/701
+        #os: [ubuntu-22.04, macOS-latest, windows-latest]
+        os: [ubuntu-22.04, windows-latest]
 
     timeout-minutes: 180
 
@@ -340,21 +344,21 @@ jobs:
       - name: Set up Python Dev Version
         uses: actions/setup-python@v4
         with:
-          python-version: '3.11-dev'
+          python-version: '3.12-dev'
 
       - name: Install dependencies
         run: |
           python --version
-          python -m pip install --upgrade pip setuptools wheel
+          python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.0.1 meson-python==0.13.1
           python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
           python -m pip install git+https://github.com/nedbat/coveragepy.git
           python -m pip install versioneer[toml]
-          python -m pip install python-dateutil pytz cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
+          python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
           python -m pip list
 
       - name: Build Pandas
         run: |
-          python -m pip install -e . --no-build-isolation --no-index
+          python -m pip install -ve . --no-build-isolation --no-index
 
       - name: Build Version
         run: |

@@ -93,7 +93,8 @@ jobs:
         - [macos-12, macosx_*]
         - [windows-2022, win_amd64]
         # TODO: support PyPy?
-        python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]
+        # TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
+        python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]#, ["cp312", "3.12"]]
     env:
       IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
       IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
@@ -117,6 +118,7 @@ jobs:
         #with:
         #  package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }}
         env:
+          CIBW_PRERELEASE_PYTHONS: True
           CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
 
       - name: Set up Python

diff --git a/meson.build b/meson.build
@@ -27,6 +27,13 @@ versioneer = files('generate_version.py')
 add_project_arguments('-DNPY_NO_DEPRECATED_API=0', language : 'c')
 add_project_arguments('-DNPY_NO_DEPRECATED_API=0', language : 'cpp')
 
+# Allow supporting older numpys than the version compiled against
+# Set the define to the min supported version of numpy for pandas
+# e.g. right now this is targeting numpy 1.21+
+add_project_arguments('-DNPY_TARGET_VERSION=NPY_1_21_API_VERSION', language : 'c')
+add_project_arguments('-DNPY_TARGET_VERSION=NPY_1_21_API_VERSION', language : 'cpp')
+
+
 if fs.exists('_version_meson.py')
     py.install_sources('_version_meson.py', pure: false, subdir: 'pandas')
 else

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
@@ -19,6 +19,7 @@
     ISMUSL,
     PY310,
     PY311,
+    PY312,
     PYPY,
 )
 import pandas.compat.compressors
@@ -189,5 +190,6 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
     "ISMUSL",
     "PY310",
     "PY311",
+    "PY312",
     "PYPY",
 ]
diff --git a/pandas/compat/_constants.py b/pandas/compat/_constants.py
@@ -15,6 +15,7 @@
 
 PY310 = sys.version_info >= (3, 10)
 PY311 = sys.version_info >= (3, 11)
+PY312 = sys.version_info >= (3, 12)
 PYPY = platform.python_implementation() == "PyPy"
 ISMUSL = "musl" in (sysconfig.get_config_var("HOST_GNU_TYPE") or "")
 REF_COUNT = 2 if PY311 else 3
@@ -24,5 +25,6 @@
     "ISMUSL",
     "PY310",
     "PY311",
+    "PY312",
     "PYPY",
 ]
diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py
@@ -543,15 +543,18 @@ def visit_UnaryOp(self, node, **kwargs):
     def visit_Name(self, node, **kwargs):
         return self.term_type(node.id, self.env, **kwargs)
 
+    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min
     def visit_NameConstant(self, node, **kwargs) -> Term:
         return self.const_type(node.value, self.env)
 
+    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min
     def visit_Num(self, node, **kwargs) -> Term:
-        return self.const_type(node.n, self.env)
+        return self.const_type(node.value, self.env)
 
     def visit_Constant(self, node, **kwargs) -> Term:
-        return self.const_type(node.n, self.env)
+        return self.const_type(node.value, self.env)
 
+    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min
     def visit_Str(self, node, **kwargs):
         name = self.env.add_tmp(node.s)
         return self.term_type(name, self.env)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from collections import abc
 from datetime import datetime
 import functools
 from itertools import zip_longest
@@ -3788,6 +3789,11 @@ def get_loc(self, key):
         try:
             return self._engine.get_loc(casted_key)
         except KeyError as err:
+            if isinstance(casted_key, slice) or (
+                isinstance(casted_key, abc.Iterable)
+                and any(isinstance(x, slice) for x in casted_key)
+            ):
+                raise InvalidIndexError(key)
             raise KeyError(key) from err
         except TypeError:
             # If we have a listlike key, _check_indexing_error will raise

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -32,7 +32,10 @@
     to_offset,
 )
 from pandas.compat.numpy import function as nv
-from pandas.errors import NullFrequencyError
+from pandas.errors import (
+    InvalidIndexError,
+    NullFrequencyError,
+)
 from pandas.util._decorators import (
     Appender,
     cache_readonly,
@@ -165,7 +168,7 @@ def __contains__(self, key: Any) -> bool:
         hash(key)
         try:
             self.get_loc(key)
-        except (KeyError, TypeError, ValueError):
+        except (KeyError, TypeError, ValueError, InvalidIndexError):
             return False
         return True
 

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -742,7 +742,12 @@ def _get_setitem_indexer(self, key):
 
         ax = self.obj._get_axis(0)
 
-        if isinstance(ax, MultiIndex) and self.name != "iloc" and is_hashable(key):
+        if (
+            isinstance(ax, MultiIndex)
+            and self.name != "iloc"
+            and is_hashable(key)
+            and not isinstance(key, slice)
+        ):
             with suppress(KeyError, InvalidIndexError):
                 # TypeError e.g. passed a bool
                 return ax.get_loc(key)
@@ -1063,6 +1068,14 @@ def _getitem_nested_tuple(self, tup: tuple):
         # we have a nested tuple so have at least 1 multi-index level
         # we should be able to match up the dimensionality here
 
+        def _contains_slice(x: object) -> bool:
+            # Check if object is a slice or a tuple containing a slice
+            if isinstance(x, tuple):
+                return any(isinstance(v, slice) for v in x)
+            elif isinstance(x, slice):
+                return True
+            return False
+
         for key in tup:
             check_dict_or_set_indexers(key)
 
@@ -1073,7 +1086,10 @@ def _getitem_nested_tuple(self, tup: tuple):
             if self.name != "loc":
                 # This should never be reached, but let's be explicit about it
                 raise ValueError("Too many indices")  # pragma: no cover
-            if all(is_hashable(x) or com.is_null_slice(x) for x in tup):
+            if all(
+                (is_hashable(x) and not _contains_slice(x)) or com.is_null_slice(x)
+                for x in tup
+            ):
                 # GH#10521 Series should reduce MultiIndex dimensions instead of
                 #  DataFrame, IndexingError is not raised when slice(None,None,None)
                 #  with one row.
@@ -1422,7 +1438,15 @@ def _convert_to_indexer(self, key, axis: AxisInt):
         ):
             raise IndexingError("Too many indexers")
 
-        if is_scalar(key) or (isinstance(labels, MultiIndex) and is_hashable(key)):
+        # Slices are not valid keys passed in by the user,
+        # even though they are hashable in Python 3.12
+        contains_slice = False
+        if isinstance(key, tuple):
+            contains_slice = any(isinstance(v, slice) for v in key)
+
+        if is_scalar(key) or (
+            isinstance(labels, MultiIndex) and is_hashable(key) and not contains_slice
+        ):
             # Otherwise get_loc will raise InvalidIndexError
 
             # if we are a label return me

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1022,7 +1022,12 @@ def __getitem__(self, key):
         elif key_is_scalar:
             return self._get_value(key)
 
-        if is_hashable(key):
+        # Convert generator to list before going through hashable part
+        # (We will iterate through the generator there to check for slices)
+        if is_iterator(key):
+            key = list(key)
+
+        if is_hashable(key) and not isinstance(key, slice):
             # Otherwise index.get_value will raise InvalidIndexError
             try:
                 # For labels that don't resolve as scalars like tuples and frozensets
@@ -1042,9 +1047,6 @@ def __getitem__(self, key):
             # Do slice check before somewhat-costly is_bool_indexer
             return self._getitem_slice(key)
 
-        if is_iterator(key):
-            key = list(key)
-
         if com.is_bool_indexer(key):
             key = check_bool_indexer(self.index, key)
             key = np.asarray(key, dtype=bool)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
@@ -2070,6 +2070,11 @@ class SQLiteTable(SQLTable):
     """
 
     def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+
+        self._register_date_adapters()
+
+    def _register_date_adapters(self) -> None:
         # GH 8341
         # register an adapter callable for datetime.time object
         import sqlite3
@@ -2080,8 +2085,27 @@ def _adapt_time(t) -> str:
             # This is faster than strftime
             return f"{t.hour:02d}:{t.minute:02d}:{t.second:02d}.{t.microsecond:06d}"
 
+        # Also register adapters for date/datetime and co
+        # xref https://docs.python.org/3.12/library/sqlite3.html#adapter-and-converter-recipes
+        # Python 3.12+ doesn't auto-register adapters for us anymore
+
+        adapt_date_iso = lambda val: val.isoformat()
+        adapt_datetime_iso = lambda val: val.isoformat()
+        adapt_datetime_epoch = lambda val: int(val.timestamp())
+
         sqlite3.register_adapter(time, _adapt_time)
-        super().__init__(*args, **kwargs)
+
+        sqlite3.register_adapter(date, adapt_date_iso)
+        sqlite3.register_adapter(datetime, adapt_datetime_iso)
+        sqlite3.register_adapter(datetime, adapt_datetime_epoch)
+
+        convert_date = lambda val: date.fromisoformat(val.decode())
+        convert_datetime = lambda val: datetime.fromisoformat(val.decode())
+        convert_timestamp = lambda val: datetime.fromtimestamp(int(val))
+
+        sqlite3.register_converter("date", convert_date)
+        sqlite3.register_converter("datetime", convert_datetime)
+        sqlite3.register_converter("timestamp", convert_timestamp)
 
     def sql_schema(self) -> str:
         return str(";\n".join(self.table))

diff --git a/pandas/io/xml.py b/pandas/io/xml.py
@@ -501,7 +501,7 @@ def _validate_names(self) -> None:
                 children = self.iterparse[next(iter(self.iterparse))]
             else:
                 parent = self.xml_doc.find(self.xpath, namespaces=self.namespaces)
-                children = parent.findall("*") if parent else []
+                children = parent.findall("*") if parent is not None else []
 
             if is_list_like(self.names):
                 if len(self.names) < len(children):

diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import contextlib
 from functools import reduce
 from itertools import product
 import operator
@@ -9,6 +10,7 @@
 import numpy as np
 import pytest
 
+from pandas.compat import PY312
 from pandas.errors import (
     NumExprClobberingError,
     PerformanceWarning,
@@ -561,22 +563,16 @@ def test_unary_in_array(self):
         # TODO: 2022-01-29: result return list with numexpr 2.7.3 in CI
         # but cannot reproduce locally
         result = np.array(
-            pd.eval(
-                "[-True, True, ~True, +True,"
-                "-False, False, ~False, +False,"
-                "-37, 37, ~37, +37]"
-            ),
+            pd.eval("[-True, True, +True, -False, False, +False, -37, 37, ~37, +37]"),
             dtype=np.object_,
         )
         expected = np.array(
             [
                 -True,
                 True,
-                ~True,
                 +True,
                 -False,
                 False,
-                ~False,
                 +False,
                 -37,
                 37,
@@ -705,9 +701,17 @@ def test_disallow_python_keywords(self):
 
     def test_true_false_logic(self):
         # GH 25823
-        assert pd.eval("not True") == -2
-        assert pd.eval("not False") == -1
-        assert pd.eval("True and not True") == 0
+        # This behavior is deprecated in Python 3.12
+        if PY312:
+            context_mgr = tm.assert_produces_warning(
+                DeprecationWarning, check_stacklevel=False
+            )
+        else:
+            context_mgr = contextlib.nullcontext()
+        with context_mgr:
+            assert pd.eval("not True") == -2
+            assert pd.eval("not False") == -1
+            assert pd.eval("True and not True") == 0
 
     def test_and_logic_string_match(self):
         # GH 25823

diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
@@ -143,6 +143,8 @@ def _check_align(df, cond, other, check_dtypes=True):
         check_dtypes = all(not issubclass(s.type, np.integer) for s in df.dtypes)
         _check_align(df, cond, np.nan, check_dtypes=check_dtypes)
 
+    # Ignore deprecation warning in Python 3.12 for inverting a bool
+    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
     def test_where_invalid(self):
         # invalid conditions
         df = DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"])

diff --git a/pandas/tests/indexes/test_indexing.py b/pandas/tests/indexes/test_indexing.py
@@ -176,10 +176,8 @@ def test_contains_requires_hashable_raises(self, index):
 
 class TestGetLoc:
     def test_get_loc_non_hashable(self, index):
-        # MultiIndex and Index raise TypeError, others InvalidIndexError
-
-        with pytest.raises((TypeError, InvalidIndexError), match="slice"):
-            index.get_loc(slice(0, 1))
+        with pytest.raises(InvalidIndexError, match="[0, 1]"):
+            index.get_loc([0, 1])
 
     def test_get_loc_non_scalar_hashable(self, index):
         # GH52877