Merge branch 'main' into reset_index
lithomas1 authored Dec 7, 2023
2 parents fb8552c + d36fb98 commit cf59f20
Showing 398 changed files with 7,466 additions and 5,217 deletions.
2 changes: 1 addition & 1 deletion .circleci/setup_env.sh
@@ -55,6 +55,6 @@ if pip show pandas 1>/dev/null; then
fi

echo "Install pandas"
-python -m pip install --no-build-isolation -ve .
+python -m pip install --no-build-isolation -ve . --config-settings=setup-args="--werror"

echo "done"
6 changes: 4 additions & 2 deletions .github/actions/build_pandas/action.yml
@@ -25,8 +25,10 @@ runs:
- name: Build Pandas
run: |
if [[ ${{ inputs.editable }} == "true" ]]; then
-pip install -e . --no-build-isolation -v --no-deps
+pip install -e . --no-build-isolation -v --no-deps \
+  --config-settings=setup-args="--werror"
else
-pip install . --no-build-isolation -v --no-deps
+pip install . --no-build-isolation -v --no-deps \
+  --config-settings=setup-args="--werror"
fi
shell: bash -el {0}
31 changes: 22 additions & 9 deletions .github/workflows/unit-tests.yml
@@ -26,7 +26,7 @@ jobs:
timeout-minutes: 90
strategy:
matrix:
-env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml]
+env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml, actions-312.yaml]
# Prevent the include jobs from overriding other jobs
pattern: [""]
include:
@@ -69,10 +69,22 @@ jobs:
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write 3.12"
env_file: actions-312.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write 3.11 (warnings)"
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
- name: "Copy-on-Write 3.10 (warnings)"
env_file: actions-310.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
- name: "Copy-on-Write 3.9 (warnings)"
env_file: actions-39.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
- name: "Pypy"
env_file: actions-pypy-39.yaml
pattern: "not slow and not network and not single_cpu"
@@ -88,14 +100,15 @@ jobs:
name: ${{ matrix.name || format('ubuntu-latest {0}', matrix.env_file) }}
env:
PATTERN: ${{ matrix.pattern }}
-EXTRA_APT: ${{ matrix.extra_apt || '' }}
LANG: ${{ matrix.lang || 'C.UTF-8' }}
LC_ALL: ${{ matrix.lc_all || '' }}
PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
PANDAS_CI: ${{ matrix.pandas_ci || '1' }}
TEST_ARGS: ${{ matrix.test_args || '' }}
PYTEST_WORKERS: 'auto'
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
+# Clipboard tests
+QT_QPA_PLATFORM: offscreen
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
@@ -145,8 +158,8 @@ jobs:
fetch-depth: 0

- name: Extra installs
-# xsel for clipboard tests
-run: sudo apt-get update && sudo apt-get install -y xsel ${{ env.EXTRA_APT }}
+run: sudo apt-get update && sudo apt-get install -y ${{ matrix.extra_apt }}
+if: ${{ matrix.extra_apt }}

- name: Generate extra locales
# These extra locales will be available for locale.setlocale() calls in tests
@@ -181,7 +194,7 @@ jobs:
strategy:
matrix:
os: [macos-latest, windows-latest]
-env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml]
+env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml, actions-312.yaml]
fail-fast: false
runs-on: ${{ matrix.os }}
name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }}
@@ -241,7 +254,7 @@ jobs:
python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true"
python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
-python -m pip install --no-cache-dir --no-build-isolation -e .
+python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror"
python -m pip list --no-cache-dir
export PANDAS_CI=1
python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
@@ -279,7 +292,7 @@ jobs:
. ~/virtualenvs/pandas-dev/bin/activate
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
-python -m pip install --no-cache-dir --no-build-isolation -e .
+python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror"
python -m pip list --no-cache-dir
- name: Run Tests
@@ -312,7 +325,7 @@ jobs:
# To freeze this file, uncomment out the ``if: false`` condition, and migrate the jobs
# to the corresponding posix/windows-macos/sdist etc. workflows.
# Feel free to modify this comment as necessary.
-#if: false # Uncomment this to freeze the workflow, comment it to unfreeze
+if: false # Uncomment this to freeze the workflow, comment it to unfreeze
defaults:
run:
shell: bash -eou pipefail {0}
@@ -352,7 +365,7 @@ jobs:
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
python -m pip install versioneer[toml]
python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov
-python -m pip install -ve . --no-build-isolation --no-index --no-deps
+python -m pip install -ve . --no-build-isolation --no-index --no-deps --config-settings=setup-args="--werror"
python -m pip list
- name: Run Tests
6 changes: 4 additions & 2 deletions .gitignore
@@ -39,6 +39,7 @@
.mesonpy-native-file.ini
MANIFEST
compile_commands.json
debug
.debug

# Python files #
@@ -104,10 +105,11 @@ scikits
# Generated Sources #
#####################
!skts.c
-!np_datetime.c
-!np_datetime_strings.c
*.c
*.cpp
+!pandas/_libs/src/**/*.c
+!pandas/_libs/src/**/*.h
+!pandas/_libs/include/**/*.h

# Unit / Performance Testing #
##############################
13 changes: 7 additions & 6 deletions .pre-commit-config.yaml
@@ -20,11 +20,11 @@ ci:
repos:
- repo: https://github.com/hauntsaninja/black-pre-commit-mirror
# black compiled with mypyc
-rev: 23.10.1
+rev: 23.11.0
hooks:
- id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
-rev: v0.1.4
+rev: v0.1.6
hooks:
- id: ruff
args: [--exit-non-zero-on-fix]
@@ -47,7 +47,7 @@ repos:
types_or: [python, rst, markdown, cython, c]
additional_dependencies: [tomli]
- repo: https://github.com/MarcoGorelli/cython-lint
-rev: v0.15.0
+rev: v0.16.0
hooks:
- id: cython-lint
- id: double-quote-cython-strings
@@ -111,11 +111,11 @@ repos:
types: [text] # overwrite types: [rst]
types_or: [python, rst]
- repo: https://github.com/sphinx-contrib/sphinx-lint
-rev: v0.8.1
+rev: v0.9.0
hooks:
- id: sphinx-lint
- repo: https://github.com/pre-commit/mirrors-clang-format
-rev: v17.0.4
+rev: v17.0.6
hooks:
- id: clang-format
files: ^pandas/_libs/src|^pandas/_libs/include
@@ -240,8 +240,9 @@ repos:
# pytest raises without context
|\s\ pytest.raises
+# TODO
# pytest.warns (use tm.assert_produces_warning instead)
-|pytest\.warns
+# |pytest\.warns
# os.remove
|os\.remove
15 changes: 9 additions & 6 deletions asv_bench/benchmarks/algorithms.py
@@ -4,8 +4,6 @@

import pandas as pd

-from .pandas_vb_common import tm
-
for imp in ["pandas.util", "pandas.tools.hashing"]:
try:
hashing = import_module(imp)
@@ -47,9 +45,12 @@ def setup(self, unique, sort, dtype):
elif dtype == "datetime64[ns, tz]":
data = pd.date_range("2011-01-01", freq="h", periods=N, tz="Asia/Tokyo")
elif dtype == "object_str":
-data = tm.makeStringIndex(N)
+data = pd.Index([f"i-{i}" for i in range(N)], dtype=object)
elif dtype == "string[pyarrow]":
-data = pd.array(tm.makeStringIndex(N), dtype="string[pyarrow]")
+data = pd.array(
+    pd.Index([f"i-{i}" for i in range(N)], dtype=object),
+    dtype="string[pyarrow]",
+)
else:
raise NotImplementedError

@@ -88,7 +89,7 @@ def setup(self, unique, keep, dtype):
elif dtype == "float64":
data = pd.Index(np.random.randn(N), dtype="float64")
elif dtype == "string":
-data = tm.makeStringIndex(N)
+data = pd.Index([f"i-{i}" for i in range(N)], dtype=object)
elif dtype == "datetime64[ns]":
data = pd.date_range("2011-01-01", freq="h", periods=N)
elif dtype == "datetime64[ns, tz]":
@@ -136,7 +137,9 @@ def setup_cache(self):
df = pd.DataFrame(
{
"strings": pd.Series(
-tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=N))
+pd.Index([f"i-{i}" for i in range(10000)], dtype=object).take(
+    np.random.randint(0, 10000, size=N)
+)
),
"floats": np.random.randn(N),
"ints": np.arange(N),
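Across these benchmark files, the removed tm.makeStringIndex helper is replaced by an explicitly constructed object-dtype string Index. A minimal illustrative sketch of the new pattern, not part of the diff itself (sizes reduced; the Arrow-backed variant assumes pyarrow is installed):

import numpy as np
import pandas as pd

N = 1_000  # illustrative; the benchmarks use 10**5 and larger

# Object-dtype Index of synthetic strings, as built in the updated setup methods
data = pd.Index([f"i-{i}" for i in range(N)], dtype=object)

# The same values backed by Arrow strings (requires pyarrow)
arrow_data = pd.array(data, dtype="string[pyarrow]")

# The setup_cache hunk above samples with replacement to produce duplicated strings
strings = pd.Series(data.take(np.random.randint(0, N, size=N)))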
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/algos/isin.py
@@ -8,8 +8,6 @@
date_range,
)

-from ..pandas_vb_common import tm
-

class IsIn:
params = [
@@ -60,7 +58,9 @@ def setup(self, dtype):

elif dtype in ["str", "string[python]", "string[pyarrow]"]:
try:
-self.series = Series(tm.makeStringIndex(N), dtype=dtype)
+self.series = Series(
+    Index([f"i-{i}" for i in range(N)], dtype=object), dtype=dtype
+)
except ImportError:
raise NotImplementedError
self.values = list(self.series[:2])
8 changes: 5 additions & 3 deletions asv_bench/benchmarks/arithmetic.py
@@ -6,12 +6,12 @@
import pandas as pd
from pandas import (
DataFrame,
+Index,
Series,
Timestamp,
date_range,
to_timedelta,
)
-import pandas._testing as tm
from pandas.core.algorithms import checked_add_with_arr

from .pandas_vb_common import numeric_dtypes
@@ -323,8 +323,10 @@ class IndexArithmetic:

def setup(self, dtype):
N = 10**6
indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"}
self.index = getattr(tm, indexes[dtype])(N)
if dtype == "float":
self.index = Index(np.arange(N), dtype=np.float64)
elif dtype == "int":
self.index = Index(np.arange(N), dtype=np.int64)

def time_add(self, dtype):
self.index + 2
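The IndexArithmetic.setup change above drops the string-keyed getattr(tm, ...) lookup in favour of explicit Index constructions. A small sketch of the resulting setup, not part of the diff (size reduced for illustration):

import numpy as np
from pandas import Index

N = 10_000  # the benchmark uses 10**6

# Explicit constructions replacing tm.makeIntIndex / tm.makeFloatIndex
int_index = Index(np.arange(N), dtype=np.int64)
float_index = Index(np.arange(N), dtype=np.float64)

# time_add then times simple whole-index arithmetic such as
_ = int_index + 2
_ = float_index + 2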
8 changes: 3 additions & 5 deletions asv_bench/benchmarks/categoricals.py
@@ -6,8 +6,6 @@

import pandas as pd

-from .pandas_vb_common import tm
-
try:
from pandas.api.types import union_categoricals
except ImportError:
@@ -189,7 +187,7 @@ def setup(self):
N = 10**5
ncats = 15

-self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
+self.s_str = pd.Series(np.random.randint(0, ncats, size=N).astype(str))
self.s_str_cat = pd.Series(self.s_str, dtype="category")
with warnings.catch_warnings(record=True):
str_cat_type = pd.CategoricalDtype(set(self.s_str), ordered=True)
@@ -242,7 +240,7 @@ def time_categorical_series_is_monotonic_decreasing(self):
class Contains:
def setup(self):
N = 10**5
-self.ci = tm.makeCategoricalIndex(N)
+self.ci = pd.CategoricalIndex(np.arange(N))
self.c = self.ci.values
self.key = self.ci.categories[0]

@@ -325,7 +323,7 @@ def time_sort_values(self):
class SearchSorted:
def setup(self):
N = 10**5
-self.ci = tm.makeCategoricalIndex(N).sort_values()
+self.ci = pd.CategoricalIndex(np.arange(N)).sort_values()
self.c = self.ci.values
self.key = self.ci.categories[1]

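Likewise, tm.makeCategoricalIndex(N) in the Contains and SearchSorted setups gives way to a plain integer-backed CategoricalIndex. A minimal sketch, not part of the diff (illustrative size):

import numpy as np
import pandas as pd

N = 1_000  # the benchmarks use 10**5

# Replaces tm.makeCategoricalIndex(N); the categories are the integers 0..N-1
ci = pd.CategoricalIndex(np.arange(N))
c = ci.values            # the underlying Categorical
key = ci.categories[0]   # a known category, e.g. for "key in ci" membership checks

# The SearchSorted setup additionally sorts the index before timing
ci_sorted = pd.CategoricalIndex(np.arange(N)).sort_values()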
4 changes: 1 addition & 3 deletions asv_bench/benchmarks/ctors.py
@@ -9,8 +9,6 @@
date_range,
)

-from .pandas_vb_common import tm
-

def no_change(arr):
return arr
@@ -115,7 +113,7 @@ def time_dtindex_from_index_with_series(self):
class MultiIndexConstructor:
def setup(self):
N = 10**4
-self.iterables = [tm.makeStringIndex(N), range(20)]
+self.iterables = [Index([f"i-{i}" for i in range(N)], dtype=object), range(20)]

def time_multiindex_from_iterables(self):
MultiIndex.from_product(self.iterables)
9 changes: 6 additions & 3 deletions asv_bench/benchmarks/dtypes.py
@@ -3,7 +3,10 @@
import numpy as np

import pandas as pd
-from pandas import DataFrame
+from pandas import (
+    DataFrame,
+    Index,
+)
import pandas._testing as tm
from pandas.api.types import (
is_extension_array_dtype,
@@ -73,8 +76,8 @@ class SelectDtypes:

def setup(self, dtype):
N, K = 5000, 50
-self.index = tm.makeStringIndex(N)
-self.columns = tm.makeStringIndex(K)
+self.index = Index([f"i-{i}" for i in range(N)], dtype=object)
+self.columns = Index([f"i-{i}" for i in range(K)], dtype=object)

def create_df(data):
return DataFrame(data, index=self.index, columns=self.columns)
6 changes: 2 additions & 4 deletions asv_bench/benchmarks/frame_ctor.py
@@ -12,8 +12,6 @@
date_range,
)

-from .pandas_vb_common import tm
-
try:
from pandas.tseries.offsets import (
Hour,
@@ -30,8 +28,8 @@
class FromDicts:
def setup(self):
N, K = 5000, 50
-self.index = tm.makeStringIndex(N)
-self.columns = tm.makeStringIndex(K)
+self.index = pd.Index([f"i-{i}" for i in range(N)], dtype=object)
+self.columns = pd.Index([f"i-{i}" for i in range(K)], dtype=object)
frame = DataFrame(np.random.randn(N, K), index=self.index, columns=self.columns)
self.data = frame.to_dict()
self.dict_list = frame.to_dict(orient="records")
(diffs for the remaining changed files are not shown)

