Skip to content

Commit

Permalink
Merge branch 'main' into fix_gitpod
Browse files Browse the repository at this point in the history
  • Loading branch information
theuerc authored Nov 25, 2023
2 parents d41e49e + 24fdde6 commit 7dba9d6
Show file tree
Hide file tree
Showing 405 changed files with 8,827 additions and 6,052 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/comment-commands.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ jobs:
echo 'EOF' >> $GITHUB_ENV
echo "REGEX=$REGEX" >> $GITHUB_ENV
- uses: actions/github-script@v6
- uses: actions/github-script@v7
env:
BENCH_OUTPUT: ${{env.BENCH_OUTPUT}}
REGEX: ${{env.REGEX}}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/deprecation-tracking-bot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
env:
DEPRECATION_TRACKER_ISSUE: 50578
steps:
- uses: actions/github-script@v6
- uses: actions/github-script@v7
id: update-deprecation-issue
with:
script: |
Expand Down
13 changes: 7 additions & 6 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ defaults:
jobs:
ubuntu:
runs-on: ubuntu-22.04
timeout-minutes: 180
timeout-minutes: 90
strategy:
matrix:
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml]
Expand Down Expand Up @@ -88,14 +88,15 @@ jobs:
name: ${{ matrix.name || format('ubuntu-latest {0}', matrix.env_file) }}
env:
PATTERN: ${{ matrix.pattern }}
EXTRA_APT: ${{ matrix.extra_apt || '' }}
LANG: ${{ matrix.lang || 'C.UTF-8' }}
LC_ALL: ${{ matrix.lc_all || '' }}
PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
PANDAS_CI: ${{ matrix.pandas_ci || '1' }}
TEST_ARGS: ${{ matrix.test_args || '' }}
PYTEST_WORKERS: 'auto'
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
# Clipboard tests
QT_QPA_PLATFORM: offscreen
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
Expand Down Expand Up @@ -145,8 +146,8 @@ jobs:
fetch-depth: 0

- name: Extra installs
# xsel for clipboard tests
run: sudo apt-get update && sudo apt-get install -y xsel ${{ env.EXTRA_APT }}
run: sudo apt-get update && sudo apt-get install -y ${{ matrix.extra_apt }}
if: ${{ matrix.extra_apt }}

- name: Generate extra locales
# These extra locales will be available for locale.setlocale() calls in tests
Expand Down Expand Up @@ -177,7 +178,7 @@ jobs:
if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}}

macos-windows:
timeout-minutes: 180
timeout-minutes: 90
strategy:
matrix:
os: [macos-latest, windows-latest]
Expand Down Expand Up @@ -322,7 +323,7 @@ jobs:
matrix:
os: [ubuntu-22.04, macOS-latest, windows-latest]

timeout-minutes: 180
timeout-minutes: 90

concurrency:
#https://github.community/t/concurrecy-not-work-for-push/183068/7
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
// pip (with all the conda available packages installed first,
// followed by the pip installed packages).
"matrix": {
"Cython": ["0.29.33"],
"Cython": ["3.0.5"],
"matplotlib": [],
"sqlalchemy": [],
"scipy": [],
Expand Down
45 changes: 45 additions & 0 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,51 @@ def time_groupby_extra_cat_nosort(self, observed):
self.df_extra_cat.groupby("a", observed=observed, sort=False)["b"].count()


class MultipleCategories:
    """Time ``groupby`` on two categorical key columns with ``observed=False``.

    ``setup`` builds three frames that share the same float column ``b`` and
    differ only in how the key columns ``a1``/``a2`` are constructed:
    unordered categoricals, ordered categoricals, and categoricals that
    declare more categories than the values actually drawn.
    """

    def setup(self):
        """Create ``self.df``, ``self.df_ordered`` and ``self.df_extra_cat``."""
        n_rows = 10**3
        values = np.random.random(n_rows)

        # Unordered categorical keys drawn from [0, 10000).
        self.df = DataFrame(
            {
                "a1": Categorical(np.random.randint(10000, size=n_rows)),
                "a2": Categorical(np.random.randint(10000, size=n_rows)),
                "b": values,
            }
        )

        # Same construction, but the categoricals are flagged as ordered.
        self.df_ordered = DataFrame(
            {
                "a1": Categorical(
                    np.random.randint(10000, size=n_rows), ordered=True
                ),
                "a2": Categorical(
                    np.random.randint(10000, size=n_rows), ordered=True
                ),
                "b": values,
            }
        )

        # Values are drawn from [0, 100) while the declared categories span
        # [0, n_rows), so most declared categories never occur in the data.
        self.df_extra_cat = DataFrame(
            {
                "a1": Categorical(
                    np.random.randint(100, size=n_rows),
                    categories=np.arange(n_rows),
                ),
                "a2": Categorical(
                    np.random.randint(100, size=n_rows),
                    categories=np.arange(n_rows),
                ),
                "b": values,
            }
        )

    def time_groupby_sort(self):
        """Count ``b`` per (a1, a2) group on the unordered frame."""
        grouped = self.df.groupby(["a1", "a2"], observed=False)
        grouped["b"].count()

    def time_groupby_nosort(self):
        """Count ``b`` per group on the unordered frame with ``sort=False``."""
        grouped = self.df.groupby(["a1", "a2"], observed=False, sort=False)
        grouped["b"].count()

    def time_groupby_ordered_sort(self):
        """Count ``b`` per (a1, a2) group on the ordered frame."""
        grouped = self.df_ordered.groupby(["a1", "a2"], observed=False)
        grouped["b"].count()

    def time_groupby_ordered_nosort(self):
        """Count ``b`` per group on the ordered frame with ``sort=False``."""
        grouped = self.df_ordered.groupby(
            ["a1", "a2"], observed=False, sort=False
        )
        grouped["b"].count()

    def time_groupby_extra_cat_sort(self):
        """Count ``b`` per (a1, a2) group on the extra-categories frame."""
        grouped = self.df_extra_cat.groupby(["a1", "a2"], observed=False)
        grouped["b"].count()

    def time_groupby_extra_cat_nosort(self):
        """Count ``b`` per group on the extra-categories frame, ``sort=False``."""
        grouped = self.df_extra_cat.groupby(
            ["a1", "a2"], observed=False, sort=False
        )
        grouped["b"].count()

    def time_groupby_transform(self):
        """Run a cumulative sum of ``b`` within each (a1, a2) group."""
        grouped = self.df_extra_cat.groupby(["a1", "a2"], observed=False)
        grouped["b"].cumsum()


class Datelike:
# GH 14338
params = ["period_range", "date_range", "date_range_tz"]
Expand Down
4 changes: 4 additions & 0 deletions asv_bench/benchmarks/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,10 @@ def time_loc_null_slice_plus_slice(self, unique_levels):
target = (self.tgt_null_slice, self.tgt_slice)
self.df.loc[target, :]

def time_loc_multiindex(self, unique_levels):
    """Time ``.loc`` with every tenth label of the frame's own index.

    ``unique_levels`` is accepted but not read here.
    NOTE(review): the name suggests ``self.df`` carries a MultiIndex built by
    a ``setup`` outside this view -- confirm against the enclosing class.
    """
    self.df.loc[self.df.index[::10]]

def time_xs_level_0(self, unique_levels):
target = self.tgt_scalar
self.df.xs(target, level=0)
Expand Down
11 changes: 11 additions & 0 deletions asv_bench/benchmarks/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,4 +621,15 @@ def time_read_csv_index_col(self):
)


# Benchmark that feeds ``read_csv`` (C engine, ``low_memory=False``) an
# in-memory CSV larger than 2 GB.
class ReadCSVCParserLowMemory:
# GH 16798
def setup(self):
# One header line ("strings") followed by 2100 rows of 2**20 "x" characters
# each, i.e. roughly 2.2e9 characters of input in total.
self.csv = StringIO(
"strings\n" + "\n".join(["x" * (1 << 20) for _ in range(2100)])
)

# NOTE(review): the ``peakmem_`` prefix presumably makes asv record peak
# memory rather than wall time -- confirm against the asv documentation.
def peakmem_over_2gb_input(self):
read_csv(self.csv, engine="c", low_memory=False)


from ..pandas_vb_common import setup # noqa: F401 isort:skip
3 changes: 2 additions & 1 deletion asv_bench/benchmarks/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,8 @@ def time_extract_single_group(self, dtype, expand):
class Dummies(Dtypes):
def setup(self, dtype):
super().setup(dtype)
self.s = self.s.str.join("|")
N = len(self.s) // 5
self.s = self.s[:N].str.join("|")

def time_get_dummies(self, dtype):
self.s.str.get_dummies("|")
Expand Down
6 changes: 5 additions & 1 deletion asv_bench/benchmarks/tslibs/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,11 @@ def setup(self, size, freq, tz):
# tzlocal is cumbersomely slow, so skip to keep runtime in check
raise NotImplementedError

arr = np.arange(10, dtype="i8").repeat(size // 10)
# we pick 2**55 because smaller values end up returning
# -1 from npy_datetimestruct_to_datetime with NPY_FR_Y frequency
# this artificially slows down functions since -1 is also the
# error sentinel
arr = np.arange(2**55, 2**55 + 10, dtype="i8").repeat(size // 10)
self.i8values = arr

def time_dt64arr_to_periodarr(self, size, freq, tz):
Expand Down
7 changes: 5 additions & 2 deletions ci/deps/actions-310.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-localserver>=0.7.1
- pytest-qt>=4.2.0
- boto3

# required dependencies
Expand All @@ -33,7 +34,7 @@ dependencies:
- gcsfs>=2022.11.0
- jinja2>=3.1.2
- lxml>=4.9.2
- matplotlib>=3.6.3, <3.8
- matplotlib>=3.6.3
- numba>=0.56.4
- numexpr>=2.8.4
- odfpy>=1.4.1
Expand All @@ -42,6 +43,7 @@ dependencies:
- psycopg2>=2.9.6
- pyarrow>=10.0.1
- pymysql>=1.0.2
- pyqt>=5.15.9
- pyreadstat>=1.2.0
- pytables>=3.8.0
- python-calamine>=0.1.6
Expand All @@ -56,5 +58,6 @@ dependencies:
- zstandard>=0.19.0

- pip:
- pyqt5>=5.15.8
- adbc-driver-postgresql>=0.8.0
- adbc-driver-sqlite>=0.8.0
- tzdata>=2022.7
7 changes: 5 additions & 2 deletions ci/deps/actions-311-downstream_compat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-localserver>=0.7.1
- pytest-qt>=4.2.0
- boto3

# required dependencies
Expand All @@ -34,7 +35,7 @@ dependencies:
- gcsfs>=2022.11.0
- jinja2>=3.1.2
- lxml>=4.9.2
- matplotlib>=3.6.3, <3.8
- matplotlib>=3.6.3
- numba>=0.56.4
- numexpr>=2.8.4
- odfpy>=1.4.1
Expand All @@ -43,6 +44,7 @@ dependencies:
- psycopg2>=2.9.6
- pyarrow>=10.0.1
- pymysql>=1.0.2
- pyqt>=5.15.9
- pyreadstat>=1.2.0
- pytables>=3.8.0
- python-calamine>=0.1.6
Expand Down Expand Up @@ -70,6 +72,7 @@ dependencies:
- pyyaml
- py
- pip:
- adbc-driver-postgresql>=0.8.0
- adbc-driver-sqlite>=0.8.0
- dataframe-api-compat>=0.1.7
- pyqt5>=5.15.8
- tzdata>=2022.7
7 changes: 5 additions & 2 deletions ci/deps/actions-311.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-localserver>=0.7.1
- pytest-qt>=4.2.0
- boto3

# required dependencies
Expand All @@ -33,11 +34,12 @@ dependencies:
- gcsfs>=2022.11.0
- jinja2>=3.1.2
- lxml>=4.9.2
- matplotlib>=3.6.3, <3.8
- matplotlib>=3.6.3
- numba>=0.56.4
- numexpr>=2.8.4
- odfpy>=1.4.1
- qtpy>=2.3.0
- pyqt>=5.15.9
- openpyxl>=3.1.0
- psycopg2>=2.9.6
- pyarrow>=10.0.1
Expand All @@ -56,5 +58,6 @@ dependencies:
- zstandard>=0.19.0

- pip:
- pyqt5>=5.15.8
- adbc-driver-postgresql>=0.8.0
- adbc-driver-sqlite>=0.8.0
- tzdata>=2022.7
5 changes: 4 additions & 1 deletion ci/deps/actions-39-minimum_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-localserver>=0.7.1
- pytest-qt>=4.2.0
- boto3

# required dependencies
Expand Down Expand Up @@ -44,6 +45,7 @@ dependencies:
- psycopg2=2.9.6
- pyarrow=10.0.1
- pymysql=1.0.2
- pyqt=5.15.9
- pyreadstat=1.2.0
- pytables=3.8.0
- python-calamine=0.1.6
Expand All @@ -58,6 +60,7 @@ dependencies:
- zstandard=0.19.0

- pip:
- adbc-driver-postgresql==0.8.0
- adbc-driver-sqlite==0.8.0
- dataframe-api-compat==0.1.7
- pyqt5==5.15.8
- tzdata==2022.7
7 changes: 5 additions & 2 deletions ci/deps/actions-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-localserver>=0.7.1
- pytest-qt>=4.2.0
- boto3

# required dependencies
Expand All @@ -33,7 +34,7 @@ dependencies:
- gcsfs>=2022.11.0
- jinja2>=3.1.2
- lxml>=4.9.2
- matplotlib>=3.6.3, <3.8
- matplotlib>=3.6.3
- numba>=0.56.4
- numexpr>=2.8.4
- odfpy>=1.4.1
Expand All @@ -42,6 +43,7 @@ dependencies:
- psycopg2>=2.9.6
- pyarrow>=10.0.1
- pymysql>=1.0.2
- pyqt>=5.15.9
- pyreadstat>=1.2.0
- pytables>=3.8.0
- python-calamine>=0.1.6
Expand All @@ -56,5 +58,6 @@ dependencies:
- zstandard>=0.19.0

- pip:
- pyqt5>=5.15.8
- adbc-driver-postgresql>=0.8.0
- adbc-driver-sqlite>=0.8.0
- tzdata>=2022.7
7 changes: 6 additions & 1 deletion ci/deps/circle-310-arm64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-localserver>=0.7.1
- pytest-qt>=4.2.0
- boto3

# required dependencies
Expand All @@ -33,7 +34,7 @@ dependencies:
- gcsfs>=2022.11.0
- jinja2>=3.1.2
- lxml>=4.9.2
- matplotlib>=3.6.3, <3.8
- matplotlib>=3.6.3
- numba>=0.56.4
- numexpr>=2.8.4
- odfpy>=1.4.1
Expand All @@ -42,6 +43,7 @@ dependencies:
- psycopg2>=2.9.6
- pyarrow>=10.0.1
- pymysql>=1.0.2
- pyqt>=5.15.9
- pyreadstat>=1.2.0
- pytables>=3.8.0
- python-calamine>=0.1.6
Expand All @@ -54,3 +56,6 @@ dependencies:
- xlrd>=2.0.1
- xlsxwriter>=3.0.5
- zstandard>=0.19.0
- pip:
- adbc-driver-postgresql>=0.8.0
- adbc-driver-sqlite>=0.8.0
6 changes: 4 additions & 2 deletions doc/source/getting_started/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ lxml 4.9.2 xml XML parser for read
SQL databases
^^^^^^^^^^^^^

Installable with ``pip install "pandas[postgresql, mysql, sql-other]"``.
Traditional drivers are installable with ``pip install "pandas[postgresql, mysql, sql-other]"``.

========================= ================== =============== =============================================================
Dependency Minimum Version pip extra Notes
Expand All @@ -345,6 +345,8 @@ SQLAlchemy 2.0.0 postgresql, SQL support for dat
sql-other
psycopg2 2.9.6 postgresql PostgreSQL engine for sqlalchemy
pymysql 1.0.2 mysql MySQL engine for sqlalchemy
adbc-driver-postgresql 0.8.0 postgresql ADBC Driver for PostgreSQL
adbc-driver-sqlite 0.8.0 sql-other ADBC Driver for SQLite
========================= ================== =============== =============================================================

Other data sources
Expand Down Expand Up @@ -395,7 +397,7 @@ Installable with ``pip install "pandas[clipboard]"``.
========================= ================== =============== =============================================================
Dependency Minimum Version pip extra Notes
========================= ================== =============== =============================================================
PyQt4/PyQt5 5.15.8 clipboard Clipboard I/O
PyQt4/PyQt5 5.15.9 clipboard Clipboard I/O
qtpy 2.3.0 clipboard Clipboard I/O
========================= ================== =============== =============================================================

Expand Down
Loading

0 comments on commit 7dba9d6

Please sign in to comment.