Skip to content

Commit

Permalink
Merge branch 'main' into cow_dead_regf
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Oct 21, 2023
2 parents 4664c52 + 00f10db commit 4227598
Show file tree
Hide file tree
Showing 208 changed files with 1,899 additions and 1,276 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ jobs:
. ~/virtualenvs/pandas-dev/bin/activate
python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true"
python -m pip install --no-cache-dir versioneer[toml] "cython<3.0.3" python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
python -m pip install --no-cache-dir --no-build-isolation -e .
python -m pip list --no-cache-dir
export PANDAS_CI=1
Expand Down Expand Up @@ -274,7 +274,7 @@ jobs:
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
. ~/virtualenvs/pandas-dev/bin/activate
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
python -m pip install --no-cache-dir versioneer[toml] "cython<3.0.3" numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
python -m pip install --no-cache-dir --no-build-isolation -e .
python -m pip list --no-cache-dir
Expand Down Expand Up @@ -347,7 +347,7 @@ jobs:
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
python -m pip install versioneer[toml]
python -m pip install python-dateutil pytz tzdata "cython<3.0.3" hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
python -m pip install -ve . --no-build-isolation --no-index --no-deps
python -m pip list
Expand Down
5 changes: 2 additions & 3 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,8 @@ fi

### SINGLE-PAGE DOCS ###
if [[ -z "$CHECK" || "$CHECK" == "single-docs" ]]; then
python doc/make.py --warnings-are-errors --single pandas.Series.value_counts
python doc/make.py --warnings-are-errors --single pandas.Series.str.split
python doc/make.py clean
python doc/make.py --warnings-are-errors --no-browser --single pandas.Series.value_counts
python doc/make.py --warnings-are-errors --no-browser --single pandas.Series.str.split
fi

exit $RET
2 changes: 1 addition & 1 deletion ci/deps/actions-310.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.3
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-311-downstream_compat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.3
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

Expand Down
3 changes: 1 addition & 2 deletions ci/deps/actions-311-numpydev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ dependencies:
- versioneer[toml]
- meson[ninja]=1.2.1
- meson-python=0.13.1
- cython>=0.29.33, <3.0.3
- cython>=0.29.33

# test dependencies
- pytest>=7.3.2
Expand All @@ -29,5 +29,4 @@ dependencies:
- "--extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
- "--pre"
- "numpy"
- "scipy"
- "tzdata>=2022.1"
2 changes: 1 addition & 1 deletion ci/deps/actions-311-pyarrownightly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies:
# build dependencies
- versioneer[toml]
- meson[ninja]=1.2.1
- cython>=0.29.33, <3.0.3
- cython>=0.29.33
- meson-python=0.13.1

# test dependencies
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-311.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.3
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-39-minimum_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.3
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.3
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-pypy-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.3
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/circle-310-arm64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.3
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

Expand Down
13 changes: 11 additions & 2 deletions doc/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,14 @@ def __init__(
single_doc=None,
verbosity=0,
warnings_are_errors=False,
no_browser=False,
) -> None:
self.num_jobs = num_jobs
self.include_api = include_api
self.whatsnew = whatsnew
self.verbosity = verbosity
self.warnings_are_errors = warnings_are_errors
self.no_browser = no_browser

if single_doc:
single_doc = self._process_single_doc(single_doc)
Expand Down Expand Up @@ -234,11 +236,11 @@ def html(self):
os.remove(zip_fname)

if ret_code == 0:
if self.single_doc_html is not None:
if self.single_doc_html is not None and not self.no_browser:
self._open_browser(self.single_doc_html)
else:
self._add_redirects()
if self.whatsnew:
if self.whatsnew and not self.no_browser:
self._open_browser(os.path.join("whatsnew", "index.html"))

return ret_code
Expand Down Expand Up @@ -349,6 +351,12 @@ def main():
action="store_true",
help="fail if warnings are raised",
)
argparser.add_argument(
"--no-browser",
help="Don't open browser",
default=False,
action="store_true",
)
args = argparser.parse_args()

if args.command not in cmds:
Expand All @@ -374,6 +382,7 @@ def main():
args.single,
args.verbosity,
args.warnings_are_errors,
args.no_browser,
)
return getattr(builder, args.command)()

Expand Down
2 changes: 1 addition & 1 deletion doc/source/development/contributing_codebase.rst
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ xfail during the testing phase. To do so, use the ``request`` fixture:
def test_xfail(request):
mark = pytest.mark.xfail(raises=TypeError, reason="Indicate why here")
request.node.add_marker(mark)
request.applymarker(mark)
xfail is not to be used for tests involving failure due to invalid user arguments.
For these tests, we need to verify the correct exception type and error message
Expand Down
10 changes: 5 additions & 5 deletions doc/source/getting_started/comparison/comparison_with_sql.rst
Original file line number Diff line number Diff line change
Expand Up @@ -164,24 +164,24 @@ The pandas equivalent would be:
tips.groupby("sex").size()
Notice that in the pandas code we used :meth:`~pandas.core.groupby.DataFrameGroupBy.size` and not
:meth:`~pandas.core.groupby.DataFrameGroupBy.count`. This is because
:meth:`~pandas.core.groupby.DataFrameGroupBy.count` applies the function to each column, returning
Notice that in the pandas code we used :meth:`.DataFrameGroupBy.size` and not
:meth:`.DataFrameGroupBy.count`. This is because
:meth:`.DataFrameGroupBy.count` applies the function to each column, returning
the number of ``NOT NULL`` records within each.

.. ipython:: python
tips.groupby("sex").count()
Alternatively, we could have applied the :meth:`~pandas.core.groupby.DataFrameGroupBy.count` method
Alternatively, we could have applied the :meth:`.DataFrameGroupBy.count` method
to an individual column:

.. ipython:: python
tips.groupby("sex")["total_bill"].count()
Multiple functions can also be applied at once. For instance, say we'd like to see how tip amount
differs by day of the week - :meth:`~pandas.core.groupby.DataFrameGroupBy.agg` allows you to pass a dictionary
differs by day of the week - :meth:`.DataFrameGroupBy.agg` allows you to pass a dictionary
to your grouped DataFrame, indicating which functions to apply to specific columns.

.. code-block:: sql
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ For more information about :meth:`~DataFrame.pivot_table`, see the user guide se

::

air_quality.groupby(["parameter", "location"]).mean()
air_quality.groupby(["parameter", "location"])[["value"]].mean()

.. raw:: html

Expand Down
4 changes: 3 additions & 1 deletion doc/source/user_guide/10min.rst
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ See the :ref:`Grouping section <groupby>`.
df
Grouping by a column label, selecting column labels, and then applying the
:meth:`~pandas.core.groupby.DataFrameGroupBy.sum` function to the resulting
:meth:`.DataFrameGroupBy.sum` function to the resulting
groups:

.. ipython:: python
Expand Down Expand Up @@ -763,12 +763,14 @@ Parquet
Writing to a Parquet file:

.. ipython:: python
:okwarning:
df.to_parquet("foo.parquet")
Reading from a Parquet file using :func:`read_parquet`:

.. ipython:: python
:okwarning:
pd.read_parquet("foo.parquet")
Expand Down
130 changes: 75 additions & 55 deletions doc/source/user_guide/copy_on_write.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ Copy-on-Write (CoW)
*******************

Copy-on-Write was first introduced in version 1.5.0. Starting from version 2.0 most of the
optimizations that become possible through CoW are implemented and supported. A complete list
can be found at :ref:`Copy-on-Write optimizations <copy_on_write.optimizations>`.
optimizations that become possible through CoW are implemented and supported. All possible
optimizations are supported starting from pandas 2.1.

We expect that CoW will be enabled by default in version 3.0.

Expand Down Expand Up @@ -154,66 +154,86 @@ With copy on write this can be done by using ``loc``.
df.loc[df["bar"] > 5, "foo"] = 100
Read-only NumPy arrays
----------------------

Accessing the underlying NumPy array of a DataFrame will return a read-only array if the array
shares data with the initial DataFrame:

The array is a copy if the initial DataFrame consists of more than one array:


.. ipython:: python
df = pd.DataFrame({"a": [1, 2], "b": [1.5, 2.5]})
df.to_numpy()
The array shares data with the DataFrame if the DataFrame consists of only one NumPy array:

.. ipython:: python
df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
df.to_numpy()
This array is read-only, which means that it can't be modified inplace:

.. ipython:: python
:okexcept:
arr = df.to_numpy()
arr[0, 0] = 100
The same holds true for a Series, since a Series always consists of a single array.

There are two potential solutions to this:

- Trigger a copy manually if you want to avoid updating DataFrames that share memory with your array.
- Make the array writeable. This is a more performant solution but circumvents Copy-on-Write rules, so
it should be used with caution.

.. ipython:: python
arr = df.to_numpy()
arr.flags.writeable = True
arr[0, 0] = 100
arr
Patterns to avoid
-----------------

No defensive copy will be performed if two objects share the same data while
you are modifying one object inplace.

.. ipython:: python
df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
df2 = df.reset_index()
df2.iloc[0, 0] = 100
This creates two objects that share data and thus the setitem operation will trigger a
copy. This is not necessary if the initial object ``df`` isn't needed anymore.
Simply reassigning to the same variable will invalidate the reference that is
held by the object.

.. ipython:: python
df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
df = df.reset_index()
df.iloc[0, 0] = 100
No copy is necessary in this example.
Creating multiple references keeps unnecessary references alive
and thus will hurt performance with Copy-on-Write.

.. _copy_on_write.optimizations:

Copy-on-Write optimizations
---------------------------

A new lazy copy mechanism defers the copy until the object in question is modified,
and only if this object shares data with another object. This mechanism was added to
the following methods:

- :meth:`DataFrame.reset_index` / :meth:`Series.reset_index`
- :meth:`DataFrame.set_index`
- :meth:`DataFrame.set_axis` / :meth:`Series.set_axis`
- :meth:`DataFrame.set_flags` / :meth:`Series.set_flags`
- :meth:`DataFrame.rename_axis` / :meth:`Series.rename_axis`
- :meth:`DataFrame.reindex` / :meth:`Series.reindex`
- :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like`
- :meth:`DataFrame.assign`
- :meth:`DataFrame.drop`
- :meth:`DataFrame.dropna` / :meth:`Series.dropna`
- :meth:`DataFrame.select_dtypes`
- :meth:`DataFrame.align` / :meth:`Series.align`
- :meth:`Series.to_frame`
- :meth:`DataFrame.rename` / :meth:`Series.rename`
- :meth:`DataFrame.add_prefix` / :meth:`Series.add_prefix`
- :meth:`DataFrame.add_suffix` / :meth:`Series.add_suffix`
- :meth:`DataFrame.drop_duplicates` / :meth:`Series.drop_duplicates`
- :meth:`DataFrame.droplevel` / :meth:`Series.droplevel`
- :meth:`DataFrame.reorder_levels` / :meth:`Series.reorder_levels`
- :meth:`DataFrame.between_time` / :meth:`Series.between_time`
- :meth:`DataFrame.filter` / :meth:`Series.filter`
- :meth:`DataFrame.head` / :meth:`Series.head`
- :meth:`DataFrame.tail` / :meth:`Series.tail`
- :meth:`DataFrame.isetitem`
- :meth:`DataFrame.pipe` / :meth:`Series.pipe`
- :meth:`DataFrame.pop` / :meth:`Series.pop`
- :meth:`DataFrame.replace` / :meth:`Series.replace`
- :meth:`DataFrame.shift` / :meth:`Series.shift`
- :meth:`DataFrame.sort_index` / :meth:`Series.sort_index`
- :meth:`DataFrame.sort_values` / :meth:`Series.sort_values`
- :meth:`DataFrame.squeeze` / :meth:`Series.squeeze`
- :meth:`DataFrame.swapaxes`
- :meth:`DataFrame.swaplevel` / :meth:`Series.swaplevel`
- :meth:`DataFrame.take` / :meth:`Series.take`
- :meth:`DataFrame.to_timestamp` / :meth:`Series.to_timestamp`
- :meth:`DataFrame.to_period` / :meth:`Series.to_period`
- :meth:`DataFrame.truncate`
- :meth:`DataFrame.iterrows`
- :meth:`DataFrame.tz_convert` / :meth:`Series.tz_localize`
- :meth:`DataFrame.fillna` / :meth:`Series.fillna`
- :meth:`DataFrame.interpolate` / :meth:`Series.interpolate`
- :meth:`DataFrame.ffill` / :meth:`Series.ffill`
- :meth:`DataFrame.bfill` / :meth:`Series.bfill`
- :meth:`DataFrame.where` / :meth:`Series.where`
- :meth:`DataFrame.infer_objects` / :meth:`Series.infer_objects`
- :meth:`DataFrame.astype` / :meth:`Series.astype`
- :meth:`DataFrame.convert_dtypes` / :meth:`Series.convert_dtypes`
- :meth:`DataFrame.join`
- :meth:`DataFrame.eval`
- :func:`concat`
- :func:`merge`
methods that don't require a copy of the underlying data. Popular examples are :meth:`DataFrame.drop` for ``axis=1``
and :meth:`DataFrame.rename`.

These methods return views when Copy-on-Write is enabled, which provides a significant
performance improvement compared to the regular execution.
Expand Down
Loading

0 comments on commit 4227598

Please sign in to comment.