Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

STY: Fix doctest and docstring formatting errors #56408

Merged
merged 7 commits into from
Dec 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# $ ./ci/code_checks.sh single-docs # check single-page docs build warning-free
# $ ./ci/code_checks.sh notebooks # check execution of documentation notebooks

set -uo pipefail

[[ -z "$1" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "single-docs" || "$1" == "notebooks" ]] || \
{ echo "Unknown command $1. Usage: $0 [code|doctests|docstrings|single-docs|notebooks]"; exit 9999; }

Expand Down
5 changes: 3 additions & 2 deletions doc/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,9 @@ def html(self):
os.remove(zip_fname)

if ret_code == 0:
if self.single_doc_html is not None and not self.no_browser:
self._open_browser(self.single_doc_html)
if self.single_doc_html is not None:
if not self.no_browser:
self._open_browser(self.single_doc_html)
else:
self._add_redirects()
if self.whatsnew and not self.no_browser:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
Examples
--------
>>> import scipy.sparse
>>> mat = scipy.sparse.eye(3)
>>> mat = scipy.sparse.eye(3, dtype=float)
>>> pd.DataFrame.sparse.from_spmatrix(mat)
0 1 2
0 1.0 0.0 0.0
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3955,7 +3955,7 @@ def to_csv(
>>> df = pd.DataFrame({{'name': ['Raphael', 'Donatello'],
... 'mask': ['red', 'purple'],
... 'weapon': ['sai', 'bo staff']}})
>>> df.to_csv('out.csv', index=False) # doctest: +SKIP
>>> df.to_csv('out.csv', index=False) # doctest: +SKIP

Create 'out.zip' containing 'out.csv'

Expand Down Expand Up @@ -8972,7 +8972,7 @@ def clip(

Clips using specific lower and upper thresholds per column:

>>> df.clip([-2, -1], [4,5])
>>> df.clip([-2, -1], [4, 5])
col_0 col_1
0 4 -1
1 -2 -1
Expand Down
30 changes: 13 additions & 17 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,10 +470,9 @@ def _aggregate_named(self, func, *args, **kwargs):

__examples_series_doc = dedent(
"""
>>> ser = pd.Series(
... [390.0, 350.0, 30.0, 20.0],
... index=["Falcon", "Falcon", "Parrot", "Parrot"],
... name="Max Speed")
>>> ser = pd.Series([390.0, 350.0, 30.0, 20.0],
... index=["Falcon", "Falcon", "Parrot", "Parrot"],
... name="Max Speed")
rhshadrach marked this conversation as resolved.
Show resolved Hide resolved
>>> grouped = ser.groupby([1, 1, 2, 2])
>>> grouped.transform(lambda x: (x - x.mean()) / x.std())
Falcon 0.707107
Expand Down Expand Up @@ -1331,14 +1330,10 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
"""
Examples
--------
>>> df = pd.DataFrame(
... {
... "A": [1, 1, 2, 2],
>>> data = {"A": [1, 1, 2, 2],
... "B": [1, 2, 3, 4],
... "C": [0.362838, 0.227877, 1.267767, -0.562860],
... }
... )

... "C": [0.362838, 0.227877, 1.267767, -0.562860]}
>>> df = pd.DataFrame(data)
>>> df
A B C
0 1 1 0.362838
Expand Down Expand Up @@ -1393,7 +1388,8 @@ class DataFrameGroupBy(GroupBy[DataFrame]):

>>> df.groupby("A").agg(
... b_min=pd.NamedAgg(column="B", aggfunc="min"),
... c_sum=pd.NamedAgg(column="C", aggfunc="sum"))
... c_sum=pd.NamedAgg(column="C", aggfunc="sum")
... )
b_min c_sum
A
1 1 0.590715
Expand Down Expand Up @@ -2154,7 +2150,7 @@ def idxmax(

>>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48],
... 'co2_emissions': [37.2, 19.66, 1712]},
... index=['Pork', 'Wheat Products', 'Beef'])
... index=['Pork', 'Wheat Products', 'Beef'])

>>> df
consumption co2_emissions
Expand Down Expand Up @@ -2236,7 +2232,7 @@ def idxmin(

>>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48],
... 'co2_emissions': [37.2, 19.66, 1712]},
... index=['Pork', 'Wheat Products', 'Beef'])
... index=['Pork', 'Wheat Products', 'Beef'])

>>> df
consumption co2_emissions
Expand Down Expand Up @@ -2319,9 +2315,9 @@ def value_counts(
Examples
--------
>>> df = pd.DataFrame({
... 'gender': ['male', 'male', 'female', 'male', 'female', 'male'],
... 'education': ['low', 'medium', 'high', 'low', 'high', 'low'],
... 'country': ['US', 'FR', 'US', 'FR', 'FR', 'FR']
... 'gender': ['male', 'male', 'female', 'male', 'female', 'male'],
... 'education': ['low', 'medium', 'high', 'low', 'high', 'low'],
... 'country': ['US', 'FR', 'US', 'FR', 'FR', 'FR']
... })

>>> df
Expand Down
18 changes: 11 additions & 7 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,8 @@ class providing the base-class of operations.
""",
"dataframe_examples": """
>>> df = pd.DataFrame({'A': 'a a b'.split(),
... 'B': [1,2,3],
... 'C': [4,6,5]})
... 'B': [1, 2, 3],
... 'C': [4, 6, 5]})
>>> g1 = df.groupby('A', group_keys=False)
>>> g2 = df.groupby('A', group_keys=True)

Expand Down Expand Up @@ -313,7 +313,7 @@ class providing the base-class of operations.

The resulting dtype will reflect the return value of the passed ``func``.

>>> g1.apply(lambda x: x*2 if x.name == 'a' else x/2)
>>> g1.apply(lambda x: x * 2 if x.name == 'a' else x / 2)
a 0.0
a 2.0
b 1.0
Expand All @@ -322,7 +322,7 @@ class providing the base-class of operations.
In the above, the groups are not part of the index. We can have them included
by using ``g2`` where ``group_keys=True``:

>>> g2.apply(lambda x: x*2 if x.name == 'a' else x/2)
>>> g2.apply(lambda x: x * 2 if x.name == 'a' else x / 2)
a a 0.0
a 2.0
b b 1.0
Expand Down Expand Up @@ -421,14 +421,18 @@ class providing the base-class of operations.
functions that expect Series, DataFrames, GroupBy or Resampler objects.
Instead of writing

>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c) # doctest: +SKIP
>>> h = lambda x, arg2, arg3: x + 1 - arg2 * arg3
>>> g = lambda x, arg1: x * 5 / arg1
>>> f = lambda x: x ** 4
>>> df = pd.DataFrame([["a", 4], ["b", 5]], columns=["group", "value"])
>>> h(g(f(df.groupby('group')), arg1=1), arg2=2, arg3=3) # doctest: +SKIP

You can write

>>> (df.groupby('group')
... .pipe(f)
... .pipe(g, arg1=a)
... .pipe(h, arg2=b, arg3=c)) # doctest: +SKIP
... .pipe(g, arg1=1)
... .pipe(h, arg2=2, arg3=3)) # doctest: +SKIP

which is much more readable.

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,8 @@ def levels(self) -> FrozenList:
Examples
--------
>>> index = pd.MultiIndex.from_product([['mammal'],
... ('goat', 'human', 'cat', 'dog')], names=['Category', 'Animals'])
... ('goat', 'human', 'cat', 'dog')],
... names=['Category', 'Animals'])
>>> leg_num = pd.DataFrame(data=(4, 2, 4, 4), index=index, columns=['Legs'])
>>> leg_num
Legs
Expand Down
13 changes: 4 additions & 9 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,7 +854,7 @@ def fillna(self, method, limit: int | None = None):
Missing values present before the upsampling are not affected.

>>> sm = pd.Series([1, None, 3],
... index=pd.date_range('20180101', periods=3, freq='h'))
... index=pd.date_range('20180101', periods=3, freq='h'))
>>> sm
2018-01-01 00:00:00 1.0
2018-01-01 01:00:00 NaN
Expand Down Expand Up @@ -1023,21 +1023,16 @@ def interpolate(
Examples
--------

>>> import datetime as dt
>>> timesteps = [
... dt.datetime(2023, 3, 1, 7, 0, 0),
... dt.datetime(2023, 3, 1, 7, 0, 1),
... dt.datetime(2023, 3, 1, 7, 0, 2),
... dt.datetime(2023, 3, 1, 7, 0, 3),
... dt.datetime(2023, 3, 1, 7, 0, 4)]
>>> start = "2023-03-01T07:00:00"
>>> timesteps = pd.date_range(start, periods=5, freq="s")
>>> series = pd.Series(data=[1, -1, 2, 1, 3], index=timesteps)
>>> series
2023-03-01 07:00:00 1
2023-03-01 07:00:01 -1
2023-03-01 07:00:02 2
2023-03-01 07:00:03 1
2023-03-01 07:00:04 3
dtype: int64
Freq: s, dtype: int64

Upsample the series to 0.5Hz by providing the period time of 2s.

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,7 +797,7 @@
... 'B': ['a', 'b', 'c', 'd', 'e'],
... 'C': ['f', 'g', 'h', 'i', 'j']}})

>>> df.replace(to_replace='^[a-g]', value = 'e', regex=True)
>>> df.replace(to_replace='^[a-g]', value='e', regex=True)
A B C
0 0 e e
1 1 e e
Expand All @@ -808,7 +808,7 @@
If ``value`` is not ``None`` and ``to_replace`` is a dictionary, the dictionary
keys will be the DataFrame columns that the replacement will be applied to.

>>> df.replace(to_replace={{'B': '^[a-c]', 'C': '^[h-j]'}}, value = 'e', regex=True)
>>> df.replace(to_replace={{'B': '^[a-c]', 'C': '^[h-j]'}}, value='e', regex=True)
A B C
0 0 e f
1 1 e g
Expand Down
39 changes: 21 additions & 18 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -2439,14 +2439,14 @@ def var(
create_section_header("Examples"),
dedent(
"""\
>>> ser = pd.Series([1, 5, 2, 7, 12, 6])
>>> ser = pd.Series([1, 5, 2, 7, 15, 6])
>>> ser.rolling(3).skew().round(6)
0 NaN
1 NaN
2 1.293343
3 -0.585583
4 0.000000
5 1.545393
4 0.670284
5 1.652317
dtype: float64
"""
),
Expand Down Expand Up @@ -2794,12 +2794,12 @@ def cov(

>>> v1 = [3, 3, 3, 5, 8]
>>> v2 = [3, 4, 4, 4, 8]
>>> # numpy returns a 2X2 array, the correlation coefficient
>>> # is the number at entry [0][1]
>>> print(f"{{np.corrcoef(v1[:-1], v2[:-1])[0][1]:.6f}}")
0.333333
>>> print(f"{{np.corrcoef(v1[1:], v2[1:])[0][1]:.6f}}")
0.916949
>>> np.corrcoef(v1[:-1], v2[:-1])
array([[1. , 0.33333333],
[0.33333333, 1. ]])
>>> np.corrcoef(v1[1:], v2[1:])
array([[1. , 0.9169493],
[0.9169493, 1. ]])
>>> s1 = pd.Series(v1)
>>> s2 = pd.Series(v2)
>>> s1.rolling(4).corr(s2)
Expand All @@ -2813,15 +2813,18 @@ def cov(
The example below shows a similar rolling calculation on a
DataFrame using the pairwise option.

>>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\
[46., 31.], [50., 36.]])
>>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7))
[[1. 0.6263001]
[0.6263001 1. ]]
>>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7))
[[1. 0.5553681]
[0.5553681 1. ]]
>>> df = pd.DataFrame(matrix, columns=['X','Y'])
>>> matrix = np.array([[51., 35.],
... [49., 30.],
... [47., 32.],
... [46., 31.],
... [50., 36.]])
>>> np.corrcoef(matrix[:-1, 0], matrix[:-1, 1])
array([[1. , 0.6263001],
[0.6263001, 1. ]])
>>> np.corrcoef(matrix[1:, 0], matrix[1:, 1])
array([[1. , 0.55536811],
[0.55536811, 1. ]])
>>> df = pd.DataFrame(matrix, columns=['X', 'Y'])
>>> df
X Y
0 51.0 35.0
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,7 @@ def read_sql(

pandas now supports reading via ADBC drivers

>>> from adbc_driver_postgresql import dbapi
>>> from adbc_driver_postgresql import dbapi # doctest:+SKIP
>>> with dbapi.connect('postgres:///db_name') as conn: # doctest:+SKIP
... pd.read_sql('SELECT int_column FROM test_data', conn)
int_column
Expand Down
16 changes: 7 additions & 9 deletions pandas/plotting/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,10 @@ def hist_frame(
.. plot::
:context: close-figs

>>> df = pd.DataFrame({
... 'length': [1.5, 0.5, 1.2, 0.9, 3],
... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]
... }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
>>> data = {'length': [1.5, 0.5, 1.2, 0.9, 3],
... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]}
>>> index = ['pig', 'rabbit', 'duck', 'chicken', 'horse']
>>> df = pd.DataFrame(data, index=index)
>>> hist = df.hist(bins=3)
"""
plot_backend = _get_plot_backend(backend)
Expand Down Expand Up @@ -607,10 +607,10 @@ def boxplot_frame_groupby(
>>> import itertools
>>> tuples = [t for t in itertools.product(range(1000), range(4))]
>>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
>>> data = np.random.randn(len(index),4)
>>> data = np.random.randn(len(index), 4)
>>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
>>> grouped = df.groupby(level='lvl1')
>>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10)) # doctest: +SKIP
>>> grouped.boxplot(rot=45, fontsize=12, figsize=(8, 10)) # doctest: +SKIP

The ``subplots=False`` option shows the boxplots in a single figure.

Expand Down Expand Up @@ -1400,9 +1400,7 @@ def hist(
.. plot::
:context: close-figs

>>> df = pd.DataFrame(
... np.random.randint(1, 7, 6000),
... columns = ['one'])
>>> df = pd.DataFrame(np.random.randint(1, 7, 6000), columns=['one'])
>>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
>>> ax = df.plot.hist(bins=12, alpha=0.5)

Expand Down
2 changes: 1 addition & 1 deletion pandas/plotting/_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ def bootstrap_plot(
:context: close-figs

>>> s = pd.Series(np.random.uniform(size=100))
>>> pd.plotting.bootstrap_plot(s)
>>> pd.plotting.bootstrap_plot(s) # doctest: +SKIP
<Figure size 640x480 with 6 Axes>
"""
plot_backend = _get_plot_backend("matplotlib")
Expand Down
11 changes: 6 additions & 5 deletions scripts/validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,12 @@ def validate_pep8(self):
file.name,
]
response = subprocess.run(cmd, capture_output=True, check=False, text=True)
stdout = response.stdout
stdout = stdout.replace(file.name, "")
messages = stdout.strip("\n").splitlines()
if messages:
error_messages.extend(messages)
for output in ("stdout", "stderr"):
out = getattr(response, output)
out = out.replace(file.name, "")
messages = out.strip("\n").splitlines()
if messages:
error_messages.extend(messages)
finally:
file.close()
os.unlink(file.name)
Expand Down