Skip to content

Commit

Permalink
STY: Fix doctest and docstring formatting errors (#56408)
Browse files Browse the repository at this point in the history
* STY: Fix doctest and docstring formatting errors

* ensure stderr is output too

* Fix more failures

* Don't add redirects for single page, fix example

* A few more

* Remove the `-e` flag from `set` in ci/code_checks.sh
  • Loading branch information
mroeschke authored Dec 9, 2023
1 parent 1ab4d03 commit 04307e7
Show file tree
Hide file tree
Showing 14 changed files with 76 additions and 75 deletions.
2 changes: 2 additions & 0 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# $ ./ci/code_checks.sh single-docs # check single-page docs build warning-free
# $ ./ci/code_checks.sh notebooks # check execution of documentation notebooks

set -uo pipefail

[[ -z "$1" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "single-docs" || "$1" == "notebooks" ]] || \
{ echo "Unknown command $1. Usage: $0 [code|doctests|docstrings|single-docs|notebooks]"; exit 9999; }

Expand Down
5 changes: 3 additions & 2 deletions doc/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,9 @@ def html(self):
os.remove(zip_fname)

if ret_code == 0:
if self.single_doc_html is not None and not self.no_browser:
self._open_browser(self.single_doc_html)
if self.single_doc_html is not None:
if not self.no_browser:
self._open_browser(self.single_doc_html)
else:
self._add_redirects()
if self.whatsnew and not self.no_browser:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
Examples
--------
>>> import scipy.sparse
>>> mat = scipy.sparse.eye(3)
>>> mat = scipy.sparse.eye(3, dtype=float)
>>> pd.DataFrame.sparse.from_spmatrix(mat)
0 1 2
0 1.0 0.0 0.0
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3955,7 +3955,7 @@ def to_csv(
>>> df = pd.DataFrame({{'name': ['Raphael', 'Donatello'],
... 'mask': ['red', 'purple'],
... 'weapon': ['sai', 'bo staff']}})
>>> df.to_csv('out.csv', index=False) # doctest: +SKIP
>>> df.to_csv('out.csv', index=False) # doctest: +SKIP
Create 'out.zip' containing 'out.csv'
Expand Down Expand Up @@ -8972,7 +8972,7 @@ def clip(
Clips using specific lower and upper thresholds per column:
>>> df.clip([-2, -1], [4,5])
>>> df.clip([-2, -1], [4, 5])
col_0 col_1
0 4 -1
1 -2 -1
Expand Down
30 changes: 13 additions & 17 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,10 +470,9 @@ def _aggregate_named(self, func, *args, **kwargs):

__examples_series_doc = dedent(
"""
>>> ser = pd.Series(
... [390.0, 350.0, 30.0, 20.0],
... index=["Falcon", "Falcon", "Parrot", "Parrot"],
... name="Max Speed")
>>> ser = pd.Series([390.0, 350.0, 30.0, 20.0],
... index=["Falcon", "Falcon", "Parrot", "Parrot"],
... name="Max Speed")
>>> grouped = ser.groupby([1, 1, 2, 2])
>>> grouped.transform(lambda x: (x - x.mean()) / x.std())
Falcon 0.707107
Expand Down Expand Up @@ -1331,14 +1330,10 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
"""
Examples
--------
>>> df = pd.DataFrame(
... {
... "A": [1, 1, 2, 2],
>>> data = {"A": [1, 1, 2, 2],
... "B": [1, 2, 3, 4],
... "C": [0.362838, 0.227877, 1.267767, -0.562860],
... }
... )
... "C": [0.362838, 0.227877, 1.267767, -0.562860]}
>>> df = pd.DataFrame(data)
>>> df
A B C
0 1 1 0.362838
Expand Down Expand Up @@ -1393,7 +1388,8 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
>>> df.groupby("A").agg(
... b_min=pd.NamedAgg(column="B", aggfunc="min"),
... c_sum=pd.NamedAgg(column="C", aggfunc="sum"))
... c_sum=pd.NamedAgg(column="C", aggfunc="sum")
... )
b_min c_sum
A
1 1 0.590715
Expand Down Expand Up @@ -2154,7 +2150,7 @@ def idxmax(
>>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48],
... 'co2_emissions': [37.2, 19.66, 1712]},
... index=['Pork', 'Wheat Products', 'Beef'])
... index=['Pork', 'Wheat Products', 'Beef'])
>>> df
consumption co2_emissions
Expand Down Expand Up @@ -2236,7 +2232,7 @@ def idxmin(
>>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48],
... 'co2_emissions': [37.2, 19.66, 1712]},
... index=['Pork', 'Wheat Products', 'Beef'])
... index=['Pork', 'Wheat Products', 'Beef'])
>>> df
consumption co2_emissions
Expand Down Expand Up @@ -2319,9 +2315,9 @@ def value_counts(
Examples
--------
>>> df = pd.DataFrame({
... 'gender': ['male', 'male', 'female', 'male', 'female', 'male'],
... 'education': ['low', 'medium', 'high', 'low', 'high', 'low'],
... 'country': ['US', 'FR', 'US', 'FR', 'FR', 'FR']
... 'gender': ['male', 'male', 'female', 'male', 'female', 'male'],
... 'education': ['low', 'medium', 'high', 'low', 'high', 'low'],
... 'country': ['US', 'FR', 'US', 'FR', 'FR', 'FR']
... })
>>> df
Expand Down
18 changes: 11 additions & 7 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,8 @@ class providing the base-class of operations.
""",
"dataframe_examples": """
>>> df = pd.DataFrame({'A': 'a a b'.split(),
... 'B': [1,2,3],
... 'C': [4,6,5]})
... 'B': [1, 2, 3],
... 'C': [4, 6, 5]})
>>> g1 = df.groupby('A', group_keys=False)
>>> g2 = df.groupby('A', group_keys=True)
Expand Down Expand Up @@ -313,7 +313,7 @@ class providing the base-class of operations.
The resulting dtype will reflect the return value of the passed ``func``.
>>> g1.apply(lambda x: x*2 if x.name == 'a' else x/2)
>>> g1.apply(lambda x: x * 2 if x.name == 'a' else x / 2)
a 0.0
a 2.0
b 1.0
Expand All @@ -322,7 +322,7 @@ class providing the base-class of operations.
In the above, the groups are not part of the index. We can have them included
by using ``g2`` where ``group_keys=True``:
>>> g2.apply(lambda x: x*2 if x.name == 'a' else x/2)
>>> g2.apply(lambda x: x * 2 if x.name == 'a' else x / 2)
a a 0.0
a 2.0
b b 1.0
Expand Down Expand Up @@ -421,14 +421,18 @@ class providing the base-class of operations.
functions that expect Series, DataFrames, GroupBy or Resampler objects.
Instead of writing
>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c) # doctest: +SKIP
>>> h = lambda x, arg2, arg3: x + 1 - arg2 * arg3
>>> g = lambda x, arg1: x * 5 / arg1
>>> f = lambda x: x ** 4
>>> df = pd.DataFrame([["a", 4], ["b", 5]], columns=["group", "value"])
>>> h(g(f(df.groupby('group')), arg1=1), arg2=2, arg3=3) # doctest: +SKIP
You can write
>>> (df.groupby('group')
... .pipe(f)
... .pipe(g, arg1=a)
... .pipe(h, arg2=b, arg3=c)) # doctest: +SKIP
... .pipe(g, arg1=1)
... .pipe(h, arg2=2, arg3=3)) # doctest: +SKIP
which is much more readable.
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,8 @@ def levels(self) -> FrozenList:
Examples
--------
>>> index = pd.MultiIndex.from_product([['mammal'],
... ('goat', 'human', 'cat', 'dog')], names=['Category', 'Animals'])
... ('goat', 'human', 'cat', 'dog')],
... names=['Category', 'Animals'])
>>> leg_num = pd.DataFrame(data=(4, 2, 4, 4), index=index, columns=['Legs'])
>>> leg_num
Legs
Expand Down
13 changes: 4 additions & 9 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,7 @@ def fillna(self, method, limit: int | None = None):
Missing values present before the upsampling are not affected.
>>> sm = pd.Series([1, None, 3],
... index=pd.date_range('20180101', periods=3, freq='h'))
... index=pd.date_range('20180101', periods=3, freq='h'))
>>> sm
2018-01-01 00:00:00 1.0
2018-01-01 01:00:00 NaN
Expand Down Expand Up @@ -1028,21 +1028,16 @@ def interpolate(
Examples
--------
>>> import datetime as dt
>>> timesteps = [
... dt.datetime(2023, 3, 1, 7, 0, 0),
... dt.datetime(2023, 3, 1, 7, 0, 1),
... dt.datetime(2023, 3, 1, 7, 0, 2),
... dt.datetime(2023, 3, 1, 7, 0, 3),
... dt.datetime(2023, 3, 1, 7, 0, 4)]
>>> start = "2023-03-01T07:00:00"
>>> timesteps = pd.date_range(start, periods=5, freq="s")
>>> series = pd.Series(data=[1, -1, 2, 1, 3], index=timesteps)
>>> series
2023-03-01 07:00:00 1
2023-03-01 07:00:01 -1
2023-03-01 07:00:02 2
2023-03-01 07:00:03 1
2023-03-01 07:00:04 3
dtype: int64
Freq: s, dtype: int64
Downsample the series to 0.5Hz by providing the period time of 2s.
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,7 +797,7 @@
... 'B': ['a', 'b', 'c', 'd', 'e'],
... 'C': ['f', 'g', 'h', 'i', 'j']}})
>>> df.replace(to_replace='^[a-g]', value = 'e', regex=True)
>>> df.replace(to_replace='^[a-g]', value='e', regex=True)
A B C
0 0 e e
1 1 e e
Expand All @@ -808,7 +808,7 @@
If ``value`` is not ``None`` and `to_replace` is a dictionary, the dictionary
keys will be the DataFrame columns that the replacement will be applied to.
>>> df.replace(to_replace={{'B': '^[a-c]', 'C': '^[h-j]'}}, value = 'e', regex=True)
>>> df.replace(to_replace={{'B': '^[a-c]', 'C': '^[h-j]'}}, value='e', regex=True)
A B C
0 0 e f
1 1 e g
Expand Down
39 changes: 21 additions & 18 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -2439,14 +2439,14 @@ def var(
create_section_header("Examples"),
dedent(
"""\
>>> ser = pd.Series([1, 5, 2, 7, 12, 6])
>>> ser = pd.Series([1, 5, 2, 7, 15, 6])
>>> ser.rolling(3).skew().round(6)
0 NaN
1 NaN
2 1.293343
3 -0.585583
4 0.000000
5 1.545393
4 0.670284
5 1.652317
dtype: float64
"""
),
Expand Down Expand Up @@ -2794,12 +2794,12 @@ def cov(
>>> v1 = [3, 3, 3, 5, 8]
>>> v2 = [3, 4, 4, 4, 8]
>>> # numpy returns a 2X2 array, the correlation coefficient
>>> # is the number at entry [0][1]
>>> print(f"{{np.corrcoef(v1[:-1], v2[:-1])[0][1]:.6f}}")
0.333333
>>> print(f"{{np.corrcoef(v1[1:], v2[1:])[0][1]:.6f}}")
0.916949
>>> np.corrcoef(v1[:-1], v2[:-1])
array([[1. , 0.33333333],
[0.33333333, 1. ]])
>>> np.corrcoef(v1[1:], v2[1:])
array([[1. , 0.9169493],
[0.9169493, 1. ]])
>>> s1 = pd.Series(v1)
>>> s2 = pd.Series(v2)
>>> s1.rolling(4).corr(s2)
Expand All @@ -2813,15 +2813,18 @@ def cov(
The below example shows a similar rolling calculation on a
DataFrame using the pairwise option.
>>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\
[46., 31.], [50., 36.]])
>>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7))
[[1. 0.6263001]
[0.6263001 1. ]]
>>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7))
[[1. 0.5553681]
[0.5553681 1. ]]
>>> df = pd.DataFrame(matrix, columns=['X','Y'])
>>> matrix = np.array([[51., 35.],
... [49., 30.],
... [47., 32.],
... [46., 31.],
... [50., 36.]])
>>> np.corrcoef(matrix[:-1, 0], matrix[:-1, 1])
array([[1. , 0.6263001],
[0.6263001, 1. ]])
>>> np.corrcoef(matrix[1:, 0], matrix[1:, 1])
array([[1. , 0.55536811],
[0.55536811, 1. ]])
>>> df = pd.DataFrame(matrix, columns=['X', 'Y'])
>>> df
X Y
0 51.0 35.0
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,7 @@ def read_sql(
pandas now supports reading via ADBC drivers
>>> from adbc_driver_postgresql import dbapi
>>> from adbc_driver_postgresql import dbapi # doctest:+SKIP
>>> with dbapi.connect('postgres:///db_name') as conn: # doctest:+SKIP
... pd.read_sql('SELECT int_column FROM test_data', conn)
int_column
Expand Down
16 changes: 7 additions & 9 deletions pandas/plotting/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,10 @@ def hist_frame(
.. plot::
:context: close-figs
>>> df = pd.DataFrame({
... 'length': [1.5, 0.5, 1.2, 0.9, 3],
... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]
... }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
>>> data = {'length': [1.5, 0.5, 1.2, 0.9, 3],
... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]}
>>> index = ['pig', 'rabbit', 'duck', 'chicken', 'horse']
>>> df = pd.DataFrame(data, index=index)
>>> hist = df.hist(bins=3)
"""
plot_backend = _get_plot_backend(backend)
Expand Down Expand Up @@ -607,10 +607,10 @@ def boxplot_frame_groupby(
>>> import itertools
>>> tuples = [t for t in itertools.product(range(1000), range(4))]
>>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
>>> data = np.random.randn(len(index),4)
>>> data = np.random.randn(len(index), 4)
>>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
>>> grouped = df.groupby(level='lvl1')
>>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10)) # doctest: +SKIP
>>> grouped.boxplot(rot=45, fontsize=12, figsize=(8, 10)) # doctest: +SKIP
The ``subplots=False`` option shows the boxplots in a single figure.
Expand Down Expand Up @@ -1400,9 +1400,7 @@ def hist(
.. plot::
:context: close-figs
>>> df = pd.DataFrame(
... np.random.randint(1, 7, 6000),
... columns = ['one'])
>>> df = pd.DataFrame(np.random.randint(1, 7, 6000), columns=['one'])
>>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
>>> ax = df.plot.hist(bins=12, alpha=0.5)
Expand Down
2 changes: 1 addition & 1 deletion pandas/plotting/_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ def bootstrap_plot(
:context: close-figs
>>> s = pd.Series(np.random.uniform(size=100))
>>> pd.plotting.bootstrap_plot(s)
>>> pd.plotting.bootstrap_plot(s) # doctest: +SKIP
<Figure size 640x480 with 6 Axes>
"""
plot_backend = _get_plot_backend("matplotlib")
Expand Down
11 changes: 6 additions & 5 deletions scripts/validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,12 @@ def validate_pep8(self):
file.name,
]
response = subprocess.run(cmd, capture_output=True, check=False, text=True)
stdout = response.stdout
stdout = stdout.replace(file.name, "")
messages = stdout.strip("\n").splitlines()
if messages:
error_messages.extend(messages)
for output in ("stdout", "stderr"):
out = getattr(response, output)
out = out.replace(file.name, "")
messages = out.strip("\n").splitlines()
if messages:
error_messages.extend(messages)
finally:
file.close()
os.unlink(file.name)
Expand Down

0 comments on commit 04307e7

Please sign in to comment.