
Commit

Merge branch 'main' into fix_docstring_groupby
githubalexliu authored Dec 5, 2024
2 parents 9d2d4cf + a36c44e commit 2e25207
Showing 12 changed files with 97 additions and 41 deletions.
6 changes: 0 additions & 6 deletions ci/code_checks.sh
@@ -73,8 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Period.freq GL08" \
-i "pandas.Period.ordinal GL08" \
-i "pandas.RangeIndex.from_range PR01,SA01" \
-i "pandas.Series.dt.unit GL08" \
-i "pandas.Series.pad PR01,SA01" \
-i "pandas.Timedelta.max PR02" \
-i "pandas.Timedelta.min PR02" \
-i "pandas.Timedelta.resolution PR02" \
@@ -87,12 +85,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.arrays.NumpyExtensionArray SA01" \
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \
-i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
-i "pandas.core.resample.Resampler.mean SA01" \
-i "pandas.core.resample.Resampler.min PR01,RT03,SA01" \
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -797,6 +797,7 @@ Other
- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)
- Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)

.. ***DO NOT USE THIS SECTION***
1 change: 1 addition & 0 deletions environment.yml
@@ -35,6 +35,7 @@ dependencies:
- hypothesis>=6.84.0
- gcsfs>=2022.11.0
- ipython
- pickleshare # Needed for IPython Sphinx directive in the docs GH#60429
- jinja2>=3.1.2
- lxml>=4.9.2
- matplotlib>=3.6.3
24 changes: 23 additions & 1 deletion pandas/core/arrays/datetimelike.py
@@ -2073,7 +2073,29 @@ def _creso(self) -> int:

@cache_readonly
def unit(self) -> str:
# e.g. "ns", "us", "ms"
"""
The precision unit of the datetime data.
Returns the precision unit for the dtype.
That is, the smallest time increment that can be stored within this dtype.
Returns
-------
str
Unit string representation (e.g. "ns").
See Also
--------
TimelikeOps.as_unit : Converts to a specific unit.
Examples
--------
>>> idx = pd.DatetimeIndex(["2020-01-02 01:02:03.004005006"])
>>> idx.unit
'ns'
>>> idx.as_unit("s").unit
's'
"""
# error: Argument 1 to "dtype_to_unit" has incompatible type
# "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]"
return dtype_to_unit(self.dtype) # type: ignore[arg-type]
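As an editor's illustrative sketch (not part of the diff): the same unit attribute is exposed on Series through the .dt accessor, which is why the pandas.Series.dt.unit GL08 ignore could be dropped from ci/code_checks.sh above. Behaviour is assumed to mirror the DatetimeIndex example in the docstring:
>>> import pandas as pd
>>> ser = pd.Series(pd.DatetimeIndex(["2020-01-02 01:02:03.004005006"]))
>>> ser.dt.unit
'ns'
>>> ser.dt.as_unit("ms").dt.unit
'ms'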
12 changes: 5 additions & 7 deletions pandas/core/dtypes/cast.py
@@ -87,8 +87,8 @@

if TYPE_CHECKING:
from collections.abc import (
Collection,
Sequence,
Sized,
)

from pandas._typing import (
@@ -1581,7 +1581,7 @@ def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
return _maybe_unbox_datetimelike(value, dtype)


def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
def construct_1d_object_array_from_listlike(values: Collection) -> np.ndarray:
"""
Transform any list-like object in a 1-dimensional numpy array of object
dtype.
@@ -1599,11 +1599,9 @@ def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
-------
1-dimensional numpy array of dtype object
"""
# numpy will try to interpret nested lists as further dimensions, hence
# making a 1D array that contains list-likes is a bit tricky:
result = np.empty(len(values), dtype="object")
result[:] = values
return result
# numpy will try to interpret nested lists as further dimensions in np.array(),
# hence explicitly making a 1D array using np.fromiter
return np.fromiter(values, dtype="object", count=len(values))
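A small hypothetical sketch (not part of the diff) of the behaviour the comment describes: np.array() turns nested lists into extra dimensions, while np.fromiter() with an explicit count keeps each list-like as a single object element.
>>> import numpy as np
>>> values = [[1, 2], [3, 4]]
>>> np.array(values, dtype=object).shape  # nested lists become a 2-D array
(2, 2)
>>> out = np.fromiter(values, dtype="object", count=len(values))
>>> out.shape  # each inner list stays a single element of a 1-D array
(2,)
>>> out[0]
[1, 2]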


def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.ndarray:
8 changes: 8 additions & 0 deletions pandas/core/generic.py
@@ -3878,6 +3878,14 @@ def to_csv(
>>> import os # doctest: +SKIP
>>> os.makedirs("folder/subfolder", exist_ok=True) # doctest: +SKIP
>>> df.to_csv("folder/subfolder/out.csv") # doctest: +SKIP
Format floats to two decimal places:
>>> df.to_csv("out1.csv", float_format="%.2f") # doctest: +SKIP
Format floats using scientific notation:
>>> df.to_csv("out2.csv", float_format="{{:.2e}}".format) # doctest: +SKIP
"""
df = self if isinstance(self, ABCDataFrame) else self.to_frame()
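An illustrative, hedged example of what float_format does to the written values (data made up; calling to_csv without a path returns the CSV text as a string):
>>> df = pd.DataFrame({"x": [0.123456, 1.5]})
>>> print(df.to_csv(float_format="%.2f"), end="")
,x
0,0.12
1,1.50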

48 changes: 30 additions & 18 deletions pandas/core/groupby/generic.py
@@ -1321,8 +1321,8 @@ def idxmin(self, skipna: bool = True) -> Series:
Returns
-------
Index
Label of the minimum value.
Series
Indexes of minima in each group.
Raises
------
@@ -1374,8 +1374,8 @@ def idxmax(self, skipna: bool = True) -> Series:
Returns
-------
Index
Label of the maximum value.
Series
Indexes of maxima in each group.
Raises
------
@@ -2453,6 +2453,10 @@ def nunique(self, dropna: bool = True) -> DataFrame:
nunique: DataFrame
Counts of unique elements in each position.
See Also
--------
DataFrame.nunique : Count number of distinct elements in specified axis.
Examples
--------
>>> df = pd.DataFrame(
@@ -2508,8 +2512,8 @@ def idxmax(
Returns
-------
Series
Indexes of maxima in each group.
DataFrame
Indexes of maxima in each column according to the group.
Raises
------
@@ -2519,6 +2523,7 @@
See Also
--------
Series.idxmax : Return index of the maximum element.
DataFrame.idxmax : Indexes of maxima along the specified axis.
Notes
-----
@@ -2532,6 +2537,7 @@
... {
... "consumption": [10.51, 103.11, 55.48],
... "co2_emissions": [37.2, 19.66, 1712],
... "food_type": ["meat", "plant", "meat"],
... },
... index=["Pork", "Wheat Products", "Beef"],
... )
@@ -2542,12 +2548,14 @@
Wheat Products 103.11 19.66
Beef 55.48 1712.00
By default, it returns the index for the maximum value in each column.
By default, it returns the index for the maximum value in each column
according to the group.
>>> df.idxmax()
consumption Wheat Products
co2_emissions Beef
dtype: object
>>> df.groupby("food_type").idxmax()
consumption co2_emissions
food_type
meat Beef Beef
plant Wheat Products Wheat Products
"""
return self._idxmax_idxmin("idxmax", numeric_only=numeric_only, skipna=skipna)

@@ -2570,8 +2578,8 @@
Returns
-------
Series
Indexes of minima in each group.
DataFrame
Indexes of minima in each column according to the group.
Raises
------
@@ -2581,6 +2589,7 @@
See Also
--------
Series.idxmin : Return index of the minimum element.
DataFrame.idxmin : Indexes of minima along the specified axis.
Notes
-----
@@ -2594,6 +2603,7 @@
... {
... "consumption": [10.51, 103.11, 55.48],
... "co2_emissions": [37.2, 19.66, 1712],
... "food_type": ["meat", "plant", "meat"],
... },
... index=["Pork", "Wheat Products", "Beef"],
... )
@@ -2604,12 +2614,14 @@
Wheat Products 103.11 19.66
Beef 55.48 1712.00
By default, it returns the index for the minimum value in each column.
By default, it returns the index for the minimum value in each column
according to the group.
>>> df.idxmin()
consumption Pork
co2_emissions Wheat Products
dtype: object
>>> df.groupby("food_type").idxmin()
consumption co2_emissions
food_type
meat Pork Pork
plant Wheat Products Wheat Products
"""
return self._idxmax_idxmin("idxmin", numeric_only=numeric_only, skipna=skipna)

16 changes: 14 additions & 2 deletions pandas/core/groupby/groupby.py
@@ -715,7 +715,19 @@ def get_group(self, name) -> DataFrame | Series:
Returns
-------
DataFrame or Series
Series or DataFrame
The Series or DataFrame corresponding to the group provided.
See Also
--------
DataFrameGroupBy.groups: Dictionary representation of the groupings formed
during a groupby operation.
DataFrameGroupBy.indices: Provides a mapping of group rows to positions
of the elements.
SeriesGroupBy.groups: Dictionary representation of the groupings formed
during a groupby operation.
SeriesGroupBy.indices: Provides a mapping of group rows to positions
of the elements.
Examples
--------
@@ -2660,8 +2672,8 @@ def sem(self, ddof: int = 1, numeric_only: bool = False) -> NDFrameT:
See Also
--------
Series.sem : Return unbiased standard error of the mean over requested axis.
DataFrame.sem : Return unbiased standard error of the mean over requested axis.
Series.sem : Return unbiased standard error of the mean over requested axis.
Examples
--------
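As an editor's sketch of the get_group behaviour documented in the hunk above (not part of the diff; values are illustrative):
>>> ser = pd.Series(
...     [390.0, 350.0, 30.0, 20.0],
...     index=["Falcon", "Falcon", "Parrot", "Parrot"],
...     name="max_speed",
... )
>>> ser.groupby(level=0).get_group("Falcon")
Falcon    390.0
Falcon    350.0
Name: max_speed, dtype: float64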
8 changes: 4 additions & 4 deletions pandas/io/formats/format.py
@@ -669,9 +669,9 @@ def _truncate_horizontally(self) -> None:
assert self.max_cols_fitted is not None
col_num = self.max_cols_fitted // 2
if col_num >= 1:
left = self.tr_frame.iloc[:, :col_num]
right = self.tr_frame.iloc[:, -col_num:]
self.tr_frame = concat((left, right), axis=1)
_len = len(self.tr_frame.columns)
_slice = np.hstack([np.arange(col_num), np.arange(_len - col_num, _len)])
self.tr_frame = self.tr_frame.iloc[:, _slice]
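An illustrative sketch (not part of the diff) of the position-based selection above: picking the first and last col_num columns by integer position in a single .iloc call sidesteps concat(), which is what raised the ValueError when a DataFrame was stored in .attrs (GH#60455). Values here are hypothetical.
>>> import numpy as np
>>> df = pd.DataFrame([[0] * 10])
>>> df.attrs["meta"] = pd.DataFrame([])  # used to break the concat-based truncation
>>> col_num = 1
>>> _len = len(df.columns)
>>> keep = np.hstack([np.arange(col_num), np.arange(_len - col_num, _len)])
>>> df.iloc[:, keep]
   0  9
0  0  0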

# truncate formatter
if isinstance(self.formatters, (list, tuple)):
@@ -682,7 +682,7 @@ def _truncate_horizontally(self) -> None:
else:
col_num = cast(int, self.max_cols)
self.tr_frame = self.tr_frame.iloc[:, :col_num]
self.tr_col_num = col_num
self.tr_col_num: int = col_num

def _truncate_vertically(self) -> None:
"""Remove rows, which are not to be displayed.
6 changes: 3 additions & 3 deletions pandas/io/stata.py
@@ -2207,14 +2207,14 @@ def _convert_datetime_to_stata_type(fmt: str) -> np.dtype:
def _maybe_convert_to_int_keys(convert_dates: dict, varlist: list[Hashable]) -> dict:
new_dict = {}
for key, value in convert_dates.items():
if not value.startswith("%"): # make sure proper fmts
if not convert_dates[key].startswith("%"): # make sure proper fmts
convert_dates[key] = "%" + value
if key in varlist:
new_dict[varlist.index(key)] = value
new_dict[varlist.index(key)] = convert_dates[key]
else:
if not isinstance(key, int):
raise ValueError("convert_dates key must be a column or an integer")
new_dict[key] = value
new_dict[key] = convert_dates[key]
return new_dict
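A distilled, hypothetical sketch of the fix's point (names and values made up): once the format string has been normalised in the dict, the new position-keyed mapping must be built from the updated dict value, not from the stale loop variable.
>>> convert_dates = {"date_col": "tc"}  # user-supplied Stata fmt without the leading "%"
>>> varlist = ["id", "date_col"]
>>> new_dict = {}
>>> for key, value in convert_dates.items():
...     if not convert_dates[key].startswith("%"):
...         convert_dates[key] = "%" + value
...     new_dict[varlist.index(key)] = convert_dates[key]  # updated value, not `value`
...
>>> new_dict
{1: '%tc'}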


7 changes: 7 additions & 0 deletions pandas/tests/io/formats/test_format.py
@@ -129,6 +129,13 @@ def test_repr_truncation_preserves_na(self):
with option_context("display.max_rows", 2, "display.show_dimensions", False):
assert repr(df) == " a\n0 <NA>\n.. ...\n9 <NA>"

def test_repr_truncation_dataframe_attrs(self):
# GH#60455
df = DataFrame([[0] * 10])
df.attrs["b"] = DataFrame([])
with option_context("display.max_columns", 2, "display.show_dimensions", False):
assert repr(df) == " 0 ... 9\n0 0 ... 0"

def test_max_colwidth_negative_int_raises(self):
# Deprecation enforced from:
# https://github.com/pandas-dev/pandas/issues/31532
1 change: 1 addition & 0 deletions requirements-dev.txt
@@ -24,6 +24,7 @@ html5lib>=1.1
hypothesis>=6.84.0
gcsfs>=2022.11.0
ipython
pickleshare
jinja2>=3.1.2
lxml>=4.9.2
matplotlib>=3.6.3
