diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index b3aae369db76e..adc5bc9a01bdd 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -73,8 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Period.freq GL08" \
         -i "pandas.Period.ordinal GL08" \
         -i "pandas.RangeIndex.from_range PR01,SA01" \
-        -i "pandas.Series.dt.unit GL08" \
-        -i "pandas.Series.pad PR01,SA01" \
         -i "pandas.Timedelta.max PR02" \
         -i "pandas.Timedelta.min PR02" \
         -i "pandas.Timedelta.resolution PR02" \
@@ -87,12 +85,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.arrays.NumpyExtensionArray SA01" \
         -i "pandas.arrays.TimedeltaArray PR07,SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
-        -i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
-        -i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
-        -i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
         -i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
-        -i "pandas.core.resample.Resampler.get_group RT03,SA01" \
         -i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
         -i "pandas.core.resample.Resampler.mean SA01" \
         -i "pandas.core.resample.Resampler.min PR01,RT03,SA01" \
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 83638ce87f7ac..bb9f48d17b2e1 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -797,6 +797,7 @@ Other
 - Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)
 - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
 - Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
+- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
 
 .. ***DO NOT USE THIS SECTION***
 
diff --git a/environment.yml b/environment.yml
index 8ede5a16b7a59..69647a436e3ad 100644
--- a/environment.yml
+++ b/environment.yml
@@ -35,6 +35,7 @@ dependencies:
   - hypothesis>=6.84.0
   - gcsfs>=2022.11.0
   - ipython
+  - pickleshare # Needed for IPython Sphinx directive in the docs GH#60429
   - jinja2>=3.1.2
   - lxml>=4.9.2
   - matplotlib>=3.6.3
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 9c821bf0d184e..c6b6367e347ba 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -2073,7 +2073,29 @@ def _creso(self) -> int:
 
     @cache_readonly
     def unit(self) -> str:
-        # e.g. "ns", "us", "ms"
+        """
+        The precision unit of the datetime data.
+
+        Returns the precision unit for the dtype.
+        It means the smallest time frame that can be stored within this dtype.
+
+        Returns
+        -------
+        str
+            Unit string representation (e.g. "ns").
+
+        See Also
+        --------
+        TimelikeOps.as_unit : Converts to a specific unit.
+
+        Examples
+        --------
+        >>> idx = pd.DatetimeIndex(["2020-01-02 01:02:03.004005006"])
+        >>> idx.unit
+        'ns'
+        >>> idx.as_unit("s").unit
+        's'
+        """
         # error: Argument 1 to "dtype_to_unit" has incompatible type
         # "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]"
         return dtype_to_unit(self.dtype)  # type: ignore[arg-type]
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 137a49c4487f6..02b9291da9b31 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -87,8 +87,8 @@
 
 if TYPE_CHECKING:
     from collections.abc import (
+        Collection,
         Sequence,
-        Sized,
     )
 
     from pandas._typing import (
@@ -1581,7 +1581,7 @@ def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
     return _maybe_unbox_datetimelike(value, dtype)
 
 
-def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
+def construct_1d_object_array_from_listlike(values: Collection) -> np.ndarray:
     """
     Transform any list-like object in a 1-dimensional numpy array of object
     dtype.
@@ -1599,11 +1599,9 @@ def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
     -------
     1-dimensional numpy array of dtype object
     """
-    # numpy will try to interpret nested lists as further dimensions, hence
-    # making a 1D array that contains list-likes is a bit tricky:
-    result = np.empty(len(values), dtype="object")
-    result[:] = values
-    return result
+    # numpy will try to interpret nested lists as further dimensions in np.array(),
+    # hence explicitly making a 1D array using np.fromiter
+    return np.fromiter(values, dtype="object", count=len(values))
 
 
 def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.ndarray:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index a6be17a654aa7..3a48cc8a66076 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3878,6 +3878,14 @@ def to_csv(
         >>> import os  # doctest: +SKIP
         >>> os.makedirs("folder/subfolder", exist_ok=True)  # doctest: +SKIP
         >>> df.to_csv("folder/subfolder/out.csv")  # doctest: +SKIP
+
+        Format floats to two decimal places:
+
+        >>> df.to_csv("out1.csv", float_format="%.2f")  # doctest: +SKIP
+
+        Format floats using scientific notation:
+
+        >>> df.to_csv("out2.csv", float_format="{{:.2e}}".format)  # doctest: +SKIP
         """
         df = self if isinstance(self, ABCDataFrame) else self.to_frame()
 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 35ec09892ede6..3fa34007a739b 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1321,8 +1321,8 @@ def idxmin(self, skipna: bool = True) -> Series:
 
         Returns
         -------
-        Index
-            Label of the minimum value.
+        Series
+            Indexes of minima in each group.
 
         Raises
         ------
@@ -1374,8 +1374,8 @@ def idxmax(self, skipna: bool = True) -> Series:
 
         Returns
         -------
-        Index
-            Label of the maximum value.
+        Series
+            Indexes of maxima in each group.
 
         Raises
         ------
@@ -2453,6 +2453,10 @@ def nunique(self, dropna: bool = True) -> DataFrame:
         nunique: DataFrame
             Counts of unique elements in each position.
 
+        See Also
+        --------
+        DataFrame.nunique : Count number of distinct elements in specified axis.
+
         Examples
         --------
         >>> df = pd.DataFrame(
@@ -2508,8 +2512,8 @@ def idxmax(
 
         Returns
         -------
-        Series
-            Indexes of maxima in each group.
+        DataFrame
+            Indexes of maxima in each column according to the group.
 
         Raises
         ------
@@ -2519,6 +2523,7 @@ def idxmax(
         See Also
         --------
         Series.idxmax : Return index of the maximum element.
+        DataFrame.idxmax : Indexes of maxima along the specified axis.
 
         Notes
         -----
@@ -2532,6 +2537,7 @@ def idxmax(
         ...     {
         ...         "consumption": [10.51, 103.11, 55.48],
         ...         "co2_emissions": [37.2, 19.66, 1712],
+        ...         "food_type": ["meat", "plant", "meat"],
         ...     },
         ...     index=["Pork", "Wheat Products", "Beef"],
         ... )
@@ -2542,12 +2548,14 @@ def idxmax(
         Wheat Products       103.11         19.66
         Beef                  55.48       1712.00
 
-        By default, it returns the index for the maximum value in each column.
+        By default, it returns the index for the maximum value in each column
+        according to the group.
 
-        >>> df.idxmax()
-        consumption     Wheat Products
-        co2_emissions             Beef
-        dtype: object
+        >>> df.groupby("food_type").idxmax()
+                        consumption   co2_emissions
+        food_type
+        meat                   Beef            Beef
+        plant        Wheat Products  Wheat Products
         """
         return self._idxmax_idxmin("idxmax", numeric_only=numeric_only, skipna=skipna)
 
@@ -2570,8 +2578,8 @@ def idxmin(
 
         Returns
         -------
-        Series
-            Indexes of minima in each group.
+        DataFrame
+            Indexes of minima in each column according to the group.
 
         Raises
         ------
@@ -2581,6 +2589,7 @@ def idxmin(
         See Also
         --------
         Series.idxmin : Return index of the minimum element.
+        DataFrame.idxmin : Indexes of minima along the specified axis.
 
         Notes
         -----
@@ -2594,6 +2603,7 @@ def idxmin(
         ...     {
         ...         "consumption": [10.51, 103.11, 55.48],
         ...         "co2_emissions": [37.2, 19.66, 1712],
+        ...         "food_type": ["meat", "plant", "meat"],
         ...     },
         ...     index=["Pork", "Wheat Products", "Beef"],
         ... )
@@ -2604,12 +2614,14 @@ def idxmin(
         Wheat Products       103.11         19.66
         Beef                  55.48       1712.00
 
-        By default, it returns the index for the minimum value in each column.
+        By default, it returns the index for the minimum value in each column
+        according to the group.
 
-        >>> df.idxmin()
-        consumption                Pork
-        co2_emissions    Wheat Products
-        dtype: object
+        >>> df.groupby("food_type").idxmin()
+                        consumption   co2_emissions
+        food_type
+        meat                   Pork            Pork
+        plant        Wheat Products  Wheat Products
         """
         return self._idxmax_idxmin("idxmin", numeric_only=numeric_only, skipna=skipna)
 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 656da103f27e4..f0513be3498d1 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -715,7 +715,19 @@ def get_group(self, name) -> DataFrame | Series:
 
         Returns
         -------
-        DataFrame or Series
+        Series or DataFrame
+            Get the respective Series or DataFrame corresponding to the group provided.
+
+        See Also
+        --------
+        DataFrameGroupBy.groups: Dictionary representation of the groupings formed
+            during a groupby operation.
+        DataFrameGroupBy.indices: Provides a mapping of group rows to positions
+            of the elements.
+        SeriesGroupBy.groups: Dictionary representation of the groupings formed
+            during a groupby operation.
+        SeriesGroupBy.indices: Provides a mapping of group rows to positions
+            of the elements.
 
         Examples
         --------
@@ -2660,8 +2672,8 @@ def sem(self, ddof: int = 1, numeric_only: bool = False) -> NDFrameT:
 
         See Also
         --------
-        Series.sem : Return unbiased standard error of the mean over requested axis.
         DataFrame.sem : Return unbiased standard error of the mean over requested axis.
+        Series.sem : Return unbiased standard error of the mean over requested axis.
 
         Examples
         --------
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index 4f87b1a30ca61..17460eae8c049 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -669,9 +669,9 @@ def _truncate_horizontally(self) -> None:
         assert self.max_cols_fitted is not None
         col_num = self.max_cols_fitted // 2
         if col_num >= 1:
-            left = self.tr_frame.iloc[:, :col_num]
-            right = self.tr_frame.iloc[:, -col_num:]
-            self.tr_frame = concat((left, right), axis=1)
+            _len = len(self.tr_frame.columns)
+            _slice = np.hstack([np.arange(col_num), np.arange(_len - col_num, _len)])
+            self.tr_frame = self.tr_frame.iloc[:, _slice]
 
             # truncate formatter
             if isinstance(self.formatters, (list, tuple)):
@@ -682,7 +682,7 @@ def _truncate_horizontally(self) -> None:
         else:
             col_num = cast(int, self.max_cols)
             self.tr_frame = self.tr_frame.iloc[:, :col_num]
-        self.tr_col_num = col_num
+        self.tr_col_num: int = col_num
 
     def _truncate_vertically(self) -> None:
         """Remove rows, which are not to be displayed.
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 053e331925b6f..34d95fb59a21c 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -2207,14 +2207,14 @@ def _convert_datetime_to_stata_type(fmt: str) -> np.dtype:
 def _maybe_convert_to_int_keys(convert_dates: dict, varlist: list[Hashable]) -> dict:
     new_dict = {}
     for key, value in convert_dates.items():
-        if not value.startswith("%"):  # make sure proper fmts
+        if not convert_dates[key].startswith("%"):  # make sure proper fmts
             convert_dates[key] = "%" + value
         if key in varlist:
-            new_dict[varlist.index(key)] = value
+            new_dict[varlist.index(key)] = convert_dates[key]
         else:
             if not isinstance(key, int):
                 raise ValueError("convert_dates key must be a column or an integer")
-            new_dict[key] = value
+            new_dict[key] = convert_dates[key]
     return new_dict
 
 
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index 0dc16e1ebc723..d7db3d5082135 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -129,6 +129,13 @@ def test_repr_truncation_preserves_na(self):
         with option_context("display.max_rows", 2, "display.show_dimensions", False):
             assert repr(df) == "      a\n0  <NA>\n..  ...\n9  <NA>"
 
+    def test_repr_truncation_dataframe_attrs(self):
+        # GH#60455
+        df = DataFrame([[0] * 10])
+        df.attrs["b"] = DataFrame([])
+        with option_context("display.max_columns", 2, "display.show_dimensions", False):
+            assert repr(df) == "   0  ...  9\n0  0  ...  0"
+
     def test_max_colwidth_negative_int_raises(self):
         # Deprecation enforced from:
         # https://github.com/pandas-dev/pandas/issues/31532
diff --git a/requirements-dev.txt b/requirements-dev.txt
index b68b9f0c8f92c..fb4d9cdb589ca 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -24,6 +24,7 @@ html5lib>=1.1
 hypothesis>=6.84.0
 gcsfs>=2022.11.0
 ipython
+pickleshare
 jinja2>=3.1.2
 lxml>=4.9.2
 matplotlib>=3.6.3
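
Two small usage sketches related to the changes above (illustrative only, not part of the patch).

First, a reproducer for the repr fix exercised by `test_repr_truncation_dataframe_attrs` (GH#60455), assuming a pandas build with the `format.py` change applied: column truncation no longer goes through `concat`, so a `DataFrame` stored in `.attrs` no longer triggers a `ValueError` while printing.

```python
import pandas as pd

# A wide frame whose .attrs holds another DataFrame previously raised
# ValueError once display.max_columns forced column truncation.
df = pd.DataFrame([[0] * 10])
df.attrs["b"] = pd.DataFrame([])

with pd.option_context("display.max_columns", 2, "display.show_dimensions", False):
    print(repr(df))  # prints the truncated "0 ... 9" view instead of raising
```

Second, an illustration (with made-up data) of why `construct_1d_object_array_from_listlike` switches to `np.fromiter`: `np.array` treats equal-length nested list-likes as an extra dimension, whereas `np.fromiter` always produces a flat 1-D object array with one entry per element.

```python
import numpy as np

values = [[1, 2], [3, 4]]  # equal-length nested list-likes

# np.array() interprets the nesting as a second dimension.
print(np.array(values, dtype="object").shape)  # (2, 2)

# np.fromiter() keeps one object per input element: a 1-D array of lists.
arr = np.fromiter(values, dtype="object", count=len(values))
print(arr.shape)  # (2,)
print(arr[0])  # [1, 2]
```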