From 99e3afe8a7bab9cd2d53322909627e5797907be2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:52:03 -1000 Subject: [PATCH] API: MultiIndex.names|codes|levels returns tuples (#57042) * MultiIndex.names|codes|levels returns tuples * Fix typing * Add whatsnew note * Fix stacking * Fix doctest, test * Fix other test * Remove example --- doc/source/user_guide/groupby.rst | 9 - doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_libs/index.pyi | 11 +- pandas/core/groupby/groupby.py | 2 +- pandas/core/indexes/base.py | 31 ++-- pandas/core/indexes/frozen.py | 120 -------------- pandas/core/indexes/multi.py | 156 +++++++++--------- pandas/core/resample.py | 5 +- pandas/core/reshape/melt.py | 2 +- pandas/core/reshape/merge.py | 14 +- pandas/core/reshape/pivot.py | 2 +- pandas/core/reshape/reshape.py | 36 ++-- pandas/core/strings/accessor.py | 2 +- pandas/core/window/rolling.py | 4 +- .../tests/frame/methods/test_rename_axis.py | 6 +- pandas/tests/frame/methods/test_set_index.py | 10 +- .../tests/frame/methods/test_sort_values.py | 4 +- pandas/tests/frame/test_stack_unstack.py | 4 +- pandas/tests/generic/test_frame.py | 8 +- pandas/tests/generic/test_series.py | 4 +- pandas/tests/groupby/methods/test_quantile.py | 7 +- .../groupby/methods/test_value_counts.py | 2 +- pandas/tests/groupby/test_apply.py | 2 +- pandas/tests/indexes/multi/test_astype.py | 2 +- .../tests/indexes/multi/test_constructors.py | 12 +- pandas/tests/indexes/multi/test_copy.py | 2 +- pandas/tests/indexes/multi/test_duplicates.py | 2 +- pandas/tests/indexes/multi/test_formats.py | 22 +-- pandas/tests/indexes/multi/test_get_set.py | 10 +- pandas/tests/indexes/multi/test_integrity.py | 4 +- pandas/tests/indexes/multi/test_names.py | 16 +- pandas/tests/indexes/multi/test_reindex.py | 32 ++-- pandas/tests/indexes/multi/test_reshape.py | 2 +- pandas/tests/indexes/multi/test_setops.py | 2 +- pandas/tests/indexes/multi/test_sorting.py | 6 +- pandas/tests/indexes/test_base.py | 2 +- pandas/tests/indexes/test_common.py | 4 +- pandas/tests/indexes/test_frozen.py | 113 ------------- .../tests/indexing/multiindex/test_partial.py | 2 +- .../tests/io/json/test_json_table_schema.py | 10 +- pandas/tests/io/pytables/test_store.py | 2 +- pandas/tests/io/test_sql.py | 9 +- pandas/tests/reshape/concat/test_concat.py | 2 +- pandas/tests/reshape/merge/test_multi.py | 12 +- pandas/tests/reshape/test_crosstab.py | 4 +- .../tests/series/methods/test_rename_axis.py | 6 +- pandas/tests/test_common.py | 12 -- pandas/tests/util/test_assert_index_equal.py | 4 +- pandas/tests/window/test_rolling.py | 2 +- 49 files changed, 246 insertions(+), 492 deletions(-) delete mode 100644 pandas/core/indexes/frozen.py delete mode 100644 pandas/tests/indexes/test_frozen.py diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 2a4d7791322e5..19c2abf10651d 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -137,15 +137,6 @@ We could naturally group by either the ``A`` or ``B`` columns, or both: ``df.groupby('A')`` is just syntactic sugar for ``df.groupby(df['A'])``. -If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all -the columns except the one we specify: - -.. ipython:: python - - df2 = df.set_index(["A", "B"]) - grouped = df2.groupby(level=df2.index.names.difference(["B"])) - grouped.sum() - The above GroupBy will split the DataFrame on its index (rows). 
To split by columns, first do a transpose: diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7d75a2a19b4b2..3bb61e7ce6215 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -86,6 +86,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor Other API changes ^^^^^^^^^^^^^^^^^ - 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`) +- :attr:`MultiIndex.codes`, :attr:`MultiIndex.levels`, and :attr:`MultiIndex.names` now returns a ``tuple`` instead of a ``FrozenList`` (:issue:`53531`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi index 15b56a6c2db63..8cd135c944dc6 100644 --- a/pandas/_libs/index.pyi +++ b/pandas/_libs/index.pyi @@ -2,7 +2,10 @@ import numpy as np from pandas._typing import npt -from pandas import MultiIndex +from pandas import ( + Index, + MultiIndex, +) from pandas.core.arrays import ExtensionArray multiindex_nulls_shift: int @@ -70,13 +73,13 @@ class MaskedUInt8Engine(MaskedIndexEngine): ... class MaskedBoolEngine(MaskedUInt8Engine): ... class BaseMultiIndexCodesEngine: - levels: list[np.ndarray] + levels: tuple[np.ndarray] offsets: np.ndarray # ndarray[uint64_t, ndim=1] def __init__( self, - levels: list[np.ndarray], # all entries hashable - labels: list[np.ndarray], # all entries integer-dtyped + levels: tuple[Index, ...], # all entries hashable + labels: tuple[np.ndarray], # all entries integer-dtyped offsets: np.ndarray, # np.ndarray[np.uint64, ndim=1] ) -> None: ... def get_indexer(self, target: npt.NDArray[np.object_]) -> npt.NDArray[np.intp]: ... diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index fa79b23b8209e..67e25531990ec 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -5698,7 +5698,7 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde idx = cast(MultiIndex, idx) levels = list(idx.levels) + [lev] codes = [np.repeat(x, nqs) for x in idx.codes] + [np.tile(lev_codes, len(idx))] - mi = MultiIndex(levels=levels, codes=codes, names=idx.names + [None]) + mi = MultiIndex(levels=levels, codes=codes, names=list(idx.names) + [None]) else: nidx = len(idx) idx_codes = coerce_indexer_dtype(np.arange(nidx), idx) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 42613ca4c6573..0a0d6243e8414 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -179,7 +179,6 @@ disallow_ndim_indexing, is_valid_positional_slice, ) -from pandas.core.indexes.frozen import FrozenList from pandas.core.missing import clean_reindex_fill_method from pandas.core.ops import get_op_result_name from pandas.core.ops.invalid import make_invalid_op @@ -1767,8 +1766,8 @@ def _get_default_index_names( return names - def _get_names(self) -> FrozenList: - return FrozenList((self.name,)) + def _get_names(self) -> tuple[Hashable | None, ...]: + return (self.name,) def _set_names(self, values, *, level=None) -> None: """ @@ -1866,7 +1865,7 @@ def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None: ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - names=['species', 'year']) + names=('species', 'year')) When renaming levels with a dict, levels can not be passed. 
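As the whatsnew entry above states, ``MultiIndex.names``, ``MultiIndex.levels`` and ``MultiIndex.codes`` now come back as plain tuples rather than ``FrozenList``. A minimal sketch of the new behaviour under this change (the comprehension shown is only one possible stand-in for the ``FrozenList.difference`` call in the removed groupby.rst example, not something prescribed by the patch):

>>> import pandas as pd
>>> mi = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=["x", "y"])
>>> mi.names
('x', 'y')
>>> isinstance(mi.levels, tuple), isinstance(mi.codes, tuple)
(True, True)
>>> [n for n in mi.names if n != "y"]  # replaces the former names.difference(["B"]) idiom
['x']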
@@ -1875,7 +1874,7 @@ def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None: ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - names=['snake', 'year']) + names=('snake', 'year')) """ if level is not None and not isinstance(self, ABCMultiIndex): raise ValueError("Level must be None for non-MultiIndex") @@ -1959,19 +1958,19 @@ def rename(self, name, inplace: bool = False) -> Self | None: >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], ... [2018, 2019]], - ... names=['kind', 'year']) + ... names=('kind', 'year')) >>> idx MultiIndex([('python', 2018), ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - names=['kind', 'year']) + names=('kind', 'year')) >>> idx.rename(['species', 'year']) MultiIndex([('python', 2018), ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - names=['species', 'year']) + names=('species', 'year')) >>> idx.rename('species') Traceback (most recent call last): TypeError: Must pass list-like as `names`. @@ -2135,22 +2134,22 @@ def droplevel(self, level: IndexLabel = 0): >>> mi MultiIndex([(1, 3, 5), (2, 4, 6)], - names=['x', 'y', 'z']) + names=('x', 'y', 'z')) >>> mi.droplevel() MultiIndex([(3, 5), (4, 6)], - names=['y', 'z']) + names=('y', 'z')) >>> mi.droplevel(2) MultiIndex([(1, 3), (2, 4)], - names=['x', 'y']) + names=('x', 'y')) >>> mi.droplevel('z') MultiIndex([(1, 3), (2, 4)], - names=['x', 'y']) + names=('x', 'y')) >>> mi.droplevel(['x', 'y']) Index([5, 6], dtype='int64', name='z') @@ -4865,7 +4864,9 @@ def _join_level( """ from pandas.core.indexes.multi import MultiIndex - def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]: + def _get_leaf_sorter( + labels: tuple[np.ndarray, ...] | list[np.ndarray] + ) -> npt.NDArray[np.intp]: """ Returns sorter for the inner most level while preserving the order of higher levels. @@ -6627,7 +6628,7 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]: MultiIndex([(1, 'red'), (2, 'blue'), (3, 'green')], - names=['number', 'color']) + names=('number', 'color')) Check whether the strings in the 'color' level of the MultiIndex are in a list of colors. @@ -7608,7 +7609,7 @@ def ensure_index_from_sequences(sequences, names=None) -> Index: >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"]) MultiIndex([('a', 'a'), ('a', 'b')], - names=['L1', 'L2']) + names=('L1', 'L2')) See Also -------- diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py deleted file mode 100644 index 2a8c777db47e1..0000000000000 --- a/pandas/core/indexes/frozen.py +++ /dev/null @@ -1,120 +0,0 @@ -""" -frozen (immutable) data structures to support MultiIndexing - -These are used for: - -- .names (FrozenList) - -""" -from __future__ import annotations - -from typing import ( - TYPE_CHECKING, - NoReturn, -) - -from pandas.core.base import PandasObject - -from pandas.io.formats.printing import pprint_thing - -if TYPE_CHECKING: - from pandas._typing import Self - - -class FrozenList(PandasObject, list): - """ - Container that doesn't allow setting item *but* - because it's technically hashable, will be used - for lookups, appropriately, etc. - """ - - # Side note: This has to be of type list. Otherwise, - # it messes up PyTables type checks. - - def union(self, other) -> FrozenList: - """ - Returns a FrozenList with other concatenated to the end of self. - - Parameters - ---------- - other : array-like - The array-like whose elements we are concatenating. - - Returns - ------- - FrozenList - The collection difference between self and other. 
- """ - if isinstance(other, tuple): - other = list(other) - return type(self)(super().__add__(other)) - - def difference(self, other) -> FrozenList: - """ - Returns a FrozenList with elements from other removed from self. - - Parameters - ---------- - other : array-like - The array-like whose elements we are removing self. - - Returns - ------- - FrozenList - The collection difference between self and other. - """ - other = set(other) - temp = [x for x in self if x not in other] - return type(self)(temp) - - # TODO: Consider deprecating these in favor of `union` (xref gh-15506) - # error: Incompatible types in assignment (expression has type - # "Callable[[FrozenList, Any], FrozenList]", base class "list" defined the - # type as overloaded function) - __add__ = __iadd__ = union # type: ignore[assignment] - - def __getitem__(self, n): - if isinstance(n, slice): - return type(self)(super().__getitem__(n)) - return super().__getitem__(n) - - def __radd__(self, other) -> Self: - if isinstance(other, tuple): - other = list(other) - return type(self)(other + list(self)) - - def __eq__(self, other: object) -> bool: - if isinstance(other, (tuple, FrozenList)): - other = list(other) - return super().__eq__(other) - - __req__ = __eq__ - - def __mul__(self, other) -> Self: - return type(self)(super().__mul__(other)) - - __imul__ = __mul__ - - def __reduce__(self): - return type(self), (list(self),) - - # error: Signature of "__hash__" incompatible with supertype "list" - def __hash__(self) -> int: # type: ignore[override] - return hash(tuple(self)) - - def _disabled(self, *args, **kwargs) -> NoReturn: - """ - This method will not function because object is immutable. - """ - raise TypeError(f"'{type(self).__name__}' does not support mutable operations.") - - def __str__(self) -> str: - return pprint_thing(self, quote_strings=True, escape_chars=("\t", "\r", "\n")) - - def __repr__(self) -> str: - return f"{type(self).__name__}({self!s})" - - __setitem__ = __setslice__ = _disabled # type: ignore[assignment] - __delitem__ = __delslice__ = _disabled - pop = append = extend = _disabled - remove = sort = insert = _disabled # type: ignore[assignment] diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f4bf4f3b2f275..a11dad9dcb518 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -101,7 +101,6 @@ ensure_index, get_unanimous_names, ) -from pandas.core.indexes.frozen import FrozenList from pandas.core.ops.invalid import make_invalid_op from pandas.core.sorting import ( get_group_index, @@ -303,7 +302,7 @@ class MultiIndex(Index): (1, 'blue'), (2, 'red'), (2, 'blue')], - names=['number', 'color']) + names=('number', 'color')) See further examples for how to construct a MultiIndex in the doc strings of the mentioned helper methods. @@ -313,9 +312,9 @@ class MultiIndex(Index): # initialize to zero-length tuples to make everything work _typ = "multiindex" - _names: list[Hashable | None] = [] - _levels = FrozenList() - _codes = FrozenList() + _names: tuple[Hashable | None, ...] = () + _levels: tuple[Index, ...] = () + _codes: tuple[np.ndarray, ...] 
= () _comparables = ["names"] sortorder: int | None @@ -351,7 +350,7 @@ def __new__( result._set_levels(levels, copy=copy, validate=False) result._set_codes(codes, copy=copy, validate=False) - result._names = [None] * len(levels) + result._names = (None,) * len(levels) if names is not None: # handles name validation result._set_names(names) @@ -370,15 +369,15 @@ def __new__( return result - def _validate_codes(self, level: list, code: list): + def _validate_codes(self, level: Index, code: np.ndarray) -> np.ndarray: """ Reassign code values as -1 if their corresponding levels are NaN. Parameters ---------- - code : list + code : Index Code to reassign. - level : list + level : np.ndarray Level to check for missing values (NaN, NaT, None). Returns @@ -388,24 +387,21 @@ def _validate_codes(self, level: list, code: list): """ null_mask = isna(level) if np.any(null_mask): - # error: Incompatible types in assignment - # (expression has type "ndarray[Any, dtype[Any]]", - # variable has type "List[Any]") - code = np.where(null_mask[code], -1, code) # type: ignore[assignment] + code = np.where(null_mask[code], -1, code) return code def _verify_integrity( self, - codes: list | None = None, - levels: list | None = None, + codes: tuple | None = None, + levels: tuple | None = None, levels_to_verify: list[int] | range | None = None, - ): + ) -> tuple: """ Parameters ---------- - codes : optional list + codes : optional tuple Codes to check for validity. Defaults to current codes. - levels : optional list + levels : optional tuple Levels to check for validity. Defaults to current levels. levels_to_validate: optional list Specifies the levels to verify. @@ -469,7 +465,7 @@ def _verify_integrity( else: result_codes.append(codes[i]) - new_codes = FrozenList(result_codes) + new_codes = tuple(result_codes) return new_codes @classmethod @@ -512,7 +508,7 @@ def from_arrays( (1, 'blue'), (2, 'red'), (2, 'blue')], - names=['number', 'color']) + names=('number', 'color')) """ error_msg = "Input must be a list / sequence of array-likes." if not is_list_like(arrays): @@ -584,7 +580,7 @@ def from_tuples( (1, 'blue'), (2, 'red'), (2, 'blue')], - names=['number', 'color']) + names=('number', 'color')) """ if not is_list_like(tuples): raise TypeError("Input must be a list / sequence of tuple-likes.") @@ -661,14 +657,14 @@ def from_product( >>> numbers = [0, 1, 2] >>> colors = ['green', 'purple'] >>> pd.MultiIndex.from_product([numbers, colors], - ... names=['number', 'color']) + ... 
names=('number', 'color')) MultiIndex([(0, 'green'), (0, 'purple'), (1, 'green'), (1, 'purple'), (2, 'green'), (2, 'purple')], - names=['number', 'color']) + names=('number', 'color')) """ from pandas.core.reshape.util import cartesian_product @@ -736,7 +732,7 @@ def from_frame( ('HI', 'Precip'), ('NJ', 'Temp'), ('NJ', 'Precip')], - names=['a', 'b']) + names=('a', 'b')) Using explicit names, instead of the column names @@ -745,7 +741,7 @@ def from_frame( ('HI', 'Precip'), ('NJ', 'Temp'), ('NJ', 'Precip')], - names=['state', 'observation']) + names=('state', 'observation')) """ if not isinstance(df, ABCDataFrame): raise TypeError("Input must be a DataFrame") @@ -768,16 +764,18 @@ def _values(self) -> np.ndarray: vals = index if isinstance(vals.dtype, CategoricalDtype): vals = cast("CategoricalIndex", vals) - vals = vals._data._internal_get_values() + # Incompatible types in assignment (expression has type + # "ExtensionArray | ndarray[Any, Any]", variable has type "Index") + vals = vals._data._internal_get_values() # type: ignore[assignment] if isinstance(vals.dtype, ExtensionDtype) or lib.is_np_dtype( vals.dtype, "mM" ): vals = vals.astype(object) - vals = np.array(vals, copy=False) - vals = algos.take_nd(vals, codes, fill_value=index._na_value) - values.append(vals) + array_vals = np.array(vals, copy=False) + array_vals = algos.take_nd(array_vals, codes, fill_value=index._na_value) + values.append(array_vals) arr = lib.fast_zip(values) return arr @@ -809,7 +807,7 @@ def dtypes(self) -> Series: Examples -------- >>> idx = pd.MultiIndex.from_product([(0, 1, 2), ('green', 'purple')], - ... names=['number', 'color']) + ... names=('number', 'color')) >>> idx MultiIndex([(0, 'green'), (0, 'purple'), @@ -817,7 +815,7 @@ def dtypes(self) -> Series: (1, 'purple'), (2, 'green'), (2, 'purple')], - names=['number', 'color']) + names=('number', 'color')) >>> idx.dtypes number int64 color object @@ -843,7 +841,7 @@ def size(self) -> int: # Levels Methods @cache_readonly - def levels(self) -> FrozenList: + def levels(self) -> tuple[Index, ...]: """ Levels of the MultiIndex. @@ -875,7 +873,8 @@ def levels(self) -> FrozenList: dog 4 >>> leg_num.index.levels - FrozenList([['mammal'], ['cat', 'dog', 'goat', 'human']]) + (Index(['mammal'], dtype='object', name='Category'), + Index(['cat', 'dog', 'goat', 'human'], dtype='object', name='Animals')) MultiIndex levels will not change even if the DataFrame using the MultiIndex does not contain all them anymore. 
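Because ``levels`` (and ``codes``) are now ordinary tuples, in-place mutation fails with the standard tuple error rather than a ``FrozenList`` one, and ``set_levels`` remains the supported way to replace a level. A small illustrative sketch assuming the behaviour introduced here:

>>> mi = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=["x", "y"])
>>> mi.levels[0] = pd.Index(["c", "d"])
Traceback (most recent call last):
    ...
TypeError: 'tuple' object does not support item assignment
>>> mi.set_levels(["c", "d"], level=0).levels[0].tolist()
['c', 'd']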
@@ -890,7 +889,8 @@ def levels(self) -> FrozenList: dog 4 >>> large_leg_num.index.levels - FrozenList([['mammal'], ['cat', 'dog', 'goat', 'human']]) + (Index(['mammal'], dtype='object', name='Category'), + Index(['cat', 'dog', 'goat', 'human'], dtype='object', name='Animals')) """ # Use cache_readonly to ensure that self.get_locs doesn't repeatedly # create new IndexEngine @@ -899,7 +899,7 @@ def levels(self) -> FrozenList: for level in result: # disallow midx.levels[0].name = "foo" level._no_setting_name = True - return FrozenList(result) + return tuple(result) def _set_levels( self, @@ -922,16 +922,14 @@ def _set_levels( raise ValueError("Length of levels must match length of level.") if level is None: - new_levels = FrozenList( - ensure_index(lev, copy=copy)._view() for lev in levels - ) + new_levels = tuple(ensure_index(lev, copy=copy)._view() for lev in levels) level_numbers = list(range(len(new_levels))) else: level_numbers = [self._get_level_number(lev) for lev in level] new_levels_list = list(self._levels) for lev_num, lev in zip(level_numbers, levels): new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view() - new_levels = FrozenList(new_levels_list) + new_levels = tuple(new_levels_list) if verify_integrity: new_codes = self._verify_integrity( @@ -940,7 +938,7 @@ def _set_levels( self._codes = new_codes names = self.names - self._levels = new_levels + self._levels: tuple[Index, ...] = new_levels if any(names): self._set_names(names) @@ -985,7 +983,7 @@ def set_levels( (2, 'two'), (3, 'one'), (3, 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_levels([['a', 'b', 'c'], [1, 2]]) MultiIndex([('a', 1), @@ -994,7 +992,7 @@ def set_levels( ('b', 2), ('c', 1), ('c', 2)], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_levels(['a', 'b', 'c'], level=0) MultiIndex([('a', 'one'), ('a', 'two'), @@ -1002,7 +1000,7 @@ def set_levels( ('b', 'two'), ('c', 'one'), ('c', 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_levels(['a', 'b'], level='bar') MultiIndex([(1, 'a'), (1, 'b'), @@ -1010,7 +1008,7 @@ def set_levels( (2, 'b'), (3, 'a'), (3, 'b')], - names=['foo', 'bar']) + names=('foo', 'bar')) If any of the levels passed to ``set_levels()`` exceeds the existing length, all of the values from that argument will @@ -1024,10 +1022,10 @@ def set_levels( ('b', 2), ('c', 1), ('c', 2)], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels - FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]]) - """ + (Index(['a', 'b', 'c'], dtype='object', name='foo'), Index([1, 2, 3, 4], dtype='int64', name='bar')) + """ # noqa: E501 if isinstance(levels, Index): pass @@ -1080,7 +1078,7 @@ def levshape(self) -> Shape: # Codes Methods @property - def codes(self) -> FrozenList: + def codes(self) -> tuple: return self._codes def _set_codes( @@ -1100,7 +1098,7 @@ def _set_codes( level_numbers: list[int] | range if level is None: - new_codes = FrozenList( + new_codes = tuple( _coerce_indexer_frozen(level_codes, lev, copy=copy).view() for lev, level_codes in zip(self._levels, codes) ) @@ -1113,7 +1111,7 @@ def _set_codes( new_codes_list[lev_num] = _coerce_indexer_frozen( level_codes, lev, copy=copy ) - new_codes = FrozenList(new_codes_list) + new_codes = tuple(new_codes_list) if verify_integrity: new_codes = self._verify_integrity( @@ -1154,32 +1152,32 @@ def set_codes( (1, 'two'), (2, 'one'), (2, 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]]) 
MultiIndex([(2, 'one'), (1, 'one'), (2, 'two'), (1, 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_codes([1, 0, 1, 0], level=0) MultiIndex([(2, 'one'), (1, 'two'), (2, 'one'), (1, 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_codes([0, 0, 1, 1], level='bar') MultiIndex([(1, 'one'), (1, 'one'), (2, 'two'), (2, 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1]) MultiIndex([(2, 'one'), (1, 'one'), (2, 'two'), (1, 'two')], - names=['foo', 'bar']) + names=('foo', 'bar')) """ level, codes = _require_listlike(level, codes, "Codes") @@ -1447,6 +1445,7 @@ def format( if len(self) == 0: return [] + formatted: Iterable stringified_levels = [] for lev, level_codes in zip(self.levels, self.codes): na = na_rep if na_rep is not None else _get_na_rep(lev.dtype) @@ -1470,7 +1469,9 @@ def format( stringified_levels.append(formatted) result_levels = [] - for lev, lev_name in zip(stringified_levels, self.names): + # Incompatible types in assignment (expression has type "Iterable[Any]", + # variable has type "Index") + for lev, lev_name in zip(stringified_levels, self.names): # type: ignore[assignment] level = [] if names: @@ -1513,6 +1514,7 @@ def _format_multi( if len(self) == 0: return [] + formatted: Iterable stringified_levels = [] for lev, level_codes in zip(self.levels, self.codes): na = _get_na_rep(lev.dtype) @@ -1537,7 +1539,9 @@ def _format_multi( stringified_levels.append(formatted) result_levels = [] - for lev, lev_name in zip(stringified_levels, self.names): + # Incompatible types in assignment (expression has type "Iterable[Any]", + # variable has type "Index") + for lev, lev_name in zip(stringified_levels, self.names): # type: ignore[assignment] level = [] if include_names: @@ -1569,8 +1573,8 @@ def _format_multi( # -------------------------------------------------------------------- # Names Methods - def _get_names(self) -> FrozenList: - return FrozenList(self._names) + def _get_names(self) -> tuple[Hashable | None, ...]: + return self._names def _set_names(self, names, *, level=None, validate: bool = True) -> None: """ @@ -1617,6 +1621,7 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None: level = [self._get_level_number(lev) for lev in level] # set the name + new_names = list(self._names) for lev, name in zip(level, names): if name is not None: # GH 20527 @@ -1625,7 +1630,8 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None: raise TypeError( f"{type(self).__name__}.name must be a hashable type" ) - self._names[lev] = name + new_names[lev] = name + self._names = tuple(new_names) # If .levels has been accessed, the names in our cache will be stale. self._reset_cache() @@ -1644,9 +1650,9 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None: >>> mi MultiIndex([(1, 3, 5), (2, 4, 6)], - names=['x', 'y', 'z']) + names=('x', 'y', 'z')) >>> mi.names - FrozenList(['x', 'y', 'z']) + ('x', 'y', 'z') """, ) @@ -1957,7 +1963,7 @@ def to_flat_index(self) -> Index: # type: ignore[override] -------- >>> index = pd.MultiIndex.from_product( ... [['foo', 'bar'], ['baz', 'qux']], - ... names=['a', 'b']) + ... 
names=('a', 'b')) >>> index.to_flat_index() Index([('foo', 'baz'), ('foo', 'qux'), ('bar', 'baz'), ('bar', 'qux')], @@ -2115,7 +2121,7 @@ def remove_unused_levels(self) -> MultiIndex: >>> mi2 = mi[2:].remove_unused_levels() >>> mi2.levels - FrozenList([[1], ['a', 'b']]) + (Index([1], dtype='int64'), Index(['a', 'b'], dtype='object')) """ new_levels = [] new_codes = [] @@ -2388,13 +2394,13 @@ def drop( # type: ignore[override] (1, 'purple'), (2, 'green'), (2, 'purple')], - names=['number', 'color']) + names=('number', 'color')) >>> idx.drop([(1, 'green'), (2, 'purple')]) MultiIndex([(0, 'green'), (0, 'purple'), (1, 'purple'), (2, 'green')], - names=['number', 'color']) + names=('number', 'color')) We can also drop from a specific level. @@ -2402,12 +2408,12 @@ def drop( # type: ignore[override] MultiIndex([(0, 'purple'), (1, 'purple'), (2, 'purple')], - names=['number', 'color']) + names=('number', 'color')) >>> idx.drop([1, 2], level=0) MultiIndex([(0, 'green'), (0, 'purple')], - names=['number', 'color']) + names=('number', 'color')) """ if level is not None: return self._drop_from_level(codes, level, errors) @@ -2547,17 +2553,17 @@ def reorder_levels(self, order) -> MultiIndex: >>> mi MultiIndex([(1, 3), (2, 4)], - names=['x', 'y']) + names=('x', 'y')) >>> mi.reorder_levels(order=[1, 0]) MultiIndex([(3, 1), (4, 2)], - names=['y', 'x']) + names=('y', 'x')) >>> mi.reorder_levels(order=['y', 'x']) MultiIndex([(3, 1), (4, 2)], - names=['y', 'x']) + names=('y', 'x')) """ order = [self._get_level_number(i) for i in order] result = self._reorder_ilevels(order) @@ -2926,7 +2932,9 @@ def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left" if lab not in lev and not isna(lab): # short circuit try: - loc = algos.searchsorted(lev, lab, side=side) + # Argument 1 to "searchsorted" has incompatible type "Index"; + # expected "ExtensionArray | ndarray[Any, Any]" + loc = algos.searchsorted(lev, lab, side=side) # type: ignore[arg-type] except TypeError as err: # non-comparable e.g. test_slice_locs_with_type_mismatch raise TypeError(f"Level type mismatch: {lab}") from err @@ -3594,7 +3602,7 @@ def _reorder_indexer( k_codes = self.levels[i].get_indexer(k) k_codes = k_codes[k_codes >= 0] # Filter absent keys # True if the given codes are not ordered - need_sort = (k_codes[:-1] > k_codes[1:]).any() + need_sort = bool((k_codes[:-1] > k_codes[1:]).any()) else: need_sort = True elif isinstance(k, slice): @@ -4027,7 +4035,7 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]: __invert__ = make_invalid_op("__invert__") -def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int: +def _lexsort_depth(codes: tuple[np.ndarray], nlevels: int) -> int: """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted.""" int64_codes = [ensure_int64(level_codes) for level_codes in codes] for k in range(nlevels, 0, -1): diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 8c65020e38a6d..24c9b10c2b3a6 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1805,8 +1805,9 @@ def _wrap_result(self, result): if self.kind == "period" and not isinstance(result.index, PeriodIndex): if isinstance(result.index, MultiIndex): # GH 24103 - e.g. 
groupby resample - if not isinstance(result.index.levels[-1], PeriodIndex): - new_level = result.index.levels[-1].to_period(self.freq) + new_level = result.index.levels[-1] + if not isinstance(new_level, PeriodIndex): + new_level = new_level.to_period(self.freq) # type: ignore[attr-defined] result.index = result.index.set_levels(new_level, level=-1) else: result.index = result.index.to_period(self.freq) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 4bf1742a83c66..3ee896275a67a 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -117,7 +117,7 @@ def melt( else: mdata[col] = np.tile(id_data._values, num_cols_adjusted) - mcolumns = id_vars + var_name + [value_name] + mcolumns = id_vars + list(var_name) + [value_name] if frame.shape[1] > 0 and not any( not isinstance(dt, np.dtype) and dt._supports_2d for dt in frame.dtypes diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index e53eea6f7f075..95261394994ae 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -105,7 +105,6 @@ from pandas import DataFrame from pandas.core import groupby from pandas.core.arrays import DatetimeArray - from pandas.core.indexes.frozen import FrozenList _factorizers = { np.int64: libhashtable.Int64Factorizer, @@ -1809,7 +1808,7 @@ def restore_dropped_levels_multijoin( join_index: Index, lindexer: npt.NDArray[np.intp], rindexer: npt.NDArray[np.intp], -) -> tuple[FrozenList, FrozenList, FrozenList]: +) -> tuple[tuple, tuple, tuple]: """ *this is an internal non-public method* @@ -1841,7 +1840,7 @@ def restore_dropped_levels_multijoin( levels of combined multiindexes labels : np.ndarray[np.intp] labels of combined multiindexes - names : List[Hashable] + names : tuple[Hashable] names of combined multiindex levels """ @@ -1883,12 +1882,11 @@ def _convert_to_multiindex(index: Index) -> MultiIndex: else: restore_codes = algos.take_nd(codes, indexer, fill_value=-1) - # error: Cannot determine type of "__add__" - join_levels = join_levels + [restore_levels] # type: ignore[has-type] - join_codes = join_codes + [restore_codes] # type: ignore[has-type] - join_names = join_names + [dropped_level_name] + join_levels = join_levels + (restore_levels,) + join_codes = join_codes + (restore_codes,) + join_names = join_names + (dropped_level_name,) - return join_levels, join_codes, join_names + return tuple(join_levels), tuple(join_codes), tuple(join_names) class _OrderedMerge(_MergeOperation): diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index db28bfb1e9200..3abc1408584a0 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -410,7 +410,7 @@ def _all_key(key): if isinstance(piece.index, MultiIndex): # We are adding an empty level transformed_piece.index = MultiIndex.from_tuples( - [all_key], names=piece.index.names + [None] + [all_key], names=piece.index.names + (None,) ) else: transformed_piece.index = Index([all_key], name=piece.index.name) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 39cd619715a91..ad313b112a2e7 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -59,7 +59,6 @@ ) from pandas.core.arrays import ExtensionArray - from pandas.core.indexes.frozen import FrozenList class _Unstacker: @@ -335,21 +334,15 @@ def get_new_columns(self, value_columns: Index | None): width = len(value_columns) propagator = np.repeat(np.arange(width), stride) - new_levels: FrozenList | list[Index] + new_levels: 
tuple[Index, ...] if isinstance(value_columns, MultiIndex): - # error: Cannot determine type of "__add__" [has-type] - new_levels = value_columns.levels + ( # type: ignore[has-type] - self.removed_level_full, - ) + new_levels = value_columns.levels + (self.removed_level_full,) new_names = value_columns.names + (self.removed_name,) new_codes = [lab.take(propagator) for lab in value_columns.codes] else: - new_levels = [ - value_columns, - self.removed_level_full, - ] + new_levels = (value_columns, self.removed_level_full) new_names = [value_columns.name, self.removed_name] new_codes = [propagator] @@ -377,7 +370,7 @@ def _repeater(self) -> np.ndarray: return repeater @cache_readonly - def new_index(self) -> MultiIndex: + def new_index(self) -> MultiIndex | Index: # Does not depend on values or value_columns result_codes = [lab.take(self.compressor) for lab in self.sorted_labels[:-1]] @@ -715,7 +708,7 @@ def stack_multiple(frame: DataFrame, level, dropna: bool = True, sort: bool = Tr return result -def _stack_multi_column_index(columns: MultiIndex) -> MultiIndex: +def _stack_multi_column_index(columns: MultiIndex) -> MultiIndex | Index: """Creates a MultiIndex from the first N-1 levels of this MultiIndex.""" if len(columns.levels) <= 2: return columns.levels[0]._rename(name=columns.names[0]) @@ -978,27 +971,26 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame: # Construct the correct MultiIndex by combining the frame's index and # stacked columns. - index_levels: list | FrozenList if isinstance(frame.index, MultiIndex): index_levels = frame.index.levels - index_codes = list(np.tile(frame.index.codes, (1, ratio))) + index_codes = tuple(np.tile(frame.index.codes, (1, ratio))) else: codes, uniques = factorize(frame.index, use_na_sentinel=False) - index_levels = [uniques] - index_codes = list(np.tile(codes, (1, ratio))) + # Incompatible types in assignment (expression has type + # "tuple[ndarray[Any, Any] | Index]", variable has type "tuple[Index, ...]") + index_levels = (uniques,) # type: ignore[assignment] + index_codes = tuple(np.tile(codes, (1, ratio))) if isinstance(ordered_stack_cols, MultiIndex): column_levels = ordered_stack_cols.levels column_codes = ordered_stack_cols.drop_duplicates().codes else: - column_levels = [ordered_stack_cols.unique()] - column_codes = [factorize(ordered_stack_cols_unique, use_na_sentinel=False)[0]] - # error: Incompatible types in assignment (expression has type "list[ndarray[Any, - # dtype[Any]]]", variable has type "FrozenList") - column_codes = [np.repeat(codes, len(frame)) for codes in column_codes] # type: ignore[assignment] + column_levels = (ordered_stack_cols.unique(),) + column_codes = (factorize(ordered_stack_cols_unique, use_na_sentinel=False)[0],) + column_codes = tuple(np.repeat(codes, len(frame)) for codes in column_codes) result.index = MultiIndex( levels=index_levels + column_levels, codes=index_codes + column_codes, - names=frame.index.names + list(ordered_stack_cols.names), + names=frame.index.names + ordered_stack_cols.names, verify_integrity=False, ) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 804beb44bd699..fa85897872981 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -3514,7 +3514,7 @@ def str_extractall(arr, pat, flags: int = 0) -> DataFrame: from pandas import MultiIndex - index = MultiIndex.from_tuples(index_list, names=arr.index.names + ["match"]) + index = MultiIndex.from_tuples(index_list, names=arr.index.names + ("match",)) dtype 
= _result_dtype(arr) result = arr._constructor_expanddim( diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b55432085b928..b1a1da387ab83 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -819,12 +819,12 @@ def _apply_pairwise( else: idx_codes, idx_levels = factorize(result.index) result_codes = [idx_codes] - result_levels = [idx_levels] + result_levels = [idx_levels] # type: ignore[list-item] result_names = [result.index.name] # 3) Create the resulting index by combining 1) + 2) result_codes = groupby_codes + result_codes - result_levels = groupby_levels + result_levels + result_levels = groupby_levels + result_levels # type: ignore[assignment] result_names = self._grouper.names + result_names result_index = MultiIndex( diff --git a/pandas/tests/frame/methods/test_rename_axis.py b/pandas/tests/frame/methods/test_rename_axis.py index dd4a77c6509b8..908a3f728c749 100644 --- a/pandas/tests/frame/methods/test_rename_axis.py +++ b/pandas/tests/frame/methods/test_rename_axis.py @@ -60,15 +60,15 @@ def test_rename_axis_mapper(self): # Test for renaming index using dict result = df.rename_axis(index={"ll": "foo"}) - assert result.index.names == ["foo", "nn"] + assert result.index.names == ("foo", "nn") # Test for renaming index using a function result = df.rename_axis(index=str.upper, axis=0) - assert result.index.names == ["LL", "NN"] + assert result.index.names == ("LL", "NN") # Test for renaming index providing complete list result = df.rename_axis(index=["foo", "goo"]) - assert result.index.names == ["foo", "goo"] + assert result.index.names == ("foo", "goo") # Test for changing index and columns at same time sdf = df.reset_index().set_index("nn").drop(columns=["ll", "y"]) diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 62f8458441e17..4fbc84cd1a66c 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -163,7 +163,7 @@ def test_set_index_names(self): ) df.index.name = "name" - assert df.set_index(df.index).index.names == ["name"] + assert df.set_index(df.index).index.names == ("name",) mi = MultiIndex.from_arrays(df[["A", "B"]].T.values, names=["A", "B"]) mi2 = MultiIndex.from_arrays( @@ -172,7 +172,7 @@ def test_set_index_names(self): df = df.set_index(["A", "B"]) - assert df.set_index(df.index).index.names == ["A", "B"] + assert df.set_index(df.index).index.names == ("A", "B") # Check that set_index isn't converting a MultiIndex into an Index assert isinstance(df.set_index(df.index).index, MultiIndex) @@ -292,7 +292,7 @@ def test_set_index_pass_single_array( # only valid column keys are dropped # since B is always passed as array above, nothing is dropped expected = df.set_index(["B"], drop=False, append=append) - expected.index.names = [index_name] + name if append else name + expected.index.names = [index_name] + list(name) if append else name tm.assert_frame_equal(result, expected) @@ -464,12 +464,12 @@ def test_set_index_datetime(self): df = df.set_index("label", append=True) tm.assert_index_equal(df.index.levels[0], expected) tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label")) - assert df.index.names == ["datetime", "label"] + assert df.index.names == ("datetime", "label") df = df.swaplevel(0, 1) tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label")) tm.assert_index_equal(df.index.levels[1], expected) - assert df.index.names == ["label", "datetime"] + assert 
df.index.names == ("label", "datetime") df = DataFrame(np.random.default_rng(2).random(6)) idx1 = DatetimeIndex( diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index c146dcc9c2d71..b856a7ff5d26b 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -857,7 +857,7 @@ def test_sort_index_level_and_column_label( ) # Get index levels from df_idx - levels = df_idx.index.names + levels = list(df_idx.index.names) # Compute expected by sorting on columns and the setting index expected = df_none.sort_values( @@ -875,7 +875,7 @@ def test_sort_column_level_and_index_label( # GH#14353 # Get levels from df_idx - levels = df_idx.index.names + levels = list(df_idx.index.names) # Compute expected by sorting on axis=0, setting index levels, and then # transposing. For some cases this will result in a frame with diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 2501427f985a9..15426b117614c 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -803,7 +803,7 @@ def test_unstack_multi_level_cols(self): [[10, 20, 30], [10, 20, 40]], names=["i1", "i2", "i3"] ), ) - assert df.unstack(["i2", "i1"]).columns.names[-2:] == ["i2", "i1"] + assert df.unstack(["i2", "i1"]).columns.names[-2:] == ("i2", "i1") def test_unstack_multi_level_rows_and_cols(self): # PH 28306: Unstack df with multi level cols and rows @@ -1834,7 +1834,7 @@ def test_stack_unstack_preserve_names( unstacked = frame.unstack() assert unstacked.index.name == "first" - assert unstacked.columns.names == ["exp", "second"] + assert unstacked.columns.names == ("exp", "second") restacked = unstacked.stack(future_stack=future_stack) assert restacked.index.names == frame.index.names diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index fc7aa9e7b2c46..371bca6b9fc33 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -35,15 +35,15 @@ def test_set_axis_name_mi(self, func): columns=MultiIndex.from_tuples([("C", x) for x in list("xyz")]), ) - level_names = ["L1", "L2"] + level_names = ("L1", "L2") result = methodcaller(func, level_names)(df) assert result.index.names == level_names - assert result.columns.names == [None, None] + assert result.columns.names == (None, None) result = methodcaller(func, level_names, axis=1)(df) - assert result.columns.names == ["L1", "L2"] - assert result.index.names == [None, None] + assert result.columns.names == level_names + assert result.index.names == (None, None) def test_nonzero_single_element(self): # allow single item via bool method diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index 3648961eb3808..67e8ca2106840 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -24,9 +24,9 @@ def test_set_axis_name_mi(self, func): result = methodcaller(func, ["L1", "L2"])(ser) assert ser.index.name is None - assert ser.index.names == ["l1", "l2"] + assert ser.index.names == ("l1", "l2") assert result.index.name is None - assert result.index.names, ["L1", "L2"] + assert result.index.names == ("L1", "L2") def test_set_axis_name_raises(self): ser = Series([1]) diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py index af0deba138469..9b825b73c26c0 100644 --- a/pandas/tests/groupby/methods/test_quantile.py +++ 
b/pandas/tests/groupby/methods/test_quantile.py @@ -454,5 +454,8 @@ def test_groupby_quantile_nonmulti_levels_order(): tm.assert_series_equal(result, expected) # We need to check that index levels are not sorted - expected_levels = pd.core.indexes.frozen.FrozenList([["B", "A"], [0.2, 0.8]]) - tm.assert_equal(result.index.levels, expected_levels) + tm.assert_index_equal( + result.index.levels[0], Index(["B", "A"], dtype=object, name="cat1") + ) + tm.assert_index_equal(result.index.levels[1], Index([0.2, 0.8])) + assert isinstance(result.index.levels, tuple) diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 4d610018917f6..bfe7d8075f430 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -109,7 +109,7 @@ def rebuild_index(df): gr = df.groupby(keys, sort=isort) right = gr["3rd"].apply(Series.value_counts, **kwargs) - right.index.names = right.index.names[:-1] + ["3rd"] + right.index.names = tuple(list(right.index.names[:-1]) + ["3rd"]) # https://github.com/pandas-dev/pandas/issues/49909 right = right.rename(name) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 26b31e202e6e6..ee82d8ad37c2d 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -986,7 +986,7 @@ def test_apply_multi_level_name(category): ).set_index(["A", "B"]) result = df.groupby("B", observed=False).apply(lambda x: x.sum()) tm.assert_frame_equal(result, expected) - assert df.index.names == ["A", "B"] + assert df.index.names == ("A", "B") def test_groupby_apply_datetime_result_dtypes(using_infer_string): diff --git a/pandas/tests/indexes/multi/test_astype.py b/pandas/tests/indexes/multi/test_astype.py index 29908537fbe59..c993f425fa132 100644 --- a/pandas/tests/indexes/multi/test_astype.py +++ b/pandas/tests/indexes/multi/test_astype.py @@ -11,7 +11,7 @@ def test_astype(idx): actual = idx.astype("O") tm.assert_copy(actual.levels, expected.levels) tm.assert_copy(actual.codes, expected.codes) - assert actual.names == list(expected.names) + assert actual.names == expected.names with pytest.raises(TypeError, match="^Setting.*dtype.*object"): idx.astype(np.dtype(int)) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 38e0920b7004e..2b16f2c4c095d 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -27,7 +27,7 @@ def test_constructor_single_level(): assert isinstance(result, MultiIndex) expected = Index(["foo", "bar", "baz", "qux"], name="first") tm.assert_index_equal(result.levels[0], expected) - assert result.names == ["first"] + assert result.names == ("first",) def test_constructor_no_levels(): @@ -277,7 +277,7 @@ def test_from_arrays_empty(): assert isinstance(result, MultiIndex) expected = Index([], name="A") tm.assert_index_equal(result.levels[0], expected) - assert result.names == ["A"] + assert result.names == ("A",) # N levels for N in [2, 3]: @@ -424,7 +424,7 @@ def test_from_product_empty_one_level(): result = MultiIndex.from_product([[]], names=["A"]) expected = Index([], name="A") tm.assert_index_equal(result.levels[0], expected) - assert result.names == ["A"] + assert result.names == ("A",) @pytest.mark.parametrize( @@ -712,7 +712,7 @@ def test_from_frame_dtype_fidelity(): @pytest.mark.parametrize( - "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], 
["x", "y"])] + "names_in,names_out", [(None, (("L1", "x"), ("L2", "y"))), (["x", "y"], ("x", "y"))] ) def test_from_frame_valid_names(names_in, names_out): # GH 22420 @@ -812,13 +812,13 @@ def test_constructor_with_tz(): result = MultiIndex.from_arrays([index, columns]) - assert result.names == ["dt1", "dt2"] + assert result.names == ("dt1", "dt2") tm.assert_index_equal(result.levels[0], index) tm.assert_index_equal(result.levels[1], columns) result = MultiIndex.from_arrays([Series(index), Series(columns)]) - assert result.names == ["dt1", "dt2"] + assert result.names == ("dt1", "dt2") tm.assert_index_equal(result.levels[0], index) tm.assert_index_equal(result.levels[1], columns) diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 2e09a580f9528..14d327093500e 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -70,7 +70,7 @@ def test_copy_method(deep): @pytest.mark.parametrize( "kwarg, value", [ - ("names", ["third", "fourth"]), + ("names", ("third", "fourth")), ], ) def test_copy_method_kwargs(deep, kwarg, value): diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 1bbeedac3fb10..622520f45f904 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -112,7 +112,7 @@ def test_duplicate_multiindex_codes(): mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]]) -@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]]) +@pytest.mark.parametrize("names", [("a", "b", "a"), (1, 1, 2), (1, "a", 1)]) def test_duplicate_level_names(names): # GH18872, GH19029 mi = MultiIndex.from_product([[0, 1]] * 3, names=names) diff --git a/pandas/tests/indexes/multi/test_formats.py b/pandas/tests/indexes/multi/test_formats.py index 52ff3109128f2..286a048f02dad 100644 --- a/pandas/tests/indexes/multi/test_formats.py +++ b/pandas/tests/indexes/multi/test_formats.py @@ -98,14 +98,14 @@ def test_repr_max_seq_items_equal_to_n(self, idx): ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], - names=['first', 'second'])""" + names=('first', 'second'))""" assert result == expected def test_repr(self, idx): result = idx[:1].__repr__() expected = """\ MultiIndex([('foo', 'one')], - names=['first', 'second'])""" + names=('first', 'second'))""" assert result == expected result = idx.__repr__() @@ -116,7 +116,7 @@ def test_repr(self, idx): ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], - names=['first', 'second'])""" + names=('first', 'second'))""" assert result == expected with pd.option_context("display.max_seq_items", 5): @@ -127,7 +127,7 @@ def test_repr(self, idx): ... ('qux', 'one'), ('qux', 'two')], - names=['first', 'second'], length=6)""" + names=('first', 'second'), length=6)""" assert result == expected # display.max_seq_items == 1 @@ -136,7 +136,7 @@ def test_repr(self, idx): expected = """\ MultiIndex([... 
('qux', 'two')], - names=['first', ...], length=6)""" + names=('first', ...), length=6)""" assert result == expected def test_rjust(self): @@ -147,7 +147,7 @@ def test_rjust(self): result = mi[:1].__repr__() expected = """\ MultiIndex([('a', 9, '2000-01-01 00:00:00')], - names=['a', 'b', 'dti'])""" + names=('a', 'b', 'dti'))""" assert result == expected result = mi[::500].__repr__() @@ -156,7 +156,7 @@ def test_rjust(self): ( 'a', 9, '2000-01-01 00:08:20'), ('abc', 10, '2000-01-01 00:16:40'), ('abc', 10, '2000-01-01 00:25:00')], - names=['a', 'b', 'dti'])""" + names=('a', 'b', 'dti'))""" assert result == expected result = mi.__repr__() @@ -182,7 +182,7 @@ def test_rjust(self): ('abc', 10, '2000-01-01 00:33:17'), ('abc', 10, '2000-01-01 00:33:18'), ('abc', 10, '2000-01-01 00:33:19')], - names=['a', 'b', 'dti'], length=2000)""" + names=('a', 'b', 'dti'), length=2000)""" assert result == expected def test_tuple_width(self): @@ -194,7 +194,7 @@ def test_tuple_width(self): mi = MultiIndex.from_arrays(levels, names=names) result = mi[:1].__repr__() expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], - names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" # noqa: E501 + names=('a', 'b', 'dti_1', 'dti_2', 'dti_3'))""" # noqa: E501 assert result == expected result = mi[:10].__repr__() @@ -209,7 +209,7 @@ def test_tuple_width(self): ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)], - names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + names=('a', 'b', 'dti_1', 'dti_2', 'dti_3'))""" assert result == expected result = mi.__repr__() @@ -235,7 +235,7 @@ def test_tuple_width(self): ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...), ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...), ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)], - names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" + names=('a', 'b', 'dti_1', 'dti_2', 'dti_3'), length=2000)""" assert result == expected def test_multiindex_long_element(self): diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index dd4bba42eda6f..d17b0aae953cd 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -101,16 +101,16 @@ def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data): def test_set_name_methods(idx): # so long as these are synonyms, we don't need to test set_names - index_names = ["first", "second"] + index_names = ("first", "second") assert idx.rename == idx.set_names - new_names = [name + "SUFFIX" for name in index_names] + new_names = tuple(name + "SUFFIX" for name in index_names) ind = idx.set_names(new_names) assert idx.names == index_names assert ind.names == new_names msg = "Length of names must match number of levels in MultiIndex" with pytest.raises(ValueError, match=msg): ind.set_names(new_names + new_names) - new_names2 = [name + "SUFFIX2" for name in new_names] + new_names2 = tuple(name + "SUFFIX2" for name in new_names) res = ind.set_names(new_names2, inplace=True) assert res is None assert ind.names == new_names2 @@ -118,11 +118,11 @@ def test_set_name_methods(idx): # set names for specific level (# GH7792) ind = idx.set_names(new_names[0], level=0) assert idx.names == index_names - assert ind.names == [new_names[0], index_names[1]] + assert ind.names == (new_names[0], index_names[1]) res = 
ind.set_names(new_names2[0], level=0, inplace=True) assert res is None - assert ind.names == [new_names2[0], index_names[1]] + assert ind.names == (new_names2[0], index_names[1]) # set names for multiple levels ind = idx.set_names(new_names, level=[0, 1]) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index d956747cbc859..9fabd8622a108 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -216,7 +216,9 @@ def test_can_hold_identifiers(idx): def test_metadata_immutable(idx): levels, codes = idx.levels, idx.codes # shouldn't be able to set at either the top level or base level - mutable_regex = re.compile("does not support mutable operations") + mutable_regex = re.compile( + "does not support mutable operations|does not support item assignment" + ) with pytest.raises(TypeError, match=mutable_regex): levels[0] = levels[0] with pytest.raises(TypeError, match=mutable_regex): diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py index 45f19b4d70fb9..aff9ebfb1c1e3 100644 --- a/pandas/tests/indexes/multi/test_names.py +++ b/pandas/tests/indexes/multi/test_names.py @@ -60,20 +60,20 @@ def test_copy_names(): multi_idx1 = multi_idx.copy() assert multi_idx.equals(multi_idx1) - assert multi_idx.names == ["MyName1", "MyName2"] - assert multi_idx1.names == ["MyName1", "MyName2"] + assert multi_idx.names == ("MyName1", "MyName2") + assert multi_idx1.names == ("MyName1", "MyName2") multi_idx2 = multi_idx.copy(names=["NewName1", "NewName2"]) assert multi_idx.equals(multi_idx2) - assert multi_idx.names == ["MyName1", "MyName2"] - assert multi_idx2.names == ["NewName1", "NewName2"] + assert multi_idx.names == ("MyName1", "MyName2") + assert multi_idx2.names == ("NewName1", "NewName2") multi_idx3 = multi_idx.copy(name=["NewName1", "NewName2"]) assert multi_idx.equals(multi_idx3) - assert multi_idx.names == ["MyName1", "MyName2"] - assert multi_idx3.names == ["NewName1", "NewName2"] + assert multi_idx.names == ("MyName1", "MyName2") + assert multi_idx3.names == ("NewName1", "NewName2") # gh-35592 with pytest.raises(ValueError, match="Length of new names must be 2, got 1"): @@ -85,8 +85,8 @@ def test_copy_names(): def test_names(idx): # names are assigned in setup - assert idx.names == ["first", "second"] - level_names = [level.name for level in idx.levels] + assert idx.names == ("first", "second") + level_names = tuple(level.name for level in idx.levels) assert level_names == idx.names # setting bad names on existing diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index d1b4fe8b98760..d949a390bd97f 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -12,13 +12,13 @@ def test_reindex(idx): result, indexer = idx.reindex(list(idx[:4])) assert isinstance(result, MultiIndex) - assert result.names == ["first", "second"] + assert result.names == ("first", "second") assert [level.name for level in result.levels] == ["first", "second"] result, indexer = idx.reindex(list(idx)) assert isinstance(result, MultiIndex) assert indexer is None - assert result.names == ["first", "second"] + assert result.names == ("first", "second") assert [level.name for level in result.levels] == ["first", "second"] @@ -52,27 +52,27 @@ def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx): other_dtype = MultiIndex.from_product([[1, 2], [3, 4]]) # list & ndarray 
cases - assert idx.reindex([])[0].names == [None, None] - assert idx.reindex(np.array([]))[0].names == [None, None] - assert idx.reindex(target.tolist())[0].names == [None, None] - assert idx.reindex(target.values)[0].names == [None, None] - assert idx.reindex(other_dtype.tolist())[0].names == [None, None] - assert idx.reindex(other_dtype.values)[0].names == [None, None] + assert idx.reindex([])[0].names == (None, None) + assert idx.reindex(np.array([]))[0].names == (None, None) + assert idx.reindex(target.tolist())[0].names == (None, None) + assert idx.reindex(target.values)[0].names == (None, None) + assert idx.reindex(other_dtype.tolist())[0].names == (None, None) + assert idx.reindex(other_dtype.values)[0].names == (None, None) idx.names = ["foo", "bar"] - assert idx.reindex([])[0].names == ["foo", "bar"] - assert idx.reindex(np.array([]))[0].names == ["foo", "bar"] - assert idx.reindex(target.tolist())[0].names == ["foo", "bar"] - assert idx.reindex(target.values)[0].names == ["foo", "bar"] - assert idx.reindex(other_dtype.tolist())[0].names == ["foo", "bar"] - assert idx.reindex(other_dtype.values)[0].names == ["foo", "bar"] + assert idx.reindex([])[0].names == ("foo", "bar") + assert idx.reindex(np.array([]))[0].names == ("foo", "bar") + assert idx.reindex(target.tolist())[0].names == ("foo", "bar") + assert idx.reindex(target.values)[0].names == ("foo", "bar") + assert idx.reindex(other_dtype.tolist())[0].names == ("foo", "bar") + assert idx.reindex(other_dtype.values)[0].names == ("foo", "bar") def test_reindex_lvl_preserves_names_when_target_is_list_or_array(): # GH7774 idx = MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"]) - assert idx.reindex([], level=0)[0].names == ["foo", "bar"] - assert idx.reindex([], level=1)[0].names == ["foo", "bar"] + assert idx.reindex([], level=0)[0].names == ("foo", "bar") + assert idx.reindex([], level=1)[0].names == ("foo", "bar") def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array( diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index 06dbb33aadf97..1bf91a09ee754 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -23,7 +23,7 @@ def test_insert(idx): exp0 = Index(list(idx.levels[0]) + ["abc"], name="first") tm.assert_index_equal(new_index.levels[0], exp0) - assert new_index.names == ["first", "second"] + assert new_index.names == ("first", "second") exp1 = Index(list(idx.levels[1]) + ["three"], name="second") tm.assert_index_equal(new_index.levels[1], exp1) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 9354984538c58..15076b8705bdc 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -121,7 +121,7 @@ def test_multiindex_symmetric_difference(): idx2 = idx.copy().rename(["A", "B"]) result = idx.symmetric_difference(idx2) - assert result.names == [None, None] + assert result.names == (None, None) def test_empty(idx): diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 4a1a6b9c452d5..134853761b04e 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -16,7 +16,6 @@ Timestamp, ) import pandas._testing as tm -from pandas.core.indexes.frozen import FrozenList def test_sortlevel(idx): @@ -289,8 +288,9 @@ def test_remove_unused_levels_with_nan(): idx = idx.set_levels(["a", np.nan], level="id1") 
idx = idx.remove_unused_levels() result = idx.levels - expected = FrozenList([["a", np.nan], [4]]) - assert str(result) == str(expected) + expected = (Index(["a", np.nan], name="id1"), Index([4], name="id2")) + for res, exp in zip(result, expected): + tm.assert_index_equal(res, exp) def test_sort_values_nan(): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 66f209837345a..5f08443b4b5b5 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -942,7 +942,7 @@ def test_isin_level_kwarg_bad_level_raises(self, index): @pytest.mark.parametrize("label", [1.0, "foobar", "xyzzy", np.nan]) def test_isin_level_kwarg_bad_label_raises(self, label, index): if isinstance(index, MultiIndex): - index = index.rename(["foo", "bar"] + index.names[2:]) + index = index.rename(("foo", "bar") + index.names[2:]) msg = f"'Level {label} not found'" else: index = index.rename("foo") diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 80c39322b9b81..79c3780642e7d 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -120,7 +120,7 @@ def test_set_name_methods(self, index_flat): # should return None assert res is None assert index.name == new_name - assert index.names == [new_name] + assert index.names == (new_name,) with pytest.raises(ValueError, match="Level must be None"): index.set_names("a", level=0) @@ -128,7 +128,7 @@ def test_set_name_methods(self, index_flat): name = ("A", "B") index.rename(name, inplace=True) assert index.name == name - assert index.names == [name] + assert index.names == (name,) @pytest.mark.xfail def test_set_names_single_label_no_level(self, index_flat): diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py deleted file mode 100644 index ace66b5b06a51..0000000000000 --- a/pandas/tests/indexes/test_frozen.py +++ /dev/null @@ -1,113 +0,0 @@ -import re - -import pytest - -from pandas.core.indexes.frozen import FrozenList - - -@pytest.fixture -def lst(): - return [1, 2, 3, 4, 5] - - -@pytest.fixture -def container(lst): - return FrozenList(lst) - - -@pytest.fixture -def unicode_container(): - return FrozenList(["\u05d0", "\u05d1", "c"]) - - -class TestFrozenList: - def check_mutable_error(self, *args, **kwargs): - # Pass whatever function you normally would to pytest.raises - # (after the Exception kind). 
- mutable_regex = re.compile("does not support mutable operations") - msg = "'(_s)?re.(SRE_)?Pattern' object is not callable" - with pytest.raises(TypeError, match=msg): - mutable_regex(*args, **kwargs) - - def test_no_mutable_funcs(self, container): - def setitem(): - container[0] = 5 - - self.check_mutable_error(setitem) - - def setslice(): - container[1:2] = 3 - - self.check_mutable_error(setslice) - - def delitem(): - del container[0] - - self.check_mutable_error(delitem) - - def delslice(): - del container[0:3] - - self.check_mutable_error(delslice) - - mutable_methods = ("extend", "pop", "remove", "insert") - - for meth in mutable_methods: - self.check_mutable_error(getattr(container, meth)) - - def test_slicing_maintains_type(self, container, lst): - result = container[1:2] - expected = lst[1:2] - self.check_result(result, expected) - - def check_result(self, result, expected): - assert isinstance(result, FrozenList) - assert result == expected - - def test_string_methods_dont_fail(self, container): - repr(container) - str(container) - bytes(container) - - def test_tricky_container(self, unicode_container): - repr(unicode_container) - str(unicode_container) - - def test_add(self, container, lst): - result = container + (1, 2, 3) - expected = FrozenList(lst + [1, 2, 3]) - self.check_result(result, expected) - - result = (1, 2, 3) + container - expected = FrozenList([1, 2, 3] + lst) - self.check_result(result, expected) - - def test_iadd(self, container, lst): - q = r = container - - q += [5] - self.check_result(q, lst + [5]) - - # Other shouldn't be mutated. - self.check_result(r, lst) - - def test_union(self, container, lst): - result = container.union((1, 2, 3)) - expected = FrozenList(lst + [1, 2, 3]) - self.check_result(result, expected) - - def test_difference(self, container): - result = container.difference([2]) - expected = FrozenList([1, 3, 4, 5]) - self.check_result(result, expected) - - def test_difference_dupe(self): - result = FrozenList([1, 2, 3, 2]).difference([2]) - expected = FrozenList([1, 3]) - self.check_result(result, expected) - - def test_tricky_container_to_bytes_raises(self, unicode_container): - # GH 26447 - msg = "^'str' object cannot be interpreted as an integer$" - with pytest.raises(TypeError, match=msg): - bytes(unicode_container) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index dbfabf7666d25..78f701fff6e29 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -150,7 +150,7 @@ def test_getitem_intkey_leading_level( # GH#33355 dont fall-back to positional when leading level is int ymd = multiindex_year_month_day_dataframe_random_data levels = ymd.index.levels - ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:]) + ymd.index = ymd.index.set_levels((levels[0].astype(dtype),) + levels[1:]) ser = ymd["A"] mi = ser.index assert isinstance(mi, MultiIndex) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 28b613fa1f6f6..b6fa90edbf106 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -114,7 +114,7 @@ def test_multiindex(self, df_schema, using_infer_string): {"name": "C", "type": "datetime"}, {"name": "D", "type": "duration"}, ], - "primaryKey": ["level_0", "level_1"], + "primaryKey": ("level_0", "level_1"), } if using_infer_string: expected["fields"][0] = { @@ -127,7 +127,7 @@ 
def test_multiindex(self, df_schema, using_infer_string): df.index.names = ["idx0", None] expected["fields"][0]["name"] = "idx0" - expected["primaryKey"] = ["idx0", "level_1"] + expected["primaryKey"] = ("idx0", "level_1") result = build_table_schema(df, version=False) assert result == expected @@ -597,21 +597,21 @@ def test_categorical(self): (pd.Index([1], name="myname"), "myname", "name"), ( pd.MultiIndex.from_product([("a", "b"), ("c", "d")]), - ["level_0", "level_1"], + ("level_0", "level_1"), "names", ), ( pd.MultiIndex.from_product( [("a", "b"), ("c", "d")], names=["n1", "n2"] ), - ["n1", "n2"], + ("n1", "n2"), "names", ), ( pd.MultiIndex.from_product( [("a", "b"), ("c", "d")], names=["n1", None] ), - ["n1", "level_1"], + ("n1", "level_1"), "names", ), ], diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 9ab70cd95c1cf..4866ef78d79a2 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -1015,7 +1015,7 @@ def test_columns_multiindex_modified(tmp_path, setup_path): df.index.name = "letters" df = df.set_index(keys="E", append=True) - data_columns = df.index.names + df.columns.tolist() + data_columns = list(df.index.names) + df.columns.tolist() path = tmp_path / setup_path df.to_hdf( path, diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index e6595ca9b06a8..8bb67fac19c65 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2322,15 +2322,18 @@ def test_read_table_index_col(conn, request, test_frame1): sql.to_sql(test_frame1, "test_frame", conn) result = sql.read_sql_table("test_frame", conn, index_col="index") - assert result.index.names == ["index"] + assert result.index.names == ("index",) result = sql.read_sql_table("test_frame", conn, index_col=["A", "B"]) - assert result.index.names == ["A", "B"] + assert result.index.names == ("A", "B") result = sql.read_sql_table( "test_frame", conn, index_col=["A", "B"], columns=["C", "D"] ) - assert result.index.names == ["A", "B"] + assert result.index.names == ( + "A", + "B", + ) assert result.columns.tolist() == ["C", "D"] diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 7174245ec16d8..876906cd76e3f 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -137,7 +137,7 @@ def test_concat_keys_specific_levels(self): tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key")) tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3])) - assert result.columns.names == ["group_key", None] + assert result.columns.names == ("group_key", None) @pytest.mark.parametrize("mapping", ["mapping", "dict"]) def test_concat_mapping(self, mapping, non_dict_mapping_subclass): diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 7ae2fffa04205..33d9a721df6b7 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -814,12 +814,10 @@ def test_join_multi_levels2(self): class TestJoinMultiMulti: def test_join_multi_multi(self, left_multi, right_multi, join_type, on_cols_multi): - left_names = left_multi.index.names - right_names = right_multi.index.names if join_type == "right": - level_order = right_names + left_names.difference(right_names) + level_order = ["Origin", "Destination", "Period", "LinkType", "TripPurp"] else: - level_order = left_names + right_names.difference(left_names) + 
level_order = ["Origin", "Destination", "Period", "TripPurp", "LinkType"] # Multi-index join tests expected = ( merge( @@ -841,12 +839,10 @@ def test_join_multi_empty_frames( left_multi = left_multi.drop(columns=left_multi.columns) right_multi = right_multi.drop(columns=right_multi.columns) - left_names = left_multi.index.names - right_names = right_multi.index.names if join_type == "right": - level_order = right_names + left_names.difference(right_names) + level_order = ["Origin", "Destination", "Period", "LinkType", "TripPurp"] else: - level_order = left_names + right_names.difference(left_names) + level_order = ["Origin", "Destination", "Period", "TripPurp", "LinkType"] expected = ( merge( diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 8a30b63cf0e17..2a538d34d8b2c 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -135,7 +135,7 @@ def test_crosstab_margins(self): result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"), margins=True) assert result.index.names == ("a",) - assert result.columns.names == ["b", "c"] + assert result.columns.names == ("b", "c") all_cols = result["All", ""] exp_cols = df.groupby(["a"]).size().astype("i8") @@ -173,7 +173,7 @@ def test_crosstab_margins_set_margin_name(self): ) assert result.index.names == ("a",) - assert result.columns.names == ["b", "c"] + assert result.columns.names == ("b", "c") all_cols = result["TOTAL", ""] exp_cols = df.groupby(["a"]).size().astype("i8") diff --git a/pandas/tests/series/methods/test_rename_axis.py b/pandas/tests/series/methods/test_rename_axis.py index 58c095d697ede..60175242a06b5 100644 --- a/pandas/tests/series/methods/test_rename_axis.py +++ b/pandas/tests/series/methods/test_rename_axis.py @@ -15,13 +15,13 @@ def test_rename_axis_mapper(self): ser = Series(list(range(len(mi))), index=mi) result = ser.rename_axis(index={"ll": "foo"}) - assert result.index.names == ["foo", "nn"] + assert result.index.names == ("foo", "nn") result = ser.rename_axis(index=str.upper, axis=0) - assert result.index.names == ["LL", "NN"] + assert result.index.names == ("LL", "NN") result = ser.rename_axis(index=["foo", "goo"]) - assert result.index.names == ["foo", "goo"] + assert result.index.names == ("foo", "goo") with pytest.raises(TypeError, match="unexpected"): ser.rename_axis(columns="wrong") diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 00f6fe5dfe5d9..94d7d5fead622 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -207,18 +207,6 @@ class MyList(list): val = MyList([True]) assert com.is_bool_indexer(val) - def test_frozenlist(self): - # GH#42461 - data = {"col1": [1, 2], "col2": [3, 4]} - df = pd.DataFrame(data=data) - - frozen = df.index.names[1:] - assert not com.is_bool_indexer(frozen) - - result = df[frozen] - expected = df[[]] - tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("with_exception", [True, False]) def test_temp_setattr(with_exception): diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index dc6efdcec380e..78ff774c188fe 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -198,8 +198,8 @@ def test_index_equal_names(name1, name2): msg = f"""Index are different Attribute "names" are different -\\[left\\]: \\[{name1}\\] -\\[right\\]: \\[{name2}\\]""" +\\[left\\]: \\({name1},\\) +\\[right\\]: \\({name2},\\)""" with 
pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 85821ed2cfb6f..47bfc219d0fe9 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -589,7 +589,7 @@ def test_multi_index_names(): result = df.rolling(3).cov() tm.assert_index_equal(result.columns, df.columns) - assert result.index.names == [None, "1", "2"] + assert result.index.names == (None, "1", "2") def test_rolling_axis_sum():
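
The contract these test updates encode can be summed up in a short snippet. This is a minimal sketch, assuming a pandas build that includes this change; the index values and names below are arbitrary illustrations, not taken from the patch.

import pandas as pd

# Arbitrary example index; any two-level MultiIndex behaves the same way.
mi = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=["letter", "number"])

# names, levels, and codes are now plain tuples, so they compare equal to
# tuples (as the updated asserts above expect) rather than to lists.
assert mi.names == ("letter", "number")
assert isinstance(mi.levels, tuple)
assert isinstance(mi.codes, tuple)

# FrozenList supported list-style concatenation directly; with tuples, callers
# that build a list of labels convert explicitly first, as the updated
# test_store.py hunk does with list(df.index.names).
data_columns = list(mi.names) + ["extra_column"]
assert data_columns == ["letter", "number", "extra_column"]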