From 2e9e89abf0f3013e16b7bd2e2dfd4fa502069696 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Thu, 11 Apr 2024 11:38:21 -0400 Subject: [PATCH] API: Revert 57042 - MultiIndex.names|codes|levels returns tuples (#57788) * API: Revert 57042 - MultiIndex.names|codes|levels returns tuples * Typing fixup * Docstring fixup * ruff --- doc/source/whatsnew/v3.0.0.rst | 1 - pandas/_libs/index.pyi | 6 +- pandas/core/groupby/groupby.py | 2 +- pandas/core/indexes/base.py | 31 ++--- pandas/core/indexes/frozen.py | 121 ++++++++++++++++ pandas/core/indexes/multi.py | 130 ++++++++---------- pandas/core/resample.py | 5 +- pandas/core/reshape/melt.py | 2 +- pandas/core/reshape/merge.py | 14 +- pandas/core/reshape/pivot.py | 6 +- pandas/core/reshape/reshape.py | 32 +++-- pandas/core/strings/accessor.py | 2 +- pandas/core/window/rolling.py | 6 +- .../tests/frame/methods/test_rename_axis.py | 6 +- pandas/tests/frame/methods/test_set_index.py | 10 +- .../tests/frame/methods/test_sort_values.py | 4 +- pandas/tests/frame/test_stack_unstack.py | 4 +- pandas/tests/generic/test_frame.py | 8 +- pandas/tests/generic/test_series.py | 4 +- pandas/tests/groupby/methods/test_quantile.py | 7 +- .../groupby/methods/test_value_counts.py | 2 +- pandas/tests/groupby/test_apply.py | 2 +- pandas/tests/indexes/multi/test_astype.py | 2 +- .../tests/indexes/multi/test_constructors.py | 12 +- pandas/tests/indexes/multi/test_copy.py | 2 +- pandas/tests/indexes/multi/test_duplicates.py | 2 +- pandas/tests/indexes/multi/test_formats.py | 22 +-- pandas/tests/indexes/multi/test_get_set.py | 10 +- pandas/tests/indexes/multi/test_integrity.py | 4 +- pandas/tests/indexes/multi/test_names.py | 16 +-- pandas/tests/indexes/multi/test_reindex.py | 32 ++--- pandas/tests/indexes/multi/test_reshape.py | 2 +- pandas/tests/indexes/multi/test_setops.py | 2 +- pandas/tests/indexes/multi/test_sorting.py | 6 +- pandas/tests/indexes/test_base.py | 2 +- 
pandas/tests/indexes/test_common.py | 4 +- pandas/tests/indexes/test_frozen.py | 113 +++++++++++++++ .../tests/indexing/multiindex/test_partial.py | 2 +- .../tests/io/json/test_json_table_schema.py | 10 +- pandas/tests/io/pytables/test_store.py | 2 +- pandas/tests/io/test_sql.py | 9 +- pandas/tests/reshape/concat/test_concat.py | 2 +- pandas/tests/reshape/merge/test_multi.py | 12 +- pandas/tests/reshape/test_crosstab.py | 4 +- .../tests/series/methods/test_rename_axis.py | 6 +- pandas/tests/test_common.py | 12 ++ pandas/tests/util/test_assert_index_equal.py | 4 +- pandas/tests/window/test_rolling.py | 2 +- 48 files changed, 473 insertions(+), 228 deletions(-) create mode 100644 pandas/core/indexes/frozen.py create mode 100644 pandas/tests/indexes/test_frozen.py diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 269f6e1a96a7a..e05cc87d1af14 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -147,7 +147,6 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor Other API changes ^^^^^^^^^^^^^^^^^ - 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`) -- :attr:`MultiIndex.codes`, :attr:`MultiIndex.levels`, and :attr:`MultiIndex.names` now returns a ``tuple`` instead of a ``FrozenList`` (:issue:`53531`) - :func:`read_table`'s ``parse_dates`` argument defaults to ``None`` to improve consistency with :func:`read_csv` (:issue:`57476`) - Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`) - Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. 
Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`) diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi index 8cd135c944dc6..12a5bf245977e 100644 --- a/pandas/_libs/index.pyi +++ b/pandas/_libs/index.pyi @@ -73,13 +73,13 @@ class MaskedUInt8Engine(MaskedIndexEngine): ... class MaskedBoolEngine(MaskedUInt8Engine): ... class BaseMultiIndexCodesEngine: - levels: tuple[np.ndarray] + levels: list[np.ndarray] offsets: np.ndarray # ndarray[uint64_t, ndim=1] def __init__( self, - levels: tuple[Index, ...], # all entries hashable - labels: tuple[np.ndarray], # all entries integer-dtyped + levels: list[Index], # all entries hashable + labels: list[np.ndarray], # all entries integer-dtyped offsets: np.ndarray, # np.ndarray[np.uint64, ndim=1] ) -> None: ... def get_indexer(self, target: npt.NDArray[np.object_]) -> npt.NDArray[np.intp]: ... diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c91e4233ef540..bc37405b25a16 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -5609,7 +5609,7 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde idx = cast(MultiIndex, idx) levels = list(idx.levels) + [lev] codes = [np.repeat(x, nqs) for x in idx.codes] + [np.tile(lev_codes, len(idx))] - mi = MultiIndex(levels=levels, codes=codes, names=list(idx.names) + [None]) + mi = MultiIndex(levels=levels, codes=codes, names=idx.names + [None]) else: nidx = len(idx) idx_codes = coerce_indexer_dtype(np.arange(nidx), idx) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 734711942b9f9..d5517a210b39d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -174,6 +174,7 @@ disallow_ndim_indexing, is_valid_positional_slice, ) +from pandas.core.indexes.frozen import FrozenList from pandas.core.missing import clean_reindex_fill_method from pandas.core.ops import get_op_result_name from pandas.core.sorting import ( @@ -1726,8 
+1727,8 @@ def _get_default_index_names( return names - def _get_names(self) -> tuple[Hashable | None, ...]: - return (self.name,) + def _get_names(self) -> FrozenList: + return FrozenList((self.name,)) def _set_names(self, values, *, level=None) -> None: """ @@ -1821,7 +1822,7 @@ def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None: ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - names=('species', 'year')) + names=['species', 'year']) When renaming levels with a dict, levels can not be passed. @@ -1830,7 +1831,7 @@ def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None: ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - names=('snake', 'year')) + names=['snake', 'year']) """ if level is not None and not isinstance(self, ABCMultiIndex): raise ValueError("Level must be None for non-MultiIndex") @@ -1915,13 +1916,13 @@ def rename(self, name, *, inplace: bool = False) -> Self | None: ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - names=('kind', 'year')) + names=['kind', 'year']) >>> idx.rename(["species", "year"]) MultiIndex([('python', 2018), ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], - names=('species', 'year')) + names=['species', 'year']) >>> idx.rename("species") Traceback (most recent call last): TypeError: Must pass list-like as `names`. @@ -2085,22 +2086,22 @@ def droplevel(self, level: IndexLabel = 0): >>> mi MultiIndex([(1, 3, 5), (2, 4, 6)], - names=('x', 'y', 'z')) + names=['x', 'y', 'z']) >>> mi.droplevel() MultiIndex([(3, 5), (4, 6)], - names=('y', 'z')) + names=['y', 'z']) >>> mi.droplevel(2) MultiIndex([(1, 3), (2, 4)], - names=('x', 'y')) + names=['x', 'y']) >>> mi.droplevel("z") MultiIndex([(1, 3), (2, 4)], - names=('x', 'y')) + names=['x', 'y']) >>> mi.droplevel(["x", "y"]) Index([5, 6], dtype='int64', name='z') @@ -4437,9 +4438,7 @@ def _join_level( """ from pandas.core.indexes.multi import MultiIndex - def _get_leaf_sorter( - labels: tuple[np.ndarray, ...] 
| list[np.ndarray], - ) -> npt.NDArray[np.intp]: + def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]: """ Returns sorter for the inner most level while preserving the order of higher levels. @@ -6184,13 +6183,13 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]: array([ True, False, False]) >>> midx = pd.MultiIndex.from_arrays( - ... [[1, 2, 3], ["red", "blue", "green"]], names=("number", "color") + ... [[1, 2, 3], ["red", "blue", "green"]], names=["number", "color"] ... ) >>> midx MultiIndex([(1, 'red'), (2, 'blue'), (3, 'green')], - names=('number', 'color')) + names=['number', 'color']) Check whether the strings in the 'color' level of the MultiIndex are in a list of colors. @@ -7178,7 +7177,7 @@ def ensure_index_from_sequences(sequences, names=None) -> Index: >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"]) MultiIndex([('a', 'a'), ('a', 'b')], - names=('L1', 'L2')) + names=['L1', 'L2']) See Also -------- diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py new file mode 100644 index 0000000000000..c559c529586b5 --- /dev/null +++ b/pandas/core/indexes/frozen.py @@ -0,0 +1,121 @@ +""" +frozen (immutable) data structures to support MultiIndexing + +These are used for: + +- .names (FrozenList) + +""" + +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + NoReturn, +) + +from pandas.core.base import PandasObject + +from pandas.io.formats.printing import pprint_thing + +if TYPE_CHECKING: + from pandas._typing import Self + + +class FrozenList(PandasObject, list): + """ + Container that doesn't allow setting item *but* + because it's technically hashable, will be used + for lookups, appropriately, etc. + """ + + # Side note: This has to be of type list. Otherwise, + # it messes up PyTables type checks. + + def union(self, other) -> FrozenList: + """ + Returns a FrozenList with other concatenated to the end of self. 
+ + Parameters + ---------- + other : array-like + The array-like whose elements we are concatenating. + + Returns + ------- + FrozenList + The collection difference between self and other. + """ + if isinstance(other, tuple): + other = list(other) + return type(self)(super().__add__(other)) + + def difference(self, other) -> FrozenList: + """ + Returns a FrozenList with elements from other removed from self. + + Parameters + ---------- + other : array-like + The array-like whose elements we are removing self. + + Returns + ------- + FrozenList + The collection difference between self and other. + """ + other = set(other) + temp = [x for x in self if x not in other] + return type(self)(temp) + + # TODO: Consider deprecating these in favor of `union` (xref gh-15506) + # error: Incompatible types in assignment (expression has type + # "Callable[[FrozenList, Any], FrozenList]", base class "list" defined the + # type as overloaded function) + __add__ = __iadd__ = union # type: ignore[assignment] + + def __getitem__(self, n): + if isinstance(n, slice): + return type(self)(super().__getitem__(n)) + return super().__getitem__(n) + + def __radd__(self, other) -> Self: + if isinstance(other, tuple): + other = list(other) + return type(self)(other + list(self)) + + def __eq__(self, other: object) -> bool: + if isinstance(other, (tuple, FrozenList)): + other = list(other) + return super().__eq__(other) + + __req__ = __eq__ + + def __mul__(self, other) -> Self: + return type(self)(super().__mul__(other)) + + __imul__ = __mul__ + + def __reduce__(self): + return type(self), (list(self),) + + # error: Signature of "__hash__" incompatible with supertype "list" + def __hash__(self) -> int: # type: ignore[override] + return hash(tuple(self)) + + def _disabled(self, *args, **kwargs) -> NoReturn: + """ + This method will not function because object is immutable. 
+ """ + raise TypeError(f"'{type(self).__name__}' does not support mutable operations.") + + def __str__(self) -> str: + return pprint_thing(self, quote_strings=True, escape_chars=("\t", "\r", "\n")) + + def __repr__(self) -> str: + return f"{type(self).__name__}({self!s})" + + __setitem__ = __setslice__ = _disabled # type: ignore[assignment] + __delitem__ = __delslice__ = _disabled + pop = append = extend = _disabled + remove = sort = insert = _disabled # type: ignore[assignment] diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d6149bcd6fdac..4affa1337aa2a 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -101,6 +101,7 @@ ensure_index, get_unanimous_names, ) +from pandas.core.indexes.frozen import FrozenList from pandas.core.ops.invalid import make_invalid_op from pandas.core.sorting import ( get_group_index, @@ -299,7 +300,7 @@ class MultiIndex(Index): (1, 'blue'), (2, 'red'), (2, 'blue')], - names=('number', 'color')) + names=['number', 'color']) See further examples for how to construct a MultiIndex in the doc strings of the mentioned helper methods. @@ -309,9 +310,9 @@ class MultiIndex(Index): # initialize to zero-length tuples to make everything work _typ = "multiindex" - _names: tuple[Hashable | None, ...] = () - _levels: tuple[Index, ...] = () - _codes: tuple[np.ndarray, ...] 
= () + _names: list[Hashable | None] = [] + _levels = FrozenList() + _codes = FrozenList() _comparables = ["names"] sortorder: int | None @@ -347,7 +348,7 @@ def __new__( result._set_levels(levels, copy=copy, validate=False) result._set_codes(codes, copy=copy, validate=False) - result._names = (None,) * len(levels) + result._names = [None] * len(levels) if names is not None: # handles name validation result._set_names(names) @@ -389,16 +390,16 @@ def _validate_codes(self, level: Index, code: np.ndarray) -> np.ndarray: def _verify_integrity( self, - codes: tuple | None = None, - levels: tuple | None = None, + codes: list | None = None, + levels: list | None = None, levels_to_verify: list[int] | range | None = None, - ) -> tuple: + ) -> FrozenList: """ Parameters ---------- - codes : optional tuple + codes : optional list Codes to check for validity. Defaults to current codes. - levels : optional tuple + levels : optional list Levels to check for validity. Defaults to current levels. levels_to_validate: optional list Specifies the levels to verify. @@ -462,7 +463,7 @@ def _verify_integrity( else: result_codes.append(codes[i]) - new_codes = tuple(result_codes) + new_codes = FrozenList(result_codes) return new_codes @classmethod @@ -505,7 +506,7 @@ def from_arrays( (1, 'blue'), (2, 'red'), (2, 'blue')], - names=('number', 'color')) + names=['number', 'color']) """ error_msg = "Input must be a list / sequence of array-likes." 
if not is_list_like(arrays): @@ -576,7 +577,7 @@ def from_tuples( (1, 'blue'), (2, 'red'), (2, 'blue')], - names=('number', 'color')) + names=['number', 'color']) """ if not is_list_like(tuples): raise TypeError("Input must be a list / sequence of tuple-likes.") @@ -659,7 +660,7 @@ def from_product( (1, 'purple'), (2, 'green'), (2, 'purple')], - names=('number', 'color')) + names=['number', 'color']) """ from pandas.core.reshape.util import cartesian_product @@ -728,7 +729,7 @@ def from_frame( ('HI', 'Precip'), ('NJ', 'Temp'), ('NJ', 'Precip')], - names=('a', 'b')) + names=['a', 'b']) Using explicit names, instead of the column names @@ -737,7 +738,7 @@ def from_frame( ('HI', 'Precip'), ('NJ', 'Temp'), ('NJ', 'Precip')], - names=('state', 'observation')) + names=['state', 'observation']) """ if not isinstance(df, ABCDataFrame): raise TypeError("Input must be a DataFrame") @@ -760,9 +761,7 @@ def _values(self) -> np.ndarray: vals = index if isinstance(vals.dtype, CategoricalDtype): vals = cast("CategoricalIndex", vals) - # Incompatible types in assignment (expression has type - # "ExtensionArray | ndarray[Any, Any]", variable has type "Index") - vals = vals._data._internal_get_values() # type: ignore[assignment] + vals = vals._data._internal_get_values() if isinstance(vals.dtype, ExtensionDtype) or lib.is_np_dtype( vals.dtype, "mM" @@ -812,7 +811,7 @@ def dtypes(self) -> Series: (1, 'purple'), (2, 'green'), (2, 'purple')], - names=('number', 'color')) + names=['number', 'color']) >>> idx.dtypes number int64 color object @@ -838,7 +837,7 @@ def size(self) -> int: # Levels Methods @cache_readonly - def levels(self) -> tuple[Index, ...]: + def levels(self) -> FrozenList: """ Levels of the MultiIndex. 
@@ -871,8 +870,7 @@ def levels(self) -> tuple[Index, ...]: dog 4 >>> leg_num.index.levels - (Index(['mammal'], dtype='object', name='Category'), - Index(['cat', 'dog', 'goat', 'human'], dtype='object', name='Animals')) + FrozenList([['mammal'], ['cat', 'dog', 'goat', 'human']]) MultiIndex levels will not change even if the DataFrame using the MultiIndex does not contain all them anymore. @@ -887,8 +885,7 @@ def levels(self) -> tuple[Index, ...]: dog 4 >>> large_leg_num.index.levels - (Index(['mammal'], dtype='object', name='Category'), - Index(['cat', 'dog', 'goat', 'human'], dtype='object', name='Animals')) + FrozenList([['mammal'], ['cat', 'dog', 'goat', 'human']]) """ # Use cache_readonly to ensure that self.get_locs doesn't repeatedly # create new IndexEngine @@ -897,7 +894,7 @@ def levels(self) -> tuple[Index, ...]: for level in result: # disallow midx.levels[0].name = "foo" level._no_setting_name = True - return tuple(result) + return FrozenList(result) def _set_levels( self, @@ -920,14 +917,16 @@ def _set_levels( raise ValueError("Length of levels must match length of level.") if level is None: - new_levels = tuple(ensure_index(lev, copy=copy)._view() for lev in levels) + new_levels = FrozenList( + ensure_index(lev, copy=copy)._view() for lev in levels + ) level_numbers: range | list[int] = range(len(new_levels)) else: level_numbers = [self._get_level_number(lev) for lev in level] new_levels_list = list(self._levels) for lev_num, lev in zip(level_numbers, levels): new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view() - new_levels = tuple(new_levels_list) + new_levels = FrozenList(new_levels_list) if verify_integrity: new_codes = self._verify_integrity( @@ -936,7 +935,7 @@ def _set_levels( self._codes = new_codes names = self.names - self._levels: tuple[Index, ...] 
= new_levels + self._levels = new_levels if any(names): self._set_names(names) @@ -981,7 +980,7 @@ def set_levels( (2, 'two'), (3, 'one'), (3, 'two')], - names=('foo', 'bar')) + names=['foo', 'bar']) >>> idx.set_levels([["a", "b", "c"], [1, 2]]) MultiIndex([('a', 1), @@ -990,7 +989,7 @@ def set_levels( ('b', 2), ('c', 1), ('c', 2)], - names=('foo', 'bar')) + names=['foo', 'bar']) >>> idx.set_levels(["a", "b", "c"], level=0) MultiIndex([('a', 'one'), ('a', 'two'), @@ -998,7 +997,7 @@ def set_levels( ('b', 'two'), ('c', 'one'), ('c', 'two')], - names=('foo', 'bar')) + names=['foo', 'bar']) >>> idx.set_levels(["a", "b"], level="bar") MultiIndex([(1, 'a'), (1, 'b'), @@ -1006,7 +1005,7 @@ def set_levels( (2, 'b'), (3, 'a'), (3, 'b')], - names=('foo', 'bar')) + names=['foo', 'bar']) If any of the levels passed to ``set_levels()`` exceeds the existing length, all of the values from that argument will @@ -1020,10 +1019,10 @@ def set_levels( ('b', 2), ('c', 1), ('c', 2)], - names=('foo', 'bar')) + names=['foo', 'bar']) >>> idx.set_levels([["a", "b", "c"], [1, 2, 3, 4]], level=[0, 1]).levels - (Index(['a', 'b', 'c'], dtype='object', name='foo'), Index([1, 2, 3, 4], dtype='int64', name='bar')) - """ # noqa: E501 + FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]]) + """ if isinstance(levels, Index): pass @@ -1076,7 +1075,7 @@ def levshape(self) -> Shape: # Codes Methods @property - def codes(self) -> tuple: + def codes(self) -> FrozenList: """ Codes of the MultiIndex. 
@@ -1098,7 +1097,7 @@ def codes(self) -> tuple: >>> arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]] >>> mi = pd.MultiIndex.from_arrays(arrays, names=("number", "color")) >>> mi.codes - (array([0, 0, 1, 1], dtype=int8), array([1, 0, 1, 0], dtype=int8)) + FrozenList([[0, 0, 1, 1], [1, 0, 1, 0]]) """ return self._codes @@ -1119,7 +1118,7 @@ def _set_codes( level_numbers: list[int] | range if level is None: - new_codes = tuple( + new_codes = FrozenList( _coerce_indexer_frozen(level_codes, lev, copy=copy).view() for lev, level_codes in zip(self._levels, codes) ) @@ -1132,7 +1131,7 @@ def _set_codes( new_codes_list[lev_num] = _coerce_indexer_frozen( level_codes, lev, copy=copy ) - new_codes = tuple(new_codes_list) + new_codes = FrozenList(new_codes_list) if verify_integrity: new_codes = self._verify_integrity( @@ -1173,32 +1172,32 @@ def set_codes( (1, 'two'), (2, 'one'), (2, 'two')], - names=('foo', 'bar')) + names=['foo', 'bar']) >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]]) MultiIndex([(2, 'one'), (1, 'one'), (2, 'two'), (1, 'two')], - names=('foo', 'bar')) + names=['foo', 'bar']) >>> idx.set_codes([1, 0, 1, 0], level=0) MultiIndex([(2, 'one'), (1, 'two'), (2, 'one'), (1, 'two')], - names=('foo', 'bar')) + names=['foo', 'bar']) >>> idx.set_codes([0, 0, 1, 1], level="bar") MultiIndex([(1, 'one'), (1, 'one'), (2, 'two'), (2, 'two')], - names=('foo', 'bar')) + names=['foo', 'bar']) >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1]) MultiIndex([(2, 'one'), (1, 'one'), (2, 'two'), (1, 'two')], - names=('foo', 'bar')) + names=['foo', 'bar']) """ level, codes = _require_listlike(level, codes, "Codes") @@ -1451,7 +1450,6 @@ def _format_multi( if len(self) == 0: return [] - formatted: Iterable stringified_levels = [] for lev, level_codes in zip(self.levels, self.codes): na = _get_na_rep(lev.dtype) @@ -1476,9 +1474,7 @@ def _format_multi( stringified_levels.append(formatted) result_levels = [] - # Incompatible types in assignment (expression has type 
"Iterable[Any]", - # variable has type "Index") - for lev, lev_name in zip(stringified_levels, self.names): # type: ignore[assignment] + for lev, lev_name in zip(stringified_levels, self.names): level = [] if include_names: @@ -1510,8 +1506,8 @@ def _format_multi( # -------------------------------------------------------------------- # Names Methods - def _get_names(self) -> tuple[Hashable | None, ...]: - return self._names + def _get_names(self) -> FrozenList: + return FrozenList(self._names) def _set_names(self, names, *, level=None, validate: bool = True) -> None: """ @@ -1558,7 +1554,6 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None: level = [self._get_level_number(lev) for lev in level] # set the name - new_names = list(self._names) for lev, name in zip(level, names): if name is not None: # GH 20527 @@ -1567,8 +1562,7 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None: raise TypeError( f"{type(self).__name__}.name must be a hashable type" ) - new_names[lev] = name - self._names = tuple(new_names) + self._names[lev] = name # If .levels has been accessed, the names in our cache will be stale. 
self._reset_cache() @@ -1587,9 +1581,9 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None: >>> mi MultiIndex([(1, 3, 5), (2, 4, 6)], - names=('x', 'y', 'z')) + names=['x', 'y', 'z']) >>> mi.names - ('x', 'y', 'z') + FrozenList(['x', 'y', 'z']) """, ) @@ -2063,7 +2057,7 @@ def remove_unused_levels(self) -> MultiIndex: >>> mi2 = mi[2:].remove_unused_levels() >>> mi2.levels - (RangeIndex(start=1, stop=2, step=1), Index(['a', 'b'], dtype='object')) + FrozenList([[1], ['a', 'b']]) """ new_levels = [] new_codes = [] @@ -2337,13 +2331,13 @@ def drop( # type: ignore[override] (1, 'purple'), (2, 'green'), (2, 'purple')], - names=('number', 'color')) + names=['number', 'color']) >>> idx.drop([(1, "green"), (2, "purple")]) MultiIndex([(0, 'green'), (0, 'purple'), (1, 'purple'), (2, 'green')], - names=('number', 'color')) + names=['number', 'color']) We can also drop from a specific level. @@ -2351,12 +2345,12 @@ def drop( # type: ignore[override] MultiIndex([(0, 'purple'), (1, 'purple'), (2, 'purple')], - names=('number', 'color')) + names=['number', 'color']) >>> idx.drop([1, 2], level=0) MultiIndex([(0, 'green'), (0, 'purple')], - names=('number', 'color')) + names=['number', 'color']) """ if level is not None: return self._drop_from_level(codes, level, errors) @@ -2497,17 +2491,17 @@ def reorder_levels(self, order) -> MultiIndex: >>> mi MultiIndex([(1, 3), (2, 4)], - names=('x', 'y')) + names=['x', 'y']) >>> mi.reorder_levels(order=[1, 0]) MultiIndex([(3, 1), (4, 2)], - names=('y', 'x')) + names=['y', 'x']) >>> mi.reorder_levels(order=["y", "x"]) MultiIndex([(3, 1), (4, 2)], - names=('y', 'x')) + names=['y', 'x']) """ order = [self._get_level_number(i) for i in order] result = self._reorder_ilevels(order) @@ -2876,9 +2870,7 @@ def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left" if lab not in lev and not isna(lab): # short circuit try: - # Argument 1 to "searchsorted" has incompatible type "Index"; - # expected 
"ExtensionArray | ndarray[Any, Any]" - loc = algos.searchsorted(lev, lab, side=side) # type: ignore[arg-type] + loc = algos.searchsorted(lev, lab, side=side) except TypeError as err: # non-comparable e.g. test_slice_locs_with_type_mismatch raise TypeError(f"Level type mismatch: {lab}") from err @@ -3546,7 +3538,7 @@ def _reorder_indexer( k_codes = self.levels[i].get_indexer(k) k_codes = k_codes[k_codes >= 0] # Filter absent keys # True if the given codes are not ordered - need_sort = bool((k_codes[:-1] > k_codes[1:]).any()) + need_sort = (k_codes[:-1] > k_codes[1:]).any() else: need_sort = True elif isinstance(k, slice): @@ -3979,7 +3971,7 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]: __invert__ = make_invalid_op("__invert__") -def _lexsort_depth(codes: tuple[np.ndarray], nlevels: int) -> int: +def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int: """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted.""" int64_codes = [ensure_int64(level_codes) for level_codes in codes] for k in range(nlevels, 0, -1): diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 43077e7aeecb4..4392f54d9c442 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1681,9 +1681,8 @@ def _wrap_result(self, result): if self.kind == "period" and not isinstance(result.index, PeriodIndex): if isinstance(result.index, MultiIndex): # GH 24103 - e.g. 
groupby resample - new_level = result.index.levels[-1] - if not isinstance(new_level, PeriodIndex): - new_level = new_level.to_period(self.freq) # type: ignore[attr-defined] + if not isinstance(result.index.levels[-1], PeriodIndex): + new_level = result.index.levels[-1].to_period(self.freq) result.index = result.index.set_levels(new_level, level=-1) else: result.index = result.index.to_period(self.freq) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index f51a833e5f906..b4720306094e9 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -237,7 +237,7 @@ def melt( else: mdata[col] = np.tile(id_data._values, num_cols_adjusted) - mcolumns = id_vars + list(var_name) + [value_name] + mcolumns = id_vars + var_name + [value_name] if frame.shape[1] > 0 and not any( not isinstance(dt, np.dtype) and dt._supports_2d for dt in frame.dtypes diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index dcb638cfee97b..19e53a883d1e2 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -106,6 +106,7 @@ from pandas import DataFrame from pandas.core import groupby from pandas.core.arrays import DatetimeArray + from pandas.core.indexes.frozen import FrozenList _factorizers = { np.int64: libhashtable.Int64Factorizer, @@ -1803,7 +1804,7 @@ def restore_dropped_levels_multijoin( join_index: Index, lindexer: npt.NDArray[np.intp], rindexer: npt.NDArray[np.intp], -) -> tuple[tuple, tuple, tuple]: +) -> tuple[FrozenList, FrozenList, FrozenList]: """ *this is an internal non-public method* @@ -1835,7 +1836,7 @@ def restore_dropped_levels_multijoin( levels of combined multiindexes labels : np.ndarray[np.intp] labels of combined multiindexes - names : tuple[Hashable] + names : List[Hashable] names of combined multiindex levels """ @@ -1877,11 +1878,12 @@ def _convert_to_multiindex(index: Index) -> MultiIndex: else: restore_codes = algos.take_nd(codes, indexer, fill_value=-1) - join_levels = join_levels + 
(restore_levels,) - join_codes = join_codes + (restore_codes,) - join_names = join_names + (dropped_level_name,) + # error: Cannot determine type of "__add__" + join_levels = join_levels + [restore_levels] # type: ignore[has-type] + join_codes = join_codes + [restore_codes] # type: ignore[has-type] + join_names = join_names + [dropped_level_name] - return tuple(join_levels), tuple(join_codes), tuple(join_names) + return join_levels, join_codes, join_names class _OrderedMerge(_MergeOperation): diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index b62f550662f5d..e0126d439a79c 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -397,7 +397,11 @@ def _all_key(key): if isinstance(piece.index, MultiIndex): # We are adding an empty level transformed_piece.index = MultiIndex.from_tuples( - [all_key], names=piece.index.names + (None,) + [all_key], + names=piece.index.names + + [ + None, + ], ) else: transformed_piece.index = Index([all_key], name=piece.index.name) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 574e6839070be..01cc85ceff181 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -62,6 +62,7 @@ ) from pandas.core.arrays import ExtensionArray + from pandas.core.indexes.frozen import FrozenList class _Unstacker: @@ -349,15 +350,21 @@ def get_new_columns(self, value_columns: Index | None): width = len(value_columns) propagator = np.repeat(np.arange(width), stride) - new_levels: tuple[Index, ...] 
+ new_levels: FrozenList | list[Index] if isinstance(value_columns, MultiIndex): - new_levels = value_columns.levels + (self.removed_level_full,) + # error: Cannot determine type of "__add__" [has-type] + new_levels = value_columns.levels + ( # type: ignore[has-type] + self.removed_level_full, + ) new_names = value_columns.names + (self.removed_name,) new_codes = [lab.take(propagator) for lab in value_columns.codes] else: - new_levels = (value_columns, self.removed_level_full) + new_levels = [ + value_columns, + self.removed_level_full, + ] new_names = [value_columns.name, self.removed_name] new_codes = [propagator] @@ -987,26 +994,27 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame: # Construct the correct MultiIndex by combining the frame's index and # stacked columns. + index_levels: list | FrozenList if isinstance(frame.index, MultiIndex): index_levels = frame.index.levels - index_codes = tuple(np.tile(frame.index.codes, (1, ratio))) + index_codes = list(np.tile(frame.index.codes, (1, ratio))) else: codes, uniques = factorize(frame.index, use_na_sentinel=False) - # Incompatible types in assignment (expression has type - # "tuple[ndarray[Any, Any] | Index]", variable has type "tuple[Index, ...]") - index_levels = (uniques,) # type: ignore[assignment] - index_codes = tuple(np.tile(codes, (1, ratio))) + index_levels = [uniques] + index_codes = list(np.tile(codes, (1, ratio))) if isinstance(ordered_stack_cols, MultiIndex): column_levels = ordered_stack_cols.levels column_codes = ordered_stack_cols.drop_duplicates().codes else: - column_levels = (ordered_stack_cols.unique(),) - column_codes = (factorize(ordered_stack_cols_unique, use_na_sentinel=False)[0],) - column_codes = tuple(np.repeat(codes, len(frame)) for codes in column_codes) + column_levels = [ordered_stack_cols.unique()] + column_codes = [factorize(ordered_stack_cols_unique, use_na_sentinel=False)[0]] + # error: Incompatible types in assignment (expression has type 
"list[ndarray[Any, + # dtype[Any]]]", variable has type "FrozenList") + column_codes = [np.repeat(codes, len(frame)) for codes in column_codes] # type: ignore[assignment] result.index = MultiIndex( levels=index_levels + column_levels, codes=index_codes + column_codes, - names=frame.index.names + ordered_stack_cols.names, + names=frame.index.names + list(ordered_stack_cols.names), verify_integrity=False, ) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index ef115e350462f..d274c1d7a5aff 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -3608,7 +3608,7 @@ def str_extractall(arr, pat, flags: int = 0) -> DataFrame: from pandas import MultiIndex - index = MultiIndex.from_tuples(index_list, names=arr.index.names + ("match",)) + index = MultiIndex.from_tuples(index_list, names=arr.index.names + ["match"]) dtype = _result_dtype(arr) result = arr._constructor_expanddim( diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 07998cdbd40b5..db6078ae636e3 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -820,12 +820,12 @@ def _apply_pairwise( else: idx_codes, idx_levels = factorize(result.index) result_codes = [idx_codes] - result_levels = [idx_levels] # type: ignore[list-item] + result_levels = [idx_levels] result_names = [result.index.name] - # 3) Create the resulting index by combining 1) + 2) + # 3) Create the resulting index by combining 1) + 2) result_codes = groupby_codes + result_codes - result_levels = groupby_levels + result_levels # type: ignore[assignment] + result_levels = groupby_levels + result_levels result_names = self._grouper.names + result_names result_index = MultiIndex( diff --git a/pandas/tests/frame/methods/test_rename_axis.py b/pandas/tests/frame/methods/test_rename_axis.py index 908a3f728c749..dd4a77c6509b8 100644 --- a/pandas/tests/frame/methods/test_rename_axis.py +++ b/pandas/tests/frame/methods/test_rename_axis.py @@ 
-60,15 +60,15 @@ def test_rename_axis_mapper(self): # Test for renaming index using dict result = df.rename_axis(index={"ll": "foo"}) - assert result.index.names == ("foo", "nn") + assert result.index.names == ["foo", "nn"] # Test for renaming index using a function result = df.rename_axis(index=str.upper, axis=0) - assert result.index.names == ("LL", "NN") + assert result.index.names == ["LL", "NN"] # Test for renaming index providing complete list result = df.rename_axis(index=["foo", "goo"]) - assert result.index.names == ("foo", "goo") + assert result.index.names == ["foo", "goo"] # Test for changing index and columns at same time sdf = df.reset_index().set_index("nn").drop(columns=["ll", "y"]) diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index a1968c6c694d5..198cab0e91eab 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -163,7 +163,7 @@ def test_set_index_names(self): ) df.index.name = "name" - assert df.set_index(df.index).index.names == ("name",) + assert df.set_index(df.index).index.names == ["name"] mi = MultiIndex.from_arrays(df[["A", "B"]].T.values, names=["A", "B"]) mi2 = MultiIndex.from_arrays( @@ -172,7 +172,7 @@ def test_set_index_names(self): df = df.set_index(["A", "B"]) - assert df.set_index(df.index).index.names == ("A", "B") + assert df.set_index(df.index).index.names == ["A", "B"] # Check that set_index isn't converting a MultiIndex into an Index assert isinstance(df.set_index(df.index).index, MultiIndex) @@ -292,7 +292,7 @@ def test_set_index_pass_single_array( # only valid column keys are dropped # since B is always passed as array above, nothing is dropped expected = df.set_index(["B"], drop=False, append=append) - expected.index.names = [index_name] + list(name) if append else name + expected.index.names = [index_name] + name if append else name tm.assert_frame_equal(result, expected) @@ -464,12 +464,12 @@ def 
test_set_index_datetime(self): df = df.set_index("label", append=True) tm.assert_index_equal(df.index.levels[0], expected) tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label")) - assert df.index.names == ("datetime", "label") + assert df.index.names == ["datetime", "label"] df = df.swaplevel(0, 1) tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label")) tm.assert_index_equal(df.index.levels[1], expected) - assert df.index.names == ("label", "datetime") + assert df.index.names == ["label", "datetime"] df = DataFrame(np.random.default_rng(2).random(6)) idx1 = DatetimeIndex( diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index b856a7ff5d26b..c146dcc9c2d71 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -857,7 +857,7 @@ def test_sort_index_level_and_column_label( ) # Get index levels from df_idx - levels = list(df_idx.index.names) + levels = df_idx.index.names # Compute expected by sorting on columns and the setting index expected = df_none.sort_values( @@ -875,7 +875,7 @@ def test_sort_column_level_and_index_label( # GH#14353 # Get levels from df_idx - levels = list(df_idx.index.names) + levels = df_idx.index.names # Compute expected by sorting on axis=0, setting index levels, and then # transposing. 
For some cases this will result in a frame with diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 09235f154b188..03db284d892e3 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -805,7 +805,7 @@ def test_unstack_multi_level_cols(self): [[10, 20, 30], [10, 20, 40]], names=["i1", "i2", "i3"] ), ) - assert df.unstack(["i2", "i1"]).columns.names[-2:] == ("i2", "i1") + assert df.unstack(["i2", "i1"]).columns.names[-2:] == ["i2", "i1"] def test_unstack_multi_level_rows_and_cols(self): # PH 28306: Unstack df with multi level cols and rows @@ -1848,7 +1848,7 @@ def test_stack_unstack_preserve_names( unstacked = frame.unstack() assert unstacked.index.name == "first" - assert unstacked.columns.names == ("exp", "second") + assert unstacked.columns.names == ["exp", "second"] restacked = unstacked.stack(future_stack=future_stack) assert restacked.index.names == frame.index.names diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index 81676a5d8520a..1d0f491529b56 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -35,15 +35,15 @@ def test_set_axis_name_mi(self, func): columns=MultiIndex.from_tuples([("C", x) for x in list("xyz")]), ) - level_names = ("L1", "L2") + level_names = ["L1", "L2"] result = methodcaller(func, level_names)(df) assert result.index.names == level_names - assert result.columns.names == (None, None) + assert result.columns.names == [None, None] result = methodcaller(func, level_names, axis=1)(df) - assert result.columns.names == level_names - assert result.index.names == (None, None) + assert result.columns.names == ["L1", "L2"] + assert result.index.names == [None, None] def test_nonzero_single_element(self): df = DataFrame([[False, False]]) diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index cbaf064c379ea..7dcdcd96cce51 100644 --- 
a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -24,9 +24,9 @@ def test_set_axis_name_mi(self, func): result = methodcaller(func, ["L1", "L2"])(ser) assert ser.index.name is None - assert ser.index.names == ("l1", "l2") + assert ser.index.names == ["l1", "l2"] assert result.index.name is None - assert result.index.names == ("L1", "L2") + assert result.index.names == ["L1", "L2"] def test_set_axis_name_raises(self): ser = Series([1]) diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py index 9b825b73c26c0..af0deba138469 100644 --- a/pandas/tests/groupby/methods/test_quantile.py +++ b/pandas/tests/groupby/methods/test_quantile.py @@ -454,8 +454,5 @@ def test_groupby_quantile_nonmulti_levels_order(): tm.assert_series_equal(result, expected) # We need to check that index levels are not sorted - tm.assert_index_equal( - result.index.levels[0], Index(["B", "A"], dtype=object, name="cat1") - ) - tm.assert_index_equal(result.index.levels[1], Index([0.2, 0.8])) - assert isinstance(result.index.levels, tuple) + expected_levels = pd.core.indexes.frozen.FrozenList([["B", "A"], [0.2, 0.8]]) + tm.assert_equal(result.index.levels, expected_levels) diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index a8d359f3206c2..be52b4a591c26 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -108,7 +108,7 @@ def rebuild_index(df): gr = df.groupby(keys, sort=isort) right = gr["3rd"].apply(Series.value_counts, **kwargs) - right.index.names = tuple(list(right.index.names[:-1]) + ["3rd"]) + right.index.names = right.index.names[:-1] + ["3rd"] # https://github.com/pandas-dev/pandas/issues/49909 right = right.rename(name) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 9bd2c22788fac..1a2589fe94ea5 100644 ---
a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -987,7 +987,7 @@ def test_apply_multi_level_name(category): ).set_index(["A", "B"]) result = df.groupby("B", observed=False).apply(lambda x: x.sum()) tm.assert_frame_equal(result, expected) - assert df.index.names == ("A", "B") + assert df.index.names == ["A", "B"] def test_groupby_apply_datetime_result_dtypes(using_infer_string): diff --git a/pandas/tests/indexes/multi/test_astype.py b/pandas/tests/indexes/multi/test_astype.py index c993f425fa132..29908537fbe59 100644 --- a/pandas/tests/indexes/multi/test_astype.py +++ b/pandas/tests/indexes/multi/test_astype.py @@ -11,7 +11,7 @@ def test_astype(idx): actual = idx.astype("O") tm.assert_copy(actual.levels, expected.levels) tm.assert_copy(actual.codes, expected.codes) - assert actual.names == expected.names + assert actual.names == list(expected.names) with pytest.raises(TypeError, match="^Setting.*dtype.*object"): idx.astype(np.dtype(int)) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 2b16f2c4c095d..38e0920b7004e 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -27,7 +27,7 @@ def test_constructor_single_level(): assert isinstance(result, MultiIndex) expected = Index(["foo", "bar", "baz", "qux"], name="first") tm.assert_index_equal(result.levels[0], expected) - assert result.names == ("first",) + assert result.names == ["first"] def test_constructor_no_levels(): @@ -277,7 +277,7 @@ def test_from_arrays_empty(): assert isinstance(result, MultiIndex) expected = Index([], name="A") tm.assert_index_equal(result.levels[0], expected) - assert result.names == ("A",) + assert result.names == ["A"] # N levels for N in [2, 3]: @@ -424,7 +424,7 @@ def test_from_product_empty_one_level(): result = MultiIndex.from_product([[]], names=["A"]) expected = Index([], name="A") tm.assert_index_equal(result.levels[0], 
expected) - assert result.names == ("A",) + assert result.names == ["A"] @pytest.mark.parametrize( @@ -712,7 +712,7 @@ def test_from_frame_dtype_fidelity(): @pytest.mark.parametrize( - "names_in,names_out", [(None, (("L1", "x"), ("L2", "y"))), (["x", "y"], ("x", "y"))] + "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])] ) def test_from_frame_valid_names(names_in, names_out): # GH 22420 @@ -812,13 +812,13 @@ def test_constructor_with_tz(): result = MultiIndex.from_arrays([index, columns]) - assert result.names == ("dt1", "dt2") + assert result.names == ["dt1", "dt2"] tm.assert_index_equal(result.levels[0], index) tm.assert_index_equal(result.levels[1], columns) result = MultiIndex.from_arrays([Series(index), Series(columns)]) - assert result.names == ("dt1", "dt2") + assert result.names == ["dt1", "dt2"] tm.assert_index_equal(result.levels[0], index) tm.assert_index_equal(result.levels[1], columns) diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 14d327093500e..2e09a580f9528 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -70,7 +70,7 @@ def test_copy_method(deep): @pytest.mark.parametrize( "kwarg, value", [ - ("names", ("third", "fourth")), + ("names", ["third", "fourth"]), ], ) def test_copy_method_kwargs(deep, kwarg, value): diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 622520f45f904..1bbeedac3fb10 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -112,7 +112,7 @@ def test_duplicate_multiindex_codes(): mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]]) -@pytest.mark.parametrize("names", [("a", "b", "a"), (1, 1, 2), (1, "a", 1)]) +@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]]) def test_duplicate_level_names(names): # GH18872, GH19029 mi = 
MultiIndex.from_product([[0, 1]] * 3, names=names) diff --git a/pandas/tests/indexes/multi/test_formats.py b/pandas/tests/indexes/multi/test_formats.py index cc6a33c22503d..6ea42349bd04a 100644 --- a/pandas/tests/indexes/multi/test_formats.py +++ b/pandas/tests/indexes/multi/test_formats.py @@ -56,14 +56,14 @@ def test_repr_max_seq_items_equal_to_n(self, idx): ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], - names=('first', 'second'))""" + names=['first', 'second'])""" assert result == expected def test_repr(self, idx): result = idx[:1].__repr__() expected = """\ MultiIndex([('foo', 'one')], - names=('first', 'second'))""" + names=['first', 'second'])""" assert result == expected result = idx.__repr__() @@ -74,7 +74,7 @@ def test_repr(self, idx): ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], - names=('first', 'second'))""" + names=['first', 'second'])""" assert result == expected with pd.option_context("display.max_seq_items", 5): @@ -85,7 +85,7 @@ def test_repr(self, idx): ... ('qux', 'one'), ('qux', 'two')], - names=('first', 'second'), length=6)""" + names=['first', 'second'], length=6)""" assert result == expected # display.max_seq_items == 1 @@ -94,7 +94,7 @@ def test_repr(self, idx): expected = """\ MultiIndex([... 
('qux', 'two')], - names=('first', ...), length=6)""" + names=['first', ...], length=6)""" assert result == expected def test_rjust(self): @@ -105,7 +105,7 @@ def test_rjust(self): result = mi[:1].__repr__() expected = """\ MultiIndex([('a', 9, '2000-01-01 00:00:00')], - names=('a', 'b', 'dti'))""" + names=['a', 'b', 'dti'])""" assert result == expected result = mi[::500].__repr__() @@ -114,7 +114,7 @@ def test_rjust(self): ( 'a', 9, '2000-01-01 00:08:20'), ('abc', 10, '2000-01-01 00:16:40'), ('abc', 10, '2000-01-01 00:25:00')], - names=('a', 'b', 'dti'))""" + names=['a', 'b', 'dti'])""" assert result == expected result = mi.__repr__() @@ -140,7 +140,7 @@ def test_rjust(self): ('abc', 10, '2000-01-01 00:33:17'), ('abc', 10, '2000-01-01 00:33:18'), ('abc', 10, '2000-01-01 00:33:19')], - names=('a', 'b', 'dti'), length=2000)""" + names=['a', 'b', 'dti'], length=2000)""" assert result == expected def test_tuple_width(self): @@ -152,7 +152,7 @@ def test_tuple_width(self): mi = MultiIndex.from_arrays(levels, names=names) result = mi[:1].__repr__() expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], - names=('a', 'b', 'dti_1', 'dti_2', 'dti_3'))""" # noqa: E501 + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" # noqa: E501 assert result == expected result = mi[:10].__repr__() @@ -167,7 +167,7 @@ def test_tuple_width(self): ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)], - names=('a', 'b', 'dti_1', 'dti_2', 'dti_3'))""" + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" assert result == expected result = mi.__repr__() @@ -193,7 +193,7 @@ def test_tuple_width(self): ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...), ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...), ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)], - names=('a', 'b', 'dti_1', 'dti_2', 'dti_3'), 
length=2000)""" + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" assert result == expected def test_multiindex_long_element(self): diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index d17b0aae953cd..dd4bba42eda6f 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -101,16 +101,16 @@ def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data): def test_set_name_methods(idx): # so long as these are synonyms, we don't need to test set_names - index_names = ("first", "second") + index_names = ["first", "second"] assert idx.rename == idx.set_names - new_names = tuple(name + "SUFFIX" for name in index_names) + new_names = [name + "SUFFIX" for name in index_names] ind = idx.set_names(new_names) assert idx.names == index_names assert ind.names == new_names msg = "Length of names must match number of levels in MultiIndex" with pytest.raises(ValueError, match=msg): ind.set_names(new_names + new_names) - new_names2 = tuple(name + "SUFFIX2" for name in new_names) + new_names2 = [name + "SUFFIX2" for name in new_names] res = ind.set_names(new_names2, inplace=True) assert res is None assert ind.names == new_names2 @@ -118,11 +118,11 @@ def test_set_name_methods(idx): # set names for specific level (# GH7792) ind = idx.set_names(new_names[0], level=0) assert idx.names == index_names - assert ind.names == (new_names[0], index_names[1]) + assert ind.names == [new_names[0], index_names[1]] res = ind.set_names(new_names2[0], level=0, inplace=True) assert res is None - assert ind.names == (new_names2[0], index_names[1]) + assert ind.names == [new_names2[0], index_names[1]] # set names for multiple levels ind = idx.set_names(new_names, level=[0, 1]) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index f6d960bd41925..d570e911bf584 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ 
b/pandas/tests/indexes/multi/test_integrity.py @@ -216,9 +216,7 @@ def test_can_hold_identifiers(idx): def test_metadata_immutable(idx): levels, codes = idx.levels, idx.codes # shouldn't be able to set at either the top level or base level - mutable_regex = re.compile( - "does not support mutable operations|does not support item assignment" - ) + mutable_regex = re.compile("does not support mutable operations") with pytest.raises(TypeError, match=mutable_regex): levels[0] = levels[0] with pytest.raises(TypeError, match=mutable_regex): diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py index aff9ebfb1c1e3..45f19b4d70fb9 100644 --- a/pandas/tests/indexes/multi/test_names.py +++ b/pandas/tests/indexes/multi/test_names.py @@ -60,20 +60,20 @@ def test_copy_names(): multi_idx1 = multi_idx.copy() assert multi_idx.equals(multi_idx1) - assert multi_idx.names == ("MyName1", "MyName2") - assert multi_idx1.names == ("MyName1", "MyName2") + assert multi_idx.names == ["MyName1", "MyName2"] + assert multi_idx1.names == ["MyName1", "MyName2"] multi_idx2 = multi_idx.copy(names=["NewName1", "NewName2"]) assert multi_idx.equals(multi_idx2) - assert multi_idx.names == ("MyName1", "MyName2") - assert multi_idx2.names == ("NewName1", "NewName2") + assert multi_idx.names == ["MyName1", "MyName2"] + assert multi_idx2.names == ["NewName1", "NewName2"] multi_idx3 = multi_idx.copy(name=["NewName1", "NewName2"]) assert multi_idx.equals(multi_idx3) - assert multi_idx.names == ("MyName1", "MyName2") - assert multi_idx3.names == ("NewName1", "NewName2") + assert multi_idx.names == ["MyName1", "MyName2"] + assert multi_idx3.names == ["NewName1", "NewName2"] # gh-35592 with pytest.raises(ValueError, match="Length of new names must be 2, got 1"): @@ -85,8 +85,8 @@ def test_copy_names(): def test_names(idx): # names are assigned in setup - assert idx.names == ("first", "second") - level_names = tuple(level.name for level in idx.levels) + assert idx.names 
== ["first", "second"] + level_names = [level.name for level in idx.levels] assert level_names == idx.names # setting bad names on existing diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index d949a390bd97f..d1b4fe8b98760 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -12,13 +12,13 @@ def test_reindex(idx): result, indexer = idx.reindex(list(idx[:4])) assert isinstance(result, MultiIndex) - assert result.names == ("first", "second") + assert result.names == ["first", "second"] assert [level.name for level in result.levels] == ["first", "second"] result, indexer = idx.reindex(list(idx)) assert isinstance(result, MultiIndex) assert indexer is None - assert result.names == ("first", "second") + assert result.names == ["first", "second"] assert [level.name for level in result.levels] == ["first", "second"] @@ -52,27 +52,27 @@ def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx): other_dtype = MultiIndex.from_product([[1, 2], [3, 4]]) # list & ndarray cases - assert idx.reindex([])[0].names == (None, None) - assert idx.reindex(np.array([]))[0].names == (None, None) - assert idx.reindex(target.tolist())[0].names == (None, None) - assert idx.reindex(target.values)[0].names == (None, None) - assert idx.reindex(other_dtype.tolist())[0].names == (None, None) - assert idx.reindex(other_dtype.values)[0].names == (None, None) + assert idx.reindex([])[0].names == [None, None] + assert idx.reindex(np.array([]))[0].names == [None, None] + assert idx.reindex(target.tolist())[0].names == [None, None] + assert idx.reindex(target.values)[0].names == [None, None] + assert idx.reindex(other_dtype.tolist())[0].names == [None, None] + assert idx.reindex(other_dtype.values)[0].names == [None, None] idx.names = ["foo", "bar"] - assert idx.reindex([])[0].names == ("foo", "bar") - assert idx.reindex(np.array([]))[0].names == ("foo", "bar") - assert 
idx.reindex(target.tolist())[0].names == ("foo", "bar") - assert idx.reindex(target.values)[0].names == ("foo", "bar") - assert idx.reindex(other_dtype.tolist())[0].names == ("foo", "bar") - assert idx.reindex(other_dtype.values)[0].names == ("foo", "bar") + assert idx.reindex([])[0].names == ["foo", "bar"] + assert idx.reindex(np.array([]))[0].names == ["foo", "bar"] + assert idx.reindex(target.tolist())[0].names == ["foo", "bar"] + assert idx.reindex(target.values)[0].names == ["foo", "bar"] + assert idx.reindex(other_dtype.tolist())[0].names == ["foo", "bar"] + assert idx.reindex(other_dtype.values)[0].names == ["foo", "bar"] def test_reindex_lvl_preserves_names_when_target_is_list_or_array(): # GH7774 idx = MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"]) - assert idx.reindex([], level=0)[0].names == ("foo", "bar") - assert idx.reindex([], level=1)[0].names == ("foo", "bar") + assert idx.reindex([], level=0)[0].names == ["foo", "bar"] + assert idx.reindex([], level=1)[0].names == ["foo", "bar"] def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array( diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index 1bf91a09ee754..06dbb33aadf97 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -23,7 +23,7 @@ def test_insert(idx): exp0 = Index(list(idx.levels[0]) + ["abc"], name="first") tm.assert_index_equal(new_index.levels[0], exp0) - assert new_index.names == ("first", "second") + assert new_index.names == ["first", "second"] exp1 = Index(list(idx.levels[1]) + ["three"], name="second") tm.assert_index_equal(new_index.levels[1], exp1) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 15076b8705bdc..9354984538c58 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -121,7 +121,7 @@ def test_multiindex_symmetric_difference(): 
idx2 = idx.copy().rename(["A", "B"]) result = idx.symmetric_difference(idx2) - assert result.names == (None, None) + assert result.names == [None, None] def test_empty(idx): diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index a5a678af4aba7..3d21ee8a57716 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -13,6 +13,7 @@ Timestamp, ) import pandas._testing as tm +from pandas.core.indexes.frozen import FrozenList def test_sortlevel(idx): @@ -285,9 +286,8 @@ def test_remove_unused_levels_with_nan(): idx = idx.set_levels(["a", np.nan], level="id1") idx = idx.remove_unused_levels() result = idx.levels - expected = (Index(["a", np.nan], name="id1"), Index([4], name="id2")) - for res, exp in zip(result, expected): - tm.assert_index_equal(res, exp) + expected = FrozenList([["a", np.nan], [4]]) + assert str(result) == str(expected) def test_sort_values_nan(): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 484f647c7a8f9..3a2d04d3ffdc2 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -908,7 +908,7 @@ def test_isin_level_kwarg_bad_level_raises(self, index): @pytest.mark.parametrize("label", [1.0, "foobar", "xyzzy", np.nan]) def test_isin_level_kwarg_bad_label_raises(self, label, index): if isinstance(index, MultiIndex): - index = index.rename(("foo", "bar") + index.names[2:]) + index = index.rename(["foo", "bar"] + index.names[2:]) msg = f"'Level {label} not found'" else: index = index.rename("foo") diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 732f7cc624f86..b6e1c3698c258 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -119,7 +119,7 @@ def test_set_name_methods(self, index_flat): # should return None assert res is None assert index.name == new_name - assert index.names == (new_name,) + assert 
index.names == [new_name] with pytest.raises(ValueError, match="Level must be None"): index.set_names("a", level=0) @@ -127,7 +127,7 @@ def test_set_name_methods(self, index_flat): name = ("A", "B") index.rename(name, inplace=True) assert index.name == name - assert index.names == (name,) + assert index.names == [name] @pytest.mark.xfail def test_set_names_single_label_no_level(self, index_flat): diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py new file mode 100644 index 0000000000000..ace66b5b06a51 --- /dev/null +++ b/pandas/tests/indexes/test_frozen.py @@ -0,0 +1,113 @@ +import re + +import pytest + +from pandas.core.indexes.frozen import FrozenList + + +@pytest.fixture +def lst(): + return [1, 2, 3, 4, 5] + + +@pytest.fixture +def container(lst): + return FrozenList(lst) + + +@pytest.fixture +def unicode_container(): + return FrozenList(["\u05d0", "\u05d1", "c"]) + + +class TestFrozenList: + def check_mutable_error(self, *args, **kwargs): + # Pass whatever function you normally would to pytest.raises + # (after the Exception kind). 
+ mutable_regex = re.compile("does not support mutable operations") + msg = "'(_s)?re.(SRE_)?Pattern' object is not callable" + with pytest.raises(TypeError, match=msg): + mutable_regex(*args, **kwargs) + + def test_no_mutable_funcs(self, container): + def setitem(): + container[0] = 5 + + self.check_mutable_error(setitem) + + def setslice(): + container[1:2] = 3 + + self.check_mutable_error(setslice) + + def delitem(): + del container[0] + + self.check_mutable_error(delitem) + + def delslice(): + del container[0:3] + + self.check_mutable_error(delslice) + + mutable_methods = ("extend", "pop", "remove", "insert") + + for meth in mutable_methods: + self.check_mutable_error(getattr(container, meth)) + + def test_slicing_maintains_type(self, container, lst): + result = container[1:2] + expected = lst[1:2] + self.check_result(result, expected) + + def check_result(self, result, expected): + assert isinstance(result, FrozenList) + assert result == expected + + def test_string_methods_dont_fail(self, container): + repr(container) + str(container) + bytes(container) + + def test_tricky_container(self, unicode_container): + repr(unicode_container) + str(unicode_container) + + def test_add(self, container, lst): + result = container + (1, 2, 3) + expected = FrozenList(lst + [1, 2, 3]) + self.check_result(result, expected) + + result = (1, 2, 3) + container + expected = FrozenList([1, 2, 3] + lst) + self.check_result(result, expected) + + def test_iadd(self, container, lst): + q = r = container + + q += [5] + self.check_result(q, lst + [5]) + + # Other shouldn't be mutated. 
+ self.check_result(r, lst) + + def test_union(self, container, lst): + result = container.union((1, 2, 3)) + expected = FrozenList(lst + [1, 2, 3]) + self.check_result(result, expected) + + def test_difference(self, container): + result = container.difference([2]) + expected = FrozenList([1, 3, 4, 5]) + self.check_result(result, expected) + + def test_difference_dupe(self): + result = FrozenList([1, 2, 3, 2]).difference([2]) + expected = FrozenList([1, 3]) + self.check_result(result, expected) + + def test_tricky_container_to_bytes_raises(self, unicode_container): + # GH 26447 + msg = "^'str' object cannot be interpreted as an integer$" + with pytest.raises(TypeError, match=msg): + bytes(unicode_container) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 78f701fff6e29..dbfabf7666d25 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -150,7 +150,7 @@ def test_getitem_intkey_leading_level( # GH#33355 dont fall-back to positional when leading level is int ymd = multiindex_year_month_day_dataframe_random_data levels = ymd.index.levels - ymd.index = ymd.index.set_levels((levels[0].astype(dtype),) + levels[1:]) + ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:]) ser = ymd["A"] mi = ser.index assert isinstance(mi, MultiIndex) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index afc9974c75e6a..a728f6ec6ca9a 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -115,7 +115,7 @@ def test_multiindex(self, df_schema, using_infer_string): {"name": "C", "type": "datetime"}, {"name": "D", "type": "duration"}, ], - "primaryKey": ("level_0", "level_1"), + "primaryKey": ["level_0", "level_1"], } if using_infer_string: expected["fields"][0] = { @@ -128,7 +128,7 @@ def test_multiindex(self, df_schema, 
using_infer_string): df.index.names = ["idx0", None] expected["fields"][0]["name"] = "idx0" - expected["primaryKey"] = ("idx0", "level_1") + expected["primaryKey"] = ["idx0", "level_1"] result = build_table_schema(df, version=False) assert result == expected @@ -598,21 +598,21 @@ def test_categorical(self): (pd.Index([1], name="myname"), "myname", "name"), ( pd.MultiIndex.from_product([("a", "b"), ("c", "d")]), - ("level_0", "level_1"), + ["level_0", "level_1"], "names", ), ( pd.MultiIndex.from_product( [("a", "b"), ("c", "d")], names=["n1", "n2"] ), - ("n1", "n2"), + ["n1", "n2"], "names", ), ( pd.MultiIndex.from_product( [("a", "b"), ("c", "d")], names=["n1", None] ), - ("n1", "level_1"), + ["n1", "level_1"], "names", ), ], diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index e62df0bc1c977..471f7b8958ee4 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -1024,7 +1024,7 @@ def test_columns_multiindex_modified(tmp_path, setup_path): df.index.name = "letters" df = df.set_index(keys="E", append=True) - data_columns = list(df.index.names) + df.columns.tolist() + data_columns = df.index.names + df.columns.tolist() path = tmp_path / setup_path df.to_hdf( path, diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 67b1311a5a798..3083fa24ba8b5 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2346,18 +2346,15 @@ def test_read_table_index_col(conn, request, test_frame1): sql.to_sql(test_frame1, "test_frame", conn) result = sql.read_sql_table("test_frame", conn, index_col="index") - assert result.index.names == ("index",) + assert result.index.names == ["index"] result = sql.read_sql_table("test_frame", conn, index_col=["A", "B"]) - assert result.index.names == ("A", "B") + assert result.index.names == ["A", "B"] result = sql.read_sql_table( "test_frame", conn, index_col=["A", "B"], columns=["C", "D"] ) - assert 
result.index.names == ( - "A", - "B", - ) + assert result.index.names == ["A", "B"] assert result.columns.tolist() == ["C", "D"] diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 92e756756547d..2f9fd1eb421d4 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -125,7 +125,7 @@ def test_concat_keys_specific_levels(self): tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key")) tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3])) - assert result.columns.names == ("group_key", None) + assert result.columns.names == ["group_key", None] @pytest.mark.parametrize("mapping", ["mapping", "dict"]) def test_concat_mapping(self, mapping, non_dict_mapping_subclass): diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 33d9a721df6b7..7ae2fffa04205 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -814,10 +814,12 @@ def test_join_multi_levels2(self): class TestJoinMultiMulti: def test_join_multi_multi(self, left_multi, right_multi, join_type, on_cols_multi): + left_names = left_multi.index.names + right_names = right_multi.index.names if join_type == "right": - level_order = ["Origin", "Destination", "Period", "LinkType", "TripPurp"] + level_order = right_names + left_names.difference(right_names) else: - level_order = ["Origin", "Destination", "Period", "TripPurp", "LinkType"] + level_order = left_names + right_names.difference(left_names) # Multi-index join tests expected = ( merge( @@ -839,10 +841,12 @@ def test_join_multi_empty_frames( left_multi = left_multi.drop(columns=left_multi.columns) right_multi = right_multi.drop(columns=right_multi.columns) + left_names = left_multi.index.names + right_names = right_multi.index.names if join_type == "right": - level_order = ["Origin", "Destination", "Period", "LinkType", "TripPurp"] 
+ level_order = right_names + left_names.difference(right_names) else: - level_order = ["Origin", "Destination", "Period", "TripPurp", "LinkType"] + level_order = left_names + right_names.difference(left_names) expected = ( merge( diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index c4af63fe5cc81..070c756e8c928 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -135,7 +135,7 @@ def test_crosstab_margins(self): result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"), margins=True) assert result.index.names == ("a",) - assert result.columns.names == ("b", "c") + assert result.columns.names == ["b", "c"] all_cols = result["All", ""] exp_cols = df.groupby(["a"]).size().astype("i8") @@ -173,7 +173,7 @@ def test_crosstab_margins_set_margin_name(self): ) assert result.index.names == ("a",) - assert result.columns.names == ("b", "c") + assert result.columns.names == ["b", "c"] all_cols = result["TOTAL", ""] exp_cols = df.groupby(["a"]).size().astype("i8") diff --git a/pandas/tests/series/methods/test_rename_axis.py b/pandas/tests/series/methods/test_rename_axis.py index 60175242a06b5..58c095d697ede 100644 --- a/pandas/tests/series/methods/test_rename_axis.py +++ b/pandas/tests/series/methods/test_rename_axis.py @@ -15,13 +15,13 @@ def test_rename_axis_mapper(self): ser = Series(list(range(len(mi))), index=mi) result = ser.rename_axis(index={"ll": "foo"}) - assert result.index.names == ("foo", "nn") + assert result.index.names == ["foo", "nn"] result = ser.rename_axis(index=str.upper, axis=0) - assert result.index.names == ("LL", "NN") + assert result.index.names == ["LL", "NN"] result = ser.rename_axis(index=["foo", "goo"]) - assert result.index.names == ("foo", "goo") + assert result.index.names == ["foo", "goo"] with pytest.raises(TypeError, match="unexpected"): ser.rename_axis(columns="wrong") diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 
9c2b9a76bbb83..bcecd1b2d5eec 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -205,6 +205,18 @@ class MyList(list): val = MyList([True]) assert com.is_bool_indexer(val) + def test_frozenlist(self): + # GH#42461 + data = {"col1": [1, 2], "col2": [3, 4]} + df = pd.DataFrame(data=data) + + frozen = df.index.names[1:] + assert not com.is_bool_indexer(frozen) + + result = df[frozen] + expected = df[[]] + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("with_exception", [True, False]) def test_temp_setattr(with_exception): diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 78ff774c188fe..dc6efdcec380e 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -198,8 +198,8 @@ def test_index_equal_names(name1, name2): msg = f"""Index are different Attribute "names" are different -\\[left\\]: \\({name1},\\) -\\[right\\]: \\({name2},\\)""" +\\[left\\]: \\[{name1}\\] +\\[right\\]: \\[{name2}\\]""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 47bfc219d0fe9..85821ed2cfb6f 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -589,7 +589,7 @@ def test_multi_index_names(): result = df.rolling(3).cov() tm.assert_index_equal(result.columns, df.columns) - assert result.index.names == (None, "1", "2") + assert result.index.names == [None, "1", "2"] def test_rolling_axis_sum():