Skip to content

Commit

Permalink
TST/CLN: Remove makeCategoricalIndex (#56186)
Browse files Browse the repository at this point in the history
* TST/CLN: Remove makeCategoricalIndex

* Remove usage in asv_bench

* Adjust xarray test for more categories

* Remove rands_array
  • Loading branch information
mroeschke authored Nov 28, 2023
1 parent 5ad9abd commit e973b42
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 51 deletions.
8 changes: 3 additions & 5 deletions asv_bench/benchmarks/categoricals.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@

import pandas as pd

from .pandas_vb_common import tm

try:
from pandas.api.types import union_categoricals
except ImportError:
Expand Down Expand Up @@ -189,7 +187,7 @@ def setup(self):
N = 10**5
ncats = 15

self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
self.s_str = pd.Series(np.random.randint(0, ncats, size=N).astype(str))
self.s_str_cat = pd.Series(self.s_str, dtype="category")
with warnings.catch_warnings(record=True):
str_cat_type = pd.CategoricalDtype(set(self.s_str), ordered=True)
Expand Down Expand Up @@ -242,7 +240,7 @@ def time_categorical_series_is_monotonic_decreasing(self):
class Contains:
def setup(self):
N = 10**5
self.ci = tm.makeCategoricalIndex(N)
self.ci = pd.CategoricalIndex(np.arange(N))
self.c = self.ci.values
self.key = self.ci.categories[0]

Expand Down Expand Up @@ -325,7 +323,7 @@ def time_sort_values(self):
class SearchSorted:
def setup(self):
N = 10**5
self.ci = tm.makeCategoricalIndex(N).sort_values()
self.ci = pd.CategoricalIndex(np.arange(N)).sort_values()
self.c = self.ci.values
self.key = self.ci.categories[1]

Expand Down
28 changes: 0 additions & 28 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
from pandas import (
ArrowDtype,
Categorical,
CategoricalIndex,
DataFrame,
DatetimeIndex,
Index,
Expand Down Expand Up @@ -348,36 +347,10 @@ def to_array(obj):
# Others


def rands_array(
    nchars, size: int, dtype: NpDtype = "O", replace: bool = True
) -> np.ndarray:
    """
    Generate an array of random alphanumeric strings.

    Parameters
    ----------
    nchars : int
        Number of characters in each generated string.
    size : int
        Number of strings to generate; the result is reshaped to this value.
    dtype : NpDtype, default "O"
        dtype the fixed-width string array is cast to before it is returned.
    replace : bool, default True
        Forwarded to ``Generator.choice``. With ``replace=False`` no character
        is drawn twice, so ``nchars * size`` must not exceed the 62 characters
        in the pool (ASCII letters plus digits).

    Returns
    -------
    np.ndarray
        Array of shape ``size`` whose elements are ``nchars``-character
        strings, cast to ``dtype``.

    Notes
    -----
    A generator seeded with the same fixed value is created on every call, so
    identical arguments always produce identical output.
    """
    # Pool of candidate characters, stored as one-character strings.
    chars = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1))
    n_draws = nchars * np.prod(size)
    drawn = np.random.default_rng(2).choice(chars, size=n_draws, replace=replace)
    # Reinterpret every run of ``nchars`` single characters as one string.
    retval = drawn.view((np.str_, nchars)).reshape(size)
    return retval.astype(dtype)


def getCols(k) -> str:
    """
    Return the first ``k`` uppercase ASCII letters as a single string.

    ``k`` is used as a slice bound, so a value larger than the alphabet
    yields all 26 letters rather than raising.
    """
    return string.ascii_uppercase[:k]


def makeCategoricalIndex(
    k: int = 10, n: int = 3, name=None, **kwargs
) -> CategoricalIndex:
    """
    Make a length-``k`` CategoricalIndex with ``n`` categories.

    Parameters
    ----------
    k : int, default 10
        Number of entries in the returned index.
    n : int, default 3
        Number of categories. The categories are 4-character strings drawn
        without replacement from a 62-character pool, so ``n`` cannot
        exceed 15.
    name : optional
        Passed to ``CategoricalIndex`` as ``name``.
    **kwargs
        Forwarded unchanged to the ``CategoricalIndex`` constructor.

    Returns
    -------
    CategoricalIndex
        Index whose codes cycle ``0, 1, ..., n - 1`` for ``k`` entries.
    """
    # ``replace=False`` keeps every drawn character, and so every category, unique.
    categories = rands_array(nchars=4, size=n, replace=False)
    # Cycle through the category codes until ``k`` entries are produced.
    codes = np.arange(k) % n
    return CategoricalIndex(
        Categorical.from_codes(codes, categories=categories), name=name, **kwargs
    )


def makeNumericIndex(k: int = 10, *, name=None, dtype: Dtype | None) -> Index:
dtype = pandas_dtype(dtype)
assert isinstance(dtype, np.dtype)
Expand Down Expand Up @@ -998,7 +971,6 @@ def shares_memory(left, right) -> bool:
"iat",
"iloc",
"loc",
"makeCategoricalIndex",
"makeCustomDataframe",
"makeCustomIndex",
"makeDataFrame",
Expand Down
3 changes: 2 additions & 1 deletion pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@

import pandas as pd
from pandas import (
CategoricalIndex,
DataFrame,
Interval,
IntervalIndex,
Expand Down Expand Up @@ -632,7 +633,7 @@ def _create_mi_with_dt64tz_level():
"bool-dtype": Index(np.random.default_rng(2).standard_normal(10) < 0),
"complex64": tm.makeNumericIndex(100, dtype="float64").astype("complex64"),
"complex128": tm.makeNumericIndex(100, dtype="float64").astype("complex128"),
"categorical": tm.makeCategoricalIndex(100),
"categorical": CategoricalIndex(list("abcd") * 25),
"interval": IntervalIndex.from_breaks(np.linspace(0, 100, num=101)),
"empty": Index([]),
"tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/methods/test_set_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from pandas import (
Categorical,
CategoricalIndex,
DataFrame,
DatetimeIndex,
Index,
Expand Down Expand Up @@ -398,8 +399,7 @@ def test_set_index_pass_multiindex(self, frame_of_index_cols, drop, append):
tm.assert_frame_equal(result, expected)

def test_construction_with_categorical_index(self):
ci = tm.makeCategoricalIndex(10)
ci.name = "B"
ci = CategoricalIndex(list("ab") * 5, name="B")

# with Categorical
df = DataFrame(
Expand Down
24 changes: 12 additions & 12 deletions pandas/tests/generic/test_to_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ class TestDataFrameToXArray:
def df(self):
return DataFrame(
{
"a": list("abc"),
"b": list(range(1, 4)),
"c": np.arange(3, 6).astype("u1"),
"d": np.arange(4.0, 7.0, dtype="float64"),
"e": [True, False, True],
"f": Categorical(list("abc")),
"g": date_range("20130101", periods=3),
"h": date_range("20130101", periods=3, tz="US/Eastern"),
"a": list("abcd"),
"b": list(range(1, 5)),
"c": np.arange(3, 7).astype("u1"),
"d": np.arange(4.0, 8.0, dtype="float64"),
"e": [True, False, True, False],
"f": Categorical(list("abcd")),
"g": date_range("20130101", periods=4),
"h": date_range("20130101", periods=4, tz="US/Eastern"),
}
)

Expand All @@ -37,11 +37,11 @@ def test_to_xarray_index_types(self, index_flat, df, using_infer_string):

from xarray import Dataset

df.index = index[:3]
df.index = index[:4]
df.index.name = "foo"
df.columns.name = "bar"
result = df.to_xarray()
assert result.dims["foo"] == 3
assert result.dims["foo"] == 4
assert len(result.coords) == 1
assert len(result.data_vars) == 8
tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
Expand Down Expand Up @@ -69,10 +69,10 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string):
from xarray import Dataset

# MultiIndex
df.index = MultiIndex.from_product([["a"], range(3)], names=["one", "two"])
df.index = MultiIndex.from_product([["a"], range(4)], names=["one", "two"])
result = df.to_xarray()
assert result.dims["one"] == 1
assert result.dims["two"] == 3
assert result.dims["two"] == 4
assert len(result.coords) == 2
assert len(result.data_vars) == 8
tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/categorical/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def test_ensure_copied_data(self):
#
# Must be tested separately from other indexes because
# self.values is not an ndarray.
index = tm.makeCategoricalIndex(10)
index = CategoricalIndex(list("ab") * 5)

result = CategoricalIndex(index.values, copy=True)
tm.assert_index_equal(index, result)
Expand All @@ -261,7 +261,7 @@ def test_ensure_copied_data(self):
class TestCategoricalIndex2:
def test_view_i8(self):
# GH#25464
ci = tm.makeCategoricalIndex(100)
ci = CategoricalIndex(list("ab") * 50)
msg = "When changing to a larger dtype, its size must be a divisor"
with pytest.raises(ValueError, match=msg):
ci.view("i8")
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ def test_tab_completion_with_categorical(self):
@pytest.mark.parametrize(
"index",
[
Index(list("ab") * 5, dtype="category"),
Index([str(i) for i in range(10)]),
tm.makeCategoricalIndex(10),
Index(["foo", "bar", "baz"] * 2),
tm.makeDateIndex(10),
tm.makePeriodIndex(10),
Expand Down

0 comments on commit e973b42

Please sign in to comment.