TST/ CLN: Remove makeCustomIndex/DataFrame (#56331)

* Remove makeCustom * Fix mismatches * Remove makeCustomIndex in all * Fix excel
pandas-dev · Dec 5, 2023 · 0b9e784 · 0b9e784
1 parent dc4c474
commit 0b9e784
Show file tree

Hide file tree

Showing 19 changed files with 311 additions and 403 deletions.
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -1490,9 +1490,9 @@ rows will skip the intervening rows.
 
 .. ipython:: python
 
-   from pandas._testing import makeCustomDataframe as mkdf
-
-   df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
+   mi_idx = pd.MultiIndex.from_arrays([[1, 2, 3, 4], list("abcd")], names=list("ab"))
+   mi_col = pd.MultiIndex.from_arrays([[1, 2], list("ab")], names=list("cd"))
+   df = pd.DataFrame(np.ones((4, 2)), index=mi_idx, columns=mi_col)
    df.to_csv("mi.csv")
    print(open("mi.csv").read())
    pd.read_csv("mi.csv", header=[0, 1, 2, 3], index_col=[0, 1])

diff --git a/doc/source/whatsnew/v0.12.0.rst b/doc/source/whatsnew/v0.12.0.rst
@@ -250,9 +250,9 @@ IO enhancements
 
       .. ipython:: python
 
-         from pandas._testing import makeCustomDataframe as mkdf
-
-         df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
+         mi_idx = pd.MultiIndex.from_arrays([[1, 2, 3, 4], list("abcd")], names=list("ab"))
+         mi_col = pd.MultiIndex.from_arrays([[1, 2], list("ab")], names=list("cd"))
+         df = pd.DataFrame(np.ones((4, 2)), index=mi_idx, columns=mi_col)
          df.to_csv("mi.csv")
          print(open("mi.csv").read())
          pd.read_csv("mi.csv", header=[0, 1, 2, 3], index_col=[0, 1])

diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-import collections
-from collections import Counter
 from decimal import Decimal
 import operator
 import os
@@ -24,10 +22,7 @@
 
 from pandas.compat import pa_version_under10p1
 
-from pandas.core.dtypes.common import (
-    is_sequence,
-    is_string_dtype,
-)
+from pandas.core.dtypes.common import is_string_dtype
 
 import pandas as pd
 from pandas import (
@@ -38,9 +33,6 @@
     MultiIndex,
     RangeIndex,
     Series,
-    date_range,
-    period_range,
-    timedelta_range,
 )
 from pandas._testing._io import (
     round_trip_localpath,
@@ -332,229 +324,6 @@ def to_array(obj):
     return extract_array(obj, extract_numpy=True)
 
 
-# -----------------------------------------------------------------------------
-# Others
-
-
-def makeCustomIndex(
-    nentries,
-    nlevels,
-    prefix: str = "#",
-    names: bool | str | list[str] | None = False,
-    ndupe_l=None,
-    idx_type=None,
-) -> Index:
-    """
-    Create an index/multindex with given dimensions, levels, names, etc'
-
-    nentries - number of entries in index
-    nlevels - number of levels (> 1 produces multindex)
-    prefix - a string prefix for labels
-    names - (Optional), bool or list of strings. if True will use default
-       names, if false will use no names, if a list is given, the name of
-       each level in the index will be taken from the list.
-    ndupe_l - (Optional), list of ints, the number of rows for which the
-       label will repeated at the corresponding level, you can specify just
-       the first few, the rest will use the default ndupe_l of 1.
-       len(ndupe_l) <= nlevels.
-    idx_type - "i"/"f"/"s"/"dt"/"p"/"td".
-       If idx_type is not None, `idx_nlevels` must be 1.
-       "i"/"f" creates an integer/float index,
-       "s" creates a string
-       "dt" create a datetime index.
-       "td" create a datetime index.
-
-        if unspecified, string labels will be generated.
-    """
-    if ndupe_l is None:
-        ndupe_l = [1] * nlevels
-    assert is_sequence(ndupe_l) and len(ndupe_l) <= nlevels
-    assert names is None or names is False or names is True or len(names) is nlevels
-    assert idx_type is None or (
-        idx_type in ("i", "f", "s", "u", "dt", "p", "td") and nlevels == 1
-    )
-
-    if names is True:
-        # build default names
-        names = [prefix + str(i) for i in range(nlevels)]
-    if names is False:
-        # pass None to index constructor for no name
-        names = None
-
-    # make singleton case uniform
-    if isinstance(names, str) and nlevels == 1:
-        names = [names]
-
-    # specific 1D index type requested?
-    idx_func_dict: dict[str, Callable[..., Index]] = {
-        "i": lambda n: Index(np.arange(n), dtype=np.int64),
-        "f": lambda n: Index(np.arange(n), dtype=np.float64),
-        "s": lambda n: Index([f"{i}_{chr(i)}" for i in range(97, 97 + n)]),
-        "dt": lambda n: date_range("2020-01-01", periods=n),
-        "td": lambda n: timedelta_range("1 day", periods=n),
-        "p": lambda n: period_range("2020-01-01", periods=n, freq="D"),
-    }
-    idx_func = idx_func_dict.get(idx_type)
-    if idx_func:
-        idx = idx_func(nentries)
-        # but we need to fill in the name
-        if names:
-            idx.name = names[0]
-        return idx
-    elif idx_type is not None:
-        raise ValueError(
-            f"{repr(idx_type)} is not a legal value for `idx_type`, "
-            "use  'i'/'f'/'s'/'dt'/'p'/'td'."
-        )
-
-    if len(ndupe_l) < nlevels:
-        ndupe_l.extend([1] * (nlevels - len(ndupe_l)))
-    assert len(ndupe_l) == nlevels
-
-    assert all(x > 0 for x in ndupe_l)
-
-    list_of_lists = []
-    for i in range(nlevels):
-
-        def keyfunc(x):
-            numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_")
-            return [int(num) for num in numeric_tuple]
-
-        # build a list of lists to create the index from
-        div_factor = nentries // ndupe_l[i] + 1
-
-        # Deprecated since version 3.9: collections.Counter now supports []. See PEP 585
-        # and Generic Alias Type.
-        cnt: Counter[str] = collections.Counter()
-        for j in range(div_factor):
-            label = f"{prefix}_l{i}_g{j}"
-            cnt[label] = ndupe_l[i]
-        # cute Counter trick
-        result = sorted(cnt.elements(), key=keyfunc)[:nentries]
-        list_of_lists.append(result)
-
-    tuples = list(zip(*list_of_lists))
-
-    # convert tuples to index
-    if nentries == 1:
-        # we have a single level of tuples, i.e. a regular Index
-        name = None if names is None else names[0]
-        index = Index(tuples[0], name=name)
-    elif nlevels == 1:
-        name = None if names is None else names[0]
-        index = Index((x[0] for x in tuples), name=name)
-    else:
-        index = MultiIndex.from_tuples(tuples, names=names)
-    return index
-
-
-def makeCustomDataframe(
-    nrows,
-    ncols,
-    c_idx_names: bool | list[str] = True,
-    r_idx_names: bool | list[str] = True,
-    c_idx_nlevels: int = 1,
-    r_idx_nlevels: int = 1,
-    data_gen_f=None,
-    c_ndupe_l=None,
-    r_ndupe_l=None,
-    dtype=None,
-    c_idx_type=None,
-    r_idx_type=None,
-) -> DataFrame:
-    """
-    Create a DataFrame using supplied parameters.
-
-    Parameters
-    ----------
-    nrows,  ncols - number of data rows/cols
-    c_idx_names, r_idx_names  - False/True/list of strings,  yields No names ,
-            default names or uses the provided names for the levels of the
-            corresponding index. You can provide a single string when
-            c_idx_nlevels ==1.
-    c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex
-    r_idx_nlevels - number of levels in rows index. > 1 will yield MultiIndex
-    data_gen_f - a function f(row,col) which return the data value
-            at that position, the default generator used yields values of the form
-            "RxCy" based on position.
-    c_ndupe_l, r_ndupe_l - list of integers, determines the number
-            of duplicates for each label at a given level of the corresponding
-            index. The default `None` value produces a multiplicity of 1 across
-            all levels, i.e. a unique index. Will accept a partial list of length
-            N < idx_nlevels, for just the first N levels. If ndupe doesn't divide
-            nrows/ncol, the last label might have lower multiplicity.
-    dtype - passed to the DataFrame constructor as is, in case you wish to
-            have more control in conjunction with a custom `data_gen_f`
-    r_idx_type, c_idx_type -  "i"/"f"/"s"/"dt"/"td".
-        If idx_type is not None, `idx_nlevels` must be 1.
-        "i"/"f" creates an integer/float index,
-        "s" creates a string index
-        "dt" create a datetime index.
-        "td" create a timedelta index.
-
-            if unspecified, string labels will be generated.
-
-    Examples
-    --------
-    # 5 row, 3 columns, default names on both, single index on both axis
-    >> makeCustomDataframe(5,3)
-
-    # make the data a random int between 1 and 100
-    >> mkdf(5,3,data_gen_f=lambda r,c:randint(1,100))
-
-    # 2-level multiindex on rows with each label duplicated
-    # twice on first level, default names on both axis, single
-    # index on both axis
-    >> a=makeCustomDataframe(5,3,r_idx_nlevels=2,r_ndupe_l=[2])
-
-    # DatetimeIndex on row, index with unicode labels on columns
-    # no names on either axis
-    >> a=makeCustomDataframe(5,3,c_idx_names=False,r_idx_names=False,
-                             r_idx_type="dt",c_idx_type="u")
-
-    # 4-level multindex on rows with names provided, 2-level multindex
-    # on columns with default labels and default names.
-    >> a=makeCustomDataframe(5,3,r_idx_nlevels=4,
-                             r_idx_names=["FEE","FIH","FOH","FUM"],
-                             c_idx_nlevels=2)
-
-    >> a=mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
-    """
-    assert c_idx_nlevels > 0
-    assert r_idx_nlevels > 0
-    assert r_idx_type is None or (
-        r_idx_type in ("i", "f", "s", "dt", "p", "td") and r_idx_nlevels == 1
-    )
-    assert c_idx_type is None or (
-        c_idx_type in ("i", "f", "s", "dt", "p", "td") and c_idx_nlevels == 1
-    )
-
-    columns = makeCustomIndex(
-        ncols,
-        nlevels=c_idx_nlevels,
-        prefix="C",
-        names=c_idx_names,
-        ndupe_l=c_ndupe_l,
-        idx_type=c_idx_type,
-    )
-    index = makeCustomIndex(
-        nrows,
-        nlevels=r_idx_nlevels,
-        prefix="R",
-        names=r_idx_names,
-        ndupe_l=r_ndupe_l,
-        idx_type=r_idx_type,
-    )
-
-    # by default, generate data based on location
-    if data_gen_f is None:
-        data_gen_f = lambda r, c: f"R{r}C{c}"
-
-    data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)]
-
-    return DataFrame(data, index, columns, dtype=dtype)
-
-
 class SubclassedSeries(Series):
     _metadata = ["testattr", "name"]
 
@@ -868,8 +637,6 @@ def shares_memory(left, right) -> bool:
     "iat",
     "iloc",
     "loc",
-    "makeCustomDataframe",
-    "makeCustomIndex",
     "maybe_produces_warning",
     "NARROW_NP_DTYPES",
     "NP_NAT_OBJECTS",