From 0b9e784fe5606d8383a8e825e5475d35e9552a2a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 5 Dec 2023 07:50:18 -1000 Subject: [PATCH] TST/ CLN: Remove makeCustomIndex/DataFrame (#56331) * Remove makeCustom * Fix mismatches * Remove makeCustomIndex in all * Fix excel --- doc/source/user_guide/io.rst | 6 +- doc/source/whatsnew/v0.12.0.rst | 6 +- pandas/_testing/__init__.py | 235 +----------------- pandas/tests/computation/test_eval.py | 125 +++++++--- .../tests/frame/methods/test_select_dtypes.py | 7 +- pandas/tests/frame/methods/test_to_csv.py | 106 ++++++-- pandas/tests/frame/test_query_eval.py | 9 +- pandas/tests/indexes/datetimes/test_join.py | 22 +- pandas/tests/indexes/multi/test_indexing.py | 6 +- pandas/tests/indexes/period/test_join.py | 12 +- pandas/tests/indexes/timedeltas/test_join.py | 11 +- pandas/tests/indexing/test_iloc.py | 6 +- pandas/tests/indexing/test_indexing.py | 3 +- pandas/tests/indexing/test_loc.py | 6 +- pandas/tests/io/excel/test_writers.py | 62 +++-- pandas/tests/io/parser/test_header.py | 18 +- pandas/tests/io/test_clipboard.py | 45 +--- pandas/tests/io/test_html.py | 9 +- pandas/tests/reshape/concat/test_invalid.py | 20 +- 19 files changed, 311 insertions(+), 403 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 21e07e8d00ad6..863a663fc2413 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1490,9 +1490,9 @@ rows will skip the intervening rows. .. ipython:: python - from pandas._testing import makeCustomDataframe as mkdf - - df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + mi_idx = pd.MultiIndex.from_arrays([[1, 2, 3, 4], list("abcd")], names=list("ab")) + mi_col = pd.MultiIndex.from_arrays([[1, 2], list("ab")], names=list("cd")) + df = pd.DataFrame(np.ones((4, 2)), index=mi_idx, columns=mi_col) df.to_csv("mi.csv") print(open("mi.csv").read()) pd.read_csv("mi.csv", header=[0, 1, 2, 3], index_col=[0, 1]) diff --git a/doc/source/whatsnew/v0.12.0.rst b/doc/source/whatsnew/v0.12.0.rst index 091efe1b421c7..59d104cb3e96c 100644 --- a/doc/source/whatsnew/v0.12.0.rst +++ b/doc/source/whatsnew/v0.12.0.rst @@ -250,9 +250,9 @@ IO enhancements .. 
ipython:: python - from pandas._testing import makeCustomDataframe as mkdf - - df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + mi_idx = pd.MultiIndex.from_arrays([[1, 2, 3, 4], list("abcd")], names=list("ab")) + mi_col = pd.MultiIndex.from_arrays([[1, 2], list("ab")], names=list("cd")) + df = pd.DataFrame(np.ones((4, 2)), index=mi_idx, columns=mi_col) df.to_csv("mi.csv") print(open("mi.csv").read()) pd.read_csv("mi.csv", header=[0, 1, 2, 3], index_col=[0, 1]) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index c5d338605b2ab..0036c3ab25bf1 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -1,7 +1,5 @@ from __future__ import annotations -import collections -from collections import Counter from decimal import Decimal import operator import os @@ -24,10 +22,7 @@ from pandas.compat import pa_version_under10p1 -from pandas.core.dtypes.common import ( - is_sequence, - is_string_dtype, -) +from pandas.core.dtypes.common import is_string_dtype import pandas as pd from pandas import ( @@ -38,9 +33,6 @@ MultiIndex, RangeIndex, Series, - date_range, - period_range, - timedelta_range, ) from pandas._testing._io import ( round_trip_localpath, @@ -332,229 +324,6 @@ def to_array(obj): return extract_array(obj, extract_numpy=True) -# ----------------------------------------------------------------------------- -# Others - - -def makeCustomIndex( - nentries, - nlevels, - prefix: str = "#", - names: bool | str | list[str] | None = False, - ndupe_l=None, - idx_type=None, -) -> Index: - """ - Create an index/multindex with given dimensions, levels, names, etc' - - nentries - number of entries in index - nlevels - number of levels (> 1 produces multindex) - prefix - a string prefix for labels - names - (Optional), bool or list of strings. if True will use default - names, if false will use no names, if a list is given, the name of - each level in the index will be taken from the list. - ndupe_l - (Optional), list of ints, the number of rows for which the - label will repeated at the corresponding level, you can specify just - the first few, the rest will use the default ndupe_l of 1. - len(ndupe_l) <= nlevels. - idx_type - "i"/"f"/"s"/"dt"/"p"/"td". - If idx_type is not None, `idx_nlevels` must be 1. - "i"/"f" creates an integer/float index, - "s" creates a string - "dt" create a datetime index. - "td" create a datetime index. - - if unspecified, string labels will be generated. - """ - if ndupe_l is None: - ndupe_l = [1] * nlevels - assert is_sequence(ndupe_l) and len(ndupe_l) <= nlevels - assert names is None or names is False or names is True or len(names) is nlevels - assert idx_type is None or ( - idx_type in ("i", "f", "s", "u", "dt", "p", "td") and nlevels == 1 - ) - - if names is True: - # build default names - names = [prefix + str(i) for i in range(nlevels)] - if names is False: - # pass None to index constructor for no name - names = None - - # make singleton case uniform - if isinstance(names, str) and nlevels == 1: - names = [names] - - # specific 1D index type requested? 
- idx_func_dict: dict[str, Callable[..., Index]] = { - "i": lambda n: Index(np.arange(n), dtype=np.int64), - "f": lambda n: Index(np.arange(n), dtype=np.float64), - "s": lambda n: Index([f"{i}_{chr(i)}" for i in range(97, 97 + n)]), - "dt": lambda n: date_range("2020-01-01", periods=n), - "td": lambda n: timedelta_range("1 day", periods=n), - "p": lambda n: period_range("2020-01-01", periods=n, freq="D"), - } - idx_func = idx_func_dict.get(idx_type) - if idx_func: - idx = idx_func(nentries) - # but we need to fill in the name - if names: - idx.name = names[0] - return idx - elif idx_type is not None: - raise ValueError( - f"{repr(idx_type)} is not a legal value for `idx_type`, " - "use 'i'/'f'/'s'/'dt'/'p'/'td'." - ) - - if len(ndupe_l) < nlevels: - ndupe_l.extend([1] * (nlevels - len(ndupe_l))) - assert len(ndupe_l) == nlevels - - assert all(x > 0 for x in ndupe_l) - - list_of_lists = [] - for i in range(nlevels): - - def keyfunc(x): - numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_") - return [int(num) for num in numeric_tuple] - - # build a list of lists to create the index from - div_factor = nentries // ndupe_l[i] + 1 - - # Deprecated since version 3.9: collections.Counter now supports []. See PEP 585 - # and Generic Alias Type. - cnt: Counter[str] = collections.Counter() - for j in range(div_factor): - label = f"{prefix}_l{i}_g{j}" - cnt[label] = ndupe_l[i] - # cute Counter trick - result = sorted(cnt.elements(), key=keyfunc)[:nentries] - list_of_lists.append(result) - - tuples = list(zip(*list_of_lists)) - - # convert tuples to index - if nentries == 1: - # we have a single level of tuples, i.e. a regular Index - name = None if names is None else names[0] - index = Index(tuples[0], name=name) - elif nlevels == 1: - name = None if names is None else names[0] - index = Index((x[0] for x in tuples), name=name) - else: - index = MultiIndex.from_tuples(tuples, names=names) - return index - - -def makeCustomDataframe( - nrows, - ncols, - c_idx_names: bool | list[str] = True, - r_idx_names: bool | list[str] = True, - c_idx_nlevels: int = 1, - r_idx_nlevels: int = 1, - data_gen_f=None, - c_ndupe_l=None, - r_ndupe_l=None, - dtype=None, - c_idx_type=None, - r_idx_type=None, -) -> DataFrame: - """ - Create a DataFrame using supplied parameters. - - Parameters - ---------- - nrows, ncols - number of data rows/cols - c_idx_names, r_idx_names - False/True/list of strings, yields No names , - default names or uses the provided names for the levels of the - corresponding index. You can provide a single string when - c_idx_nlevels ==1. - c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex - r_idx_nlevels - number of levels in rows index. > 1 will yield MultiIndex - data_gen_f - a function f(row,col) which return the data value - at that position, the default generator used yields values of the form - "RxCy" based on position. - c_ndupe_l, r_ndupe_l - list of integers, determines the number - of duplicates for each label at a given level of the corresponding - index. The default `None` value produces a multiplicity of 1 across - all levels, i.e. a unique index. Will accept a partial list of length - N < idx_nlevels, for just the first N levels. If ndupe doesn't divide - nrows/ncol, the last label might have lower multiplicity. - dtype - passed to the DataFrame constructor as is, in case you wish to - have more control in conjunction with a custom `data_gen_f` - r_idx_type, c_idx_type - "i"/"f"/"s"/"dt"/"td". - If idx_type is not None, `idx_nlevels` must be 1. 
- "i"/"f" creates an integer/float index, - "s" creates a string index - "dt" create a datetime index. - "td" create a timedelta index. - - if unspecified, string labels will be generated. - - Examples - -------- - # 5 row, 3 columns, default names on both, single index on both axis - >> makeCustomDataframe(5,3) - - # make the data a random int between 1 and 100 - >> mkdf(5,3,data_gen_f=lambda r,c:randint(1,100)) - - # 2-level multiindex on rows with each label duplicated - # twice on first level, default names on both axis, single - # index on both axis - >> a=makeCustomDataframe(5,3,r_idx_nlevels=2,r_ndupe_l=[2]) - - # DatetimeIndex on row, index with unicode labels on columns - # no names on either axis - >> a=makeCustomDataframe(5,3,c_idx_names=False,r_idx_names=False, - r_idx_type="dt",c_idx_type="u") - - # 4-level multindex on rows with names provided, 2-level multindex - # on columns with default labels and default names. - >> a=makeCustomDataframe(5,3,r_idx_nlevels=4, - r_idx_names=["FEE","FIH","FOH","FUM"], - c_idx_nlevels=2) - - >> a=mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4) - """ - assert c_idx_nlevels > 0 - assert r_idx_nlevels > 0 - assert r_idx_type is None or ( - r_idx_type in ("i", "f", "s", "dt", "p", "td") and r_idx_nlevels == 1 - ) - assert c_idx_type is None or ( - c_idx_type in ("i", "f", "s", "dt", "p", "td") and c_idx_nlevels == 1 - ) - - columns = makeCustomIndex( - ncols, - nlevels=c_idx_nlevels, - prefix="C", - names=c_idx_names, - ndupe_l=c_ndupe_l, - idx_type=c_idx_type, - ) - index = makeCustomIndex( - nrows, - nlevels=r_idx_nlevels, - prefix="R", - names=r_idx_names, - ndupe_l=r_ndupe_l, - idx_type=r_idx_type, - ) - - # by default, generate data based on location - if data_gen_f is None: - data_gen_f = lambda r, c: f"R{r}C{c}" - - data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)] - - return DataFrame(data, index, columns, dtype=dtype) - - class SubclassedSeries(Series): _metadata = ["testattr", "name"] @@ -868,8 +637,6 @@ def shares_memory(left, right) -> bool: "iat", "iloc", "loc", - "makeCustomDataframe", - "makeCustomIndex", "maybe_produces_warning", "NARROW_NP_DTYPES", "NP_NAT_OBJECTS", diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 75473b8c50f4e..17630f14b08c7 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -25,8 +25,11 @@ import pandas as pd from pandas import ( DataFrame, + Index, Series, date_range, + period_range, + timedelta_range, ) import pandas._testing as tm from pandas.core.computation import ( @@ -115,6 +118,18 @@ def lhs(request): midhs = lhs +@pytest.fixture +def idx_func_dict(): + return { + "i": lambda n: Index(np.arange(n), dtype=np.int64), + "f": lambda n: Index(np.arange(n), dtype=np.float64), + "s": lambda n: Index([f"{i}_{chr(i)}" for i in range(97, 97 + n)]), + "dt": lambda n: date_range("2020-01-01", periods=n), + "td": lambda n: timedelta_range("1 day", periods=n), + "p": lambda n: period_range("2020-01-01", periods=n, freq="D"), + } + + class TestEval: @pytest.mark.parametrize( "cmp1", @@ -724,9 +739,6 @@ def test_and_logic_string_match(self): assert pd.eval(f"{event.str.match('hello').a and event.str.match('hello').a}") -f = lambda *args, **kwargs: np.random.default_rng(2).standard_normal() - - # ------------------------------------- # gh-12388: Typecasting rules consistency with python @@ -738,7 +750,7 @@ class TestTypeCasting: @pytest.mark.parametrize("dt", [np.float32, np.float64]) 
@pytest.mark.parametrize("left_right", [("df", "3"), ("3", "df")]) def test_binop_typecasting(self, engine, parser, op, dt, left_right): - df = tm.makeCustomDataframe(5, 3, data_gen_f=f, dtype=dt) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)), dtype=dt) left, right = left_right s = f"{left} {op} {right}" res = pd.eval(s, engine=engine, parser=parser) @@ -765,7 +777,7 @@ class TestAlignment: def test_align_nested_unary_op(self, engine, parser): s = "df * ~2" - df = tm.makeCustomDataframe(5, 3, data_gen_f=f) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) res = pd.eval(s, engine=engine, parser=parser) tm.assert_frame_equal(res, df * ~2) @@ -774,13 +786,17 @@ def test_align_nested_unary_op(self, engine, parser): @pytest.mark.parametrize("rr_idx_type", index_types) @pytest.mark.parametrize("c_idx_type", index_types) def test_basic_frame_alignment( - self, engine, parser, lr_idx_type, rr_idx_type, c_idx_type + self, engine, parser, lr_idx_type, rr_idx_type, c_idx_type, idx_func_dict ): - df = tm.makeCustomDataframe( - 10, 10, data_gen_f=f, r_idx_type=lr_idx_type, c_idx_type=c_idx_type + df = DataFrame( + np.random.default_rng(2).standard_normal((10, 10)), + index=idx_func_dict[lr_idx_type](10), + columns=idx_func_dict[c_idx_type](10), ) - df2 = tm.makeCustomDataframe( - 20, 10, data_gen_f=f, r_idx_type=rr_idx_type, c_idx_type=c_idx_type + df2 = DataFrame( + np.random.default_rng(2).standard_normal((20, 10)), + index=idx_func_dict[rr_idx_type](20), + columns=idx_func_dict[c_idx_type](10), ) # only warns if not monotonic and not sortable if should_warn(df.index, df2.index): @@ -792,9 +808,13 @@ def test_basic_frame_alignment( @pytest.mark.parametrize("r_idx_type", lhs_index_types) @pytest.mark.parametrize("c_idx_type", lhs_index_types) - def test_frame_comparison(self, engine, parser, r_idx_type, c_idx_type): - df = tm.makeCustomDataframe( - 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + def test_frame_comparison( + self, engine, parser, r_idx_type, c_idx_type, idx_func_dict + ): + df = DataFrame( + np.random.default_rng(2).standard_normal((10, 10)), + index=idx_func_dict[r_idx_type](10), + columns=idx_func_dict[c_idx_type](10), ) res = pd.eval("df < 2", engine=engine, parser=parser) tm.assert_frame_equal(res, df < 2) @@ -812,10 +832,24 @@ def test_frame_comparison(self, engine, parser, r_idx_type, c_idx_type): @pytest.mark.parametrize("c1", index_types) @pytest.mark.parametrize("r2", index_types) @pytest.mark.parametrize("c2", index_types) - def test_medium_complex_frame_alignment(self, engine, parser, r1, c1, r2, c2): - df = tm.makeCustomDataframe(3, 2, data_gen_f=f, r_idx_type=r1, c_idx_type=c1) - df2 = tm.makeCustomDataframe(4, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2) - df3 = tm.makeCustomDataframe(5, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2) + def test_medium_complex_frame_alignment( + self, engine, parser, r1, c1, r2, c2, idx_func_dict + ): + df = DataFrame( + np.random.default_rng(2).standard_normal((3, 2)), + index=idx_func_dict[r1](3), + columns=idx_func_dict[c1](2), + ) + df2 = DataFrame( + np.random.default_rng(2).standard_normal((4, 2)), + index=idx_func_dict[r2](4), + columns=idx_func_dict[c2](2), + ) + df3 = DataFrame( + np.random.default_rng(2).standard_normal((5, 2)), + index=idx_func_dict[r2](5), + columns=idx_func_dict[c2](2), + ) if should_warn(df.index, df2.index, df3.index): with tm.assert_produces_warning(RuntimeWarning): res = pd.eval("df + df2 + df3", engine=engine, parser=parser) @@ -828,10 +862,12 @@ 
def test_medium_complex_frame_alignment(self, engine, parser, r1, c1, r2, c2): @pytest.mark.parametrize("c_idx_type", index_types) @pytest.mark.parametrize("r_idx_type", lhs_index_types) def test_basic_frame_series_alignment( - self, engine, parser, index_name, r_idx_type, c_idx_type + self, engine, parser, index_name, r_idx_type, c_idx_type, idx_func_dict ): - df = tm.makeCustomDataframe( - 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + df = DataFrame( + np.random.default_rng(2).standard_normal((10, 10)), + index=idx_func_dict[r_idx_type](10), + columns=idx_func_dict[c_idx_type](10), ) index = getattr(df, index_name) s = Series(np.random.default_rng(2).standard_normal(5), index[:5]) @@ -855,7 +891,7 @@ def test_basic_frame_series_alignment( ) @pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_basic_series_frame_alignment( - self, request, engine, parser, index_name, r_idx_type, c_idx_type + self, request, engine, parser, index_name, r_idx_type, c_idx_type, idx_func_dict ): if ( engine == "numexpr" @@ -870,8 +906,10 @@ def test_basic_series_frame_alignment( f"r_idx_type={r_idx_type}, c_idx_type={c_idx_type}" ) request.applymarker(pytest.mark.xfail(reason=reason, strict=False)) - df = tm.makeCustomDataframe( - 10, 7, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + df = DataFrame( + np.random.default_rng(2).standard_normal((10, 7)), + index=idx_func_dict[r_idx_type](10), + columns=idx_func_dict[c_idx_type](7), ) index = getattr(df, index_name) s = Series(np.random.default_rng(2).standard_normal(5), index[:5]) @@ -893,10 +931,12 @@ def test_basic_series_frame_alignment( @pytest.mark.parametrize("index_name", ["index", "columns"]) @pytest.mark.parametrize("op", ["+", "*"]) def test_series_frame_commutativity( - self, engine, parser, index_name, op, r_idx_type, c_idx_type + self, engine, parser, index_name, op, r_idx_type, c_idx_type, idx_func_dict ): - df = tm.makeCustomDataframe( - 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + df = DataFrame( + np.random.default_rng(2).standard_normal((10, 10)), + index=idx_func_dict[r_idx_type](10), + columns=idx_func_dict[c_idx_type](10), ) index = getattr(df, index_name) s = Series(np.random.default_rng(2).standard_normal(5), index[:5]) @@ -921,13 +961,22 @@ def test_series_frame_commutativity( @pytest.mark.parametrize("c1", index_types) @pytest.mark.parametrize("r2", index_types) @pytest.mark.parametrize("c2", index_types) - def test_complex_series_frame_alignment(self, engine, parser, r1, c1, r2, c2): + def test_complex_series_frame_alignment( + self, engine, parser, r1, c1, r2, c2, idx_func_dict + ): n = 3 m1 = 5 m2 = 2 * m1 - - df = tm.makeCustomDataframe(m1, n, data_gen_f=f, r_idx_type=r1, c_idx_type=c1) - df2 = tm.makeCustomDataframe(m2, n, data_gen_f=f, r_idx_type=r2, c_idx_type=c2) + df = DataFrame( + np.random.default_rng(2).standard_normal((m1, n)), + index=idx_func_dict[r1](m1), + columns=idx_func_dict[c1](n), + ) + df2 = DataFrame( + np.random.default_rng(2).standard_normal((m2, n)), + index=idx_func_dict[r2](m2), + columns=idx_func_dict[c2](n), + ) index = df2.columns ser = Series(np.random.default_rng(2).standard_normal(n), index[:n]) @@ -1414,8 +1463,10 @@ def test_inplace_no_assignment(self, target): self.eval(expression, target=target, inplace=True) def test_basic_period_index_boolean_expression(self): - df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i") - + df = DataFrame( + np.random.default_rng(2).standard_normal((2, 2)), + 
columns=period_range("2020-01-01", freq="D", periods=2), + ) e = df < 2 r = self.eval("df < 2", local_dict={"df": df}) x = df < 2 @@ -1424,13 +1475,19 @@ def test_basic_period_index_boolean_expression(self): tm.assert_frame_equal(x, e) def test_basic_period_index_subscript_expression(self): - df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i") + df = DataFrame( + np.random.default_rng(2).standard_normal((2, 2)), + columns=period_range("2020-01-01", freq="D", periods=2), + ) r = self.eval("df[df < 2 + 3]", local_dict={"df": df}) e = df[df < 2 + 3] tm.assert_frame_equal(r, e) def test_nested_period_index_subscript_expression(self): - df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i") + df = DataFrame( + np.random.default_rng(2).standard_normal((2, 2)), + columns=period_range("2020-01-01", freq="D", periods=2), + ) r = self.eval("df[df[df < 2] < 2] + df * 2", local_dict={"df": df}) e = df[df[df < 2] < 2] + df * 2 tm.assert_frame_equal(r, e) diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index a38d2c6fd016a..e2759c5d5b7b7 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -378,12 +378,9 @@ def test_select_dtypes_bad_arg_raises(self): def test_select_dtypes_typecodes(self): # GH 11990 - df = tm.makeCustomDataframe( - 30, 3, data_gen_f=lambda x, y: np.random.default_rng(2).random() - ) - expected = df + df = DataFrame(np.random.default_rng(2).random((5, 3))) FLOAT_TYPES = list(np.typecodes["AllFloat"]) - tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected) + tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), df) @pytest.mark.parametrize( "arr,expected", diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 94c98ad477cc1..97fbe597d1dab 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -16,6 +16,7 @@ Series, Timestamp, date_range, + period_range, read_csv, to_datetime, ) @@ -155,7 +156,11 @@ def test_to_csv_cols_reordering(self): chunksize = 5 N = int(chunksize * 2.5) - df = tm.makeCustomDataframe(N, 3) + df = DataFrame( + np.ones((N, 3)), + index=Index([f"i-{i}" for i in range(N)], name="a"), + columns=Index([f"i-{i}" for i in range(3)], name="a"), + ) cs = df.columns cols = [cs[2], cs[0]] @@ -171,8 +176,11 @@ def test_to_csv_new_dupe_cols(self, cols): N = int(chunksize * 2.5) # dupe cols - df = tm.makeCustomDataframe(N, 3) - df.columns = ["a", "a", "b"] + df = DataFrame( + np.ones((N, 3)), + index=Index([f"i-{i}" for i in range(N)], name="a"), + columns=["a", "a", "b"], + ) with tm.ensure_clean() as path: df.to_csv(path, columns=cols, chunksize=chunksize) rs_c = read_csv(path, index_col=0) @@ -335,7 +343,11 @@ def _to_uni(x): "nrows", [2, 10, 99, 100, 101, 102, 198, 199, 200, 201, 202, 249, 250, 251] ) def test_to_csv_nrows(self, nrows): - df = tm.makeCustomDataframe(nrows, 4, r_idx_type="dt", c_idx_type="s") + df = DataFrame( + np.ones((nrows, 4)), + index=date_range("2020-01-01", periods=nrows), + columns=Index(list("abcd"), dtype=object), + ) result, expected = self._return_result_expected(df, 1000, "dt", "s") tm.assert_frame_equal(result, expected, check_names=False) @@ -349,8 +361,16 @@ def test_to_csv_nrows(self, nrows): @pytest.mark.parametrize("ncols", [1, 2, 3, 4]) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") def test_to_csv_idx_types(self, 
nrows, r_idx_type, c_idx_type, ncols): - df = tm.makeCustomDataframe( - nrows, ncols, r_idx_type=r_idx_type, c_idx_type=c_idx_type + axes = { + "i": lambda n: Index(np.arange(n), dtype=np.int64), + "s": lambda n: Index([f"{i}_{chr(i)}" for i in range(97, 97 + n)]), + "dt": lambda n: date_range("2020-01-01", periods=n), + "p": lambda n: period_range("2020-01-01", periods=n, freq="D"), + } + df = DataFrame( + np.ones((nrows, ncols)), + index=axes[r_idx_type](nrows), + columns=axes[c_idx_type](ncols), ) result, expected = self._return_result_expected( df, @@ -366,14 +386,23 @@ def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols): ) @pytest.mark.parametrize("ncols", [1, 2, 3, 4]) def test_to_csv_idx_ncols(self, nrows, ncols): - df = tm.makeCustomDataframe(nrows, ncols) + df = DataFrame( + np.ones((nrows, ncols)), + index=Index([f"i-{i}" for i in range(nrows)], name="a"), + columns=Index([f"i-{i}" for i in range(ncols)], name="a"), + ) result, expected = self._return_result_expected(df, 1000) tm.assert_frame_equal(result, expected, check_names=False) @pytest.mark.slow @pytest.mark.parametrize("nrows", [10, 98, 99, 100, 101, 102]) def test_to_csv_dup_cols(self, nrows): - df = tm.makeCustomDataframe(nrows, 3) + df = DataFrame( + np.ones((nrows, 3)), + index=Index([f"i-{i}" for i in range(nrows)], name="a"), + columns=Index([f"i-{i}" for i in range(3)], name="a"), + ) + cols = list(df.columns) cols[:2] = ["dupe", "dupe"] cols[-2:] = ["dupe", "dupe"] @@ -394,7 +423,12 @@ def test_to_csv_empty(self): @pytest.mark.slow def test_to_csv_chunksize(self): chunksize = 1000 - df = tm.makeCustomDataframe(chunksize // 2 + 1, 2, r_idx_nlevels=2) + rows = chunksize // 2 + 1 + df = DataFrame( + np.ones((rows, 2)), + columns=Index(list("ab"), dtype=object), + index=MultiIndex.from_arrays([range(rows) for _ in range(2)]), + ) result, expected = self._return_result_expected(df, chunksize, rnlvl=2) tm.assert_frame_equal(result, expected, check_names=False) @@ -412,7 +446,22 @@ def test_to_csv_chunksize(self): ], ) def test_to_csv_params(self, nrows, df_params, func_params, ncols): - df = tm.makeCustomDataframe(nrows, ncols, **df_params) + if df_params.get("r_idx_nlevels"): + index = MultiIndex.from_arrays( + [f"i-{i}" for i in range(nrows)] + for _ in range(df_params["r_idx_nlevels"]) + ) + else: + index = None + + if df_params.get("c_idx_nlevels"): + columns = MultiIndex.from_arrays( + [f"i-{i}" for i in range(ncols)] + for _ in range(df_params["c_idx_nlevels"]) + ) + else: + columns = Index([f"i-{i}" for i in range(ncols)], dtype=object) + df = DataFrame(np.ones((nrows, ncols)), index=index, columns=columns) result, expected = self._return_result_expected(df, 1000, **func_params) tm.assert_frame_equal(result, expected, check_names=False) @@ -545,19 +594,40 @@ def _make_frame(names=None): ) # column & index are multi-index - df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + df = DataFrame( + np.ones((5, 3)), + columns=MultiIndex.from_arrays( + [[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd") + ), + index=MultiIndex.from_arrays( + [[f"i-{i}" for i in range(5)] for _ in range(2)], names=list("ab") + ), + ) df.to_csv(path) result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1]) tm.assert_frame_equal(df, result) # column is mi - df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=1, c_idx_nlevels=4) + df = DataFrame( + np.ones((5, 3)), + columns=MultiIndex.from_arrays( + [[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd") + ), + ) 
df.to_csv(path) result = read_csv(path, header=[0, 1, 2, 3], index_col=0) tm.assert_frame_equal(df, result) # dup column names? - df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=3, c_idx_nlevels=4) + df = DataFrame( + np.ones((5, 3)), + columns=MultiIndex.from_arrays( + [[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd") + ), + index=MultiIndex.from_arrays( + [[f"i-{i}" for i in range(5)] for _ in range(3)], names=list("abc") + ), + ) df.to_csv(path) result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1, 2]) tm.assert_frame_equal(df, result) @@ -737,11 +807,13 @@ def test_to_csv_dups_cols(self): result.columns = df.columns tm.assert_frame_equal(result, df) + def test_to_csv_dups_cols2(self): # GH3457 - - N = 10 - df = tm.makeCustomDataframe(N, 3) - df.columns = ["a", "a", "b"] + df = DataFrame( + np.ones((5, 3)), + index=Index([f"i-{i}" for i in range(5)], name="foo"), + columns=Index(["a", "a", "b"], dtype=object), + ) with tm.ensure_clean() as filename: df.to_csv(filename) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 2e8c5258547c3..6353546648156 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -360,8 +360,11 @@ def test_query_with_partially_named_multiindex(self, parser, engine): tm.assert_frame_equal(res, exp) def test_query_multiindex_get_index_resolvers(self): - df = tm.makeCustomDataframe( - 10, 3, r_idx_nlevels=2, r_idx_names=["spam", "eggs"] + df = DataFrame( + np.ones((10, 3)), + index=MultiIndex.from_arrays( + [range(10) for _ in range(2)], names=["spam", "eggs"] + ), ) resolvers = df._get_index_resolvers() @@ -377,7 +380,7 @@ def to_series(mi, level): "columns": col_series, "spam": to_series(df.index, "spam"), "eggs": to_series(df.index, "eggs"), - "C0": col_series, + "clevel_0": col_series, } for k, v in resolvers.items(): if isinstance(v, Index): diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py index 959fbab0dcec6..d0ac32939296c 100644 --- a/pandas/tests/indexes/datetimes/test_join.py +++ b/pandas/tests/indexes/datetimes/test_join.py @@ -7,10 +7,12 @@ import pytest from pandas import ( + DataFrame, DatetimeIndex, Index, Timestamp, date_range, + period_range, to_datetime, ) import pandas._testing as tm @@ -23,15 +25,7 @@ class TestJoin: def test_does_not_convert_mixed_integer(self): - df = tm.makeCustomDataframe( - 10, - 10, - data_gen_f=lambda *args, **kwargs: np.random.default_rng( - 2 - ).standard_normal(), - r_idx_type="i", - c_idx_type="dt", - ) + df = DataFrame(np.ones((3, 2)), columns=date_range("2020-01-01", periods=2)) cols = df.columns.join(df.index, how="outer") joined = cols.join(df.columns) assert cols.dtype == np.dtype("O") @@ -44,12 +38,10 @@ def test_join_self(self, join_type): assert index is joined def test_join_with_period_index(self, join_type): - df = tm.makeCustomDataframe( - 10, - 10, - data_gen_f=lambda *args: np.random.default_rng(2).integers(2), - c_idx_type="p", - r_idx_type="dt", + df = DataFrame( + np.ones((10, 2)), + index=date_range("2020-01-01", periods=10), + columns=period_range("2020-01-01", periods=2), ) s = df.iloc[:5, 0] diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index d270741a0e0bc..5e2d3c23da645 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -73,7 +73,11 @@ def test_slice_locs_with_type_mismatch(self): idx.slice_locs((1, 3)) with 
pytest.raises(TypeError, match="^Level type mismatch"): idx.slice_locs(df.index[5] + timedelta(seconds=30), (5, 2)) - df = tm.makeCustomDataframe(5, 5) + df = DataFrame( + np.ones((5, 5)), + index=Index([f"i-{i}" for i in range(5)], name="a"), + columns=Index([f"i-{i}" for i in range(5)], name="a"), + ) stacked = df.stack(future_stack=True) idx = stacked.index with pytest.raises(TypeError, match="^Level type mismatch"): diff --git a/pandas/tests/indexes/period/test_join.py b/pandas/tests/indexes/period/test_join.py index 191dba2be0c5d..3e659c1a63266 100644 --- a/pandas/tests/indexes/period/test_join.py +++ b/pandas/tests/indexes/period/test_join.py @@ -4,8 +4,10 @@ from pandas._libs.tslibs import IncompatibleFrequency from pandas import ( + DataFrame, Index, PeriodIndex, + date_range, period_range, ) import pandas._testing as tm @@ -35,12 +37,10 @@ def test_join_self(self, join_type): assert index is res def test_join_does_not_recur(self): - df = tm.makeCustomDataframe( - 3, - 2, - data_gen_f=lambda *args: np.random.default_rng(2).integers(2), - c_idx_type="p", - r_idx_type="dt", + df = DataFrame( + np.ones((3, 2)), + index=date_range("2020-01-01", periods=3), + columns=period_range("2020-01-01", periods=2), ) ser = df.iloc[:2, 0] diff --git a/pandas/tests/indexes/timedeltas/test_join.py b/pandas/tests/indexes/timedeltas/test_join.py index 89579d0c86f20..cbd7a5de71b10 100644 --- a/pandas/tests/indexes/timedeltas/test_join.py +++ b/pandas/tests/indexes/timedeltas/test_join.py @@ -1,6 +1,7 @@ import numpy as np from pandas import ( + DataFrame, Index, Timedelta, timedelta_range, @@ -25,15 +26,7 @@ def test_join_self(self, join_type): tm.assert_index_equal(index, joined) def test_does_not_convert_mixed_integer(self): - df = tm.makeCustomDataframe( - 10, - 10, - data_gen_f=lambda *args, **kwargs: np.random.default_rng( - 2 - ).standard_normal(), - r_idx_type="i", - c_idx_type="td", - ) + df = DataFrame(np.ones((5, 5)), columns=timedelta_range("1 day", periods=5)) cols = df.columns.join(df.index, how="outer") joined = cols.join(df.columns) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 31263b44ed205..409eca42f404b 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -821,7 +821,11 @@ def test_iloc_non_unique_indexing(self): df2.loc[idx] def test_iloc_empty_list_indexer_is_ok(self): - df = tm.makeCustomDataframe(5, 2) + df = DataFrame( + np.ones((5, 2)), + index=Index([f"i-{i}" for i in range(5)], name="a"), + columns=Index([f"i-{i}" for i in range(2)], name="a"), + ) # vertical empty tm.assert_frame_equal( df.iloc[:, []], diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index d6ec7ac3e4185..57f45f867254d 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -243,8 +243,7 @@ def test_setitem_dtype_upcast3(self): def test_dups_fancy_indexing(self): # GH 3455 - df = tm.makeCustomDataframe(10, 3) - df.columns = ["a", "a", "b"] + df = DataFrame(np.eye(3), columns=["a", "a", "b"]) result = df[["b", "a"]].columns expected = Index(["b", "a", "a"]) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 8459cc5a30130..ce7dde3c4cb42 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1072,7 +1072,11 @@ def test_loc_name(self): assert result == "index_name" def test_loc_empty_list_indexer_is_ok(self): - df = 
tm.makeCustomDataframe(5, 2) + df = DataFrame( + np.ones((5, 2)), + index=Index([f"i-{i}" for i in range(5)], name="a"), + columns=Index([f"i-{i}" for i in range(2)], name="a"), + ) # vertical empty tm.assert_frame_equal( df.loc[:, []], df.iloc[:, :0], check_index_type=True, check_column_type=True diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 8452ec01a0936..6c91fcf39c30c 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -220,8 +220,8 @@ def test_read_excel_multiindex_empty_level(self, ext): actual = pd.read_excel(path, header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) - @pytest.mark.parametrize("c_idx_names", [True, False]) - @pytest.mark.parametrize("r_idx_names", [True, False]) + @pytest.mark.parametrize("c_idx_names", ["a", None]) + @pytest.mark.parametrize("r_idx_names", ["b", None]) @pytest.mark.parametrize("c_idx_levels", [1, 3]) @pytest.mark.parametrize("r_idx_levels", [1, 3]) def test_excel_multindex_roundtrip( @@ -229,21 +229,28 @@ def test_excel_multindex_roundtrip( ): # see gh-4679 with tm.ensure_clean(ext) as pth: - if (c_idx_levels == 1 and c_idx_names) and not ( - r_idx_levels == 3 and not r_idx_names - ): - mark = pytest.mark.xfail( - reason="Column index name cannot be serialized unless " - "it's a MultiIndex" - ) - request.applymarker(mark) - # Empty name case current read in as # unnamed levels, not Nones. - check_names = r_idx_names or r_idx_levels <= 1 + check_names = bool(r_idx_names) or r_idx_levels <= 1 - df = tm.makeCustomDataframe( - 5, 5, c_idx_names, r_idx_names, c_idx_levels, r_idx_levels + if c_idx_levels == 1: + columns = Index(list("abcde"), dtype=object) + else: + columns = MultiIndex.from_arrays( + [range(5) for _ in range(c_idx_levels)], + names=[f"{c_idx_names}-{i}" for i in range(c_idx_levels)], + ) + if r_idx_levels == 1: + index = Index(list("ghijk"), dtype=object) + else: + index = MultiIndex.from_arrays( + [range(5) for _ in range(r_idx_levels)], + names=[f"{r_idx_names}-{i}" for i in range(r_idx_levels)], + ) + df = DataFrame( + 1.1 * np.ones((5, 5)), + columns=columns, + index=index, ) df.to_excel(pth) @@ -1011,8 +1018,25 @@ def roundtrip(data, header=True, parser_hdr=0, index=True): # ensure limited functionality in 0.10 # override of gh-2370 until sorted out in 0.11 - df = tm.makeCustomDataframe( - nrows, ncols, r_idx_nlevels=r_idx_nlevels, c_idx_nlevels=c_idx_nlevels + if c_idx_nlevels == 1: + columns = Index([f"a-{i}" for i in range(ncols)], dtype=object) + else: + columns = MultiIndex.from_arrays( + [range(ncols) for _ in range(c_idx_nlevels)], + names=[f"i-{i}" for i in range(c_idx_nlevels)], + ) + if r_idx_nlevels == 1: + index = Index([f"b-{i}" for i in range(nrows)], dtype=object) + else: + index = MultiIndex.from_arrays( + [range(nrows) for _ in range(r_idx_nlevels)], + names=[f"j-{i}" for i in range(r_idx_nlevels)], + ) + + df = DataFrame( + np.ones((nrows, ncols)), + columns=columns, + index=index, ) # This if will be removed once multi-column Excel writing @@ -1433,7 +1457,11 @@ def assert_called_and_reset(cls): with tm.ensure_clean(path) as filepath: with ExcelWriter(filepath) as writer: assert isinstance(writer, DummyClass) - df = tm.makeCustomDataframe(1, 1) + df = DataFrame( + ["a"], + columns=Index(["b"], name="foo"), + index=Index(["c"], name="bar"), + ) df.to_excel(filepath) DummyClass.assert_called_and_reset() diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 
86162bf90db8b..aaac416d8f464 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -121,7 +121,6 @@ def test_header_not_first_line(all_parsers): @xfail_pyarrow # TypeError: an integer is required def test_header_multi_index(all_parsers): parser = all_parsers - expected = tm.makeCustomDataframe(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) data = """\ C0,,C_l0_g0,C_l0_g1,C_l0_g2 @@ -137,6 +136,23 @@ def test_header_multi_index(all_parsers): R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2 """ result = parser.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[0, 1]) + data_gen_f = lambda r, c: f"R{r}C{c}" + + data = [[data_gen_f(r, c) for c in range(3)] for r in range(5)] + index = MultiIndex.from_arrays( + [[f"R_l0_g{i}" for i in range(5)], [f"R_l1_g{i}" for i in range(5)]], + names=["R0", "R1"], + ) + columns = MultiIndex.from_arrays( + [ + [f"C_l0_g{i}" for i in range(3)], + [f"C_l1_g{i}" for i in range(3)], + [f"C_l2_g{i}" for i in range(3)], + [f"C_l3_g{i}" for i in range(3)], + ], + names=["C0", "C1", "C2", "C3"], + ) + expected = DataFrame(data, columns=columns, index=index) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 3d150aaa28eb4..8564f09ef7ae9 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -64,32 +64,21 @@ def df(request): {"a": ["\U0001f44d\U0001f44d", "\U0001f44d\U0001f44d"], "b": ["abc", "def"]} ) elif data_type == "string": - return tm.makeCustomDataframe( - 5, 3, c_idx_type="s", r_idx_type="i", c_idx_names=[None], r_idx_names=[None] + return DataFrame( + np.array([f"i-{i}" for i in range(15)]).reshape(5, 3), columns=list("abc") ) elif data_type == "long": max_rows = get_option("display.max_rows") - return tm.makeCustomDataframe( - max_rows + 1, - 3, - data_gen_f=lambda *args: np.random.default_rng(2).integers(2), - c_idx_type="s", - r_idx_type="i", - c_idx_names=[None], - r_idx_names=[None], + return DataFrame( + np.random.default_rng(2).integers(0, 10, size=(max_rows + 1, 3)), + columns=list("abc"), ) elif data_type == "nonascii": return DataFrame({"en": "in English".split(), "es": "en espaƱol".split()}) elif data_type == "colwidth": _cw = get_option("display.max_colwidth") + 1 - return tm.makeCustomDataframe( - 5, - 3, - data_gen_f=lambda *args: "x" * _cw, - c_idx_type="s", - r_idx_type="i", - c_idx_names=[None], - r_idx_names=[None], + return DataFrame( + np.array(["x" * _cw for _ in range(15)]).reshape(5, 3), columns=list("abc") ) elif data_type == "mixed": return DataFrame( @@ -100,24 +89,10 @@ def df(request): } ) elif data_type == "float": - return tm.makeCustomDataframe( - 5, - 3, - data_gen_f=lambda r, c: float(r) + 0.01, - c_idx_type="s", - r_idx_type="i", - c_idx_names=[None], - r_idx_names=[None], - ) + return DataFrame(np.random.default_rng(2).random((5, 3)), columns=list("abc")) elif data_type == "int": - return tm.makeCustomDataframe( - 5, - 3, - data_gen_f=lambda *args: np.random.default_rng(2).integers(2), - c_idx_type="s", - r_idx_type="i", - c_idx_names=[None], - r_idx_names=[None], + return DataFrame( + np.random.default_rng(2).integers(0, 10, (5, 3)), columns=list("abc") ) else: raise ValueError diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index dcee52011a691..f0256316e1689 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -152,12 +152,9 @@ def banklist_data(self, datapath): def test_to_html_compat(self, flavor_read_html): df = ( - 
tm.makeCustomDataframe( - 4, - 3, - data_gen_f=lambda *args: np.random.default_rng(2).random(), - c_idx_names=False, - r_idx_names=False, + DataFrame( + np.random.default_rng(2).random((4, 3)), + columns=pd.Index(list("abc"), dtype=object), ) # pylint: disable-next=consider-using-f-string .map("{:.3f}".format).astype(float) diff --git a/pandas/tests/reshape/concat/test_invalid.py b/pandas/tests/reshape/concat/test_invalid.py index 5e6703b185f27..b1c44fc0debc3 100644 --- a/pandas/tests/reshape/concat/test_invalid.py +++ b/pandas/tests/reshape/concat/test_invalid.py @@ -12,19 +12,19 @@ class TestInvalidConcat: - def test_concat_invalid(self): + @pytest.mark.parametrize("obj", [1, {}, [1, 2], (1, 2)]) + def test_concat_invalid(self, obj): # trying to concat a ndframe with a non-ndframe - df1 = tm.makeCustomDataframe(10, 2) - for obj in [1, {}, [1, 2], (1, 2)]: - msg = ( - f"cannot concatenate object of type '{type(obj)}'; " - "only Series and DataFrame objs are valid" - ) - with pytest.raises(TypeError, match=msg): - concat([df1, obj]) + df1 = DataFrame(range(2)) + msg = ( + f"cannot concatenate object of type '{type(obj)}'; " + "only Series and DataFrame objs are valid" + ) + with pytest.raises(TypeError, match=msg): + concat([df1, obj]) def test_concat_invalid_first_argument(self): - df1 = tm.makeCustomDataframe(10, 2) + df1 = DataFrame(range(2)) msg = ( "first argument must be an iterable of pandas " 'objects, you passed an object of type "DataFrame"'
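For reference, a minimal self-contained sketch of the construction pattern this patch substitutes for tm.makeCustomDataframe: build the row and column axes explicitly with Index/MultiIndex and pass them to the DataFrame constructor. The shapes, the f"i-{i}" labels, and the header/index_col round-trip mirror the new test_to_csv.py code above; the in-memory StringIO round-trip at the end is illustrative only (the tests themselves write to a temporary file via tm.ensure_clean).

# Sketch of the replacement pattern used throughout this patch, assuming
# only public pandas API (no pandas._testing helpers).
from io import StringIO

import numpy as np
import pandas as pd

# 2-level row index and 4-level column index, matching the old
# makeCustomDataframe(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) call.
index = pd.MultiIndex.from_arrays(
    [[f"i-{i}" for i in range(5)] for _ in range(2)], names=list("ab")
)
columns = pd.MultiIndex.from_arrays(
    [[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd")
)
df = pd.DataFrame(np.ones((5, 3)), index=index, columns=columns)

# Round-trip through CSV: the four header rows and two index columns are
# read back into the same MultiIndex structure, as asserted in the
# "column & index are multi-index" case of test_to_csv.py.
buf = StringIO()
df.to_csv(buf)
buf.seek(0)
result = pd.read_csv(buf, header=[0, 1, 2, 3], index_col=[0, 1])
pd.testing.assert_frame_equal(result, df)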