diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 57adf35f9ad89..6000c828555c6 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -345,14 +345,6 @@ def getCols(k) -> str: return string.ascii_uppercase[:k] -def getSeriesData() -> dict[str, Series]: - index = Index([f"foo_{i}" for i in range(_N)]) - return { - c: Series(np.random.default_rng(i).standard_normal(_N), index=index) - for i, c in enumerate(getCols(_K)) - } - - def makeTimeSeries(nper=None, freq: Frequency = "B", name=None) -> Series: if nper is None: nper = _N @@ -903,7 +895,6 @@ def shares_memory(left, right) -> bool: "get_finest_unit", "get_obj", "get_op_from_name", - "getSeriesData", "getTimeSeriesData", "iat", "iloc", diff --git a/pandas/conftest.py b/pandas/conftest.py index 7c2bbc6e0b723..9ed6f8f43ae03 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -840,27 +840,12 @@ def int_frame() -> DataFrame: Fixture for DataFrame of ints with index of unique strings Columns are ['A', 'B', 'C', 'D'] - - A B C D - vpBeWjM651 1 0 1 0 - 5JyxmrP1En -1 0 0 0 - qEDaoD49U2 -1 1 0 0 - m66TkTfsFe 0 0 0 0 - EHPaNzEUFm -1 0 -1 0 - fpRJCevQhi 2 0 0 0 - OlQvnmfi3Q 0 0 -2 0 - ... .. .. .. .. - uB1FPlz4uP 0 0 0 1 - EcSe6yNzCU 0 0 -1 0 - L50VudaiI8 -1 1 -2 0 - y3bpw4nwIp 0 -1 0 0 - H0RdLLwrCT 1 1 0 0 - rY82K0vMwm 0 0 0 0 - 1OPIUjnkjk 2 0 0 0 - - [30 rows x 4 columns] """ - return DataFrame(tm.getSeriesData()).astype("int64") + return DataFrame( + np.ones((30, 4), dtype=np.int64), + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + columns=Index(list("ABCD"), dtype=object), + ) @pytest.fixture @@ -869,27 +854,12 @@ def float_frame() -> DataFrame: Fixture for DataFrame of floats with index of unique strings Columns are ['A', 'B', 'C', 'D']. - - A B C D - P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465 - qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901 - tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433 - wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651 - M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938 - QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053 - r78Jwns6dn -0.653707 0.883127 0.682199 0.206159 - ... ... ... ... ... - IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316 - lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999 - qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121 - yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962 - 65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987 - eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871 - xSucinXxuV -1.263557 0.252799 -0.552247 0.400426 - - [30 rows x 4 columns] - """ - return DataFrame(tm.getSeriesData()) + """ + return DataFrame( + np.random.default_rng(2).standard_normal((30, 4)), + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + columns=Index(list("ABCD"), dtype=object), + ) @pytest.fixture diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index f7ed5180b46d9..99ea565e5b60c 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -3,6 +3,7 @@ from pandas import ( DataFrame, + Index, NaT, date_range, ) @@ -44,27 +45,12 @@ def float_string_frame(): Fixture for DataFrame of floats and strings with index of unique strings Columns are ['A', 'B', 'C', 'D', 'foo']. - - A B C D foo - w3orJvq07g -1.594062 -1.084273 -1.252457 0.356460 bar - PeukuVdmz2 0.109855 -0.955086 -0.809485 0.409747 bar - ahp2KvwiM8 -1.533729 -0.142519 -0.154666 1.302623 bar - 3WSJ7BUCGd 2.484964 0.213829 0.034778 -2.327831 bar - khdAmufk0U -0.193480 -0.743518 -0.077987 0.153646 bar - LE2DZiFlrE -0.193566 -1.343194 -0.107321 0.959978 bar - HJXSJhVn7b 0.142590 1.257603 -0.659409 -0.223844 bar - ... ... ... ... ... ... - 9a1Vypttgw -1.316394 1.601354 0.173596 1.213196 bar - h5d1gVFbEy 0.609475 1.106738 -0.155271 0.294630 bar - mK9LsTQG92 1.303613 0.857040 -1.019153 0.369468 bar - oOLksd9gKH 0.558219 -0.134491 -0.289869 -0.951033 bar - 9jgoOjKyHg 0.058270 -0.496110 -0.413212 -0.852659 bar - jZLDHclHAO 0.096298 1.267510 0.549206 -0.005235 bar - lR0nxDp1C2 -2.119350 -0.794384 0.544118 0.145849 bar - - [30 rows x 5 columns] """ - df = DataFrame(tm.getSeriesData()) + df = DataFrame( + np.random.default_rng(2).standard_normal((30, 4)), + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + columns=Index(list("ABCD"), dtype=object), + ) df["foo"] = "bar" return df @@ -75,31 +61,18 @@ def mixed_float_frame(): Fixture for DataFrame of different float types with index of unique strings Columns are ['A', 'B', 'C', 'D']. - - A B C D - GI7bbDaEZe -0.237908 -0.246225 -0.468506 0.752993 - KGp9mFepzA -1.140809 -0.644046 -1.225586 0.801588 - VeVYLAb1l2 -1.154013 -1.677615 0.690430 -0.003731 - kmPME4WKhO 0.979578 0.998274 -0.776367 0.897607 - CPyopdXTiz 0.048119 -0.257174 0.836426 0.111266 - 0kJZQndAj0 0.274357 -0.281135 -0.344238 0.834541 - tqdwQsaHG8 -0.979716 -0.519897 0.582031 0.144710 - ... ... ... ... ... - 7FhZTWILQj -2.906357 1.261039 -0.780273 -0.537237 - 4pUDPM4eGq -2.042512 -0.464382 -0.382080 1.132612 - B8dUgUzwTi -1.506637 -0.364435 1.087891 0.297653 - hErlVYjVv9 1.477453 -0.495515 -0.713867 1.438427 - 1BKN3o7YLs 0.127535 -0.349812 -0.881836 0.489827 - 9S4Ekn7zga 1.445518 -2.095149 0.031982 0.373204 - xN1dNn6OV6 1.425017 -0.983995 -0.363281 -0.224502 - - [30 rows x 4 columns] """ - df = DataFrame(tm.getSeriesData()) - df.A = df.A.astype("float32") - df.B = df.B.astype("float32") - df.C = df.C.astype("float16") - df.D = df.D.astype("float64") + df = DataFrame( + { + col: np.random.default_rng(2).random(30, dtype=dtype) + for col, dtype in zip( + list("ABCD"), ["float32", "float32", "float32", "float64"] + ) + }, + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + ) + # not supported by numpy random + df["C"] = df["C"].astype("float16") return df @@ -109,32 +82,14 @@ def mixed_int_frame(): Fixture for DataFrame of different int types with index of unique strings Columns are ['A', 'B', 'C', 'D']. - - A B C D - mUrCZ67juP 0 1 2 2 - rw99ACYaKS 0 1 0 0 - 7QsEcpaaVU 0 1 1 1 - xkrimI2pcE 0 1 0 0 - dz01SuzoS8 0 1 255 255 - ccQkqOHX75 -1 1 0 0 - DN0iXaoDLd 0 1 0 0 - ... .. .. ... ... - Dfb141wAaQ 1 1 254 254 - IPD8eQOVu5 0 1 0 0 - CcaKulsCmv 0 1 0 0 - rIBa8gu7E5 0 1 0 0 - RP6peZmh5o 0 1 1 1 - NMb9pipQWQ 0 1 0 0 - PqgbJEzjib 0 1 3 3 - - [30 rows x 4 columns] """ - df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()}) - df.A = df.A.astype("int32") - df.B = np.ones(len(df.B), dtype="uint64") - df.C = df.C.astype("uint8") - df.D = df.C.astype("int64") - return df + return DataFrame( + { + col: np.ones(30, dtype=dtype) + for col, dtype in zip(list("ABCD"), ["int32", "uint64", "uint8", "int64"]) + }, + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + ) @pytest.fixture diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index be809e3a17c8e..e09e9f5624f5b 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -57,7 +57,7 @@ def test_xs( assert xs["B"] == "1" with pytest.raises( - KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00')") + KeyError, match=re.escape("Timestamp('2019-12-31 00:00:00')") ): datetime_frame.xs(datetime_frame.index[0] - BDay()) diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index 23355a5549a88..0014ccf054653 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -30,7 +30,7 @@ def test_first_subset(self, frame_or_series): with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): result = ts.first("3ME") - expected = ts[:"3/31/2000"] + expected = ts[:"3/31/2020"] tm.assert_equal(result, expected) with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): @@ -78,7 +78,7 @@ def test_last_subset(self, frame_or_series): with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): result = ts.last("21D") - expected = ts["2000-01-10":] + expected = ts["2020-01-10":] tm.assert_equal(result, expected) with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py index 7d9e0fe90f44c..fcb7677f03f27 100644 --- a/pandas/tests/frame/methods/test_info.py +++ b/pandas/tests/frame/methods/test_info.py @@ -532,11 +532,11 @@ def test_info_compute_numba(): with option_context("compute.use_numba", True): buf = StringIO() - df.info() + df.info(buf=buf) result = buf.getvalue() buf = StringIO() - df.info() + df.info(buf=buf) expected = buf.getvalue() assert result == expected diff --git a/pandas/tests/frame/methods/test_truncate.py b/pandas/tests/frame/methods/test_truncate.py index 4c4b04076c8d5..f348b8e823eff 100644 --- a/pandas/tests/frame/methods/test_truncate.py +++ b/pandas/tests/frame/methods/test_truncate.py @@ -60,7 +60,7 @@ def test_truncate(self, datetime_frame, frame_or_series): truncated = ts.truncate(before=ts.index[-1] + ts.index.freq) assert len(truncated) == 0 - msg = "Truncate: 2000-01-06 00:00:00 must be after 2000-02-04 00:00:00" + msg = "Truncate: 2020-01-06 00:00:00 must be after 2020-02-04 00:00:00" with pytest.raises(ValueError, match=msg): ts.truncate( before=ts.index[-1] - ts.index.freq, after=ts.index[0] + ts.index.freq diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 1ca9ec6feecae..b079c331eeebb 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -156,36 +156,18 @@ def bool_frame_with_na(): Fixture for DataFrame of booleans with index of unique strings Columns are ['A', 'B', 'C', 'D']; some entries are missing - - A B C D - zBZxY2IDGd False False False False - IhBWBMWllt False True True True - ctjdvZSR6R True False True True - AVTujptmxb False True False True - G9lrImrSWq False False False True - sFFwdIUfz2 NaN NaN NaN NaN - s15ptEJnRb NaN NaN NaN NaN - ... ... ... ... ... - UW41KkDyZ4 True True False False - l9l6XkOdqV True False False False - X2MeZfzDYA False True False False - xWkIKU7vfX False True False True - QOhL6VmpGU False False False True - 22PwkRJdat False True False False - kfboQ3VeIK True False True False - - [30 rows x 4 columns] """ - df = DataFrame(tm.getSeriesData()) > 0 - df = df.astype(object) + df = DataFrame( + np.concatenate( + [np.ones((15, 4), dtype=bool), np.zeros((15, 4), dtype=bool)], axis=0 + ), + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + columns=Index(list("ABCD"), dtype=object), + dtype=object, + ) # set some NAs df.iloc[5:10] = np.nan df.iloc[15:20, -2:] = np.nan - - # For `any` tests we need to have at least one True before the first NaN - # in each column - for i in range(4): - df.iloc[i, i] = True return df @@ -195,27 +177,12 @@ def float_frame_with_na(): Fixture for DataFrame of floats with index of unique strings Columns are ['A', 'B', 'C', 'D']; some entries are missing - - A B C D - ABwBzA0ljw -1.128865 -0.897161 0.046603 0.274997 - DJiRzmbyQF 0.728869 0.233502 0.722431 -0.890872 - neMgPD5UBF 0.486072 -1.027393 -0.031553 1.449522 - 0yWA4n8VeX -1.937191 -1.142531 0.805215 -0.462018 - 3slYUbbqU1 0.153260 1.164691 1.489795 -0.545826 - soujjZ0A08 NaN NaN NaN NaN - 7W6NLGsjB9 NaN NaN NaN NaN - ... ... ... ... ... - uhfeaNkCR1 -0.231210 -0.340472 0.244717 -0.901590 - n6p7GYuBIV -0.419052 1.922721 -0.125361 -0.727717 - ZhzAeY6p1y 1.234374 -1.425359 -0.827038 -0.633189 - uWdPsORyUh 0.046738 -0.980445 -1.102965 0.605503 - 3DJA6aN590 -0.091018 -1.684734 -1.100900 0.215947 - 2GBPAzdbMk -2.883405 -1.021071 1.209877 1.633083 - sHadBoyVHw -2.223032 -0.326384 0.258931 0.245517 - - [30 rows x 4 columns] """ - df = DataFrame(tm.getSeriesData()) + df = DataFrame( + np.random.default_rng(2).standard_normal((30, 4)), + index=Index([f"foo_{i}" for i in range(30)], dtype=object), + columns=Index(list("ABCD"), dtype=object), + ) # set some NAs df.iloc[5:10] = np.nan df.iloc[15:20, -2:] = np.nan diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 5275050391ca3..37bc2812a2095 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -90,15 +90,14 @@ def assert_json_roundtrip_equal(result, expected, orient): class TestPandasContainer: @pytest.fixture def categorical_frame(self): - _seriesd = tm.getSeriesData() - - _cat_frame = DataFrame(_seriesd) - - cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * (len(_cat_frame) - 15) - _cat_frame.index = pd.CategoricalIndex(cat, name="E") - _cat_frame["E"] = list(reversed(cat)) - _cat_frame["sort"] = np.arange(len(_cat_frame), dtype="int64") - return _cat_frame + data = { + c: np.random.default_rng(i).standard_normal(30) + for i, c in enumerate(list("ABCD")) + } + cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * 15 + data["E"] = list(reversed(cat)) + data["sort"] = np.arange(30, dtype="int64") + return DataFrame(data, index=pd.CategoricalIndex(cat, name="E")) @pytest.fixture def datetime_series(self):