Remove getSeriesData

pandas-dev · Nov 29, 2023 · ed89785
1 parent 2bbd6a6
commit ed89785
Show file tree

Hide file tree

Showing 9 changed files with 62 additions and 180 deletions.
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
@@ -345,14 +345,6 @@ def getCols(k) -> str:
     return string.ascii_uppercase[:k]
 
 
-def getSeriesData() -> dict[str, Series]:
-    index = Index([f"foo_{i}" for i in range(_N)])
-    return {
-        c: Series(np.random.default_rng(i).standard_normal(_N), index=index)
-        for i, c in enumerate(getCols(_K))
-    }
-
-
 def makeTimeSeries(nper=None, freq: Frequency = "B", name=None) -> Series:
     if nper is None:
         nper = _N
@@ -903,7 +895,6 @@ def shares_memory(left, right) -> bool:
     "get_finest_unit",
     "get_obj",
     "get_op_from_name",
-    "getSeriesData",
     "getTimeSeriesData",
     "iat",
     "iloc",

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -840,27 +840,12 @@ def int_frame() -> DataFrame:
     Fixture for DataFrame of ints with index of unique strings
 
     Columns are ['A', 'B', 'C', 'D']
-
-                A  B  C  D
-    vpBeWjM651  1  0  1  0
-    5JyxmrP1En -1  0  0  0
-    qEDaoD49U2 -1  1  0  0
-    m66TkTfsFe  0  0  0  0
-    EHPaNzEUFm -1  0 -1  0
-    fpRJCevQhi  2  0  0  0
-    OlQvnmfi3Q  0  0 -2  0
-    ...        .. .. .. ..
-    uB1FPlz4uP  0  0  0  1
-    EcSe6yNzCU  0  0 -1  0
-    L50VudaiI8 -1  1 -2  0
-    y3bpw4nwIp  0 -1  0  0
-    H0RdLLwrCT  1  1  0  0
-    rY82K0vMwm  0  0  0  0
-    1OPIUjnkjk  2  0  0  0
-
-    [30 rows x 4 columns]
     """
-    return DataFrame(tm.getSeriesData()).astype("int64")
+    return DataFrame(
+        np.ones((30, 4), dtype=np.int64),
+        index=Index([f"foo_{i}" for i in range(30)], dtype=object),
+        columns=Index(list("ABCD"), dtype=object),
+    )
 
 
 @pytest.fixture
@@ -869,27 +854,12 @@ def float_frame() -> DataFrame:
     Fixture for DataFrame of floats with index of unique strings
 
     Columns are ['A', 'B', 'C', 'D'].
-
-                       A         B         C         D
-    P7GACiRnxd -0.465578 -0.361863  0.886172 -0.053465
-    qZKh6afn8n -0.466693 -0.373773  0.266873  1.673901
-    tkp0r6Qble  0.148691 -0.059051  0.174817  1.598433
-    wP70WOCtv8  0.133045 -0.581994 -0.992240  0.261651
-    M2AeYQMnCz -1.207959 -0.185775  0.588206  0.563938
-    QEPzyGDYDo -0.381843 -0.758281  0.502575 -0.565053
-    r78Jwns6dn -0.653707  0.883127  0.682199  0.206159
-    ...              ...       ...       ...       ...
-    IHEGx9NO0T -0.277360  0.113021 -1.018314  0.196316
-    lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999
-    qa66YMWQa5  1.110525  0.475310 -0.747865  0.032121
-    yOa0ATsmcE -0.431457  0.067094  0.096567 -0.264962
-    65znX3uRNG  1.528446  0.160416 -0.109635 -0.032987
-    eCOBvKqf3e  0.235281  1.622222  0.781255  0.392871
-    xSucinXxuV -1.263557  0.252799 -0.552247  0.400426
-
-    [30 rows x 4 columns]
-    """
-    return DataFrame(tm.getSeriesData())
+    """
+    return DataFrame(
+        np.random.default_rng(2).standard_normal((30, 4)),
+        index=Index([f"foo_{i}" for i in range(30)], dtype=object),
+        columns=Index(list("ABCD"), dtype=object),
+    )
 
 
 @pytest.fixture

diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py
@@ -3,6 +3,7 @@
 
 from pandas import (
     DataFrame,
+    Index,
     NaT,
     date_range,
 )
@@ -44,27 +45,12 @@ def float_string_frame():
     Fixture for DataFrame of floats and strings with index of unique strings
 
     Columns are ['A', 'B', 'C', 'D', 'foo'].
-
-                       A         B         C         D  foo
-    w3orJvq07g -1.594062 -1.084273 -1.252457  0.356460  bar
-    PeukuVdmz2  0.109855 -0.955086 -0.809485  0.409747  bar
-    ahp2KvwiM8 -1.533729 -0.142519 -0.154666  1.302623  bar
-    3WSJ7BUCGd  2.484964  0.213829  0.034778 -2.327831  bar
-    khdAmufk0U -0.193480 -0.743518 -0.077987  0.153646  bar
-    LE2DZiFlrE -0.193566 -1.343194 -0.107321  0.959978  bar
-    HJXSJhVn7b  0.142590  1.257603 -0.659409 -0.223844  bar
-    ...              ...       ...       ...       ...  ...
-    9a1Vypttgw -1.316394  1.601354  0.173596  1.213196  bar
-    h5d1gVFbEy  0.609475  1.106738 -0.155271  0.294630  bar
-    mK9LsTQG92  1.303613  0.857040 -1.019153  0.369468  bar
-    oOLksd9gKH  0.558219 -0.134491 -0.289869 -0.951033  bar
-    9jgoOjKyHg  0.058270 -0.496110 -0.413212 -0.852659  bar
-    jZLDHclHAO  0.096298  1.267510  0.549206 -0.005235  bar
-    lR0nxDp1C2 -2.119350 -0.794384  0.544118  0.145849  bar
-
-    [30 rows x 5 columns]
     """
-    df = DataFrame(tm.getSeriesData())
+    df = DataFrame(
+        np.random.default_rng(2).standard_normal((30, 4)),
+        index=Index([f"foo_{i}" for i in range(30)], dtype=object),
+        columns=Index(list("ABCD"), dtype=object),
+    )
     df["foo"] = "bar"
     return df
 
@@ -75,31 +61,18 @@ def mixed_float_frame():
     Fixture for DataFrame of different float types with index of unique strings
 
     Columns are ['A', 'B', 'C', 'D'].
-
-                       A         B         C         D
-    GI7bbDaEZe -0.237908 -0.246225 -0.468506  0.752993
-    KGp9mFepzA -1.140809 -0.644046 -1.225586  0.801588
-    VeVYLAb1l2 -1.154013 -1.677615  0.690430 -0.003731
-    kmPME4WKhO  0.979578  0.998274 -0.776367  0.897607
-    CPyopdXTiz  0.048119 -0.257174  0.836426  0.111266
-    0kJZQndAj0  0.274357 -0.281135 -0.344238  0.834541
-    tqdwQsaHG8 -0.979716 -0.519897  0.582031  0.144710
-    ...              ...       ...       ...       ...
-    7FhZTWILQj -2.906357  1.261039 -0.780273 -0.537237
-    4pUDPM4eGq -2.042512 -0.464382 -0.382080  1.132612
-    B8dUgUzwTi -1.506637 -0.364435  1.087891  0.297653
-    hErlVYjVv9  1.477453 -0.495515 -0.713867  1.438427
-    1BKN3o7YLs  0.127535 -0.349812 -0.881836  0.489827
-    9S4Ekn7zga  1.445518 -2.095149  0.031982  0.373204
-    xN1dNn6OV6  1.425017 -0.983995 -0.363281 -0.224502
-
-    [30 rows x 4 columns]
     """
-    df = DataFrame(tm.getSeriesData())
-    df.A = df.A.astype("float32")
-    df.B = df.B.astype("float32")
-    df.C = df.C.astype("float16")
-    df.D = df.D.astype("float64")
+    df = DataFrame(
+        {
+            col: np.random.default_rng(2).random(30, dtype=dtype)
+            for col, dtype in zip(
+                list("ABCD"), ["float32", "float32", "float32", "float64"]
+            )
+        },
+        index=Index([f"foo_{i}" for i in range(30)], dtype=object),
+    )
+    # Generator.random only supports float32/float64, so cast C to float16 here
+    df["C"] = df["C"].astype("float16")
     return df
 
 
@@ -109,32 +82,14 @@ def mixed_int_frame():
     Fixture for DataFrame of different int types with index of unique strings
 
     Columns are ['A', 'B', 'C', 'D'].
-
-                A  B    C    D
-    mUrCZ67juP  0  1    2    2
-    rw99ACYaKS  0  1    0    0
-    7QsEcpaaVU  0  1    1    1
-    xkrimI2pcE  0  1    0    0
-    dz01SuzoS8  0  1  255  255
-    ccQkqOHX75 -1  1    0    0
-    DN0iXaoDLd  0  1    0    0
-    ...        .. ..  ...  ...
-    Dfb141wAaQ  1  1  254  254
-    IPD8eQOVu5  0  1    0    0
-    CcaKulsCmv  0  1    0    0
-    rIBa8gu7E5  0  1    0    0
-    RP6peZmh5o  0  1    1    1
-    NMb9pipQWQ  0  1    0    0
-    PqgbJEzjib  0  1    3    3
-
-    [30 rows x 4 columns]
     """
-    df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
-    df.A = df.A.astype("int32")
-    df.B = np.ones(len(df.B), dtype="uint64")
-    df.C = df.C.astype("uint8")
-    df.D = df.C.astype("int64")
-    return df
+    return DataFrame(
+        {
+            col: np.ones(30, dtype=dtype)
+            for col, dtype in zip(list("ABCD"), ["int32", "uint64", "uint8", "int64"])
+        },
+        index=Index([f"foo_{i}" for i in range(30)], dtype=object),
+    )
 
 
 @pytest.fixture

diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py
@@ -57,7 +57,7 @@ def test_xs(
         assert xs["B"] == "1"
 
         with pytest.raises(
-            KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00')")
+            KeyError, match=re.escape("Timestamp('2019-12-31 00:00:00')")
         ):
             datetime_frame.xs(datetime_frame.index[0] - BDay())
 

diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py
@@ -30,7 +30,7 @@ def test_first_subset(self, frame_or_series):
 
         with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
             result = ts.first("3ME")
-            expected = ts[:"3/31/2000"]
+            expected = ts[:"3/31/2020"]
             tm.assert_equal(result, expected)
 
         with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
@@ -78,7 +78,7 @@ def test_last_subset(self, frame_or_series):
 
         with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg):
             result = ts.last("21D")
-        expected = ts["2000-01-10":]
+        expected = ts["2020-01-10":]
         tm.assert_equal(result, expected)
 
         with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg):

diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py
@@ -532,11 +532,11 @@ def test_info_compute_numba():
 
     with option_context("compute.use_numba", True):
         buf = StringIO()
-        df.info()
+        df.info(buf=buf)
         result = buf.getvalue()
 
     buf = StringIO()
-    df.info()
+    df.info(buf=buf)
     expected = buf.getvalue()
     assert result == expected
 

diff --git a/pandas/tests/frame/methods/test_truncate.py b/pandas/tests/frame/methods/test_truncate.py
@@ -60,7 +60,7 @@ def test_truncate(self, datetime_frame, frame_or_series):
         truncated = ts.truncate(before=ts.index[-1] + ts.index.freq)
         assert len(truncated) == 0
 
-        msg = "Truncate: 2000-01-06 00:00:00 must be after 2000-02-04 00:00:00"
+        msg = "Truncate: 2020-01-06 00:00:00 must be after 2020-02-04 00:00:00"
         with pytest.raises(ValueError, match=msg):
             ts.truncate(
                 before=ts.index[-1] - ts.index.freq, after=ts.index[0] + ts.index.freq

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
@@ -156,36 +156,18 @@ def bool_frame_with_na():
     Fixture for DataFrame of booleans with index of unique strings
 
     Columns are ['A', 'B', 'C', 'D']; some entries are missing
-
-                    A      B      C      D
-    zBZxY2IDGd  False  False  False  False
-    IhBWBMWllt  False   True   True   True
-    ctjdvZSR6R   True  False   True   True
-    AVTujptmxb  False   True  False   True
-    G9lrImrSWq  False  False  False   True
-    sFFwdIUfz2    NaN    NaN    NaN    NaN
-    s15ptEJnRb    NaN    NaN    NaN    NaN
-    ...           ...    ...    ...    ...
-    UW41KkDyZ4   True   True  False  False
-    l9l6XkOdqV   True  False  False  False
-    X2MeZfzDYA  False   True  False  False
-    xWkIKU7vfX  False   True  False   True
-    QOhL6VmpGU  False  False  False   True
-    22PwkRJdat  False   True  False  False
-    kfboQ3VeIK   True  False   True  False
-
-    [30 rows x 4 columns]
     """
-    df = DataFrame(tm.getSeriesData()) > 0
-    df = df.astype(object)
+    df = DataFrame(
+        np.concatenate(
+            [np.ones((15, 4), dtype=bool), np.zeros((15, 4), dtype=bool)], axis=0
+        ),
+        index=Index([f"foo_{i}" for i in range(30)], dtype=object),
+        columns=Index(list("ABCD"), dtype=object),
+        dtype=object,
+    )
     # set some NAs
     df.iloc[5:10] = np.nan
     df.iloc[15:20, -2:] = np.nan
-
-    # For `any` tests we need to have at least one True before the first NaN
-    #  in each column
-    for i in range(4):
-        df.iloc[i, i] = True
     return df
 
 
@@ -195,27 +177,12 @@ def float_frame_with_na():
     Fixture for DataFrame of floats with index of unique strings
 
     Columns are ['A', 'B', 'C', 'D']; some entries are missing
-
-                       A         B         C         D
-    ABwBzA0ljw -1.128865 -0.897161  0.046603  0.274997
-    DJiRzmbyQF  0.728869  0.233502  0.722431 -0.890872
-    neMgPD5UBF  0.486072 -1.027393 -0.031553  1.449522
-    0yWA4n8VeX -1.937191 -1.142531  0.805215 -0.462018
-    3slYUbbqU1  0.153260  1.164691  1.489795 -0.545826
-    soujjZ0A08       NaN       NaN       NaN       NaN
-    7W6NLGsjB9       NaN       NaN       NaN       NaN
-    ...              ...       ...       ...       ...
-    uhfeaNkCR1 -0.231210 -0.340472  0.244717 -0.901590
-    n6p7GYuBIV -0.419052  1.922721 -0.125361 -0.727717
-    ZhzAeY6p1y  1.234374 -1.425359 -0.827038 -0.633189
-    uWdPsORyUh  0.046738 -0.980445 -1.102965  0.605503
-    3DJA6aN590 -0.091018 -1.684734 -1.100900  0.215947
-    2GBPAzdbMk -2.883405 -1.021071  1.209877  1.633083
-    sHadBoyVHw -2.223032 -0.326384  0.258931  0.245517
-
-    [30 rows x 4 columns]
     """
-    df = DataFrame(tm.getSeriesData())
+    df = DataFrame(
+        np.random.default_rng(2).standard_normal((30, 4)),
+        index=Index([f"foo_{i}" for i in range(30)], dtype=object),
+        columns=Index(list("ABCD"), dtype=object),
+    )
     # set some NAs
     df.iloc[5:10] = np.nan
     df.iloc[15:20, -2:] = np.nan

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -90,15 +90,14 @@ def assert_json_roundtrip_equal(result, expected, orient):
 class TestPandasContainer:
     @pytest.fixture
     def categorical_frame(self):
-        _seriesd = tm.getSeriesData()
-
-        _cat_frame = DataFrame(_seriesd)
-
-        cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * (len(_cat_frame) - 15)
-        _cat_frame.index = pd.CategoricalIndex(cat, name="E")
-        _cat_frame["E"] = list(reversed(cat))
-        _cat_frame["sort"] = np.arange(len(_cat_frame), dtype="int64")
-        return _cat_frame
+        data = {
+            c: np.random.default_rng(i).standard_normal(30)
+            for i, c in enumerate(list("ABCD"))
+        }
+        cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * 15
+        data["E"] = list(reversed(cat))
+        data["sort"] = np.arange(30, dtype="int64")
+        return DataFrame(data, index=pd.CategoricalIndex(cat, name="E"))
 
     @pytest.fixture
     def datetime_series(self):