
Speed up mega-resampling test
alexowens90 committed Apr 19, 2024
commit 3b49db6 (1 parent: 5f2e626)
1 changed file with 45 additions and 45 deletions: python/tests/unit/arcticdb/version_store/test_resample.py
@@ -34,21 +34,21 @@ def test_resample_rejects_frequency_strings(freq):
        q = q.resample(freq + "1m")


@pytest.mark.parametrize("freq", ("s", "min", "h", "D", "1h30min"))
@pytest.mark.parametrize("freq", ("min", "h", "D", "1h30min"))
@pytest.mark.parametrize("date_range", (None, (pd.Timestamp("2024-01-02T12:00:00"), pd.Timestamp("2024-01-03T12:00:00"))))
@pytest.mark.parametrize("closed", ("left", "right"))
@pytest.mark.parametrize("label", ("left", "right"))
def test_resampling(lmdb_version_store_v1, freq, date_range, closed, label):
    lib = lmdb_version_store_v1
    sym = "test_resampling"
-    # Want an index with data every second for 2 days, with additional data points 1 nanosecond before and after each
-    # second to catch off-by-one errors
+    # Want an index with data every minute for 2 days, with additional data points 1 nanosecond before and after each
+    # minute to catch off-by-one errors
    idx_start_base = pd.Timestamp("2024-01-02")
    idx_end_base = pd.Timestamp("2024-01-04")

-    idx = pd.date_range(idx_start_base, idx_end_base, freq="S")
-    idx_1_nano_before = pd.date_range(idx_start_base - pd.Timedelta(1), idx_end_base - pd.Timedelta(1), freq="S")
-    idx_1_nano_after = pd.date_range(idx_start_base + pd.Timedelta(1), idx_end_base + pd.Timedelta(1), freq="S")
+    idx = pd.date_range(idx_start_base, idx_end_base, freq="min")
+    idx_1_nano_before = pd.date_range(idx_start_base - pd.Timedelta(1), idx_end_base - pd.Timedelta(1), freq="min")
+    idx_1_nano_after = pd.date_range(idx_start_base + pd.Timedelta(1), idx_end_base + pd.Timedelta(1), freq="min")
    idx = idx.join(idx_1_nano_before, how="outer").join(idx_1_nano_after, how="outer")
    rng = np.random.default_rng()
    df = pd.DataFrame(
@@ -255,45 +255,45 @@ def test_resampling_string_columns_supported_aggregations(lmdb_version_store_tin
    assert_frame_equal(expected, received)


-# def test_resampling_bool_columns(lmdb_version_store_tiny_segment):
-#     lib = lmdb_version_store_tiny_segment
-#     sym = "test_resampling_bool_columns"
-#
-#     idx = [0, 1, 1000, 1001, 2000, 2001, 3000, 3001]
-#     idx = np.array(idx, dtype="datetime64[ns]")
-#
-#     col = [True, True, True, False, False, True, False, False]
-#
-#     df = pd.DataFrame({"col": col}, index=idx)
-#     lib.write(sym, df)
-#
-#     expected = df.resample("us").agg(
-#         sum=pd.NamedAgg("col", "sum"),
-#         mean=pd.NamedAgg("col", "mean"),
-#         min=pd.NamedAgg("col", "min"),
-#         max=pd.NamedAgg("col", "max"),
-#         first=pd.NamedAgg("col", "first"),
-#         last=pd.NamedAgg("col", "last"),
-#         count=pd.NamedAgg("col", "count"),
-#     )
-#     expected = expected.reindex(columns=sorted(expected.columns))
-#     expected["count"] = expected["count"].astype(np.uint64)
-#
-#     q = QueryBuilder()
-#     q = q.resample("us").agg(
-#         {
-#             "sum": ("col", "sum"),
-#             "mean": ("col", "mean"),
-#             "min": ("col", "min"),
-#             "max": ("col", "max"),
-#             "first": ("col", "first"),
-#             "last": ("col", "last"),
-#             "count": ("col", "count"),
-#         }
-#     )
-#     received = lib.read(sym, query_builder=q).data
-#     received = received.reindex(columns=sorted(received.columns))
-#     assert_frame_equal(expected, received)
+def test_resampling_bool_columns(lmdb_version_store_tiny_segment):
+    lib = lmdb_version_store_tiny_segment
+    sym = "test_resampling_bool_columns"
+
+    idx = [0, 1, 1000, 1001, 2000, 2001, 3000, 3001]
+    idx = np.array(idx, dtype="datetime64[ns]")
+
+    col = [True, True, True, False, False, True, False, False]
+
+    df = pd.DataFrame({"col": col}, index=idx)
+    lib.write(sym, df)
+
+    expected = df.resample("us").agg(
+        sum=pd.NamedAgg("col", "sum"),
+        mean=pd.NamedAgg("col", "mean"),
+        min=pd.NamedAgg("col", "min"),
+        max=pd.NamedAgg("col", "max"),
+        first=pd.NamedAgg("col", "first"),
+        last=pd.NamedAgg("col", "last"),
+        count=pd.NamedAgg("col", "count"),
+    )
+    expected = expected.reindex(columns=sorted(expected.columns))
+    expected["count"] = expected["count"].astype(np.uint64)
+
+    q = QueryBuilder()
+    q = q.resample("us").agg(
+        {
+            "sum": ("col", "sum"),
+            "mean": ("col", "mean"),
+            "min": ("col", "min"),
+            "max": ("col", "max"),
+            "first": ("col", "first"),
+            "last": ("col", "last"),
+            "count": ("col", "count"),
+        }
+    )
+    received = lib.read(sym, query_builder=q).data
+    received = received.reindex(columns=sorted(received.columns))
+    assert_frame_equal(expected, received)


def test_resampling_named_agg(lmdb_version_store_v1):
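For context on the speed-up: over the two-day span used in test_resampling, a per-second base index contains 172,801 timestamps while a per-minute one contains 2,881, roughly 60x fewer, and the test then joins in two copies of the index shifted by plus and minus one nanosecond, tripling the row count. A minimal sketch of that arithmetic in plain pandas (illustrative only, not part of the commit, assuming a pandas version that accepts the lowercase "s" and "min" offset aliases):

import pandas as pd

start, end = pd.Timestamp("2024-01-02"), pd.Timestamp("2024-01-04")

# Both endpoints are inclusive, so the ranges hold 2 * 86_400 + 1 and 2 * 1_440 + 1 points.
per_second = pd.date_range(start, end, freq="s")    # old base index: 172_801 timestamps
per_minute = pd.date_range(start, end, freq="min")  # new base index: 2_881 timestamps

# Mirror the test's outer joins of the +/- 1 nanosecond copies, which triple the size.
one_ns = pd.Timedelta(1)
full_idx = per_minute.join(per_minute - one_ns, how="outer").join(per_minute + one_ns, how="outer")

print(len(per_second) * 3, len(full_idx))  # 518403 8643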
