
Speed up mega-resampling test
alexowens90 committed Apr 19, 2024
commit 3b49db6 (1 parent: 5f2e626)
1 changed file with 45 additions and 45 deletions: python/tests/unit/arcticdb/version_store/test_resample.py
@@ -34,21 +34,21 @@ def test_resample_rejects_frequency_strings(freq):
        q = q.resample(freq + "1m")


@pytest.mark.parametrize("freq", ("s", "min", "h", "D", "1h30min"))
@pytest.mark.parametrize("freq", ("min", "h", "D", "1h30min"))
@pytest.mark.parametrize("date_range", (None, (pd.Timestamp("2024-01-02T12:00:00"), pd.Timestamp("2024-01-03T12:00:00"))))
@pytest.mark.parametrize("closed", ("left", "right"))
@pytest.mark.parametrize("label", ("left", "right"))
def test_resampling(lmdb_version_store_v1, freq, date_range, closed, label):
    lib = lmdb_version_store_v1
    sym = "test_resampling"
-    # Want an index with data every second for 2 days, with additional data points 1 nanosecond before and after each
-    # second to catch off-by-one errors
+    # Want an index with data every minute for 2 days, with additional data points 1 nanosecond before and after each
+    # minute to catch off-by-one errors
    idx_start_base = pd.Timestamp("2024-01-02")
    idx_end_base = pd.Timestamp("2024-01-04")

-    idx = pd.date_range(idx_start_base, idx_end_base, freq="S")
-    idx_1_nano_before = pd.date_range(idx_start_base - pd.Timedelta(1), idx_end_base - pd.Timedelta(1), freq="S")
-    idx_1_nano_after = pd.date_range(idx_start_base + pd.Timedelta(1), idx_end_base + pd.Timedelta(1), freq="S")
+    idx = pd.date_range(idx_start_base, idx_end_base, freq="min")
+    idx_1_nano_before = pd.date_range(idx_start_base - pd.Timedelta(1), idx_end_base - pd.Timedelta(1), freq="min")
+    idx_1_nano_after = pd.date_range(idx_start_base + pd.Timedelta(1), idx_end_base + pd.Timedelta(1), freq="min")
    idx = idx.join(idx_1_nano_before, how="outer").join(idx_1_nano_after, how="outer")
    rng = np.random.default_rng()
    df = pd.DataFrame(
@@ -255,45 +255,45 @@ def test_resampling_string_columns_supported_aggregations(lmdb_version_store_tin
    assert_frame_equal(expected, received)


-# def test_resampling_bool_columns(lmdb_version_store_tiny_segment):
-#     lib = lmdb_version_store_tiny_segment
-#     sym = "test_resampling_bool_columns"
-#
-#     idx = [0, 1, 1000, 1001, 2000, 2001, 3000, 3001]
-#     idx = np.array(idx, dtype="datetime64[ns]")
-#
-#     col = [True, True, True, False, False, True, False, False]
-#
-#     df = pd.DataFrame({"col": col}, index=idx)
-#     lib.write(sym, df)
-#
-#     expected = df.resample("us").agg(
-#         sum=pd.NamedAgg("col", "sum"),
-#         mean=pd.NamedAgg("col", "mean"),
-#         min=pd.NamedAgg("col", "min"),
-#         max=pd.NamedAgg("col", "max"),
-#         first=pd.NamedAgg("col", "first"),
-#         last=pd.NamedAgg("col", "last"),
-#         count=pd.NamedAgg("col", "count"),
-#     )
-#     expected = expected.reindex(columns=sorted(expected.columns))
-#     expected["count"] = expected["count"].astype(np.uint64)
-#
-#     q = QueryBuilder()
-#     q = q.resample("us").agg(
-#         {
-#             "sum": ("col", "sum"),
-#             "mean": ("col", "mean"),
-#             "min": ("col", "min"),
-#             "max": ("col", "max"),
-#             "first": ("col", "first"),
-#             "last": ("col", "last"),
-#             "count": ("col", "count"),
-#         }
-#     )
-#     received = lib.read(sym, query_builder=q).data
-#     received = received.reindex(columns=sorted(received.columns))
-#     assert_frame_equal(expected, received)
+def test_resampling_bool_columns(lmdb_version_store_tiny_segment):
+    lib = lmdb_version_store_tiny_segment
+    sym = "test_resampling_bool_columns"
+
+    idx = [0, 1, 1000, 1001, 2000, 2001, 3000, 3001]
+    idx = np.array(idx, dtype="datetime64[ns]")
+
+    col = [True, True, True, False, False, True, False, False]
+
+    df = pd.DataFrame({"col": col}, index=idx)
+    lib.write(sym, df)
+
+    expected = df.resample("us").agg(
+        sum=pd.NamedAgg("col", "sum"),
+        mean=pd.NamedAgg("col", "mean"),
+        min=pd.NamedAgg("col", "min"),
+        max=pd.NamedAgg("col", "max"),
+        first=pd.NamedAgg("col", "first"),
+        last=pd.NamedAgg("col", "last"),
+        count=pd.NamedAgg("col", "count"),
+    )
+    expected = expected.reindex(columns=sorted(expected.columns))
+    expected["count"] = expected["count"].astype(np.uint64)
+
+    q = QueryBuilder()
+    q = q.resample("us").agg(
+        {
+            "sum": ("col", "sum"),
+            "mean": ("col", "mean"),
+            "min": ("col", "min"),
+            "max": ("col", "max"),
+            "first": ("col", "first"),
+            "last": ("col", "last"),
+            "count": ("col", "count"),
+        }
+    )
+    received = lib.read(sym, query_builder=q).data
+    received = received.reindex(columns=sorted(received.columns))
+    assert_frame_equal(expected, received)


def test_resampling_named_agg(lmdb_version_store_v1):
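For context on the speed-up: over the two-day span used in test_resampling, a per-second base index contains 172,801 timestamps while a per-minute one contains 2,881, roughly 60x fewer, and the test then joins in two copies of the index shifted by plus and minus one nanosecond, tripling the row count. A minimal sketch of that arithmetic in plain pandas (illustrative only, not part of the commit, assuming a pandas version that accepts the lowercase "s" and "min" offset aliases):

import pandas as pd

start, end = pd.Timestamp("2024-01-02"), pd.Timestamp("2024-01-04")

# Both endpoints are inclusive, so the ranges hold 2 * 86_400 + 1 and 2 * 1_440 + 1 points.
per_second = pd.date_range(start, end, freq="s")    # old base index: 172_801 timestamps
per_minute = pd.date_range(start, end, freq="min")  # new base index: 2_881 timestamps

# Mirror the test's outer joins of the +/- 1 nanosecond copies, which triple the size.
one_ns = pd.Timedelta(1)
full_idx = per_minute.join(per_minute - one_ns, how="outer").join(per_minute + one_ns, how="outer")

print(len(per_second) * 3, len(full_idx))  # 518403 8643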
