From e296ab2a4f2c699b99b89fd12df5ad1da5b0509e Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Tue, 19 Dec 2023 17:45:09 +0100 Subject: [PATCH] Add strings columns test for last agg --- .../version_store/test_aggregation.py | 117 +++++++++++------- 1 file changed, 75 insertions(+), 42 deletions(-) diff --git a/python/tests/unit/arcticdb/version_store/test_aggregation.py b/python/tests/unit/arcticdb/version_store/test_aggregation.py index d843e58a556..77c76c9a5c7 100644 --- a/python/tests/unit/arcticdb/version_store/test_aggregation.py +++ b/python/tests/unit/arcticdb/version_store/test_aggregation.py @@ -220,18 +220,7 @@ def test_count_aggregation(local_object_version_store): assert_frame_equal(res.data, df) -@use_of_function_scoped_fixtures_in_hypothesis_checked -@settings(deadline=None) -@given( - df=data_frames( - [ - column("grouping_column", elements=string_strategy, fill=string_strategy), - column("a", elements=numeric_type_strategies()), - ], - index=range_indexes(), - ) -) -def test_hypothesis_first_agg_numeric(lmdb_version_store, df): +def first_aggregation(lmdb_version_store, df): lib = lmdb_version_store assume(not df.empty) @@ -256,28 +245,31 @@ def test_hypothesis_first_agg_numeric(lmdb_version_store, df): df=data_frames( [ column("grouping_column", elements=string_strategy, fill=string_strategy), - column("a", elements=string_strategy), + column("a", elements=numeric_type_strategies()), ], index=range_indexes(), ) ) -def test_hypothesis_first_agg_strings(lmdb_version_store, df): - lib = lmdb_version_store - assume(not df.empty) +def test_hypothesis_first_agg_numeric(lmdb_version_store, df): + first_aggregation(lmdb_version_store, df) - q = QueryBuilder() - q = q.groupby("grouping_column").agg({"a": "first"}) - expected = df.groupby("grouping_column").agg({"a": "first"}) - symbol = "first_agg" - lib.write(symbol, df) - vit = lib.read(symbol, query_builder=q) - vit.data.sort_index(inplace=True) - - assert_frame_equal(expected, vit.data) +@use_of_function_scoped_fixtures_in_hypothesis_checked +@settings(deadline=None) +@given( + df=data_frames( + [ + column("grouping_column", elements=string_strategy, fill=string_strategy), + column("a", elements=string_strategy), + ], + index=range_indexes(), + ) +) +def test_hypothesis_first_agg_strings(lmdb_version_store, df): + first_aggregation(lmdb_version_store, df) -def test_first_aggregation(local_object_version_store): +def test_first_aggregation_numeric(local_object_version_store): df = DataFrame( { "grouping_column": ["group_1", "group_2", "group_4", "group_2", "group_1", "group_3", "group_1"], @@ -323,7 +315,7 @@ def test_first_aggregation_strings(local_object_version_store): assert_frame_equal(res.data, df) -def test_first_agg_with_append(local_object_version_store): +def test_first_agg_numeric_with_append(local_object_version_store): lib = local_object_version_store symbol = "first_agg" @@ -341,19 +333,7 @@ def test_first_agg_with_append(local_object_version_store): assert_frame_equal(vit.data, df) -# TODO add test for strings for last agg as well -@use_of_function_scoped_fixtures_in_hypothesis_checked -@settings(deadline=None) -@given( - df=data_frames( - [ - column("grouping_column", elements=string_strategy, fill=string_strategy), - column("a", elements=numeric_type_strategies()), - ], - index=range_indexes(), - ) -) -def test_hypothesis_last_agg_numeric(lmdb_version_store, df): +def last_aggregation(lmdb_version_store, df): lib = lmdb_version_store assume(not df.empty) @@ -372,7 +352,37 @@ def test_hypothesis_last_agg_numeric(lmdb_version_store, df): assert_frame_equal(expected, vit.data) -def test_last_aggregation(local_object_version_store): +@use_of_function_scoped_fixtures_in_hypothesis_checked +@settings(deadline=None) +@given( + df=data_frames( + [ + column("grouping_column", elements=string_strategy, fill=string_strategy), + column("a", elements=numeric_type_strategies()), + ], + index=range_indexes(), + ) +) +def test_hypothesis_last_agg_numeric(lmdb_version_store, df): + last_aggregation(lmdb_version_store, df) + + +@use_of_function_scoped_fixtures_in_hypothesis_checked +@settings(deadline=None) +@given( + df=data_frames( + [ + column("grouping_column", elements=string_strategy, fill=string_strategy), + column("a", elements=string_strategy), + ], + index=range_indexes(), + ) +) +def test_hypothesis_last_agg_strings(lmdb_version_store, df): + last_aggregation(lmdb_version_store, df) + + +def test_last_aggregation_numeric(local_object_version_store): df = DataFrame( { "grouping_column": ["group_1", "group_2", "group_4", "group_5", "group_2", "group_1", "group_3", "group_1", "group_5"], @@ -395,7 +405,30 @@ def test_last_aggregation(local_object_version_store): assert_frame_equal(res.data, df) -def test_last_agg_with_append(local_object_version_store): +def test_last_aggregation_strings(local_object_version_store): + df = DataFrame( + { + "grouping_column": ["group_1", "group_2", "group_1", "group_3"], + "get_last": ["Hello", "this", "is", "Homer", ], + }, + index=np.arange(4), + ) + q = QueryBuilder() + q = q.groupby("grouping_column").agg({"get_last": "last"}) + symbol = "test_last_aggregation" + local_object_version_store.write(symbol, df) + + res = local_object_version_store.read(symbol, query_builder=q) + res.data.sort_index(inplace=True) + + df = pd.DataFrame({"get_last": ["is", "this", "Homer"]}, index=["group_1", "group_2", "group_3"]) + df.index.rename("grouping_column", inplace=True) + res.data.sort_index(inplace=True) + + assert_frame_equal(res.data, df) + + +def test_last_agg_numeric_with_append(local_object_version_store): lib = local_object_version_store symbol = "last_agg"