more tests on read_batch() (#1987)
#### Reference Issues/PRs
<!--Example: Fixes #1234. See also #3456.-->

#### What does this implement or fix?

Adds several additional tests for the read_batch() method. The primary focus is on requests that read from more symbols and/or snapshots, with different combinations of read parameters (query builder, etc.) applied to more diverse dataframe types.
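
Below is a minimal sketch of the kind of scenario these tests exercise, assuming an LMDB-backed library; the library, symbol, and column names and the filter values are illustrative only, not taken from the test suite:

```python
import arcticdb as adb
import numpy as np
import pandas as pd

ac = adb.Arctic("lmdb://read_batch_demo")
lib = ac.get_library("demo", create_if_missing=True)

df = pd.DataFrame(
    {"price": np.arange(10.0), "qty": np.arange(10)},
    index=pd.date_range("2024-01-01", periods=10),
)
lib.write("sym_a", df)
lib.write("sym_b", df)
lib.snapshot("snap_1")

# Filter one symbol with a QueryBuilder while reading another as-of a snapshot.
qb = adb.QueryBuilder()
qb = qb[qb["qty"] > 5]

results = lib.read_batch(
    [
        adb.ReadRequest("sym_a", query_builder=qb),
        adb.ReadRequest("sym_b", as_of="snap_1", columns=["price"]),
    ]
)
for item in results:
    print(item.symbol, item.data.shape)
```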

- Added more dataframe utility methods.
- Added a special assert helper (assert_frame_equal_rebuild_index_first) for frames whose index is a row-range type, or another index type that is not a natively supported datetime/timestamp (see the usage sketch after this list).
- As of the previous PR, assert_frame_equal will log the dataframes' structure in CSV format; the CSV is logged only on assertion failure and could later be imported back if needed to reproduce the problem.
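
As an illustration, the new helper added in python/arcticdb/util/test.py could be used along the following lines when a symbol has a plain row-range index; the library, symbol, and filter below are hypothetical:

```python
import arcticdb as adb
import pandas as pd
from arcticdb.util.test import assert_frame_equal_rebuild_index_first

lib = adb.Arctic("lmdb://assert_demo").get_library("demo", create_if_missing=True)

df = pd.DataFrame({"qty": range(10)})  # plain RangeIndex, not a datetime index
lib.write("row_range_sym", df)

qb = adb.QueryBuilder()
qb = qb[qb["qty"] >= 5]

actual = lib.read("row_range_sym", query_builder=qb).data
expected = df[df["qty"] >= 5]

# The two frames may end up with different row numbering after filtering, so the
# helper resets both indexes before delegating to assert_frame_equal.
assert_frame_equal_rebuild_index_first(expected, actual)
```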

#### Any other comments?

#### Checklist

<details>
  <summary>
   Checklist for code changes...
  </summary>
 
- [ ] Have you updated the relevant docstrings, documentation and
copyright notice?
- [ ] Is this contribution tested against [all ArcticDB's
features](../docs/mkdocs/docs/technical/contributing.md)?
- [ ] Do all exceptions introduced raise appropriate [error
messages](https://docs.arcticdb.io/error_messages/)?
- [ ] Are API changes highlighted in the PR description?
- [ ] Is the PR labelled as enhancement or bug so it appears in
autogenerated release notes?
</details>

<!--
Thanks for contributing a Pull Request to ArcticDB! Please ensure you
have taken a look at:
- ArcticDB's Code of Conduct:
https://github.com/man-group/ArcticDB/blob/master/CODE_OF_CONDUCT.md
- ArcticDB's Contribution Licensing:
https://github.com/man-group/ArcticDB/blob/master/docs/mkdocs/docs/technical/contributing.md#contribution-licensing
-->

---------

Co-authored-by: Georgi Rusev <Georgi Rusev>
grusev authored Nov 18, 2024
1 parent fd0ec5d commit 8ee3763
Showing 3 changed files with 545 additions and 33 deletions.
39 changes: 36 additions & 3 deletions python/arcticdb/util/test.py
@@ -8,9 +8,12 @@

import os
from contextlib import contextmanager
from typing import Mapping, Any, Optional, Iterable, NamedTuple, List, AnyStr
from typing import Mapping, Any, Optional, Iterable, NamedTuple, List, AnyStr, Sequence
import numpy as np
import pandas as pd
from pandas.core.series import Series
from pandas import Index
from pandas._typing import Scalar
import datetime as dt
import string
import random
@@ -35,7 +38,6 @@

from arcticdb import log


def create_df(start=0, columns=1) -> pd.DataFrame:
data = {}
for i in range(columns):
@@ -153,6 +155,26 @@ def dataframe_simulate_arcticdb_update_static(existing_df: pd.DataFrame, update_
result_df = pd.concat(chunks)
return result_df

def dataframe_single_column_string(length=1000, column_label='string_short', seed=0, string_len=1):
    """
    Creates a dataframe with a single column (whose label can be changed) containing random strings
    of the specified length. Useful for combining this dataframe with another dataframe.
    """
    random.seed(seed)  # random_string() uses the stdlib random module, so seed that module directly
    return pd.DataFrame({column_label: [random_string(string_len) for _ in range(length)]})

def dataframe_filter_with_datetime_index(df: pd.DataFrame, start_timestamp: Scalar, end_timestamp: Scalar, inclusive='both') -> pd.DataFrame:
    """
    Filters a dataframe that has a datetime index, selecting rows from start_timestamp to end_timestamp,
    where inclusive can be one of ('both', 'left', 'right', 'neither').
    start_timestamp and end_timestamp can be a pandas.Timestamp, a datetime, or a datetime string.
    """

    return df[
        df.index.to_series()
        .between(start_timestamp, end_timestamp, inclusive=inclusive)
    ]

def maybe_not_check_freq(f):
"""Ignore frequency when pandas is newer as starts to check frequency which it did not previously do."""

@@ -178,10 +200,21 @@ def wrapper(*args, **kwargs):

return wrapper


assert_frame_equal = maybe_not_check_freq(pd.testing.assert_frame_equal)
assert_series_equal = maybe_not_check_freq(pd.testing.assert_series_equal)

def assert_frame_equal_rebuild_index_first(expected: pd.DataFrame, actual: pd.DataFrame) -> None:
    """
    Use for dataframes that have a row-range index and whose data is
    obtained from Arctic with a QueryBuilder.
    Both dataframes have their index rebuilt first, to ensure the
    frames carry the same index when a row-range index is used.
    """
    expected.reset_index(inplace=True, drop=True)
    actual.reset_index(inplace=True, drop=True)
    assert_frame_equal(left=expected, right=actual)

def random_string(length: int):
return "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(length))

61 changes: 31 additions & 30 deletions python/tests/conftest.py
@@ -38,6 +38,7 @@
from arcticdb.storage_fixtures.in_memory import InMemoryStorageFixture
from arcticdb.version_store._normalization import MsgPackNormalizer
from arcticdb.util.test import create_df
from arcticdb.arctic import Arctic
from .util.mark import (
AZURE_TESTS_MARK,
MONGO_TESTS_MARK,
@@ -281,7 +282,7 @@ def mem_storage() -> Iterator[InMemoryStorageFixture]:
pytest.param("real_s3", marks=REAL_S3_TESTS_MARK),
],
)
def arctic_client(request, encoding_version):
def arctic_client(request, encoding_version) -> Arctic:
storage_fixture: StorageFixture = request.getfixturevalue(request.param + "_storage")
ac = storage_fixture.create_arctic(encoding_version=encoding_version)
assert not ac.list_libraries()
@@ -298,15 +299,15 @@ def arctic_client(request, encoding_version):
pytest.param("real_s3", marks=REAL_S3_TESTS_MARK),
],
)
def arctic_client_no_lmdb(request, encoding_version):
def arctic_client_no_lmdb(request, encoding_version) -> Arctic:
storage_fixture: StorageFixture = request.getfixturevalue(request.param + "_storage")
ac = storage_fixture.create_arctic(encoding_version=encoding_version)
assert not ac.list_libraries()
return ac


@pytest.fixture
def arctic_library(arctic_client, lib_name):
def arctic_library(arctic_client, lib_name) -> Arctic:
return arctic_client.create_library(lib_name)


@@ -318,15 +319,15 @@ def arctic_library(arctic_client, lib_name):
pytest.param("real_s3", marks=REAL_S3_TESTS_MARK),
],
)
def basic_arctic_client(request, encoding_version):
def basic_arctic_client(request, encoding_version) -> Arctic:
storage_fixture: StorageFixture = request.getfixturevalue(request.param + "_storage")
ac = storage_fixture.create_arctic(encoding_version=encoding_version)
assert not ac.list_libraries()
return ac


@pytest.fixture
def basic_arctic_library(basic_arctic_client, lib_name):
def basic_arctic_library(basic_arctic_client, lib_name) -> Arctic:
return basic_arctic_client.create_library(lib_name)


@@ -554,17 +555,17 @@ def azure_version_store_dynamic_schema(azure_store_factory):


@pytest.fixture
def lmdb_version_store_string_coercion(version_store_factory):
def lmdb_version_store_string_coercion(version_store_factory) -> NativeVersionStore:
return version_store_factory()


@pytest.fixture
def lmdb_version_store_v1(version_store_factory):
def lmdb_version_store_v1(version_store_factory) -> NativeVersionStore:
return version_store_factory(dynamic_strings=True)


@pytest.fixture
def lmdb_version_store_v2(version_store_factory, lib_name):
def lmdb_version_store_v2(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v2"
return version_store_factory(dynamic_strings=True, encoding_version=int(EncodingVersion.V2), name=library_name)

@@ -575,31 +576,31 @@ def lmdb_version_store(request):


@pytest.fixture
def lmdb_version_store_prune_previous(version_store_factory):
def lmdb_version_store_prune_previous(version_store_factory) -> NativeVersionStore:
return version_store_factory(dynamic_strings=True, prune_previous_version=True, use_tombstones=True)


@pytest.fixture
def lmdb_version_store_big_map(version_store_factory):
def lmdb_version_store_big_map(version_store_factory) -> NativeVersionStore:
return version_store_factory(lmdb_config={"map_size": 2**30})


@pytest.fixture
def lmdb_version_store_very_big_map(version_store_factory):
def lmdb_version_store_very_big_map(version_store_factory) -> NativeVersionStore:
return version_store_factory(lmdb_config={"map_size": 2**35})

@pytest.fixture
def lmdb_version_store_column_buckets(version_store_factory):
def lmdb_version_store_column_buckets(version_store_factory) -> NativeVersionStore:
return version_store_factory(dynamic_schema=True, column_group_size=3, segment_row_size=2, bucketize_dynamic=True)


@pytest.fixture
def lmdb_version_store_dynamic_schema_v1(version_store_factory, lib_name):
def lmdb_version_store_dynamic_schema_v1(version_store_factory, lib_name) -> NativeVersionStore:
return version_store_factory(dynamic_schema=True, dynamic_strings=True)


@pytest.fixture
def lmdb_version_store_dynamic_schema_v2(version_store_factory, lib_name):
def lmdb_version_store_dynamic_schema_v2(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v2"
return version_store_factory(
dynamic_schema=True, dynamic_strings=True, encoding_version=int(EncodingVersion.V2), name=library_name
@@ -619,27 +620,27 @@ def lmdb_version_store_dynamic_schema(


@pytest.fixture
def lmdb_version_store_empty_types_v1(version_store_factory, lib_name):
def lmdb_version_store_empty_types_v1(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v1"
return version_store_factory(dynamic_strings=True, empty_types=True, name=library_name)


@pytest.fixture
def lmdb_version_store_empty_types_v2(version_store_factory, lib_name):
def lmdb_version_store_empty_types_v2(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v2"
return version_store_factory(
dynamic_strings=True, empty_types=True, encoding_version=int(EncodingVersion.V2), name=library_name
)


@pytest.fixture
def lmdb_version_store_empty_types_dynamic_schema_v1(version_store_factory, lib_name):
def lmdb_version_store_empty_types_dynamic_schema_v1(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v1"
return version_store_factory(dynamic_strings=True, empty_types=True, dynamic_schema=True, name=library_name)


@pytest.fixture
def lmdb_version_store_empty_types_dynamic_schema_v2(version_store_factory, lib_name):
def lmdb_version_store_empty_types_dynamic_schema_v2(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v2"
return version_store_factory(
dynamic_strings=True,
@@ -651,14 +652,14 @@ def lmdb_version_store_empty_types_dynamic_schema_v2(version_store_factory, lib_


@pytest.fixture
def lmdb_version_store_delayed_deletes_v1(version_store_factory):
def lmdb_version_store_delayed_deletes_v1(version_store_factory) -> NativeVersionStore:
return version_store_factory(
delayed_deletes=True, dynamic_strings=True, empty_types=True, prune_previous_version=True
)


@pytest.fixture
def lmdb_version_store_delayed_deletes_v2(version_store_factory, lib_name):
def lmdb_version_store_delayed_deletes_v2(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v2"
return version_store_factory(
dynamic_strings=True,
@@ -670,52 +671,52 @@ def lmdb_version_store_delayed_deletes_v2(version_store_factory, lib_name):


@pytest.fixture
def lmdb_version_store_tombstones_no_symbol_list(version_store_factory):
def lmdb_version_store_tombstones_no_symbol_list(version_store_factory) -> NativeVersionStore:
return version_store_factory(use_tombstones=True, dynamic_schema=True, symbol_list=False, dynamic_strings=True)


@pytest.fixture
def lmdb_version_store_allows_pickling(version_store_factory, lib_name):
def lmdb_version_store_allows_pickling(version_store_factory, lib_name) -> NativeVersionStore:
return version_store_factory(use_norm_failure_handler_known_types=True, dynamic_strings=True)


@pytest.fixture
def lmdb_version_store_no_symbol_list(version_store_factory):
def lmdb_version_store_no_symbol_list(version_store_factory) -> NativeVersionStore:
return version_store_factory(col_per_group=None, row_per_segment=None, symbol_list=False)


@pytest.fixture
def lmdb_version_store_tombstone_and_pruning(version_store_factory):
def lmdb_version_store_tombstone_and_pruning(version_store_factory) -> NativeVersionStore:
return version_store_factory(use_tombstones=True, prune_previous_version=True)


@pytest.fixture
def lmdb_version_store_tombstone(version_store_factory):
def lmdb_version_store_tombstone(version_store_factory) -> NativeVersionStore:
return version_store_factory(use_tombstones=True)


@pytest.fixture
def lmdb_version_store_tombstone_and_sync_passive(version_store_factory):
def lmdb_version_store_tombstone_and_sync_passive(version_store_factory) -> NativeVersionStore:
return version_store_factory(use_tombstones=True, sync_passive=True)


@pytest.fixture
def lmdb_version_store_ignore_order(version_store_factory):
def lmdb_version_store_ignore_order(version_store_factory) -> NativeVersionStore:
return version_store_factory(ignore_sort_order=True)


@pytest.fixture
def lmdb_version_store_small_segment(version_store_factory):
def lmdb_version_store_small_segment(version_store_factory) -> NativeVersionStore:
return version_store_factory(column_group_size=1000, segment_row_size=1000, lmdb_config={"map_size": 2**30})


@pytest.fixture
def lmdb_version_store_tiny_segment(version_store_factory):
def lmdb_version_store_tiny_segment(version_store_factory) -> NativeVersionStore:
return version_store_factory(column_group_size=2, segment_row_size=2, lmdb_config={"map_size": 2**30})


@pytest.fixture
def lmdb_version_store_tiny_segment_dynamic(version_store_factory):
def lmdb_version_store_tiny_segment_dynamic(version_store_factory) -> NativeVersionStore:
return version_store_factory(column_group_size=2, segment_row_size=2, dynamic_schema=True)


