more tests on read_batch() (#1987)
#### Reference Issues/PRs
<!--Example: Fixes #1234. See also #3456.-->

#### What does this implement or fix?

Adds several additional tests for the read_batch() method. The primary focus is on requests that read from more symbols and/or snapshots, with different combinations of read parameters (query builder, etc.) applied to more diverse dataframe types.
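
Below is a minimal sketch of the kind of scenario these tests exercise, assuming an LMDB-backed library; the library, symbol, and column names and the filter values are illustrative only, not taken from the test suite:

```python
import arcticdb as adb
import numpy as np
import pandas as pd

ac = adb.Arctic("lmdb://read_batch_demo")
lib = ac.get_library("demo", create_if_missing=True)

df = pd.DataFrame(
    {"price": np.arange(10.0), "qty": np.arange(10)},
    index=pd.date_range("2024-01-01", periods=10),
)
lib.write("sym_a", df)
lib.write("sym_b", df)
lib.snapshot("snap_1")

# Filter one symbol with a QueryBuilder while reading another as-of a snapshot.
qb = adb.QueryBuilder()
qb = qb[qb["qty"] > 5]

results = lib.read_batch(
    [
        adb.ReadRequest("sym_a", query_builder=qb),
        adb.ReadRequest("sym_b", as_of="snap_1", columns=["price"]),
    ]
)
for item in results:
    print(item.symbol, item.data.shape)
```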

- Added more dataframe utility methods.
- Added a special assert helper (assert_frame_equal_rebuild_index_first) for frames whose index is a row-range type, or another index type that is not a natively supported datetime/timestamp (see the usage sketch after this list).
- As of the previous PR, assert_frame_equal will log the dataframes' structure in CSV format; the CSV is logged only on assertion failure and could later be imported back if needed to reproduce the problem.
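
As an illustration, the new helper added in python/arcticdb/util/test.py could be used along the following lines when a symbol has a plain row-range index; the library, symbol, and filter below are hypothetical:

```python
import arcticdb as adb
import pandas as pd
from arcticdb.util.test import assert_frame_equal_rebuild_index_first

lib = adb.Arctic("lmdb://assert_demo").get_library("demo", create_if_missing=True)

df = pd.DataFrame({"qty": range(10)})  # plain RangeIndex, not a datetime index
lib.write("row_range_sym", df)

qb = adb.QueryBuilder()
qb = qb[qb["qty"] >= 5]

actual = lib.read("row_range_sym", query_builder=qb).data
expected = df[df["qty"] >= 5]

# The two frames may end up with different row numbering after filtering, so the
# helper resets both indexes before delegating to assert_frame_equal.
assert_frame_equal_rebuild_index_first(expected, actual)
```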

#### Any other comments?

#### Checklist

<details>
  <summary>
   Checklist for code changes...
  </summary>
 
- [ ] Have you updated the relevant docstrings, documentation and
copyright notice?
- [ ] Is this contribution tested against [all ArcticDB's
features](../docs/mkdocs/docs/technical/contributing.md)?
- [ ] Do all exceptions introduced raise appropriate [error
messages](https://docs.arcticdb.io/error_messages/)?
- [ ] Are API changes highlighted in the PR description?
- [ ] Is the PR labelled as enhancement or bug so it appears in
autogenerated release notes?
</details>

<!--
Thanks for contributing a Pull Request to ArcticDB! Please ensure you
have taken a look at:
- ArcticDB's Code of Conduct:
https://github.com/man-group/ArcticDB/blob/master/CODE_OF_CONDUCT.md
- ArcticDB's Contribution Licensing:
https://github.com/man-group/ArcticDB/blob/master/docs/mkdocs/docs/technical/contributing.md#contribution-licensing
-->

---------

Co-authored-by: Georgi Rusev <Georgi Rusev>
grusev authored Nov 18, 2024
1 parent fd0ec5d commit 8ee3763
Showing 3 changed files with 545 additions and 33 deletions.
39 changes: 36 additions & 3 deletions python/arcticdb/util/test.py
@@ -8,9 +8,12 @@

import os
from contextlib import contextmanager
from typing import Mapping, Any, Optional, Iterable, NamedTuple, List, AnyStr
from typing import Mapping, Any, Optional, Iterable, NamedTuple, List, AnyStr, Sequence
import numpy as np
import pandas as pd
from pandas.core.series import Series
from pandas import Index
from pandas._typing import Scalar
import datetime as dt
import string
import random
@@ -35,7 +38,6 @@

from arcticdb import log


def create_df(start=0, columns=1) -> pd.DataFrame:
data = {}
for i in range(columns):
@@ -153,6 +155,26 @@ def dataframe_simulate_arcticdb_update_static(existing_df: pd.DataFrame, update_
result_df = pd.concat(chunks)
return result_df

def dataframe_single_column_string(length=1000, column_label='string_short', seed=0, string_len=1):
    """
    Creates a dataframe with a single column (whose label can be changed) containing random strings
    of the specified length. Useful for combining this dataframe with another dataframe.
    """
    random.seed(seed)  # random_string() uses the stdlib random module, so seed that module directly
    return pd.DataFrame({column_label: [random_string(string_len) for _ in range(length)]})

def dataframe_filter_with_datetime_index(df: pd.DataFrame, start_timestamp: Scalar, end_timestamp: Scalar, inclusive='both') -> pd.DataFrame:
    """
    Filters a dataframe that has a datetime index, selecting rows from start_timestamp to end_timestamp,
    where inclusive can be one of ('both', 'left', 'right', 'neither').
    start_timestamp and end_timestamp can be a pandas.Timestamp, a datetime, or a datetime string.
    """

    return df[
        df.index.to_series()
        .between(start_timestamp, end_timestamp, inclusive=inclusive)
    ]

def maybe_not_check_freq(f):
"""Ignore frequency when pandas is newer as starts to check frequency which it did not previously do."""

@@ -178,10 +200,21 @@ def wrapper(*args, **kwargs):

return wrapper


assert_frame_equal = maybe_not_check_freq(pd.testing.assert_frame_equal)
assert_series_equal = maybe_not_check_freq(pd.testing.assert_series_equal)

def assert_frame_equal_rebuild_index_first(expected: pd.DataFrame, actual: pd.DataFrame) -> None:
    """
    Use for dataframes that have a row-range index and whose data is
    obtained from Arctic with a QueryBuilder.
    Both dataframes have their index rebuilt first, to ensure the
    frames carry the same index when a row-range index is used.
    """
    expected.reset_index(inplace=True, drop=True)
    actual.reset_index(inplace=True, drop=True)
    assert_frame_equal(left=expected, right=actual)

def random_string(length: int):
return "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(length))

61 changes: 31 additions & 30 deletions python/tests/conftest.py
@@ -38,6 +38,7 @@
from arcticdb.storage_fixtures.in_memory import InMemoryStorageFixture
from arcticdb.version_store._normalization import MsgPackNormalizer
from arcticdb.util.test import create_df
from arcticdb.arctic import Arctic
from .util.mark import (
AZURE_TESTS_MARK,
MONGO_TESTS_MARK,
@@ -281,7 +282,7 @@ def mem_storage() -> Iterator[InMemoryStorageFixture]:
pytest.param("real_s3", marks=REAL_S3_TESTS_MARK),
],
)
def arctic_client(request, encoding_version):
def arctic_client(request, encoding_version) -> Arctic:
storage_fixture: StorageFixture = request.getfixturevalue(request.param + "_storage")
ac = storage_fixture.create_arctic(encoding_version=encoding_version)
assert not ac.list_libraries()
@@ -298,15 +299,15 @@ def arctic_client(request, encoding_version):
pytest.param("real_s3", marks=REAL_S3_TESTS_MARK),
],
)
def arctic_client_no_lmdb(request, encoding_version):
def arctic_client_no_lmdb(request, encoding_version) -> Arctic:
storage_fixture: StorageFixture = request.getfixturevalue(request.param + "_storage")
ac = storage_fixture.create_arctic(encoding_version=encoding_version)
assert not ac.list_libraries()
return ac


@pytest.fixture
def arctic_library(arctic_client, lib_name):
def arctic_library(arctic_client, lib_name) -> Arctic:
return arctic_client.create_library(lib_name)


@@ -318,15 +319,15 @@ def arctic_library(arctic_client, lib_name):
pytest.param("real_s3", marks=REAL_S3_TESTS_MARK),
],
)
def basic_arctic_client(request, encoding_version):
def basic_arctic_client(request, encoding_version) -> Arctic:
storage_fixture: StorageFixture = request.getfixturevalue(request.param + "_storage")
ac = storage_fixture.create_arctic(encoding_version=encoding_version)
assert not ac.list_libraries()
return ac


@pytest.fixture
def basic_arctic_library(basic_arctic_client, lib_name):
def basic_arctic_library(basic_arctic_client, lib_name) -> Arctic:
return basic_arctic_client.create_library(lib_name)


@@ -554,17 +555,17 @@ def azure_version_store_dynamic_schema(azure_store_factory):


@pytest.fixture
def lmdb_version_store_string_coercion(version_store_factory):
def lmdb_version_store_string_coercion(version_store_factory) -> NativeVersionStore:
return version_store_factory()


@pytest.fixture
def lmdb_version_store_v1(version_store_factory):
def lmdb_version_store_v1(version_store_factory) -> NativeVersionStore:
return version_store_factory(dynamic_strings=True)


@pytest.fixture
def lmdb_version_store_v2(version_store_factory, lib_name):
def lmdb_version_store_v2(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v2"
return version_store_factory(dynamic_strings=True, encoding_version=int(EncodingVersion.V2), name=library_name)

@@ -575,31 +576,31 @@ def lmdb_version_store(request):


@pytest.fixture
def lmdb_version_store_prune_previous(version_store_factory):
def lmdb_version_store_prune_previous(version_store_factory) -> NativeVersionStore:
return version_store_factory(dynamic_strings=True, prune_previous_version=True, use_tombstones=True)


@pytest.fixture
def lmdb_version_store_big_map(version_store_factory):
def lmdb_version_store_big_map(version_store_factory) -> NativeVersionStore:
return version_store_factory(lmdb_config={"map_size": 2**30})


@pytest.fixture
def lmdb_version_store_very_big_map(version_store_factory):
def lmdb_version_store_very_big_map(version_store_factory) -> NativeVersionStore:
return version_store_factory(lmdb_config={"map_size": 2**35})

@pytest.fixture
def lmdb_version_store_column_buckets(version_store_factory):
def lmdb_version_store_column_buckets(version_store_factory) -> NativeVersionStore:
return version_store_factory(dynamic_schema=True, column_group_size=3, segment_row_size=2, bucketize_dynamic=True)


@pytest.fixture
def lmdb_version_store_dynamic_schema_v1(version_store_factory, lib_name):
def lmdb_version_store_dynamic_schema_v1(version_store_factory, lib_name) -> NativeVersionStore:
return version_store_factory(dynamic_schema=True, dynamic_strings=True)


@pytest.fixture
def lmdb_version_store_dynamic_schema_v2(version_store_factory, lib_name):
def lmdb_version_store_dynamic_schema_v2(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v2"
return version_store_factory(
dynamic_schema=True, dynamic_strings=True, encoding_version=int(EncodingVersion.V2), name=library_name
@@ -619,27 +620,27 @@ def lmdb_version_store_dynamic_schema(


@pytest.fixture
def lmdb_version_store_empty_types_v1(version_store_factory, lib_name):
def lmdb_version_store_empty_types_v1(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v1"
return version_store_factory(dynamic_strings=True, empty_types=True, name=library_name)


@pytest.fixture
def lmdb_version_store_empty_types_v2(version_store_factory, lib_name):
def lmdb_version_store_empty_types_v2(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v2"
return version_store_factory(
dynamic_strings=True, empty_types=True, encoding_version=int(EncodingVersion.V2), name=library_name
)


@pytest.fixture
def lmdb_version_store_empty_types_dynamic_schema_v1(version_store_factory, lib_name):
def lmdb_version_store_empty_types_dynamic_schema_v1(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v1"
return version_store_factory(dynamic_strings=True, empty_types=True, dynamic_schema=True, name=library_name)


@pytest.fixture
def lmdb_version_store_empty_types_dynamic_schema_v2(version_store_factory, lib_name):
def lmdb_version_store_empty_types_dynamic_schema_v2(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v2"
return version_store_factory(
dynamic_strings=True,
@@ -651,14 +652,14 @@ def lmdb_version_store_empty_types_dynamic_schema_v2(version_store_factory, lib_


@pytest.fixture
def lmdb_version_store_delayed_deletes_v1(version_store_factory):
def lmdb_version_store_delayed_deletes_v1(version_store_factory) -> NativeVersionStore:
return version_store_factory(
delayed_deletes=True, dynamic_strings=True, empty_types=True, prune_previous_version=True
)


@pytest.fixture
def lmdb_version_store_delayed_deletes_v2(version_store_factory, lib_name):
def lmdb_version_store_delayed_deletes_v2(version_store_factory, lib_name) -> NativeVersionStore:
library_name = lib_name + "_v2"
return version_store_factory(
dynamic_strings=True,
@@ -670,52 +671,52 @@ def lmdb_version_store_delayed_deletes_v2(version_store_factory, lib_name):


@pytest.fixture
def lmdb_version_store_tombstones_no_symbol_list(version_store_factory):
def lmdb_version_store_tombstones_no_symbol_list(version_store_factory) -> NativeVersionStore:
return version_store_factory(use_tombstones=True, dynamic_schema=True, symbol_list=False, dynamic_strings=True)


@pytest.fixture
def lmdb_version_store_allows_pickling(version_store_factory, lib_name):
def lmdb_version_store_allows_pickling(version_store_factory, lib_name) -> NativeVersionStore:
return version_store_factory(use_norm_failure_handler_known_types=True, dynamic_strings=True)


@pytest.fixture
def lmdb_version_store_no_symbol_list(version_store_factory):
def lmdb_version_store_no_symbol_list(version_store_factory) -> NativeVersionStore:
return version_store_factory(col_per_group=None, row_per_segment=None, symbol_list=False)


@pytest.fixture
def lmdb_version_store_tombstone_and_pruning(version_store_factory):
def lmdb_version_store_tombstone_and_pruning(version_store_factory) -> NativeVersionStore:
return version_store_factory(use_tombstones=True, prune_previous_version=True)


@pytest.fixture
def lmdb_version_store_tombstone(version_store_factory):
def lmdb_version_store_tombstone(version_store_factory) -> NativeVersionStore:
return version_store_factory(use_tombstones=True)


@pytest.fixture
def lmdb_version_store_tombstone_and_sync_passive(version_store_factory):
def lmdb_version_store_tombstone_and_sync_passive(version_store_factory) -> NativeVersionStore:
return version_store_factory(use_tombstones=True, sync_passive=True)


@pytest.fixture
def lmdb_version_store_ignore_order(version_store_factory):
def lmdb_version_store_ignore_order(version_store_factory) -> NativeVersionStore:
return version_store_factory(ignore_sort_order=True)


@pytest.fixture
def lmdb_version_store_small_segment(version_store_factory):
def lmdb_version_store_small_segment(version_store_factory) -> NativeVersionStore:
return version_store_factory(column_group_size=1000, segment_row_size=1000, lmdb_config={"map_size": 2**30})


@pytest.fixture
def lmdb_version_store_tiny_segment(version_store_factory):
def lmdb_version_store_tiny_segment(version_store_factory) -> NativeVersionStore:
return version_store_factory(column_group_size=2, segment_row_size=2, lmdb_config={"map_size": 2**30})


@pytest.fixture
def lmdb_version_store_tiny_segment_dynamic(version_store_factory):
def lmdb_version_store_tiny_segment_dynamic(version_store_factory) -> NativeVersionStore:
return version_store_factory(column_group_size=2, segment_row_size=2, dynamic_schema=True)


