Skip to content

Commit

Permalink
Adding update, append and delete asv benchmarks
Browse files Browse the repository at this point in the history
- Added under the ModificationFunctions asv group
- Will be useful to monitor for regressions in these commonly used
  functions
  • Loading branch information
IvoDD committed Mar 15, 2024
1 parent 75b14e1 commit ddd34ae
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 1 deletion.
2 changes: 1 addition & 1 deletion asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

// List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial).
"branches": ["master"], // for git
"branches": ["add-modification-asv-benchmarks"], // for git
// "branches": ["default"], // for mercurial

// The DVCS being used. If not set, it will be automatically
Expand Down
102 changes: 102 additions & 0 deletions python/benchmarks/basic_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,105 @@ def peakmem_read_batch_with_date_ranges(self, rows, num_symbols):
for sym in range(num_symbols)
]
self.lib.read_batch(read_reqs)



class ModificationFunctions:
    """asv benchmarks for the modifying operations ``update``, ``append`` and ``delete``.

    ``setup_cache`` pre-writes the libraries once. ``setup`` prepares the
    dataframes and library handles used by a single timed run, and
    ``teardown`` puts the touched symbols back into their pre-written state so
    that the next run starts from the same data.
    """

    number = 1  # We do a single run between setup and teardown because we e.g. can't delete a symbol twice
    timeout = 6000
    CONNECTION_STRING = "lmdb://modification_functions?map_size=20GB"
    WIDE_DF_ROWS = 5_000
    WIDE_DF_COLS = 30_000
    DATE_RANGE = pd.date_range("2023-01-01", "2023-01-01")

    params = ([100_000, 150_000], [500, 1000])
    param_names = ["rows", "num_symbols"]

    def setup_cache(self):
        """Create and populate the libraries shared by all benchmarks of this class.

        One library per ``rows`` value is filled with the largest configured
        number of symbols, plus one library holding a single short-and-wide
        symbol. Nothing is returned, so asv passes no extra argument to
        ``setup``, the benchmarks or ``teardown``.
        """
        # NOTE(review): asv runs setup_cache in a separate process and only
        # forwards its return value, so nothing is stored on ``self`` here;
        # ``setup`` opens its own connection. Confirm against the asv docs.
        ac = Arctic(ModificationFunctions.CONNECTION_STRING)
        row_counts, symbol_counts = ModificationFunctions.params
        max_symbols = max(symbol_counts)

        for rows in row_counts:
            lib_name = get_prewritten_lib_name(rows)
            ac.delete_library(lib_name)
            lib = ac.create_library(lib_name)
            initial_df = generate_pseudo_random_dataframe(rows)
            for sym in range(max_symbols):
                lib.write(f"{sym}_sym", initial_df)

        lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)
        ac.delete_library(lib_name)
        lib = ac.create_library(lib_name)
        lib.write(
            "short_wide_sym",
            generate_random_floats_dataframe(
                ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS
            ),
        )

    def teardown(self, rows, num_symbols):
        """Undo the effect of one timed run on the pre-written libraries.

        Args:
            rows: Row count of the pre-written symbols used by the run.
            num_symbols: Number of symbols the run touched (``0_sym`` ..).
        """
        initial_df_cache = []

        def initial_df():
            # Built at most once, and only if some symbol really was deleted.
            if not initial_df_cache:
                initial_df_cache.append(generate_pseudo_random_dataframe(rows))
            return initial_df_cache[0]

        def short_wide_df():
            return generate_random_floats_dataframe(
                ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS
            )

        def restore_symbol(lib, symbol, make_df_if_missing):
            # NOTE(review): relies on Library.list_versions returning a mapping
            # keyed by objects with a ``version`` attribute and on
            # Library.delete accepting ``versions=`` - confirm against the
            # ArcticDB version in use.
            version_ids = sorted(key.version for key in lib.list_versions(symbol=symbol))
            if not version_ids:
                # If a symbol got deleted we rewrite it
                lib.write(symbol, make_df_if_missing())
            elif len(version_ids) > 1:
                # If a symbol got a new version (via update or append) we keep only the oldest version
                lib.delete(symbol, versions=version_ids[1:])

        # Only the first ``num_symbols`` symbols are modified by a benchmark run.
        for sym in range(num_symbols):
            restore_symbol(self.lib, f"{sym}_sym", initial_df)

        restore_symbol(self.lib_short_wide, "short_wide_sym", short_wide_df)

    def setup(self, rows, num_symbols):
        """Prepare the dataframes and library handles used by the timed methods.

        Args:
            rows: Row count of the pre-written symbols to operate on.
            num_symbols: Number of symbols each timed method will touch.
        """
        # Same end timestamp as the default of generate_pseudo_random_dataframe.
        end_time = pd.Timestamp("1/1/2023")

        def get_time_at_fraction_of_df(fraction):
            # With a 1-second frequency, ``fraction`` 1.0 is the last
            # pre-written row, 0.5 the middle and values above 1 lie past the end.
            return end_time + pd.Timedelta(seconds=round(rows * (fraction - 1)))

        self.df_update_single = generate_pseudo_random_dataframe(1, "s", get_time_at_fraction_of_df(0.5))
        self.df_update_half = generate_pseudo_random_dataframe(rows // 2, "s", get_time_at_fraction_of_df(0.75))
        self.df_update_upsert = generate_pseudo_random_dataframe(rows, "s", get_time_at_fraction_of_df(1.5))
        self.df_append_single = generate_pseudo_random_dataframe(1, "s", get_time_at_fraction_of_df(1.1))
        self.df_append_large = generate_pseudo_random_dataframe(rows, "s", get_time_at_fraction_of_df(2))

        self.df_short_wide = generate_random_floats_dataframe(
            ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS
        )

        # Reuse a connection from an earlier setup call on this instance, if any.
        if getattr(self, "ac", None) is None:
            self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)
        self.lib = self.ac[get_prewritten_lib_name(rows)]
        self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]

    def time_update_single(self, rows, num_symbols):
        """Update each symbol with a single row in the middle of its range."""
        for sym in range(num_symbols):
            self.lib.update(f"{sym}_sym", self.df_update_single)

    def time_update_half(self, rows, num_symbols):
        """Update each symbol with a frame covering half of its rows."""
        for sym in range(num_symbols):
            self.lib.update(f"{sym}_sym", self.df_update_half)

    def time_update_upsert(self, rows, num_symbols):
        """Update each symbol with ``upsert=True`` using a frame that runs past its end."""
        for sym in range(num_symbols):
            self.lib.update(f"{sym}_sym", self.df_update_upsert, upsert=True)

    def time_update_short_wide(self, rows, num_symbols):
        """Update the short-and-wide symbol."""
        self.lib_short_wide.update("short_wide_sym", self.df_short_wide)

    def time_append_single(self, rows, num_symbols):
        """Append a single row to each symbol."""
        for sym in range(num_symbols):
            self.lib.append(f"{sym}_sym", self.df_append_single)

    def time_append_large(self, rows, num_symbols):
        """Append ``rows`` rows to each symbol."""
        for sym in range(num_symbols):
            self.lib.append(f"{sym}_sym", self.df_append_large)

    def time_append_short_wide(self, rows, num_symbols):
        """Append to the short-and-wide symbol."""
        self.lib_short_wide.append("short_wide_sym", self.df_short_wide)

    def time_delete(self, rows, num_symbols):
        """Delete each symbol."""
        for sym in range(num_symbols):
            self.lib.delete(f"{sym}_sym")

    def time_delete_short_wide(self, rows, num_symbols):
        """Delete the short-and-wide symbol."""
        self.lib_short_wide.delete("short_wide_sym")
1 change: 1 addition & 0 deletions python/benchmarks/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""
import pandas as pd
import numpy as np
import datetime


def generate_pseudo_random_dataframe(n, freq="s", end_timestamp="1/1/2023"):
Expand Down

0 comments on commit ddd34ae

Please sign in to comment.