From a02f90f7582ef69afc2336e5c04182c9230afd53 Mon Sep 17 00:00:00 2001
From: Georgi Petrov <32372905+G-D-Petrov@users.noreply.github.com>
Date: Mon, 16 Dec 2024 13:04:00 +0200
Subject: [PATCH] Fix asv problems (#2065)

#### Reference Issues/PRs
Fixes problems with publishing the ASV benchmarks.

#### What does this implement or fix?
This PR implements the following:
- Upgrades the Python analysis flow and the C++ tests to Python 3.11 - this is needed because some of the benchmarks require at least Python 3.10
- Fixes the publishing of the benchmark results, which failed because `np.inf` values in the results could not be evaluated correctly (see the sketch after the checklist)
- Fixes LFS for the BI benchmarks
- Incorporates the changes from https://github.com/man-group/ArcticDB/pull/2060

See a successful run [here](https://github.com/man-group/ArcticDB/actions/runs/12317667688/job/34380720877) (it had to be started manually due to the changes to the workflow files).

#### Any other comments?

#### Checklist
Checklist for code changes...
 - [ ] Have you updated the relevant docstrings, documentation and copyright notice?
 - [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)?
 - [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)?
 - [ ] Are API changes highlighted in the PR description?
 - [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes?
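For context on the `np.inf` fix, here is a minimal sketch of the failure mode. It assumes, as the `from numpy import inf` import added to `build_tooling/transform_asv_results.py` suggests, that benchmark result payloads are evaluated as Python expressions that may contain the bare token `inf`; the payload and variable names below are illustrative, not the actual transform code:

```python
# Hypothetical ASV-style results payload: the bare `inf` token breaks
# evaluation when no name `inf` is bound in scope.
raw_results = "[0.00125, inf, 0.000981]"

try:
    eval(raw_results)
except NameError as e:
    print(e)  # name 'inf' is not defined

# Binding the name, as the patch does with `from numpy import inf`,
# lets the same payload evaluate cleanly.
from numpy import inf

durations = eval(raw_results)
print(durations)  # [0.00125, inf, 0.000981]
```

The benchmark changes below revolve around `get_time_at_fraction_of_df`, which the patch hoists to module scope in `python/benchmarks/basic_functions.py`. It offsets timestamps from a fixed end time so that each cached append dataframe lands past the data already written and index ranges do not overlap. The helper is reproduced from the patch; the usage line is illustrative:

```python
import pandas as pd

# Helper as added by the patch: a fraction > 1 yields a timestamp
# rows * (fraction - 1) seconds past the fixed end time.
def get_time_at_fraction_of_df(fraction, rows):
    end_time = pd.Timestamp("1/1/2023")
    time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction - 1)))
    return end_time + time_delta

# 1.5M rows at fraction 2 start ~17 days (1.5M seconds) past the end time.
print(get_time_at_fraction_of_df(2, 1_500_000))
```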
--- .github/actions/setup_deps/action.yml | 6 +- .github/workflows/analysis_workflow.yml | 2 +- .github/workflows/benchmark_commits.yml | 21 ++-- build_tooling/transform_asv_results.py | 2 + python/.asv/results/benchmarks.json | 136 +++++++++++----------- python/benchmarks/basic_functions.py | 101 +++++++++++----- python/benchmarks/bi_benchmarks.py | 6 +- python/benchmarks/finalize_staged_data.py | 84 +++++++------ 8 files changed, 210 insertions(+), 148 deletions(-) diff --git a/.github/actions/setup_deps/action.yml b/.github/actions/setup_deps/action.yml index d79e91c7d6..f0a3ff938d 100644 --- a/.github/actions/setup_deps/action.yml +++ b/.github/actions/setup_deps/action.yml @@ -9,8 +9,8 @@ runs: dnf update -y dnf remove -y 'gcc-toolset-13-*' dnf install -y zip flex bison gcc-toolset-10 gcc-toolset-10-gdb gcc-toolset-10-libatomic-devel krb5-devel cyrus-sasl-devel openssl-devel \ - unzip tar epel-release jq wget libcurl-devel python3 \ - python3-devel python3-pip perl-IPC-Cmd + unzip tar epel-release jq wget libcurl-devel \ + python3.11-devel python3.11-pip perl-IPC-Cmd dnf groupinstall -y 'Development Tools' @@ -19,7 +19,7 @@ runs: echo "CXX=/opt/rh/gcc-toolset-10/root/bin/g++" | tee -a $GITHUB_ENV echo "CMAKE_CXX_COMPILER=/opt/rh/gcc-toolset-10/root/bin/g++" | tee -a $GITHUB_ENV echo "LD_LIBRARY_PATH=/opt/rh/gcc-toolset-10/root/usr/lib64:/opt/rh/gcc-toolset-10/root/usr/lib:/opt/rh/gcc-toolset-10/root/usr/lib64/dyninst" | tee -a $GITHUB_ENV - echo "/opt/rh/devtoolset-10/root/usr/bin" | tee -a $GITHUB_PATH + echo "/opt/rh/devtoolset-10/root/usr/bin:/opt/python/cp311-cp311/bin" | tee -a $GITHUB_PATH echo $GITHUB_ENV diff --git a/.github/workflows/analysis_workflow.yml b/.github/workflows/analysis_workflow.yml index ac2e93cef1..8532149b54 100644 --- a/.github/workflows/analysis_workflow.yml +++ b/.github/workflows/analysis_workflow.yml @@ -6,7 +6,7 @@ on: type: boolean default: false - schedule: # Schdeule the job to run at 12 a.m. daily + schedule: # Schedule the job to run at 12 a.m. daily - cron: '0 0 * * *' pull_request_target: diff --git a/.github/workflows/benchmark_commits.yml b/.github/workflows/benchmark_commits.yml index a27177db93..9a4327d1e7 100644 --- a/.github/workflows/benchmark_commits.yml +++ b/.github/workflows/benchmark_commits.yml @@ -31,8 +31,14 @@ jobs: defaults: run: {shell: bash} steps: + - name: Initialize LFS + shell: bash -l {0} + run: | + dnf install -y git-lfs + - uses: actions/checkout@v3.3.0 with: + lfs: 'true' fetch-depth: 0 submodules: recursive token: ${{ secrets.ARCTICDB_TEST_PAT }} @@ -46,14 +52,15 @@ jobs: - name: Install deps uses: ./.github/actions/setup_deps - # We are changing the python here because we want to use the default python to build (it is devel version) - # and this python for the rest of the testing - - name: Select Python (Linux) - shell: bash -el {0} + - name: Extra envs + shell: bash -l {0} run: | - ls /opt/python - echo /opt/python/cp36-cp36m/bin >> $GITHUB_PATH - + . 
build_tooling/vcpkg_caching.sh # Linux follower needs another call in CIBW + echo -e "VCPKG_BINARY_SOURCES=$VCPKG_BINARY_SOURCES + VCPKG_ROOT=$PLATFORM_VCPKG_ROOT" | tee -a $GITHUB_ENV + cmake -P cpp/CMake/CpuCount.cmake | sed 's/^-- //' | tee -a $GITHUB_ENV + env: + CMAKE_BUILD_PARALLEL_LEVEL: ${{vars.CMAKE_BUILD_PARALLEL_LEVEL}} - name: Set persistent storage variables uses: ./.github/actions/set_persistent_storage_env_vars diff --git a/build_tooling/transform_asv_results.py b/build_tooling/transform_asv_results.py index bab414460a..cd5c4418f2 100644 --- a/build_tooling/transform_asv_results.py +++ b/build_tooling/transform_asv_results.py @@ -5,7 +5,9 @@ As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. """ + import pandas as pd +from numpy import inf from arcticdb.storage_fixtures.s3 import real_s3_from_environment_variables import json from pathlib import Path diff --git a/python/.asv/results/benchmarks.json b/python/.asv/results/benchmarks.json index b3fc895823..c659476028 100644 --- a/python/.asv/results/benchmarks.json +++ b/python/.asv/results/benchmarks.json @@ -11,7 +11,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -29,7 +29,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -47,7 +47,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -65,7 +65,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -83,7 +83,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -101,7 +101,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -119,7 +119,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -142,7 +142,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -166,7 +166,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -190,7 +190,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -214,7 +214,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -238,7 +238,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -262,7 +262,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": 
"basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -286,7 +286,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -310,7 +310,7 @@ "1000" ] ], - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -333,7 +333,7 @@ "1000" ] ], - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -356,7 +356,7 @@ "1000" ] ], - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -379,7 +379,7 @@ "1000" ] ], - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -407,7 +407,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "time", "unit": "seconds", @@ -436,7 +436,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "time", "unit": "seconds", @@ -465,7 +465,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "time", "unit": "seconds", @@ -494,7 +494,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "time", "unit": "seconds", @@ -523,7 +523,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "time", "unit": "seconds", @@ -531,7 +531,7 @@ "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_append_large": { - "code": "class ModificationFunctions:\n def time_append_large(self, rows):\n self.lib.append(f\"sym\", self.df_append_large)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n 
self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_append_large(self, lad: LargeAppendDataModify, rows):\n large: pd.DataFrame = lad.df_append_large[rows].pop()\n self.lib.append(f\"sym\", large)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", 
"min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_append_large", "number": 1, @@ -544,18 +544,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "be3be12028b2f1a949589e618252e94a88e5f35b5aa90f5815fd8aaa324c8550", + "version": "b817d86d1bf76649691197bfaf1261a96a1a34c9a25f053d66f6dfcf14c6f279", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_append_short_wide": { - "code": "class ModificationFunctions:\n def time_append_short_wide(self, rows):\n self.lib_short_wide.append(\"short_wide_sym\", self.df_append_short_wide)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_append_short_wide(self, lad: LargeAppendDataModify, rows):\n large: pd.DataFrame = lad.df_append_short_wide[rows].pop()\n self.lib_short_wide.append(\"short_wide_sym\", large)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = 
generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_append_short_wide", "number": 1, @@ -568,18 +568,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "3a2e1e7a4dc518468ba388f560231ac1a1366b212dbd3309e3e877606c5630e8", + "version": "3678115ad2d40bf19062212095071431ff63cedc159661ee3056be7cbf109f98", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_append_single": { - "code": "class ModificationFunctions:\n def time_append_single(self, rows):\n self.lib.append(f\"sym\", self.df_append_single)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n 
self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_append_single(self, lad: LargeAppendDataModify, rows):\n self.lib.append(f\"sym\", self.df_append_single)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial 
arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_append_single", "number": 1, @@ -592,18 +592,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "c7f13a15b9074ab9bdb6f3e47ab97d75708938f005021b7a8fde82fe6902041d", + "version": "8f398155deb342c70fe4c65e8da636b1f18c9296632b4649aab8dae306aa8453", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_delete": { - "code": "class ModificationFunctions:\n def time_delete(self, rows):\n self.lib.delete(f\"sym\")\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def 
time_delete(self, lad: LargeAppendDataModify, rows):\n self.lib.delete(f\"sym\")\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_delete", "number": 1, @@ -616,18 +616,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "da4c95139bc0ae404ed6585b9e3398af8ed7e421cefcbeb9ff9ea6a77b85915a", + "version": "6d8afae2414e0f842495a7962f5950472814bde20e99eebc474db6953d8e1ae3", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_delete_short_wide": { - "code": "class ModificationFunctions:\n def time_delete_short_wide(self, rows):\n self.lib_short_wide.delete(\"short_wide_sym\")\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = 
generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_delete_short_wide(self, lad: LargeAppendDataModify, rows):\n self.lib_short_wide.delete(\"short_wide_sym\")\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n 
self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_delete_short_wide", "number": 1, @@ -640,18 +640,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "12254786f4a42e8bd488f48075cb70eddf4d87c8581271e2e2b526b7940123b9", + "version": "f867fc9cac4d0706b01166662af37434100460706d4f6118de0bc2e0e3087bae", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_update_half": { - "code": "class ModificationFunctions:\n def time_update_half(self, rows):\n self.lib.update(f\"sym\", self.df_update_half)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, 
ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_update_half(self, lad: LargeAppendDataModify, rows):\n self.lib.update(f\"sym\", self.df_update_half)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_update_half", "number": 1, @@ -664,18 +664,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "f56b8677f5b90b49568e6865c0656b734b9b2a8054baa71b78eaed8f53cb3176", + "version": "6a011f58b79c483849a70576915c2d56deed1227d38489a21140341ca860ce33", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_update_short_wide": { - "code": "class ModificationFunctions:\n def time_update_short_wide(self, rows):\n self.lib_short_wide.update(\"short_wide_sym\", self.df_update_short_wide)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, 
rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_update_short_wide(self, lad: LargeAppendDataModify, rows):\n self.lib_short_wide.update(\"short_wide_sym\", self.df_update_short_wide)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for 
rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_update_short_wide", "number": 1, @@ -688,18 +688,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "5db16777228d8de1ab4af9943d1ed0541c0b02c4dbcd888cfa3e26f37eb0215b", + "version": "111496c5bd4a4c498df28819d3cbcd9d699c4d3363ad3969f102a1d2076b3086", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_update_single": { - "code": "class ModificationFunctions:\n def time_update_single(self, rows):\n self.lib.update(f\"sym\", self.df_update_single)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n 
self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_update_single(self, lad: LargeAppendDataModify, rows):\n self.lib.update(f\"sym\", self.df_update_single)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_update_single", "number": 1, @@ -712,18 +712,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "cf62fa8a658e2f2ab16d286992423dd8d69334415ab61600906c6e9dc0185597", + 
"version": "c45c168d5713f3028a9a5b97959d52116c8d228870ad580be06d86336d2476c6", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_update_upsert": { - "code": "class ModificationFunctions:\n def time_update_upsert(self, rows):\n self.lib.update(f\"sym\", self.df_update_upsert, upsert=True)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_update_upsert(self, lad: LargeAppendDataModify, rows):\n self.lib.update(f\"sym\", self.df_update_upsert, upsert=True)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = 
Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_update_upsert", "number": 1, @@ -736,14 +736,14 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "80de9b1982a498c300177d02874a8626152eccb57cd0ba4228a5bb168e7608c8", + "version": "7f139bf03457104abe937914aa3572503ed52330b3a271d82112696060331d8f", "warmup_time": -1 }, "bi_benchmarks.BIBenchmarks.peakmem_query_groupby_city_count_all": { diff --git a/python/benchmarks/basic_functions.py b/python/benchmarks/basic_functions.py index 5ff8e4e9c2..fdcf53d319 100644 --- a/python/benchmarks/basic_functions.py +++ b/python/benchmarks/basic_functions.py @@ -5,6 +5,8 @@ As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
""" +import time +from typing import List from arcticdb import Arctic from arcticdb.version_store.library import WritePayload, ReadRequest import pandas as pd @@ -185,6 +187,7 @@ def time_read_batch_pure(self, rows, num_symbols): def peakmem_read_batch(self, rows, num_symbols): read_reqs = [ReadRequest(f"{sym}_sym") for sym in range(num_symbols)] self.lib.read_batch(read_reqs) + def time_read_batch_with_columns(self, rows, num_symbols): COLS = ["value"] read_reqs = [ @@ -214,6 +217,10 @@ def peakmem_read_batch_with_date_ranges(self, rows, num_symbols): ] self.lib.read_batch(read_reqs) +def get_time_at_fraction_of_df(fraction, rows): + end_time = pd.Timestamp("1/1/2023") + time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1))) + return end_time + time_delta from shutil import copytree, rmtree class ModificationFunctions: @@ -221,7 +228,11 @@ class ModificationFunctions: Modification functions (update, append, delete) need a different setup/teardown process, thus we place them in a separate group. """ + rounds = 1 number = 1 # We do a single run between setup and teardown because we e.g. can't delete a symbol twice + repeat = 3 + warmup_time=0 + timeout = 6000 ARCTIC_DIR = "modification_functions" ARCTIC_DIR_ORIGINAL = "modification_functions_original" @@ -232,7 +243,42 @@ class ModificationFunctions: params = PARAMS param_names = PARAM_NAMES + class LargeAppendDataModify: + """ + This class will hold a cache of append large dataframes + The purpose of this cache is to create dataframes + which timestamps are sequenced over time so that + overlap does not occur + """ + + def __init__(self, num_rows_list:List[int], number_elements:int): + self.df_append_large = {} + self.df_append_short_wide = {} + start_time = time.time() + for rows in num_rows_list: + print("Generating dataframe with rows: ", rows) + lst = list() + lst_saw = list() + for n in range(number_elements+1): + print("Generating dataframe no: ", n) + + df = generate_pseudo_random_dataframe(rows, "s", get_time_at_fraction_of_df(2*(n+1), rows)) + df_saw = generate_random_floats_dataframe_with_index( + ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, "s", + get_time_at_fraction_of_df(2*(n+1), rows=ModificationFunctions.WIDE_DF_ROWS) + ) + + lst.append(df) + lst_saw.append(df_saw) + print(f"STANDARD Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}") + print(f"SHORT_n_WIDE Index {df_saw.iloc[0].name} - {df_saw.iloc[df_saw.shape[0] - 1].name}") + print("Add dataframes: ", len(lst)) + self.df_append_large[rows] = lst + self.df_append_short_wide[rows] = lst_saw + print("APPEND LARGE cache generation took (s) :", time.time() - start_time) + def setup_cache(self): + self.ac = Arctic(ModificationFunctions.CONNECTION_STRING) rows_values = ModificationFunctions.params @@ -241,7 +287,9 @@ def setup_cache(self): lib_name = get_prewritten_lib_name(rows) self.ac.delete_library(lib_name) lib = self.ac.create_library(lib_name) - lib.write("sym", self.init_dfs[rows]) + df = self.init_dfs[rows] + lib.write("sym", df) + print(f"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}") lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS) self.ac.delete_library(lib_name) @@ -257,62 +305,61 @@ def setup_cache(self): # Then on each teardown we restore the initial state by overwriting the modified with the original. 
copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL) + number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds - def setup(self, rows): - def get_time_at_fraction_of_df(fraction, rows=rows): - end_time = pd.Timestamp("1/1/2023") - time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1))) - return end_time + time_delta + lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration) + + return lad + + def setup(self, lad: LargeAppendDataModify, rows): - self.df_update_single = generate_pseudo_random_dataframe(1, "s", get_time_at_fraction_of_df(0.5)) - self.df_update_half = generate_pseudo_random_dataframe(rows//2, "s", get_time_at_fraction_of_df(0.75)) - self.df_update_upsert = generate_pseudo_random_dataframe(rows, "s", get_time_at_fraction_of_df(1.5)) - self.df_append_single = generate_pseudo_random_dataframe(1, "s", get_time_at_fraction_of_df(1.1)) - self.df_append_large = generate_pseudo_random_dataframe(rows, "s", get_time_at_fraction_of_df(2)) + self.df_update_single = generate_pseudo_random_dataframe(1, "s", get_time_at_fraction_of_df(0.5, rows)) + self.df_update_half = generate_pseudo_random_dataframe(rows//2, "s", get_time_at_fraction_of_df(0.75, rows)) + self.df_update_upsert = generate_pseudo_random_dataframe(rows, "s", get_time_at_fraction_of_df(1.5, rows)) + self.df_append_single = generate_pseudo_random_dataframe(1, "s", get_time_at_fraction_of_df(1.1, rows)) self.df_update_short_wide = generate_random_floats_dataframe_with_index( ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS ) - self.df_append_short_wide = generate_random_floats_dataframe_with_index( - ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, "s", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS) - ) self.ac = Arctic(ModificationFunctions.CONNECTION_STRING) self.lib = self.ac[get_prewritten_lib_name(rows)] self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)] - def teardown(self, rows): + def teardown(self, lad: LargeAppendDataModify, rows): # After the modification functions clean up the changes by replacing the modified ARCTIC_DIR with the original ARCTIC_DIR_ORIGINAL # TODO: We can use dirs_exist_ok=True on copytree instead of removing first if we run with python version >=3.8 rmtree(ModificationFunctions.ARCTIC_DIR) copytree(ModificationFunctions.ARCTIC_DIR_ORIGINAL, ModificationFunctions.ARCTIC_DIR) - del self.ac + del self.ac - def time_update_single(self, rows): + def time_update_single(self, lad: LargeAppendDataModify, rows): self.lib.update(f"sym", self.df_update_single) - def time_update_half(self, rows): + def time_update_half(self, lad: LargeAppendDataModify, rows): self.lib.update(f"sym", self.df_update_half) - def time_update_upsert(self, rows): + def time_update_upsert(self, lad: LargeAppendDataModify, rows): self.lib.update(f"sym", self.df_update_upsert, upsert=True) - def time_update_short_wide(self, rows): + def time_update_short_wide(self, lad: LargeAppendDataModify, rows): self.lib_short_wide.update("short_wide_sym", self.df_update_short_wide) - def time_append_single(self, rows): + def time_append_single(self, lad: LargeAppendDataModify, rows): self.lib.append(f"sym", self.df_append_single) - def time_append_large(self, rows): - self.lib.append(f"sym", self.df_append_large) + def time_append_large(self, lad: LargeAppendDataModify, rows): + large: 
pd.DataFrame = lad.df_append_large[rows].pop(0)
+        self.lib.append(f"sym", large)
 
-    def time_append_short_wide(self, rows):
-        self.lib_short_wide.append("short_wide_sym", self.df_append_short_wide)
+    def time_append_short_wide(self, lad: LargeAppendDataModify, rows):
+        large: pd.DataFrame = lad.df_append_short_wide[rows].pop(0)
+        self.lib_short_wide.append("short_wide_sym", large)
 
-    def time_delete(self, rows):
+    def time_delete(self, lad: LargeAppendDataModify, rows):
         self.lib.delete(f"sym")
 
-    def time_delete_short_wide(self, rows):
+    def time_delete_short_wide(self, lad: LargeAppendDataModify, rows):
         self.lib_short_wide.delete("short_wide_sym")
 
diff --git a/python/benchmarks/bi_benchmarks.py b/python/benchmarks/bi_benchmarks.py
index b67595b061..33481e5f83 100644
--- a/python/benchmarks/bi_benchmarks.py
+++ b/python/benchmarks/bi_benchmarks.py
@@ -184,8 +184,4 @@ def time_query_groupby_city_count_filter_two_aggregations(self, times_bigger) ->
 
     def peakmem_query_groupby_city_count_filter_two_aggregations(self, times_bigger):
-        return self.query_groupby_city_count_filter_two_aggregations(times_bigger)
-
-
-
-
+        return self.query_groupby_city_count_filter_two_aggregations(times_bigger)
\ No newline at end of file
diff --git a/python/benchmarks/finalize_staged_data.py b/python/benchmarks/finalize_staged_data.py
index e0a2c86bab..0237cbddba 100644
--- a/python/benchmarks/finalize_staged_data.py
+++ b/python/benchmarks/finalize_staged_data.py
@@ -7,18 +7,20 @@
 from arcticdb.util.utils import TimestampNumber
 """
 
+import sys
 from arcticdb.arctic import Arctic
 from arcticdb.util.utils import CachedDFGenerator, TimestampNumber, stage_chunks
 from arcticdb.version_store.library import Library, StagedDataFinalizeMethod
 from .common import *
-from asv_runner.benchmarks.mark import SkipNotImplemented
+
 
 class FinalizeStagedData:
-    '''
-    Check and benchmark performance of finalize_staged_data().
-    Due to specifics of this procedure we tune asv to make single measurement
-    which would be over a relatively big staged data.
-    '''
+    """
+    Check and benchmark performance of finalize_staged_data().
+    Due to the specifics of this procedure, we tune asv to take a single
+    measurement over a relatively large amount of staged data.
+    """
+
     number = 1
     rounds = 1
     repeat = 1
@@ -27,7 +29,7 @@ class FinalizeStagedData:
     timeout = 600
 
     LIB_NAME = "Finalize_Staged_Data_LIB"
-    #Define the number of chunks
+    # Define the number of chunks
     params = [1000, 2000]
 
     def __init__(self):
@@ -39,11 +41,11 @@ def setup_cache(self):
         # Generating dataframe with all kind of supported data types
         cachedDF = CachedDFGenerator(350000, [5])
         return cachedDF
-
-    def setup(self, cache:CachedDFGenerator, param:int):
+
+    def setup(self, cache: CachedDFGenerator, param: int):
         cachedDF = cache
-
-        # Unfortunately there is no way to tell asv to run single time
+
+        # Unfortunately there is no way to tell asv to do a single run of
         # each of finalize_stage_data() tests if we do the large setup in the
         # setup_cache() method.  We can only force it to work with single execution
         # if the symbol setup with stage data is in the setup() method
 
@@ -52,7 +54,9 @@ def setup(self, cache:CachedDFGenerator, param:int):
         self.ac.delete_library(self.lib_name)
         self.lib = self.ac.create_library(self.lib_name)
 
-        INITIAL_TIMESTAMP: TimestampNumber = TimestampNumber(0, cachedDF.TIME_UNIT) # Synchronize index frequency
+        INITIAL_TIMESTAMP: TimestampNumber = TimestampNumber(
+            0, cachedDF.TIME_UNIT
+        )  # Synchronize index frequency
 
         df = cachedDF.generate_dataframe_timestamp_indexed(200, 0, cachedDF.TIME_UNIT)
         list_of_chunks = [10000] * param
@@ -61,45 +65,51 @@ def setup(self, cache:CachedDFGenerator, param:int):
         self.lib.write(self.symbol, data=df, prune_previous_versions=True)
         stage_chunks(self.lib, self.symbol, cachedDF, INITIAL_TIMESTAMP, list_of_chunks)
 
-    def time_finalize_staged_data(self, cache:CachedDFGenerator, param:int):
+    def time_finalize_staged_data(self, cache: CachedDFGenerator, param: int):
         print(">>> Library:", self.lib)
         print(">>> Symbol:", self.symbol)
         self.lib.finalize_staged_data(self.symbol, mode=StagedDataFinalizeMethod.WRITE)
 
-    def peakmem_finalize_staged_data(self, cache:CachedDFGenerator, param:int):
+    def peakmem_finalize_staged_data(self, cache: CachedDFGenerator, param: int):
         print(">>> Library:", self.lib)
         print(">>> Symbol:", self.symbol)
         self.lib.finalize_staged_data(self.symbol, mode=StagedDataFinalizeMethod.WRITE)
 
-    def teardown(self, cache:CachedDFGenerator, param:int):
+    def teardown(self, cache: CachedDFGenerator, param: int):
         self.ac.delete_library(self.lib_name)
 
+
+from asv_runner.benchmarks.mark import SkipNotImplemented
+
+
 class FinalizeStagedDataWiderDataframeX3(FinalizeStagedData):
-    '''
-    The test is meant to be executed with 3 times wider dataframe than the base test
-    '''
+    """
+    Runs the same benchmarks with a dataframe three times wider than in the base test
+    """
 
     def setup_cache(self):
         # Generating dataframe with all kind of supported data type
-        cachedDF = CachedDFGenerator(350000, [5, 25, 50]) # 3 times wider DF with bigger string columns
+        cachedDF = CachedDFGenerator(
+            350000, [5, 25, 50]
+        )  # 3 times wider DF with bigger string columns
         return cachedDF
-
-    def setup(self, cache:CachedDFGenerator, param:int):
-        if (not SLOW_TESTS):
-            raise SkipNotImplemented ("Slow tests are skipped")
-        super().setup(cache,param)
-
-    def time_finalize_staged_data(self, cache:CachedDFGenerator, param:int):
-        if (not SLOW_TESTS):
-            raise SkipNotImplemented ("Slow tests are skipped")
-        super().time_finalize_staged_data(cache,param)
-
-    def peakmem_finalize_staged_data(self, cache:CachedDFGenerator, param:int):
-        if (not SLOW_TESTS):
-            raise SkipNotImplemented ("Slow tests are skipped")
-        super().peakmem_finalize_staged_data(cache,param)
-
-    def teardown(self, cache:CachedDFGenerator, param:int):
-        if (SLOW_TESTS):
+
+    def setup(self, cache: CachedDFGenerator, param: int):
+        if not SLOW_TESTS:
+            raise SkipNotImplemented("Slow tests are skipped")
+        super().setup(cache, param)
+
+    def time_finalize_staged_data(self, cache: CachedDFGenerator, param: int):
+        if not SLOW_TESTS:
+            raise SkipNotImplemented("Slow tests are skipped")
+        super().time_finalize_staged_data(cache, param)
+
+    def peakmem_finalize_staged_data(self, cache: CachedDFGenerator, param: int):
+        if not SLOW_TESTS:
+            raise SkipNotImplemented("Slow tests are skipped")
+        super().peakmem_finalize_staged_data(cache, param)
+
+    def teardown(self, cache: CachedDFGenerator, param: int):
+        if SLOW_TESTS:
             # Run only on slow tests
self.ac.delete_library(self.lib_name)
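
Note on the asv mechanics that the ModificationFunctions changes rely on: when a
benchmark class defines setup_cache(), asv runs it once, persists the returned
object, and passes it as the first positional argument to setup(), to every
benchmark method, and to teardown(). That is why the methods above all gained
the lad: LargeAppendDataModify parameter, and why the cache pre-generates
repeat * number * rounds (+1) dataframes, one fresh frame per timed run. The
sketch below shows the pattern in isolation; the class name and payload
contents are invented for illustration and are not part of this patch.

    class CachePatternExample:
        # asv performs repeat * number * rounds timed runs of each benchmark
        # between setup()/teardown() pairs, so the cache must hold at least
        # that many pre-generated payloads per parameter value.
        number = 1
        repeat = 3
        rounds = 1
        params = [10, 100]
        param_names = ["rows"]

        def setup_cache(self):
            # Runs once; asv persists the return value and hands it to
            # setup(), every benchmark method, and teardown() as the first
            # positional argument.
            runs = self.repeat * self.number * self.rounds
            return {rows: [f"payload-{rows}-{i}" for i in range(runs + 1)]
                    for rows in self.params}

        def setup(self, cache, rows):
            # Grab the per-parameter list so each timed run can consume one item.
            self.pending = cache[rows]

        def time_consume_one(self, cache, rows):
            # Popping a fresh element per run mirrors how time_append_large()
            # takes the next non-overlapping dataframe from the cache.
            self.pending.pop(0)

Within a single benchmark process the cached object is loaded once, so the
pop(0) calls advance through the list across repeats and no dataframe is
appended twice.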
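The non-overlap guarantee itself is plain arithmetic in
get_time_at_fraction_of_df(): the helper offsets the fixed 2023-01-01 anchor
(the end of the initially written frame) by round(rows * (fraction - 1))
seconds. The cached append frames use fractions 2 * (n + 1), so consecutive
frames start 2 * rows seconds apart while each one, at the benchmarks'
one-row-per-second frequency, spans only rows seconds. A self-contained check
of that spacing (the helper is restated here so the snippet runs on its own):

    import pandas as pd

    def get_time_at_fraction_of_df(fraction, rows):
        # Same arithmetic as the helper added to basic_functions.py.
        end_time = pd.Timestamp("1/1/2023")
        time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction - 1)))
        return end_time + time_delta

    rows = 1000
    # Start times of the first three cached append frames (n = 0, 1, 2).
    starts = [get_time_at_fraction_of_df(2 * (n + 1), rows) for n in range(3)]
    for earlier, later in zip(starts, starts[1:]):
        # Consecutive frames start exactly 2 * rows seconds apart,
        assert later - earlier == pd.Timedelta(seconds=2 * rows)
        # and each frame only covers `rows` seconds, so the appended
        # index ranges are pairwise disjoint.
        assert later > earlier + pd.Timedelta(seconds=rows)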