From 3d1489700df1bcdd1fc8765e4299c015810cbed9 Mon Sep 17 00:00:00 2001
From: Georgi Petrov <32372905+G-D-Petrov@users.noreply.github.com>
Date: Mon, 16 Dec 2024 13:04:00 +0200
Subject: [PATCH] Fix asv problems (#2065)
Fixes problems with publishing the ASV benchmarks
This PR implements the following:
- Upgrades the Python analysis flow and C++ tests to Python 3.11 - this
is needed because some of the benchmarks require at least Python 3.10
- Fixes the problems with publishing the benchmarks, which failed because
np.inf could not be evaluated correctly (see the sketch after this list)
- Fixes LFS for BI benchmarks
- Incorporates changes from
https://github.com/man-group/ArcticDB/pull/2060
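For context, here is a minimal sketch of why the np.inf import matters, assuming the
stored benchmark results are rebuilt with eval()-style parsing; the snippet and its
names are illustrative, not taken from transform_asv_results.py:
```python
# Minimal sketch (assumption): serialized ASV results can contain the bare
# token "inf", which json.loads() rejects and eval() only resolves if a name
# `inf` exists in scope. Importing numpy's inf makes that name available.
from numpy import inf

raw_timings = "[0.0012, inf, 0.0015]"  # hypothetical serialized benchmark row

# eval() looks names up in this module's globals, where `inf` now exists.
timings = eval(raw_timings)
print(timings)  # [0.0012, inf, 0.0015]
```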
See a successful run
[here](https://github.com/man-group/ArcticDB/actions/runs/12317667688/job/34380720877)
(it had to be started manually because of the changes to the workflow files).
Checklist for code changes...
- [ ] Have you updated the relevant docstrings, documentation and
copyright notice?
- [ ] Is this contribution tested against [all ArcticDB's
features](../docs/mkdocs/docs/technical/contributing.md)?
- [ ] Do all exceptions introduced raise appropriate [error
messages](https://docs.arcticdb.io/error_messages/)?
- [ ] Are API changes highlighted in the PR description?
- [ ] Is the PR labelled as enhancement or bug so it appears in
autogenerated release notes?
Fix CI issues
Try to override the file during benchmarks
Comment-out for testing
Flaky asv (#2066)
---------
Co-authored-by: Georgi Rusev
Test with LFS checkout
Fix lfs install step
Remove unnecessary check
Fix code coverage flow
try to use ec2 runner for code cov
Test without the port
Fix for 311
---
.github/actions/run_local_pytest/action.yml | 2 +-
.github/actions/setup_deps/action.yml | 6 +-
.github/workflows/analysis_workflow.yml | 353 ++++++++++----------
.github/workflows/benchmark_commits.yml | 21 +-
build_tooling/transform_asv_results.py | 2 +
python/.asv/results/benchmarks.json | 136 ++++----
python/benchmarks/basic_functions.py | 162 +++++++--
python/benchmarks/bi_benchmarks.py | 6 +-
python/benchmarks/finalize_staged_data.py | 84 +++--
9 files changed, 444 insertions(+), 328 deletions(-)
diff --git a/.github/actions/run_local_pytest/action.yml b/.github/actions/run_local_pytest/action.yml
index 4c3c96de05..515e7f7cb8 100644
--- a/.github/actions/run_local_pytest/action.yml
+++ b/.github/actions/run_local_pytest/action.yml
@@ -16,7 +16,7 @@ runs:
npm install -g azurite
cd python
- ln -s ../cpp/out/linux-${{ inputs.build_type }}-build/arcticdb/arcticdb_ext.cpython-36m-x86_64-linux-gnu.so
+ ln -s ../cpp/out/linux-${{ inputs.build_type }}-build/arcticdb/arcticdb_ext.cpython-311m-x86_64-linux-gnu.so
export ARCTICDB_RAND_SEED=$RANDOM
python ${{inputs.other_params}} -m pytest --timeout=3600 -n ${{ inputs.threads }} tests
env:
diff --git a/.github/actions/setup_deps/action.yml b/.github/actions/setup_deps/action.yml
index d79e91c7d6..f0a3ff938d 100644
--- a/.github/actions/setup_deps/action.yml
+++ b/.github/actions/setup_deps/action.yml
@@ -9,8 +9,8 @@ runs:
dnf update -y
dnf remove -y 'gcc-toolset-13-*'
dnf install -y zip flex bison gcc-toolset-10 gcc-toolset-10-gdb gcc-toolset-10-libatomic-devel krb5-devel cyrus-sasl-devel openssl-devel \
- unzip tar epel-release jq wget libcurl-devel python3 \
- python3-devel python3-pip perl-IPC-Cmd
+ unzip tar epel-release jq wget libcurl-devel \
+ python3.11-devel python3.11-pip perl-IPC-Cmd
dnf groupinstall -y 'Development Tools'
@@ -19,7 +19,7 @@ runs:
echo "CXX=/opt/rh/gcc-toolset-10/root/bin/g++" | tee -a $GITHUB_ENV
echo "CMAKE_CXX_COMPILER=/opt/rh/gcc-toolset-10/root/bin/g++" | tee -a $GITHUB_ENV
echo "LD_LIBRARY_PATH=/opt/rh/gcc-toolset-10/root/usr/lib64:/opt/rh/gcc-toolset-10/root/usr/lib:/opt/rh/gcc-toolset-10/root/usr/lib64/dyninst" | tee -a $GITHUB_ENV
- echo "/opt/rh/devtoolset-10/root/usr/bin" | tee -a $GITHUB_PATH
+ echo "/opt/rh/devtoolset-10/root/usr/bin:/opt/python/cp311-cp311/bin" | tee -a $GITHUB_PATH
echo $GITHUB_ENV
diff --git a/.github/workflows/analysis_workflow.yml b/.github/workflows/analysis_workflow.yml
index ac2e93cef1..6ac5ccb1bc 100644
--- a/.github/workflows/analysis_workflow.yml
+++ b/.github/workflows/analysis_workflow.yml
@@ -6,7 +6,7 @@ on:
type: boolean
default: false
- schedule: # Schdeule the job to run at 12 a.m. daily
+ schedule: # Schedule the job to run at 12 a.m. daily
- cron: '0 0 * * *'
pull_request_target:
@@ -99,175 +99,188 @@ jobs:
python -m asv publish -v
python -m asv gh-pages -v --rewrite
- # code_coverage:
- # runs-on: "ubuntu-22.04"
- # container:
- # image: quay.io/pypa/manylinux_2_28_x86_64:latest
- # services:
- # mongodb:
- # image: mongo:4.4
- # ports:
- # - 27017:27017
- # env:
- # VCPKG_NUGET_USER: ${{secrets.VCPKG_NUGET_USER || github.repository_owner}}
- # VCPKG_NUGET_TOKEN: ${{secrets.VCPKG_NUGET_TOKEN || secrets.GITHUB_TOKEN}}
- # VCPKG_MAN_NUGET_USER: ${{secrets.VCPKG_MAN_NUGET_USER}} # For forks to download pre-compiled dependencies from the Man repo
- # VCPKG_MAN_NUGET_TOKEN: ${{secrets.VCPKG_MAN_NUGET_TOKEN}}
- # ARCTIC_CMAKE_PRESET: linux-debug
- # ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
- # steps:
- # - uses: actions/checkout@v3.3.0
- # with:
- # submodules: recursive
- # ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || '' }} # Note: This is dangerous if we run automatic CI on external PRs
-
- # - name: Get number of CPU cores
- # uses: SimenB/github-actions-cpu-cores@v1.1.0
- # id: cpu-cores
-
- # - name: Install deps
- # uses: ./.github/actions/setup_deps
-
- # - name: Extra envs
- # shell: bash -l {0}
- # run: |
- # . build_tooling/vcpkg_caching.sh # Linux follower needs another call in CIBW
- # echo -e "VCPKG_BINARY_SOURCES=$VCPKG_BINARY_SOURCES
- # VCPKG_ROOT=$PLATFORM_VCPKG_ROOT" | tee -a $GITHUB_ENV
- # cmake -P cpp/CMake/CpuCount.cmake | sed 's/^-- //' | tee -a $GITHUB_ENV
- # echo "ARCTICDB_CODE_COVERAGE_BUILD=1" | tee -a $GITHUB_ENV
- # env:
- # CMAKE_BUILD_PARALLEL_LEVEL: ${{vars.CMAKE_BUILD_PARALLEL_LEVEL}}
-
- # - name: Prepare C++ compilation env
- # run: . build_tooling/prep_cpp_build.sh
+ start_ec2_runner_code_coverage:
+ uses: ./.github/workflows/ec2_runner_jobs.yml
+ secrets: inherit
+ with:
+ job_type: start
+ instance-type: t2.2xlarge
+
+ code_coverage:
+ needs: [start_ec2_runner_code_coverage]
+ if: |
+ always() &&
+ !cancelled()
+ runs-on: ${{ needs.start_ec2_runner_code_coverage.outputs.label }}
+ container:
+ image: quay.io/pypa/manylinux_2_28_x86_64:latest
+ services:
+ mongodb:
+ image: mongo:4.4
+ env:
+ VCPKG_NUGET_USER: ${{secrets.VCPKG_NUGET_USER || github.repository_owner}}
+ VCPKG_NUGET_TOKEN: ${{secrets.VCPKG_NUGET_TOKEN || secrets.GITHUB_TOKEN}}
+ VCPKG_MAN_NUGET_USER: ${{secrets.VCPKG_MAN_NUGET_USER}} # For forks to download pre-compiled dependencies from the Man repo
+ VCPKG_MAN_NUGET_TOKEN: ${{secrets.VCPKG_MAN_NUGET_TOKEN}}
+ ARCTIC_CMAKE_PRESET: linux-debug
+ steps:
+ - uses: actions/checkout@v3.3.0
+ with:
+ submodules: recursive
+ ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || '' }} # Note: This is dangerous if we run automatic CI on external PRs
+
+ - name: Get number of CPU cores
+ uses: SimenB/github-actions-cpu-cores@v1.1.0
+ id: cpu-cores
+
+ - name: Install deps
+ uses: ./.github/actions/setup_deps
+
+ - name: Extra envs
+ shell: bash -l {0}
+ run: |
+ . build_tooling/vcpkg_caching.sh # Linux follower needs another call in CIBW
+ echo -e "VCPKG_BINARY_SOURCES=$VCPKG_BINARY_SOURCES
+ VCPKG_ROOT=$PLATFORM_VCPKG_ROOT" | tee -a $GITHUB_ENV
+ cmake -P cpp/CMake/CpuCount.cmake | sed 's/^-- //' | tee -a $GITHUB_ENV
+ echo "ARCTICDB_CODE_COVERAGE_BUILD=1" | tee -a $GITHUB_ENV
+ env:
+ CMAKE_BUILD_PARALLEL_LEVEL: ${{vars.CMAKE_BUILD_PARALLEL_LEVEL}}
+
+ - name: Prepare C++ compilation env
+ run: . build_tooling/prep_cpp_build.sh
- # - name: CMake compile
- # # We are pinning the version to 10.6 because >= 10.7, use node20 which is not supported in the container
- # uses: lukka/run-cmake@v10.6
- # with:
- # cmakeListsTxtPath: ${{github.workspace}}/cpp/CMakeLists.txt
- # configurePreset: ${{env.ARCTIC_CMAKE_PRESET}}
- # buildPreset: ${{env.ARCTIC_CMAKE_PRESET}}
- # env:
- # ARCTICDB_DEBUG_FIND_PYTHON: ${{vars.ARCTICDB_DEBUG_FIND_PYTHON}}
- # python_impl_name: 'cp311'
+ - name: CMake compile
+ # We are pinning the version to 10.6 because versions >= 10.7 use node20, which is not supported in the container
+ uses: lukka/run-cmake@v10.6
+ with:
+ cmakeListsTxtPath: ${{github.workspace}}/cpp/CMakeLists.txt
+ configurePreset: ${{env.ARCTIC_CMAKE_PRESET}}
+ buildPreset: ${{env.ARCTIC_CMAKE_PRESET}}
+ env:
+ ARCTICDB_DEBUG_FIND_PYTHON: ${{vars.ARCTICDB_DEBUG_FIND_PYTHON}}
+ python_impl_name: 'cp311'
- # - name: Run C++ Tests
- # shell: bash -l {0}
- # run: |
- # cd cpp/out/linux-debug-build/
- # ls arcticdb
- # make -j ${{ steps.cpu-cores.outputs.count }} arcticdb_rapidcheck_tests
- # make -j ${{ steps.cpu-cores.outputs.count }} test_unit_arcticdb
- # ctest
-
- # # We are chainging the python here because we want to use the default python to build (it is devel version)
- # # and this python for the rest of the testing
- # - name: Select Python (Linux)
- # run: echo /opt/python/cp36-cp36m/bin >> $GITHUB_PATH
-
- # - name: Install local dependencies with pip
- # shell: bash
- # run: |
- # python -m pip install --upgrade pip
- # ARCTIC_CMAKE_PRESET=skip pip install -ve .[Testing]
-
- # # - name: Test with pytest
- # # uses: ./.github/actions/run_local_pytest
- # # with:
- # # build_type: debug
- # # threads: 1
- # # fast_tests_only: 0
- # # other_params: '-m coverage run '
-
- # - name: Get python Coverage report
- # shell: bash -l {0}
- # run: |
- # cd python
- # python -m coverage report -m | tee output.txt
- # python -m coverage html
- # zip -r python_cov.zip htmlcov/
-
- # echo "PYTHON_COV_PERCENT=$(cat output.txt | grep 'TOTAL' | awk '{print $NF}' | tr -d '%')" >> $GITHUB_ENV
-
- # - name: Run Gcovr manually post-pytest
- # shell: bash -l {0}
- # run: |
- # cd cpp/out/linux-debug-build/
- # python -m pip install gcovr
- # mkdir coverage
- # python -m gcovr --txt --html-details coverage/index.html -e vcpkg_installed/ -e proto/ -e ../../third_party -e ../../arcticdb/util/test/ -r ../.. --exclude-throw-branches --exclude-unreachable-branches -u --exclude-function-lines | tee output.txt
- # zip -r coverage.zip coverage/
-
- # echo "CPP_COV_PERCENT=$(cat output.txt | grep 'TOTAL' | awk '{print $NF}' | tr -d '%')" >> $GITHUB_ENV
-
- # - name: Upload Coverage
- # uses: actions/upload-artifact@v3.1.3
- # with:
- # name: cpp-coverage-artifact
- # path: cpp/out/linux-debug-build/coverage.zip
-
- # - name: Upload Python Coverage
- # uses: actions/upload-artifact@v3.1.3
- # with:
- # name: python-coverage-artifact
- # path: python/python_cov.zip
-
- # - name: Restore cached CPP Coverage Percentage from the previous run
- # id: cache-cov-restore
- # uses: actions/cache/restore@v3.3.2
- # with:
- # path: prev_coverage.txt
- # key: coverage
+ - name: Run C++ Tests
+ shell: bash -l {0}
+ run: |
+ cd cpp/out/linux-debug-build/
+ ls arcticdb
+ make -j ${{ steps.cpu-cores.outputs.count }} arcticdb_rapidcheck_tests
+ make -j ${{ steps.cpu-cores.outputs.count }} test_unit_arcticdb
+ ctest
+
+ - name: Install local dependencies with pip
+ shell: bash
+ run: |
+ python -m pip install --upgrade pip
+ ARCTIC_CMAKE_PRESET=skip pip install -ve .[Testing]
+
+ - name: Test with pytest
+ uses: ./.github/actions/run_local_pytest
+ with:
+ build_type: debug
+ fast_tests_only: 0
+ other_params: '-m coverage run '
+
+ - name: Get python Coverage report
+ shell: bash -l {0}
+ run: |
+ cd python
+ python -m coverage report -m | tee output.txt
+ python -m coverage html
+ zip -r python_cov.zip htmlcov/
+
+ echo "PYTHON_COV_PERCENT=$(cat output.txt | grep 'TOTAL' | awk '{print $NF}' | tr -d '%')" >> $GITHUB_ENV
+
+ - name: Run Gcovr manually post-pytest
+ shell: bash -l {0}
+ run: |
+ cd cpp/out/linux-debug-build/
+ python -m pip install gcovr
+ mkdir coverage
+ python -m gcovr --txt --html-details coverage/index.html -e vcpkg_installed/ -e proto/ -e ../../third_party -e ../../arcticdb/util/test/ -r ../.. --exclude-throw-branches --exclude-unreachable-branches -u --exclude-function-lines | tee output.txt
+ zip -r coverage.zip coverage/
+
+ echo "CPP_COV_PERCENT=$(cat output.txt | grep 'TOTAL' | awk '{print $NF}' | tr -d '%')" >> $GITHUB_ENV
+
+ - name: Upload Coverage
+ uses: actions/upload-artifact@v3.1.3
+ with:
+ name: cpp-coverage-artifact
+ path: cpp/out/linux-debug-build/coverage.zip
+
+ - name: Upload Python Coverage
+ uses: actions/upload-artifact@v3.1.3
+ with:
+ name: python-coverage-artifact
+ path: python/python_cov.zip
+
+ - name: Restore cached CPP Coverage Percentage from the previous run
+ id: cache-cov-restore
+ uses: actions/cache/restore@v3.3.2
+ with:
+ path: prev_coverage.txt
+ key: coverage
- # - name: Get and compare coverage if cache was restored
- # run: |
- # # if cache was restored, compare coverage
- # if [ -f coverage.txt ]; then
- # PREV_COVERAGE=$(cat prev_coverage.txt | cut -d' ' -f2)
- # echo "Previous coverage: $PREV_COVERAGE"
- # CURR_COVERAGE=${{env.CPP_COV_PERCENT}}
- # echo "CPP_COV_PREV_PERCENT=$PREV_COVERAGE" >> $GITHUB_ENV
- # echo "Current coverage: $CURR_COVERAGE"
- # if [ $CURR_COVERAGE -gt $PREV_COVERAGE ]; then
- # echo "Coverage increased"
- # elif [ $CURR_COVERAGE -lt $PREV_COVERAGE ]; then
- # echo "Coverage decreased"
- # else
- # echo "Coverage unchanged"
- # fi
- # fi
-
- # - name: Save CPP Coverage Percentage to file
- # run: |
- # echo "Coverage: ${{ env.CPP_COV_PERCENT }}" > current_coverage.txt
-
- # - name: Save the current CPP Coverage Percentage to the cache
- # id: cache-cov-save
- # uses: actions/cache/save@v3.3.2
- # with:
- # path: current_coverage.txt
- # key: coverage
-
- # - name: Check percentage and send Slack notification
- # if: ${{ env.CPP_COV_PREV_PERCENT && env.CPP_COV_PERCENT && env.CPP_COV_PERCENT < env.CPP_COV_PREV_PERCENT }}
- # uses: slackapi/slack-github-action@v1.24.0
- # with:
- # # For posting a rich message using Block Kit
- # payload: |
- # {
- # "text": "The CPP Code Coverage has been reduced",
- # "blocks": [
- # {
- # "type": "section",
- # "text": {
- # "type": "mrkdwn",
- # "text": "The CPP Code Coverage from the current run(${{ env.CPP_COV_PERCENT }}%) is lower the previous one(${{ env.CPP_COV_PREV_PERCENT }}%)."
- # }
- # }
- # ]
- # }
- # env:
- # SLACK_WEBHOOK_URL: ${{ secrets.ARCTICDB_DEV_WEBHOOK_URL }}
- # SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
+ - name: Get and compare coverage if cache was restored
+ run: |
+ # if cache was restored, compare coverage
+ if [ -f coverage.txt ]; then
+ PREV_COVERAGE=$(cat prev_coverage.txt | cut -d' ' -f2)
+ echo "Previous coverage: $PREV_COVERAGE"
+ CURR_COVERAGE=${{env.CPP_COV_PERCENT}}
+ echo "CPP_COV_PREV_PERCENT=$PREV_COVERAGE" >> $GITHUB_ENV
+ echo "Current coverage: $CURR_COVERAGE"
+ if [ $CURR_COVERAGE -gt $PREV_COVERAGE ]; then
+ echo "Coverage increased"
+ elif [ $CURR_COVERAGE -lt $PREV_COVERAGE ]; then
+ echo "Coverage decreased"
+ else
+ echo "Coverage unchanged"
+ fi
+ fi
+
+ - name: Save CPP Coverage Percentage to file
+ run: |
+ echo "Coverage: ${{ env.CPP_COV_PERCENT }}" > current_coverage.txt
+
+ - name: Save the current CPP Coverage Percentage to the cache
+ id: cache-cov-save
+ uses: actions/cache/save@v3.3.2
+ with:
+ path: current_coverage.txt
+ key: coverage
+
+ - name: Check percentage and send Slack notification
+ if: ${{ env.CPP_COV_PREV_PERCENT && env.CPP_COV_PERCENT && env.CPP_COV_PERCENT < env.CPP_COV_PREV_PERCENT }}
+ uses: slackapi/slack-github-action@v1.24.0
+ with:
+ # For posting a rich message using Block Kit
+ payload: |
+ {
+ "text": "The CPP Code Coverage has been reduced",
+ "blocks": [
+ {
+ "type": "section",
+ "text": {
+ "type": "mrkdwn",
+ "text": "The CPP Code Coverage from the current run(${{ env.CPP_COV_PERCENT }}%) is lower the previous one(${{ env.CPP_COV_PREV_PERCENT }}%)."
+ }
+ }
+ ]
+ }
+ env:
+ SLACK_WEBHOOK_URL: ${{ secrets.ARCTICDB_DEV_WEBHOOK_URL }}
+ SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
+
+ stop_ec2_runner_code_coverage:
+ needs: [start_ec2_runner_code_coverage, code_coverage]
+ if: |
+ always()
+ uses: ./.github/workflows/ec2_runner_jobs.yml
+ secrets: inherit
+ with:
+ job_type: stop
+ label: ${{ needs.start_ec2_runner_code_coverage.outputs.label }}
+ ec2-instance-id: ${{ needs.start_ec2_runner_code_coverage.outputs.ec2-instance-id }}
diff --git a/.github/workflows/benchmark_commits.yml b/.github/workflows/benchmark_commits.yml
index a27177db93..9a4327d1e7 100644
--- a/.github/workflows/benchmark_commits.yml
+++ b/.github/workflows/benchmark_commits.yml
@@ -31,8 +31,14 @@ jobs:
defaults:
run: {shell: bash}
steps:
+ - name: Initialize LFS
+ shell: bash -l {0}
+ run: |
+ dnf install -y git-lfs
+
- uses: actions/checkout@v3.3.0
with:
+ lfs: 'true'
fetch-depth: 0
submodules: recursive
token: ${{ secrets.ARCTICDB_TEST_PAT }}
@@ -46,14 +52,15 @@ jobs:
- name: Install deps
uses: ./.github/actions/setup_deps
- # We are changing the python here because we want to use the default python to build (it is devel version)
- # and this python for the rest of the testing
- - name: Select Python (Linux)
- shell: bash -el {0}
+ - name: Extra envs
+ shell: bash -l {0}
run: |
- ls /opt/python
- echo /opt/python/cp36-cp36m/bin >> $GITHUB_PATH
-
+ . build_tooling/vcpkg_caching.sh # Linux follower needs another call in CIBW
+ echo -e "VCPKG_BINARY_SOURCES=$VCPKG_BINARY_SOURCES
+ VCPKG_ROOT=$PLATFORM_VCPKG_ROOT" | tee -a $GITHUB_ENV
+ cmake -P cpp/CMake/CpuCount.cmake | sed 's/^-- //' | tee -a $GITHUB_ENV
+ env:
+ CMAKE_BUILD_PARALLEL_LEVEL: ${{vars.CMAKE_BUILD_PARALLEL_LEVEL}}
- name: Set persistent storage variables
uses: ./.github/actions/set_persistent_storage_env_vars
diff --git a/build_tooling/transform_asv_results.py b/build_tooling/transform_asv_results.py
index bab414460a..cd5c4418f2 100644
--- a/build_tooling/transform_asv_results.py
+++ b/build_tooling/transform_asv_results.py
@@ -5,7 +5,9 @@
As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0.
"""
+
import pandas as pd
+from numpy import inf
from arcticdb.storage_fixtures.s3 import real_s3_from_environment_variables
import json
from pathlib import Path
diff --git a/python/.asv/results/benchmarks.json b/python/.asv/results/benchmarks.json
index b3fc895823..c659476028 100644
--- a/python/.asv/results/benchmarks.json
+++ b/python/.asv/results/benchmarks.json
@@ -11,7 +11,7 @@
"1500000"
]
],
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "peakmemory",
"unit": "bytes",
@@ -29,7 +29,7 @@
"1500000"
]
],
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "peakmemory",
"unit": "bytes",
@@ -47,7 +47,7 @@
"1500000"
]
],
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "peakmemory",
"unit": "bytes",
@@ -65,7 +65,7 @@
"1500000"
]
],
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "peakmemory",
"unit": "bytes",
@@ -83,7 +83,7 @@
"1500000"
]
],
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "peakmemory",
"unit": "bytes",
@@ -101,7 +101,7 @@
"1500000"
]
],
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "peakmemory",
"unit": "bytes",
@@ -119,7 +119,7 @@
"1500000"
]
],
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "peakmemory",
"unit": "bytes",
@@ -142,7 +142,7 @@
"repeat": 0,
"rounds": 2,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "time",
"unit": "seconds",
@@ -166,7 +166,7 @@
"repeat": 0,
"rounds": 2,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "time",
"unit": "seconds",
@@ -190,7 +190,7 @@
"repeat": 0,
"rounds": 2,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "time",
"unit": "seconds",
@@ -214,7 +214,7 @@
"repeat": 0,
"rounds": 2,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "time",
"unit": "seconds",
@@ -238,7 +238,7 @@
"repeat": 0,
"rounds": 2,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "time",
"unit": "seconds",
@@ -262,7 +262,7 @@
"repeat": 0,
"rounds": 2,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "time",
"unit": "seconds",
@@ -286,7 +286,7 @@
"repeat": 0,
"rounds": 2,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:36",
+ "setup_cache_key": "basic_functions:38",
"timeout": 6000,
"type": "time",
"unit": "seconds",
@@ -310,7 +310,7 @@
"1000"
]
],
- "setup_cache_key": "basic_functions:137",
+ "setup_cache_key": "basic_functions:139",
"timeout": 6000,
"type": "peakmemory",
"unit": "bytes",
@@ -333,7 +333,7 @@
"1000"
]
],
- "setup_cache_key": "basic_functions:137",
+ "setup_cache_key": "basic_functions:139",
"timeout": 6000,
"type": "peakmemory",
"unit": "bytes",
@@ -356,7 +356,7 @@
"1000"
]
],
- "setup_cache_key": "basic_functions:137",
+ "setup_cache_key": "basic_functions:139",
"timeout": 6000,
"type": "peakmemory",
"unit": "bytes",
@@ -379,7 +379,7 @@
"1000"
]
],
- "setup_cache_key": "basic_functions:137",
+ "setup_cache_key": "basic_functions:139",
"timeout": 6000,
"type": "peakmemory",
"unit": "bytes",
@@ -407,7 +407,7 @@
"repeat": 0,
"rounds": 2,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:137",
+ "setup_cache_key": "basic_functions:139",
"timeout": 6000,
"type": "time",
"unit": "seconds",
@@ -436,7 +436,7 @@
"repeat": 0,
"rounds": 2,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:137",
+ "setup_cache_key": "basic_functions:139",
"timeout": 6000,
"type": "time",
"unit": "seconds",
@@ -465,7 +465,7 @@
"repeat": 0,
"rounds": 2,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:137",
+ "setup_cache_key": "basic_functions:139",
"timeout": 6000,
"type": "time",
"unit": "seconds",
@@ -494,7 +494,7 @@
"repeat": 0,
"rounds": 2,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:137",
+ "setup_cache_key": "basic_functions:139",
"timeout": 6000,
"type": "time",
"unit": "seconds",
@@ -523,7 +523,7 @@
"repeat": 0,
"rounds": 2,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:137",
+ "setup_cache_key": "basic_functions:139",
"timeout": 6000,
"type": "time",
"unit": "seconds",
@@ -531,7 +531,7 @@
"warmup_time": -1
},
"basic_functions.ModificationFunctions.time_append_large": {
- "code": "class ModificationFunctions:\n def time_append_large(self, rows):\n self.lib.append(f\"sym\", self.df_append_large)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)",
+ "code": "class ModificationFunctions:\n def time_append_large(self, lad: LargeAppendDataModify, rows):\n large: pd.DataFrame = lad.df_append_large[rows].pop()\n self.lib.append(f\"sym\", large)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad",
"min_run_count": 2,
"name": "basic_functions.ModificationFunctions.time_append_large",
"number": 1,
@@ -544,18 +544,18 @@
"1500000"
]
],
- "repeat": 0,
- "rounds": 2,
+ "repeat": 3,
+ "rounds": 1,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:235",
+ "setup_cache_key": "basic_functions:278",
"timeout": 6000,
"type": "time",
"unit": "seconds",
- "version": "be3be12028b2f1a949589e618252e94a88e5f35b5aa90f5815fd8aaa324c8550",
+ "version": "b817d86d1bf76649691197bfaf1261a96a1a34c9a25f053d66f6dfcf14c6f279",
"warmup_time": -1
},
"basic_functions.ModificationFunctions.time_append_short_wide": {
- "code": "class ModificationFunctions:\n def time_append_short_wide(self, rows):\n self.lib_short_wide.append(\"short_wide_sym\", self.df_append_short_wide)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)",
+ "code": "class ModificationFunctions:\n def time_append_short_wide(self, lad: LargeAppendDataModify, rows):\n large: pd.DataFrame = lad.df_append_short_wide[rows].pop()\n self.lib_short_wide.append(\"short_wide_sym\", large)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad",
"min_run_count": 2,
"name": "basic_functions.ModificationFunctions.time_append_short_wide",
"number": 1,
@@ -568,18 +568,18 @@
"1500000"
]
],
- "repeat": 0,
- "rounds": 2,
+ "repeat": 3,
+ "rounds": 1,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:235",
+ "setup_cache_key": "basic_functions:278",
"timeout": 6000,
"type": "time",
"unit": "seconds",
- "version": "3a2e1e7a4dc518468ba388f560231ac1a1366b212dbd3309e3e877606c5630e8",
+ "version": "3678115ad2d40bf19062212095071431ff63cedc159661ee3056be7cbf109f98",
"warmup_time": -1
},
"basic_functions.ModificationFunctions.time_append_single": {
- "code": "class ModificationFunctions:\n def time_append_single(self, rows):\n self.lib.append(f\"sym\", self.df_append_single)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)",
+ "code": "class ModificationFunctions:\n def time_append_single(self, lad: LargeAppendDataModify, rows):\n self.lib.append(f\"sym\", self.df_append_single)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad",
"min_run_count": 2,
"name": "basic_functions.ModificationFunctions.time_append_single",
"number": 1,
@@ -592,18 +592,18 @@
"1500000"
]
],
- "repeat": 0,
- "rounds": 2,
+ "repeat": 3,
+ "rounds": 1,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:235",
+ "setup_cache_key": "basic_functions:278",
"timeout": 6000,
"type": "time",
"unit": "seconds",
- "version": "c7f13a15b9074ab9bdb6f3e47ab97d75708938f005021b7a8fde82fe6902041d",
+ "version": "8f398155deb342c70fe4c65e8da636b1f18c9296632b4649aab8dae306aa8453",
"warmup_time": -1
},
"basic_functions.ModificationFunctions.time_delete": {
- "code": "class ModificationFunctions:\n def time_delete(self, rows):\n self.lib.delete(f\"sym\")\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)",
+ "code": "class ModificationFunctions:\n def time_delete(self, lad: LargeAppendDataModify, rows):\n self.lib.delete(f\"sym\")\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad",
"min_run_count": 2,
"name": "basic_functions.ModificationFunctions.time_delete",
"number": 1,
@@ -616,18 +616,18 @@
"1500000"
]
],
- "repeat": 0,
- "rounds": 2,
+ "repeat": 3,
+ "rounds": 1,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:235",
+ "setup_cache_key": "basic_functions:278",
"timeout": 6000,
"type": "time",
"unit": "seconds",
- "version": "da4c95139bc0ae404ed6585b9e3398af8ed7e421cefcbeb9ff9ea6a77b85915a",
+ "version": "6d8afae2414e0f842495a7962f5950472814bde20e99eebc474db6953d8e1ae3",
"warmup_time": -1
},
"basic_functions.ModificationFunctions.time_delete_short_wide": {
- "code": "class ModificationFunctions:\n def time_delete_short_wide(self, rows):\n self.lib_short_wide.delete(\"short_wide_sym\")\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)",
+ "code": "class ModificationFunctions:\n def time_delete_short_wide(self, lad: LargeAppendDataModify, rows):\n self.lib_short_wide.delete(\"short_wide_sym\")\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad",
"min_run_count": 2,
"name": "basic_functions.ModificationFunctions.time_delete_short_wide",
"number": 1,
@@ -640,18 +640,18 @@
"1500000"
]
],
- "repeat": 0,
- "rounds": 2,
+ "repeat": 3,
+ "rounds": 1,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:235",
+ "setup_cache_key": "basic_functions:278",
"timeout": 6000,
"type": "time",
"unit": "seconds",
- "version": "12254786f4a42e8bd488f48075cb70eddf4d87c8581271e2e2b526b7940123b9",
+ "version": "f867fc9cac4d0706b01166662af37434100460706d4f6118de0bc2e0e3087bae",
"warmup_time": -1
},
"basic_functions.ModificationFunctions.time_update_half": {
- "code": "class ModificationFunctions:\n def time_update_half(self, rows):\n self.lib.update(f\"sym\", self.df_update_half)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)",
+ "code": "class ModificationFunctions:\n def time_update_half(self, lad: LargeAppendDataModify, rows):\n self.lib.update(f\"sym\", self.df_update_half)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad",
"min_run_count": 2,
"name": "basic_functions.ModificationFunctions.time_update_half",
"number": 1,
@@ -664,18 +664,18 @@
"1500000"
]
],
- "repeat": 0,
- "rounds": 2,
+ "repeat": 3,
+ "rounds": 1,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:235",
+ "setup_cache_key": "basic_functions:278",
"timeout": 6000,
"type": "time",
"unit": "seconds",
- "version": "f56b8677f5b90b49568e6865c0656b734b9b2a8054baa71b78eaed8f53cb3176",
+ "version": "6a011f58b79c483849a70576915c2d56deed1227d38489a21140341ca860ce33",
"warmup_time": -1
},
"basic_functions.ModificationFunctions.time_update_short_wide": {
- "code": "class ModificationFunctions:\n def time_update_short_wide(self, rows):\n self.lib_short_wide.update(\"short_wide_sym\", self.df_update_short_wide)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)",
+ "code": "class ModificationFunctions:\n def time_update_short_wide(self, lad: LargeAppendDataModify, rows):\n self.lib_short_wide.update(\"short_wide_sym\", self.df_update_short_wide)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad",
"min_run_count": 2,
"name": "basic_functions.ModificationFunctions.time_update_short_wide",
"number": 1,
@@ -688,18 +688,18 @@
"1500000"
]
],
- "repeat": 0,
- "rounds": 2,
+ "repeat": 3,
+ "rounds": 1,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:235",
+ "setup_cache_key": "basic_functions:278",
"timeout": 6000,
"type": "time",
"unit": "seconds",
- "version": "5db16777228d8de1ab4af9943d1ed0541c0b02c4dbcd888cfa3e26f37eb0215b",
+ "version": "111496c5bd4a4c498df28819d3cbcd9d699c4d3363ad3969f102a1d2076b3086",
"warmup_time": -1
},
"basic_functions.ModificationFunctions.time_update_single": {
- "code": "class ModificationFunctions:\n def time_update_single(self, rows):\n self.lib.update(f\"sym\", self.df_update_single)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)",
+ "code": "class ModificationFunctions:\n def time_update_single(self, lad: LargeAppendDataModify, rows):\n self.lib.update(f\"sym\", self.df_update_single)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad",
"min_run_count": 2,
"name": "basic_functions.ModificationFunctions.time_update_single",
"number": 1,
@@ -712,18 +712,18 @@
"1500000"
]
],
- "repeat": 0,
- "rounds": 2,
+ "repeat": 3,
+ "rounds": 1,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:235",
+ "setup_cache_key": "basic_functions:278",
"timeout": 6000,
"type": "time",
"unit": "seconds",
- "version": "cf62fa8a658e2f2ab16d286992423dd8d69334415ab61600906c6e9dc0185597",
+ "version": "c45c168d5713f3028a9a5b97959d52116c8d228870ad580be06d86336d2476c6",
"warmup_time": -1
},
"basic_functions.ModificationFunctions.time_update_upsert": {
- "code": "class ModificationFunctions:\n def time_update_upsert(self, rows):\n self.lib.update(f\"sym\", self.df_update_upsert, upsert=True)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)",
+ "code": "class ModificationFunctions:\n def time_update_upsert(self, lad: LargeAppendDataModify, rows):\n self.lib.update(f\"sym\", self.df_update_upsert, upsert=True)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad",
"min_run_count": 2,
"name": "basic_functions.ModificationFunctions.time_update_upsert",
"number": 1,
@@ -736,14 +736,14 @@
"1500000"
]
],
- "repeat": 0,
- "rounds": 2,
+ "repeat": 3,
+ "rounds": 1,
"sample_time": 0.01,
- "setup_cache_key": "basic_functions:235",
+ "setup_cache_key": "basic_functions:278",
"timeout": 6000,
"type": "time",
"unit": "seconds",
- "version": "80de9b1982a498c300177d02874a8626152eccb57cd0ba4228a5bb168e7608c8",
+ "version": "7f139bf03457104abe937914aa3572503ed52330b3a271d82112696060331d8f",
"warmup_time": -1
},
"bi_benchmarks.BIBenchmarks.peakmem_query_groupby_city_count_all": {
diff --git a/python/benchmarks/basic_functions.py b/python/benchmarks/basic_functions.py
index 5ff8e4e9c2..09cedaadeb 100644
--- a/python/benchmarks/basic_functions.py
+++ b/python/benchmarks/basic_functions.py
@@ -5,6 +5,9 @@
As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0.
"""
+
+import time
+from typing import List
from arcticdb import Arctic
from arcticdb.version_store.library import WritePayload, ReadRequest
import pandas as pd
@@ -16,7 +19,7 @@
WIDE_DF_ROWS = 5_000
WIDE_DF_COLS = 30_000
# We use larger dataframes for non-batch methods
-PARAMS = ([1_000_000, 1_500_000])
+PARAMS = [1_000_000, 1_500_000]
PARAM_NAMES = ["rows"]
BATCH_PARAMS = ([25_000, 50_000], [500, 1000])
BATCH_PARAM_NAMES = ["rows", "num_symbols"]
@@ -37,7 +40,9 @@ def setup_cache(self):
self.ac = Arctic(BasicFunctions.CONNECTION_STRING)
rows_values = BasicFunctions.params
- self.dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}
+ self.dfs = {
+ rows: generate_pseudo_random_dataframe(rows) for rows in rows_values
+ }
for rows in rows_values:
lib = get_prewritten_lib_name(rows)
self.ac.delete_library(lib)
@@ -134,11 +139,14 @@ class BatchBasicFunctions:
DATE_RANGE = DATE_RANGE
params = BATCH_PARAMS
param_names = BATCH_PARAM_NAMES
+
def setup_cache(self):
self.ac = Arctic(BatchBasicFunctions.CONNECTION_STRING)
rows_values, num_symbols_values = BatchBasicFunctions.params
- self.dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}
+ self.dfs = {
+ rows: generate_pseudo_random_dataframe(rows) for rows in rows_values
+ }
for rows in rows_values:
lib = get_prewritten_lib_name(rows)
self.ac.delete_library(lib)
@@ -185,6 +193,7 @@ def time_read_batch_pure(self, rows, num_symbols):
def peakmem_read_batch(self, rows, num_symbols):
read_reqs = [ReadRequest(f"{sym}_sym") for sym in range(num_symbols)]
self.lib.read_batch(read_reqs)
+
def time_read_batch_with_columns(self, rows, num_symbols):
COLS = ["value"]
read_reqs = [
@@ -199,7 +208,6 @@ def peakmem_read_batch_with_columns(self, rows, num_symbols):
]
self.lib.read_batch(read_reqs)
-
def time_read_batch_with_date_ranges(self, rows, num_symbols):
read_reqs = [
ReadRequest(f"{sym}_sym", date_range=BatchBasicFunctions.DATE_RANGE)
@@ -215,13 +223,26 @@ def peakmem_read_batch_with_date_ranges(self, rows, num_symbols):
self.lib.read_batch(read_reqs)
+def get_time_at_fraction_of_df(fraction, rows):
+ end_time = pd.Timestamp("1/1/2023")
+ time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction - 1)))
+ return end_time + time_delta
+
+
from shutil import copytree, rmtree
+
+
class ModificationFunctions:
"""
Modification functions (update, append, delete) need a different setup/teardown process, thus we place them in a
separate group.
"""
- number = 1 # We do a single run between setup and teardown because we e.g. can't delete a symbol twice
+
+ rounds = 1
+ number = 1 # We do a single run between setup and teardown because we e.g. can't delete a symbol twice
+ repeat = 3
+ warmup_time = 0
+
timeout = 6000
ARCTIC_DIR = "modification_functions"
ARCTIC_DIR_ORIGINAL = "modification_functions_original"
@@ -232,16 +253,66 @@ class ModificationFunctions:
params = PARAMS
param_names = PARAM_NAMES
+ class LargeAppendDataModify:
+ """
+ This class holds a cache of large dataframes for the append benchmarks.
+ The purpose of this cache is to create dataframes
+ whose timestamps are sequenced over time so that
+ overlaps do not occur.
+ """
+
+ def __init__(self, num_rows_list: List[int], number_elements: int):
+ self.df_append_large = {}
+ self.df_append_short_wide = {}
+ start_time = time.time()
+ for rows in num_rows_list:
+ print("Generating dataframe with rows: ", rows)
+ lst = list()
+ lst_saw = list()
+ for n in range(number_elements + 1):
+ print("Generating dataframe no: ", n)
+
+ df = generate_pseudo_random_dataframe(
+ rows, "s", get_time_at_fraction_of_df(2 * (n + 1), rows)
+ )
+ df_saw = generate_random_floats_dataframe_with_index(
+ ModificationFunctions.WIDE_DF_ROWS,
+ ModificationFunctions.WIDE_DF_COLS,
+ "s",
+ get_time_at_fraction_of_df(
+ 2 * (n + 1), rows=ModificationFunctions.WIDE_DF_ROWS
+ ),
+ )
+
+ lst.append(df)
+ lst_saw.append(df_saw)
+ print(
+ f"STANDARD Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}"
+ )
+ print(
+ f"SHORT_n_WIDE Index {df_saw.iloc[0].name} - {df_saw.iloc[df_saw.shape[0] - 1].name}"
+ )
+ print("Add dataframes: ", len(lst))
+ self.df_append_large[rows] = lst
+ self.df_append_short_wide[rows] = lst_saw
+ print("APPEND LARGE cache generation took (s) :", time.time() - start_time)
+
def setup_cache(self):
self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)
rows_values = ModificationFunctions.params
- self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}
+ self.init_dfs = {
+ rows: generate_pseudo_random_dataframe(rows) for rows in rows_values
+ }
for rows in rows_values:
lib_name = get_prewritten_lib_name(rows)
self.ac.delete_library(lib_name)
lib = self.ac.create_library(lib_name)
- lib.write("sym", self.init_dfs[rows])
+ df = self.init_dfs[rows]
+ lib.write("sym", df)
+ print(
+ f"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}"
+ )
lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)
self.ac.delete_library(lib_name)
@@ -255,64 +326,81 @@ def setup_cache(self):
# We use the fact that we're running on LMDB to store a copy of the initial arctic directory.
# Then on each teardown we restore the initial state by overwriting the modified with the original.
- copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)
+ copytree(
+ ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL
+ )
+ number_iteration = (
+ ModificationFunctions.repeat
+ * ModificationFunctions.number
+ * ModificationFunctions.rounds
+ )
- def setup(self, rows):
- def get_time_at_fraction_of_df(fraction, rows=rows):
- end_time = pd.Timestamp("1/1/2023")
- time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))
- return end_time + time_delta
+ lad = ModificationFunctions.LargeAppendDataModify(
+ ModificationFunctions.params, number_iteration
+ )
- self.df_update_single = generate_pseudo_random_dataframe(1, "s", get_time_at_fraction_of_df(0.5))
- self.df_update_half = generate_pseudo_random_dataframe(rows//2, "s", get_time_at_fraction_of_df(0.75))
- self.df_update_upsert = generate_pseudo_random_dataframe(rows, "s", get_time_at_fraction_of_df(1.5))
- self.df_append_single = generate_pseudo_random_dataframe(1, "s", get_time_at_fraction_of_df(1.1))
- self.df_append_large = generate_pseudo_random_dataframe(rows, "s", get_time_at_fraction_of_df(2))
+ return lad
+
+ def setup(self, lad: LargeAppendDataModify, rows):
+ self.df_update_single = generate_pseudo_random_dataframe(
+ 1, "s", get_time_at_fraction_of_df(0.5, rows)
+ )
+ self.df_update_half = generate_pseudo_random_dataframe(
+ rows // 2, "s", get_time_at_fraction_of_df(0.75, rows)
+ )
+ self.df_update_upsert = generate_pseudo_random_dataframe(
+ rows, "s", get_time_at_fraction_of_df(1.5, rows)
+ )
+ self.df_append_single = generate_pseudo_random_dataframe(
+ 1, "s", get_time_at_fraction_of_df(1.1, rows)
+ )
self.df_update_short_wide = generate_random_floats_dataframe_with_index(
ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS
)
- self.df_append_short_wide = generate_random_floats_dataframe_with_index(
- ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, "s", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)
- )
self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)
self.lib = self.ac[get_prewritten_lib_name(rows)]
- self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]
-
+ self.lib_short_wide = self.ac[
+ get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)
+ ]
- def teardown(self, rows):
+ def teardown(self, lad: LargeAppendDataModify, rows):
# After the modification functions clean up the changes by replacing the modified ARCTIC_DIR with the original ARCTIC_DIR_ORIGINAL
# TODO: We can use dirs_exist_ok=True on copytree instead of removing first if we run with python version >=3.8
rmtree(ModificationFunctions.ARCTIC_DIR)
- copytree(ModificationFunctions.ARCTIC_DIR_ORIGINAL, ModificationFunctions.ARCTIC_DIR)
- del self.ac
+ copytree(
+ ModificationFunctions.ARCTIC_DIR_ORIGINAL, ModificationFunctions.ARCTIC_DIR
+ )
+ del self.ac
- def time_update_single(self, rows):
+ def time_update_single(self, lad: LargeAppendDataModify, rows):
self.lib.update(f"sym", self.df_update_single)
- def time_update_half(self, rows):
+ def time_update_half(self, lad: LargeAppendDataModify, rows):
self.lib.update(f"sym", self.df_update_half)
- def time_update_upsert(self, rows):
+ def time_update_upsert(self, lad: LargeAppendDataModify, rows):
self.lib.update(f"sym", self.df_update_upsert, upsert=True)
- def time_update_short_wide(self, rows):
+ def time_update_short_wide(self, lad: LargeAppendDataModify, rows):
self.lib_short_wide.update("short_wide_sym", self.df_update_short_wide)
- def time_append_single(self, rows):
+ def time_append_single(self, lad: LargeAppendDataModify, rows):
self.lib.append(f"sym", self.df_append_single)
- def time_append_large(self, rows):
- self.lib.append(f"sym", self.df_append_large)
+ def time_append_large(self, lad: LargeAppendDataModify, rows):
+ large: pd.DataFrame = lad.df_append_large[rows].pop(0)
+ self.lib.append(f"sym", large)
- def time_append_short_wide(self, rows):
- self.lib_short_wide.append("short_wide_sym", self.df_append_short_wide)
+ def time_append_short_wide(self, lad: LargeAppendDataModify, rows):
+ large: pd.DataFrame = lad.df_append_short_wide[rows].pop(0)
+ self.lib_short_wide.append("short_wide_sym", large)
- def time_delete(self, rows):
+ def time_delete(self, lad: LargeAppendDataModify, rows):
self.lib.delete(f"sym")
- def time_delete_short_wide(self, rows):
+ def time_delete_short_wide(self, lad: LargeAppendDataModify, rows):
self.lib_short_wide.delete("short_wide_sym")
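
For readers unfamiliar with the `lad` argument now threaded through `setup`, `teardown`, and the `time_*` methods above: asv passes whatever `setup_cache()` returns as the first positional argument to those methods, which is how the pre-built `LargeAppendDataModify` cache reaches each benchmark invocation. A minimal, self-contained sketch of that pattern follows; the class and attribute names are illustrative, not taken from the PR:

```python
# Minimal sketch of the asv setup_cache hand-off used by ModificationFunctions.
# The object returned by setup_cache() is computed once, then passed as the
# first argument to setup(), every benchmark method, and teardown().

class CachePatternExample:
    params = [1_000, 2_000]
    param_names = ["rows"]
    number = 1
    repeat = 3
    rounds = 1

    def setup_cache(self):
        # Built once per benchmark environment; the result must be picklable.
        return {rows: list(range(rows)) for rows in CachePatternExample.params}

    def setup(self, cache, rows):
        # `cache` is the dict returned above; `rows` is the current parameter.
        self.data = cache[rows]

    def time_sum(self, cache, rows):
        sum(self.data)

    def teardown(self, cache, rows):
        del self.data
```

Because `setup_cache()` runs only once and its result is cached, the heavy dataframe generation is paid a single time rather than before every sample; each timed append in the PR then consumes one pre-generated frame via `pop(0)`, so repeated samples never write overlapping index ranges.
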
diff --git a/python/benchmarks/bi_benchmarks.py b/python/benchmarks/bi_benchmarks.py
index b67595b061..33481e5f83 100644
--- a/python/benchmarks/bi_benchmarks.py
+++ b/python/benchmarks/bi_benchmarks.py
@@ -184,8 +184,4 @@ def time_query_groupby_city_count_filter_two_aggregations(self, times_bigger) ->
def peakmem_query_groupby_city_count_filter_two_aggregations(self, times_bigger):
- return self.query_groupby_city_count_filter_two_aggregations(times_bigger)
-
-
-
-
+ return self.query_groupby_city_count_filter_two_aggregations(times_bigger)
\ No newline at end of file
diff --git a/python/benchmarks/finalize_staged_data.py b/python/benchmarks/finalize_staged_data.py
index e0a2c86bab..0237cbddba 100644
--- a/python/benchmarks/finalize_staged_data.py
+++ b/python/benchmarks/finalize_staged_data.py
@@ -7,18 +7,20 @@
from arcticdb.util.utils import TimestampNumber
"""
+import sys
from arcticdb.arctic import Arctic
from arcticdb.util.utils import CachedDFGenerator, TimestampNumber, stage_chunks
from arcticdb.version_store.library import Library, StagedDataFinalizeMethod
from .common import *
-from asv_runner.benchmarks.mark import SkipNotImplemented
+
class FinalizeStagedData:
- '''
- Check and benchmark performance of finalize_staged_data().
- Due to specifics of this procedure we tune asv to make single measurement
- which would be over a relatively big staged data.
- '''
+ """
+ Check and benchmark performance of finalize_staged_data().
+ Due to specifics of this procedure we tune asv to make single measurement
+ which would be over a relatively big staged data.
+ """
+
number = 1
rounds = 1
repeat = 1
@@ -27,7 +29,7 @@ class FinalizeStagedData:
timeout = 600
LIB_NAME = "Finalize_Staged_Data_LIB"
- #Define the number of chunks
+ # Define the number of chunks
params = [1000, 2000]
def __init__(self):
@@ -39,11 +41,11 @@ def setup_cache(self):
# Generating dataframe with all kind of supported data types
cachedDF = CachedDFGenerator(350000, [5])
return cachedDF
-
- def setup(self, cache:CachedDFGenerator, param:int):
+
+ def setup(self, cache: CachedDFGenerator, param: int):
cachedDF = cache
-
- # Unfortunately there is no way to tell asv to run single time
+
+ # Unfortunately there is no way to tell asv to run single time
# each of finalize_stage_data() tests if we do the large setup in the
# setup_cache() method. We can only force it to work with single execution
# if the symbol setup with stage data is in the setup() method
@@ -52,7 +54,9 @@ def setup(self, cache:CachedDFGenerator, param:int):
self.ac.delete_library(self.lib_name)
self.lib = self.ac.create_library(self.lib_name)
- INITIAL_TIMESTAMP: TimestampNumber = TimestampNumber(0, cachedDF.TIME_UNIT) # Synchronize index frequency
+ INITIAL_TIMESTAMP: TimestampNumber = TimestampNumber(
+ 0, cachedDF.TIME_UNIT
+ ) # Synchronize index frequency
df = cachedDF.generate_dataframe_timestamp_indexed(200, 0, cachedDF.TIME_UNIT)
list_of_chunks = [10000] * param
@@ -61,45 +65,51 @@ def setup(self, cache:CachedDFGenerator, param:int):
self.lib.write(self.symbol, data=df, prune_previous_versions=True)
stage_chunks(self.lib, self.symbol, cachedDF, INITIAL_TIMESTAMP, list_of_chunks)
- def time_finalize_staged_data(self, cache:CachedDFGenerator, param:int):
+ def time_finalize_staged_data(self, cache: CachedDFGenerator, param: int):
print(">>> Library:", self.lib)
print(">>> Symbol:", self.symbol)
self.lib.finalize_staged_data(self.symbol, mode=StagedDataFinalizeMethod.WRITE)
- def peakmem_finalize_staged_data(self, cache:CachedDFGenerator, param:int):
+ def peakmem_finalize_staged_data(self, cache: CachedDFGenerator, param: int):
print(">>> Library:", self.lib)
print(">>> Symbol:", self.symbol)
self.lib.finalize_staged_data(self.symbol, mode=StagedDataFinalizeMethod.WRITE)
- def teardown(self, cache:CachedDFGenerator, param:int):
+ def teardown(self, cache: CachedDFGenerator, param: int):
self.ac.delete_library(self.lib_name)
+
+from asv_runner.benchmarks.mark import SkipNotImplemented
+
+
class FinalizeStagedDataWiderDataframeX3(FinalizeStagedData):
- '''
- The test is meant to be executed with 3 times wider dataframe than the base test
- '''
+ """
+ The test is meant to be executed with 3 times wider dataframe than the base test
+ """
def setup_cache(self):
# Generating dataframe with all kind of supported data type
- cachedDF = CachedDFGenerator(350000, [5, 25, 50]) # 3 times wider DF with bigger string columns
+ cachedDF = CachedDFGenerator(
+ 350000, [5, 25, 50]
+ ) # 3 times wider DF with bigger string columns
return cachedDF
-
- def setup(self, cache:CachedDFGenerator, param:int):
- if (not SLOW_TESTS):
- raise SkipNotImplemented ("Slow tests are skipped")
- super().setup(cache,param)
-
- def time_finalize_staged_data(self, cache:CachedDFGenerator, param:int):
- if (not SLOW_TESTS):
- raise SkipNotImplemented ("Slow tests are skipped")
- super().time_finalize_staged_data(cache,param)
-
- def peakmem_finalize_staged_data(self, cache:CachedDFGenerator, param:int):
- if (not SLOW_TESTS):
- raise SkipNotImplemented ("Slow tests are skipped")
- super().peakmem_finalize_staged_data(cache,param)
-
- def teardown(self, cache:CachedDFGenerator, param:int):
- if (SLOW_TESTS):
+
+ def setup(self, cache: CachedDFGenerator, param: int):
+ if not SLOW_TESTS:
+ raise SkipNotImplemented("Slow tests are skipped")
+ super().setup(cache, param)
+
+ def time_finalize_staged_data(self, cache: CachedDFGenerator, param: int):
+ if not SLOW_TESTS:
+ raise SkipNotImplemented("Slow tests are skipped")
+ super().time_finalize_staged_data(cache, param)
+
+ def peakmem_finalize_staged_data(self, cache: CachedDFGenerator, param: int):
+ if not SLOW_TESTS:
+ raise SkipNotImplemented("Slow tests are skipped")
+ super().peakmem_finalize_staged_data(cache, param)
+
+ def teardown(self, cache: CachedDFGenerator, param: int):
+ if SLOW_TESTS:
# Run only on slow tests
self.ac.delete_library(self.lib_name)
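
The `SkipNotImplemented` import relocated in `finalize_staged_data.py` above supports the conditional-skip pattern used by `FinalizeStagedDataWiderDataframeX3`. A hedged, stand-alone sketch of that pattern (the `ASV_SLOW_TESTS` environment variable below is hypothetical; the PR's `SLOW_TESTS` flag actually comes from `.common`):

```python
# Sketch: skip a slow asv benchmark at run time instead of letting it fail.
import os

from asv_runner.benchmarks.mark import SkipNotImplemented

SLOW_TESTS = os.environ.get("ASV_SLOW_TESTS", "0") == "1"  # hypothetical flag


class SlowOnlyBenchmark:
    timeout = 600

    def time_expensive_operation(self):
        if not SLOW_TESTS:
            # asv treats the raised exception as a skip rather than a failure.
            raise SkipNotImplemented("Slow tests are skipped")
        sum(range(10_000_000))
```
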