Back to pull request #1964

Refactor DataFrameNormalizer to improve performance #3699

Workflow file for this run

.github/workflows/analysis_workflow.yml at dcf908d

	# Workflow header: display name and the events that trigger a run.
	name: Build with analysis tools
	on:
	  # Manual trigger; `run_all_benchmarks` is forwarded to the benchmark jobs.
	  workflow_dispatch:
	    inputs:
	      run_all_benchmarks:
	        type: boolean
	        default: false

	  # Schedule the job to run daily at 00:00 (GitHub evaluates cron schedules in UTC)
	  schedule:
	    - cron: '0 0 * * *'

	  # NOTE: pull_request_target runs in the context of the base repository.
	  pull_request_target:
	    paths-ignore:
	      # Skip runs for Markdown files at any depth.
	      - "**/*.md"

	jobs:
	# Runs build_tooling/get_commits_for_benchmark.py and publishes the `commits`
	# output of that step as the job output `matrix`.
	get_commits_to_benchmark:
	  name: Get tag commits
	  runs-on: ubuntu-22.04
	  steps:
	    - name: Checkout code
	      uses: actions/[email protected]
	      with:
	        # Full history (depth 0) instead of a shallow clone.
	        fetch-depth: 0
	        # PR head SHA on pull_request_target; the empty string otherwise.
	        ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || '' }} # Note: This is dangerous if we run automatic CI on external PRs

	    - name: Get tags
	      id: get_tags
	      run: |
	        python3 build_tooling/get_commits_for_benchmark.py ${{ inputs.run_all_benchmarks == true && '--run_all_benchmarks' || ''}}
	  outputs:
	    matrix: ${{ steps.get_tags.outputs.commits }}

	# Calls the reusable cibw_docker_image workflow with a pinned cibuildwheel version.
	cibw_docker_image:
	  uses: ./.github/workflows/cibw_docker_image.yml
	  permissions:
	    packages: write
	  with:
	    cibuildwheel_ver: "2.12.1"
	    force_update: false

	# Calls the reusable benchmark workflow once per entry of the commit matrix.
	benchmark_commits:
	  needs: [get_commits_to_benchmark, cibw_docker_image]
	  strategy:
	    # Let the remaining commits finish even if one matrix entry fails.
	    fail-fast: false
	    matrix:
	      commits: ${{ fromJson(needs.get_commits_to_benchmark.outputs.matrix)}}
	  name: Benchmark commit ${{ matrix.commits }}
	  uses: ./.github/workflows/benchmark_commits.yml
	  secrets: inherit
	  with:
	    commit: ${{ matrix.commits }}
	    cibw_image_tag: ${{ needs.cibw_docker_image.outputs.tag }}
	    # `inputs` is only populated on workflow_dispatch; default to false otherwise.
	    run_all_benchmarks: ${{ inputs.run_all_benchmarks || false }}
	    run_on_pr_head: ${{ github.event_name == 'pull_request_target' }}

	# Extracts the stored benchmark results and publishes them with asv to gh-pages.
	# Only runs when the workflow ref is refs/heads/master.
	publish_benchmark_results_to_gh_pages:
	  name: Publish benchmark results to gh-pages
	  if: github.ref == 'refs/heads/master'
	  needs: [benchmark_commits]
	  runs-on: ubuntu-22.04
	  permissions:
	    contents: write

	  steps:
	    - uses: actions/[email protected]
	      with:
	        fetch-depth: 0
	        token: ${{ secrets.ARCTICDB_TEST_PAT }}
	        ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || '' }} # Note: This is dangerous if we run automatic CI on external PRs

	    - name: Set persistent storage variables
	      uses: ./.github/actions/set_persistent_storage_env_vars
	      with:
	        bucket: "arcticdb-ci-benchmark-results"
	        aws_access_key: "${{ secrets.AWS_S3_ACCESS_KEY }}"
	        aws_secret_key: "${{ secrets.AWS_S3_SECRET_KEY }}"

	    - name: Install ArcticDB[Testing]
	      shell: bash -el {0}
	      run: |
	        pip install arcticdb[Testing] "protobuf<5"

	    - name: Publish results to Github Pages
	      shell: bash -el {0}
	      run: |
	        git config --global --add safe.directory /__w/ArcticDB/ArcticDB
	        git config --global user.name "${GITHUB_ACTOR}"
	        git config --global user.email "${GITHUB_ACTOR_ID}+${GITHUB_ACTOR}@users.noreply.github.com"
	        python build_tooling/transform_asv_results.py --mode extract
	        python -m asv publish -v
	        python -m asv gh-pages -v --rewrite

	# Job configuration for the coverage build: runs inside the image produced by
	# cibw_docker_image, with a MongoDB service container alongside it.
	code_coverage:
	  needs: [cibw_docker_image]
	  runs-on: "ubuntu-22.04"
	  container:
	    image: ${{needs.cibw_docker_image.outputs.tag}}
	  services:
	    mongodb:
	      image: mongo:4.4
	      ports:
	        # Quoted so the mapping is always read as a string.
	        - "27017:27017"
	  env:
	    # Fall back to the repository owner / GITHUB_TOKEN when the dedicated secrets are unset.
	    VCPKG_NUGET_USER: ${{secrets.VCPKG_NUGET_USER || github.repository_owner}}
	    VCPKG_NUGET_TOKEN: ${{secrets.VCPKG_NUGET_TOKEN || secrets.GITHUB_TOKEN}}
	    VCPKG_MAN_NUGET_USER: ${{secrets.VCPKG_MAN_NUGET_USER}} # For forks to download pre-compiled dependencies from the Man repo
	    VCPKG_MAN_NUGET_TOKEN: ${{secrets.VCPKG_MAN_NUGET_TOKEN}}
	    ARCTIC_CMAKE_PRESET: linux-debug
	    ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
	  steps:
	# Check out the sources (with submodules), configure the build environment,
	# compile with the CMake preset and run the C++ test targets.
	- uses: actions/[email protected]
	  with:
	    submodules: recursive
	    ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || '' }} # Note: This is dangerous if we run automatic CI on external PRs

	- name: Get number of CPU cores
	  uses: SimenB/[email protected]
	  id: cpu-cores

	- name: Extra envs
	  # Appends the vcpkg settings, the CpuCount.cmake output and the
	  # coverage-build flag to $GITHUB_ENV for the following steps.
	  run: |
	    . build_tooling/vcpkg_caching.sh # Linux follower needs another call in CIBW
	    echo -e "VCPKG_BINARY_SOURCES=$VCPKG_BINARY_SOURCES
	    VCPKG_ROOT=$PLATFORM_VCPKG_ROOT" | tee -a $GITHUB_ENV
	    cmake -P cpp/CMake/CpuCount.cmake | sed 's/^-- //' | tee -a $GITHUB_ENV
	    echo "ARCTICDB_CODE_COVERAGE_BUILD=1" | tee -a $GITHUB_ENV
	  env:
	    CMAKE_BUILD_PARALLEL_LEVEL: ${{vars.CMAKE_BUILD_PARALLEL_LEVEL}}

	- name: Prepare C++ compilation env
	  run: . build_tooling/prep_cpp_build.sh

	- name: CMake compile
	  # We are pinning the version to 10.6 because >= 10.7, use node20 which is not supported in the container
	  uses: lukka/[email protected]
	  with:
	    cmakeListsTxtPath: ${{github.workspace}}/cpp/CMakeLists.txt
	    configurePreset: ${{env.ARCTIC_CMAKE_PRESET}}
	    buildPreset: ${{env.ARCTIC_CMAKE_PRESET}}
	  env:
	    ARCTICDB_DEBUG_FIND_PYTHON: ${{vars.ARCTICDB_DEBUG_FIND_PYTHON}}
	    python_impl_name: 'cp311'

	- name: Run C++ Tests
	  shell: bash -l {0}
	  run: |
	    cd cpp/out/linux-debug-build/
	    ls arcticdb
	    make -j ${{ steps.cpu-cores.outputs.count }} arcticdb_rapidcheck_tests
	    make -j ${{ steps.cpu-cores.outputs.count }} test_unit_arcticdb
	    ctest

	# We are changing the python here because we want to use the default python to build (it is devel version)
	# and this python for the rest of the testing
	- name: Select Python (Linux)
	  run: echo /opt/python/cp36-cp36m/bin >> $GITHUB_PATH

	# Install the package, produce the Python and C++ coverage reports, export the
	# TOTAL percentages to $GITHUB_ENV and upload both report archives.
	- name: Install local dependencies with pip
	  shell: bash
	  run: |
	    python -m pip install --upgrade pip
	    ARCTIC_CMAKE_PRESET=skip pip install -ve .[Testing]

	# - name: Test with pytest
	#   uses: ./.github/actions/run_local_pytest
	#   with:
	#     build_type: debug
	#     threads: 1
	#     fast_tests_only: 0
	#     other_params: '-m coverage run '

	- name: Get python Coverage report
	  shell: bash -l {0}
	  run: |
	    cd python
	    python -m coverage report -m | tee output.txt
	    python -m coverage html
	    zip -r python_cov.zip htmlcov/

	    # Last field of the TOTAL line, with the trailing '%' removed.
	    echo "PYTHON_COV_PERCENT=$(cat output.txt | grep 'TOTAL' | awk '{print $NF}' | tr -d '%')" >> $GITHUB_ENV

	- name: Run Gcovr manually post-pytest
	  shell: bash -l {0}
	  run: |
	    cd cpp/out/linux-debug-build/
	    python -m pip install gcovr
	    mkdir coverage
	    python -m gcovr --txt --html-details coverage/index.html -e vcpkg_installed/ -e proto/ -e ../../third_party -e ../../arcticdb/util/test/ -r ../.. --exclude-throw-branches --exclude-unreachable-branches -u --exclude-function-lines | tee output.txt
	    zip -r coverage.zip coverage/

	    # Last field of the TOTAL line, with the trailing '%' removed.
	    echo "CPP_COV_PERCENT=$(cat output.txt | grep 'TOTAL' | awk '{print $NF}' | tr -d '%')" >> $GITHUB_ENV

	- name: Upload Coverage
	  uses: actions/[email protected]
	  with:
	    name: cpp-coverage-artifact
	    path: cpp/out/linux-debug-build/coverage.zip

	- name: Upload Python Coverage
	  uses: actions/[email protected]
	  with:
	    name: python-coverage-artifact
	    path: python/python_cov.zip

	# Carry the C++ coverage percentage from one run to the next through the
	# Actions cache and compare the current value against the previous one.
	#
	# Restore and save use the SAME path: a cache entry is matched on its path and
	# its files are restored to the location they were saved from, so differing
	# restore/save paths can never produce a hit.
	# Cache entries are immutable, so each run saves under a unique key and the
	# restore step picks the most recent entry through the `coverage-` prefix.
	- name: Restore cached CPP Coverage Percentage from the previous run
	  id: cache-cov-restore
	  uses: actions/cache/[email protected]
	  with:
	    path: coverage.txt
	    key: coverage-${{ github.run_id }}-${{ github.run_attempt }}
	    restore-keys: |
	      coverage-

	- name: Get and compare coverage if cache was restored
	  run: |
	    # if cache was restored, compare coverage
	    if [ -f coverage.txt ]; then
	      # The file holds a single line of the form "Coverage: <percent>".
	      PREV_COVERAGE=$(cat coverage.txt | cut -d' ' -f2)
	      echo "Previous coverage: $PREV_COVERAGE"
	      CURR_COVERAGE=${{env.CPP_COV_PERCENT}}
	      echo "CPP_COV_PREV_PERCENT=$PREV_COVERAGE" >> $GITHUB_ENV
	      echo "Current coverage: $CURR_COVERAGE"
	      # Quoted so that an empty value cannot break the test syntax.
	      if [ "$CURR_COVERAGE" -gt "$PREV_COVERAGE" ]; then
	        echo "Coverage increased"
	      elif [ "$CURR_COVERAGE" -lt "$PREV_COVERAGE" ]; then
	        echo "Coverage decreased"
	      else
	        echo "Coverage unchanged"
	      fi
	    fi

	- name: Save CPP Coverage Percentage to file
	  # Runs after the comparison, so overwriting the restored file is safe.
	  run: |
	    echo "Coverage: ${{ env.CPP_COV_PERCENT }}" > coverage.txt

	- name: Save the current CPP Coverage Percentage to the cache
	  id: cache-cov-save
	  uses: actions/cache/[email protected]
	  with:
	    path: coverage.txt
	    key: coverage-${{ github.run_id }}-${{ github.run_attempt }}

	# Post to Slack when both percentages are set and the current one is lower.
	# NOTE(review): both operands are env strings; confirm that `<` compares them
	# numerically (e.g. "9" vs "10") rather than as text.
	- name: Check percentage and send Slack notification
	  if: ${{ env.CPP_COV_PREV_PERCENT && env.CPP_COV_PERCENT && env.CPP_COV_PERCENT < env.CPP_COV_PREV_PERCENT }}
	  uses: slackapi/[email protected]
	  with:
	    # For posting a rich message using Block Kit
	    payload: |
	      {
	        "text": "The CPP Code Coverage has been reduced",
	        "blocks": [
	          {
	            "type": "section",
	            "text": {
	              "type": "mrkdwn",
	              "text": "The CPP Code Coverage from the current run(${{ env.CPP_COV_PERCENT }}%) is lower the previous one(${{ env.CPP_COV_PREV_PERCENT }}%)."
	            }
	          }
	        ]
	      }
	  env:
	    SLACK_WEBHOOK_URL: ${{ secrets.ARCTICDB_DEV_WEBHOOK_URL }}
	    SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Refactor DataFrameNormalizer to improve performance #3699

Workflow file

Refactor DataFrameNormalizer to improve performance #3699

Jobs

Run details

Workflow file for this run