From 42c80fd8a9f6b4e19ab65ccae2fb218e803e7aff Mon Sep 17 00:00:00 2001 From: Vitaly Terentyev Date: Thu, 21 Sep 2023 18:55:52 +0400 Subject: [PATCH] Add Python Load Tests Combine Dataflow Batch github action (#28431) * Add Python Load Tests Combine Dataflow Batch github action * Change cron * Refactoring --- .github/workflows/README.md | 1 + ...oadTests_Python_Combine_Dataflow_Batch.yml | 115 ++++++++++++++++++ .../config_Combine_Python_Batch_2GB_10b.txt | 32 +++++ ...nfig_Combine_Python_Batch_2GB_Fanout_4.txt | 33 +++++ ...nfig_Combine_Python_Batch_2GB_Fanout_8.txt | 33 +++++ 5 files changed, 214 insertions(+) create mode 100644 .github/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch.yml create mode 100644 .github/workflows/load-tests-job-configs/config_Combine_Python_Batch_2GB_10b.txt create mode 100644 .github/workflows/load-tests-job-configs/config_Combine_Python_Batch_2GB_Fanout_4.txt create mode 100644 .github/workflows/load-tests-job-configs/config_Combine_Python_Batch_2GB_Fanout_8.txt diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 0d0277bd478d..4b1bf01b5e3d 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -181,6 +181,7 @@ Please note that jobs with matrix need to have matrix element in the comment. 
Ex | Workflow name | Matrix | Trigger Phrase | Cron Status | |:-------------:|:------:|:--------------:|:-----------:| | [ Load Tests CoGBK Dataflow Streaming Java ](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml) | N/A |`Run Load Tests Java CoGBK Dataflow Streaming`| [![.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml) +| [ Load Tests Combine Dataflow Batch Python ](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch.yml) | N/A |`Run Load Tests Python Combine Dataflow Batch`| [![.github/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch.yml) | [ Performance Tests BigQueryIO Batch Java Avro ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml) | N/A |`Run BigQueryIO Batch Performance Test Java Avro`| [![.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml) | [ Performance Tests BigQueryIO Batch Java Json ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml) | N/A |`Run BigQueryIO Batch Performance Test Java Json`| 
[![.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml) | [ Performance Tests BigQueryIO Streaming Java ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml) | N/A |`Run BigQueryIO Streaming Performance Test Java`| [![.github/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml) diff --git a/.github/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch.yml b/.github/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch.yml new file mode 100644 index 000000000000..f7d7a056d595 --- /dev/null +++ b/.github/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch.yml @@ -0,0 +1,115 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+name: Load Tests Combine Dataflow Batch Python
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '40 5 * * *'  # once a day at 05:40
+  workflow_dispatch:
+
+# Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: read
+  checks: read
+  contents: read
+  deployments: read
+  id-token: none
+  issues: read
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run (same group key) to cancel the run that is still in progress
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
+jobs:
+  beam_LoadTests_Python_Combine_Dataflow_Batch:
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Load Tests Python Combine Dataflow Batch'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 720  # 12 hours
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    strategy:
+      matrix:
+        job_name: ["beam_LoadTests_Python_Combine_Dataflow_Batch"]
+        job_phrase: ["Run Load Tests Python Combine Dataflow Batch"]
+    steps:
+      - uses: actions/checkout@v3
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Install Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.8'
+      - name: Prepare configs
+        # For each config file: drops lines starting with '#', joins the remaining options with spaces, appends the current UTC timestamp to the --job_name value and exports the result as step output prepared_config_<N> (N = 1..3)
+        id: set_configs
+        shell: bash
+        run: |
+          CURDATE=$(date '+%m%d%H%M%S' --utc)  # month, day, hour, minute, second (UTC)
+          CONFIG_ARR=('config_Combine_Python_Batch_2GB_10b.txt' 'config_Combine_Python_Batch_2GB_Fanout_4.txt' 'config_Combine_Python_Batch_2GB_Fanout_8.txt')
+          for INDEX in "${!CONFIG_ARR[@]}"
+          do
+            CURCONFIG=$(grep -v "^#.*" "./.github/workflows/load-tests-job-configs/${CONFIG_ARR[INDEX]}" | tr '\n' ' ')
+            CURCONFIG="${CURCONFIG/load-tests-python-dataflow-batch-combine-$((INDEX + 1))-/load-tests-python-dataflow-batch-combine-$((INDEX + 1))-$CURDATE}"
+            echo "prepared_config_$((INDEX + 1))=$CURCONFIG" >> "$GITHUB_OUTPUT"
+          done
+      - name: run Combine Dataflow Batch Python Load Test 1 (10 bytes records)
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.load_tests.combine_test \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.8 \
+            '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_1 }}' \
+      - name: run Combine Dataflow Batch Python Load Test 2 (fanout 4)
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.load_tests.combine_test \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.8 \
+            '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_2 }}' \
+      - name: run Combine Dataflow Batch Python Load Test 3 (fanout 8)
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.load_tests.combine_test \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.8 \
+            '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_3 }}' \
\ No newline at end of file
diff --git 
a/.github/workflows/load-tests-job-configs/config_Combine_Python_Batch_2GB_10b.txt b/.github/workflows/load-tests-job-configs/config_Combine_Python_Batch_2GB_10b.txt
new file mode 100644
index 000000000000..a6dabb5e5086
--- /dev/null
+++ b/.github/workflows/load-tests-job-configs/config_Combine_Python_Batch_2GB_10b.txt
@@ -0,0 +1,32 @@
+###############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+--job_name=load-tests-python-dataflow-batch-combine-1-
+--project=apache-beam-testing
+--region=us-central1
+--temp_location=gs://temp-storage-for-perf-tests/smoketests
+--publish_to_big_query=true
+--metrics_dataset=load_test
+--metrics_table=python_dataflow_batch_combine_1
+--influx_measurement=python_batch_combine_1
+--input_options=''{\\"num_records\\":200000000,\\"key_size\\":1,\\"value_size\\":9,\\"algorithm\\":\\"lcg\\"}''
+--num_workers=5
+--autoscaling_algorithm=NONE
+--top_count=20
+--influx_db_name=beam_test_metrics
+--influx_hostname=http://10.128.0.96:8086
+--runner=DataflowRunner
\ No newline at end of file
diff --git a/.github/workflows/load-tests-job-configs/config_Combine_Python_Batch_2GB_Fanout_4.txt b/.github/workflows/load-tests-job-configs/config_Combine_Python_Batch_2GB_Fanout_4.txt
new file mode 100644
index 000000000000..7639456296b6
--- /dev/null
+++ b/.github/workflows/load-tests-job-configs/config_Combine_Python_Batch_2GB_Fanout_4.txt
@@ -0,0 +1,33 @@
+###############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+--job_name=load-tests-python-dataflow-batch-combine-2-
+--project=apache-beam-testing
+--region=us-central1
+--temp_location=gs://temp-storage-for-perf-tests/smoketests
+--publish_to_big_query=true
+--metrics_dataset=load_test
+--metrics_table=python_dataflow_batch_combine_2
+--influx_measurement=python_batch_combine_2
+--input_options=''{\\"num_records\\":5000000,\\"key_size\\":10,\\"value_size\\":90,\\"algorithm\\":\\"lcg\\"}''
+--num_workers=16
+--autoscaling_algorithm=NONE
+--fanout=4
+--top_count=20
+--influx_db_name=beam_test_metrics
+--influx_hostname=http://10.128.0.96:8086
+--runner=DataflowRunner
\ No newline at end of file
diff --git a/.github/workflows/load-tests-job-configs/config_Combine_Python_Batch_2GB_Fanout_8.txt b/.github/workflows/load-tests-job-configs/config_Combine_Python_Batch_2GB_Fanout_8.txt
new file mode 100644
index 000000000000..e5d46791a83c
--- /dev/null
+++ b/.github/workflows/load-tests-job-configs/config_Combine_Python_Batch_2GB_Fanout_8.txt
@@ -0,0 +1,33 @@
+###############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+--job_name=load-tests-python-dataflow-batch-combine-3-
+--project=apache-beam-testing
+--region=us-central1
+--temp_location=gs://temp-storage-for-perf-tests/smoketests
+--publish_to_big_query=true
+--metrics_dataset=load_test
+--metrics_table=python_dataflow_batch_combine_3
+--influx_measurement=python_batch_combine_3
+--input_options=''{\\"num_records\\":2500000,\\"key_size\\":10,\\"value_size\\":90,\\"algorithm\\":\\"lcg\\"}''
+--num_workers=16
+--autoscaling_algorithm=NONE
+--fanout=8
+--top_count=20
+--influx_db_name=beam_test_metrics
+--influx_hostname=http://10.128.0.96:8086
+--runner=DataflowRunner
\ No newline at end of file