diff --git a/.github/workflows/beam_LoadTests_Python_CoGBK_Dataflow_Batch.yml b/.github/workflows/beam_LoadTests_Python_CoGBK_Dataflow_Batch.yml new file mode 100644 index 000000000000..284ba2420654 --- /dev/null +++ b/.github/workflows/beam_LoadTests_Python_CoGBK_Dataflow_Batch.yml @@ -0,0 +1,127 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: LoadTests Python CoGBK Dataflow Batch + +on: + issue_comment: + types: [created] + schedule: + - cron: '30 11 * * *' + workflow_dispatch: + +# Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_LoadTests_Python_CoGBK_Dataflow_Batch: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Load Tests Python CoGBK Dataflow Batch' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 720 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_LoadTests_Python_CoGBK_Dataflow_Batch"] + job_phrase: ["Run Load Tests Python CoGBK Dataflow Batch"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + python-version: 3.8 + - name: Prepare test arguments + uses: ./.github/actions/test-arguments-action + with: + test-type: load + test-language: python + argument-file-paths: | + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_100b_Single_Key.txt + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_100b_Multiple_Keys.txt + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_10kB.txt + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_2MB.txt + - name: Set current datetime + id: datetime + run: | + echo "datetime=$(date '+%m%d%H%M%S' --utc)" >> $GITHUB_OUTPUT + # The env variables are created and populated in the test-arguments-action as "_test_arguments_" + - name: run CoGBK 2GB of 100B records with a single key + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + --info \ + -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.8 \ + '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Dataflow_Batch_test_arguments_1 }} --job_name=load-tests-python-dataflow-batch-cogbk-1-${{ steps.datetime.outputs.datetime }}' \ + - name: run CoGBK 2GB of 100B records with multiple keys + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.8 \ + '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Dataflow_Batch_test_arguments_2 }} --job_name=load-tests-python-dataflow-batch-cogbk-2-${{ steps.datetime.outputs.datetime }}' \ + - name: run CoGBK reiterate 4 times 10kB values + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.8 \ + '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Dataflow_Batch_test_arguments_3 }} --job_name=load-tests-python-dataflow-batch-cogbk-3-${{ steps.datetime.outputs.datetime }}' \ + - name: run CoGBK reiterate 4 times 2MB values + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.8 \ + '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Dataflow_Batch_test_arguments_4 }} --job_name=load-tests-python-dataflow-batch-cogbk-4-${{ steps.datetime.outputs.datetime }}' \ No newline at end of file diff --git a/.github/workflows/beam_LoadTests_Python_CoGBK_Dataflow_Streaming.yml b/.github/workflows/beam_LoadTests_Python_CoGBK_Dataflow_Streaming.yml new file mode 100644 index 000000000000..313ba94f9df3 --- /dev/null +++ b/.github/workflows/beam_LoadTests_Python_CoGBK_Dataflow_Streaming.yml @@ -0,0 +1,126 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: LoadTests Python CoGBK Dataflow Streaming + +on: + issue_comment: + types: [created] + schedule: + - cron: '30 11 * * *' + workflow_dispatch: + +# Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_LoadTests_Python_CoGBK_Dataflow_Streaming: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Load Tests Python CoGBK Dataflow Streaming' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 720 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_LoadTests_Python_CoGBK_Dataflow_Streaming"] + job_phrase: ["Run Load Tests Python CoGBK Dataflow Streaming"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + python-version: 3.8 + - name: Prepare test arguments + uses: ./.github/actions/test-arguments-action + with: + test-type: load + test-language: python + argument-file-paths: | + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_100b_Single_Key.txt + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_100b_Multiple_Keys.txt + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_10kB.txt + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_2MB.txt + - name: Set current datetime + id: datetime + run: | + echo "datetime=$(date '+%m%d%H%M%S' --utc)" >> $GITHUB_OUTPUT + # The env variables are created and populated in the test-arguments-action as "_test_arguments_" + - name: run CoGBK 2GB of 100B records with a single key + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.8 \ + '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Dataflow_Streaming_test_arguments_1 }} --job_name=load-tests-python-dataflow-streaming-cogbk-1-${{ steps.datetime.outputs.datetime }}' \ + - name: run CoGBK 2GB of 100B records with multiple keys + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.8 \ + '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Dataflow_Streaming_test_arguments_2 }} --job_name=load-tests-python-dataflow-streaming-cogbk-2-${{ steps.datetime.outputs.datetime }}' \ + - name: run CoGBK reiterate 4 times 10kB values + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.8 \ + '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Dataflow_Streaming_test_arguments_3 }} --job_name=load-tests-python-dataflow-streaming-cogbk-3-${{ steps.datetime.outputs.datetime }}' \ + - name: run CoGBK reiterate 4 times 2MB values + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.8 \ + '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Dataflow_Streaming_test_arguments_4 }} --job_name=load-tests-python-dataflow-streaming-cogbk-4-${{ steps.datetime.outputs.datetime }}' \ No newline at end of file diff --git a/.github/workflows/beam_LoadTests_Python_CoGBK_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Python_CoGBK_Flink_Batch.yml new file mode 100644 index 000000000000..6d668c1b2566 --- /dev/null +++ b/.github/workflows/beam_LoadTests_Python_CoGBK_Flink_Batch.yml @@ -0,0 +1,135 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: LoadTests Python CoGBK Dataflow Flink Batch + +on: + issue_comment: + types: [created] + schedule: + - cron: '40 12 * * *' + workflow_dispatch: + +# Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GCLOUD_ZONE: us-central1-a + CLUSTER_NAME: beam-loadtests-python-cogbk-flink-batch-${{ github.run_id }} + GCS_BUCKET: gs://beam-flink-cluster + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar + FLINK_TASKMANAGER_SLOTS: 1 + DETACHED_MODE: true + HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-python-cogbk-flink-batch-${{ github.run_id }} + +jobs: + beam_LoadTests_Python_CoGBK_Flink_Batch: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Load Tests Python CoGBK Flink Batch' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 720 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_LoadTests_Python_CoGBK_Flink_Batch"] + job_phrase: ["Run Load Tests Python CoGBK Flink Batch"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: '3.8' + - name: Prepare test arguments + uses: ./.github/actions/test-arguments-action + with: + test-type: load + test-language: python + argument-file-paths: | + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_100b_Single_Key.txt + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_100b_Multiple_Keys.txt + ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_10kB.txt + - name: Start Flink with parallelism 5 + env: + FLINK_NUM_WORKERS: 5 + run: | + cd ${{ github.workspace }}/.test-infra/dataproc; ./flink_cluster.sh create + - name: Set current datetime + id: datetime + run: | + echo "datetime=$(date '+%m%d%H%M%S' --utc)" >> $GITHUB_OUTPUT + # The env variables are created and populated in the test-arguments-action as "_test_arguments_" + - name: run CoGBK 2GB of 100B records with a single key + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + --info \ + -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \ + -Prunner=FlinkRunner \ + '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Flink_Batch_test_arguments_1 }} --job_name=load-tests-python-flink-batch-cogbk-1-${{ steps.datetime.outputs.datetime }}' \ + - name: run CoGBK 2GB of 100B records with multiple keys + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + --info \ + -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \ + -Prunner=FlinkRunner \ + '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Flink_Batch_test_arguments_2 }} --job_name=load-tests-python-flink-batch-cogbk-2-${{ steps.datetime.outputs.datetime }}' \ + - name: run CoGBK reiterate 4 times 10kB values + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + --info \ + -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \ + -Prunner=FlinkRunner \ + '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Flink_Batch_test_arguments_2 }} --job_name=load-tests-python-flink-batch-cogbk-3-${{ steps.datetime.outputs.datetime }}' \ + - name: Teardown Flink + if: always() + run: | + ${{ github.workspace }}/.test-infra/dataproc/flink_cluster.sh delete \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_100b_Multiple_Keys.txt b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_100b_Multiple_Keys.txt new file mode 100644 index 000000000000..d5ba43180738 --- /dev/null +++ b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_100b_Multiple_Keys.txt @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--region=us-central1 +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--publish_to_big_query=true +--metrics_dataset=load_test +--metrics_table=python_dataflow_batch_cogbk_2 +--influx_measurement=python_batch_cogbk_2 +--input_options=''{\\"num_records\\":20000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":5,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--co_input_options=''{\\"num_records\\":2000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1000,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--iterations=1 +--num_workers=5 +--autoscaling_algorithm=NONE +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_100b_Single_Key.txt b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_100b_Single_Key.txt new file mode 100644 index 000000000000..47ebf22dc835 --- /dev/null +++ b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_100b_Single_Key.txt @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--region=us-central1 +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--publish_to_big_query=true +--metrics_dataset=load_test +--metrics_table=python_dataflow_batch_cogbk_1 +--influx_measurement=python_batch_cogbk_1 +--input_options=''{\\"num_records\\":20000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--co_input_options=''{\\"num_records\\":2000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1000,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--iterations=1 +--num_workers=5 +--autoscaling_algorithm=NONE +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_10kB.txt b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_10kB.txt new file mode 100644 index 000000000000..13161125b570 --- /dev/null +++ b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_10kB.txt @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--region=us-central1 +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--publish_to_big_query=true +--metrics_dataset=load_test +--metrics_table=python_dataflow_batch_cogbk_3 +--influx_measurement=python_batch_cogbk_3 +--input_options=''{\\"num_records\\":20000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":200000,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--co_input_options=''{\\"num_records\\":2000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1000,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--iterations=4 +--num_workers=5 +--autoscaling_algorithm=NONE +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_2MB.txt b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_2MB.txt new file mode 100644 index 000000000000..052c2464a1cc --- /dev/null +++ b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Batch_2MB.txt @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--region=us-central1 +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--publish_to_big_query=true +--metrics_dataset=load_test +--metrics_table=python_dataflow_batch_cogbk_4 +--influx_measurement=python_batch_cogbk_4 +--input_options=''{\\"num_records\\":20000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1000,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--co_input_options=''{\\"num_records\\":2000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1000,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--iterations=4 +--num_workers=5 +--autoscaling_algorithm=NONE +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_100b_Multiple_Keys.txt b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_100b_Multiple_Keys.txt new file mode 100644 index 000000000000..4b8a2f72010b --- /dev/null +++ b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_100b_Multiple_Keys.txt @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--publish_to_big_query=true +--metrics_dataset=load_test +--metrics_table=python_flink_batch_cogbk_2 +--influx_measurement=python_batch_cogbk_2 +--input_options=''{\\"num_records\\":20000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":5,\\"hot_key_fraction\\":1}'' +--co_input_options=''{\\"num_records\\":2000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":5,\\"hot_key_fraction\\":1}'' +--iterations=1 +--parallelism=5 +--endpoint=localhost:8099 +--environment_type=DOCKER +--environment_config=gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_100b_Single_Key.txt b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_100b_Single_Key.txt new file mode 100644 index 000000000000..3aeb927f04ee --- /dev/null +++ b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_100b_Single_Key.txt @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--publish_to_big_query=true +--metrics_dataset=load_test +--metrics_table=python_flink_batch_cogbk_1 +--influx_measurement=python_batch_cogbk_1 +--input_options=''{\\"num_records\\":20000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1,\\"hot_key_fraction\\":1}'' +--co_input_options=''{\\"num_records\\":2000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1000,\\"hot_key_fraction\\":1}'' +--iterations=1 +--parallelism=5 +--endpoint=localhost:8099 +--environment_type=DOCKER +--environment_config=gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_10kB.txt b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_10kB.txt new file mode 100644 index 000000000000..e350e2d29944 --- /dev/null +++ b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_10kB.txt @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--publish_to_big_query=true +--metrics_dataset=load_test +--metrics_table=python_flink_batch_cogbk_3 +--influx_measurement=python_batch_cogbk_3 +--input_options=''{\\"num_records\\":20000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":200000,\\"hot_key_fraction\\":1}'' +--co_input_options=''{\\"num_records\\":2000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1000,\\"hot_key_fraction\\":1}'' +--iterations=4 +--parallelism=5 +--endpoint=localhost:8099 +--environment_type=DOCKER +--environment_config=gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_100b_Multiple_Keys.txt b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_100b_Multiple_Keys.txt new file mode 100644 index 000000000000..a687f0cf5de7 --- /dev/null +++ b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_100b_Multiple_Keys.txt @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--region=us-central1 +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--publish_to_big_query=true +--metrics_dataset=load_test +--metrics_table=python_dataflow_streaming_cogbk_2 +--influx_measurement=python_streaming_cogbk_2 +--input_options=''{\\"num_records\\":20000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":5,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--co_input_options=''{\\"num_records\\":2000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1000,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--iterations=1 +--num_workers=5 +--autoscaling_algorithm=NONE +--streaming +--worker_machine_type=n1-highmem-4 +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_100b_Single_Key.txt b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_100b_Single_Key.txt new file mode 100644 index 000000000000..9141182b90fc --- /dev/null +++ b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_100b_Single_Key.txt @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--region=us-central1 +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--publish_to_big_query=true +--metrics_dataset=load_test +--metrics_table=python_dataflow_streaming_cogbk_1 +--influx_measurement=python_streaming_cogbk_1 +--input_options=''{\\"num_records\\":20000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--co_input_options=''{\\"num_records\\":2000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1000,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--iterations=1 +--num_workers=5 +--autoscaling_algorithm=NONE +--streaming +--worker_machine_type=n1-highmem-4 +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_10kB.txt b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_10kB.txt new file mode 100644 index 000000000000..7250f073f25e --- /dev/null +++ b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_10kB.txt @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--region=us-central1 +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--publish_to_big_query=true +--metrics_dataset=load_test +--metrics_table=python_dataflow_streaming_cogbk_3 +--influx_measurement=python_streaming_cogbk_3 +--input_options=''{\\"num_records\\":20000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":200000,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--co_input_options=''{\\"num_records\\":2000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1000,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--iterations=4 +--num_workers=5 +--autoscaling_algorithm=NONE +--streaming +--worker_machine_type=n1-highmem-4 +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_2MB.txt b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_2MB.txt new file mode 100644 index 000000000000..59723107d53c --- /dev/null +++ b/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Streaming_2MB.txt @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--region=us-central1 +--temp_location=gs://temp-storage-for-perf-tests/loadtests +--publish_to_big_query=true +--metrics_dataset=load_test +--metrics_table=python_dataflow_streaming_cogbk_4 +--influx_measurement=python_streaming_cogbk_4 +--input_options=''{\\"num_records\\":20000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1000,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--co_input_options=''{\\"num_records\\":2000000,\\"key_size\\":10,\\"value_size\\":90,\\"num_hot_keys\\":1000,\\"hot_key_fraction\\":1,\\"algorithm\\":\\"lcg\\"}'' +--iterations=4 +--num_workers=5 +--autoscaling_algorithm=NONE +--streaming +--worker_machine_type=n1-highmem-4 +--runner=DataflowRunner \ No newline at end of file