-
Notifications
You must be signed in to change notification settings - Fork 4.3k
135 lines (129 loc) · 6.32 KB
/
beam_LoadTests_Python_CoGBK_Flink_Batch.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE(review): the name says "Dataflow", but the job below runs with
# -Prunner=FlinkRunner — confirm the name is intentional.
name: LoadTests Python CoGBK Dataflow Flink Batch

# Triggers:
#   - issue_comment: a newly created comment (the job's `if` filters on the exact phrase)
#   - schedule: cron '40 12 * * *', i.e. once a day at minute 40 of hour 12
#   - workflow_dispatch: manual run
on:
  issue_comment:
    types:
      - created
  schedule:
    - cron: '40 12 * * *'
  workflow_dispatch:
# Explicit token permissions, so the workflow never falls back to the default
# permission set (which can be `write-all`, e.g. for pull_request_target events).
# Every scope is read-only except `actions` (write) and `id-token` (disabled).
permissions:
  actions: write
  checks: read
  contents: read
  deployments: read
  discussions: read
  id-token: none
  issues: read
  packages: read
  pages: read
  pull-requests: read
  repository-projects: read
  security-events: read
  statuses: read
# A newly queued run in the same concurrency group cancels the run that is
# still in progress. The group key is built from the workflow name, the
# issue number / commit SHA / ref, and the schedule, comment body or sender.
# The folded scalar (`>-`) joins the two lines below with a single space.
concurrency:
  group: >-
    ${{ github.workflow }} @
    ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}
  cancel-in-progress: true
# Workflow-wide environment variables (visible to every step of every job).
env:
  # Gradle Enterprise credentials, read from repository secrets.
  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}

  # Flink cluster settings — presumably consumed by
  # .test-infra/dataproc/flink_cluster.sh; verify against that script.
  # The run id suffix makes the cluster name and artifacts dir unique per run.
  GCLOUD_ZONE: us-central1-a
  CLUSTER_NAME: beam-loadtests-python-cogbk-flink-batch-${{ github.run_id }}
  GCS_BUCKET: gs://beam-flink-cluster
  ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-python-cogbk-flink-batch-${{ github.run_id }}
  FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz
  HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
  # Environment values are always strings; quoted to make that explicit.
  FLINK_TASKMANAGER_SLOTS: '1'
  DETACHED_MODE: 'true'

  # Container images.
  # NOTE(review): this names the Go SDK harness image although the workflow
  # runs Python load tests — confirm whether a Python SDK image was intended.
  HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest
  JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest
jobs:
  # Runs three Python CoGroupByKey load tests on the Flink runner:
  # creates a Flink cluster, runs the tests, then always deletes the cluster.
  beam_LoadTests_Python_CoGBK_Flink_Batch:
    # Run on manual dispatch, on the cron schedule, or when a comment body
    # is exactly the trigger phrase.
    if: |
      github.event_name == 'workflow_dispatch' ||
      github.event_name == 'schedule' ||
      github.event.comment.body == 'Run Load Tests Python CoGBK Flink Batch'
    runs-on: [self-hosted, ubuntu-20.04, main]
    # 720 minutes = 12 hours.
    timeout-minutes: 720
    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
    # Single-entry matrix: only used to provide job_name / job_phrase values.
    strategy:
      matrix:
        job_name: ["beam_LoadTests_Python_CoGBK_Flink_Batch"]
        job_phrase: ["Run Load Tests Python CoGBK Flink Batch"]
    steps:
      - uses: actions/checkout@v3
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
      - name: Install Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.8'
      # One argument file per load test below, in the same order
      # (1: single key, 2: multiple keys, 3: 10kB values).
      - name: Prepare test arguments
        uses: ./.github/actions/test-arguments-action
        with:
          test-type: load
          test-language: python
          argument-file-paths: |
            ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_100b_Single_Key.txt
            ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_100b_Multiple_Keys.txt
            ${{ github.workspace }}/.github/workflows/load-tests-job-configs/python-cogbk/python_CoGBK_Dataflow_Flink_Batch_10kB.txt
      - name: Start Flink with parallelism 5
        env:
          FLINK_NUM_WORKERS: 5
        run: |
          cd ${{ github.workspace }}/.test-infra/dataproc; ./flink_cluster.sh create
      # Timestamp (UTC, MMDDhhmmss) appended to each job name below.
      - name: Set current datetime
        id: datetime
        run: |
          echo "datetime=$(date '+%m%d%H%M%S' --utc)" >> $GITHUB_OUTPUT
      # The env variables are created and populated in the test-arguments-action as "<github.job>_test_arguments_<argument_file_paths_index>"
      - name: run CoGBK 2GB of 100B records with a single key
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
          arguments: |
            --info \
            -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \
            -Prunner=FlinkRunner \
            '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Flink_Batch_test_arguments_1 }} --job_name=load-tests-python-flink-batch-cogbk-1-${{ steps.datetime.outputs.datetime }}' \
      - name: run CoGBK 2GB of 100B records with multiple keys
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
          arguments: |
            --info \
            -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \
            -Prunner=FlinkRunner \
            '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Flink_Batch_test_arguments_2 }} --job_name=load-tests-python-flink-batch-cogbk-2-${{ steps.datetime.outputs.datetime }}' \
      # Uses the third argument file (10kB values); this previously reused
      # test_arguments_2 and so re-ran the multiple-keys configuration.
      - name: run CoGBK reiterate 4 times 10kB values
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
          arguments: |
            --info \
            -PloadTest.mainClass=apache_beam.testing.load_tests.co_group_by_key_test \
            -Prunner=FlinkRunner \
            '-PloadTest.args=${{ env.beam_LoadTests_Python_CoGBK_Flink_Batch_test_arguments_3 }} --job_name=load-tests-python-flink-batch-cogbk-3-${{ steps.datetime.outputs.datetime }}' \
      # always(): delete the cluster even when an earlier step failed or the
      # run was cancelled.
      - name: Teardown Flink
        if: always()
        run: |
          ${{ github.workspace }}/.test-infra/dataproc/flink_cluster.sh delete