From e328ab51f1b8fcb035af7257d93692ed28f4d7ea Mon Sep 17 00:00:00 2001
From: magicgoody <131876064+magicgoody@users.noreply.github.com>
Date: Thu, 28 Sep 2023 21:28:07 +0600
Subject: [PATCH] Github Workflow Replacement for Jenkins Jobs,
 beam_PerformanceTests_ManyFiles_TextIOIT* (#28581)

* beam_PerformanceTests_ManyFiles_TextIOIT

* fix

* cron fix and whitespace
---
Reviewer notes (free text in this position is ignored by `git am`):
  This patch adds two workflows that can be started by a cron schedule, by a
  manual dispatch, or by an issue comment matching the job's trigger phrase.
  Each one runs org.apache.beam.sdk.io.text.TextIOIT through the Gradle task
  :sdks:java:io:file-based-io-tests:integrationTest, once with
  -Dfilesystem=gcs and once with -Dfilesystem=hdfs (after applying the Hadoop
  manifest with kubectl and reading the service's load-balancer IP). The two
  .txt files hold the pipeline options passed to the test-arguments action.
  Both workflows pin actions/checkout to the same major version (v4).

 ...am_PerformanceTests_ManyFiles_TextIOIT.yml |  91 +++++++++++++++
 ...rformanceTests_ManyFiles_TextIOIT_HDFS.yml | 109 ++++++++++++++++++
 .../config_ManyFiles_TextIOIT.txt             |  29 +++++
 .../config_ManyFiles_TextIOIT_HDFS.txt        |  29 +++++
 4 files changed, 258 insertions(+)
 create mode 100644 .github/workflows/beam_PerformanceTests_ManyFiles_TextIOIT.yml
 create mode 100644 .github/workflows/beam_PerformanceTests_ManyFiles_TextIOIT_HDFS.yml
 create mode 100644 .github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT.txt
 create mode 100644 .github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT_HDFS.txt

diff --git a/.github/workflows/beam_PerformanceTests_ManyFiles_TextIOIT.yml b/.github/workflows/beam_PerformanceTests_ManyFiles_TextIOIT.yml
new file mode 100644
index 000000000000..5834bd8ab3e0
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_ManyFiles_TextIOIT.yml
@@ -0,0 +1,91 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PerformanceTests ManyFiles TextIOIT
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '10 2/12 * * *'
+  workflow_dispatch:
+
+#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+  beam_PerformanceTests_ManyFiles_TextIOIT_test_arguments_1: ''
+
+jobs:
+  beam_PerformanceTests_ManyFiles_TextIOIT:
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java ManyFilesTextIO Performance Test'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_ManyFiles_TextIOIT"]
+        job_phrase: ["Run Java ManyFilesTextIO Performance Test"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: performance
+          test-language: java
+          argument-file-paths: |
+            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT.txt
+          arguments: |
+            --filenamePrefix=gs://temp-storage-for-perf-tests/${{ matrix.job_name }}/${{github.run_id}}/
+      - name: run integrationTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:java:io:file-based-io-tests:integrationTest
+          arguments: |
+            --tests org.apache.beam.sdk.io.text.TextIOIT \
+            --info \
+            -Dfilesystem=gcs \
+            -DintegrationTestRunner=dataflow \
+            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_ManyFiles_TextIOIT_test_arguments_1 }}]'
\ No newline at end of file
diff --git a/.github/workflows/beam_PerformanceTests_ManyFiles_TextIOIT_HDFS.yml b/.github/workflows/beam_PerformanceTests_ManyFiles_TextIOIT_HDFS.yml
new file mode 100644
index 000000000000..03163a41dcf9
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_ManyFiles_TextIOIT_HDFS.yml
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PerformanceTests ManyFiles TextIOIT HDFS
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '50 2/12 * * *'
+  workflow_dispatch:
+
+#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+  beam_PerformanceTests_ManyFiles_TextIOIT_HDFS_test_arguments_1: ''
+
+jobs:
+  beam_PerformanceTests_ManyFiles_TextIOIT_HDFS:
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java ManyFilesTextIO Performance Test HDFS'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_ManyFiles_TextIOIT_HDFS"]
+        job_phrase: ["Run Java ManyFilesTextIO Performance Test HDFS"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Authenticate on GCP
+        id: auth
+        uses: google-github-actions/auth@v1
+        with:
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+          project_id: ${{ secrets.GCP_PROJECT_ID }}
+      - name: Set k8s access
+        uses: ./.github/actions/setup-k8s-access
+        with:
+          k8s_namespace: ${{ matrix.job_name }}-${{ github.run_id }}
+      - name: Install Hadoop
+        id: install_hadoop
+        run: |
+          kubectl apply -f ${{ github.workspace }}/.test-infra/kubernetes/hadoop/LargeITCluster/hdfs-multi-datanode-cluster.yml
+          kubectl wait svc/hadoop --for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=120s
+          loadbalancer_IP=$(kubectl get svc hadoop -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+          echo hadoop_IP=$loadbalancer_IP >> $GITHUB_OUTPUT
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: performance
+          test-language: java
+          argument-file-paths: |
+            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT_HDFS.txt
+          arguments: |
+            --filenamePrefix=hdfs://${{ steps.install_hadoop.outputs.hadoop_IP }}:9000/TEXTIO_IT_
+            --hdfsConfiguration=[{\\\"fs.defaultFS\\\":\\\"hdfs:${{ steps.install_hadoop.outputs.hadoop_IP }}:9000\\\",\\\"dfs.replication\\\":1}]
+      - name: run integrationTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:java:io:file-based-io-tests:integrationTest
+          arguments: |
+            --tests org.apache.beam.sdk.io.text.TextIOIT \
+            --info \
+            -Dfilesystem=hdfs \
+            -DintegrationTestRunner=dataflow \
+            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_ManyFiles_TextIOIT_HDFS_test_arguments_1 }}]'
\ No newline at end of file
diff --git a/.github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT.txt b/.github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT.txt
new file mode 100644
index 000000000000..dcb6e31526ff
--- /dev/null
+++ b/.github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT.txt
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--runner=DataflowRunner
+--tempRoot=gs://temp-storage-for-perf-tests
+--bigQueryDataset=beam_performance
+--bigQueryTable=many_files_textioit_results
+--influxMeasurement=many_files_textioit_results
+--reportGcsPerformanceMetrics=true
+--gcsPerformanceMetrics=true
+--numberOfRecords=25000000
+--expectedHash=f8453256ccf861e8a312c125dfe0e436
+--datasetSize=1062290000
+--numberOfShards=1000
+--numWorkers=5
+--autoscalingAlgorithm=NONE
\ No newline at end of file
diff --git a/.github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT_HDFS.txt b/.github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT_HDFS.txt
new file mode 100644
index 000000000000..f01a4f488312
--- /dev/null
+++ b/.github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT_HDFS.txt
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--runner=DataflowRunner
+--tempRoot=gs://temp-storage-for-perf-tests
+--bigQueryDataset=beam_performance
+--bigQueryTable=many_files_textioit_hdfs_results
+--influxMeasurement=many_files_textioit_hdfs_results
+--reportGcsPerformanceMetrics=true
+--gcsPerformanceMetrics=true
+--numberOfRecords=25000000
+--expectedHash=f8453256ccf861e8a312c125dfe0e436
+--datasetSize=1062290000
+--numberOfShards=1000
+--numWorkers=5
+--autoscalingAlgorithm=NONE
\ No newline at end of file