From a1ecc8b61a206a1987646bcb6efa4dc083455126 Mon Sep 17 00:00:00 2001
From: Andrey Devyatkin
Date: Tue, 26 Sep 2023 00:34:09 +0200
Subject: [PATCH] Migrate "beam_PerformanceTests_AvroIOIT" Jenkins jobs to GitHub Actions (#28580)

* added beam_PerformanceTests_AvroIOIT jobs to GitHub Actions
* added the 'test-arguments-action' action to encapsulate common arguments and their processing
---
 .github/actions/setup-k8s-access/action.yml   |  73 ++++++++++++
 .../actions/test-arguments-action/action.yml  |  96 +++++++++++++++
 .github/workflows/README.md                   |  12 +-
 .../beam_PerformanceTests_AvroIOIT.yml        |  91 +++++++++++++++
 .../beam_PerformanceTests_AvroIOIT_HDFS.yml   | 110 ++++++++++++++++++
 .../config_AvroIOIT.txt                       |  26 +++++
 .../config_AvroIOIT_HDFS.txt                  |  26 +++++
 7 files changed, 429 insertions(+), 5 deletions(-)
 create mode 100644 .github/actions/setup-k8s-access/action.yml
 create mode 100644 .github/actions/test-arguments-action/action.yml
 create mode 100644 .github/workflows/beam_PerformanceTests_AvroIOIT.yml
 create mode 100644 .github/workflows/beam_PerformanceTests_AvroIOIT_HDFS.yml
 create mode 100644 .github/workflows/performance-tests-job-configs/config_AvroIOIT.txt
 create mode 100644 .github/workflows/performance-tests-job-configs/config_AvroIOIT_HDFS.txt

diff --git a/.github/actions/setup-k8s-access/action.yml b/.github/actions/setup-k8s-access/action.yml
new file mode 100644
index 000000000000..79a2ac8d243c
--- /dev/null
+++ b/.github/actions/setup-k8s-access/action.yml
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# Composite action that sets up Kubernetes access for the current workflow run. It fetches the cluster credentials with gcloud,
+# creates a namespace derived from `k8s_namespace` ('_' replaced with '-', lower-cased) and makes it the default for kubectl.
+# The namespace is deleted again by the post step registered at the end of this action.
+
+name: "Setup Kubernetes Access"
+description: Sets up Kubernetes access in GCP for the current workflow
+inputs:
+  cluster_name:
+    description: "Name of the GKE cluster to fetch credentials for"
+    required: true
+    default: "io-datastores"
+  k8s_namespace:
+    description: "Name of the namespace to be created"
+    required: true
+  cluster_zone:
+    description: "Zone of the GKE cluster"
+    required: true
+    default: "us-central1-a"
+
+
+runs:
+  using: composite
+  steps:
+    - name: Check if inputs were provided
+      shell: bash
+      run: |
+        if [ -z "${{ inputs.k8s_namespace }}" ]; then
+          echo "Kubernetes namespace not provided"
+          exit 1
+        fi
+    - name: replace '_' with '-' in namespace
+      shell: bash
+      id: replace_namespace
+      run: |
+        TEST_NAMESPACE=$(echo "${{ inputs.k8s_namespace }}" | tr '_' '-' | tr '[:upper:]' '[:lower:]')
+        echo "TEST_NAMESPACE=$TEST_NAMESPACE" >> "$GITHUB_OUTPUT"
+    - name: Get the kubeconfig using gcloud
+      shell: bash
+      run: |
+        gcloud container clusters get-credentials ${{ inputs.cluster_name }} --zone ${{ inputs.cluster_zone }} --project apache-beam-testing
+    - name: Create namespace
+      shell: bash
+      run: |
+        kubectl create namespace ${{ steps.replace_namespace.outputs.TEST_NAMESPACE }}
+    - name: Set default namespace
+      shell: bash
+      run: |
+        kubectl config set-context --current --namespace=${{ steps.replace_namespace.outputs.TEST_NAMESPACE }}
+    - name: Post cleanup
+      uses: pyTooling/Actions/with-post-step@v0.4.6
+      with:
+        main: echo "Post Cleanup"
+        post: |
+          echo "Post Cleanup"
+          kubectl delete namespace ${{ steps.replace_namespace.outputs.TEST_NAMESPACE }}
diff --git a/.github/actions/test-arguments-action/action.yml b/.github/actions/test-arguments-action/action.yml
new file mode 100644
index 000000000000..17814098980e
--- /dev/null
+++ b/.github/actions/test-arguments-action/action.yml
@@ -0,0 +1,96 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: 'Set test arguments action'
+description: 'Set test arguments action to run the test'
+inputs:
+  argument-file-paths:
+    required: true
+    description: 'List of paths to files with test arguments'
+    default: ''
+  arguments:
+    required: false
+    description: 'Test arguments generated at runtime'
+    default: ''
+  test-type:
+    required: true
+    description: 'Specify if this is a "load" or "performance" test'
+  test-language:
+    required: true
+    description: 'Specify if this is a "java", "python" or "go" test'
+
+runs:
+  using: composite
+  steps:
+    - name: Check if test-type was provided
+      shell: bash
+      run: |
+        if [ -z "${{ inputs.test-type }}" ]; then
+          echo "Test type was not provided"
+          exit 1
+        fi
+    - name: Check if test-language was provided
+      shell: bash
+      run: |
+        if [ -z "${{ inputs.test-language }}" ]; then
+          echo "Test language was not provided"
+          exit 1
+        fi
+    - name: Set common arguments
+      id: common_arguments
+      shell: bash
+      run: |
+        echo project="apache-beam-testing" >> $GITHUB_OUTPUT
+        echo influx_db_name="beam_test_metrics" >> $GITHUB_OUTPUT
+        echo influx_host="http://10.128.0.96:8086" >> $GITHUB_OUTPUT
+    - name: Get default ${{ inputs.test-language }} test arguments
+      id: default_arguments
+      shell: bash
+      run: |
+        DEFAULT_ARGUMENTS=""
+        if ${{ inputs.test-language == 'java' }}; then  # the expression renders as the shell command `true` or `false`
+          DEFAULT_ARGUMENTS=$(echo "
+            --project=${{ steps.common_arguments.outputs.project }}
+            --influxDatabase=${{ steps.common_arguments.outputs.influx_db_name }}
+            --influxHost=${{ steps.common_arguments.outputs.influx_host }}
+          ")
+        elif ${{ inputs.test-language == 'python' || inputs.test-language == 'go' }}; then
+          DEFAULT_ARGUMENTS=$(echo "
+            --project=${{ steps.common_arguments.outputs.project }}
+            --influx_db_name=${{ steps.common_arguments.outputs.influx_db_name }}
+            --influx_hostname=${{ steps.common_arguments.outputs.influx_host }}
+          ")
+        fi
+        echo arguments=$DEFAULT_ARGUMENTS >> $GITHUB_OUTPUT  # unquoted: word splitting flattens the multi-line list onto one line
+    - name: Group arguments
+      shell: bash
+      run: |
+        PATHS=($(echo "${{ inputs.argument-file-paths }}" | tr '\n' ' '))
+        for index in "${!PATHS[@]}"; do
+          CONFIG=$(grep -v "^#.*" ${PATHS[index]})
+          ARGUMENTS=$(echo "${{ steps.default_arguments.outputs.arguments }} ${{ inputs.arguments }} $CONFIG" | tr '\n' ' ')
+          ARGUMENTS="${ARGUMENTS% }"
+          if ${{ inputs.test-type == 'performance' }}; then  # performance tests: wrap every argument in double quotes, comma-separated
+            quoted=""
+            read -ra tokens <<< "$ARGUMENTS"
+            for token in "${tokens[@]}"; do
+              quoted="${quoted}\"${token}\","
+            done
+            ARGUMENTS="${quoted%,}"
+          fi
+          echo "${{ github.job }}_test_arguments_$((index + 1))=$ARGUMENTS" >> $GITHUB_ENV
+        done
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index 40c7058da512..d8fd17d8a8e3 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -180,12 +180,14 @@ Please note that jobs with matrix need to have matrix element in the comment. Ex
 ```Run Python PreCommit (3.8)```
 | Workflow name | Matrix | Trigger Phrase | Cron Status |
 |:-------------:|:------:|:--------------:|:-----------:|
-| [ Load Tests CoGBK Dataflow Streaming Java ](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml) | N/A |`Run Load Tests Java CoGBK Dataflow Streaming`| [![.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml)
-| [ Load Tests Combine Dataflow Batch Python ](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch.yml) | N/A |`Run Load Tests Python Combine Dataflow Batch`| [![.github/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch.yml)
 | [ Load Tests GBK Dataflow Batch Go ](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Go_GBK_Dataflow_Batch.yml) | N/A |`Run Load Tests Go GBK Dataflow Batch`| [![.github/workflows/beam_LoadTests_Go_GBK_Dataflow_Batch](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Go_GBK_Dataflow_Batch.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Go_GBK_Dataflow_Batch.yml)
-| [ Performance Tests BigQueryIO Batch Java Avro ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml) | N/A |`Run BigQueryIO Batch Performance Test Java Avro`| [![.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml)
-| [ Performance Tests BigQueryIO Batch Java Json ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml) | N/A |`Run BigQueryIO Batch Performance Test Java Json`| [![.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml)
-| [ Performance Tests BigQueryIO Streaming Java ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml) | N/A |`Run BigQueryIO Streaming Performance Test Java`| [![.github/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml)
+| [ Load Tests CoGBK Dataflow Streaming Java ](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml) | N/A |`Run Load Tests Java CoGBK Dataflow Streaming`| [![.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml)
+| [ Load Tests Combine Dataflow Batch Python ](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch.yml) | N/A |`Run Load Tests Python Combine Dataflow Batch`| [![.github/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Python_Combine_Dataflow_Batch.yml)
+| [ Performance Tests AvroIOIT HDFS ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_AvroIOIT_HDFS.yml) | N/A |`Run Java AvroIO Performance Test HDFS`| [![.github/workflows/beam_PerformanceTests_AvroIOIT_HDFS.yml](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_AvroIOIT_HDFS.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_AvroIOIT_HDFS.yml)
+| [ Performance Tests AvroIOIT ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_AvroIOIT.yml) | N/A |`Run Java AvroIO Performance Test`| [![.github/workflows/beam_PerformanceTests_AvroIOIT.yml](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_AvroIOIT.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_AvroIOIT.yml)
+| [ Performance Tests BigQueryIO Batch Java Avro ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml) | N/A |`Run BigQueryIO Batch Performance Test Java Avro`| [![.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml)
+| [ Performance Tests BigQueryIO Batch Java Json ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml) | N/A |`Run BigQueryIO Batch Performance Test Java Json`| [![.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml)
+| [ Performance Tests BigQueryIO Streaming Java ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml) | N/A |`Run BigQueryIO Streaming Performance Test Java`| [![.github/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml)
 | [ PostCommit BeamMetrics Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml) | N/A |`Run Beam Metrics Deployment`| [![.github/workflows/beam_PostCommit_BeamMetrics_Publish.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml)
 | [ PostCommit Go ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | N/A |`Run Go PostCommit`| [![.github/workflows/beam_PostCommit_Go.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) |
 | [ PostCommit Go Dataflow ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | N/A |`Run Go PostCommit Dataflow ARM`| [![.github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) |
diff --git a/.github/workflows/beam_PerformanceTests_AvroIOIT.yml b/.github/workflows/beam_PerformanceTests_AvroIOIT.yml
new file mode 100644
index 000000000000..a6c56287da41
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_AvroIOIT.yml
@@ -0,0 +1,91 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Performance Tests AvroIOIT
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '10 1/13 * * *'
+  workflow_dispatch:
+
+# Explicit permissions are set so the job does not fall back to the default permissions, which are `write-all` in case of a pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# A newly queued run of this workflow in the same group cancels the run that is still in progress
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+  beam_PerformanceTests_AvroIOIT_test_arguments_1: ''
+
+jobs:
+  beam_PerformanceTests_AvroIOIT:
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java AvroIO Performance Test'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_AvroIOIT"]
+        job_phrase: ["Run Java AvroIO Performance Test"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: performance
+          test-language: java
+          argument-file-paths: |
+            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/config_AvroIOIT.txt
+          arguments: |
+            --filenamePrefix=gs://temp-storage-for-perf-tests/${{ matrix.job_name }}/${{ github.run_id }}/
+      - name: run integrationTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:java:io:file-based-io-tests:integrationTest
+          arguments: |
+            --tests org.apache.beam.sdk.io.avro.AvroIOIT \
+            --info \
+            -Dfilesystem=gcs \
+            -DintegrationTestRunner=dataflow \
+            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_AvroIOIT_test_arguments_1 }}]' \
diff --git a/.github/workflows/beam_PerformanceTests_AvroIOIT_HDFS.yml b/.github/workflows/beam_PerformanceTests_AvroIOIT_HDFS.yml
new file mode 100644
index 000000000000..7393e0e39b37
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_AvroIOIT_HDFS.yml
@@ -0,0 +1,110 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Performance Tests AvroIOIT HDFS
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '10 1/13 * * *'
+  workflow_dispatch:
+
+# Explicit permissions are set so the job does not fall back to the default permissions, which are `write-all` in case of a pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# A newly queued run of this workflow in the same group cancels the run that is still in progress
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+  beam_PerformanceTests_AvroIOIT_HDFS_test_arguments_1: ''
+
+jobs:
+  beam_PerformanceTests_AvroIOIT_HDFS:
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java AvroIO Performance Test HDFS'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_AvroIOIT_HDFS"]
+        job_phrase: ["Run Java AvroIO Performance Test HDFS"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Authenticate on GCP
+        id: auth
+        uses: google-github-actions/auth@v1
+        with:
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+          project_id: ${{ secrets.GCP_PROJECT_ID }}
+      - name: Set k8s access
+        uses: ./.github/actions/setup-k8s-access
+        with:
+          cluster_name: io-datastores
+          k8s_namespace: ${{ matrix.job_name }}-${{ github.run_id }}
+      - name: Install Hadoop
+        id: install_hadoop
+        run: |
+          kubectl apply -f ${{ github.workspace }}/.test-infra/kubernetes/hadoop/LargeITCluster/hdfs-multi-datanode-cluster.yml
+          kubectl wait svc/hadoop --for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=60s
+          loadbalancer_IP=$(kubectl get svc hadoop -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+          echo "hadoop_IP=$loadbalancer_IP" >> "$GITHUB_OUTPUT"
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: performance
+          test-language: java
+          argument-file-paths: |
+            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/config_AvroIOIT_HDFS.txt
+          arguments: |
+            --filenamePrefix=hdfs://${{ steps.install_hadoop.outputs.hadoop_IP }}:9000/TEXTIO_IT_
+            --hdfsConfiguration=[{\\\"fs.defaultFS\\\":\\\"hdfs:${{ steps.install_hadoop.outputs.hadoop_IP }}:9000\\\",\\\"dfs.replication\\\":1}]
+      - name: run integrationTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:java:io:file-based-io-tests:integrationTest
+          arguments: |
+            --tests org.apache.beam.sdk.io.avro.AvroIOIT \
+            --info \
+            -Dfilesystem=hdfs \
+            -DintegrationTestRunner=dataflow \
+            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_AvroIOIT_HDFS_test_arguments_1 }}]' \
diff --git a/.github/workflows/performance-tests-job-configs/config_AvroIOIT.txt b/.github/workflows/performance-tests-job-configs/config_AvroIOIT.txt
new file mode 100644
index 000000000000..12ae78de6cc0
--- /dev/null
+++ b/.github/workflows/performance-tests-job-configs/config_AvroIOIT.txt
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--runner=DataflowRunner
+--tempRoot=gs://temp-storage-for-perf-tests
+--bigQueryDataset=beam_performance
+--bigQueryTable=avroioit_results
+--influxMeasurement=avroioit_results
+--numberOfRecords=225000000
+--expectedHash=2f9f5ca33ea464b25109c0297eb6aecb
+--datasetSize=1089730000
+--numWorkers=5
+--autoscalingAlgorithm=NONE
diff --git a/.github/workflows/performance-tests-job-configs/config_AvroIOIT_HDFS.txt b/.github/workflows/performance-tests-job-configs/config_AvroIOIT_HDFS.txt
new file mode 100644
index 000000000000..6d27c85393c3
--- /dev/null
+++ b/.github/workflows/performance-tests-job-configs/config_AvroIOIT_HDFS.txt
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--runner=DataflowRunner
+--tempRoot=gs://temp-storage-for-perf-tests
+--bigQueryDataset=beam_performance
+--bigQueryTable=avroioit_hdfs_results
+--influxMeasurement=avroioit_hdfs_results
+--numberOfRecords=225000000
+--expectedHash=2f9f5ca33ea464b25109c0297eb6aecb
+--datasetSize=1089730000
+--numWorkers=5
+--autoscalingAlgorithm=NONE