From 6e570d6e5651a7b1ff42cc035db9954776e5c2f2 Mon Sep 17 00:00:00 2001 From: Yi Hu Date: Wed, 9 Oct 2024 10:55:28 -0400 Subject: [PATCH] Bump to Dataproc 2.2 and Flink 1.17 for load tests (#32632) --- .../beam_LoadTests_Go_CoGBK_Flink_batch.yml | 4 +- .../beam_LoadTests_Go_Combine_Flink_Batch.yml | 4 +- .../beam_LoadTests_Go_GBK_Flink_Batch.yml | 4 +- .../beam_LoadTests_Go_ParDo_Flink_Batch.yml | 4 +- ...eam_LoadTests_Go_SideInput_Flink_Batch.yml | 4 +- ...eam_LoadTests_Python_CoGBK_Flink_Batch.yml | 4 +- ...m_LoadTests_Python_Combine_Flink_Batch.yml | 4 +- ...adTests_Python_Combine_Flink_Streaming.yml | 4 +- .../beam_LoadTests_Python_GBK_Flink_Batch.yml | 4 +- ...eam_LoadTests_Python_ParDo_Flink_Batch.yml | 4 +- ...LoadTests_Python_ParDo_Flink_Streaming.yml | 4 +- .../beam_Publish_Docker_Snapshots.yml | 2 +- .test-infra/dataproc/flink_cluster.sh | 10 +- .../jenkins/CommonTestProperties.groovy | 2 +- .test-infra/jenkins/Flink.groovy | 120 ------------------ 15 files changed, 29 insertions(+), 149 deletions(-) delete mode 100644 .test-infra/jenkins/Flink.groovy diff --git a/.github/workflows/beam_LoadTests_Go_CoGBK_Flink_batch.yml b/.github/workflows/beam_LoadTests_Go_CoGBK_Flink_batch.yml index fae86961ea27..a2c347ebddb6 100644 --- a/.github/workflows/beam_LoadTests_Go_CoGBK_Flink_batch.yml +++ b/.github/workflows/beam_LoadTests_Go_CoGBK_Flink_batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-go-cogbk-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-cogbk-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Go_Combine_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Go_Combine_Flink_Batch.yml index e814cc809be2..cdb034edcd27 100644 --- a/.github/workflows/beam_LoadTests_Go_Combine_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Go_Combine_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-go-combine-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-combine-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Go_GBK_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Go_GBK_Flink_Batch.yml index 8c01bc1cf304..f95e1c831da7 100644 --- a/.github/workflows/beam_LoadTests_Go_GBK_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Go_GBK_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-go-gbk-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-gbk-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Go_ParDo_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Go_ParDo_Flink_Batch.yml index ba7323a8b63c..89b31e02261d 100644 --- a/.github/workflows/beam_LoadTests_Go_ParDo_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Go_ParDo_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-go-pardo-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-pardo-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Go_SideInput_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Go_SideInput_Flink_Batch.yml index 5440ce968898..7ab3d837721b 100644 --- a/.github/workflows/beam_LoadTests_Go_SideInput_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Go_SideInput_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-go-sideinput-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-go-sideinput-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Python_CoGBK_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Python_CoGBK_Flink_Batch.yml index e2afb2e2cfd7..9b0dec2249f6 100644 --- a/.github/workflows/beam_LoadTests_Python_CoGBK_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Python_CoGBK_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-py-cogbk-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-python-cogbk-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Python_Combine_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Python_Combine_Flink_Batch.yml index 0f666a0b7db6..6363de044149 100644 --- a/.github/workflows/beam_LoadTests_Python_Combine_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Python_Combine_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-py-cmb-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-py-cmb-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Python_Combine_Flink_Streaming.yml b/.github/workflows/beam_LoadTests_Python_Combine_Flink_Streaming.yml index 6f491e6b9fa9..baf950589c8e 100644 --- a/.github/workflows/beam_LoadTests_Python_Combine_Flink_Streaming.yml +++ b/.github/workflows/beam_LoadTests_Python_Combine_Flink_Streaming.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-py-cmb-flink-streaming-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-py-cmb-flink-streaming-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Python_GBK_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Python_GBK_Flink_Batch.yml index c938b284a866..e05885246090 100644 --- a/.github/workflows/beam_LoadTests_Python_GBK_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Python_GBK_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-py-gbk-flk-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-py-gbk-flk-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Batch.yml b/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Batch.yml index b6c86e01c299..8d907cf643bf 100644 --- a/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Batch.yml +++ b/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Batch.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-py-pardo-flink-batch-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-python-pardo-flink-batch-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Streaming.yml b/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Streaming.yml index a6443c0df10b..142d1b5e2dc2 100644 --- a/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Streaming.yml +++ b/.github/workflows/beam_LoadTests_Python_ParDo_Flink_Streaming.yml @@ -50,12 +50,12 @@ env: GCLOUD_ZONE: us-central1-a CLUSTER_NAME: beam-loadtests-py-pardo-flink-stream-${{ github.run_id }} GCS_BUCKET: gs://beam-flink-cluster - FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz + FLINK_DOWNLOAD_URL: https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz HADOOP_DOWNLOAD_URL: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar FLINK_TASKMANAGER_SLOTS: 1 DETACHED_MODE: true HARNESS_IMAGES_TO_PULL: gcr.io/apache-beam-testing/beam-sdk/beam_go_sdk:latest - JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.15_job_server:latest + JOB_SERVER_IMAGE: gcr.io/apache-beam-testing/beam_portability/beam_flink1.17_job_server:latest ARTIFACTS_DIR: gs://beam-flink-cluster/beam-loadtests-python-pardo-flink-stream-${{ github.run_id }} jobs: diff --git a/.github/workflows/beam_Publish_Docker_Snapshots.yml b/.github/workflows/beam_Publish_Docker_Snapshots.yml index 334fa537be56..e37a202267c4 100644 --- a/.github/workflows/beam_Publish_Docker_Snapshots.yml +++ b/.github/workflows/beam_Publish_Docker_Snapshots.yml @@ -83,7 +83,7 @@ jobs: - name: run Publish Docker Snapshots script for Flink uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :runners:flink:1.15:job-server-container:dockerPush + gradle-command: :runners:flink:1.17:job-server-container:dockerPush arguments: | -Pdocker-repository-root=gcr.io/apache-beam-testing/beam_portability \ -Pdocker-tag-list=latest \ No newline at end of file diff --git a/.test-infra/dataproc/flink_cluster.sh b/.test-infra/dataproc/flink_cluster.sh index b623e890d08f..759d7a6fcc38 100755 --- a/.test-infra/dataproc/flink_cluster.sh +++ b/.test-infra/dataproc/flink_cluster.sh @@ -17,7 +17,7 @@ # Provide the following environment to run this script: # # GCLOUD_ZONE: Google cloud zone. Optional. Default: "us-central1-a" -# DATAPROC_VERSION: Dataproc version. Optional. Default: 2.1 +# DATAPROC_VERSION: Dataproc version. Optional. Default: 2.2 # CLUSTER_NAME: Cluster name # GCS_BUCKET: GCS bucket url for Dataproc resources (init actions) # HARNESS_IMAGES_TO_PULL: Urls to SDK Harness' images to pull on dataproc workers (optional: 0, 1 or multiple urls for every harness image) @@ -35,8 +35,8 @@ # HARNESS_IMAGES_TO_PULL='gcr.io//python:latest gcr.io//java:latest' \ # JOB_SERVER_IMAGE=gcr.io//job-server-flink:latest \ # ARTIFACTS_DIR=gs:// \ -# FLINK_DOWNLOAD_URL=https://archive.apache.org/dist/flink/flink-1.12.3/flink-1.12.3-bin-scala_2.11.tgz \ -# HADOOP_DOWNLOAD_URL=https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-9.0/flink-shaded-hadoop-2-uber-2.8.3-9.0.jar \ +# FLINK_DOWNLOAD_URL=https://archive.apache.org/dist/flink/flink-1.17.0/flink-1.17.0-bin-scala_2.12.tgz \ +# HADOOP_DOWNLOAD_URL=https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-9.0.jar \ # FLINK_NUM_WORKERS=2 \ # FLINK_TASKMANAGER_SLOTS=1 \ # DETACHED_MODE=false \ @@ -46,7 +46,7 @@ set -Eeuxo pipefail # GCloud properties GCLOUD_ZONE="${GCLOUD_ZONE:=us-central1-a}" -DATAPROC_VERSION="${DATAPROC_VERSION:=2.1-debian}" +DATAPROC_VERSION="${DATAPROC_VERSION:=2.2-debian}" GCLOUD_REGION=`echo $GCLOUD_ZONE | sed -E "s/(-[a-z])?$//"` MASTER_NAME="$CLUSTER_NAME-m" @@ -133,7 +133,7 @@ function create_cluster() { # This is why flink init action is invoked last. # TODO(11/11/2022) remove --worker-machine-type and --master-machine-type once N2 CPUs quota relaxed # Dataproc 2.1 uses n2-standard-2 by default but there is N2 CPUs=24 quota limit - gcloud dataproc clusters create $CLUSTER_NAME --region=$GCLOUD_REGION --num-workers=$FLINK_NUM_WORKERS \ + gcloud dataproc clusters create $CLUSTER_NAME --region=$GCLOUD_REGION --num-workers=$FLINK_NUM_WORKERS --public-ip-address \ --master-machine-type=n1-standard-2 --worker-machine-type=n1-standard-2 --metadata "${metadata}", \ --image-version=$image_version --zone=$GCLOUD_ZONE --optional-components=FLINK,DOCKER --quiet } diff --git a/.test-infra/jenkins/CommonTestProperties.groovy b/.test-infra/jenkins/CommonTestProperties.groovy index c6870dea59a1..0670b96ef47c 100644 --- a/.test-infra/jenkins/CommonTestProperties.groovy +++ b/.test-infra/jenkins/CommonTestProperties.groovy @@ -26,7 +26,7 @@ class CommonTestProperties { } static String getFlinkVersion() { - return "1.15" + return "1.17" } static String getSparkVersion() { diff --git a/.test-infra/jenkins/Flink.groovy b/.test-infra/jenkins/Flink.groovy deleted file mode 100644 index 34f3b60709c0..000000000000 --- a/.test-infra/jenkins/Flink.groovy +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -class Flink { - private static final String flinkDownloadUrl = 'https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz' - private static final String hadoopDownloadUrl = 'https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar' - private static final String FLINK_DIR = '"$WORKSPACE/src/.test-infra/dataproc"' - private static final String FLINK_SCRIPT = 'flink_cluster.sh' - private def job - private String jobName - - Flink(job, String jobName) { - this.job = job - this.jobName = jobName - } - - /** - * Creates Flink cluster and specifies cleanup steps. - * - * @param sdkHarnessImages - the list of published SDK Harness images tags - * @param workerCount - the initial number of worker nodes - * @param jobServerImage - the Flink job server image tag. If left empty, cluster will be set up without the job server. - * @param slotsPerTaskmanager - the number of slots per Flink task manager - */ - void setUp(List sdkHarnessImages, Integer workerCount, String jobServerImage = '', Integer slotsPerTaskmanager = 1) { - setupFlinkCluster(sdkHarnessImages, workerCount, jobServerImage, slotsPerTaskmanager) - addTeardownFlinkStep() - } - - private void setupFlinkCluster(List sdkHarnessImages, Integer workerCount, String jobServerImage, Integer slotsPerTaskmanager) { - String gcsBucket = 'gs://beam-flink-cluster' - String clusterName = getClusterName() - String artifactsDir = "${gcsBucket}/${clusterName}" - String imagesToPull = sdkHarnessImages.join(' ') - - job.steps { - environmentVariables { - env("GCLOUD_ZONE", "us-central1-a") - env("CLUSTER_NAME", clusterName) - env("GCS_BUCKET", gcsBucket) - env("FLINK_DOWNLOAD_URL", flinkDownloadUrl) - env("HADOOP_DOWNLOAD_URL", hadoopDownloadUrl) - env("FLINK_NUM_WORKERS", workerCount) - env("FLINK_TASKMANAGER_SLOTS", slotsPerTaskmanager) - env("DETACHED_MODE", 'true') - - if(imagesToPull) { - env("HARNESS_IMAGES_TO_PULL", imagesToPull) - } - - if(jobServerImage) { - env("JOB_SERVER_IMAGE", jobServerImage) - env("ARTIFACTS_DIR", artifactsDir) - } - } - - shell('echo Setting up flink cluster') - shell("cd ${FLINK_DIR}; ./${FLINK_SCRIPT} create") - } - } - - /** - * Updates the number of worker nodes in a cluster. - * - * @param workerCount - the new number of worker nodes in the cluster - */ - void scaleCluster(Integer workerCount) { - job.steps { - shell("echo Changing number of workers to ${workerCount}") - environmentVariables { - env("FLINK_NUM_WORKERS", workerCount) - } - shell("cd ${FLINK_DIR}; ./${FLINK_SCRIPT} restart") - } - } - - private GString getClusterName() { - return "${jobName.toLowerCase().replace("_", "-")}-\$BUILD_ID" - } - - private void addTeardownFlinkStep() { - job.publishers { - postBuildScript { - buildSteps { - postBuildStep { - stopOnFailure(false) - results([ - 'SUCCESS', - 'UNSTABLE', - 'FAILURE', - 'NOT_BUILT', - 'ABORTED' - ]) - buildSteps { - shell { - command("cd ${FLINK_DIR}; ./${FLINK_SCRIPT} delete") - } - } - } - } - markBuildUnstable(false) - } - } - } -}