From 13b12dab0042b174be06c03f56bf8422f6967095 Mon Sep 17 00:00:00 2001 From: Jack Dingilian Date: Thu, 7 Sep 2023 17:17:58 -0400 Subject: [PATCH 01/10] Add mechanism to override bigtable client for testing --- .../sdk/io/gcp/bigtable/BigtableConfig.java | 12 ++++ .../beam/sdk/io/gcp/bigtable/BigtableIO.java | 19 ++++++ .../dao/BigtableChangeStreamAccessor.java | 7 +++ .../dao/BigtableClientOverride.java | 58 +++++++++++++++++++ 4 files changed, 96 insertions(+) create mode 100644 sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableClientOverride.java diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableConfig.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableConfig.java index 018e538f552c..15230c8adef9 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableConfig.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableConfig.java @@ -25,6 +25,7 @@ import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.extensions.gcp.auth.CredentialFactory; import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; +import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.BigtableClientOverride; import org.apache.beam.sdk.options.ValueProvider; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.transforms.display.DisplayData; @@ -49,6 +50,9 @@ public abstract class BigtableConfig implements Serializable { /** Returns the app profile being read from. */ public abstract @Nullable ValueProvider getAppProfileId(); + /** Returns the Bigtable client override. */ + public abstract @Nullable BigtableClientOverride getBigtableClientOverride(); + /** * Returns the Google Cloud Bigtable instance being written to, and other parameters. * @@ -113,6 +117,8 @@ abstract Builder setBigtableOptionsConfigurator( abstract Builder setChannelCount(int count); + abstract Builder setBigtableClientOverride(BigtableClientOverride clientOverride); + abstract BigtableConfig build(); } @@ -156,6 +162,12 @@ public BigtableConfig withEmulator(String emulatorHost) { return toBuilder().setEmulatorHost(emulatorHost).build(); } + @VisibleForTesting + BigtableConfig withBigtableClientOverride(BigtableClientOverride clientOverride) { + checkArgument(clientOverride != null, "clientOverride can not be null"); + return toBuilder().setBigtableClientOverride(clientOverride).build(); + } + void validate() { checkArgument( (getProjectId() != null diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java index a655a29e92b2..9f3c627a89ef 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java @@ -49,6 +49,7 @@ import org.apache.beam.sdk.io.gcp.bigtable.changestreams.UniqueIdGenerator; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.action.ActionFactory; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.BigtableChangeStreamAccessor; +import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.BigtableClientOverride; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.DaoFactory; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.MetadataTableAdminDao; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dofn.DetectNewPartitionsDoFn; @@ -1988,6 +1989,24 @@ public ReadChangeStream withMetadataTableAppProfileId(String appProfileId) { .build(); } + /** + * Returns a new {@link BigtableIO.ReadChangeStream} that overrides the config of data and/or + * admin client for streaming changes and for managing the metadata. For testing purposes only. + * Not intended for use. + * + *

Does not modify this object. + */ + @VisibleForTesting + ReadChangeStream withBigtableClientOverride(BigtableClientOverride clientOverride) { + BigtableConfig config = getBigtableConfig(); + BigtableConfig metadataTableConfig = getMetadataTableBigtableConfig(); + return toBuilder() + .setBigtableConfig(config.withBigtableClientOverride(clientOverride)) + .setMetadataTableBigtableConfig( + metadataTableConfig.withBigtableClientOverride(clientOverride)) + .build(); + } + /** * Returns a new {@link BigtableIO.ReadChangeStream} that, if set to true, will create or update * metadata table before launching pipeline. Otherwise, it is expected that a metadata table diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableChangeStreamAccessor.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableChangeStreamAccessor.java index ecf9a7039598..cb296aef6c28 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableChangeStreamAccessor.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableChangeStreamAccessor.java @@ -210,6 +210,13 @@ private static BigtableChangeStreamAccessor createAccessor(@NonNull BigtableConf .setMaxAttempts(10) .build()); + final BigtableClientOverride clientOverride = bigtableConfig.getBigtableClientOverride(); + if (clientOverride != null) { + clientOverride.updateTableAdminClientSettings(tableAdminSettingsBuilder); + clientOverride.updateInstanceAdminClientSettings(instanceAdminSettingsBuilder); + clientOverride.updateDataClientSettings(dataSettingsBuilder); + } + BigtableDataClient dataClient = BigtableDataClient.create(dataSettingsBuilder.build()); BigtableTableAdminClient tableAdminClient = BigtableTableAdminClient.create(tableAdminSettingsBuilder.build()); diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableClientOverride.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableClientOverride.java new file mode 100644 index 000000000000..72b3e39871ef --- /dev/null +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableClientOverride.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao; + +import com.google.cloud.bigtable.admin.v2.BigtableInstanceAdminSettings; +import com.google.cloud.bigtable.admin.v2.BigtableTableAdminSettings; +import com.google.cloud.bigtable.data.v2.BigtableDataSettings; +import java.io.IOException; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; + +/** Override the configuration of Cloud Bigtable data and admin client. */ +@VisibleForTesting +public interface BigtableClientOverride { + /** + * Update {@link BigtableInstanceAdminSettings.Builder} with custom configurations. + * + *

For example, to update the admin api endpoint. + * + * @param builder builds the instance admin client + * @throws IOException when dependency initialization fails + */ + void updateInstanceAdminClientSettings(BigtableInstanceAdminSettings.Builder builder) + throws IOException; + + /** + * Update {@link BigtableTableAdminSettings.Builder} with custom configurations. + * + *

For example, to update the admin api endpoint. + * + * @param builder builds the table admin client + * @throws IOException when dependency initialization fails + */ + void updateTableAdminClientSettings(BigtableTableAdminSettings.Builder builder) + throws IOException; + + /** + * Update {@link BigtableDataSettings.Builder} with custom configurations. + * + * @param builder builds the data client + * @throws IOException when dependency initialization fails + */ + void updateDataClientSettings(BigtableDataSettings.Builder builder) throws IOException; +} From f09955e47d71c1fc53396c707290f9d6de6369f3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 15 Sep 2023 10:20:50 -0400 Subject: [PATCH 02/10] Bump google.golang.org/grpc from 1.58.0 to 1.58.1 in /sdks (#28464) Bumps [google.golang.org/grpc](https://github.com/grpc/grpc-go) from 1.58.0 to 1.58.1. - [Release notes](https://github.com/grpc/grpc-go/releases) - [Commits](https://github.com/grpc/grpc-go/compare/v1.58.0...v1.58.1) --- updated-dependencies: - dependency-name: google.golang.org/grpc dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- sdks/go.mod | 2 +- sdks/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sdks/go.mod b/sdks/go.mod index 5e91aea021f8..3cf0b4eb5c8b 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -59,7 +59,7 @@ require ( golang.org/x/text v0.13.0 google.golang.org/api v0.140.0 google.golang.org/genproto v0.0.0-20230821184602-ccc8af3d0e93 - google.golang.org/grpc v1.58.0 + google.golang.org/grpc v1.58.1 google.golang.org/protobuf v1.31.0 gopkg.in/retry.v1 v1.0.3 gopkg.in/yaml.v2 v2.4.0 diff --git a/sdks/go.sum b/sdks/go.sum index c30891294dbd..b53fdd5c3f69 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ -689,8 +689,8 @@ google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8 google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.58.0 h1:32JY8YpPMSR45K+c3o6b8VL73V+rR8k+DeMIr4vRH8o= -google.golang.org/grpc v1.58.0/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= +google.golang.org/grpc v1.58.1 h1:OL+Vz23DTtrrldqHK49FUOPHyY75rvFqJfXC84NYW58= +google.golang.org/grpc v1.58.1/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= From 108680aadbd124e11954b40ffccfa2926c8e3846 Mon Sep 17 00:00:00 2001 From: Arwin Tio Date: Fri, 15 Sep 2023 08:45:37 -0700 Subject: [PATCH 03/10] gcsio.py use RuntimeError with exc chaining instead of overwriting previous exception (#28470) * use RuntimeError with exc chaining instead of overwriting previous exception * update changes --- CHANGES.md | 3 ++- sdks/python/apache_beam/io/gcp/gcsio.py | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 40a9a1dc9490..3705cebc88df 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -80,7 +80,8 @@ ## Bugfixes -* Fixed X (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +* Fixed exception chaining issue in GCS connector (Python) ([#26769](https://github.com/apache/beam/issues/26769#issuecomment-1700422615)). + ## Security Fixes * Python containers updated, fixing [CVE-2021-30474](https://nvd.nist.gov/vuln/detail/CVE-2021-30474), [CVE-2021-30475](https://nvd.nist.gov/vuln/detail/CVE-2021-30475), [CVE-2021-30473](https://nvd.nist.gov/vuln/detail/CVE-2021-30473), [CVE-2020-36133](https://nvd.nist.gov/vuln/detail/CVE-2020-36133), [CVE-2020-36131](https://nvd.nist.gov/vuln/detail/CVE-2020-36131), [CVE-2020-36130](https://nvd.nist.gov/vuln/detail/CVE-2020-36130), and [CVE-2020-36135](https://nvd.nist.gov/vuln/detail/CVE-2020-36135) diff --git a/sdks/python/apache_beam/io/gcp/gcsio.py b/sdks/python/apache_beam/io/gcp/gcsio.py index 2fdbce73170a..d75af4fe6ac1 100644 --- a/sdks/python/apache_beam/io/gcp/gcsio.py +++ b/sdks/python/apache_beam/io/gcp/gcsio.py @@ -825,5 +825,4 @@ def finish(self): # Check for exception since the last put() call. if self._upload_thread.last_error is not None: e = self._upload_thread.last_error - raise type(self._upload_thread.last_error)( - "Error while uploading file %s" % self._path) from e # pylint: disable=raising-bad-type + raise RuntimeError("Error while uploading file %s" % self._path) from e From 8415534040bf2876a56ee7bdc8283ed6751bc1a2 Mon Sep 17 00:00:00 2001 From: Andrey Devyatkin Date: Fri, 15 Sep 2023 19:23:29 +0200 Subject: [PATCH 04/10] added beam_PostCommit_PortableJar jobs to GitHub Actions (#28447) --- .github/workflows/README.md | 2 + .../beam_PostCommit_PortableJar_Flink.yml | 88 +++++++++++++++++++ .../beam_PostCommit_PortableJar_Spark.yml | 88 +++++++++++++++++++ 3 files changed, 178 insertions(+) create mode 100644 .github/workflows/beam_PostCommit_PortableJar_Flink.yml create mode 100644 .github/workflows/beam_PostCommit_PortableJar_Spark.yml diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 5513604f5cfc..383df86d1496 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -215,6 +215,8 @@ Please note that jobs with matrix need to have matrix element in the comment. Ex | [ PostCommit Java ValidatesRunner SparkStructuredStreaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) | N/A |`Run Spark StructuredStreaming ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) | | [ PostCommit Java ValidatesRunner Twister2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml) | N/A |`Run Twister2 ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml) | | [ PostCommit Java ValidatesRunner ULR ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml) | N/A |`Run ULR Loopback ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml) | +| [ PostCommit PortableJar Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Flink.yml) | N/A |`Run PortableJar_Flink PostCommit`| [![.github/workflows/beam_PostCommit_PortableJar_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Flink.yml) | +| [ PostCommit PortableJar Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Spark.yml) | N/A |`Run PortableJar_Spark PostCommit`| [![.github/workflows/beam_PostCommit_PortableJar_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Spark.yml) | | [ PostCommit Python Examples Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | N/A |`Run Python Examples_Dataflow`| [![.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | | [ PostCommit Python Examples Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python Examples_Direct (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_Examples_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | | [ PostCommit Python Examples Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | ['3.8','3.11'] |`Run Python Examples_Flink (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_Examples_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | diff --git a/.github/workflows/beam_PostCommit_PortableJar_Flink.yml b/.github/workflows/beam_PostCommit_PortableJar_Flink.yml new file mode 100644 index 000000000000..f3670a377df5 --- /dev/null +++ b/.github/workflows/beam_PostCommit_PortableJar_Flink.yml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit PortableJar Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_PortableJar_Flink: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run PortableJar_Flink PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: ["beam_PostCommit_PortableJar_Flink"] + job_phrase: ["Run PortableJar_Flink PostCommit"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: run testPipelineJarFlinkRunner script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:portable:py38:testPipelineJarFlinkRunner + arguments: | + -PpythonVersion=3.8 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_PortableJar_Spark.yml b/.github/workflows/beam_PostCommit_PortableJar_Spark.yml new file mode 100644 index 000000000000..a4079b654874 --- /dev/null +++ b/.github/workflows/beam_PostCommit_PortableJar_Spark.yml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit PortableJar Spark + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_PortableJar_Spark: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run PortableJar_Spark PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + strategy: + matrix: + job_name: ["beam_PostCommit_PortableJar_Spark"] + job_phrase: ["Run PortableJar_Spark PostCommit"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: run testPipelineJarSparkRunner script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:portable:py38:testPipelineJarSparkRunner + arguments: | + -PpythonVersion=3.8 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file From 52eed458d2aa3fbb9011377d055d1c41f1a3110c Mon Sep 17 00:00:00 2001 From: Andrey Devyatkin Date: Fri, 15 Sep 2023 19:24:44 +0200 Subject: [PATCH 05/10] Migrate "beam_PostCommit_Python_ValidatesContainer_Dataflow" Jenkins jobs to GitHub Actions (#28448) * added beam_PostCommit_Python_ValidatesContainer jobs to GitHub Actions * updated README file --- .github/workflows/README.md | 4 +- ...mit_Python_ValidatesContainer_Dataflow.yml | 105 +++++++++++++++++ ...on_ValidatesContainer_Dataflow_With_RC.yml | 106 ++++++++++++++++++ 3 files changed, 214 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml create mode 100644 .github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 383df86d1496..c89067a97097 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -204,7 +204,7 @@ Please note that jobs with matrix need to have matrix element in the comment. Ex | [ PostCommit Java ValidatesRunner Dataflow Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) | N/A |`Run Dataflow Streaming ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) | | [ PostCommit Java ValidatesRunner Dataflow V2 Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml) | N/A |`Run Java Dataflow V2 ValidatesRunner Streaming`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml) | | [ PostCommit Java ValidatesRunner Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml) | N/A |`Run Java Dataflow V2 ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml) | -| [.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) | N/A |`Run Dataflow ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) | +| [ PostCommit Java ValidatesRunner Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) | N/A |`Run Dataflow ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) | | [ PostCommit Java ValidatesRunner Direct JavaVersions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml) | ['11','17'] |`Run Direct ValidatesRunner Java (matrix_element)`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml) | | [ PostCommit Java ValidatesRunner Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml) | N/A |`Run Direct ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml) | | [ PostCommit Java ValidatesRunner Flink Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml) | N/A |`Run Flink ValidatesRunner Java 11`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml) | @@ -221,6 +221,8 @@ Please note that jobs with matrix need to have matrix element in the comment. Ex | [ PostCommit Python Examples Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python Examples_Direct (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_Examples_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | | [ PostCommit Python Examples Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | ['3.8','3.11'] |`Run Python Examples_Flink (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_Examples_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | | [ PostCommit Python Examples Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | ['3.8','3.11'] |`Run Python Examples_Spark (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_Examples_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | +| [ PostCommit Python ValidatesContainer Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python Dataflow ValidatesContainer (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml) | +| [ PostCommit Python ValidatesContainer Dataflow With RC ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python RC Dataflow ValidatesContainer (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml) | | [ PostCommit Python ValidatesRunner Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml) | ['3.8','3.11'] |`Run Python Dataflow ValidatesRunner (matrix_element)`| [![PostCommit Python ValidatesRunner Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml) | | [ PostCommit Python ValidatesRunner Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml) | ['3.8','3.11'] |`Run Python Flink ValidatesRunner (matrix_element)`| [![PostCommit Python ValidatesRunner Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml) | | [ PostCommit Python ValidatesRunner Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml) | ['3.8','3.11'] |`Run Python Samza ValidatesRunner (matrix_element)`| [![PostCommit Python ValidatesRunner Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml) | diff --git a/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml new file mode 100644 index 000000000000..dc2d2554b9a1 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml @@ -0,0 +1,105 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python ValidatesContainer Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesContainer_Dataflow: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python Dataflow ValidatesContainer') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesContainer_Dataflow"] + job_phrase: ["Run Python Dataflow ValidatesContainer"] + python_version: ['3.8','3.9','3.10','3.11'] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python_version }} + - name: Install Java + uses: actions/setup-java@v3 + with: + distribution: 'zulu' + java-version: '8' + - name: Setup Gradle + uses: gradle/gradle-build-action@v2 + with: + cache-read-only: false + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run validatesContainer script + env: + USER: github-actions + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:validatesContainer + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml b/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml new file mode 100644 index 000000000000..a4c518c930c2 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python ValidatesContainer Dataflow With RC + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python RC Dataflow ValidatesContainer') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC"] + job_phrase: ["Run Python RC Dataflow ValidatesContainer"] + python_version: ['3.8','3.9','3.10','3.11'] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python_version }} + - name: Install Java + uses: actions/setup-java@v3 + with: + distribution: 'zulu' + java-version: '8' + - name: Setup Gradle + uses: gradle/gradle-build-action@v2 + with: + cache-read-only: false + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run validatesContainer script + env: + USER: github-actions + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:validatesContainer + arguments: | + -PtestRCDependencies=true + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file From da6f1348ef098a65e74f82bc3c920dd1489d35e2 Mon Sep 17 00:00:00 2001 From: magicgoody <131876064+magicgoody@users.noreply.github.com> Date: Fri, 15 Sep 2023 23:26:28 +0600 Subject: [PATCH 06/10] beam_PostCommit_Python_Xlang (#28466) --- .github/workflows/README.md | 3 + ...m_PostCommit_Python_Xlang_Gcp_Dataflow.yml | 88 ++++++++++++++++++ ...eam_PostCommit_Python_Xlang_Gcp_Direct.yml | 87 ++++++++++++++++++ ...am_PostCommit_Python_Xlang_IO_Dataflow.yml | 90 +++++++++++++++++++ 4 files changed, 268 insertions(+) create mode 100644 .github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml create mode 100644 .github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml create mode 100644 .github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml diff --git a/.github/workflows/README.md b/.github/workflows/README.md index c89067a97097..190fcae1771b 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -227,6 +227,9 @@ Please note that jobs with matrix need to have matrix element in the comment. Ex | [ PostCommit Python ValidatesRunner Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml) | ['3.8','3.11'] |`Run Python Flink ValidatesRunner (matrix_element)`| [![PostCommit Python ValidatesRunner Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml) | | [ PostCommit Python ValidatesRunner Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml) | ['3.8','3.11'] |`Run Python Samza ValidatesRunner (matrix_element)`| [![PostCommit Python ValidatesRunner Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml) | | [ PostCommit Python ValidatesRunner Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml) | ['3.8','3.9','3.11'] |`Run Python Spark ValidatesRunner (matrix_element)`| [![PostCommit Python ValidatesRunner Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml) | +| [ PostCommit Python Xlang Gcp Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml) | N/A |`Run Python_Xlang_Gcp_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml) | +| [ PostCommit Python Xlang Gcp Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml) | N/A |`Run Python_Xlang_Gcp_Direct PostCommit`| [![.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml) | +| [ PostCommit Python Xlang IO Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml) | N/A |`Run Python_Xlang_IO_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml) | | [ PostCommit Sickbay Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python (matrix_element) PostCommit Sickbay`| [![.github/workflows/beam_PostCommit_Sickbay_Python.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | | [ PostCommit TransformService Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) | N/A |`Run TransformService_Direct PostCommit`| [![.github/workflows/beam_PostCommit_TransformService_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) | [ PostCommit Website Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | N/A | N/A | [![.github/workflows/beam_PostCommit_Website_Publish.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | diff --git a/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml new file mode 100644 index 000000000000..c52dd1869432 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python Xlang Gcp Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_Xlang_Gcp_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Python_Xlang_Gcp_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Python_Xlang_Gcp_Dataflow"] + job_phrase: ["Run Python_Xlang_Gcp_Dataflow PostCommit"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit Python Xlang Gcp Dataflow script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:gcpCrossLanguagePostCommit + arguments: -PuseWheelDistribution + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml b/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml new file mode 100644 index 000000000000..e1b5e401438f --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python Xlang Gcp Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_Xlang_Gcp_Direct: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Python_Xlang_Gcp_Direct PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Python_Xlang_Gcp_Direct"] + job_phrase: ["Run Python_Xlang_Gcp_Direct PostCommit"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit Python Xlang Gcp Direct script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:direct:gcpCrossLanguagePostCommit + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml new file mode 100644 index 000000000000..0937fcbadb6c --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python Xlang IO Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_Xlang_IO_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Python_Xlang_IO_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Python_Xlang_IO_Dataflow"] + job_phrase: ["Run Python_Xlang_IO_Dataflow PostCommit"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit Python Xlang IO Dataflow script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:ioCrossLanguagePostCommit + arguments: | + -PuseWheelDistribution \ + -PkafkaBootstrapServer=10.128.0.40:9094,10.128.0.28:9094,10.128.0.165:9094 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file From cfc96940d5a104e2beb0dcc4f2e0693ca75cefd9 Mon Sep 17 00:00:00 2001 From: magicgoody <131876064+magicgoody@users.noreply.github.com> Date: Fri, 15 Sep 2023 23:28:17 +0600 Subject: [PATCH 07/10] Github Workflow Replacement for Jenkins Jobs, beam_PostCommit_XVR_* (#28432) * beam_PostCommit_XVR * update --- .github/workflows/README.md | 7 ++ .../workflows/beam_PostCommit_XVR_Direct.yml | 104 +++++++++++++++++ .../workflows/beam_PostCommit_XVR_Flink.yml | 105 ++++++++++++++++++ ...ostCommit_XVR_JavaUsingPython_Dataflow.yml | 92 +++++++++++++++ ...Commit_XVR_PythonUsingJavaSQL_Dataflow.yml | 89 +++++++++++++++ ...ostCommit_XVR_PythonUsingJava_Dataflow.yml | 92 +++++++++++++++ .../workflows/beam_PostCommit_XVR_Samza.yml | 104 +++++++++++++++++ .../workflows/beam_PostCommit_XVR_Spark3.yml | 104 +++++++++++++++++ 8 files changed, 697 insertions(+) create mode 100644 .github/workflows/beam_PostCommit_XVR_Direct.yml create mode 100644 .github/workflows/beam_PostCommit_XVR_Flink.yml create mode 100644 .github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml create mode 100644 .github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml create mode 100644 .github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml create mode 100644 .github/workflows/beam_PostCommit_XVR_Samza.yml create mode 100644 .github/workflows/beam_PostCommit_XVR_Spark3.yml diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 190fcae1771b..66866de5ec2e 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -234,6 +234,13 @@ Please note that jobs with matrix need to have matrix element in the comment. Ex | [ PostCommit TransformService Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) | N/A |`Run TransformService_Direct PostCommit`| [![.github/workflows/beam_PostCommit_TransformService_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) | [ PostCommit Website Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | N/A | N/A | [![.github/workflows/beam_PostCommit_Website_Publish.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | | [ PostCommit XVR GoUsingJava Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml) | N/A |`Run XVR_GoUsingJava_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml) | +| [ PostCommit XVR Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Direct.yml) | N/A |`Run XVR_Direct PostCommit`| [![.github/workflows/beam_PostCommit_XVR_Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Direct.yml) | +| [ PostCommit XVR Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Flink.yml) | N/A |`Run XVR_Flink PostCommit`| [![.github/workflows/beam_PostCommit_XVR_Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Flink.yml) | +| [ PostCommit XVR JavaUsingPython Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml) | N/A |`Run XVR_JavaUsingPython_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml) | +| [ PostCommit XVR PythonUsingJava Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml) | N/A |`Run XVR_PythonUsingJava_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml) | +| [ PostCommit XVR PythonUsingJavaSQL Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml) | N/A |`Run XVR_PythonUsingJavaSQL_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml) | +| [ PostCommit XVR Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml) | N/A |`Run XVR_Samza PostCommit`| [![.github/workflows/beam_PostCommit_XVR_Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml) | +| [ PostCommit XVR Spark3 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml) | N/A |`Run XVR_Spark3 PostCommit`| [![.github/workflows/beam_PostCommit_XVR_Spark3](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml) | | [ PreCommit Community Metrics ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml) | N/A |`Run CommunityMetrics PreCommit`| [![.github/workflows/beam_PreCommit_CommunityMetrics.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml) | | [ PreCommit Go ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml) | N/A |`Run Go PreCommit`| [![.github/workflows/beam_PreCommit_Go.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml) | | [ PreCommit Java ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java.yml) | N/A |`Run Java PreCommit`| [![.github/workflows/beam_PreCommit_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java.yml) | diff --git a/.github/workflows/beam_PostCommit_XVR_Direct.yml b/.github/workflows/beam_PostCommit_XVR_Direct.yml new file mode 100644 index 000000000000..0f74e817f4e9 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_Direct.yml @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_Direct: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_Direct PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_Direct"] + job_phrase: ["Run XVR_Direct PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR Direct script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version != '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:direct:xlang:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=true \ + - name: run PostCommit XVR Direct script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version == '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:direct:xlang:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=false \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_Flink.yml b/.github/workflows/beam_PostCommit_XVR_Flink.yml new file mode 100644 index 000000000000..fd2cb95d9c32 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_Flink.yml @@ -0,0 +1,105 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + FlinkVersion: 1.15 + +jobs: + beam_PostCommit_XVR_Flink: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_Flink PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_Flink"] + job_phrase: ["Run XVR_Flink PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR Flink script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version != '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:flink:${{ env.FlinkVersion }}:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=true \ + - name: run PostCommit XVR Flink script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version == '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:flink:${{ env.FlinkVersion }}:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=false \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml b/.github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml new file mode 100644 index 000000000000..dbdfbe2e7c73 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml @@ -0,0 +1,92 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR JavaUsingPython Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_JavaUsingPython_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_JavaUsingPython_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_JavaUsingPython_Dataflow"] + job_phrase: ["Run XVR_JavaUsingPython_Dataflow PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR JavaUsingPython Dataflow script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesCrossLanguageRunnerJavaUsingPython + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml b/.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml new file mode 100644 index 000000000000..68239e8329c2 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR PythonUsingJavaSQL Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_PythonUsingJavaSQL_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow"] + job_phrase: ["Run XVR_PythonUsingJavaSQL_Dataflow PostCommit"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + - name: run PostCommit XVR PythonUsingJavaSQL Dataflow script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesCrossLanguageRunnerPythonUsingSql + arguments: | + -PpythonVersion=3.11 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml b/.github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml new file mode 100644 index 000000000000..eb184bf0c905 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml @@ -0,0 +1,92 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR PythonUsingJava Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_PythonUsingJava_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_PythonUsingJava_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_PythonUsingJava_Dataflow"] + job_phrase: ["Run XVR_PythonUsingJava_Dataflow PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR PythonUsingJava Dataflow script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesCrossLanguageRunnerPythonUsingJava + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_Samza.yml b/.github/workflows/beam_PostCommit_XVR_Samza.yml new file mode 100644 index 000000000000..1ae684bc6911 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_Samza.yml @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR Samza + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_Samza: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_Samza PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_Samza"] + job_phrase: ["Run XVR_Samza PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR Samza script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version != '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:samza:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=true \ + - name: run PostCommit XVR Samza script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version == '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:samza:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=false \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_Spark3.yml b/.github/workflows/beam_PostCommit_XVR_Spark3.yml new file mode 100644 index 000000000000..3cf4300222b3 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_Spark3.yml @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR Spark3 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_Spark3: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_Spark3 PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_Spark3"] + job_phrase: ["Run XVR_Spark3 PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR Spark3 script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version != '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=true \ + - name: run PostCommit XVR Spark3 script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version == '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=false \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file From 2dd8d28d4241b3ac7a9e94c2c4bf4924512bbdb7 Mon Sep 17 00:00:00 2001 From: Andrey Devyatkin Date: Fri, 15 Sep 2023 19:31:00 +0200 Subject: [PATCH 08/10] Migrate "beam_PostCommit_Java_Examples_*" Jenkins jobs to GitHub Actions (#28409) * added beam_PostCommit_Java_Examples jobs to GitHub Actions * added beam_PostCommit_Java_Examples jobs to GitHub Actions * added reporting steps * added reporting steps * updated README file --- .github/workflows/README.md | 6 +- ..._PostCommit_Java_Examples_Dataflow_ARM.yml | 2 +- ...ostCommit_Java_Examples_Dataflow_Java.yml} | 27 +++-- ... beam_PostCommit_Java_Examples_Direct.yml} | 25 ++--- .../beam_PostCommit_Java_Examples_Flink.yml | 101 ++++++++++++++++++ .../beam_PostCommit_Java_Examples_Spark.yml | 93 ++++++++++++++++ 6 files changed, 223 insertions(+), 31 deletions(-) rename .github/workflows/{beam_PostCommit_Java_Examples_Dataflow_Java11.yml => beam_PostCommit_Java_Examples_Dataflow_Java.yml} (80%) rename .github/workflows/{beam_PostCommit_Java_Examples_Dataflow_Java17.yml => beam_PostCommit_Java_Examples_Direct.yml} (82%) create mode 100644 .github/workflows/beam_PostCommit_Java_Examples_Flink.yml create mode 100644 .github/workflows/beam_PostCommit_Java_Examples_Spark.yml diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 66866de5ec2e..9a0d6a4799f5 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -191,8 +191,10 @@ Please note that jobs with matrix need to have matrix element in the comment. Ex | [ PostCommit Java Dataflow V1 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | N/A |`Run PostCommit_Java_Dataflow`| [![.github/workflows/beam_PostCommit_Java_DataflowV1.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | | [ PostCommit Java Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | N/A |`Run PostCommit_Java_DataflowV2`| [![.github/workflows/beam_PostCommit_Java_DataflowV2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | | [ PostCommit Java Examples Dataflow ARM ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | N/A |`Run Java_Examples_Dataflow_ARM PostCommit`| [![.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | -| [ PostCommit Java Examples Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | N/A |`Run Java examples on Dataflow Java 11`| [![.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | -| [ PostCommit Java Examples Dataflow Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml) | N/A |`Run Java examples on Dataflow Java 17`| [![.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml) | +| [ PostCommit Java Examples Dataflow Java ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml) | ['11','17'] |`Run Java examples on Dataflow Java (matrix_element)`| [![.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml) | +| [ PostCommit Java Examples Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Direct.yml) | N/A |`Run Java Examples_Direct`| [![.github/workflows/beam_PostCommit_Java_Examples_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Direct.yml) | +| [ PostCommit Java Examples Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Flink.yml) | N/A |`Run Java Examples_Flink`| [![.github/workflows/beam_PostCommit_Java_Examples_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Flink.yml) | +| [ PostCommit Java Examples Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Spark.yml) | N/A |`Run Java Examples_Spark`| [![.github/workflows/beam_PostCommit_Java_Examples_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Spark.yml) | | [ PostCommit Java Jpms Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml) | N/A |`Run Jpms Dataflow Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml) | | [ PostCommit Java Jpms Dataflow Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml) | N/A |`Run Jpms Dataflow Java 17 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml) | | [ PostCommit Java Jpms Direct Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml) | N/A |`Run Jpms Direct Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml) | diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml index 596b7f35ab5f..8516ccd393a6 100644 --- a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml +++ b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml @@ -84,7 +84,7 @@ jobs: with: comment_phrase: ${{ matrix.job_phrase }} ${{matrix.java_version}} github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) ${{matrix.java_version}} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{matrix.java_version}}) - name: Setup self-hosted uses: ./.github/actions/setup-self-hosted-action with: diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml similarity index 80% rename from .github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml rename to .github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml index bc26da173042..7015c4fee20a 100644 --- a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml +++ b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -name: PostCommit Java Examples Dataflow Java11 +name: PostCommit Java Examples Dataflow Java on: issue_comment: @@ -51,40 +51,39 @@ env: GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} jobs: - beam_PostCommit_Java_Examples_Dataflow_Java11: - name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + beam_PostCommit_Java_Examples_Dataflow_Java: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.java_version }}) runs-on: [self-hosted, ubuntu-20.04, main] timeout-minutes: 180 strategy: fail-fast: false matrix: - job_name: [beam_PostCommit_Java_Examples_Dataflow_Java11] - job_phrase: [Run Java examples on Dataflow Java 11] + job_name: [beam_PostCommit_Java_Examples_Dataflow_Java] + job_phrase: [Run Java examples on Dataflow Java] + java_version: ['11','17'] if: | github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || - github.event.comment.body == 'Run Java examples on Dataflow Java 11' + startswith(github.event.comment.body, 'Run Java examples on Dataflow Java') steps: - uses: actions/checkout@v3 - name: Setup repository uses: ./.github/actions/setup-action with: - comment_phrase: ${{ matrix.job_phrase }} + comment_phrase: ${{ matrix.job_phrase }} ${{matrix.java_version}} github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - # The test requires Java 11 and Java 8 versions. - # Java 8 is installed second because JAVA_HOME needs to point to Java 8. - - name: Set up Java + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.java_version }}) + - name: Set up Java${{ matrix.java_version }} uses: actions/setup-java@v3.8.0 with: distribution: 'temurin' java-version: | - 11 + ${{ matrix.java_version }} 8 - - name: run PostCommit Java Examples Dataflow Java11 script + - name: run java${{ matrix.java_version }}PostCommit script uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :runners:google-cloud-dataflow-java:examples:java11PostCommit + gradle-command: :runners:google-cloud-dataflow-java:examples:java${{ matrix.java_version }}PostCommit max-workers: 12 - name: Archive JUnit Test Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml b/.github/workflows/beam_PostCommit_Java_Examples_Direct.yml similarity index 82% rename from .github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml rename to .github/workflows/beam_PostCommit_Java_Examples_Direct.yml index f61cc41d5222..6831d7db3b5d 100644 --- a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml +++ b/.github/workflows/beam_PostCommit_Java_Examples_Direct.yml @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -name: PostCommit Java Examples Dataflow Java17 +name: PostCommit Java Examples Direct on: issue_comment: @@ -51,18 +51,18 @@ env: GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} jobs: - beam_PostCommit_Java_Examples_Dataflow_Java17: + beam_PostCommit_Java_Examples_Direct: name: ${{matrix.job_name}} (${{matrix.job_phrase}}) runs-on: [self-hosted, ubuntu-20.04, main] - timeout-minutes: 180 + timeout-minutes: 120 strategy: matrix: - job_name: [beam_PostCommit_Java_Examples_Dataflow_Java17] - job_phrase: [Run Java examples on Dataflow Java 17] + job_name: [beam_PostCommit_Java_Examples_Direct] + job_phrase: [Run Java Examples_Direct] if: | github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || - github.event.comment.body == 'Run Java examples on Dataflow Java 17' + github.event.comment.body == 'Run Java Examples_Direct' steps: - uses: actions/checkout@v3 - name: Setup repository @@ -71,18 +71,15 @@ jobs: comment_phrase: ${{ matrix.job_phrase }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Set up Java + - name: Install Java uses: actions/setup-java@v3.8.0 with: - distribution: 'temurin' - java-version: | - 17 - 8 - - name: run PostCommit Java Examples Dataflow Java17 script + distribution: 'zulu' + java-version: '8' + - name: run examplesIntegrationTest script uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :runners:google-cloud-dataflow-java:examples:java17PostCommit - max-workers: 12 + gradle-command: :runners:direct:examplesIntegrationTest - name: Archive JUnit Test Results uses: actions/upload-artifact@v3 if: failure() diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Flink.yml b/.github/workflows/beam_PostCommit_Java_Examples_Flink.yml new file mode 100644 index 000000000000..60644c434edc --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Flink.yml @@ -0,0 +1,101 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Examples Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Flink: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Examples_Flink] + job_phrase: [Run Java Examples_Flink] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Examples_Flink' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: '3.8' + - name: Setup Gradle + uses: gradle/gradle-build-action@v2 + with: + cache-read-only: false + - name: run examplesIntegrationTest script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:flink:1.15:examplesIntegrationTest + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Spark.yml b/.github/workflows/beam_PostCommit_Java_Examples_Spark.yml new file mode 100644 index 000000000000..17cbc8e583e2 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Spark.yml @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Examples Spark + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Spark: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Examples_Spark] + job_phrase: [Run Java Examples_Spark] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Examples_Spark' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run examplesIntegrationTest script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:examplesIntegrationTest + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file From a0f14a84c3f62015740cc61c8dbb900a5a41c625 Mon Sep 17 00:00:00 2001 From: Robert Burke Date: Fri, 15 Sep 2023 10:39:26 -0700 Subject: [PATCH 09/10] Prism docker env (#28444) * Move envs to own file. Err, not panic. * artifact and docker finagling * kill containers on job completion. * switch to docker sdk * tiny cleanup, log copy propperly * Fix pulls. Reduce logging. * Reduce metrics spam. * Minimize diff * Check only after worker start * Correct chunk size. --------- Co-authored-by: lostluck <13907733+lostluck@users.noreply.github.com> --- .../beam/core/runtime/metricsx/metricsx.go | 3 +- .../runners/prism/internal/environments.go | 199 ++++++++++++++++++ .../beam/runners/prism/internal/execute.go | 90 +++----- .../prism/internal/jobservices/artifact.go | 48 ++++- .../runners/prism/internal/jobservices/job.go | 10 + .../prism/internal/jobservices/management.go | 2 + .../prism/internal/jobservices/server.go | 5 + .../runners/prism/internal/worker/worker.go | 17 +- .../beam/runners/universal/runnerlib/stage.go | 2 +- 9 files changed, 299 insertions(+), 77 deletions(-) create mode 100644 sdks/go/pkg/beam/runners/prism/internal/environments.go diff --git a/sdks/go/pkg/beam/core/runtime/metricsx/metricsx.go b/sdks/go/pkg/beam/core/runtime/metricsx/metricsx.go index 4a872e291c6a..c71ead208364 100644 --- a/sdks/go/pkg/beam/core/runtime/metricsx/metricsx.go +++ b/sdks/go/pkg/beam/core/runtime/metricsx/metricsx.go @@ -24,6 +24,7 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/coder" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/metrics" pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "golang.org/x/exp/slog" ) // FromMonitoringInfos extracts metrics from monitored states and @@ -139,7 +140,7 @@ func groupByType(p *pipepb.Pipeline, minfos []*pipepb.MonitoringInfo) ( } } if len(errs) > 0 { - log.Printf("Warning: %v errors during metrics processing: %v\n", len(errs), errs) + slog.Debug("errors during metrics processing", "count", len(errs), "errors", errs) } return counters, distributions, gauges, msecs, pcols } diff --git a/sdks/go/pkg/beam/runners/prism/internal/environments.go b/sdks/go/pkg/beam/runners/prism/internal/environments.go new file mode 100644 index 000000000000..5830325bd054 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/environments.go @@ -0,0 +1,199 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "bytes" + "context" + "fmt" + "io" + "os" + + fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" + pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/jobservices" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/urns" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/worker" + "golang.org/x/exp/slog" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/protobuf/proto" + + dtyp "github.com/docker/docker/api/types" + "github.com/docker/docker/api/types/container" + "github.com/docker/docker/api/types/mount" + dcli "github.com/docker/docker/client" + "github.com/docker/docker/pkg/stdcopy" +) + +// TODO move environment handling to the worker package. + +func runEnvironment(ctx context.Context, j *jobservices.Job, env string, wk *worker.W) error { + logger := slog.With(slog.String("envID", wk.Env)) + // TODO fix broken abstraction. + // We're starting a worker pool here, because that's the loopback environment. + // It's sort of a mess, largely because of loopback, which has + // a different flow from a provisioned docker container. + e := j.Pipeline.GetComponents().GetEnvironments()[env] + switch e.GetUrn() { + case urns.EnvExternal: + ep := &pipepb.ExternalPayload{} + if err := (proto.UnmarshalOptions{}).Unmarshal(e.GetPayload(), ep); err != nil { + logger.Error("unmarshing external environment payload", "error", err) + } + go func() { + externalEnvironment(ctx, ep, wk) + slog.Debug("environment stopped", slog.String("job", j.String())) + }() + return nil + case urns.EnvDocker: + dp := &pipepb.DockerPayload{} + if err := (proto.UnmarshalOptions{}).Unmarshal(e.GetPayload(), dp); err != nil { + logger.Error("unmarshing docker environment payload", "error", err) + } + return dockerEnvironment(ctx, logger, dp, wk, j.ArtifactEndpoint()) + default: + return fmt.Errorf("environment %v with urn %v unimplemented", env, e.GetUrn()) + } +} + +func externalEnvironment(ctx context.Context, ep *pipepb.ExternalPayload, wk *worker.W) { + conn, err := grpc.Dial(ep.GetEndpoint().GetUrl(), grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + panic(fmt.Sprintf("unable to dial sdk worker %v: %v", ep.GetEndpoint().GetUrl(), err)) + } + defer conn.Close() + pool := fnpb.NewBeamFnExternalWorkerPoolClient(conn) + + endpoint := &pipepb.ApiServiceDescriptor{ + Url: wk.Endpoint(), + } + pool.StartWorker(ctx, &fnpb.StartWorkerRequest{ + WorkerId: wk.ID, + ControlEndpoint: endpoint, + LoggingEndpoint: endpoint, + ArtifactEndpoint: endpoint, + ProvisionEndpoint: endpoint, + Params: ep.GetParams(), + }) + // Job processing happens here, but orchestrated by other goroutines + // This goroutine blocks until the context is cancelled, signalling + // that the pool runner should stop the worker. + <-ctx.Done() + + // Previous context cancelled so we need a new one + // for this request. + pool.StopWorker(context.Background(), &fnpb.StopWorkerRequest{ + WorkerId: wk.ID, + }) +} + +func dockerEnvironment(ctx context.Context, logger *slog.Logger, dp *pipepb.DockerPayload, wk *worker.W, artifactEndpoint string) error { + logger = logger.With("worker_id", wk.ID, "image", dp.GetContainerImage()) + + // TODO consider preserving client? + cli, err := dcli.NewClientWithOpts(dcli.FromEnv, dcli.WithAPIVersionNegotiation()) + if err != nil { + return fmt.Errorf("couldn't connect to docker:%w", err) + } + + // TODO abstract mounting cloud specific auths better. + const gcloudCredsEnv = "GOOGLE_APPLICATION_CREDENTIALS" + gcloudCredsFile, ok := os.LookupEnv(gcloudCredsEnv) + var mounts []mount.Mount + var envs []string + if ok { + _, err := os.Stat(gcloudCredsFile) + // File exists + if err == nil { + dockerGcloudCredsFile := "/docker_cred_file.json" + mounts = append(mounts, mount.Mount{ + Type: "bind", + Source: gcloudCredsFile, + Target: dockerGcloudCredsFile, + }) + credEnv := fmt.Sprintf("%v=%v", gcloudCredsEnv, dockerGcloudCredsFile) + envs = append(envs, credEnv) + } + } + + if rc, err := cli.ImagePull(ctx, dp.GetContainerImage(), dtyp.ImagePullOptions{}); err == nil { + // Copy the output, but discard it so we can wait until the image pull is finished. + io.Copy(io.Discard, rc) + rc.Close() + } else { + logger.Warn("unable to pull image", "error", err) + } + + ccr, err := cli.ContainerCreate(ctx, &container.Config{ + Image: dp.GetContainerImage(), + Cmd: []string{ + fmt.Sprintf("--id=%v-%v", wk.JobKey, wk.Env), + fmt.Sprintf("--control_endpoint=%v", wk.Endpoint()), + fmt.Sprintf("--artifact_endpoint=%v", artifactEndpoint), + fmt.Sprintf("--provision_endpoint=%v", wk.Endpoint()), + fmt.Sprintf("--logging_endpoint=%v", wk.Endpoint()), + }, + Env: envs, + Tty: false, + }, &container.HostConfig{ + NetworkMode: "host", + Mounts: mounts, + }, nil, nil, "") + if err != nil { + cli.Close() + return fmt.Errorf("unable to create container image %v with docker for env %v, err: %w", dp.GetContainerImage(), wk.Env, err) + } + containerID := ccr.ID + logger = logger.With("container", containerID) + + if err := cli.ContainerStart(ctx, containerID, dtyp.ContainerStartOptions{}); err != nil { + cli.Close() + return fmt.Errorf("unable to start container image %v with docker for env %v, err: %w", dp.GetContainerImage(), wk.Env, err) + } + + // Start goroutine to wait on container state. + go func() { + defer cli.Close() + + statusCh, errCh := cli.ContainerWait(ctx, containerID, container.WaitConditionNotRunning) + select { + case <-ctx.Done(): + // Can't use command context, since it's already canceled here. + err := cli.ContainerKill(context.Background(), containerID, "") + if err != nil { + logger.Error("docker container kill error", "error", err) + } + case err := <-errCh: + if err != nil { + logger.Error("docker container wait error", "error", err) + } + case resp := <-statusCh: + logger.Info("docker container has self terminated", "status_code", resp.StatusCode) + + rc, err := cli.ContainerLogs(ctx, containerID, dtyp.ContainerLogsOptions{Details: true, ShowStdout: true, ShowStderr: true}) + if err != nil { + logger.Error("docker container logs error", "error", err) + } + defer rc.Close() + var buf bytes.Buffer + stdcopy.StdCopy(&buf, &buf, rc) + logger.Error("container self terminated", "log", buf.String()) + } + }() + + return nil +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/execute.go b/sdks/go/pkg/beam/runners/prism/internal/execute.go index b2f9d866603a..e0c67105d451 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/execute.go +++ b/sdks/go/pkg/beam/runners/prism/internal/execute.go @@ -24,7 +24,6 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec" - fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/engine" "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/jobservices" @@ -32,8 +31,6 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/worker" "golang.org/x/exp/maps" "golang.org/x/exp/slog" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" "google.golang.org/protobuf/proto" ) @@ -54,30 +51,21 @@ func RunPipeline(j *jobservices.Job) { return } env, _ := getOnlyPair(envs) - wk := worker.New(j.String()+"_"+env, env) // Cheating by having the worker id match the environment id. - go wk.Serve() - timeout := time.Minute - time.AfterFunc(timeout, func() { - if wk.Connected() { - return - } - err := fmt.Errorf("prism %v didn't get control connection after %v", wk, timeout) + wk, err := makeWorker(env, j) + if err != nil { j.Failed(err) - j.CancelFn(err) - }) - + return + } // When this function exits, we cancel the context to clear // any related job resources. defer func() { j.CancelFn(fmt.Errorf("runPipeline returned, cleaning up")) }() - go runEnvironment(j.RootCtx, j, env, wk) j.SendMsg("running " + j.String()) j.Running() - err := executePipeline(j.RootCtx, wk, j) - if err != nil { + if err := executePipeline(j.RootCtx, wk, j); err != nil { j.Failed(err) return } @@ -90,57 +78,27 @@ func RunPipeline(j *jobservices.Job) { j.Done() } -// TODO move environment handling to the worker package. - -func runEnvironment(ctx context.Context, j *jobservices.Job, env string, wk *worker.W) { - // TODO fix broken abstraction. - // We're starting a worker pool here, because that's the loopback environment. - // It's sort of a mess, largely because of loopback, which has - // a different flow from a provisioned docker container. - e := j.Pipeline.GetComponents().GetEnvironments()[env] - switch e.GetUrn() { - case urns.EnvExternal: - ep := &pipepb.ExternalPayload{} - if err := (proto.UnmarshalOptions{}).Unmarshal(e.GetPayload(), ep); err != nil { - slog.Error("unmarshing environment payload", err, slog.String("envID", wk.Env)) - } - externalEnvironment(ctx, ep, wk) - slog.Debug("environment stopped", slog.String("envID", wk.String()), slog.String("job", j.String())) - default: - panic(fmt.Sprintf("environment %v with urn %v unimplemented", env, e.GetUrn())) - } -} - -func externalEnvironment(ctx context.Context, ep *pipepb.ExternalPayload, wk *worker.W) { - conn, err := grpc.Dial(ep.GetEndpoint().GetUrl(), grpc.WithTransportCredentials(insecure.NewCredentials())) - if err != nil { - panic(fmt.Sprintf("unable to dial sdk worker %v: %v", ep.GetEndpoint().GetUrl(), err)) - } - defer conn.Close() - pool := fnpb.NewBeamFnExternalWorkerPoolClient(conn) - - endpoint := &pipepb.ApiServiceDescriptor{ - Url: wk.Endpoint(), +// makeWorker creates a worker for that environment. +func makeWorker(env string, j *jobservices.Job) (*worker.W, error) { + wk := worker.New(j.String()+"_"+env, env) + wk.EnvPb = j.Pipeline.GetComponents().GetEnvironments()[env] + wk.JobKey = j.JobKey() + wk.ArtifactEndpoint = j.ArtifactEndpoint() + go wk.Serve() + if err := runEnvironment(j.RootCtx, j, env, wk); err != nil { + return nil, fmt.Errorf("failed to start environment %v for job %v: %w", env, j, err) } - pool.StartWorker(ctx, &fnpb.StartWorkerRequest{ - WorkerId: wk.ID, - ControlEndpoint: endpoint, - LoggingEndpoint: endpoint, - ArtifactEndpoint: endpoint, - ProvisionEndpoint: endpoint, - Params: nil, - }) - - // Job processing happens here, but orchestrated by other goroutines - // This goroutine blocks until the context is cancelled, signalling - // that the pool runner should stop the worker. - <-ctx.Done() - - // Previous context cancelled so we need a new one - // for this request. - pool.StopWorker(context.Background(), &fnpb.StopWorkerRequest{ - WorkerId: wk.ID, + // Check for connection succeeding after we've created the environment successfully. + timeout := 1 * time.Minute + time.AfterFunc(timeout, func() { + if wk.Connected() { + return + } + err := fmt.Errorf("prism %v didn't get control connection to %v after %v", wk, wk.Endpoint(), timeout) + j.Failed(err) + j.CancelFn(err) }) + return wk, nil } type transformExecuter interface { diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go index e66def5b0fe8..99b786d45980 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go @@ -16,11 +16,14 @@ package jobservices import ( + "bytes" + "context" "fmt" "io" jobpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/jobmanagement_v1" "golang.org/x/exp/slog" + "google.golang.org/protobuf/encoding/prototext" ) func (s *Server) ReverseArtifactRetrievalService(stream jobpb.ArtifactStagingService_ReverseArtifactRetrievalServiceServer) error { @@ -47,7 +50,7 @@ func (s *Server) ReverseArtifactRetrievalService(stream jobpb.ArtifactStagingSer }, }, }) - var count int + var buf bytes.Buffer for { in, err := stream.Recv() if err == io.EOF { @@ -56,26 +59,61 @@ func (s *Server) ReverseArtifactRetrievalService(stream jobpb.ArtifactStagingSer if err != nil { return err } - if in.IsLast { - slog.Debug("GetArtifact finish", + if in.GetIsLast() { + slog.Debug("GetArtifact finished", slog.Group("dep", slog.String("urn", dep.GetTypeUrn()), slog.String("payload", string(dep.GetTypePayload()))), - slog.Int("bytesReceived", count)) + slog.Int("bytesReceived", buf.Len()), + slog.String("rtype", fmt.Sprintf("%T", in.GetResponse())), + ) break } // Here's where we go through each environment's artifacts. // We do nothing with them. switch req := in.GetResponse().(type) { case *jobpb.ArtifactResponseWrapper_GetArtifactResponse: - count += len(req.GetArtifactResponse.GetData()) + buf.Write(req.GetArtifactResponse.GetData()) + case *jobpb.ArtifactResponseWrapper_ResolveArtifactResponse: err := fmt.Errorf("unexpected ResolveArtifactResponse to GetArtifact: %v", in.GetResponse()) slog.Error("GetArtifact failure", err) return err } } + if len(s.artifacts) == 0 { + s.artifacts = map[string][]byte{} + } + s.artifacts[string(dep.GetTypePayload())] = buf.Bytes() } } return nil } + +func (s *Server) ResolveArtifacts(_ context.Context, req *jobpb.ResolveArtifactsRequest) (*jobpb.ResolveArtifactsResponse, error) { + return &jobpb.ResolveArtifactsResponse{ + Replacements: req.GetArtifacts(), + }, nil +} + +func (s *Server) GetArtifact(req *jobpb.GetArtifactRequest, stream jobpb.ArtifactRetrievalService_GetArtifactServer) error { + info := req.GetArtifact() + buf, ok := s.artifacts[string(info.GetTypePayload())] + if !ok { + pt := prototext.Format(info) + slog.Warn("unable to provide artifact to worker", "artifact_info", pt) + return fmt.Errorf("unable to provide %v to worker", pt) + } + chunk := 128 * 1024 * 1024 // 128 MB + var i int + for i+chunk < len(buf) { + stream.Send(&jobpb.GetArtifactResponse{ + Data: buf[i : i+chunk], + }) + i += chunk + } + stream.Send(&jobpb.GetArtifactResponse{ + Data: buf[i:], + }) + return nil +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go index 10d36066391f..87b0ec007bfb 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go @@ -68,6 +68,8 @@ type Job struct { key string jobName string + artifactEndpoint string + Pipeline *pipepb.Pipeline options *structpb.Struct @@ -88,6 +90,10 @@ type Job struct { metrics metricsStore } +func (j *Job) ArtifactEndpoint() string { + return j.artifactEndpoint +} + // ContributeTentativeMetrics returns the datachannel read index, and any unknown monitoring short ids. func (j *Job) ContributeTentativeMetrics(payloads *fnpb.ProcessBundleProgressResponse) (int64, []string) { return j.metrics.ContributeTentativeMetrics(payloads) @@ -113,6 +119,10 @@ func (j *Job) LogValue() slog.Value { slog.String("name", j.jobName)) } +func (j *Job) JobKey() string { + return j.key +} + func (j *Job) SendMsg(msg string) { j.streamCond.L.Lock() defer j.streamCond.L.Unlock() diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go index e626a05b51e1..213e33a78379 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go @@ -79,6 +79,8 @@ func (s *Server) Prepare(ctx context.Context, req *jobpb.PrepareJobRequest) (*jo streamCond: sync.NewCond(&sync.Mutex{}), RootCtx: rootCtx, CancelFn: cancelFn, + + artifactEndpoint: s.Endpoint(), } // Queue initial state of the job. diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go index e3fb7766b519..bf2db814813c 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go @@ -29,6 +29,7 @@ import ( type Server struct { jobpb.UnimplementedJobServiceServer jobpb.UnimplementedArtifactStagingServiceServer + jobpb.UnimplementedArtifactRetrievalServiceServer fnpb.UnimplementedProvisionServiceServer // Server management @@ -42,6 +43,9 @@ type Server struct { // execute defines how a job is executed. execute func(*Job) + + // Artifact hack + artifacts map[string][]byte } // NewServer acquires the indicated port. @@ -60,6 +64,7 @@ func NewServer(port int, execute func(*Job)) *Server { s.server = grpc.NewServer(opts...) jobpb.RegisterJobServiceServer(s.server, s) jobpb.RegisterArtifactStagingServiceServer(s.server, s) + jobpb.RegisterArtifactRetrievalServiceServer(s.server, s) return s } diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go b/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go index 0ad7ccb37032..3a862a143b73 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go +++ b/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go @@ -57,6 +57,9 @@ type W struct { ID, Env string + JobKey, ArtifactEndpoint string + EnvPb *pipepb.Environment + // Server management lis net.Listener server *grpc.Server @@ -107,6 +110,7 @@ func New(id, env string) *W { fnpb.RegisterBeamFnDataServer(wk.server, wk) fnpb.RegisterBeamFnLoggingServer(wk.server, wk) fnpb.RegisterBeamFnStateServer(wk.server, wk) + fnpb.RegisterProvisionServiceServer(wk.server, wk) return wk } @@ -164,10 +168,15 @@ func (wk *W) GetProvisionInfo(_ context.Context, _ *fnpb.GetProvisionInfoRequest RunnerCapabilities: []string{ urns.CapabilityMonitoringInfoShortIDs, }, - LoggingEndpoint: endpoint, - ControlEndpoint: endpoint, - ArtifactEndpoint: endpoint, - // TODO add this job's RetrievalToken + LoggingEndpoint: endpoint, + ControlEndpoint: endpoint, + ArtifactEndpoint: &pipepb.ApiServiceDescriptor{ + Url: wk.ArtifactEndpoint, + }, + + RetrievalToken: wk.JobKey, + Dependencies: wk.EnvPb.GetDependencies(), + // TODO add this job's artifact Dependencies Metadata: map[string]string{ diff --git a/sdks/go/pkg/beam/runners/universal/runnerlib/stage.go b/sdks/go/pkg/beam/runners/universal/runnerlib/stage.go index 732f4382ab5d..d5cc6aa7327a 100644 --- a/sdks/go/pkg/beam/runners/universal/runnerlib/stage.go +++ b/sdks/go/pkg/beam/runners/universal/runnerlib/stage.go @@ -44,7 +44,7 @@ func Stage(ctx context.Context, id, endpoint, binary, st string) (retrievalToken defer cc.Close() if err := StageViaPortableAPI(ctx, cc, binary, st); err == nil { - return "", nil + return st, nil } log.Warnf(ctx, "unable to stage with PortableAPI: %v; falling back to legacy", err) From 67a9cf378611754ac563a9b18e9b4c6f32469f39 Mon Sep 17 00:00:00 2001 From: Xinyu Liu Date: Fri, 15 Sep 2023 11:07:33 -0700 Subject: [PATCH 10/10] Allow dropLataData in GBK for SamzaRunner (#28461) --- .../runners/samza/SamzaPipelineOptions.java | 6 + .../runners/samza/runtime/GroupByKeyOp.java | 12 +- .../samza/runtime/GroupByKeyOpTest.java | 123 ++++++++++++++++++ 3 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java index af6d890cf51b..a34303d92552 100644 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java +++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java @@ -173,4 +173,10 @@ public ExecutorService create(PipelineOptions options) { new ThreadFactoryBuilder().setNameFormat("Process Element Thread-%d").build()); } } + + @Description("Enable/disable late data dropping in GroupByKey/Combine transforms") + @Default.Boolean(false) + boolean getDropLateData(); + + void setDropLateData(boolean dropLateData); } diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java index 3ecd406da615..1b19275dd967 100644 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java +++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java @@ -180,11 +180,19 @@ public TimerInternals timerInternals() { DoFnSchemaInformation.create(), Collections.emptyMap()); + final DoFnRunner, KV> dropLateDataRunner = + pipelineOptions.getDropLateData() + ? DoFnRunners.lateDataDroppingRunner( + doFnRunner, keyedInternals.timerInternals(), windowingStrategy) + : doFnRunner; + final SamzaExecutionContext executionContext = (SamzaExecutionContext) context.getApplicationContainerContext(); - this.fnRunner = + final DoFnRunner, KV> doFnRunnerWithMetrics = DoFnRunnerWithMetrics.wrap( - doFnRunner, executionContext.getMetricsContainer(), transformFullName); + dropLateDataRunner, executionContext.getMetricsContainer(), transformFullName); + + this.fnRunner = new DoFnRunnerWithKeyedInternals<>(doFnRunnerWithMetrics, keyedInternals); } @Override diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java new file mode 100644 index 000000000000..8670d9a46eac --- /dev/null +++ b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.samza.runtime; + +import java.io.Serializable; +import java.util.Arrays; +import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.coders.VarIntCoder; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.testing.TestStream; +import org.apache.beam.sdk.transforms.Combine; +import org.apache.beam.sdk.transforms.Sum; +import org.apache.beam.sdk.transforms.windowing.FixedWindows; +import org.apache.beam.sdk.transforms.windowing.Window; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.TimestampedValue; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Rule; +import org.junit.Test; + +/** Tests for GroupByKeyOp. */ +public class GroupByKeyOpTest implements Serializable { + @Rule + public final transient TestPipeline pipeline = + TestPipeline.fromOptions( + PipelineOptionsFactory.fromArgs("--runner=TestSamzaRunner").create()); + + @Rule + public final transient TestPipeline dropLateDataPipeline = + TestPipeline.fromOptions( + PipelineOptionsFactory.fromArgs("--runner=TestSamzaRunner", "--dropLateData=true") + .create()); + + @Test + public void testDefaultGbk() { + TestStream.Builder testStream = + TestStream.create(VarIntCoder.of()) + .addElements(TimestampedValue.of(1, new Instant(1000))) + .addElements(TimestampedValue.of(2, new Instant(2000))) + .advanceWatermarkTo(new Instant(3000)) + .addElements(TimestampedValue.of(10, new Instant(1000))) + .advanceWatermarkTo(new Instant(10000)); + + PCollection aggregated = + pipeline + .apply(testStream.advanceWatermarkToInfinity()) + .apply( + Window.into(FixedWindows.of(Duration.standardSeconds(3))) + .accumulatingFiredPanes()) + .apply(Combine.globally(Sum.ofIntegers()).withoutDefaults()); + + PAssert.that(aggregated).containsInAnyOrder(Arrays.asList(3, 10)); + + pipeline.run().waitUntilFinish(); + } + + @Test + public void testDropLateDataNonKeyed() { + TestStream.Builder testStream = + TestStream.create(VarIntCoder.of()) + .addElements(TimestampedValue.of(1, new Instant(1000))) + .addElements(TimestampedValue.of(2, new Instant(2000))) + .advanceWatermarkTo(new Instant(3000)) + .addElements(TimestampedValue.of(10, new Instant(1000))) + .advanceWatermarkTo(new Instant(10000)); + + PCollection aggregated = + dropLateDataPipeline + .apply(testStream.advanceWatermarkToInfinity()) + .apply( + Window.into(FixedWindows.of(Duration.standardSeconds(3))) + .accumulatingFiredPanes()) + .apply(Combine.globally(Sum.ofIntegers()).withoutDefaults()); + + PAssert.that(aggregated).containsInAnyOrder(3); + + dropLateDataPipeline.run().waitUntilFinish(); + } + + @Test + public void testDropLateDataKeyed() { + TestStream.Builder> testStream = + TestStream.create(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())) + .addElements(TimestampedValue.of(KV.of("a", 1), new Instant(1000))) + .addElements(TimestampedValue.of(KV.of("b", 2), new Instant(2000))) + .addElements(TimestampedValue.of(KV.of("a", 3), new Instant(2500))) + .advanceWatermarkTo(new Instant(3000)) + .addElements(TimestampedValue.of(KV.of("a", 10), new Instant(1000))) + .advanceWatermarkTo(new Instant(10000)); + + PCollection> aggregated = + dropLateDataPipeline + .apply(testStream.advanceWatermarkToInfinity()) + .apply( + Window.>into(FixedWindows.of(Duration.standardSeconds(3))) + .accumulatingFiredPanes()) + .apply(Sum.integersPerKey()); + + PAssert.that(aggregated).containsInAnyOrder(Arrays.asList(KV.of("a", 4), KV.of("b", 2))); + + dropLateDataPipeline.run().waitUntilFinish(); + } +}