diff --git a/.github/actions/setup-action/action.yml b/.github/actions/setup-action/action.yml index cb24a065f98c..da69dd9a97dd 100644 --- a/.github/actions/setup-action/action.yml +++ b/.github/actions/setup-action/action.yml @@ -70,3 +70,5 @@ runs: shell: bash run: | echo KUBELET_GCLOUD_CONFIG_PATH=/var/lib/kubelet/pods/$POD_UID/volumes/kubernetes.io~empty-dir/gcloud >> $GITHUB_ENV + - name: Setup environment + uses: ./.github/actions/setup-environment-action diff --git a/.github/actions/setup-self-hosted-action/action.yml b/.github/actions/setup-environment-action/action.yml similarity index 50% rename from .github/actions/setup-self-hosted-action/action.yml rename to .github/actions/setup-environment-action/action.yml index ba3bf8d0d5d8..3452a16c132c 100644 --- a/.github/actions/setup-self-hosted-action/action.yml +++ b/.github/actions/setup-environment-action/action.yml @@ -15,47 +15,42 @@ # specific language governing permissions and limitations # under the License. -name: 'Setup environment for self-hosted runners' -description: 'Setup action to run jobs in a self-hosted runner' +name: 'Setup environment action' +description: 'Setup environment to run jobs' inputs: - requires-py-38: + python-version: required: false - description: 'Set as false if does not require py38 setup' - default: 'true' - requires-py-39: + description: 'Install Python version' + default: '' + java-version: required: false - description: 'Set as false if does not require py39 setup' - default: 'true' - requires-java-8: + description: 'Install Java version' + default: '' + go-version: required: false - description: 'Set as false if does not require java-8 setup' - default: 'true' - requires-go: - required: false - description: 'Set as false if does not require go setup' - default: 'true' + description: 'Install Go version' + default: '' runs: using: "composite" steps: - - name: Install python 3.8 - if: ${{ inputs.requires-py-38 == 'true' }} - uses: actions/setup-python@v4 - with: - python-version: "3.8" - - name: Install python 3.9 - if: ${{ inputs.requires-py-39 == 'true' }} + - name: Install Python + if: ${{ inputs.python-version != '' }} uses: actions/setup-python@v4 with: - python-version: "3.9" - - name: Set Java Version - if: ${{ inputs.requires-java-8 == 'true' }} + python-version: ${{ inputs.python-version }} + - name: Install Java + if: ${{ inputs.java-version != '' }} uses: actions/setup-java@v3 with: distribution: 'temurin' - java-version: 8 - - name: Set Go Version - if: ${{ inputs.requires-go == 'true' }} + java-version: ${{ inputs.java-version }} + - name: Setup Gradle + uses: gradle/gradle-build-action@v2 + with: + cache-read-only: false + - name: Install Go + if: ${{ inputs.go-version != '' }} uses: actions/setup-go@v3 with: - go-version: '1.21' # never set patch, to get latest patch releases. + go-version: ${{ inputs.go-version }} # never set patch, to get latest patch releases. 
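
The refactor above replaces the boolean `requires-*` flags with explicit version inputs, so a caller now opts in to each toolchain by passing a version; the empty default skips that setup step via the `!= ''` conditions. A minimal sketch of a hypothetical caller (the workflow and job names are placeholders, not part of this PR; the version values mirror ones used elsewhere in this diff):

```yaml
# Illustrative only: shows how a workflow would consume the renamed
# setup-environment-action with the new version inputs.
on: workflow_dispatch
jobs:
  example-job:                              # hypothetical job name
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
      - uses: actions/checkout@v4
      - name: Setup environment
        uses: ./.github/actions/setup-environment-action
        with:
          python-version: '3.8'   # omit to skip actions/setup-python@v4
          java-version: '11'      # omit to skip actions/setup-java@v3 (temurin)
          go-version: '1.21'      # omit to skip actions/setup-go@v3
```

Note that the Gradle setup step (`gradle/gradle-build-action@v2`) is unconditional in the composite action, so every caller gets a configured Gradle cache regardless of which version inputs are set.
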
diff --git a/.github/gh-actions-self-hosted-runners/arc/images/Dockerfile b/.github/gh-actions-self-hosted-runners/arc/images/Dockerfile index 41f8061d44a9..2cbfea75ab55 100644 --- a/.github/gh-actions-self-hosted-runners/arc/images/Dockerfile +++ b/.github/gh-actions-self-hosted-runners/arc/images/Dockerfile @@ -59,10 +59,15 @@ RUN curl -OL https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-c rm google-cloud-sdk-367.0.0-linux-x86_64.tar.gz && \ mv google-cloud-sdk /usr/local/google-cloud-sdk && \ /usr/local/google-cloud-sdk/install.sh --quiet && \ - /usr/local/google-cloud-sdk/bin/gcloud components install kubectl && \ + /usr/local/google-cloud-sdk/bin/gcloud components install gke-gcloud-auth-plugin && \ #revert permission chown -R runner:runner /home/runner/.config +ENV USE_GKE_GCLOUD_AUTH_PLUGIN=True ENV PATH="${PATH}:/usr/local/google-cloud-sdk/bin" +#Install Kubectl +RUN curl -OL https://dl.k8s.io/release/v1.28.1/bin/linux/amd64/kubectl && \ + chmod +x ./kubectl && \ + mv ./kubectl /usr/local/bin/kubectl #Install Apache Maven RUN curl -OL https://dlcdn.apache.org/maven/maven-3/3.9.4/binaries/apache-maven-3.9.4-bin.tar.gz && \ tar -xvf apache-maven-3.9.4-bin.tar.gz && \ @@ -73,4 +78,5 @@ ENV MAVEN_HOME="/usr/local/maven" # Needed to transfer path additions to runner environment RUN echo PATH=$PATH >> /runnertmp/.env +RUN echo USE_GKE_GCLOUD_AUTH_PLUGIN=$USE_GKE_GCLOUD_AUTH_PLUGIN >> /runnertmp/.env USER runner diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 8636f513b71b..0d0277bd478d 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -180,36 +180,90 @@ Please note that jobs with matrix need to have matrix element in the comment. Ex ```Run Python PreCommit (3.8)``` | Workflow name | Matrix | Trigger Phrase | Cron Status | |:-------------:|:------:|:--------------:|:-----------:| -| [ PostCommit BeamMetrics Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml) | N/A |`Run Beam Metrics Deployment`| [![.github/workflows/beam_PostCommit_BeamMetrics_Publish](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) -| [ PostCommit TransformService Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) | N/A |`Run TransformService_Direct PostCommit`| [![.github/workflows/beam_PostCommit_TransformService_Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) -| [ PostCommit Go ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | N/A |`Run Go PostCommit`| [![.github/workflows/beam_PostCommit_Go](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | -| [ PostCommit Go Dataflow ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | N/A |`Run Go PostCommit Dataflow ARM`| [![.github/workflows/beam_PostCommit_Go_Dataflow_ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | -| [ PostCommit Go VR 
Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | N/A |`Run Go Flink ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | -| [ PostCommit Go VR Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | N/A |`Run Go Samza ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | -| [ PostCommit Go VR Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | N/A |`Run Go Spark ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | -| [ PostCommit Java ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml) | N/A |`Run Java PostCommit`| [![PostCommit Java](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml) | -| [ PostCommit Java Examples Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | N/A |`Run Java examples on Dataflow Java 11`| [![PostCommit Java Examples Dataflow Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | -| [ PostCommit Java Avro Versions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | N/A |`Run Java Avro Versions PostCommit`| [![PostCommit Java Avro Versions](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | -| [ PostCommit Java Dataflow V1 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | N/A |`Run PostCommit_Java_Dataflow`| [![PostCommit Java Dataflow V1](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | -| [ PostCommit Java Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | N/A |`Run PostCommit_Java_DataflowV2`| [![PostCommit Java Dataflow V2](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | -| [ PostCommit Java Examples Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | N/A |`Run Java examples on Dataflow Java 11`| [![PostCommit Java Examples Dataflow 
Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | -| [ PostCommit Java Examples Dataflow ARM ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | N/A |`Run Java_Examples_Dataflow_ARM PostCommit`| [![PostCommit Java Examples Dataflow ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | -| [ PostCommit Java Examples Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | N/A |`Run Java examples on Dataflow Java 11`| [![PostCommit Java Examples Dataflow Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | -| [ PostCommit Java Examples Dataflow Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml) | N/A |`Run Java examples on Dataflow Java 17`| [![PostCommit Java Examples Dataflow Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml) | +| [ Load Tests CoGBK Dataflow Streaming Java ](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml) | N/A |`Run Load Tests Java CoGBK Dataflow Streaming`| [![.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml) +| [ Performance Tests BigQueryIO Batch Java Avro ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml) | N/A |`Run BigQueryIO Batch Performance Test Java Avro`| [![.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml) +| [ Performance Tests BigQueryIO Batch Java Json ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml) | N/A |`Run BigQueryIO Batch Performance Test Java Json`| [![.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml) +| [ Performance Tests BigQueryIO Streaming Java ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml) | N/A |`Run BigQueryIO Streaming Performance Test Java`| 
[![.github/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml) +| [ PostCommit BeamMetrics Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml) | N/A |`Run Beam Metrics Deployment`| [![.github/workflows/beam_PostCommit_BeamMetrics_Publish.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml) +| [ PostCommit Go ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | N/A |`Run Go PostCommit`| [![.github/workflows/beam_PostCommit_Go.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | +| [ PostCommit Go Dataflow ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | N/A |`Run Go PostCommit Dataflow ARM`| [![.github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | +| [ PostCommit Go VR Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | N/A |`Run Go Flink ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | +| [ PostCommit Go VR Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | N/A |`Run Go Samza ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Samza.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | +| [ PostCommit Go VR Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | N/A |`Run Go Spark ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | +| [ PostCommit Java Avro Versions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | N/A |`Run Java Avro Versions PostCommit`| [![.github/workflows/beam_PostCommit_Java_Avro_Versions.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | +| [ PostCommit Java Dataflow V1 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | N/A |`Run PostCommit_Java_Dataflow`| 
[![.github/workflows/beam_PostCommit_Java_DataflowV1.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | +| [ PostCommit Java Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | N/A |`Run PostCommit_Java_DataflowV2`| [![.github/workflows/beam_PostCommit_Java_DataflowV2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | +| [ PostCommit Java Examples Dataflow ARM ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | N/A |`Run Java_Examples_Dataflow_ARM PostCommit`| [![.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | +| [ PostCommit Java Examples Dataflow Java ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml) | ['11','17'] |`Run Java examples on Dataflow Java (matrix_element)`| [![.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml) | +| [ PostCommit Java Examples Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Direct.yml) | N/A |`Run Java Examples_Direct`| [![.github/workflows/beam_PostCommit_Java_Examples_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Direct.yml) | +| [ PostCommit Java Examples Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Flink.yml) | N/A |`Run Java Examples_Flink`| [![.github/workflows/beam_PostCommit_Java_Examples_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Flink.yml) | +| [ PostCommit Java Examples Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Spark.yml) | N/A |`Run Java Examples_Spark`| [![.github/workflows/beam_PostCommit_Java_Examples_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Spark.yml) | +| [ PostCommit Java Hadoop Versions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Hadoop_Versions.yml) | N/A |`Run PostCommit_Java_Hadoop_Versions`| [![.github/workflows/beam_PostCommit_Java_Hadoop_Versions.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Hadoop_Versions.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Hadoop_Versions.yml) | | [ PostCommit Java Jpms Dataflow Java11 
](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml) | N/A |`Run Jpms Dataflow Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml) | | [ PostCommit Java Jpms Dataflow Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml) | N/A |`Run Jpms Dataflow Java 17 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml) | | [ PostCommit Java Jpms Direct Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml) | N/A |`Run Jpms Direct Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml) | | [ PostCommit Java Jpms Direct Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml) | N/A |`Run Jpms Direct Java 17 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml) | | [ PostCommit Java Jpms Flink Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml) | N/A |`Run Jpms Flink Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml) | | [ PostCommit Java Jpms Spark Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml) | N/A |`Run Jpms Spark Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml) | -| [ PostCommit Java Sickbay ](https://github.com/apache/beam/actions/workflows/bbeam_PostCommit_Java_Sickbay.yml) | N/A |`Run Java Sickbay`| [![PostCommit Java Sickbay](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/bbeam_PostCommit_Java_Sickbay.yml) | -| [ PostCommit Python Examples Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | N/A |`Run Python Examples_Dataflow`| [![PostCommit Python Examples Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | -| [ PostCommit Python Examples 
Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | N/A |`Run Python Examples_Direct`| [![PostCommit Python Examples Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | -| [ PostCommit Python Examples Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | N/A |`Run Python Examples_Flink`| [![PostCommit Python Examples Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | -| [ PostCommit Python Examples Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | N/A |`Run Python Examples_Spark`| [![PostCommit Python Examples Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | -| [ PostCommit Website Publish](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | N/A | N/A | [![.github/workflows/beam_PostCommit_Website_Publish](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | +| [ PostCommit Java Nexmark Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml) | N/A |`Run Dataflow Runner Nexmark Tests`| [![.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml) | +| [ PostCommit Java Nexmark Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml) | N/A |`Run Dataflow Runner V2 Nexmark Tests`| [![.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml) | +| [ PostCommit Java Nexmark Dataflow V2 Java ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml) | ['11','17'] |`Run Dataflow Runner V2 Java (matrix) Nexmark Tests`| [![.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml) | +| [ PostCommit Java Nexmark Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Direct.yml) | N/A |`Run Direct Runner Nexmark Tests`| [![.github/workflows/beam_PostCommit_Java_Nexmark_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Direct.yml) | +| [ PostCommit Java Nexmark Flink 
](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Flink.yml) | N/A |`Run Flink Runner Nexmark Tests`| [![.github/workflows/beam_PostCommit_Java_Nexmark_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Flink.yml) | +| [ PostCommit Java Nexmark Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Spark.yml) | N/A |`Run Spark Runner Nexmark Tests`| [![.github/workflows/beam_PostCommit_Java_Nexmark_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Spark.yml) | +| [ PostCommit Java PVR Flink Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml) | N/A |`Run Java Flink PortableValidatesRunner Streaming`| [![PostCommit Java PVR Flink Streaming](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml) | +| [ PostCommit Java PVR Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Samza.yml) | N/A |`Run Java Samza PortableValidatesRunner`| [![PostCommit Java PVR Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Samza.yml) | +| [ PostCommit Java PVR Spark3 Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml) | N/A |`Run Java Spark v3 PortableValidatesRunner Streaming`| [![PostCommit Java PVR Spark3 Streaming](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml) | +| [ PostCommit Java PVR Spark Batch ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml) | N/A |`Run Java Spark PortableValidatesRunner Batch`| [![PostCommit Java PVR Spark Batch](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml) | +| [ PostCommit Java Sickbay ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml) | N/A |`Run Java Sickbay`| [![.github/workflows/beam_PostCommit_Java_Sickbay.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml) | +| [ PostCommit Java ValidatesRunner Dataflow JavaVersions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml) | ['11','17'] |`Run Dataflow ValidatesRunner Java (matrix_element)`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml) | +| [ PostCommit Java ValidatesRunner 
Dataflow Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) | N/A |`Run Dataflow Streaming ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) | +| [ PostCommit Java ValidatesRunner Dataflow V2 Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml) | N/A |`Run Java Dataflow V2 ValidatesRunner Streaming`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml) | +| [ PostCommit Java ValidatesRunner Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml) | N/A |`Run Java Dataflow V2 ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml) | +| [ PostCommit Java ValidatesRunner Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) | N/A |`Run Dataflow ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) | +| [ PostCommit Java ValidatesRunner Direct JavaVersions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml) | ['11','17'] |`Run Direct ValidatesRunner Java (matrix_element)`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml) | +| [ PostCommit Java ValidatesRunner Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml) | N/A |`Run Direct ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml) | +| [ PostCommit Java ValidatesRunner Flink Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml) | N/A |`Run Flink ValidatesRunner Java 11`| 
[![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml) | +| [ PostCommit Java ValidatesRunner Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml) | N/A |`Run Flink ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml) | +| [ PostCommit Java ValidatesRunner Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml) | N/A |`Run Samza ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml) | +| [ PostCommit Java ValidatesRunner Spark Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml) | N/A |`Run Spark ValidatesRunner Java 11`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml) | +| [ PostCommit Java ValidatesRunner Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml) | N/A |`Run Spark ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml) | +| [ PostCommit Java ValidatesRunner SparkStructuredStreaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) | N/A |`Run Spark StructuredStreaming ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) | +| [ PostCommit Java ValidatesRunner Twister2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml) | N/A |`Run Twister2 ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml) | +| [ PostCommit Java ValidatesRunner ULR ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml) | N/A |`Run ULR Loopback ValidatesRunner`| 
[![.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml) | +| [ PostCommit Java ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml) | N/A |`Run Java PostCommit`| [![.github/workflows/beam_PostCommit_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml) | +| [ PostCommit Javadoc ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Javadoc.yml) | N/A |`Run Javadoc PostCommit`| [![.github/workflows/beam_PostCommit_Javadoc.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Javadoc.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Javadoc.yml) | +| [ PostCommit PortableJar Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Flink.yml) | N/A |`Run PortableJar_Flink PostCommit`| [![.github/workflows/beam_PostCommit_PortableJar_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Flink.yml) | +| [ PostCommit PortableJar Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Spark.yml) | N/A |`Run PortableJar_Spark PostCommit`| [![.github/workflows/beam_PostCommit_PortableJar_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Spark.yml) | +| [ PostCommit Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python PostCommit (matrix_element)`| [![.github/workflows/beam_PostCommit_Python.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python.yml) | +| [ PostCommit Python Examples Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | N/A |`Run Python Examples_Dataflow`| [![.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | +| [ PostCommit Python Examples Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python Examples_Direct (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_Examples_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | +| [ PostCommit Python Examples Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | ['3.8','3.11'] |`Run Python Examples_Flink (matrix_element)`| 
[![.github/workflows/beam_PostCommit_Python_Examples_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | +| [ PostCommit Python Examples Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | ['3.8','3.11'] |`Run Python Examples_Spark (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_Examples_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | +| [ PostCommit Python MongoDBIO IT ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml) | N/A |`Run Python MongoDBIO_IT`| [![.github/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml) | +| [ PostCommit Python Nexmark Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Nexmark_Direct.yml) | N/A |`Run Python Direct Runner Nexmark Tests`| [![.github/workflows/beam_PostCommit_Python_Nexmark_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Nexmark_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Nexmark_Direct.yml) | +| [ PostCommit Python ValidatesContainer Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python Dataflow ValidatesContainer (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml) | +| [ PostCommit Python ValidatesContainer Dataflow With RC ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python RC Dataflow ValidatesContainer (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml) | +| [ PostCommit Python ValidatesRunner Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml) | ['3.8','3.11'] |`Run Python Dataflow ValidatesRunner (matrix_element)`| [![PostCommit Python ValidatesRunner Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml) | +| [ PostCommit Python ValidatesRunner Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml) | ['3.8','3.11'] |`Run Python Flink ValidatesRunner (matrix_element)`| [![PostCommit Python 
ValidatesRunner Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml) | +| [ PostCommit Python ValidatesRunner Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml) | ['3.8','3.11'] |`Run Python Samza ValidatesRunner (matrix_element)`| [![PostCommit Python ValidatesRunner Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml) | +| [ PostCommit Python ValidatesRunner Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml) | ['3.8','3.9','3.11'] |`Run Python Spark ValidatesRunner (matrix_element)`| [![PostCommit Python ValidatesRunner Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml) | +| [ PostCommit Python Xlang Gcp Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml) | N/A |`Run Python_Xlang_Gcp_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml) | +| [ PostCommit Python Xlang Gcp Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml) | N/A |`Run Python_Xlang_Gcp_Direct PostCommit`| [![.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml) | +| [ PostCommit Python Xlang IO Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml) | N/A |`Run Python_Xlang_IO_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml) | +| [ PostCommit Sickbay Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python (matrix_element) PostCommit Sickbay`| [![.github/workflows/beam_PostCommit_Sickbay_Python.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | +| [ PostCommit SQL ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_SQL.yml) | N/A |`Run SQL PostCommit`| [![.github/workflows/beam_PostCommit_SQL.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_SQL.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_SQL.yml) | +| [ PostCommit TransformService Direct 
](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) | N/A |`Run TransformService_Direct PostCommit`| [![.github/workflows/beam_PostCommit_TransformService_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) +| [ PostCommit Website Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | N/A | N/A | [![.github/workflows/beam_PostCommit_Website_Publish.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | +| [ PostCommit Website Test](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Test.yml) | N/A |`Run Full Website Test`| [![.github/workflows/beam_PostCommit_Website_Test](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Test.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Test.yml) | | [ PostCommit XVR GoUsingJava Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml) | N/A |`Run XVR_GoUsingJava_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml) | -| [ PostCommit Sickbay Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python PostCommit Sickbay tests (matrix_element)Upda`| [![PostCommit Sickbay Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | +| [ PostCommit XVR Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Direct.yml) | N/A |`Run XVR_Direct PostCommit`| [![.github/workflows/beam_PostCommit_XVR_Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Direct.yml) | +| [ PostCommit XVR Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Flink.yml) | N/A |`Run XVR_Flink PostCommit`| [![.github/workflows/beam_PostCommit_XVR_Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Flink.yml) | +| [ PostCommit XVR JavaUsingPython Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml) | N/A |`Run XVR_JavaUsingPython_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml) | +| [ PostCommit XVR PythonUsingJava Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml) | N/A |`Run 
XVR_PythonUsingJava_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml) | +| [ PostCommit XVR PythonUsingJavaSQL Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml) | N/A |`Run XVR_PythonUsingJavaSQL_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml) | +| [ PostCommit XVR Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml) | N/A |`Run XVR_Samza PostCommit`| [![.github/workflows/beam_PostCommit_XVR_Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml) | +| [ PostCommit XVR Spark3 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml) | N/A |`Run XVR_Spark3 PostCommit`| [![.github/workflows/beam_PostCommit_XVR_Spark3](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml) | | [ PreCommit Community Metrics ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml) | N/A |`Run CommunityMetrics PreCommit`| [![.github/workflows/beam_PreCommit_CommunityMetrics.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml) | | [ PreCommit Go ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml) | N/A |`Run Go PreCommit`| [![.github/workflows/beam_PreCommit_Go.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml) | | [ PreCommit Java ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java.yml) | N/A |`Run Java PreCommit`| [![.github/workflows/beam_PreCommit_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java.yml) | @@ -280,3 +334,4 @@ Please note that jobs with matrix need to have matrix element in the comment. 
Ex | [ PreCommit GoPortable ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_GoPortable.yml) | N/A |`Run GoPortable PreCommit`| [![.github/workflows/beam_PreCommit_GoPortable.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_GoPortable.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_GoPortable.yml) | | [ PreCommit Kotlin Examples ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Kotlin_Examples.yml) | N/A | `Run Kotlin_Examples PreCommit` | [![.github/workflows/beam_PreCommit_Kotlin_Examples.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Kotlin_Examples.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Kotlin_Examples.yml) | | [ PreCommit Portable Python ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Portable_Python.yml) | ['3.8','3.11'] | `Run Portable_Python PreCommit` | [![.github/workflows/beam_PreCommit_Portable_Python.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Portable_Python.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Portable_Python.yml) | +| [ Cancel Stale Dataflow Jobs ](https://github.com/apache/beam/actions/workflows/beam_CancelStaleDataflowJobs.yml) | N/A | `Run Cancel Stale Dataflow Jobs` | [![.github/workflows/beam_CancelStaleDataflowJobs.yml](https://github.com/apache/beam/actions/workflows/beam_CancelStaleDataflowJobs.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_CancelStaleDataflowJobs.yml) | diff --git a/.github/workflows/assign_milestone.yml b/.github/workflows/assign_milestone.yml index 9be68ecd3d0f..17cc167ebcbb 100644 --- a/.github/workflows/assign_milestone.yml +++ b/.github/workflows/assign_milestone.yml @@ -31,7 +31,7 @@ jobs: issues: write runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 2 diff --git a/.github/workflows/beam_CancelStaleDataflowJobs.yml b/.github/workflows/beam_CancelStaleDataflowJobs.yml new file mode 100644 index 000000000000..df896528c84e --- /dev/null +++ b/.github/workflows/beam_CancelStaleDataflowJobs.yml @@ -0,0 +1,84 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Cancel Stale Dataflow Jobs + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */4 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +jobs: + beam_CancelStaleDataflowJobs: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_CancelStaleDataflowJobs] + job_phrase: [Run Cancel Stale Dataflow Jobs] + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run Cancel Stale Dataflow Jobs' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Authenticate on GCP + id: auth + uses: google-github-actions/auth@v1 + with: + credentials_json: ${{ secrets.GCP_SA_KEY }} + project_id: ${{ secrets.GCP_PROJECT_ID }} + - name: run cancel stale dataflow jobs + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :beam-test-tools:cancelStaleDataflowJobs + diff --git a/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml b/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml new file mode 100644 index 000000000000..0cb601522a81 --- /dev/null +++ b/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml @@ -0,0 +1,126 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
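The `concurrency` block in the workflow above (and the near-identical one in each workflow that follows) derives its group key from whichever event fields are populated: in GitHub Actions expressions, `a || b` evaluates to the first non-empty operand. A sketch of how the key resolves per trigger type, with illustrative values:

```yaml
# Sketch (illustrative values) of how the concurrency key above resolves.
# Because `||` picks the first non-empty operand, each trigger type yields
# a different group string, so distinct triggers do not cancel each other:
#   issue_comment on PR #123 -> 'My Workflow @ 123-Run My Job'
#   schedule                 -> 'My Workflow @ <commit sha>-0 */4 * * *'
#   workflow_dispatch        -> 'My Workflow @ <commit sha>-<actor login>'
concurrency:
  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
  # A newly queued run cancels any in-progress run that maps to the same group.
  cancel-in-progress: true
```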
+ +name: Load Tests CoGBK Dataflow Streaming Java + +on: + issue_comment: + types: [created] + schedule: + - cron: '50 10 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_LoadTests_Java_CoGBK_Dataflow_Streaming: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Load Tests Java CoGBK Dataflow Streaming' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: [ "beam_LoadTests_Java_CoGBK_Dataflow_Streaming" ] + job_phrase: ["Run Load Tests Java CoGBK Dataflow Streaming"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Prepare configs + id: set_configs + shell: bash + run: | + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_SingleKey.txt | tr '\n' ' ') + echo "prepared_config_1=$CURCONFIG" >> $GITHUB_OUTPUT + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_MultipleKey.txt | tr '\n' ' ') + echo "prepared_config_2=$CURCONFIG" >> $GITHUB_OUTPUT + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_10KB.txt | tr '\n' ' ') + echo "prepared_config_3=$CURCONFIG" >> $GITHUB_OUTPUT + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_2MB.txt | tr '\n' ' ') + echo "prepared_config_4=$CURCONFIG" >> $GITHUB_OUTPUT + - name: run CoGBK Dataflow Streaming Java Load Test 1 (single key) + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:load-tests:run + arguments: | + -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ + -Prunner=:runners:google-cloud-dataflow-java \ + '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_1 }}' \ + - name: run CoGBK Dataflow Streaming Java Load Test 2 (multiple key) + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:load-tests:run + arguments: | + -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ + -Prunner=:runners:google-cloud-dataflow-java \ + '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_2 }}' \ + - name: run CoGBK Dataflow Streaming Java Load Test 3 
(reiteration 10KB value) + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:load-tests:run + arguments: | + -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ + -Prunner=:runners:google-cloud-dataflow-java \ + '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_3 }}' \ + - name: run CoGBK Dataflow Streaming Java Load Test 4 (reiteration 2MB value) + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:load-tests:run + arguments: | + -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ + -Prunner=:runners:google-cloud-dataflow-java \ + '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_4 }}' \ + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml b/.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml new file mode 100644 index 000000000000..d29acbfc765f --- /dev/null +++ b/.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml @@ -0,0 +1,101 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
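The load and performance test workflows in this diff all keep their pipeline options in checked-in text files and flatten them at runtime: `grep -v "^#.*"` drops full-line comments and `tr '\n' ' '` joins the remaining lines into a single string that fits one `-PloadTest.args=` (or `-DintegrationTestPipelineOptions=`) value. A self-contained sketch of the step, assuming a hypothetical config file:

```yaml
# Illustrative, standalone version of the "Prepare configs" step above.
# The config path and its contents are hypothetical, e.g. a file holding
# one pipeline option per line:
#   # comment lines like this one are stripped
#   --project=apache-beam-testing
#   --numWorkers=5
- name: Prepare config (sketch)
  id: set_config_sketch
  shell: bash
  run: |
    # Drop full-line comments, then replace newlines with spaces so the
    # options can be passed to Gradle as a single property value.
    CURCONFIG=$(grep -v "^#.*" ./path/to/hypothetical_config.txt | tr '\n' ' ')
    # Later steps read this back as ${{ steps.set_config_sketch.outputs.prepared_config }}.
    echo "prepared_config=$CURCONFIG" >> $GITHUB_OUTPUT
```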
+ +name: Performance Tests BigQueryIO Batch Java Avro + +on: + issue_comment: + types: [created] + schedule: + - cron: '10 1,13 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PerformanceTests_BigQueryIO_Batch_Java_Avro: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run BigQueryIO Batch Performance Test Java Avro' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PerformanceTests_BigQueryIO_Batch_Java_Avro"] + job_phrase: ["Run BigQueryIO Batch Performance Test Java Avro"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Prepare config + id: set_config + shell: bash + run: | + CURDATE=$(date '+%m%d%H%M%S' --utc) + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Avro.txt | tr '\n' ' ') + CONFIGWITHDATE=$(echo "${CURCONFIG/bqio_write_10GB_java_avro_/bqio_write_10GB_java_avro_$CURDATE}") + echo "prepared_config=$CONFIGWITHDATE" >> $GITHUB_OUTPUT + - name: run Java BigQueryIO Batch Avro Performance Test + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:io:bigquery-io-perf-tests:integrationTest + arguments: | + --tests org.apache.beam.sdk.bigqueryioperftests.BigQueryIOIT \ + --info \ + -DintegrationTestRunner=dataflow \ + -DintegrationTestPipelineOptions=${{ steps.set_config.outputs.prepared_config }} \ + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml b/.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml new file mode 100644 index 000000000000..067d0e4b95b8 --- /dev/null +++ b/.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml @@ -0,0 +1,101 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license 
agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Performance Tests BigQueryIO Batch Java Json + +on: + issue_comment: + types: [created] + schedule: + - cron: '30 8,20 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PerformanceTests_BigQueryIO_Batch_Java_Json: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run BigQueryIO Batch Performance Test Java Json' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PerformanceTests_BigQueryIO_Batch_Java_Json"] + job_phrase: ["Run BigQueryIO Batch Performance Test Java Json"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Prepare config + id: set_config + shell: bash + run: | + CURDATE=$(date '+%m%d%H%M%S' --utc) + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Json.txt | tr '\n' ' ') + CONFIGWITHDATE=$(echo "${CURCONFIG/bqio_write_10GB_java_json_/bqio_write_10GB_java_json_$CURDATE}") + echo "prepared_config=$CONFIGWITHDATE" >> $GITHUB_OUTPUT + - name: run Java BigQueryIO Batch Json Performance Test + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:io:bigquery-io-perf-tests:integrationTest + arguments: | + --tests org.apache.beam.sdk.bigqueryioperftests.BigQueryIOIT \ + --info \ + -DintegrationTestRunner=dataflow \ + -DintegrationTestPipelineOptions=${{ steps.set_config.outputs.prepared_config }} \ + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test 
Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml b/.github/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml new file mode 100644 index 000000000000..bf10d4be522e --- /dev/null +++ b/.github/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml @@ -0,0 +1,101 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Performance Tests BigQueryIO Streaming Java + +on: + issue_comment: + types: [created] + schedule: + - cron: '20 15,22 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PerformanceTests_BigQueryIO_Streaming_Java: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run BigQueryIO Streaming Performance Test Java' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PerformanceTests_BigQueryIO_Streaming_Java"] + job_phrase: ["Run BigQueryIO Streaming Performance Test Java"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Prepare config + id: set_config + shell: bash + run: | + CURDATE=$(date '+%m%d%H%M%S' --utc) + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/performance-tests-job-configs/config_BigQueryIO_Streaming_Java.txt | tr '\n' ' ') + CONFIGWITHDATE=$(echo 
"${CURCONFIG/bqio_write_10GB_java_stream_/bqio_write_10GB_java_stream_$CURDATE}") + echo "prepared_config=$CONFIGWITHDATE" >> $GITHUB_OUTPUT + - name: run Java BigQueryIO Streaming Performance Test + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:io:bigquery-io-perf-tests:integrationTest + arguments: | + --tests org.apache.beam.sdk.bigqueryioperftests.BigQueryIOIT \ + --info \ + -DintegrationTestRunner=dataflow \ + -DintegrationTestPipelineOptions=${{ steps.set_config.outputs.prepared_config }} \ + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_BeamMetrics_Publish.yml b/.github/workflows/beam_PostCommit_BeamMetrics_Publish.yml index dfc86953d4b9..3ce0a48824aa 100644 --- a/.github/workflows/beam_PostCommit_BeamMetrics_Publish.yml +++ b/.github/workflows/beam_PostCommit_BeamMetrics_Publish.yml @@ -66,7 +66,7 @@ jobs: job_name: ["beam_PostCommit_BeamMetrics_Publish"] job_phrase: ["Run Beam Metrics Deployment"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: diff --git a/.github/workflows/beam_PostCommit_Go.yml b/.github/workflows/beam_PostCommit_Go.yml index 1b27c006125e..dc1180314d67 100644 --- a/.github/workflows/beam_PostCommit_Go.yml +++ b/.github/workflows/beam_PostCommit_Go.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Go"] job_phrase: ["Run Go PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: diff --git a/.github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml b/.github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml index 223640bbbaa3..8e5651e29279 100644 --- a/.github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml +++ b/.github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml @@ -66,26 +66,18 @@ jobs: job_name: ["beam_PostCommit_Go_Dataflow_ARM"] job_phrase: ["Run Go PostCommit Dataflow ARM"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Install Java - uses: actions/setup-java@v3.8.0 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - distribution: 'zulu' - java-version: '8' - - name: Install Go - uses: actions/setup-go@v4 - with: - go-version: '1.21' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 + go-version: 1.21 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v1 - name: Authenticate on GCP diff --git a/.github/workflows/beam_PostCommit_Go_VR_Flink.yml b/.github/workflows/beam_PostCommit_Go_VR_Flink.yml index 0f7d9b0c5355..21dcf7f8e72a 100644 --- a/.github/workflows/beam_PostCommit_Go_VR_Flink.yml +++ b/.github/workflows/beam_PostCommit_Go_VR_Flink.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Go_VR_Flink"] job_phrase: ["Run Go Flink 
ValidatesRunner"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: diff --git a/.github/workflows/beam_PostCommit_Go_VR_Samza.yml b/.github/workflows/beam_PostCommit_Go_VR_Samza.yml index e199d04b11f2..4e647962ccac 100644 --- a/.github/workflows/beam_PostCommit_Go_VR_Samza.yml +++ b/.github/workflows/beam_PostCommit_Go_VR_Samza.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Go_VR_Samza"] job_phrase: ["Run Go Samza ValidatesRunner"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: diff --git a/.github/workflows/beam_PostCommit_Go_VR_Spark.yml b/.github/workflows/beam_PostCommit_Go_VR_Spark.yml index eed15c1fe966..0fc0f8d400de 100644 --- a/.github/workflows/beam_PostCommit_Go_VR_Spark.yml +++ b/.github/workflows/beam_PostCommit_Go_VR_Spark.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Go_VR_Spark"] job_phrase: ["Run Go Spark ValidatesRunner"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: diff --git a/.github/workflows/beam_PostCommit_Java.yml b/.github/workflows/beam_PostCommit_Java.yml index 4cc7f638e1c1..3e3cd79e8477 100644 --- a/.github/workflows/beam_PostCommit_Java.yml +++ b/.github/workflows/beam_PostCommit_Java.yml @@ -64,7 +64,7 @@ jobs: github.event_name == 'schedule' || github.event.comment.body == 'Run Java PostCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -75,8 +75,16 @@ jobs: uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :javaPostCommit - - name: Upload test report + - name: Archive JUnit Test Results uses: actions/upload-artifact@v3 + if: failure() with: - name: java-code-coverage-report - path: "**/build/test-results/**/*.xml" \ No newline at end of file + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Avro_Versions.yml b/.github/workflows/beam_PostCommit_Java_Avro_Versions.yml index b18beb51beed..cf3064a6a7de 100644 --- a/.github/workflows/beam_PostCommit_Java_Avro_Versions.yml +++ b/.github/workflows/beam_PostCommit_Java_Avro_Versions.yml @@ -45,6 +45,11 @@ permissions: security-events: read statuses: read +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + jobs: beam_PostCommit_Java_Avro_Versions: name: ${{matrix.job_name}} (${{matrix.job_phrase}}) @@ -59,7 +64,7 @@ jobs: github.event_name == 'schedule' || github.event.comment.body == 'Run Java Avro Versions PostCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -80,4 +85,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: 
'**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_DataflowV1.yml b/.github/workflows/beam_PostCommit_Java_DataflowV1.yml index 1a3f07201fe8..02896bd3a383 100644 --- a/.github/workflows/beam_PostCommit_Java_DataflowV1.yml +++ b/.github/workflows/beam_PostCommit_Java_DataflowV1.yml @@ -64,7 +64,7 @@ jobs: github.event_name == 'schedule' || github.event.comment.body == 'Run PostCommit_Java_Dataflow' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -92,4 +92,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_DataflowV2.yml b/.github/workflows/beam_PostCommit_Java_DataflowV2.yml index 4b06592a972c..56b1e6dc91bb 100644 --- a/.github/workflows/beam_PostCommit_Java_DataflowV2.yml +++ b/.github/workflows/beam_PostCommit_Java_DataflowV2.yml @@ -64,7 +64,7 @@ jobs: github.event_name == 'schedule' || github.event.comment.body == 'Run PostCommit_Java_DataflowV2' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -85,4 +85,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml index 596b7f35ab5f..5a42b9f95237 100644 --- a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml +++ b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml @@ -78,28 +78,17 @@ jobs: github. 
event_name == 'workflow_dispatch' || startswith(github.event.comment.body, 'Run Java_Examples_Dataflow_ARM PostCommit') steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} ${{matrix.java_version}} github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) ${{matrix.java_version}} - - name: Setup self-hosted - uses: ./.github/actions/setup-self-hosted-action + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{matrix.java_version}}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - requires-py-38: false - requires-py-39: false - requires-go: false - - name: Set up Java${{ matrix.java_version }} - uses: actions/setup-java@v3.8.0 - with: - distribution: 'temurin' java-version: ${{ matrix.java_version }} - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 - name: Authenticate on GCP @@ -140,4 +129,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml new file mode 100644 index 000000000000..f50ae9d30f6c --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml @@ -0,0 +1,100 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
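The hunk above replaces the separate per-tool setup steps with a single call to the renamed composite action, passing explicit tool versions. A hedged sketch of a caller, assuming the composite action installs only the tools whose version input is non-empty:

```yaml
# Sketch of a migrated workflow step (assumption: the composite action skips
# any tool whose version input is left at its empty default).
- name: Setup environment
  uses: ./.github/actions/setup-environment-action
  with:
    java-version: 8     # Temurin JDK 8, as in the ARM job above
    go-version: 1.21    # no patch level, to pick up the latest patch release
    # python-version omitted -> the Python setup step is skipped
```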
+ +name: PostCommit Java Examples Dataflow Java + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Dataflow_Java: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.java_version }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Examples_Dataflow_Java] + job_phrase: [Run Java examples on Dataflow Java] + java_version: ['11','17'] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + startswith(github.event.comment.body, 'Run Java examples on Dataflow Java') + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{matrix.java_version}} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.java_version }}) + - name: Set up Java${{ matrix.java_version }} + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: | + ${{ matrix.java_version }} + 8 + - name: run java${{ matrix.java_version }}PostCommit script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:examples:java${{ matrix.java_version }}PostCommit + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2.yml b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2.yml new file mode 100644 index 000000000000..af01933be446 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2.yml @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Examples Dataflow V2 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Dataflow_V2: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Examples_Dataflow_V2] + job_phrase: [Run Java Examples on Dataflow Runner V2] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Examples on Dataflow Runner V2' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Examples Dataflow V2 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:examplesJavaRunnerV2IntegrationTest + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2_Java.yml b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2_Java.yml new file mode 100644 index 000000000000..0dfea680f824 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2_Java.yml @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Examples Dataflow V2 Java + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Dataflow_V2_Java: + name: ${{matrix.job_name}} (${{matrix.job_phrase_1}}${{matrix.job_phrase_2}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Examples_Dataflow_V2_Java] + job_phrase_1: [Run Java ] + job_phrase_2: [Examples on Dataflow Runner V2] + java_version: ['11', '17'] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + (contains(github.event.comment.body, 'Run Java') && + contains(github.event.comment.body, 'Examples on Dataflow Runner V2')) + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase_1 }} ${{matrix.java_version}} ${{ matrix.job_phrase_2 }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} ${{ matrix.job_phrase_1 }} ${{matrix.java_version}} ${{ matrix.job_phrase_2 }} + - name: Set up Java${{ matrix.java_version }} + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: ${{ matrix.java_version }} + - name: run PostCommit Java Examples Dataflow V2 Java${{ matrix.java_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:examplesJavaRunnerV2IntegrationTest + arguments: | + -PdisableSpotlessCheck=true \ + -PdisableCheckStyle=true \ + -PskipCheckerFramework \ + -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_Java_Examples_Direct.yml b/.github/workflows/beam_PostCommit_Java_Examples_Direct.yml new file mode 100644 index 000000000000..b8014342042c --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Direct.yml @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Examples Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Direct: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Examples_Direct] + job_phrase: [Run Java Examples_Direct] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Examples_Direct' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run examplesIntegrationTest script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:direct:examplesIntegrationTest + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + 
files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Flink.yml b/.github/workflows/beam_PostCommit_Java_Examples_Flink.yml new file mode 100644 index 000000000000..3aec68316f81 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Flink.yml @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Examples Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Flink: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Examples_Flink] + job_phrase: [Run Java Examples_Flink] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Examples_Flink' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: 3.8 + - name: run examplesIntegrationTest script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:flink:1.15:examplesIntegrationTest + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || 
env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Spark.yml b/.github/workflows/beam_PostCommit_Java_Examples_Spark.yml new file mode 100644 index 000000000000..fd5c60952b7b --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Spark.yml @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Examples Spark + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Spark: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Examples_Spark] + job_phrase: [Run Java Examples_Spark] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Examples_Spark' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run examplesIntegrationTest script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:examplesIntegrationTest + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: 
EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Hadoop_Versions.yml b/.github/workflows/beam_PostCommit_Java_Hadoop_Versions.yml new file mode 100644 index 000000000000..da4606400e8b --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Hadoop_Versions.yml @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java Hadoop Versions + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Hadoop_Versions: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Hadoop_Versions] + job_phrase: [Run PostCommit_Java_Hadoop_Versions] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run PostCommit_Java_Hadoop_Versions' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :javaHadoopVersionsTest + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + 
path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml index ffd5751fd8b3..471782621fa7 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -67,7 +67,7 @@ jobs: job_phrase: ["Run Java PostCommit IO Performance Tests"] test_case: ["GCSPerformanceTest", "BigTablePerformanceTest"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -76,19 +76,14 @@ jobs: github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.test_case }}) - name: Checkout release branch if: github.event_name == 'schedule' #This has scheduled runs run against the latest release - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: v2.50.0 #TODO(https://github.com/apache/beam/issues/28330) automate updating this repository: apache/beam - - name: Install Java - uses: actions/setup-java@v3.8.0 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - distribution: 'zulu' - java-version: '8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 - name: Authenticate on GCP uses: google-github-actions/setup-gcloud@v0 with: @@ -111,4 +106,17 @@ jobs: gradle-command: :it:${{ matrix.test_case }} env: exportDataset: performance_tests - exportTable: io_performance_metrics_test \ No newline at end of file + exportTable: io_performance_metrics_test + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml index 925942d46b90..6c35a10e5e92 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Java_Jpms_Dataflow_Java11"] job_phrase: ["Run Jpms Dataflow Java 11 PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -89,4 +89,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml index 6094cdc7e4e6..5d0d2273cf55 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml +++ 
b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Java_Jpms_Dataflow_Java17"] job_phrase: ["Run Jpms Dataflow Java 17 PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -94,4 +94,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml index 0fa065edd34b..4e26f29276b2 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Java_Jpms_Direct_Java11"] job_phrase: ["Run Jpms Direct Java 11 PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -89,4 +89,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml index f85ad12437bc..0a4801d46c6c 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Java_Jpms_Direct_Java17"] job_phrase: ["Run Jpms Direct Java 17 PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -94,4 +94,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml index 84f294de5c21..aa5e3b40c9dd 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Java_Jpms_Flink_Java11"] job_phrase: ["Run Jpms Flink Java 11 PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -89,4 +89,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml index 56ec589b38d7..a81df741d9cc 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml @@ -62,7 +62,7 @@ 
jobs: job_name: ["beam_PostCommit_Java_Jpms_Spark_Java11"] job_phrase: ["Run Jpms Spark Java 11 PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -89,4 +89,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml new file mode 100644 index 000000000000..ef90fbad5bf0 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml @@ -0,0 +1,115 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Nexmark Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_COMMAND_ARGUMENTS: | + -Pnexmark.args=--manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=true + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --region=us-central1 + --suite=STRESS + --numWorkers=4 + --maxNumWorkers=4 + --autoscalingAlgorithm=NONE + --nexmarkParallel=16 + --enforceEncodability=true + --enforceImmutability=true + --runner=DataflowRunner + +jobs: + beam_PostCommit_Java_Nexmark_Dataflow: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: 
[self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Nexmark_Dataflow] + job_phrase: [Run Dataflow Runner Nexmark Tests] + streaming: [false, true] + queryLanguage: [sql, zetasql, none] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Dataflow Runner Nexmark Tests' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Nexmark Dataflow (${{ matrix.streaming }} ${{ matrix.queryLanguage }}) script + if: matrix.queryLanguage != 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:google-cloud-dataflow-java \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }}--streaming=${{ matrix.streaming }} --queryLanguage=${{ matrix.queryLanguage }}" \ + - name: run PostCommit Java Nexmark Dataflow (${{ matrix.streaming }}) script + if: matrix.queryLanguage == 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:google-cloud-dataflow-java \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }}--streaming=${{ matrix.streaming }}" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml new file mode 100644 index 000000000000..3eb93e6687f8 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
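+
+# Note on the argument-passing pattern used in this and the sibling Nexmark
+# workflows: GRADLE_COMMAND_ARGUMENTS is declared as a YAML literal block
+# scalar ('|'), which keeps its trailing newline. A concatenation such as
+#
+#   '${{ env.GRADLE_COMMAND_ARGUMENTS }}--streaming=true'
+#
+# therefore expands with '--runner=DataflowRunner' and '--streaming=true' on
+# separate lines rather than as one fused token.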
+ +name: PostCommit Java Nexmark Dataflow V2 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_COMMAND_ARGUMENTS: | + -Pnexmark.args=--manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=false + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --influxTags={"runnerVersion":"V2","javaVersion":"8"} + --region=us-central1 + --suite=STRESS + --numWorkers=4 + --maxNumWorkers=4 + --autoscalingAlgorithm=NONE + --nexmarkParallel=16 + --enforceEncodability=true + --enforceImmutability=true + --runner=DataflowRunner + +jobs: + beam_PostCommit_Java_Nexmark_Dataflow_V2: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Nexmark_Dataflow_V2] + job_phrase: [Run Dataflow Runner V2 Nexmark Tests] + streaming: [false, true] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Dataflow Runner V2 Nexmark Tests' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Nexmark Dataflow V2 (streaming = ${{ matrix.streaming }}) script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:google-cloud-dataflow-java \ + '${{ env.GRADLE_COMMAND_ARGUMENTS }}--streaming=${{ matrix.streaming }}' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml new file mode 100644 index 000000000000..06438510400b --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml @@ -0,0 +1,116 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Nexmark Dataflow V2 Java + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_COMMAND_ARGUMENTS: | + -Pnexmark.args=--manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=false + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --region=us-central1 + --suite=STRESS + --numWorkers=4 + --maxNumWorkers=4 + --autoscalingAlgorithm=NONE + --nexmarkParallel=16 + --enforceEncodability=true + --enforceImmutability=true + --runner=DataflowRunner + +jobs: + beam_PostCommit_Java_Nexmark_Dataflow_V2_Java: + name: ${{matrix.job_name}} (${{matrix.job_phrase_1}} ${{matrix.java_version}} ${{matrix.job_phrase_2}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Nexmark_Dataflow_V2_Java] + job_phrase_1: [Run Dataflow Runner V2 Java] + job_phrase_2: [Nexmark Tests] + streaming: [false, true] + java_version: ['11','17'] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + (contains(github.event.comment.body, 'Run Dataflow Runner V2 Java') && + contains(github.event.comment.body, 'Nexmark Tests')) + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase_1 }} ${{matrix.java_version}} ${{ matrix.job_phrase_2 }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} ${{ matrix.job_phrase_1 }} ${{matrix.java_version}} ${{ 
matrix.job_phrase_2 }} + - name: Set up Java${{ matrix.java_version }} + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: ${{ matrix.java_version }} + - name: run PostCommit Java ${{matrix.java_version}} Nexmark Dataflow V2 (streaming = ${{ matrix.streaming }}) script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ + -Pnexmark.runner.version=V2 \ + -Pnexmark.runner=:runners:google-cloud-dataflow-java \ + '${{ env.GRADLE_COMMAND_ARGUMENTS }}--influxTags={"runnerVersion":"V2","javaVersion":"${{matrix.java_version}}"} --streaming=${{ matrix.streaming }}' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Nexmark_Direct.yml b/.github/workflows/beam_PostCommit_Java_Nexmark_Direct.yml new file mode 100644 index 000000000000..2386d7e26f38 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Nexmark_Direct.yml @@ -0,0 +1,110 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
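+
+# Note: the job matrix below uses the sentinel value 'none' for queryLanguage
+# because a matrix axis cannot express "flag absent"; the two run steps switch
+# on it and pass --queryLanguage only when a concrete language is selected.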
+ +name: PostCommit Java Nexmark Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_COMMAND_ARGUMENTS: | + -Pnexmark.args=--manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=true + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --suite=SMOKE + --enforceEncodability=true + --enforceImmutability=true + --runner=DirectRunner + +jobs: + beam_PostCommit_Java_Nexmark_Direct: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Nexmark_Direct] + job_phrase: [Run Direct Runner Nexmark Tests] + streaming: [false, true] + queryLanguage: [sql, zetasql, none] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Direct Runner Nexmark Tests' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Nexmark Direct (${{ matrix.streaming }} ${{ matrix.queryLanguage }}) script + if: matrix.queryLanguage != 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:direct-java \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }} --streaming=${{ matrix.streaming }} --queryLanguage=${{ matrix.queryLanguage }}" \ + - name: run PostCommit Java Nexmark Direct (${{ matrix.streaming }}) script + if: matrix.queryLanguage == 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:direct-java \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }}--streaming=${{ matrix.streaming }}" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Nexmark_Flink.yml b/.github/workflows/beam_PostCommit_Java_Nexmark_Flink.yml new file mode 100644 index 000000000000..9123c9079605 --- /dev/null +++ 
b/.github/workflows/beam_PostCommit_Java_Nexmark_Flink.yml @@ -0,0 +1,109 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Nexmark Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_COMMAND_ARGUMENTS: | + -Pnexmark.args=--manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=true + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --suite=SMOKE + --streamTimeout=60 + --runner=FlinkRunner + +jobs: + beam_PostCommit_Java_Nexmark_Flink: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Nexmark_Flink] + job_phrase: [Run Flink Runner Nexmark Tests] + streaming: [false, true] + queryLanguage: [sql, zetasql, none] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Flink Runner Nexmark Tests' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Nexmark Flink (${{ matrix.streaming }} ${{ matrix.queryLanguage }}) script + if: matrix.queryLanguage != 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: 
:sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:flink:1.15 \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }} --streaming=${{ matrix.streaming }} --queryLanguage=${{ matrix.queryLanguage }}" \ + - name: run PostCommit Java Nexmark Flink (${{ matrix.streaming }}) script + if: matrix.queryLanguage == 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:flink:1.15 \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }}--streaming=${{ matrix.streaming }}" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Nexmark_Spark.yml b/.github/workflows/beam_PostCommit_Java_Nexmark_Spark.yml new file mode 100644 index 000000000000..7492eb9b8262 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Nexmark_Spark.yml @@ -0,0 +1,109 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Nexmark Spark + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_COMMAND_ARGUMENTS: | + -Pnexmark.args=--manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=true + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --suite=SMOKE + --streamTimeout=60 + --streaming=false + +jobs: + beam_PostCommit_Java_Nexmark_Spark: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: 
[beam_PostCommit_Java_Nexmark_Spark] + job_phrase: [Run Spark Runner Nexmark Tests] + runner: [SparkRunner, SparkStructuredStreamingRunner --skipQueries=3] + queryLanguage: [sql, zetasql, none] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Spark Runner Nexmark Tests' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Nexmark Spark (runner = ${{ matrix.runner }} queryLanguage = ${{ matrix.queryLanguage }}) script + if: matrix.queryLanguage != 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:spark:3 \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }} --runner=${{ matrix.runner }} --queryLanguage=${{ matrix.queryLanguage }}" \ + - name: run PostCommit Java Nexmark Spark (${{ matrix.runner }}) script + if: matrix.queryLanguage == 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:spark:3 \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }}--runner=${{ matrix.runner }}" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml b/.github/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml new file mode 100644 index 000000000000..709ec8215983 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: PostCommit Java PVR Flink Streaming + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_PVR_Flink_Streaming: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_PVR_Flink_Streaming] + job_phrase: [Run Java Flink PortableValidatesRunner Streaming] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Flink PortableValidatesRunner Streaming' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Flink PortableValidatesRunner Streaming script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: runners:flink:1.15:job-server:validatesPortableRunnerStreaming diff --git a/.github/workflows/beam_PostCommit_Java_PVR_Samza.yml b/.github/workflows/beam_PostCommit_Java_PVR_Samza.yml new file mode 100644 index 000000000000..06b88ee5bcb8 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_PVR_Samza.yml @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
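+
+# Note: the run step below points CLOUDSDK_CONFIG at KUBELET_GCLOUD_CONFIG_PATH
+# so gcloud reads its configuration from the per-pod path prepared by the
+# shared setup action rather than the runner's default ~/.config/gcloud.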
+ +name: PostCommit Java PVR Samza + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_PVR_Samza: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_PVR_Samza] + job_phrase: [Run Java Samza PortableValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Samza PortableValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Samza script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:samza:job-server:validatesPortableRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml b/.github/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml new file mode 100644 index 000000000000..e87c62674c40 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: PostCommit Java PVR Spark3 Streaming + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_PVR_Spark3_Streaming: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_PVR_Spark3_Streaming] + job_phrase: [Run Java Spark v3 PortableValidatesRunner Streaming] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Spark v3 PortableValidatesRunner Streaming' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java PortableValidatesRunner Spark3 Streaming script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:job-server:validatesPortableRunnerStreaming + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' diff --git a/.github/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml b/.github/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml new file mode 100644 index 000000000000..ce4e17dc25ea --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
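+
+# Note: gradle-command below is a block scalar naming two tasks
+# (validatesPortableRunnerBatch and validatesPortableRunnerDocker); assuming
+# the wrapper action forwards the value to ./gradlew verbatim, both tasks run
+# in a single Gradle invocation.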
+ +name: PostCommit Java PVR Spark Batch + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_PVR_Spark_Batch: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + matrix: + job_name: [beam_PostCommit_Java_PVR_Spark_Batch] + job_phrase: [Run Java Spark PortableValidatesRunner Batch] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Spark PortableValidatesRunner Batch' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java PortableValidatesRunner Spark Batch script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: | + :runners:spark:3:job-server:validatesPortableRunnerBatch + :runners:spark:3:job-server:validatesPortableRunnerDocker + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Sickbay.yml b/.github/workflows/beam_PostCommit_Java_Sickbay.yml index ad437500dba4..dc719c2d53dd 100644 --- a/.github/workflows/beam_PostCommit_Java_Sickbay.yml +++ b/.github/workflows/beam_PostCommit_Java_Sickbay.yml @@ -21,7 +21,7 @@ on: issue_comment: types: [created] schedule: - - cron: '29 6 * * *' + - cron: '0 0 * * *' workflow_dispatch: # This allows a subsequently queued workflow run to interrupt previous runs @@ -64,7 +64,7 @@ jobs: github.event_name == 'schedule' || github.event.comment.body == 'Run Java Sickbay' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -85,4 +85,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml similarity index 78% rename from .github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml rename to .github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml index bc26da173042..92ddeeb8663e 100644 --- a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -name: PostCommit Java Examples Dataflow Java11 +name: PostCommit Java ValidatesRunner Dataflow on: issue_comment: @@ -51,40 +51,35 @@ env: GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} jobs: - beam_PostCommit_Java_Examples_Dataflow_Java11: + beam_PostCommit_Java_ValidatesRunner_Dataflow: name: ${{matrix.job_name}} (${{matrix.job_phrase}}) runs-on: [self-hosted, ubuntu-20.04, main] - timeout-minutes: 180 + timeout-minutes: 480 strategy: - fail-fast: false matrix: - job_name: [beam_PostCommit_Java_Examples_Dataflow_Java11] - job_phrase: [Run Java examples on Dataflow Java 11] + job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow] + job_phrase: [Run Dataflow ValidatesRunner] if: | github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || - github.event.comment.body == 'Run Java examples on Dataflow Java 11' + github.event.comment.body == 'Run Dataflow ValidatesRunner' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - # The test requires Java 11 and Java 8 versions. - # Java 8 is installed second because JAVA_HOME needs to point to Java 8. - - name: Set up Java + - name: Install Java uses: actions/setup-java@v3.8.0 with: - distribution: 'temurin' - java-version: | - 11 - 8 - - name: run PostCommit Java Examples Dataflow Java11 script + distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :runners:google-cloud-dataflow-java:examples:java11PostCommit + gradle-command: :runners:google-cloud-dataflow-java:validatesRunner max-workers: 12 - name: Archive JUnit Test Results uses: actions/upload-artifact@v3 @@ -96,4 +91,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions.yml new file mode 100644 index 000000000000..37ec6b145f35 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions.yml @@ -0,0 +1,116 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Dataflow JavaVersions + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions: + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{matrix.java_version}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 480 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions] + job_phrase: [Run Dataflow ValidatesRunner Java] + java_version: ['11','17'] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + startswith(github.event.comment.body, 'Run Dataflow ValidatesRunner Java') + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{matrix.java_version}} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) ${{matrix.java_version}} + - name: Set up Java${{ matrix.java_version }} + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: | + ${{ matrix.java_version }} + 8 + - name: run jar Java${{ matrix.java_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:testJar :runners:google-cloud-dataflow-java:worker:shadowJar + arguments: | + -Dorg.gradle.java.home=$JAVA_HOME_8_X64 \ + - name: run validatesRunner Java${{ matrix.java_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesRunner + arguments: | + -x shadowJar \ + -x shadowTestJar \ + -x compileJava \ + -x compileTestJava \ + -x jar \ + -x testJar \ + -x classes \ + -x testClasses \ + -Dorg.gradle.java.home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + 
uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml new file mode 100644 index 000000000000..708ef0b7ad16 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Dataflow Streaming + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 720 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming] + job_phrase: [Run Dataflow Streaming ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Dataflow Streaming ValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunnerStreaming script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: 
:runners:google-cloud-dataflow-java:validatesRunnerStreaming + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml new file mode 100644 index 000000000000..aa665508ddee --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Dataflow V2 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */8 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Dataflow_V2: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 390 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow_V2] + job_phrase: [Run Java Dataflow V2 ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Dataflow V2 ValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + 
uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunnerV2 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesRunnerV2 + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml new file mode 100644 index 000000000000..ca380efa5cb4 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
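+
+# Runs the Java ValidatesRunner suite against Dataflow Runner V2 in streaming mode
+# (:runners:google-cloud-dataflow-java:validatesRunnerV2Streaming). Triggered on an
+# eight-hour cron schedule, by workflow_dispatch, or by the comment phrase defined below.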
+ +name: PostCommit Java ValidatesRunner Dataflow V2 Streaming + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */8 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 510 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming] + job_phrase: [Run Java Dataflow V2 ValidatesRunner Streaming] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Dataflow V2 ValidatesRunner Streaming' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunnerV2Streaming script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesRunnerV2Streaming + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml similarity index 81% rename from .github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml rename to .github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml index f61cc41d5222..24e2c8014336 100644 --- a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
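+# Runs the Java ValidatesRunner suite on the Direct runner (:runners:direct-java:validatesRunner).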
-name: PostCommit Java Examples Dataflow Java17 +name: PostCommit Java ValidatesRunner Direct on: issue_comment: @@ -51,38 +51,35 @@ env: GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} jobs: - beam_PostCommit_Java_Examples_Dataflow_Java17: + beam_PostCommit_Java_ValidatesRunner_Direct: name: ${{matrix.job_name}} (${{matrix.job_phrase}}) runs-on: [self-hosted, ubuntu-20.04, main] timeout-minutes: 180 strategy: matrix: - job_name: [beam_PostCommit_Java_Examples_Dataflow_Java17] - job_phrase: [Run Java examples on Dataflow Java 17] + job_name: [beam_PostCommit_Java_ValidatesRunner_Direct] + job_phrase: [Run Direct ValidatesRunner] if: | github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || - github.event.comment.body == 'Run Java examples on Dataflow Java 17' + github.event.comment.body == 'Run Direct ValidatesRunner' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Set up Java + - name: Install Java uses: actions/setup-java@v3.8.0 with: - distribution: 'temurin' - java-version: | - 17 - 8 - - name: run PostCommit Java Examples Dataflow Java17 script + distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :runners:google-cloud-dataflow-java:examples:java17PostCommit - max-workers: 12 + gradle-command: :runners:direct-java:validatesRunner - name: Archive JUnit Test Results uses: actions/upload-artifact@v3 if: failure() @@ -93,4 +90,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions.yml new file mode 100644 index 000000000000..168cd245e0db --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions.yml @@ -0,0 +1,111 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
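+
+# Validates the Direct runner against newer Java versions: the runner jars are built with
+# Java 8 first, then the ValidatesRunner suite runs under the matrix Java version (11 or 17).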
+ +name: PostCommit Java ValidatesRunner Direct JavaVersions + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions: + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{matrix.java_version}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 480 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions] + job_phrase: [Run Direct ValidatesRunner Java] + java_version: ['11','17'] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + startswith(github.event.comment.body, 'Run Direct ValidatesRunner Java') + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{matrix.java_version}} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) ${{matrix.java_version}} + - name: Set up Java${{ matrix.java_version }} + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: | + ${{ matrix.java_version }} + 8 + - name: run jar Java${{ matrix.java_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:direct-java:shadowJar :runners:direct-java:shadowTestJar + arguments: | + -Dorg.gradle.java.home=$JAVA_HOME_8_X64 \ + - name: run validatesRunner Java${{ matrix.java_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:direct-java:validatesRunner + arguments: | + -x shadowJar \ + -x shadowTestJar \ + -x compileJava \ + -x compileTestJava \ + -Dorg.gradle.java.home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml new file mode 100644 index 000000000000..20aac8a30608 --- /dev/null +++ 
b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java ValidatesRunner Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Flink: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Flink] + job_phrase: [Run Flink ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Flink ValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: 3.8 + - name: run validatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:flink:1.15:validatesRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml 
b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml new file mode 100644 index 000000000000..d5de5fbfa3ff --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml @@ -0,0 +1,114 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Flink Java11 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Flink_Java11: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 270 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Flink_Java11] + job_phrase: [Run Flink ValidatesRunner Java 11] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + startswith(github.event.comment.body, 'Run Flink ValidatesRunner Java 11') + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Set up Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: | + 11 + 8 + - name: run jar Java8 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:flink:1.15:jar :runners:flink:1.15:testJar + arguments: | + -Dorg.gradle.java.home=$JAVA_HOME_8_X64 \ + - name: run validatesRunner Java11 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:flink:1.15:validatesRunner + arguments: | + -x shadowJar \ + -x shadowTestJar \ + -x compileJava \ + -x compileTestJava \ + -x jar \ + -x testJar \ + -x classes 
\ + -x testClasses \ + -Dorg.gradle.java.home=$JAVA_HOME_11_X64 \ + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml new file mode 100644 index 000000000000..0613f794a4f4 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java ValidatesRunner Samza + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Samza: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Samza] + job_phrase: [Run Samza ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Samza ValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + 
distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:samza:validatesRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml new file mode 100644 index 000000000000..146a88c921d2 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java ValidatesRunner Spark + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Spark: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Spark] + job_phrase: [Run Spark ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Spark ValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ 
matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:validatesRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml new file mode 100644 index 000000000000..fcaa3c6eabd4 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
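+
+# Runs the ValidatesRunner suite for the Spark Structured Streaming runner in batch mode
+# (:runners:spark:3:validatesStructuredStreamingRunnerBatch).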
+ +name: PostCommit Java ValidatesRunner SparkStructuredStreaming + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming] + job_phrase: [Run Spark StructuredStreaming ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Spark StructuredStreaming ValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesStructuredStreamingRunnerBatch script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:validatesStructuredStreamingRunnerBatch + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml new file mode 100644 index 000000000000..e6fe19393d13 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml @@ -0,0 +1,114 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Spark Java11 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Spark_Java11: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 270 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Spark_Java11] + job_phrase: [Run Spark ValidatesRunner Java 11] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + startswith(github.event.comment.body, 'Run Spark ValidatesRunner Java 11') + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Set up Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: | + 11 + 8 + - name: run jar Java8 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:jar :runners:spark:3:testJar + arguments: | + -Dorg.gradle.java.home=$JAVA_HOME_8_X64 \ + - name: run validatesRunner Java11 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:validatesRunner + arguments: | + -x shadowJar \ + -x shadowTestJar \ + -x compileJava \ + -x compileTestJava \ + -x jar \ + -x testJar \ + -x classes \ + -x testClasses \ + -Dorg.gradle.java.home=$JAVA_HOME_11_X64 \ + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml new file mode 100644 index 000000000000..89364f32865b --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java ValidatesRunner Twister2 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Twister2: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Twister2] + job_phrase: [Run Twister2 ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Twister2 ValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:twister2:validatesRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ 
github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml new file mode 100644 index 000000000000..e5e397c8c9d3 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java ValidatesRunner ULR + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_ULR: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_ULR] + job_phrase: [Run ULR Loopback ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run ULR Loopback ValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: '3.8' + - name: run ulrLoopbackValidatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:portability:java:ulrLoopbackValidatesRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit 
Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Javadoc.yml b/.github/workflows/beam_PostCommit_Javadoc.yml new file mode 100644 index 000000000000..240b0e43d271 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Javadoc.yml @@ -0,0 +1,82 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Javadoc + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Javadoc: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + matrix: + job_name: [beam_PostCommit_Javadoc] + job_phrase: [Run Javadoc PostCommit] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Javadoc PostCommit' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run aggregateJavadoc script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:javadoc:aggregateJavadoc + - name: Upload Javadoc Results + uses: actions/upload-artifact@v3 + with: + name: Javadoc Results + path: '**/sdks/java/javadoc/build/docs/javadoc/**' \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_PortableJar_Flink.yml b/.github/workflows/beam_PostCommit_PortableJar_Flink.yml new file mode 100644 index 000000000000..38c68059bdbe --- /dev/null +++ b/.github/workflows/beam_PostCommit_PortableJar_Flink.yml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit PortableJar Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_PortableJar_Flink: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run PortableJar_Flink PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: ["beam_PostCommit_PortableJar_Flink"] + job_phrase: ["Run PortableJar_Flink PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: run testPipelineJarFlinkRunner script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:portable:py38:testPipelineJarFlinkRunner + arguments: | + -PpythonVersion=3.8 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_PortableJar_Spark.yml 
b/.github/workflows/beam_PostCommit_PortableJar_Spark.yml new file mode 100644 index 000000000000..4cb99f85f0fb --- /dev/null +++ b/.github/workflows/beam_PostCommit_PortableJar_Spark.yml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit PortableJar Spark + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_PortableJar_Spark: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run PortableJar_Spark PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + strategy: + matrix: + job_name: ["beam_PostCommit_PortableJar_Spark"] + job_phrase: ["Run PortableJar_Spark PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: run testPipelineJarSparkRunner script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:portable:py38:testPipelineJarSparkRunner + arguments: | + -PpythonVersion=3.8 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python.yml b/.github/workflows/beam_PostCommit_Python.yml new file mode 100644 index 000000000000..7dde6ed150df --- /dev/null +++ 
b/.github/workflows/beam_PostCommit_Python.yml @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Python + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python: + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{matrix.python_version}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Python] + job_phrase: [Run Python PostCommit] + python_version: ['3.8', '3.9', '3.10', '3.11'] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Python PostCommit' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: ${{matrix.python_version}} + - name: Install docker compose + run: | + sudo curl -L https://github.com/docker/compose/releases/download/1.22.0/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose + sudo chmod +x /usr/local/bin/docker-compose + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: run PostCommit Python ${{ matrix.python_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :python${{steps.set_py_ver_clean.outputs.py_ver_clean}}PostCommit + arguments: | + -PuseWheelDistribution 
\ + -PpythonVersion=${{ matrix.python_version }} \ + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml index 2aba1760606c..14cdecb356bd 100644 --- a/.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml +++ b/.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml @@ -58,31 +58,22 @@ jobs: runs-on: [self-hosted, ubuntu-20.04, main] timeout-minutes: 180 strategy: - fail-fast: false matrix: job_name: ["beam_PostCommit_Python_Examples_Dataflow"] job_phrase: ["Run Python Examples_Dataflow"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Install Python - uses: actions/setup-python@v4 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: + java-version: 8 python-version: 3.11 - - name: Install Java - uses: actions/setup-java@v3 - with: - distribution: 'zulu' - java-version: '8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false - name: Run examplesPostCommit script uses: ./.github/actions/gradle-command-self-hosted-action with: diff --git a/.github/workflows/beam_PostCommit_Python_Examples_Direct.yml b/.github/workflows/beam_PostCommit_Python_Examples_Direct.yml index 913bac96a174..7c792d7a3c27 100644 --- a/.github/workflows/beam_PostCommit_Python_Examples_Direct.yml +++ b/.github/workflows/beam_PostCommit_Python_Examples_Direct.yml @@ -64,26 +64,18 @@ jobs: job_phrase: ["Run Python Examples_Direct"] python_version: ['3.8','3.9','3.10','3.11'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) - - name: Install Python - uses: actions/setup-python@v4 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: + java-version: 8 python-version: ${{ matrix.python_version }} - - name: Install Java - uses: actions/setup-java@v3 - with: - distribution: 'zulu' - java-version: '8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false - name: Set PY_VER_CLEAN id: set_py_ver_clean run: | diff --git a/.github/workflows/beam_PostCommit_Python_Examples_Flink.yml b/.github/workflows/beam_PostCommit_Python_Examples_Flink.yml index 8d2bc47f267e..ccf03918f29d 100644 --- a/.github/workflows/beam_PostCommit_Python_Examples_Flink.yml +++ b/.github/workflows/beam_PostCommit_Python_Examples_Flink.yml @@ -64,26 +64,18 @@ jobs: job_phrase: ["Run Python Examples_Flink"] python_version: ['3.8', '3.11'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version 
}}) - - name: Install Python - uses: actions/setup-python@v4 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: + java-version: 8 python-version: ${{ matrix.python_version }} - - name: Install Java - uses: actions/setup-java@v3 - with: - distribution: 'zulu' - java-version: '8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false - name: Set PY_VER_CLEAN id: set_py_ver_clean run: | diff --git a/.github/workflows/beam_PostCommit_Python_Examples_Spark.yml b/.github/workflows/beam_PostCommit_Python_Examples_Spark.yml index 9435720a1af9..073ed0aeda64 100644 --- a/.github/workflows/beam_PostCommit_Python_Examples_Spark.yml +++ b/.github/workflows/beam_PostCommit_Python_Examples_Spark.yml @@ -64,26 +64,18 @@ jobs: job_phrase: ["Run Python Examples_Spark"] python_version: ['3.8', '3.11'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) - - name: Install Python - uses: actions/setup-python@v4 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: + java-version: 8 python-version: ${{ matrix.python_version }} - - name: Install Java - uses: actions/setup-java@v3 - with: - distribution: 'zulu' - java-version: '8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false - name: Set PY_VER_CLEAN id: set_py_ver_clean run: | diff --git a/.github/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml b/.github/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml new file mode 100644 index 000000000000..be8f0e10dc18 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
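A note on the `Set PY_VER_CLEAN` step that recurs throughout these workflows: it relies on bash pattern substitution, where `${PY_VER//.}` deletes every dot from the version string, turning `3.11` into `311` so the value can address Gradle projects such as `:sdks:python:test-suites:dataflow:py311`. A minimal sketch of the step in isolation (the hard-coded version is illustrative; the real steps read it from `matrix.python_version`):

```
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
          PY_VER=3.11                  # illustrative; the workflows use ${{ matrix.python_version }}
          PY_VER_CLEAN=${PY_VER//.}    # bash substitution: "3.11" -> "311"
          # Exposes steps.set_py_ver_clean.outputs.py_ver_clean to later steps
          echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT
```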
+ +name: PostCommit Python MongoDBIO IT + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_MongoDBIO_IT: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run Python MongoDBIO_IT' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + strategy: + matrix: + job_name: ["beam_PostCommit_Python_MongoDBIO_IT"] + job_phrase: ["Run Python MongoDBIO_IT"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: 3.11 + - name: Run mongodbioIT script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:direct:py311:mongodbioIT + arguments: | + -PpythonVersion=3.11 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_Nexmark_Direct.yml b/.github/workflows/beam_PostCommit_Python_Nexmark_Direct.yml new file mode 100644 index 000000000000..81c9b4a8b484 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_Nexmark_Direct.yml @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
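A note on the `arguments:` blocks passed to `gradle-command-self-hosted-action` here and throughout this change: assuming the composite action splices the multi-line string verbatim into a single bash `./gradlew` invocation, each argument line needs a trailing backslash so the embedded newlines become shell line continuations; a line without one would start a second shell command and break the run. A hedged sketch of the assumed expansion (the task and flag values are taken from the MongoDBIO job above purely for illustration):

```
      # Sketch only: assumes the composite action expands `arguments`
      # inline after the Gradle task inside one bash run line, so the
      # trailing backslash is what keeps this a single command.
      - name: Run Gradle command (illustrative expansion)
        shell: bash
        run: |
          ./gradlew :sdks:python:test-suites:direct:py311:mongodbioIT \
            -PpythonVersion=3.11
```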
+ +name: PostCommit Python Nexmark Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_JAVA_COMMAND_ARGUMENTS: | + --manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=true + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --suite=SMOKE + --enforceEncodability=true + --enforceImmutability=true + --runner=DirectRunner + --numEvents=100000 + GRADLE_PYTHON_COMMAND_ARGUMENTS: | + --monitor_jobs + --big_query_table=nexmark + --big_query_dataset=nexmark + --project=apache-beam-testing + --resource_name_mode=QUERY_RUNNER_AND_MODE + --export_summary_to_big_query + --temp_location=gs://temp-storage-for-perf-tests/nexmark + --influx_database=beam_test_metrics + --influx_host=http://10.128.0.96:8086 + --base_influx_measurement=nexmark + --export_summary_to_influx_db + --influx_retention_policy=forever + --suite=SMOKE + --enforce_encodability + --enforce_immutability + --runner=DirectRunner + --num_events=100000 + +jobs: + beam_PostCommit_Python_Nexmark_Direct: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Python_Nexmark_Direct] + job_phrase: [Run Python Direct Runner Nexmark Tests] + # Query numbers are listed explicitly due to issues with Python Nexmark Query: + # query = 1 - https://github.com/apache/beam/issues/24678 + # query = 4,6,9 - https://github.com/apache/beam/issues/24679 + # query = 12 - https://github.com/apache/beam/issues/24680 + query: [0, 2, 3, 5, 7, 8, 10, 11] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Python Direct Runner Nexmark Tests' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: run Java Testing Nexmark (query ${{ matrix.query }}) + uses: ./.github/actions/gradle-command-self-hosted-action + with: + 
gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:direct-java \ + "-Pnexmark.args=${{ env.GRADLE_JAVA_COMMAND_ARGUMENTS }} \ + --query=${{ matrix.query }} \ + --generateEventFilePathPrefix=gs://temp-storage-for-perf-tests/nexmark/eventFiles/beam_PostCommit_Python_Nexmark_Direct/query${{ matrix.query }}-" \ + - name: run Python Testing Benchmarks Nexmark (query ${{ matrix.query }}) + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:apache_beam:testing:benchmarks:nexmark:run + arguments: | + -PpythonVersion=3.8 \ + "-Pnexmark.args=${{ env.GRADLE_PYTHON_COMMAND_ARGUMENTS }} \ + --query=${{ matrix.query }} \ + --input=gs://temp-storage-for-perf-tests/nexmark/eventFiles/beam_PostCommit_Python_Nexmark_Direct/query${{ matrix.query }}-\*" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml new file mode 100644 index 000000000000..ca5753010d1c --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
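The `concurrency` block shared by all of these workflows builds its group key from chains of `||`. In GitHub Actions expressions `||` yields its first truthy operand, so the key resolves to the most specific identifier available for the triggering event. A simplified, labeled form of the same key (shortened for readability; the full fallback chain appears verbatim in the workflows):

```
concurrency:
  # The first non-empty value wins on each side of the hyphen:
  #   comment-triggered runs -> issue number + comment body
  #   scheduled runs         -> commit sha + cron string
  #   manual dispatches      -> ref + sender login
  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
  cancel-in-progress: true
```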
+ +name: PostCommit Python ValidatesContainer Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesContainer_Dataflow: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python Dataflow ValidatesContainer') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesContainer_Dataflow"] + job_phrase: ["Run Python Dataflow ValidatesContainer"] + python_version: ['3.8','3.9','3.10','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: ${{ matrix.python_version }} + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run validatesContainer script + env: + USER: github-actions + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:validatesContainer + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml b/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml new file mode 100644 index 000000000000..ded9ff0a4bd5 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml @@ -0,0 +1,98 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python ValidatesContainer Dataflow With RC + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python RC Dataflow ValidatesContainer') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC"] + job_phrase: ["Run Python RC Dataflow ValidatesContainer"] + python_version: ['3.8','3.9','3.10','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: ${{ matrix.python_version }} + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run validatesContainer script + env: + USER: github-actions + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:validatesContainer + arguments: | + -PtestRCDependencies=true \ + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml new file mode 100644 index 000000000000..4119ddd56020 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python ValidatesRunner Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesRunner_Dataflow: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python Dataflow ValidatesRunner') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 200 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesRunner_Dataflow"] + job_phrase: ["Run Python Dataflow ValidatesRunner"] + python_version: ['3.8', '3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: ${{ matrix.python_version }} + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run validatesRunnerBatchTests script + uses: ./.github/actions/gradle-command-self-hosted-action + 
with: + gradle-command: :sdks:python:test-suites:dataflow:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:validatesRunnerBatchTests + arguments: | + -PuseWheelDistribution \ + -PpythonVersion=${{ matrix.python_version }} \ + - name: Run validatesRunnerStreamingTests script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:validatesRunnerStreamingTests + arguments: | + -PuseWheelDistribution \ + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml new file mode 100644 index 000000000000..608bba248b3b --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
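All of these PostCommit jobs gate on the same three triggers: manual dispatch, the cron schedule, or a matching PR comment. Fixed phrases compare with `==`, while parameterized phrases (those carrying a Python version in the matrix) use `startsWith` so one guard covers every matrix leg. A minimal sketch of the guard with a hypothetical trigger phrase:

```
jobs:
  example_postcommit:
    # 'Run Example Suite' is a hypothetical phrase; the real jobs keep
    # the phrase in matrix.job_phrase so the comment, the check name,
    # and this guard stay in sync.
    if: |
      github.event_name == 'workflow_dispatch' ||
      github.event_name == 'schedule' ||
      startsWith(github.event.comment.body, 'Run Example Suite')
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
      - uses: actions/checkout@v4
```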
+ +name: PostCommit Python ValidatesRunner Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesRunner_Flink: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python Flink ValidatesRunner') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesRunner_Flink"] + job_phrase: ["Run Python Flink ValidatesRunner"] + python_version: ['3.8', '3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: ${{ matrix.python_version }} + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run flinkValidatesRunner script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:portable:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:flinkValidatesRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml new file mode 100644 index 000000000000..bd0bbe1d6ff1 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python ValidatesRunner Samza + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesRunner_Samza: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python Samza ValidatesRunner') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesRunner_Samza"] + job_phrase: ["Run Python Samza ValidatesRunner"] + python_version: ['3.8', '3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: ${{ matrix.python_version }} + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run samzaValidatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:portable:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:samzaValidatesRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml new file mode 100644 index 000000000000..6fda3a210aaf --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml @@ -0,0 +1,95 @@ +# Licensed to the Apache 
Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python ValidatesRunner Spark + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesRunner_Spark: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python Spark ValidatesRunner') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesRunner_Spark"] + job_phrase: ["Run Python Spark ValidatesRunner"] + python_version: ['3.8', '3.9', '3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: ${{ matrix.python_version }} + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run sparkValidatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:portable:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:sparkValidatesRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No 
newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml new file mode 100644 index 000000000000..34364bb38bcc --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python Xlang Gcp Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_Xlang_Gcp_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Python_Xlang_Gcp_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Python_Xlang_Gcp_Dataflow"] + job_phrase: ["Run Python_Xlang_Gcp_Dataflow PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit Python Xlang Gcp Dataflow script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:gcpCrossLanguagePostCommit + arguments: -PuseWheelDistribution + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml b/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml new file mode 100644 index 000000000000..f753b3cf15df --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python Xlang Gcp Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_Xlang_Gcp_Direct: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Python_Xlang_Gcp_Direct PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Python_Xlang_Gcp_Direct"] + job_phrase: ["Run Python_Xlang_Gcp_Direct PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit Python Xlang Gcp Direct script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:direct:gcpCrossLanguagePostCommit + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml new file mode 100644 
index 000000000000..6cc132935e66 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python Xlang IO Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_Xlang_IO_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Python_Xlang_IO_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Python_Xlang_IO_Dataflow"] + job_phrase: ["Run Python_Xlang_IO_Dataflow PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit Python Xlang IO Dataflow script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:ioCrossLanguagePostCommit + arguments: | + -PuseWheelDistribution \ + -PkafkaBootstrapServer=10.128.0.40:9094,10.128.0.28:9094,10.128.0.165:9094 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_SQL.yml b/.github/workflows/beam_PostCommit_SQL.yml new file mode 100644 index 000000000000..d27c5718d6f9 --- /dev/null +++ 
b/.github/workflows/beam_PostCommit_SQL.yml @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit SQL + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_SQL: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + matrix: + job_name: [beam_PostCommit_SQL] + job_phrase: [Run SQL PostCommit] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run SQL PostCommit' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit SQL script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sqlPostCommit + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' diff --git a/.github/workflows/beam_PostCommit_Sickbay_Python.yml b/.github/workflows/beam_PostCommit_Sickbay_Python.yml index af5fda81a4ec..465dc9966023 100644 --- a/.github/workflows/beam_PostCommit_Sickbay_Python.yml +++ b/.github/workflows/beam_PostCommit_Sickbay_Python.yml @@ -21,12 +21,12 @@ on: issue_comment: types: [created] schedule: - - cron: '0 */6 * * *' + - 
cron: '0 0 * * *' workflow_dispatch: # This allows a subsequently queued workflow run to interrupt previous runs concurrency: - group: '${{ github.workflow }} @ ${{ github.sha || github.head_ref || github.ref }}-${{ github.event.sender.login }}-${{ github.event.schedule }}' + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' cancel-in-progress: true #Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event @@ -52,27 +52,29 @@ env: jobs: beam_PostCommit_Sickbay_Python: - name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{matrix.python_version}}) + name: ${{matrix.job_name}} (${{matrix.job_phrase_1}} ${{matrix.python_version}} ${{matrix.job_phrase_2}}) runs-on: [self-hosted, ubuntu-20.04, main] timeout-minutes: 180 strategy: fail-fast: false matrix: job_name: [beam_PostCommit_Sickbay_Python] - job_phrase: [Run Python PostCommit Sickbay tests] + job_phrase_1: [Run Python] + job_phrase_2: [PostCommit Sickbay] python_version: ['3.8', '3.9', '3.10', '3.11'] if: | github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || - github.event.comment.body == 'Run Python PostCommit Sickbay tests' + (startsWith(github.event.comment.body, 'Run Python') && + endsWith(github.event.comment.body, 'PostCommit Sickbay')) steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: - comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + comment_phrase: ${{ matrix.job_phrase_1 }} ${{matrix.python_version}} ${{ matrix.job_phrase_2 }} github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase_1 }} ${{matrix.python_version}} ${{ matrix.job_phrase_2 }}) - name: Install Python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/beam_PostCommit_TransformService_Direct.yml b/.github/workflows/beam_PostCommit_TransformService_Direct.yml index 346abcc9951a..1d8b35098e45 100644 --- a/.github/workflows/beam_PostCommit_TransformService_Direct.yml +++ b/.github/workflows/beam_PostCommit_TransformService_Direct.yml @@ -63,7 +63,7 @@ jobs: job_phrase: ["Run TransformService_Direct PostCommit"] python_version: ['3.8','3.11'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: diff --git a/.github/workflows/beam_PostCommit_Website_Publish.yml b/.github/workflows/beam_PostCommit_Website_Publish.yml index 1493c506772f..4cb99532543d 100644 --- a/.github/workflows/beam_PostCommit_Website_Publish.yml +++ b/.github/workflows/beam_PostCommit_Website_Publish.yml @@ -55,7 +55,7 @@ jobs: timeout-minutes: 30 name: beam_PostCommit_Website_Publish steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: run PostCommit Website Publish script uses: ./.github/actions/gradle-command-self-hosted-action with: diff --git a/.github/workflows/beam_PostCommit_Website_Test.yml b/.github/workflows/beam_PostCommit_Website_Test.yml new file mode 100644 index 000000000000..6155a45ef6a6 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Website_Test.yml @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one or 
more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Website Test + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Website_Test: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Full Website Test' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 30 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Website_Test"] + job_phrase: ["Run Full Website Test"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Website Test script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :website:testWebsite + arguments: -PdisableExternal=false \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_Direct.yml b/.github/workflows/beam_PostCommit_XVR_Direct.yml new file mode 100644 index 000000000000..988b153986e9 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_Direct.yml @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
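The cross-language (Xlang and XVR) suites in this change install two interpreters in a single `setup-python` step: `python-version` accepts a newline-separated list, and, per the action's documentation for v4, every listed version is installed while the last one becomes the default `python` on PATH. A sketch of the pattern as used here:

```
      - name: Install Python
        uses: actions/setup-python@v4
        with:
          # Both versions land on the runner; per setup-python's docs the
          # last listed (3.11) is the default interpreter, with 3.8 still
          # available for the cross-language expansion services.
          python-version: |
            3.8
            3.11
```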
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_Direct: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_Direct PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_Direct"] + job_phrase: ["Run XVR_Direct PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR Direct script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version != '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:direct:xlang:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=true \ + - name: run PostCommit XVR Direct script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version == '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:direct:xlang:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=false \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_Flink.yml b/.github/workflows/beam_PostCommit_XVR_Flink.yml new file mode 100644 index 
000000000000..0ab819c57a02 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_Flink.yml @@ -0,0 +1,105 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + FlinkVersion: 1.15 + +jobs: + beam_PostCommit_XVR_Flink: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_Flink PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_Flink"] + job_phrase: ["Run XVR_Flink PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR Flink script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version != '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:flink:${{ env.FlinkVersion }}:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=true \ + - name: run PostCommit XVR Flink script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version == '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + 
gradle-command: :runners:flink:${{ env.FlinkVersion }}:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=false \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml b/.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml index ee715495ab0e..687c77e572f2 100644 --- a/.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml +++ b/.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_XVR_GoUsingJava_Dataflow"] job_phrase: ["Run XVR_GoUsingJava_Dataflow PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -89,4 +89,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml b/.github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml new file mode 100644 index 000000000000..71b3d27473e1 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml @@ -0,0 +1,92 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
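
The new XVR workflow files in this change all follow one skeleton, so it is worth reading once rather than per file: an `if:` guard that fires on manual dispatch, on the cron schedule, or on a PR comment that matches the trigger phrase exactly, plus a one-element `matrix` that threads `job_name` and `job_phrase` into the job title, which is what the comment-trigger tooling matches against. A minimal sketch of that skeleton follows; `beam_Example_XVR` and `Run Example PostCommit` are placeholder names, not jobs or phrases from this change:

```yaml
# Placeholder names: beam_Example_XVR and 'Run Example PostCommit' stand in
# for the per-runner job names and trigger phrases used by the real files.
name: PostCommit XVR Example

on:
  issue_comment:
    types: [created]
  schedule:
    - cron: '0 */6 * * *'
  workflow_dispatch:

jobs:
  beam_Example_XVR:
    # Guard: run on manual dispatch, on the cron schedule, or when a PR
    # comment matches the trigger phrase exactly.
    if: |
      github.event_name == 'workflow_dispatch' ||
      github.event_name == 'schedule' ||
      github.event.comment.body == 'Run Example PostCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    # One-element matrix: job_name/job_phrase surface in the job title,
    # so the comment-trigger tooling can map a phrase back to this job.
    strategy:
      matrix:
        job_name: ["beam_Example_XVR"]
        job_phrase: ["Run Example PostCommit"]
    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
    steps:
      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
```

The recurring `EnricoMi/publish-unit-test-result-action@v2` hunks are likewise one pattern repeated across files. GitHub Actions expressions have no ternary operator, so `cond && 'always' || 'off'` is the usual idiom for choosing between two string values, and `env.prsha` (set earlier in the job, presumably by the setup step) attaches results to the PR head commit rather than the merge commit. A commented sketch of that step, under the same assumptions:

```yaml
      - name: Publish JUnit Test Results
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
          # prsha is exported earlier in the job; fall back to the
          # workflow's own commit when it is absent.
          commit: '${{ env.prsha || env.GITHUB_SHA }}'
          # `cond && 'always' || 'off'`: comment on the PR only when the
          # run was comment-triggered; stay silent for cron and dispatch.
          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
```
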
+ +name: PostCommit XVR JavaUsingPython Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_JavaUsingPython_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_JavaUsingPython_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_JavaUsingPython_Dataflow"] + job_phrase: ["Run XVR_JavaUsingPython_Dataflow PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR JavaUsingPython Dataflow script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesCrossLanguageRunnerJavaUsingPython + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml b/.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml new file mode 100644 index 000000000000..ea3fd2cb86b8 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR PythonUsingJavaSQL Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_PythonUsingJavaSQL_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow"] + job_phrase: ["Run XVR_PythonUsingJavaSQL_Dataflow PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + - name: run PostCommit XVR PythonUsingJavaSQL Dataflow script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesCrossLanguageRunnerPythonUsingSql + arguments: | + -PpythonVersion=3.11 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml b/.github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml new file mode 100644 index 000000000000..d575bbeabbc8 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml @@ -0,0 +1,92 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR PythonUsingJava Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_PythonUsingJava_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_PythonUsingJava_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_PythonUsingJava_Dataflow"] + job_phrase: ["Run XVR_PythonUsingJava_Dataflow PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR PythonUsingJava Dataflow script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesCrossLanguageRunnerPythonUsingJava + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_Samza.yml b/.github/workflows/beam_PostCommit_XVR_Samza.yml new file mode 100644 index 000000000000..f808456b8c97 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_Samza.yml @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR Samza + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_Samza: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_Samza PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_Samza"] + job_phrase: ["Run XVR_Samza PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR Samza script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version != '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:samza:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=true \ + - name: run PostCommit XVR Samza script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version == '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:samza:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=false \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: 
archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_Spark3.yml b/.github/workflows/beam_PostCommit_XVR_Spark3.yml new file mode 100644 index 000000000000..8ca97bb23edb --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_Spark3.yml @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR Spark3 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_Spark3: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_Spark3 PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_Spark3"] + job_phrase: ["Run XVR_Spark3 PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR Spark3 script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version != '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=true \ + - name: run PostCommit XVR Spark3 script 
+ env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version == '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=false \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_CommunityMetrics.yml b/.github/workflows/beam_PreCommit_CommunityMetrics.yml index 2b59e6290a58..7a93873168e0 100644 --- a/.github/workflows/beam_PreCommit_CommunityMetrics.yml +++ b/.github/workflows/beam_PreCommit_CommunityMetrics.yml @@ -71,22 +71,17 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run CommunityMetrics PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{matrix.job_phrase}} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Install Java - uses: actions/setup-java@v3.8.0 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - distribution: 'zulu' - java-version: '8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 - name: Remove default github maven configuration diff --git a/.github/workflows/beam_PreCommit_Go.yml b/.github/workflows/beam_PreCommit_Go.yml index 50c2b3a265b1..227f3c7648ab 100644 --- a/.github/workflows/beam_PreCommit_Go.yml +++ b/.github/workflows/beam_PreCommit_Go.yml @@ -71,26 +71,18 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run Go PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Install Java - uses: actions/setup-java@v3.8.0 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - distribution: 'zulu' - java-version: '8' - - name: Install Go - uses: actions/setup-go@v4 - with: - go-version: '1.21' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 + go-version: 1.21 - name: run goPreCommit script uses: ./.github/actions/gradle-command-self-hosted-action with: diff --git a/.github/workflows/beam_PreCommit_GoPortable.yml b/.github/workflows/beam_PreCommit_GoPortable.yml index 397dac39b3f0..8156df15133c 100644 --- a/.github/workflows/beam_PreCommit_GoPortable.yml +++ b/.github/workflows/beam_PreCommit_GoPortable.yml @@ -71,18 +71,18 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run GoPortable PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup self-hosted - uses: ./.github/actions/setup-self-hosted-action + - name: Setup environment + uses: 
./.github/actions/setup-environment-action with: - requires-py-39: false - requires-go: false + python-version: 3.8 + java-version: 8 - name: Run goPortablePreCommit script uses: ./.github/actions/gradle-command-self-hosted-action with: diff --git a/.github/workflows/beam_PreCommit_GoPrism.yml b/.github/workflows/beam_PreCommit_GoPrism.yml new file mode 100644 index 000000000000..1a669c157007 --- /dev/null +++ b/.github/workflows/beam_PreCommit_GoPrism.yml @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PreCommit GoPrism + +on: + push: + tags: ['v*'] + branches: ['master', 'release-*'] + paths: ['model/**', 'sdks/go.**', 'release/**','.github/workflows/beam_PreCommit_GoPrism.yml'] + pull_request_target: + branches: ['master', 'release-*'] + paths: ['model/**', 'sdks/go.**', 'release/**'] + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +# Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +jobs: + beam_PreCommit_GoPrism: + name: ${{matrix.job_name}} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + strategy: + matrix: + job_name: [beam_PreCommit_GoPrism] + job_phrase: [Run GoPrism PreCommit] + timeout-minutes: 120 + if: | + github.event_name == 'push' || + github.event_name == 'pull_request_target' || + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run GoPrism PreCommit' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + python-version: 3.8 + java-version: 8 + - name: Run goPrismPreCommit script + uses: 
./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :goPrismPreCommit \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_ItFramework.yml b/.github/workflows/beam_PreCommit_ItFramework.yml index 83f29211da35..e8ec1287be51 100644 --- a/.github/workflows/beam_PreCommit_ItFramework.yml +++ b/.github/workflows/beam_PreCommit_ItFramework.yml @@ -74,23 +74,17 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run It_Framework PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{matrix.job_phrase}} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup self-hosted - uses: ./.github/actions/setup-self-hosted-action + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - requires-py-38: false - requires-py-39: false - requires-go: false - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 - name: run ItFrameworkPrecommit script run: ./gradlew -p it build - name: Archive JUnit Test Results @@ -103,4 +97,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_Java.yml b/.github/workflows/beam_PreCommit_Java.yml index 98022dba98ab..ab5c17c55f4c 100644 --- a/.github/workflows/beam_PreCommit_Java.yml +++ b/.github/workflows/beam_PreCommit_Java.yml @@ -141,6 +141,11 @@ permissions: security-events: read statuses: read +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + jobs: beam_PreCommit_Java: name: ${{matrix.job_name}} (${{matrix.job_phrase}}) @@ -158,7 +163,7 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run Java PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -182,6 +187,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct.yml index ef1dcb636032..220dea3208cc 100644 --- a/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct.yml @@ -92,7 +92,7 @@ jobs: github.event.comment.body == 'Run Java_Amazon-Web-Services2_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -123,6 +123,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 
'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services_IO_Direct.yml index cf57623f0602..03ee673e2e24 100644 --- a/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services_IO_Direct.yml @@ -92,7 +92,7 @@ jobs: github.event.comment.body == 'Run Java_Amazon-Web-Services_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -123,6 +123,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Amqp_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Amqp_IO_Direct.yml index 28aeae758376..fede8fa6d548 100644 --- a/.github/workflows/beam_PreCommit_Java_Amqp_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Amqp_IO_Direct.yml @@ -74,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_Amqp_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -98,6 +98,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Azure_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Azure_IO_Direct.yml index 05f1bb5ca53b..7e20dd043cad 100644 --- a/.github/workflows/beam_PreCommit_Java_Azure_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Azure_IO_Direct.yml @@ -92,7 +92,7 @@ jobs: github.event.comment.body == 'Run Java_Azure_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -116,6 +116,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Cassandra_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Cassandra_IO_Direct.yml index 3c56c4f062a5..5f63a25440b8 100644 --- a/.github/workflows/beam_PreCommit_Java_Cassandra_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Cassandra_IO_Direct.yml @@ -74,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_Cassandra_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -98,6 +98,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() 
with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Cdap_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Cdap_IO_Direct.yml index fa83a5958fe2..caa1c475f5b0 100644 --- a/.github/workflows/beam_PreCommit_Java_Cdap_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Cdap_IO_Direct.yml @@ -78,7 +78,7 @@ jobs: github.event.comment.body == 'Run Java_Cdap_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -102,6 +102,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Clickhouse_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Clickhouse_IO_Direct.yml index 266e311c092a..8f2a5cde3749 100644 --- a/.github/workflows/beam_PreCommit_Java_Clickhouse_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Clickhouse_IO_Direct.yml @@ -74,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_Clickhouse_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -98,6 +98,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Csv_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Csv_IO_Direct.yml index 9b6d73395a1b..1c304500b567 100644 --- a/.github/workflows/beam_PreCommit_Java_Csv_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Csv_IO_Direct.yml @@ -74,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_Csv_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -98,6 +98,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Debezium_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Debezium_IO_Direct.yml index 5d08a5e03e61..db348a7684af 100644 --- a/.github/workflows/beam_PreCommit_Java_Debezium_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Debezium_IO_Direct.yml @@ -53,6 +53,11 @@ concurrency: group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' cancel-in-progress: true +env: + 
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + jobs: beam_PreCommit_Java_Debezium_IO_Direct: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) @@ -69,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_Debezium_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -102,6 +107,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_ElasticSearch_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_ElasticSearch_IO_Direct.yml index c58876de7ff0..27a3e175e7e1 100644 --- a/.github/workflows/beam_PreCommit_Java_ElasticSearch_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_ElasticSearch_IO_Direct.yml @@ -55,6 +55,11 @@ concurrency: group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' cancel-in-progress: true +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + jobs: beam_PreCommit_Java_ElasticSearch_IO_Direct: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) @@ -71,7 +76,7 @@ jobs: github.event.comment.body == 'Run Java_ElasticSearch_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -106,6 +111,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow.yml b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow.yml index 2d9e34eeebae..1dfd9ea1eb4d 100644 --- a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow.yml +++ b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow.yml @@ -85,23 +85,17 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run Java_Examples_Dataflow PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{matrix.job_phrase}} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup self-hosted - uses: ./.github/actions/setup-self-hosted-action + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - requires-py-38: false - requires-py-39: false - requires-go: false - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 - name: Authenticate on GCP uses: 
google-github-actions/setup-gcloud@v0 with: @@ -126,4 +120,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml index 5c7c81497756..8484360de7b1 100644 --- a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml +++ b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml @@ -86,7 +86,7 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run Java_Examples_Dataflow_Java11 PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -127,4 +127,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml index 724daf16d889..bc5c457eb761 100644 --- a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml +++ b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml @@ -63,6 +63,11 @@ concurrency: group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' cancel-in-progress: true +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + jobs: beam_PreCommit_Java_Examples_Dataflow_Java17: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) @@ -79,7 +84,7 @@ jobs: github.event.comment.body == 'Run Java_Examples_Dataflow_Java17 PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -124,6 +129,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v2 diff --git a/.github/workflows/beam_PreCommit_Java_File-schema-transform_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_File-schema-transform_IO_Direct.yml index abb3c5d029a4..d256bca9ebaa 100644 --- a/.github/workflows/beam_PreCommit_Java_File-schema-transform_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_File-schema-transform_IO_Direct.yml @@ -53,6 +53,11 @@ concurrency: group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' cancel-in-progress: true +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + 
GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + jobs: beam_PreCommit_Java_File-schema-transform_IO_Direct: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) @@ -69,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_File-schema-transform_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -94,6 +99,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Flink_Versions.yml b/.github/workflows/beam_PreCommit_Java_Flink_Versions.yml index 8fafdbcaa9e5..e4a04839cef4 100644 --- a/.github/workflows/beam_PreCommit_Java_Flink_Versions.yml +++ b/.github/workflows/beam_PreCommit_Java_Flink_Versions.yml @@ -73,25 +73,17 @@ jobs: github.event_name == 'schedule' || github.event.comment.body == 'Run Java_Flink_Versions PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: 'Run Java_Flink_Versions PreCommit' github_token: ${{ secrets.GITHUB_TOKEN }} - - name: Install Java - uses: actions/setup-java@v3.8.0 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - distribution: 'zulu' - java-version: '8' - - name: Install Python - uses: actions/setup-python@v4 - with: - python-version: '3.8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 + python-version: 3.8 - name: run Java Flink Versions PreCommit script uses: ./.github/actions/gradle-command-self-hosted-action with: @@ -108,4 +100,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml index f567a4fd2a0d..840b753d8190 100644 --- a/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml @@ -92,7 +92,7 @@ jobs: github.event.comment.body == 'Run Java_GCP_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -120,6 +120,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_HBase_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_HBase_IO_Direct.yml index 0664f969996e..5734e0e9d453 100644 --- a/.github/workflows/beam_PreCommit_Java_HBase_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_HBase_IO_Direct.yml @@ -76,7 +76,7 @@ jobs: 
      github.event.comment.body == 'Run Java_HBase_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -100,6 +100,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml
index 688a4ef2e5ff..6230c8dbae8a 100644
--- a/.github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml
@@ -76,7 +76,7 @@ jobs:
      github.event.comment.body == 'Run Java_HCatalog_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -100,6 +100,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Hadoop_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Hadoop_IO_Direct.yml
index cbfa9c371e38..ad69f5fb0895 100644
--- a/.github/workflows/beam_PreCommit_Java_Hadoop_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Hadoop_IO_Direct.yml
@@ -100,7 +100,7 @@ jobs:
      github.event.comment.body == 'Run Java_Hadoop_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -138,6 +138,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_IOs_Direct.yml b/.github/workflows/beam_PreCommit_Java_IOs_Direct.yml
index 8c7796da9a74..b20e9886e701 100644
--- a/.github/workflows/beam_PreCommit_Java_IOs_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_IOs_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_IOs_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -99,6 +99,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_InfluxDb_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_InfluxDb_IO_Direct.yml
index 17f1676eb70f..9acbe5f8705a 100644
--- a/.github/workflows/beam_PreCommit_Java_InfluxDb_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_InfluxDb_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_InfluxDb_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_JDBC_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_JDBC_IO_Direct.yml
index cbdfdaa5d304..869c189c1c9d 100644
--- a/.github/workflows/beam_PreCommit_Java_JDBC_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_JDBC_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_JDBC_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -105,6 +105,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Jms_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Jms_IO_Direct.yml
index 57aae16ebefc..160e01520845 100644
--- a/.github/workflows/beam_PreCommit_Java_Jms_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Jms_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Jms_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -105,6 +105,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml
index 003b56f80416..64fc4122923b 100644
--- a/.github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml
@@ -82,7 +82,7 @@ jobs:
      github.event.comment.body == 'Run Java_Kafka_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -107,6 +107,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Kinesis_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Kinesis_IO_Direct.yml
index 3673fbcfef3a..d1788288d3d5 100644
--- a/.github/workflows/beam_PreCommit_Java_Kinesis_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Kinesis_IO_Direct.yml
@@ -92,7 +92,7 @@ jobs:
      github.event.comment.body == 'Run Java_Kinesis_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -130,6 +130,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Kudu_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Kudu_IO_Direct.yml
index be65e5522761..693cead4b031 100644
--- a/.github/workflows/beam_PreCommit_Java_Kudu_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Kudu_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Kudu_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_MongoDb_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_MongoDb_IO_Direct.yml
index e457693adfec..472b0f7f5531 100644
--- a/.github/workflows/beam_PreCommit_Java_MongoDb_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_MongoDb_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_MongoDb_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Mqtt_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Mqtt_IO_Direct.yml
index d01078fa08ca..ad0e5ac9feda 100644
--- a/.github/workflows/beam_PreCommit_Java_Mqtt_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Mqtt_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Mqtt_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Neo4j_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Neo4j_IO_Direct.yml
index 0a0ba8664a07..b6f51b243a8f 100644
--- a/.github/workflows/beam_PreCommit_Java_Neo4j_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Neo4j_IO_Direct.yml
@@ -76,7 +76,7 @@ jobs:
      github.event.comment.body == 'Run Java_Neo4j_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -107,6 +107,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_PVR_Flink_Batch.yml b/.github/workflows/beam_PreCommit_Java_PVR_Flink_Batch.yml
index b23d70c4f5af..9679b1825cf5 100644
--- a/.github/workflows/beam_PreCommit_Java_PVR_Flink_Batch.yml
+++ b/.github/workflows/beam_PreCommit_Java_PVR_Flink_Batch.yml
@@ -59,6 +59,11 @@ permissions:
  security-events: read
  statuses: read

+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
jobs:
  beam_PreCommit_Java_PVR_Flink_Batch:
    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
@@ -75,7 +80,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Java_PVR_Flink_Batch PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,4 +103,6 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PreCommit_Java_PVR_Flink_Docker.yml b/.github/workflows/beam_PreCommit_Java_PVR_Flink_Docker.yml
index 312210245477..30dd710e1848 100644
--- a/.github/workflows/beam_PreCommit_Java_PVR_Flink_Docker.yml
+++ b/.github/workflows/beam_PreCommit_Java_PVR_Flink_Docker.yml
@@ -85,7 +85,7 @@ jobs:
      github.event.comment.body == 'Run Java_PVR_Flink_Docker PreCommit'
    timeout-minutes: 240
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -108,4 +108,6 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PreCommit_Java_Parquet_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Parquet_IO_Direct.yml
index 04468ae7588c..f2a27da20dd2 100644
--- a/.github/workflows/beam_PreCommit_Java_Parquet_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Parquet_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Parquet_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Pulsar_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Pulsar_IO_Direct.yml
index 258e95e5d1b9..ed7c55c98f4d 100644
--- a/.github/workflows/beam_PreCommit_Java_Pulsar_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Pulsar_IO_Direct.yml
@@ -92,7 +92,7 @@ jobs:
      github.event.comment.body == 'Run Java_Pulsar_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -116,6 +116,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_RabbitMq_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_RabbitMq_IO_Direct.yml
index f137a5441bf3..962c1526c90a 100644
--- a/.github/workflows/beam_PreCommit_Java_RabbitMq_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_RabbitMq_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_RabbitMq_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Redis_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Redis_IO_Direct.yml
index 7a90efe0ede2..efd8d472f8ae 100644
--- a/.github/workflows/beam_PreCommit_Java_Redis_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Redis_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Redis_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_SingleStore_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_SingleStore_IO_Direct.yml
index 26b7c81503bd..ad64ff286aab 100644
--- a/.github/workflows/beam_PreCommit_Java_SingleStore_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_SingleStore_IO_Direct.yml
@@ -76,7 +76,7 @@ jobs:
      github.event.comment.body == 'Run Java_SingleStore_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -100,6 +100,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Snowflake_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Snowflake_IO_Direct.yml
index 30cc95108239..f8eaec4c11c4 100644
--- a/.github/workflows/beam_PreCommit_Java_Snowflake_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Snowflake_IO_Direct.yml
@@ -78,7 +78,7 @@ jobs:
      github.event.comment.body == 'Run Java_Snowflake_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -109,6 +109,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Solr_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Solr_IO_Direct.yml
index a19db8a6faf4..8b834682ffd5 100644
--- a/.github/workflows/beam_PreCommit_Java_Solr_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Solr_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Solr_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Spark3_Versions.yml b/.github/workflows/beam_PreCommit_Java_Spark3_Versions.yml
index b7b1dfa0ea29..2bb53629614d 100644
--- a/.github/workflows/beam_PreCommit_Java_Spark3_Versions.yml
+++ b/.github/workflows/beam_PreCommit_Java_Spark3_Versions.yml
@@ -76,7 +76,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Java_Spark3_Versions PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -112,4 +112,6 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PreCommit_Java_Splunk_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Splunk_IO_Direct.yml
index 4131bbdfa04f..ff0a31eb0d0e 100644
--- a/.github/workflows/beam_PreCommit_Java_Splunk_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Splunk_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Splunk_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Thrift_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Thrift_IO_Direct.yml
index d8b275ccfefb..649e4ad28c65 100644
--- a/.github/workflows/beam_PreCommit_Java_Thrift_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Thrift_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Thrift_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Tika_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Tika_IO_Direct.yml
index c3e340b3aa0e..42fe14cd8338 100644
--- a/.github/workflows/beam_PreCommit_Java_Tika_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Tika_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Tika_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Kotlin_Examples.yml b/.github/workflows/beam_PreCommit_Kotlin_Examples.yml
index 892d0935a26a..b30163b12a47 100644
--- a/.github/workflows/beam_PreCommit_Kotlin_Examples.yml
+++ b/.github/workflows/beam_PreCommit_Kotlin_Examples.yml
@@ -86,7 +86,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Kotlin_Examples PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Portable_Python.yml b/.github/workflows/beam_PreCommit_Portable_Python.yml
index 7da598d58310..35e2c535f4ea 100644
--- a/.github/workflows/beam_PreCommit_Portable_Python.yml
+++ b/.github/workflows/beam_PreCommit_Portable_Python.yml
@@ -91,7 +91,7 @@ jobs:
      github.event_name == 'schedule' ||
      startsWith(github.event.comment.body, 'Run Portable_Python PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Python.yml b/.github/workflows/beam_PreCommit_Python.yml
index fe0a15aa651b..80c69afce6e2 100644
--- a/.github/workflows/beam_PreCommit_Python.yml
+++ b/.github/workflows/beam_PreCommit_Python.yml
@@ -72,26 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.comment.body, 'Run Python PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
diff --git a/.github/workflows/beam_PreCommit_PythonDocker.yml b/.github/workflows/beam_PreCommit_PythonDocker.yml
index 94f3d0bc7592..669f316549c5 100644
--- a/.github/workflows/beam_PreCommit_PythonDocker.yml
+++ b/.github/workflows/beam_PreCommit_PythonDocker.yml
@@ -71,30 +71,19 @@ jobs:
      github.event_name == 'schedule' ||
      startsWith(github.event.comment.body, 'Run PythonDocker PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{matrix.job_phrase}} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Install Go
-        uses: actions/setup-go@v4
-        with:
-          go-version: '1.16'
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          go-version: 1.16
      - name: Setup Buildx
        uses: docker/setup-buildx-action@v2
        with:
diff --git a/.github/workflows/beam_PreCommit_PythonDocs.yml b/.github/workflows/beam_PreCommit_PythonDocs.yml
index e5e30c2a00cc..a67e8afa3a4e 100644
--- a/.github/workflows/beam_PreCommit_PythonDocs.yml
+++ b/.github/workflows/beam_PreCommit_PythonDocs.yml
@@ -71,26 +71,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run PythonDocs PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{matrix.job_phrase}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Install Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
+          python-version: 3.8
      - name: run pythonDocsPreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_PreCommit_PythonFormatter.yml b/.github/workflows/beam_PreCommit_PythonFormatter.yml
index 2bfdaff26f68..1a3335b370e9 100644
--- a/.github/workflows/beam_PreCommit_PythonFormatter.yml
+++ b/.github/workflows/beam_PreCommit_PythonFormatter.yml
@@ -70,26 +70,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run PythonFormatter PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{matrix.job_phrase}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Install Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
+          python-version: 3.8
      - name: run pythonFormatterPreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_PreCommit_PythonLint.yml b/.github/workflows/beam_PreCommit_PythonLint.yml
index 859c6532654d..9d290f1ba86d 100644
--- a/.github/workflows/beam_PreCommit_PythonLint.yml
+++ b/.github/workflows/beam_PreCommit_PythonLint.yml
@@ -70,30 +70,19 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run PythonLint PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Install Go
-        uses: actions/setup-go@v4
-        with:
-          go-version: '1.16'
-      - name: Install Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
+          python-version: 3.8
+          go-version: 1.16
      - name: run pythonLintPreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Python_Coverage.yml b/.github/workflows/beam_PreCommit_Python_Coverage.yml
index 36cd2a6b42b4..65002f9da894 100644
--- a/.github/workflows/beam_PreCommit_Python_Coverage.yml
+++ b/.github/workflows/beam_PreCommit_Python_Coverage.yml
@@ -70,26 +70,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Python_Coverage PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{matrix.job_phrase}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          python-version: '3.8'
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
+          python-version: 3.8
      - name: Run preCommitPyCoverage
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Python_Dataframes.yml b/.github/workflows/beam_PreCommit_Python_Dataframes.yml
index 5b1ec1492d34..a7a0ec1836ce 100644
--- a/.github/workflows/beam_PreCommit_Python_Dataframes.yml
+++ b/.github/workflows/beam_PreCommit_Python_Dataframes.yml
@@ -72,26 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.comment.body, 'Run Python_Dataframes PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase}} ${{ matrix.python_version}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name}} (${{ matrix.job_phrase}} ${{ matrix.python_version}})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
diff --git a/.github/workflows/beam_PreCommit_Python_Examples.yml b/.github/workflows/beam_PreCommit_Python_Examples.yml
index b0dd7c16d296..1b03b4c0a35a 100644
--- a/.github/workflows/beam_PreCommit_Python_Examples.yml
+++ b/.github/workflows/beam_PreCommit_Python_Examples.yml
@@ -72,26 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.comment.body, 'Run Python_Examples PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
diff --git a/.github/workflows/beam_PreCommit_Python_Integration.yml b/.github/workflows/beam_PreCommit_Python_Integration.yml
index ffe53bcfc323..5a1b7bc720f3 100644
--- a/.github/workflows/beam_PreCommit_Python_Integration.yml
+++ b/.github/workflows/beam_PreCommit_Python_Integration.yml
@@ -72,26 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.comment.body, 'Run Python_Integration PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
diff --git a/.github/workflows/beam_PreCommit_Python_PVR_Flink.yml b/.github/workflows/beam_PreCommit_Python_PVR_Flink.yml
index 840abc69a86a..a0011354749a 100644
--- a/.github/workflows/beam_PreCommit_Python_PVR_Flink.yml
+++ b/.github/workflows/beam_PreCommit_Python_PVR_Flink.yml
@@ -71,6 +71,11 @@ concurrency:
  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}'
  cancel-in-progress: true

+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
jobs:
  beam_PreCommit_Python_PVR_Flink:
    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
@@ -87,7 +92,7 @@ jobs:
      github.event.comment.body == 'Run Python_PVR_Flink PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,9 +103,6 @@ jobs:
        uses: actions/setup-python@v4
        with:
          python-version: 3.11
-      - name: Configure passenv for tox
-        run: |
-          sed -i '/^\[testenv\]$/,/^\[/ s/^passenv=TERM/passenv=TERM,CLOUDSDK_CONFIG/' sdks/python/tox.ini
      - name: run Python PVR Flink PreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        env:
diff --git a/.github/workflows/beam_PreCommit_Python_Runners.yml b/.github/workflows/beam_PreCommit_Python_Runners.yml
index e317902a3eb8..775af7f39d24 100644
--- a/.github/workflows/beam_PreCommit_Python_Runners.yml
+++ b/.github/workflows/beam_PreCommit_Python_Runners.yml
@@ -72,26 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.comment.body, 'Run Python_Runners PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
diff --git a/.github/workflows/beam_PreCommit_Python_Transforms.yml b/.github/workflows/beam_PreCommit_Python_Transforms.yml
index 95f6945caad7..291dcde8665a 100644
--- a/.github/workflows/beam_PreCommit_Python_Transforms.yml
+++ b/.github/workflows/beam_PreCommit_Python_Transforms.yml
@@ -72,26 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.comment.body, 'Run Python_Transforms PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
diff --git a/.github/workflows/beam_PreCommit_RAT.yml b/.github/workflows/beam_PreCommit_RAT.yml
index 3a09b28af7da..390413fb7aca 100644
--- a/.github/workflows/beam_PreCommit_RAT.yml
+++ b/.github/workflows/beam_PreCommit_RAT.yml
@@ -69,22 +69,17 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run RAT PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
      - name: run RAT script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_PreCommit_SQL.yml b/.github/workflows/beam_PreCommit_SQL.yml
index 3b8ad98db155..18d84b219d5b 100644
--- a/.github/workflows/beam_PreCommit_SQL.yml
+++ b/.github/workflows/beam_PreCommit_SQL.yml
@@ -71,7 +71,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run SQL PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -101,6 +101,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v2
diff --git a/.github/workflows/beam_PreCommit_SQL_Java11.yml b/.github/workflows/beam_PreCommit_SQL_Java11.yml
index 2ff5e7459214..29a89d7f54a8 100644
--- a/.github/workflows/beam_PreCommit_SQL_Java11.yml
+++ b/.github/workflows/beam_PreCommit_SQL_Java11.yml
@@ -71,7 +71,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run SQL_Java11 PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -116,6 +116,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_SQL_Java17.yml b/.github/workflows/beam_PreCommit_SQL_Java17.yml
index 15a223dcc19e..b493dd7f5fee 100644
--- a/.github/workflows/beam_PreCommit_SQL_Java17.yml
+++ b/.github/workflows/beam_PreCommit_SQL_Java17.yml
@@ -71,7 +71,7 @@ jobs:
      github.event.comment.body == 'Run SQL_Java17 PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -119,4 +119,6 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PreCommit_Spotless.yml b/.github/workflows/beam_PreCommit_Spotless.yml
index a703454a74fe..552a92e104a1 100644
--- a/.github/workflows/beam_PreCommit_Spotless.yml
+++ b/.github/workflows/beam_PreCommit_Spotless.yml
@@ -62,6 +62,11 @@ permissions:
  security-events: read
  statuses: read

+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
jobs:
  beam_PreCommit_Spotless:
    name: ${{matrix.job_name}} (${{matrix.job_phrase}})
@@ -79,7 +84,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Spotless PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Typescript.yml b/.github/workflows/beam_PreCommit_Typescript.yml
index 728dede57106..21c760f2e525 100644
--- a/.github/workflows/beam_PreCommit_Typescript.yml
+++ b/.github/workflows/beam_PreCommit_Typescript.yml
@@ -72,22 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Typescript PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{matrix.job_phrase}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Setup self-hosted
-        uses: ./.github/actions/setup-self-hosted-action
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          requires-py-39: false
-          requires-go: false
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          python-version: 3.8
+          java-version: 8
      - name: run typescriptPreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Website.yml b/.github/workflows/beam_PreCommit_Website.yml
index 7dd58b27768a..87218dc28033 100644
--- a/.github/workflows/beam_PreCommit_Website.yml
+++ b/.github/workflows/beam_PreCommit_Website.yml
@@ -71,22 +71,17 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Website PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
      - name: run websitePreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Website_Stage_GCS.yml b/.github/workflows/beam_PreCommit_Website_Stage_GCS.yml
index da2ba216994e..f910f9a88da3 100644
--- a/.github/workflows/beam_PreCommit_Website_Stage_GCS.yml
+++ b/.github/workflows/beam_PreCommit_Website_Stage_GCS.yml
@@ -38,6 +38,8 @@ env:
  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+  gcsbucket: apache-beam-website-pull-requests
+  ghprbPullId:

#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
permissions:
@@ -55,10 +57,6 @@ permissions:
  security-events: read
  statuses: read

-env:
-  gcsbucket: apache-beam-website-pull-requests
-  ghprbPullId:
-
jobs:
  beam_PreCommit_Website_Stage_GCS:
    name: ${{matrix.job_name}} (${{matrix.job_phrase}})
@@ -75,7 +73,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Website_Stage_GCS PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -84,15 +82,11 @@ jobs:
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
      - name: Echo PR number
        run: echo "ghprbPullId=${{ github.event.pull_request.number || github.event.issue.number }}" >> $GITHUB_ENV
-      - name: Setup self-hosted
-        uses: ./.github/actions/setup-self-hosted-action
-        with:
-          requires-py-39: false
-          requires-go: false
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          cache-read-only: false
+          python-version: 3.8
+          java-version: 8
      - name: Authenticate on GCP
        uses: google-github-actions/setup-gcloud@v0
        with:
diff --git a/.github/workflows/beam_PreCommit_Whitespace.yml b/.github/workflows/beam_PreCommit_Whitespace.yml
index 04705d49928c..03a976cfe444 100644
--- a/.github/workflows/beam_PreCommit_Whitespace.yml
+++ b/.github/workflows/beam_PreCommit_Whitespace.yml
@@ -70,26 +70,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Whitespace PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Install Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
+          python-version: 3.8
      - name: run whitespacePreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml b/.github/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml
index 7e929b751e35..d4c9178e6c91 100644
--- a/.github/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml
+++ b/.github/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml
@@ -69,21 +69,17 @@ jobs:
    runs-on: [self-hosted, ubuntu-20.04, main]

    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
          python-version: ${{ matrix.python_version }}
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Authenticate on GCP
diff --git a/.github/workflows/beam_Release_NightlySnapshot.yml b/.github/workflows/beam_Release_NightlySnapshot.yml
index 1b572f4aa0f3..a4be830cd3c5 100644
--- a/.github/workflows/beam_Release_NightlySnapshot.yml
+++ b/.github/workflows/beam_Release_NightlySnapshot.yml
@@ -54,22 +54,17 @@ jobs:
      github.event_name == 'schedule'

    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          github_job: ${{matrix.job_name}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          comment_phrase: "Release Nightly Snapshot"
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
      - name: Auth on snapshot repository
        run: |
          mkdir -p ${HOME}/.m2
diff --git a/.github/workflows/beam_Release_Python_NightlySnapshot.yml b/.github/workflows/beam_Release_Python_NightlySnapshot.yml
index 2787de9eefef..62019c536969 100644
--- a/.github/workflows/beam_Release_Python_NightlySnapshot.yml
+++ b/.github/workflows/beam_Release_Python_NightlySnapshot.yml
@@ -53,26 +53,18 @@ jobs:
      github.event_name == 'schedule'

    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          github_job: ${{matrix.job_name}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          comment_phrase: ${{matrix.job_phrase}}
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
-      - name: Setup Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: '3.8'
+          java-version: 8
+          python-version: 3.8
      - name: Authenticate on GCP
        uses: google-github-actions/setup-gcloud@v0
        with:
diff --git a/.github/workflows/build_release_candidate.yml b/.github/workflows/build_release_candidate.yml
index 9e7cbe741a26..2d826d572d90 100644
--- a/.github/workflows/build_release_candidate.yml
+++ b/.github/workflows/build_release_candidate.yml
@@ -50,7 +50,7 @@ jobs:
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
      - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          ref: "v${{ github.event.inputs.RELEASE }}-RC${{ github.event.inputs.RC }}"
          repository: apache/beam
@@ -154,7 +154,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          ref: "v${{ github.event.inputs.RELEASE }}-RC${{ github.event.inputs.RC }}"
          repository: apache/beam
@@ -204,13 +204,13 @@ jobs:
      SITE_ROOT_DIR: ${{ github.workspace }}/beam-site
    steps:
      - name: Checkout Beam Repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          ref: "v${{ github.event.inputs.RELEASE }}-RC${{ github.event.inputs.RC }}"
          repository: apache/beam
          path: beam
      - name: Checkout Beam Site Repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          repository: apache/beam-site
          path: beam-site
diff --git a/.github/workflows/build_runner_image.yml b/.github/workflows/build_runner_image.yml
index f64ada281d72..069b8b7db68c 100644
--- a/.github/workflows/build_runner_image.yml
+++ b/.github/workflows/build_runner_image.yml
@@ -36,7 +36,7 @@ jobs:

    steps:
      - name: Checkout code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Authenticate on GCP
diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index 1dd586082331..1028dd79af02 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -54,7 +54,7 @@ jobs:
      py-versions-full: ${{ steps.set-py-versions.outputs.py-versions-full }}
      py-versions-test: ${{ steps.set-py-versions.outputs.py-versions-test }}
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: "Check are GCP variables set"
        run: "./scripts/ci/ci_check_are_gcp_variables_set.sh"
        id: check_gcp_variables
@@ -87,7 +87,7 @@ jobs:
      rc_num: ${{ steps.get_rc_version.outputs.RC_NUM }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: Install python
        uses: actions/setup-python@v4
        with:
@@ -269,7 +269,7 @@ jobs:
          # TODO: https://github.com/apache/beam/issues/23048
          CIBW_SKIP: "*-musllinux_*"
          CIBW_ENVIRONMENT: "SETUPTOOLS_USE_DISTUTILS=stdlib"
-          CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy && pip install --upgrade setuptools
+          CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy --config-settings=setup-args="-Dallow-noblas=true" && pip install --upgrade setuptools
        run: cibuildwheel --print-build-identifiers && cibuildwheel --output-dir wheelhouse
        shell: bash
      - name: install sha512sum on MacOS
@@ -295,7 +295,7 @@ jobs:
          # TODO: https://github.com/apache/beam/issues/23048
          CIBW_SKIP: "*-musllinux_*"
          CIBW_ENVIRONMENT: "SETUPTOOLS_USE_DISTUTILS=stdlib"
-          CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy && pip install --upgrade setuptools
+          CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy --config-settings=setup-args="-Dallow-noblas=true" && pip install --upgrade setuptools
        run: cibuildwheel --print-build-identifiers && cibuildwheel --output-dir wheelhouse
        shell: bash
      - name: Add RC checksums
@@ -385,7 +385,7 @@ jobs:
    if: github.repository_owner == 'apache' && github.event_name == 'schedule'
    steps:
      - name: Checkout code on master branch
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          persist-credentials: false
          submodules: recursive
diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml
index df9172473f98..f826b22e043b 100644
--- a/.github/workflows/cancel.yml
+++ b/.github/workflows/cancel.yml
@@ -37,7 +37,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          persist-credentials: false
          submodules: recursive
diff --git a/.github/workflows/choose_rc_commit.yml b/.github/workflows/choose_rc_commit.yml
index d4de9d454a5c..0e51e5284a76 100644
--- a/.github/workflows/choose_rc_commit.yml
+++ b/.github/workflows/choose_rc_commit.yml
@@ -55,7 +55,7 @@ jobs:
      DEBUG: ""
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          ref: release-${{ github.event.inputs.RELEASE }}
      - name: Set git config
diff --git a/.github/workflows/code_completion_plugin_tests.yml b/.github/workflows/code_completion_plugin_tests.yml
index 9244608543db..38ffd2fbd3f4 100644
--- a/.github/workflows/code_completion_plugin_tests.yml
+++ b/.github/workflows/code_completion_plugin_tests.yml
@@ -56,13 +56,13 @@ jobs:

      # Check out beam repository
      - name: Fetch beam Sources
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          path: main

      # Check out intellij community repository for tests
      - name: Fetch intellij-community Sources
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          repository: JetBrains/intellij-community
          path: intellij
diff --git a/.github/workflows/cut_release_branch.yml b/.github/workflows/cut_release_branch.yml
index 4e104d78a445..4201d6018c60 100644
--- a/.github/workflows/cut_release_branch.yml
+++ b/.github/workflows/cut_release_branch.yml
@@ -68,7 +68,7 @@ jobs:
            exit 1
          fi
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: Set git config
        run: |
          git config user.name $GITHUB_ACTOR
@@ -116,7 +116,7 @@ jobs:
            exit 1
          fi
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: Set git config
        run: |
          git config user.name $GITHUB_ACTOR
@@ -184,7 +184,7 @@ jobs:
          tar zvxvf hub-linux-amd64-2.14.2.tgz
          sudo ./hub-linux-amd64-2.14.2/install
          echo "eval "$(hub alias -s)"" >> ~/.bashrc
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Set git config
        run: |
          git config user.name $GITHUB_ACTOR
diff --git a/.github/workflows/dask_runner_tests.yml b/.github/workflows/dask_runner_tests.yml
index c4d8c9a1cf9e..46d2707ca063 100644
--- a/.github/workflows/dask_runner_tests.yml
+++ b/.github/workflows/dask_runner_tests.yml
@@ -39,7 +39,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: Install python
        uses: actions/setup-python@v4
        with:
@@ -73,7 +73,7 @@ jobs:
        ]
    steps:
      - name: Checkout code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: Install python
        uses: actions/setup-python@v4
        with:
diff --git a/.github/workflows/git_tag_released_version.yml b/.github/workflows/git_tag_released_version.yml
index 871149bd26a1..0c6782603856 100644
--- a/.github/workflows/git_tag_released_version.yml
+++ b/.github/workflows/git_tag_released_version.yml
@@ -37,7 +37,7 @@ jobs:
      VERSION_PATH: ${{ github.event.inputs.VERSION_TAG }}
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: Set git config
        run: |
          git config user.name $GITHUB_ACTOR
diff --git a/.github/workflows/go_tests.yml b/.github/workflows/go_tests.yml
index 07e58bba2f1d..32b494da25be 100644
--- a/.github/workflows/go_tests.yml
+++ b/.github/workflows/go_tests.yml
@@ -29,6 +29,7 @@ on:
    branches: ['master', 'release-*']
    tags: ['v*']
    paths: ['sdks/go/pkg/**', 'sdks/go.mod', 'sdks/go.sum', 'sdks/go/container/*', 'sdks/java/container/*', 'sdks/python/container/*', 'sdks/typescript/container/*']
+  workflow_dispatch:
# This allows a subsequently queued workflow run to interrupt previous runs
concurrency:
  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}'
@@ -39,7 +40,7 @@ jobs:
    name: Go Build
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          fetch-depth: 2
      - uses: actions/setup-go@v4
@@ -65,3 +66,8 @@ jobs:
          go install "honnef.co/go/tools/cmd/staticcheck@2023.1.3"
          cd sdks/go/pkg/beam
          $(go env GOPATH)/bin/staticcheck ./...
+      - uses: golang/govulncheck-action@v1.0.1
+        with:
+          work-dir: ./sdks
+          go-package: ./...
+          go-version-input: 1.21
\ No newline at end of file
diff --git a/.github/workflows/issue-tagger.yml b/.github/workflows/issue-tagger.yml
index 39f92d87f788..dbfe2e996d5e 100644
--- a/.github/workflows/issue-tagger.yml
+++ b/.github/workflows/issue-tagger.yml
@@ -24,7 +24,7 @@ jobs:
    permissions:
      issues: write
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - uses: damccorm/tag-ur-it@6fa72bbf1a2ea157b533d7e7abeafdb5855dbea5
        with:
          repo-token: "${{ secrets.GITHUB_TOKEN }}"
diff --git a/.github/workflows/java_tests.yml b/.github/workflows/java_tests.yml
index 82e2d3ce3df5..ceff29b50d4f 100644
--- a/.github/workflows/java_tests.yml
+++ b/.github/workflows/java_tests.yml
@@ -52,7 +52,7 @@ jobs:
      gcp-variables-set: ${{ steps.check_gcp_variables.outputs.gcp-variables-set }}
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: "Check are GCP variables set"
        run: "./scripts/ci/ci_check_are_gcp_variables_set.sh"
        id: check_gcp_variables
@@ -73,15 +73,15 @@ jobs:
        os: [ubuntu-latest, macos-latest, windows-latest]
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          persist-credentials: false
          submodules: recursive
-      - name: Setup self-hosted
-        uses: ./.github/actions/setup-self-hosted-action
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          requires-py-38: false
-          requires-py-39: false
+          java-version: 8
+          go-version: 1.21
      - name: Remove default github maven configuration
        # This step is a workaround to avoid a decryption issue of Beam's
        # net.linguica.gradle.maven.settings plugin and github's provided maven
@@ -132,16 +132,15 @@ jobs:
        os: [ubuntu-latest, macos-latest, windows-latest]
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          persist-credentials: false
          submodules: recursive
-      - name: Setup self-hosted
-        uses: ./.github/actions/setup-self-hosted-action
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          requires-py-38: false
-          requires-py-39: false
-
+          java-version: 8
+          go-version: 1.21
      - name: Remove default github maven configuration
        # This step is a workaround to avoid a decryption issue of Beam's
        # net.linguica.gradle.maven.settings plugin and github's provided maven
@@ -176,15 +175,15 @@ jobs:
      )
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          persist-credentials: false
          submodules: recursive
-      - name: Setup self-hosted
-        uses: ./.github/actions/setup-self-hosted-action
-        with:
-          requires-py-38: false
-          requires-py-39: false
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
+        with:
+          java-version: 8
+          go-version: 1.21
      - name: Authenticate on GCP
        uses: google-github-actions/setup-gcloud@v0
        with:
diff --git a/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_MultipleKey.txt b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_MultipleKey.txt
new file mode 100644
index 000000000000..5fd9518bc8d0
--- /dev/null
+++ b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_MultipleKey.txt
@@ -0,0 +1,34 @@
+###############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+--project=apache-beam-testing
+--region=us-central1
+--appName=load_tests_Java_Dataflow_streaming_CoGBK_2
+--tempLocation=gs://temp-storage-for-perf-tests/loadtests
+--influxMeasurement=java_streaming_cogbk_2
+--publishToInfluxDB=true
+--sourceOptions={"numRecords":20000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":5}
+--coSourceOptions={"numRecords":2000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":1000}
+--iterations=1
+--numWorkers=5
+--autoscalingAlgorithm=NONE
+--streaming=true
+--inputWindowDurationSec=1200
+--coInputWindowDurationSec=1200
+--influxDatabase=beam_test_metrics
+--influxHost=http://10.128.0.96:8086
+--runner=DataflowRunner
\ No newline at end of file
diff --git a/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_10KB.txt b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_10KB.txt
new file mode 100644
index 000000000000..2840fe75d5af
--- /dev/null
+++ b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_10KB.txt
@@ -0,0 +1,34 @@
+###############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+############################################################################### +--project=apache-beam-testing +--region=us-central1 +--appName=load_tests_Java_Dataflow_streaming_CoGBK_3 +--tempLocation=gs://temp-storage-for-perf-tests/loadtests +--influxMeasurement=java_streaming_cogbk_3 +--publishToInfluxDB=true +--sourceOptions={"numRecords":2000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":200000} +--coSourceOptions={"numRecords":2000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":1000} +--iterations=4 +--numWorkers=5 +--autoscalingAlgorithm=NONE +--streaming=true +--inputWindowDurationSec=1200 +--coInputWindowDurationSec=1200 +--influxDatabase=beam_test_metrics +--influxHost=http://10.128.0.96:8086 +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_2MB.txt b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_2MB.txt new file mode 100644 index 000000000000..bcc8a36cf31f --- /dev/null +++ b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_2MB.txt @@ -0,0 +1,34 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### +--project=apache-beam-testing +--region=us-central1 +--appName=load_tests_Java_Dataflow_streaming_CoGBK_4 +--tempLocation=gs://temp-storage-for-perf-tests/loadtests +--influxMeasurement=java_streaming_cogbk_4 +--publishToInfluxDB=true +--sourceOptions={"numRecords":2000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":1000} +--coSourceOptions={"numRecords":2000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":1000} +--iterations=4 +--numWorkers=5 +--autoscalingAlgorithm=NONE +--streaming=true +--inputWindowDurationSec=1200 +--coInputWindowDurationSec=1200 +--influxDatabase=beam_test_metrics +--influxHost=http://10.128.0.96:8086 +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_SingleKey.txt b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_SingleKey.txt new file mode 100644 index 000000000000..afae1a1bd6bf --- /dev/null +++ b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_SingleKey.txt @@ -0,0 +1,34 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### +--project=apache-beam-testing +--region=us-central1 +--appName=load_tests_Java_Dataflow_streaming_CoGBK_1 +--tempLocation=gs://temp-storage-for-perf-tests/loadtests +--influxMeasurement=java_streaming_cogbk_1 +--publishToInfluxDB=true +--sourceOptions={"numRecords":20000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":1} +--coSourceOptions={"numRecords":2000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":1000} +--iterations=1 +--numWorkers=5 +--autoscalingAlgorithm=NONE +--streaming=true +--inputWindowDurationSec=1200 +--coInputWindowDurationSec=1200 +--influxDatabase=beam_test_metrics +--influxHost=http://10.128.0.96:8086 +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/local_env_tests.yml b/.github/workflows/local_env_tests.yml index a689959dac7e..32e2975c0712 100644 --- a/.github/workflows/local_env_tests.yml +++ b/.github/workflows/local_env_tests.yml @@ -45,7 +45,7 @@ jobs: name: "Ubuntu run local environment shell script" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-go@v4 with: go-version: '1.21' @@ -63,7 +63,7 @@ jobs: name: "Mac run local environment shell script" runs-on: macos-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-go@v4 with: go-version: '1.21' diff --git a/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Avro.txt b/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Avro.txt new file mode 100644 index 000000000000..5e7e53821231 --- /dev/null +++ b/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Avro.txt @@ -0,0 +1,38 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################### +'["--tempLocation=gs://temp-storage-for-perf-tests/loadtests", +"--project=apache-beam-testing", +"--tempRoot=gs://temp-storage-for-perf-tests/loadtests", +"--writeMethod=FILE_LOADS", +"--writeFormat=AVRO", +"--testBigQueryDataset=beam_performance", +"--testBigQueryTable=bqio_write_10GB_java_avro_", +"--metricsBigQueryDataset=beam_performance", +"--metricsBigQueryTable=bqio_10GB_results_java_batch_avro", +"--influxMeasurement=bqio_10GB_results_java_batch_avro", +"--sourceOptions={ +\"numRecords\":\"10485760\", +\"keySizeBytes\":\"1\", +\"valueSizeBytes\":\"1024\" +}", +"--runner=DataflowRunner", +"--maxNumWorkers=5", +"--numWorkers=5", +"--autoscalingAlgorithm=NONE", +"--influxDatabase=beam_test_metrics", +"--influxHost=http://10.128.0.96:8086"]' \ No newline at end of file diff --git a/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Json.txt b/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Json.txt new file mode 100644 index 000000000000..7bd9c30ae738 --- /dev/null +++ b/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Json.txt @@ -0,0 +1,38 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### +'["--tempLocation=gs://temp-storage-for-perf-tests/loadtests", +"--project=apache-beam-testing", +"--tempRoot=gs://temp-storage-for-perf-tests/loadtests", +"--writeMethod=FILE_LOADS", +"--writeFormat=JSON", +"--testBigQueryDataset=beam_performance", +"--testBigQueryTable=bqio_write_10GB_java_json_", +"--metricsBigQueryDataset=beam_performance", +"--metricsBigQueryTable=bqio_10GB_results_java_batch_json", +"--influxMeasurement=bqio_10GB_results_java_batch_json", +"--sourceOptions={ +\"numRecords\":\"10485760\", +\"keySizeBytes\":\"1\", +\"valueSizeBytes\":\"1024\" +}", +"--runner=DataflowRunner", +"--maxNumWorkers=5", +"--numWorkers=5", +"--autoscalingAlgorithm=NONE", +"--influxDatabase=beam_test_metrics", +"--influxHost=http://10.128.0.96:8086"]' \ No newline at end of file diff --git a/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Streaming_Java.txt b/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Streaming_Java.txt new file mode 100644 index 000000000000..8bddea5fcb8b --- /dev/null +++ b/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Streaming_Java.txt @@ -0,0 +1,39 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### +'["--tempLocation=gs://temp-storage-for-perf-tests/loadtests", +"--project=apache-beam-testing", +"--tempRoot=gs://temp-storage-for-perf-tests/loadtests", +"--writeMethod=STREAMING_INSERTS", +"--writeFormat=JSON", +"--pipelineTimeout=1200", +"--testBigQueryDataset=beam_performance", +"--testBigQueryTable=bqio_write_10GB_java_stream_", +"--metricsBigQueryDataset=beam_performance", +"--metricsBigQueryTable=bqio_10GB_results_java_stream", +"--influxMeasurement=bqio_10GB_results_java_stream", +"--sourceOptions={ +\"numRecords\":\"10485760\", +\"keySizeBytes\":\"1\", +\"valueSizeBytes\":\"1024\" +}", +"--runner=DataflowRunner", +"--maxNumWorkers=5", +"--numWorkers=5", +"--autoscalingAlgorithm=NONE", +"--influxDatabase=beam_test_metrics", +"--influxHost=http://10.128.0.96:8086"]' \ No newline at end of file diff --git a/.github/workflows/playground_backend_precommit.yml b/.github/workflows/playground_backend_precommit.yml index dedac3db9299..114ca4aac1cb 100644 --- a/.github/workflows/playground_backend_precommit.yml +++ b/.github/workflows/playground_backend_precommit.yml @@ -37,20 +37,13 @@ jobs: JAVA_VERSION: '11' steps: - name: Check out the repo - uses: actions/checkout@v3 - - - uses: actions/setup-python@v4 - with: - python-version: '${{ env.PYTHON_VERSION }}' - - uses: actions/setup-java@v3.8.0 + uses: actions/checkout@v4 + + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - distribution: 'zulu' java-version: '${{ env.JAVA_VERSION }}' - - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + python-version: '${{ env.PYTHON_VERSION }}' - name: Add GOPATH/bin to PATH run: echo "PATH=$PATH:$(go env GOPATH)/bin" >> $GITHUB_ENV diff --git a/.github/workflows/playground_frontend_test.yml b/.github/workflows/playground_frontend_test.yml index 543d166f432d..6f6e02a9697c 100644 --- a/.github/workflows/playground_frontend_test.yml +++ b/.github/workflows/playground_frontend_test.yml @@ -45,7 +45,7 @@ jobs: FLUTTER_VERSION: '3.10.4' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: 'Cache Flutter Dependencies' uses: actions/cache@v3 diff --git a/.github/workflows/pr-bot-new-prs.yml b/.github/workflows/pr-bot-new-prs.yml index 8ba27fbec3dc..ef825e067b7d 100644 --- a/.github/workflows/pr-bot-new-prs.yml +++ b/.github/workflows/pr-bot-new-prs.yml @@ -33,7 +33,7 @@ jobs: if: github.repository == 'apache/beam' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Node uses: actions/setup-node@v3 with: diff --git a/.github/workflows/pr-bot-pr-updates.yml b/.github/workflows/pr-bot-pr-updates.yml index d96a11368cb8..c882c18feeba 100644 --- a/.github/workflows/pr-bot-pr-updates.yml +++ 
b/.github/workflows/pr-bot-pr-updates.yml @@ -35,7 +35,7 @@ jobs: steps: # Pin to master so users can't do anything malicious on their own branch and run it here. - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: 'master' - name: Setup Node diff --git a/.github/workflows/pr-bot-prs-needing-attention.yml b/.github/workflows/pr-bot-prs-needing-attention.yml index e96d3983746b..9dff7c8565a4 100644 --- a/.github/workflows/pr-bot-prs-needing-attention.yml +++ b/.github/workflows/pr-bot-prs-needing-attention.yml @@ -33,7 +33,7 @@ jobs: if: github.repository == 'apache/beam' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Node uses: actions/setup-node@v3 with: diff --git a/.github/workflows/pr-bot-update-reviewers.yml b/.github/workflows/pr-bot-update-reviewers.yml index f3d343b12fb9..b4c41b66f9d6 100644 --- a/.github/workflows/pr-bot-update-reviewers.yml +++ b/.github/workflows/pr-bot-update-reviewers.yml @@ -33,7 +33,7 @@ jobs: if: github.repository == 'apache/beam' runs-on: [self-hosted, ubuntu-20.04] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Node uses: actions/setup-node@v3 with: diff --git a/.github/workflows/publish_github_release_notes.yml b/.github/workflows/publish_github_release_notes.yml index 246ce690f8b1..473e0deef83d 100644 --- a/.github/workflows/publish_github_release_notes.yml +++ b/.github/workflows/publish_github_release_notes.yml @@ -36,7 +36,7 @@ jobs: properties: ${{ steps.test-properties.outputs.properties }} steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - id: test-properties uses: ./.github/actions/setup-default-test-properties @@ -49,7 +49,7 @@ jobs: name: Publish Github Release Notes steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Publish github release notes run: | POST_PATH="website/www/site/content/en/blog/beam-${{env.RELEASE_VERSION}}.md" diff --git a/.github/workflows/python_dependency_tests.yml b/.github/workflows/python_dependency_tests.yml index aea77330f0ae..0c91e64b0db9 100644 --- a/.github/workflows/python_dependency_tests.yml +++ b/.github/workflows/python_dependency_tests.yml @@ -33,7 +33,7 @@ jobs: ] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index ba7585c31bf5..406949eda96e 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -49,7 +49,7 @@ jobs: outputs: gcp-variables-set: ${{ steps.check_gcp_variables.outputs.gcp-variables-set }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: "Check are GCP variables set" run: "./scripts/ci/ci_check_are_gcp_variables_set.sh" id: check_gcp_variables @@ -73,7 +73,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install python uses: actions/setup-python@v4 with: @@ -108,7 +108,7 @@ jobs: ] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install python uses: actions/setup-python@v4 with: @@ -143,7 +143,7 @@ jobs: python: ["3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install python uses: actions/setup-python@v4 with: @@ -171,7 +171,7 @@ jobs: python: ["3.8", "3.9", "3.10", "3.11"] 
steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/reportGenerator.yml b/.github/workflows/reportGenerator.yml index 44055cd56310..8f6bccddcfad 100644 --- a/.github/workflows/reportGenerator.yml +++ b/.github/workflows/reportGenerator.yml @@ -26,7 +26,7 @@ jobs: name: Generate issue report runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Node uses: actions/setup-node@v3 with: diff --git a/.github/workflows/run_perf_alert_tool.yml b/.github/workflows/run_perf_alert_tool.yml index 1cdcd858e61a..6946011f0617 100644 --- a/.github/workflows/run_perf_alert_tool.yml +++ b/.github/workflows/run_perf_alert_tool.yml @@ -35,7 +35,7 @@ jobs: issues: write steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/run_rc_validation.yml b/.github/workflows/run_rc_validation.yml index 720150b57450..4902fee81016 100644 --- a/.github/workflows/run_rc_validation.yml +++ b/.github/workflows/run_rc_validation.yml @@ -78,7 +78,7 @@ jobs: WORKING_BRANCH: "v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}_validations" steps: - name: Check out code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{ env.RC_TAG }} - name: Setup GitHub CLI @@ -109,7 +109,7 @@ jobs: py_version: [3.8] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} @@ -174,7 +174,7 @@ jobs: py_version: [3.8] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} - name: Verify ENV values @@ -280,7 +280,7 @@ jobs: needs: generate_shared_pubsub steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} - name: Install Python @@ -355,7 +355,7 @@ jobs: needs: generate_shared_pubsub steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} @@ -403,7 +403,7 @@ jobs: needs: [generate_shared_pubsub] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} @@ -456,7 +456,7 @@ jobs: needs: [generate_shared_pubsub] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} - name: Install Python @@ -505,7 +505,7 @@ jobs: needs: [generate_shared_pubsub] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} - name: Install Python diff --git a/.github/workflows/tour_of_beam_backend.yml b/.github/workflows/tour_of_beam_backend.yml index 665cec4e42cd..5c67c9f54fe3 100644 --- a/.github/workflows/tour_of_beam_backend.yml +++ b/.github/workflows/tour_of_beam_backend.yml @@ -41,7 +41,7 @@ jobs: run: working-directory: ./learning/tour-of-beam/backend steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-go@v4 with: # pin to the biggest Go version supported by Cloud Functions runtime diff --git a/.github/workflows/tour_of_beam_backend_integration.yml b/.github/workflows/tour_of_beam_backend_integration.yml index ab644358c977..8f56d3f2e2fa 100644 --- a/.github/workflows/tour_of_beam_backend_integration.yml +++ b/.github/workflows/tour_of_beam_backend_integration.yml @@ -74,17 +74,14 @@ jobs: run: working-directory: 
./learning/tour-of-beam/backend steps: - - uses: actions/checkout@v3 - - uses: actions/setup-go@v4 + - uses: actions/checkout@v4 + + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: # pin to the biggest Go version supported by Cloud Functions runtime go-version: '1.16' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false - - name: Build Playground router image run: ./gradlew -i playground:backend:containers:router:docker working-directory: ${{ env.GITHUB_WORKSPACE }} diff --git a/.github/workflows/tour_of_beam_frontend_test.yml b/.github/workflows/tour_of_beam_frontend_test.yml index 8880bc287266..5337bb7dd720 100644 --- a/.github/workflows/tour_of_beam_frontend_test.yml +++ b/.github/workflows/tour_of_beam_frontend_test.yml @@ -47,7 +47,7 @@ jobs: FLUTTER_VERSION: '3.10.4' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: 'Cache Flutter Dependencies' uses: actions/cache@v3 diff --git a/.github/workflows/typescript_tests.yml b/.github/workflows/typescript_tests.yml index 542673cbbca8..825b2808af5b 100644 --- a/.github/workflows/typescript_tests.yml +++ b/.github/workflows/typescript_tests.yml @@ -45,7 +45,7 @@ jobs: os: [[self-hosted, ubuntu-20.04], macos-latest, [self-hosted, windows-server-2019]] steps: - name: Check out code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false submodules: recursive @@ -74,7 +74,7 @@ jobs: fail-fast: false steps: - name: Check out code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false submodules: recursive @@ -109,7 +109,7 @@ jobs: outputs: gcp-variables-set: ${{ steps.check_gcp_variables.outputs.gcp-variables-set }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: "Check are GCP variables set" run: "./scripts/ci/ci_check_are_gcp_variables_set.sh" id: check_gcp_variables @@ -131,7 +131,7 @@ jobs: fail-fast: false steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false submodules: recursive diff --git a/.github/workflows/update_python_dependencies.yml b/.github/workflows/update_python_dependencies.yml index d2b8fccdb586..b4b839c3204c 100644 --- a/.github/workflows/update_python_dependencies.yml +++ b/.github/workflows/update_python_dependencies.yml @@ -38,7 +38,7 @@ jobs: properties: ${{ steps.test-properties.outputs.properties }} steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - id: test-properties uses: ./.github/actions/setup-default-test-properties @@ -48,9 +48,15 @@ jobs: name: Update Python Dependencies steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup environment - uses: ./.github/actions/setup-self-hosted-action + uses: ./.github/actions/setup-environment-action + with: + python-version: | + 3.8 + 3.9 + java-version: 8 + go-version: 1.21 - name: Update Python Dependencies uses: ./.github/actions/gradle-command-self-hosted-action with: diff --git a/.test-infra/jenkins/job_PreCommit_CommunityMetrics.groovy b/.test-infra/jenkins/job_PreCommit_CommunityMetrics.groovy deleted file mode 100644 index 2bf63df9cd5a..000000000000 --- a/.test-infra/jenkins/job_PreCommit_CommunityMetrics.groovy +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'CommunityMetrics', - gradleTask: ':communityMetricsPreCommit', - triggerPathPatterns: ['^.test-infra/metrics/.*$']) -builder.build() - diff --git a/.test-infra/jenkins/job_PreCommit_Go.groovy b/.test-infra/jenkins/job_PreCommit_Go.groovy deleted file mode 100644 index 73b6cf4c9384..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Go.groovy +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Go', - gradleTask: ':goPreCommit', - triggerPathPatterns: [ - '^model/.*$', - '^sdks/go.*$', - '^release/.*$', - ] - ) -builder.build() diff --git a/.test-infra/jenkins/job_PreCommit_Go_Portable.groovy b/.test-infra/jenkins/job_PreCommit_Go_Portable.groovy deleted file mode 100644 index 12c762e5eb37..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Go_Portable.groovy +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'GoPortable', - gradleTask: ':goPortablePreCommit', - triggerPathPatterns: [ - '^model/.*$', - '^sdks/go.*$', - '^release/.*$', - ] - ) -builder.build() diff --git a/.test-infra/jenkins/job_PreCommit_Kotlin_Examples.groovy b/.test-infra/jenkins/job_PreCommit_Kotlin_Examples.groovy deleted file mode 100644 index c3ce31f1f6d1..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Kotlin_Examples.groovy +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Kotlin_Examples', - gradleTask: ':examples:kotlin:preCommit', - triggerPathPatterns: [ - '^model/.*$', - '^sdks/java/.*$', - '^runners/flink/.*$', - '^runners/spark/.*$', - '^runners/direct-java/.*$', - '^examples/kotlin/.*$', - '^release/.*$', - ] - ) -builder.build() diff --git a/.test-infra/jenkins/job_PreCommit_PythonAutoformatter.groovy b/.test-infra/jenkins/job_PreCommit_PythonAutoformatter.groovy deleted file mode 100644 index 90e037edf2af..000000000000 --- a/.test-infra/jenkins/job_PreCommit_PythonAutoformatter.groovy +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'PythonFormatter', - gradleTask: ':pythonFormatterPreCommit', - triggerPathPatterns: [ - '^sdks/python/apache_beam/.*$', - ] - ) -builder.build() diff --git a/.test-infra/jenkins/job_PreCommit_Typescript.groovy b/.test-infra/jenkins/job_PreCommit_Typescript.groovy deleted file mode 100644 index 29cb93ede8b6..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Typescript.groovy +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Typescript', - gradleTask: ':typescriptPreCommit', - triggerPathPatterns: [ - '^sdks/python/apache_beam/runners/interactive/extensions/.*$', - ] - ) -builder.build() diff --git a/CHANGES.md b/CHANGES.md index 40a9a1dc9490..a990a5fd7304 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -68,11 +68,14 @@ * In Python, [RunInference](https://beam.apache.org/documentation/sdks/python-machine-learning/#why-use-the-runinference-api) now supports loading many models in the same transform using a [KeyedModelHandler](https://beam.apache.org/documentation/sdks/python-machine-learning/#use-a-keyed-modelhandler) ([#27628](https://github.com/apache/beam/issues/27628)). * In Python, the [VertexAIModelHandlerJSON](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.vertex_ai_inference.html#apache_beam.ml.inference.vertex_ai_inference.VertexAIModelHandlerJSON) now supports passing in inference_args. These will be passed through to the Vertex endpoint as parameters. +* Added support to run `mypy` on user pipelines ([#27906](https://github.com/apache/beam/issues/27906)). ## Breaking Changes * Removed fastjson library dependency for Beam SQL. Table property is changed to be based on jackson ObjectNode (Java) ([#24154](https://github.com/apache/beam/issues/24154)). * Removed TensorFlow from Beam Python container images [PR](https://github.com/apache/beam/pull/28424). If you have been negatively affected by this change, please comment on [#20605](https://github.com/apache/beam/issues/20605). +* Removed the parameter `t reflect.Type` from `parquetio.Write`. The element type is derived from the input PCollection (Go) ([#28490](https://github.com/apache/beam/issues/28490)). +* Refactored `BeamSqlSeekableTable.setUp`, adding a parameter `joinSubsetType` (Java) ([#28283](https://github.com/apache/beam/issues/28283)); see the sketch below. ## Deprecations @@ -80,7 +83,9 @@ ## Bugfixes -* Fixed X (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +* Fixed exception chaining issue in GCS connector (Python) ([#26769](https://github.com/apache/beam/issues/26769#issuecomment-1700422615)). +* Fixed streaming inserts exception handling: GoogleAPICallErrors are now retried according to the retry strategy and routed to failed rows where appropriate, rather than causing a pipeline error (Python) ([#21080](https://github.com/apache/beam/issues/21080)). 
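
The `BeamSqlSeekableTable.setUp` entry above is terse, so here is a minimal sketch of what an implementer might now write. This is an illustration, not code from the PR: the `Schema` type of the new `joinSubsetType` parameter, the import paths, and the class itself are assumptions inferred from the entry and from the Beam SQL extension's existing `seekRow` contract.

```java
import java.util.Collections;
import java.util.List;
import org.apache.beam.sdk.extensions.sql.BeamSqlSeekableTable; // assumed import path
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.values.Row;

// Hypothetical seekable table updated for the refactored setUp signature.
public class InMemoryLookupTable implements BeamSqlSeekableTable {
  @Override
  public void setUp(Schema joinSubsetType) {
    // The join-subset schema (assumed parameter type) describes the key
    // fields the join will seek on, so the table can index its data
    // before seekRow is called.
  }

  @Override
  public List<Row> seekRow(Row lookupSubRow) {
    // Return the rows matching the lookup key; empty here for brevity.
    return Collections.emptyList();
  }
}
```
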
+ ## Security Fixes * Python containers updated, fixing [CVE-2021-30474](https://nvd.nist.gov/vuln/detail/CVE-2021-30474), [CVE-2021-30475](https://nvd.nist.gov/vuln/detail/CVE-2021-30475), [CVE-2021-30473](https://nvd.nist.gov/vuln/detail/CVE-2021-30473), [CVE-2020-36133](https://nvd.nist.gov/vuln/detail/CVE-2020-36133), [CVE-2020-36131](https://nvd.nist.gov/vuln/detail/CVE-2020-36131), [CVE-2020-36130](https://nvd.nist.gov/vuln/detail/CVE-2020-36130), and [CVE-2020-36135](https://nvd.nist.gov/vuln/detail/CVE-2020-36135) diff --git a/build.gradle.kts b/build.gradle.kts index 0d23861a495b..7bd847895293 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -443,6 +443,10 @@ tasks.register("goPortablePreCommit") { dependsOn(":sdks:go:test:ulrValidatesRunner") } +tasks.register("goPrismPreCommit") { + dependsOn(":sdks:go:test:prismValidatesRunner") +} + tasks.register("goPostCommitDataflowARM") { dependsOn(":sdks:go:test:dataflowValidatesRunnerARM64") } diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts index a8a760029069..0ca748e3eb04 100644 --- a/buildSrc/build.gradle.kts +++ b/buildSrc/build.gradle.kts @@ -57,7 +57,7 @@ dependencies { runtimeOnly("com.avast.gradle:gradle-docker-compose-plugin:0.16.12") // Enable docker compose tasks runtimeOnly("ca.cutterslade.gradle:gradle-dependency-analyze:1.8.3") // Enable dep analysis runtimeOnly("gradle.plugin.net.ossindex:ossindex-gradle-plugin:0.4.11") // Enable dep vulnerability analysis - runtimeOnly("org.checkerframework:checkerframework-gradle-plugin:0.6.30") // Enable enhanced static checking plugin + runtimeOnly("org.checkerframework:checkerframework-gradle-plugin:0.6.33") // Enable enhanced static checking plugin } // Because buildSrc is built and tested automatically _before_ gradle diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index a3c5bcf226fc..9f341c5673fd 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -724,7 +724,7 @@ class BeamModulePlugin implements Plugin<Project> { // Keep version consistent with the version in google_cloud_resourcemanager, managed by google_cloud_platform_libraries_bom google_api_services_cloudresourcemanager : "com.google.apis:google-api-services-cloudresourcemanager:v1-rev20230129-$google_clients_version", google_api_services_dataflow : "com.google.apis:google-api-services-dataflow:v1b3-rev20220920-$google_clients_version", - google_api_services_healthcare : "com.google.apis:google-api-services-healthcare:v1-rev20230817-$google_clients_version", + google_api_services_healthcare : "com.google.apis:google-api-services-healthcare:v1-rev20230830-$google_clients_version", google_api_services_pubsub : "com.google.apis:google-api-services-pubsub:v1-rev20220904-$google_clients_version", // Keep version consistent with the version in google_cloud_nio, managed by google_cloud_platform_libraries_bom google_api_services_storage : "com.google.apis:google-api-services-storage:v1-rev20230617-$google_clients_version", diff --git a/it/cassandra/src/main/java/org/apache/beam/it/cassandra/matchers/CassandraAsserts.java b/it/cassandra/src/main/java/org/apache/beam/it/cassandra/matchers/CassandraAsserts.java index 61f730bf3579..6aecc6609cfb 100644 --- a/it/cassandra/src/main/java/org/apache/beam/it/cassandra/matchers/CassandraAsserts.java +++ 
b/it/cassandra/src/main/java/org/apache/beam/it/cassandra/matchers/CassandraAsserts.java @@ -31,7 +31,7 @@ public class CassandraAsserts { /** - * Convert Cassandra {@link Row} list to a list of maps. + * Convert Cassandra {@link com.datastax.oss.driver.api.core.cql.Row} list to a list of maps. * * @param rows Rows to parse. * @return List of maps to use in {@link RecordsSubject}. diff --git a/it/common/src/main/java/org/apache/beam/it/common/utils/PipelineUtils.java b/it/common/src/main/java/org/apache/beam/it/common/utils/PipelineUtils.java index c696457bbdd9..d249d43d3789 100644 --- a/it/common/src/main/java/org/apache/beam/it/common/utils/PipelineUtils.java +++ b/it/common/src/main/java/org/apache/beam/it/common/utils/PipelineUtils.java @@ -27,6 +27,7 @@ import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.CaseFormat; +import org.apache.commons.lang3.RandomStringUtils; /** Utilities to make working with Dataflow easier. */ public class PipelineUtils { @@ -73,6 +74,15 @@ public static boolean waitUntil( } } + /** + * Creates a job name. Method uses {@link #createJobName(String, int)} without a random suffix. + * + * @see #createJobName(String, int) + */ + public static String createJobName(String prefix) { + return createJobName(prefix, 0); + } + /** * Creates a job name. * @@ -83,17 +93,24 @@ public static boolean waitUntil( * same prefix are requested in a short period of time. * * @param prefix a prefix for the job + * @param randomChars the number of random characters to append at the end, to increase the + * likelihood that the name is unique. * @return the prefix plus some way of identifying it separate from other jobs with the same * prefix */ - public static String createJobName(String prefix) { + public static String createJobName(String prefix, int randomChars) { String convertedPrefix = CaseFormat.UPPER_CAMEL.converterTo(CaseFormat.LOWER_HYPHEN).convert(prefix); String formattedTimestamp = DateTimeFormatter.ofPattern("yyyyMMddHHmmssSSS") .withZone(ZoneId.of("UTC")) .format(Instant.now()); - return String.format("%s-%s", convertedPrefix, formattedTimestamp); + + String suffix = ""; + if (randomChars > 0) { + suffix = "-" + RandomStringUtils.randomAlphanumeric(randomChars).toLowerCase(); + } + return String.format("%s-%s%s", convertedPrefix, formattedTimestamp, suffix); } /** Get raw job name (without prefix) from a jobName generated by createJobName. 
*/ diff --git a/it/common/src/test/java/org/apache/beam/it/common/utils/PipelineUtilsTest.java b/it/common/src/test/java/org/apache/beam/it/common/utils/PipelineUtilsTest.java index acf203b06e6e..316283cdf7d2 100644 --- a/it/common/src/test/java/org/apache/beam/it/common/utils/PipelineUtilsTest.java +++ b/it/common/src/test/java/org/apache/beam/it/common/utils/PipelineUtilsTest.java @@ -37,7 +37,13 @@ public void testCreateJobName() { @Test public void testCreateJobNameWithUppercase() { - assertThat(createJobName("testWithUpperCase")).matches("test-with-upper-case" + "-\\d{17}"); + assertThat(createJobName("testWithUpperCase")).matches("test-with-upper-case-\\d{17}"); + } + + @Test + public void testCreateJobNameWithUppercaseSuffix() { + assertThat(createJobName("testWithUpperCase", 8)) + .matches("test-with-upper-case-\\d{17}-[a-z0-9]{8}"); } @Test diff --git a/it/elasticsearch/src/test/java/org/apache/beam/it/elasticsearch/ElasticsearchUtilsTest.java b/it/elasticsearch/src/test/java/org/apache/beam/it/elasticsearch/ElasticsearchUtilsTest.java index eb250a1c5f82..61d6b5d57c2c 100644 --- a/it/elasticsearch/src/test/java/org/apache/beam/it/elasticsearch/ElasticsearchUtilsTest.java +++ b/it/elasticsearch/src/test/java/org/apache/beam/it/elasticsearch/ElasticsearchUtilsTest.java @@ -34,7 +34,7 @@ public class ElasticsearchUtilsTest { @Test public void testGenerateIndexNameShouldReplaceForwardSlash() { String testBaseString = "Test/DB/Name"; - String actual = generateIndexName(testBaseString); + String actual = ElasticsearchUtils.generateIndexName(testBaseString); assertThat(actual).matches("test-db-name-\\d{8}-\\d{6}-\\d{6}"); } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/IOLoadTestBase.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/IOLoadTestBase.java index 32f262f2eac1..6b728a6a60db 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/IOLoadTestBase.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/IOLoadTestBase.java @@ -62,7 +62,7 @@ public void tearDownBase() throws IOException { } @Override - PipelineLauncher launcher() { + public PipelineLauncher launcher() { return DefaultPipelineLauncher.builder(CREDENTIALS).build(); } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/LoadTestBase.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/LoadTestBase.java index f6e359fed963..14bb05394de2 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/LoadTestBase.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/LoadTestBase.java @@ -18,6 +18,7 @@ package org.apache.beam.it.gcp; import static org.apache.beam.it.common.logging.LogStrings.formatForLogging; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.RUNNER_V2; import com.google.api.gax.core.CredentialsProvider; import com.google.api.gax.core.FixedCredentialsProvider; @@ -49,6 +50,7 @@ import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.MoreObjects; import org.junit.After; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Rule; import org.junit.rules.TestRule; import org.junit.rules.TestWatcher; @@ -107,11 +109,14 @@ protected void starting(Description description) { } }; - @Before - @SuppressFBWarnings("ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD") - public void setUp() throws IOException { + @BeforeClass + public static void setUpClass() { project = TestProperties.project(); region = 
TestProperties.region(); + } + + @Before + public void setUp() throws IOException { monitoringClient = MonitoringClient.builder(CREDENTIALS_PROVIDER).build(); pipelineLauncher = launcher(); pipelineOperator = new PipelineOperator(pipelineLauncher); @@ -123,7 +128,7 @@ public void tearDownLoadTestBase() throws IOException { monitoringClient.cleanupAll(); } - abstract PipelineLauncher launcher(); + public abstract PipelineLauncher launcher(); /** * Exports the metrics of given dataflow job to BigQuery. @@ -239,7 +244,7 @@ private void computeDataflowMetrics( metrics.put("EstimatedDataProcessedGB", dataProcessed / 1e9d); } metrics.putAll(getCpuUtilizationMetrics(launchInfo.jobId(), workerTimeInterval)); - metrics.putAll(getThroughputMetrics(launchInfo.jobId(), config, workerTimeInterval)); + metrics.putAll(getThroughputMetrics(launchInfo, config, workerTimeInterval)); } /** @@ -349,25 +354,30 @@ protected Map<String, Double> getCpuUtilizationMetrics(String jobId, TimeInterva /** * Computes throughput metrics of the given pcollection in dataflow job. * - * @param jobId dataflow job id + * @param jobInfo dataflow job LaunchInfo * @param config the {@class MetricsConfiguration} * @param timeInterval interval for the monitoring query * @return throughput metrics of the pcollection */ protected Map<String, Double> getThroughputMetrics( - String jobId, MetricsConfiguration config, TimeInterval timeInterval) { + LaunchInfo jobInfo, MetricsConfiguration config, TimeInterval timeInterval) { + String jobId = jobInfo.jobId(); + String iColl = + RUNNER_V2.equals(jobInfo.runner()) + ? config.inputPCollectionV2() + : config.inputPCollection(); + String oColl = + RUNNER_V2.equals(jobInfo.runner()) + ? config.outputPCollectionV2() + : config.outputPCollection(); List<Double> inputThroughputBytesPerSec = - monitoringClient.getThroughputBytesPerSecond( - project, jobId, config.inputPCollection(), timeInterval); + monitoringClient.getThroughputBytesPerSecond(project, jobId, iColl, timeInterval); List<Double> inputThroughputElementsPerSec = - monitoringClient.getThroughputElementsPerSecond( - project, jobId, config.inputPCollection(), timeInterval); + monitoringClient.getThroughputElementsPerSecond(project, jobId, iColl, timeInterval); List<Double> outputThroughputBytesPerSec = - monitoringClient.getThroughputBytesPerSecond( - project, jobId, config.outputPCollection(), timeInterval); + monitoringClient.getThroughputBytesPerSecond(project, jobId, oColl, timeInterval); List<Double> outputThroughputElementsPerSec = - monitoringClient.getThroughputElementsPerSecond( - project, jobId, config.outputPCollection(), timeInterval); + monitoringClient.getThroughputElementsPerSecond(project, jobId, oColl, timeInterval); return getThroughputMetrics( inputThroughputBytesPerSec, inputThroughputElementsPerSec, @@ -495,22 +505,31 @@ public abstract static class MetricsConfiguration { */ public abstract @Nullable String inputPCollection(); + /** Input PCollection name under Dataflow runner v2. */ + public abstract @Nullable String inputPCollectionV2(); + /** * Input PCollection of the Dataflow job to query additional metrics. If not provided, the * metrics for inputPCollection will not be calculated. 
*/ public abstract @Nullable String outputPCollection(); - public static Builder builder() { + public abstract @Nullable String outputPCollectionV2(); + + public static MetricsConfiguration.Builder builder() { return new AutoValue_LoadTestBase_MetricsConfiguration.Builder(); } @AutoValue.Builder public abstract static class Builder { - public abstract Builder setInputPCollection(@Nullable String value); + public abstract MetricsConfiguration.Builder setInputPCollection(@Nullable String value); + + public abstract MetricsConfiguration.Builder setInputPCollectionV2(@Nullable String value); + + public abstract MetricsConfiguration.Builder setOutputPCollection(@Nullable String value); - public abstract Builder setOutputPCollection(@Nullable String value); + public abstract MetricsConfiguration.Builder setOutputPCollectionV2(@Nullable String value); public abstract MetricsConfiguration build(); } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManager.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManager.java index 80bf5cfd9382..d6d348f524b2 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManager.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManager.java @@ -461,6 +461,7 @@ public synchronized void cleanupAll() throws BigQueryResourceManagerException { projectId, dataset.getDatasetId().getDataset(), table.getTableId().getTable())); } bigQuery.delete(dataset.getDatasetId()); + dataset = null; } } catch (Exception e) { throw new BigQueryResourceManagerException("Failed to delete resources.", e); } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManager.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManager.java index 1e6750cc81e4..713880229281 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManager.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManager.java @@ -33,6 +33,7 @@ import com.google.cloud.bigtable.admin.v2.models.AppProfile.MultiClusterRoutingPolicy; import com.google.cloud.bigtable.admin.v2.models.AppProfile.RoutingPolicy; import com.google.cloud.bigtable.admin.v2.models.AppProfile.SingleClusterRoutingPolicy; +import com.google.cloud.bigtable.admin.v2.models.Cluster; import com.google.cloud.bigtable.admin.v2.models.CreateAppProfileRequest; import com.google.cloud.bigtable.admin.v2.models.CreateInstanceRequest; import com.google.cloud.bigtable.admin.v2.models.CreateTableRequest; @@ -54,6 +55,8 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import javax.annotation.Nullable; import org.apache.beam.it.common.ResourceManager; import org.apache.commons.lang3.StringUtils; @@ -93,6 +96,8 @@ public class BigtableResourceManager implements ResourceManager { private final Set<String> cdcEnabledTables; private boolean hasInstance; + private Iterable<BigtableResourceManagerCluster> clusters; + private final boolean usingStaticInstance; private BigtableResourceManager(Builder builder) throws IOException { @@ -111,6 +116,7 @@ private BigtableResourceManager(Builder builder) throws IOException { this.createdTables = new ArrayList<>(); this.createdAppProfiles = new ArrayList<>(); this.cdcEnabledTables = new HashSet<>(); + this.clusters = new ArrayList<>(); // Check if RM was configured to use static Bigtable instance. if (builder.useStaticInstance) { @@ -223,6 +229,7 @@ public synchronized void createInstance(Iterable<BigtableResourceManagerCluster> clusters) } catch (Exception e) { throw new BigtableResourceManagerException( "Failed to create instance " + instanceId + ".", e); } hasInstance = true; + this.clusters = clusters; LOG.info("Successfully created instance {}.", instanceId); } @@ -544,6 +551,32 @@ public synchronized ImmutableList<Row> readTable(String tableId, @Nullable Long return tableRows; } + /** Get all the cluster names of the current instance. */ + public List<String> getClusterNames() { + return StreamSupport.stream(getClusters().spliterator(), false) + .map(BigtableResourceManagerCluster::clusterId) + .collect(Collectors.toList()); + } + + private Iterable<BigtableResourceManagerCluster> getClusters() { + if (usingStaticInstance && this.clusters == null) { + try (BigtableInstanceAdminClient instanceAdminClient = + bigtableResourceManagerClientFactory.bigtableInstanceAdminClient()) { + List<BigtableResourceManagerCluster> managedClusters = new ArrayList<>(); + for (Cluster cluster : instanceAdminClient.listClusters(instanceId)) { + managedClusters.add( + BigtableResourceManagerCluster.create( + cluster.getId(), + cluster.getZone(), + cluster.getServeNodes(), + cluster.getStorageType())); + } + this.clusters = managedClusters; + } + } + return this.clusters; + } + /** * Deletes all created resources (instance and tables) and cleans up all Bigtable clients, making * the manager object unusable. diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java index eb2323e52974..a893493d766e 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java @@ -34,7 +34,7 @@ public final class BigtableResourceManagerUtils { private static final Pattern ILLEGAL_INSTANCE_ID_CHARS = Pattern.compile("[^a-z0-9-]"); private static final String REPLACE_INSTANCE_ID_CHAR = "-"; private static final int MIN_TABLE_ID_LENGTH = 1; - private static final int MAX_TABLE_ID_LENGTH = 30; + private static final int MAX_TABLE_ID_LENGTH = 40; private static final Pattern ILLEGAL_TABLE_CHARS = Pattern.compile("[^a-zA-Z0-9-_.]"); private static final String REPLACE_TABLE_ID_CHAR = "-"; diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/AbstractPipelineLauncher.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/AbstractPipelineLauncher.java index 08688d88b104..b5c9535953b7 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/AbstractPipelineLauncher.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/AbstractPipelineLauncher.java @@ -58,6 +58,11 @@ public abstract class AbstractPipelineLauncher implements PipelineLauncher { private static final Logger LOG = LoggerFactory.getLogger(AbstractPipelineLauncher.class); private static final Pattern CURRENT_METRICS = Pattern.compile(".*Current.*"); + public static final String LEGACY_RUNNER = "Dataflow Legacy Runner"; + public static final String RUNNER_V2 = "Dataflow Runner V2"; + public static final String PARAM_RUNNER = "runner"; + public static final String PARAM_JOB_TYPE = "jobType"; + public static final String PARAM_JOB_ID = "jobId"; protected final List<String> launchedJobs = new ArrayList<>(); @@ -244,12 +249,12 @@ protected JobState 
handleJobState(Job job) { */ protected LaunchInfo.Builder getJobInfoBuilder(LaunchConfig options, JobState state, Job job) { Map<String, String> labels = job.getLabels(); - String runner = "Dataflow Legacy Runner"; + String runner = LEGACY_RUNNER; Environment environment = job.getEnvironment(); if (environment != null && environment.getExperiments() != null && environment.getExperiments().contains("use_runner_v2")) { - runner = "Dataflow Runner V2"; + runner = RUNNER_V2; } LaunchInfo.Builder builder = LaunchInfo.builder() @@ -266,6 +271,10 @@ protected LaunchInfo.Builder getJobInfoBuilder(LaunchConfig options, JobState st // tests Map<String, String> parameters = new HashMap<>(options.parameters()); options.environment().forEach((key, val) -> parameters.put(key, val.toString())); + // attach basic job info to parameters so that these are exported for load tests + parameters.put(PARAM_RUNNER, runner); + parameters.put(PARAM_JOB_TYPE, job.getType()); + parameters.put(PARAM_JOB_ID, job.getId()); builder.setParameters(ImmutableMap.copyOf(parameters)); if (labels != null && !labels.isEmpty()) { // template job diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DefaultPipelineLauncher.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DefaultPipelineLauncher.java index 7918dd6227d9..ad2dcafc007b 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DefaultPipelineLauncher.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DefaultPipelineLauncher.java @@ -99,7 +99,7 @@ public class DefaultPipelineLauncher extends AbstractPipelineLauncher { .put(PipelineResult.State.UNRECOGNIZED, JobState.UNKNOWN) .build(); - private DefaultPipelineLauncher(Builder builder) { + private DefaultPipelineLauncher(DefaultPipelineLauncher.Builder builder) { super( new Dataflow( Utils.getDefaultTransport(), @@ -109,8 +109,8 @@ private DefaultPipelineLauncher(Builder builder) { : new HttpCredentialsAdapter(builder.getCredentials()))); } - public static Builder builder(Credentials credentials) { - return new Builder(credentials); + public static DefaultPipelineLauncher.Builder builder(Credentials credentials) { + return new DefaultPipelineLauncher.Builder(credentials); } @Override diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java index 8017009ff378..57f8ad40c1b6 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java @@ -53,8 +53,8 @@ public class DirectRunnerClient implements PipelineLauncher { this.mainClass = builder.getMainClass(); } - public static Builder builder(Class<?> mainClass) { - return new Builder(mainClass); + public static DirectRunnerClient.Builder builder(Class<?> mainClass) { + return new DirectRunnerClient.Builder(mainClass); } @Override @@ -172,7 +172,7 @@ public Class<?> getMainClass() { return mainClass; } - public Builder setCredentials(Credentials value) { + public DirectRunnerClient.Builder setCredentials(Credentials value) { credentials = value; return this; } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datagenerator/DataGenerator.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datagenerator/DataGenerator.java index 99016b5dd3a4..832a75defd95 100644 --- 
a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datagenerator/DataGenerator.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datagenerator/DataGenerator.java @@ -61,14 +61,16 @@ private DataGenerator(Builder builder) { .build(); } - public static Builder builderWithSchemaLocation(String testName, String schemaLocation) { - return new Builder(testName + "-data-generator") + public static DataGenerator.Builder builderWithSchemaLocation( + String testName, String schemaLocation) { + return new DataGenerator.Builder(testName + "-data-generator") .setSchemaLocation(schemaLocation) .setAutoscalingAlgorithm(AutoscalingAlgorithmType.THROUGHPUT_BASED); } - public static Builder builderWithSchemaTemplate(String testName, String schemaTemplate) { - return new Builder(testName + "-data-generator") + public static DataGenerator.Builder builderWithSchemaTemplate( + String testName, String schemaTemplate) { + return new DataGenerator.Builder(testName + "-data-generator") .setSchemaTemplate(schemaTemplate) .setAutoscalingAlgorithm(AutoscalingAlgorithmType.THROUGHPUT_BASED); } @@ -129,27 +131,27 @@ public Map getParameters() { return parameters; } - public Builder setSchemaTemplate(String value) { + public DataGenerator.Builder setSchemaTemplate(String value) { parameters.put("schemaTemplate", value); return this; } - public Builder setSchemaLocation(String value) { + public DataGenerator.Builder setSchemaLocation(String value) { parameters.put("schemaLocation", value); return this; } - public Builder setMessagesLimit(String value) { + public DataGenerator.Builder setMessagesLimit(String value) { parameters.put(MESSAGES_LIMIT, value); return this; } - public Builder setQPS(String value) { + public DataGenerator.Builder setQPS(String value) { parameters.put("qps", value); return this; } - public Builder setSinkType(String value) { + public DataGenerator.Builder setSinkType(String value) { parameters.put("sinkType", value); return this; } @@ -164,87 +166,87 @@ public Builder setNumWorkers(String value) { return this; } - public Builder setMaxNumWorkers(String value) { + public DataGenerator.Builder setMaxNumWorkers(String value) { parameters.put("maxNumWorkers", value); return this; } - public Builder setAutoscalingAlgorithm(AutoscalingAlgorithmType value) { + public DataGenerator.Builder setAutoscalingAlgorithm(AutoscalingAlgorithmType value) { parameters.put("autoscalingAlgorithm", value.toString()); return this; } - public Builder setOutputDirectory(String value) { + public DataGenerator.Builder setOutputDirectory(String value) { parameters.put("outputDirectory", value); return this; } - public Builder setOutputType(String value) { + public DataGenerator.Builder setOutputType(String value) { parameters.put("outputType", value); return this; } - public Builder setNumShards(String value) { + public DataGenerator.Builder setNumShards(String value) { parameters.put("numShards", value); return this; } - public Builder setAvroSchemaLocation(String value) { + public DataGenerator.Builder setAvroSchemaLocation(String value) { parameters.put("avroSchemaLocation", value); return this; } - public Builder setTopic(String value) { + public DataGenerator.Builder setTopic(String value) { parameters.put("topic", value); return this; } - public Builder setProjectId(String value) { + public DataGenerator.Builder setProjectId(String value) { parameters.put("projectId", value); return this; } - public Builder setSpannerInstanceName(String value) { + public DataGenerator.Builder 
setSpannerInstanceName(String value) { parameters.put("spannerInstanceName", value); return this; } - public Builder setSpannerDatabaseName(String value) { + public DataGenerator.Builder setSpannerDatabaseName(String value) { parameters.put("spannerDatabaseName", value); return this; } - public Builder setSpannerTableName(String value) { + public DataGenerator.Builder setSpannerTableName(String value) { parameters.put("spannerTableName", value); return this; } - public Builder setDriverClassName(String value) { + public DataGenerator.Builder setDriverClassName(String value) { parameters.put("driverClassName", value); return this; } - public Builder setConnectionUrl(String value) { + public DataGenerator.Builder setConnectionUrl(String value) { parameters.put("connectionUrl", value); return this; } - public Builder setUsername(String value) { + public DataGenerator.Builder setUsername(String value) { parameters.put("username", value); return this; } - public Builder setPassword(String value) { + public DataGenerator.Builder setPassword(String value) { parameters.put("password", value); return this; } - public Builder setConnectionProperties(String value) { + public DataGenerator.Builder setConnectionProperties(String value) { parameters.put("connectionProperties", value); return this; } - public Builder setStatement(String value) { + public DataGenerator.Builder setStatement(String value) { parameters.put("statement", value); return this; } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastore/matchers/DatastoreAsserts.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastore/matchers/DatastoreAsserts.java index ef67a5a5c4fb..78fa7543150f 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastore/matchers/DatastoreAsserts.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastore/matchers/DatastoreAsserts.java @@ -61,7 +61,8 @@ public static List> datastoreResultsToRecords(Collection results) { diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dlp/DlpResourceManager.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dlp/DlpResourceManager.java index f59794af3e1f..de818a1bbff1 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dlp/DlpResourceManager.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dlp/DlpResourceManager.java @@ -113,8 +113,9 @@ public void cleanupAll() { * @param project the GCP project ID * @return a new instance of Builder */ - public static Builder builder(String project, CredentialsProvider credentialsProvider) { - return new Builder(project, credentialsProvider); + public static DlpResourceManager.Builder builder( + String project, CredentialsProvider credentialsProvider) { + return new DlpResourceManager.Builder(project, credentialsProvider); } /** A builder class for creating instances of {@link DlpResourceManager}. 
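Since DataGenerator's fluent setters all funnel into one parameters map, a typical invocation chains them off one of the schema-based factories. A hypothetical usage sketch; the schema template name, the parameter values, and the trailing build() call are assumptions, not lines from this patch:

```java
// Hypothetical wiring of the (now fully qualified) DataGenerator builder.
DataGenerator generator =
    DataGenerator.builderWithSchemaTemplate("my-load-test", "GAME_EVENT")
        .setQPS("100000")          // target throughput for the generator job
        .setMessagesLimit("1000000") // stop after this many messages
        .setSinkType("PUBSUB")
        .setTopic("my-topic")
        .setProjectId("my-project")
        .build();
```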
*/ diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/kms/KMSResourceManager.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/kms/KMSResourceManager.java index 7e1a403c7352..2cad6d0b9fab 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/kms/KMSResourceManager.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/kms/KMSResourceManager.java @@ -72,8 +72,9 @@ private KMSResourceManager(Builder builder) { this.keyRing = null; } - public static Builder builder(String projectId, CredentialsProvider credentialsProvider) { - return new Builder(projectId, credentialsProvider); + public static KMSResourceManager.Builder builder( + String projectId, CredentialsProvider credentialsProvider) { + return new KMSResourceManager.Builder(projectId, credentialsProvider); } /** diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/monitoring/MonitoringClient.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/monitoring/MonitoringClient.java index 0fc5614a3630..06591ea4fe0a 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/monitoring/MonitoringClient.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/monitoring/MonitoringClient.java @@ -150,8 +150,8 @@ public List listTimeSeriesAsLong(ListTimeSeriesRequest request) { Aggregation aggregation = Aggregation.newBuilder() .setAlignmentPeriod(Duration.newBuilder().setSeconds(60).build()) - .setPerSeriesAligner(Aligner.ALIGN_MEAN) - .setCrossSeriesReducer(Reducer.REDUCE_MEAN) + .setPerSeriesAligner(Aggregation.Aligner.ALIGN_MEAN) + .setCrossSeriesReducer(Aggregation.Reducer.REDUCE_MEAN) .addGroupByFields("resource.instance_id") .build(); ListTimeSeriesRequest request = @@ -188,7 +188,7 @@ public List listTimeSeriesAsLong(ListTimeSeriesRequest request) { Aggregation aggregation = Aggregation.newBuilder() .setAlignmentPeriod(Duration.newBuilder().setSeconds(60).build()) - .setPerSeriesAligner(Aligner.ALIGN_MEAN) + .setPerSeriesAligner(Aggregation.Aligner.ALIGN_MEAN) .setCrossSeriesReducer(Reducer.REDUCE_MAX) .build(); ListTimeSeriesRequest request = @@ -225,7 +225,7 @@ public List listTimeSeriesAsLong(ListTimeSeriesRequest request) { Aggregation aggregation = Aggregation.newBuilder() .setAlignmentPeriod(Duration.newBuilder().setSeconds(60).build()) - .setPerSeriesAligner(Aligner.ALIGN_MEAN) + .setPerSeriesAligner(Aggregation.Aligner.ALIGN_MEAN) .setCrossSeriesReducer(Reducer.REDUCE_MAX) .build(); ListTimeSeriesRequest request = @@ -269,7 +269,7 @@ public List listTimeSeriesAsLong(ListTimeSeriesRequest request) { Aggregation aggregation = Aggregation.newBuilder() .setAlignmentPeriod(Duration.newBuilder().setSeconds(60).build()) - .setPerSeriesAligner(Aligner.ALIGN_RATE) + .setPerSeriesAligner(Aggregation.Aligner.ALIGN_RATE) .build(); ListTimeSeriesRequest request = ListTimeSeriesRequest.newBuilder() @@ -312,7 +312,7 @@ public List listTimeSeriesAsLong(ListTimeSeriesRequest request) { Aggregation aggregation = Aggregation.newBuilder() .setAlignmentPeriod(Duration.newBuilder().setSeconds(60).build()) - .setPerSeriesAligner(Aligner.ALIGN_RATE) + .setPerSeriesAligner(Aggregation.Aligner.ALIGN_RATE) .build(); ListTimeSeriesRequest request = ListTimeSeriesRequest.newBuilder() diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManager.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManager.java index 
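The MonitoringClient hunks qualify the nested Aggregation.Aligner and Aggregation.Reducer types without changing behavior. For orientation, a sketch of a fully assembled request in the same shape; the metric filter and the interval handling are illustrative placeholders:

```java
import com.google.monitoring.v3.Aggregation;
import com.google.monitoring.v3.ListTimeSeriesRequest;
import com.google.monitoring.v3.ProjectName;
import com.google.monitoring.v3.TimeInterval;
import com.google.protobuf.Duration;

final class MonitoringRequests {
  // Builds a request with the same aggregation shape as the patch: 60s alignment,
  // per-series mean, cross-series mean, grouped by instance.
  static ListTimeSeriesRequest cpuMeanRequest(String projectId, TimeInterval interval) {
    Aggregation aggregation =
        Aggregation.newBuilder()
            .setAlignmentPeriod(Duration.newBuilder().setSeconds(60).build())
            .setPerSeriesAligner(Aggregation.Aligner.ALIGN_MEAN)
            .setCrossSeriesReducer(Aggregation.Reducer.REDUCE_MEAN)
            .addGroupByFields("resource.instance_id")
            .build();
    return ListTimeSeriesRequest.newBuilder()
        .setName(ProjectName.of(projectId).toString())
        .setFilter("metric.type=\"compute.googleapis.com/instance/cpu/utilization\"")
        .setInterval(interval) // would cover the job's run window
        .setAggregation(aggregation)
        .build();
  }
}
```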
3a684d34c045..738620c15b7e 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManager.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManager.java @@ -20,6 +20,7 @@ import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; import com.google.api.gax.core.CredentialsProvider; +import com.google.api.gax.rpc.DeadlineExceededException; import com.google.cloud.pubsub.v1.Publisher; import com.google.cloud.pubsub.v1.SchemaServiceClient; import com.google.cloud.pubsub.v1.SchemaServiceSettings; @@ -42,12 +43,16 @@ import com.google.pubsub.v1.Topic; import com.google.pubsub.v1.TopicName; import com.google.pubsub.v1.UpdateTopicRequest; +import dev.failsafe.Failsafe; +import dev.failsafe.RetryPolicy; import java.io.IOException; +import java.time.Duration; import java.util.Collections; import java.util.HashSet; import java.util.Map; import java.util.Set; import org.apache.beam.it.common.ResourceManager; +import org.apache.beam.it.common.utils.ExceptionUtils; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; import org.slf4j.Logger; @@ -66,6 +71,12 @@ public final class PubsubResourceManager implements ResourceManager { private static final int DEFAULT_ACK_DEADLINE_SECONDS = 600; private static final String RESOURCE_NAME_SEPARATOR = "-"; + // Retry settings for client operations + private static final int FAILSAFE_MAX_RETRIES = 5; + private static final Duration FAILSAFE_RETRY_DELAY = Duration.ofSeconds(10); + private static final Duration FAILSAFE_RETRY_MAX_DELAY = Duration.ofSeconds(60); + private static final double FAILSAFE_RETRY_JITTER = 0.1; + private final String testId; private final String projectId; private final PubsubPublisherFactory publisherFactory; @@ -184,11 +195,14 @@ public SubscriptionName createSubscription(TopicName topicName, String subscript LOG.info("Creating subscription '{}' for topic '{}'", subscriptionName, topicName); Subscription subscription = - subscriptionAdminClient.createSubscription( - getSubscriptionName(subscriptionName), - topicName, - PushConfig.getDefaultInstance(), - DEFAULT_ACK_DEADLINE_SECONDS); + Failsafe.with(retryOnDeadlineExceeded()) + .get( + () -> + subscriptionAdminClient.createSubscription( + getSubscriptionName(subscriptionName), + topicName, + PushConfig.getDefaultInstance(), + DEFAULT_ACK_DEADLINE_SECONDS)); SubscriptionName reference = PubsubUtils.toSubscriptionName(subscription); createdSubscriptions.add(getSubscriptionName(subscriptionName)); @@ -299,17 +313,19 @@ public synchronized void cleanupAll() { try { for (SubscriptionName subscription : createdSubscriptions) { LOG.info("Deleting subscription '{}'", subscription); - subscriptionAdminClient.deleteSubscription(subscription); + Failsafe.with(retryOnDeadlineExceeded()) + .run(() -> subscriptionAdminClient.deleteSubscription(subscription)); } for (TopicName topic : createdTopics) { LOG.info("Deleting topic '{}'", topic); - topicAdminClient.deleteTopic(topic); + Failsafe.with(retryOnDeadlineExceeded()).run(() -> topicAdminClient.deleteTopic(topic)); } for (SchemaName schemaName : createdSchemas) { LOG.info("Deleting schema '{}'", schemaName); - schemaServiceClient.deleteSchema(schemaName); + Failsafe.with(retryOnDeadlineExceeded()) + .run(() -> schemaServiceClient.deleteSchema(schemaName)); } } finally { 
subscriptionAdminClient.close(); @@ -342,7 +358,8 @@ private void checkIsUsable() throws IllegalStateException { private TopicName createTopicInternal(TopicName topicName) { LOG.info("Creating topic '{}'...", topicName.toString()); - Topic topic = topicAdminClient.createTopic(topicName); + Topic topic = + Failsafe.with(retryOnDeadlineExceeded()).get(() -> topicAdminClient.createTopic(topicName)); TopicName reference = PubsubUtils.toTopicName(topic); createdTopics.add(reference); @@ -355,6 +372,16 @@ private boolean isNotUsable() { return topicAdminClient.isShutdown() || subscriptionAdminClient.isShutdown(); } + private static RetryPolicy retryOnDeadlineExceeded() { + return RetryPolicy.builder() + .handleIf( + exception -> ExceptionUtils.containsType(exception, DeadlineExceededException.class)) + .withMaxRetries(FAILSAFE_MAX_RETRIES) + .withBackoff(FAILSAFE_RETRY_DELAY, FAILSAFE_RETRY_MAX_DELAY) + .withJitter(FAILSAFE_RETRY_JITTER) + .build(); + } + /** Builder for {@link PubsubResourceManager}. */ public static final class Builder { diff --git a/it/google-cloud-platform/src/main/resources/test-artifact.json b/it/google-cloud-platform/src/main/resources/test-artifact.json new file mode 100644 index 000000000000..551c80d14a66 --- /dev/null +++ b/it/google-cloud-platform/src/main/resources/test-artifact.json @@ -0,0 +1 @@ +["This is a test artifact."] \ No newline at end of file diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryIOLT.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryIOLT.java index 03f6e8abfd41..a9ae68142778 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryIOLT.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryIOLT.java @@ -99,12 +99,8 @@ public final class BigQueryIOLT extends IOLoadTestBase { private static final String READ_ELEMENT_METRIC_NAME = "read_count"; private Configuration configuration; private String tempLocation; - private TableSchema schema; - private static final String READ_PCOLLECTION = "Counting element.out0"; - private static final String WRITE_PCOLLECTION = "Map records.out0"; - @Rule public TestPipeline writePipeline = TestPipeline.create(); @Rule public TestPipeline readPipeline = TestPipeline.create(); @@ -268,7 +264,7 @@ private void testWrite(BigQueryIO.Write writeIO) throws IOException { .withCustomGcsTempLocation(ValueProvider.StaticValueProvider.of(tempLocation))); PipelineLauncher.LaunchConfig options = - PipelineLauncher.LaunchConfig.builder("test-bigquery-write") + PipelineLauncher.LaunchConfig.builder("write-bigquery") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(writePipeline) .addParameter("runner", configuration.runner) @@ -284,7 +280,10 @@ private void testWrite(BigQueryIO.Write writeIO) throws IOException { // export metrics MetricsConfiguration metricsConfig = - MetricsConfiguration.builder().setInputPCollection(WRITE_PCOLLECTION).build(); + MetricsConfiguration.builder() + .setInputPCollection("Map records.out0") + .setInputPCollectionV2("Map records/ParMultiDo(MapKVToV).out0") + .build(); try { exportMetricsToBigQuery(launchInfo, getMetrics(launchInfo, metricsConfig)); } catch (ParseException | InterruptedException e) { @@ -301,7 +300,7 @@ private void testRead() throws IOException { .apply("Counting element", ParDo.of(new CountingFn<>(READ_ELEMENT_METRIC_NAME))); PipelineLauncher.LaunchConfig options = - PipelineLauncher.LaunchConfig.builder("test-bigquery-read") + 
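The Failsafe wrapping above gives every Pub/Sub admin call the same recovery behavior: retry only when a DeadlineExceededException appears anywhere in the cause chain, with capped exponential backoff and jitter. A standalone sketch of the pattern, substituting TimeoutException and a local cause-chain helper for the GAX and Beam utilities:

```java
import dev.failsafe.Failsafe;
import dev.failsafe.RetryPolicy;
import java.time.Duration;
import java.util.concurrent.TimeoutException;

public class RetryDemo {
  // Walks the cause chain, mirroring what ExceptionUtils.containsType does in the
  // patch (this helper is a local stand-in, not the Beam utility itself).
  static boolean containsType(Throwable t, Class<? extends Throwable> type) {
    for (Throwable cur = t; cur != null; cur = cur.getCause()) {
      if (type.isInstance(cur)) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) {
    // Same shape as retryOnDeadlineExceeded(): up to 5 retries, 10s-60s capped
    // exponential backoff, 10% jitter.
    RetryPolicy<Object> policy =
        RetryPolicy.builder()
            .handleIf(ex -> containsType(ex, TimeoutException.class))
            .withMaxRetries(5)
            .withBackoff(Duration.ofSeconds(10), Duration.ofSeconds(60))
            .withJitter(0.1)
            .build();

    // Failsafe re-invokes the lambda on matching failures and rethrows otherwise.
    String result = Failsafe.with(policy).get(() -> "ok");
    System.out.println(result);
  }
}
```

Wrapping create and delete calls alike means transient deadline blips during cleanupAll() no longer leak topics, subscriptions, or schemas.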
PipelineLauncher.LaunchConfig.builder("read-bigquery") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(readPipeline) .addParameter("runner", configuration.runner) @@ -326,7 +325,10 @@ private void testRead() throws IOException { // export metrics MetricsConfiguration metricsConfig = - MetricsConfiguration.builder().setOutputPCollection(READ_PCOLLECTION).build(); + MetricsConfiguration.builder() + .setOutputPCollection("Counting element.out0") + .setOutputPCollectionV2("Counting element/ParMultiDo(Counting).out0") + .build(); try { exportMetricsToBigQuery(launchInfo, getMetrics(launchInfo, metricsConfig)); } catch (ParseException | InterruptedException e) { diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigTableIOLT.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigTableIOLT.java index fc7bd87707fc..e232ed31cb5a 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigTableIOLT.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigTableIOLT.java @@ -115,8 +115,6 @@ public void teardown() { /** Run integration test with configurations specified by TestProperties. */ @Test public void testWriteAndRead() throws IOException { - final String readPCollection = "Counting element.out0"; - final String writePCollection = "Map records.out0"; tableId = generateTableId(testName); resourceManager.createTable( @@ -149,8 +147,10 @@ public void testWriteAndRead() throws IOException { // export metrics MetricsConfiguration metricsConfig = MetricsConfiguration.builder() - .setInputPCollection(writePCollection) - .setOutputPCollection(readPCollection) + .setInputPCollection("Map records.out0") + .setInputPCollectionV2("Map records/ParMultiDo(MapToBigTableFormat).out0") + .setOutputPCollection("Counting element.out0") + .setOutputPCollectionV2("Counting element/ParMultiDo(Counting).out0") .build(); try { exportMetricsToBigQuery(writeInfo, getMetrics(writeInfo, metricsConfig)); @@ -174,7 +174,7 @@ private PipelineLauncher.LaunchInfo testWrite() throws IOException { .apply("Write to BigTable", writeIO); PipelineLauncher.LaunchConfig options = - PipelineLauncher.LaunchConfig.builder("test-bigtable-write") + PipelineLauncher.LaunchConfig.builder("write-bigtable") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(writePipeline) .addParameter("runner", configuration.getRunner()) @@ -196,7 +196,7 @@ private PipelineLauncher.LaunchInfo testRead() throws IOException { .apply("Counting element", ParDo.of(new CountingFn<>(READ_ELEMENT_METRIC_NAME))); PipelineLauncher.LaunchConfig options = - PipelineLauncher.LaunchConfig.builder("test-bigtable-read") + PipelineLauncher.LaunchConfig.builder("read-bigtable") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(readPipeline) .addParameter("runner", configuration.getRunner()) @@ -227,18 +227,18 @@ static Configuration of(long numRows, int pipelineTimeout, String runner, int va @AutoValue.Builder abstract static class Builder { - abstract Builder setNumRows(long numRows); + abstract Configuration.Builder setNumRows(long numRows); - abstract Builder setPipelineTimeout(int timeOutMinutes); + abstract Configuration.Builder setPipelineTimeout(int timeOutMinutes); - abstract Builder setRunner(String runner); + abstract Configuration.Builder setRunner(String runner); - abstract Builder setValueSizeBytes(int valueSizeBytes); + abstract Configuration.Builder setValueSizeBytes(int valueSizeBytes); abstract Configuration build(); } - abstract Builder toBuilder(); + 
abstract Configuration.Builder toBuilder(); } /** Maps long number to the BigTable format record. */ diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java index 65745aea49be..f8673ed696cc 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java @@ -442,6 +442,7 @@ public void testCleanupAllShouldWorkWhenBigtableDoesNotThrowAnyError() { setupReadyTable(); testManager.createTable(TABLE_ID, ImmutableList.of("cf1")); + when(bigtableResourceManagerClientFactory.bigtableTableAdminClient().exists(anyString())) .thenReturn(true); testManager.readTable(TABLE_ID); diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/ClassicTemplateClientTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/ClassicTemplateClientTest.java index cfd56e596e52..88c35589f2be 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/ClassicTemplateClientTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/ClassicTemplateClientTest.java @@ -18,6 +18,10 @@ package org.apache.beam.it.gcp.dataflow; import static com.google.common.truth.Truth.assertThat; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.LEGACY_RUNNER; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.PARAM_JOB_ID; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.PARAM_JOB_TYPE; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.PARAM_RUNNER; import static org.junit.Assert.assertThrows; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; @@ -153,8 +157,14 @@ public void testLaunchNewJob() throws IOException { .setSdk("Apache Beam Java") .setVersion("2.42.0") .setJobType("JOB_TYPE_BATCH") - .setRunner("Dataflow Legacy Runner") - .setParameters(ImmutableMap.of(PARAM_KEY, PARAM_VALUE)) + .setRunner(AbstractPipelineLauncher.LEGACY_RUNNER) + .setParameters( + ImmutableMap.builder() + .put(PARAM_KEY, PARAM_VALUE) + .put(PARAM_JOB_ID, JOB_ID) + .put(PARAM_RUNNER, LEGACY_RUNNER) + .put(PARAM_JOB_TYPE, "JOB_TYPE_BATCH") + .build()) .build(); assertThat(actual).isEqualTo(expected); } diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/FlexTemplateClientTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/FlexTemplateClientTest.java index 4088efe67514..06f44437414a 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/FlexTemplateClientTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/FlexTemplateClientTest.java @@ -18,6 +18,10 @@ package org.apache.beam.it.gcp.dataflow; import static com.google.common.truth.Truth.assertThat; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.LEGACY_RUNNER; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.PARAM_JOB_ID; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.PARAM_JOB_TYPE; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.PARAM_RUNNER; import static org.junit.Assert.assertThrows; import static org.mockito.ArgumentMatchers.any; import static 
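The Configuration changes in BigTableIOLT are purely about naming the AutoValue-generated nested Builder explicitly. For reference, the general shape of such a class (the field set is trimmed for illustration, and compiling it requires the AutoValue annotation processor):

```java
import com.google.auto.value.AutoValue;

// Sketch of the AutoValue pattern these load-test Configuration classes follow,
// with the nested Builder referenced by its qualified name as the patch now does.
@AutoValue
abstract class LoadTestConfig {
  abstract long numRows();

  abstract String runner();

  static LoadTestConfig.Builder builder() {
    // AutoValue_LoadTestConfig is generated at compile time.
    return new AutoValue_LoadTestConfig.Builder();
  }

  @AutoValue.Builder
  abstract static class Builder {
    abstract LoadTestConfig.Builder setNumRows(long numRows);

    abstract LoadTestConfig.Builder setRunner(String runner);

    abstract LoadTestConfig build();
  }
}
```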
org.mockito.Mockito.mock; @@ -158,8 +162,14 @@ public void testLaunchNewJob() throws IOException { .setSdk("Apache Beam Java") .setVersion("2.42.0") .setJobType("JOB_TYPE_BATCH") - .setRunner("Dataflow Legacy Runner") - .setParameters(ImmutableMap.of(PARAM_KEY, PARAM_VALUE)) + .setRunner(LEGACY_RUNNER) + .setParameters( + ImmutableMap.builder() + .put(PARAM_KEY, PARAM_VALUE) + .put(PARAM_JOB_ID, JOB_ID) + .put(PARAM_RUNNER, LEGACY_RUNNER) + .put(PARAM_JOB_TYPE, "JOB_TYPE_BATCH") + .build()) .build(); assertThat(actual).isEqualTo(expected); } diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/FileBasedIOLT.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/FileBasedIOLT.java index fd1bc1772f2d..704f8337c66f 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/FileBasedIOLT.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/FileBasedIOLT.java @@ -90,7 +90,7 @@ public class FileBasedIOLT extends IOLoadTestBase { @Rule public TestPipeline readPipeline = TestPipeline.create(); - private static final Map TEST_CONFIGS_PRESET; + private static final Map TEST_CONFIGS_PRESET; static { try { @@ -160,8 +160,6 @@ public void setup() { @Test public void testTextIOWriteThenRead() throws IOException { - final String readPCollection = "Counting element.out0"; - final String writePCollection = "Map records.out0"; TextIO.TypedWrite write = TextIO.write() @@ -182,7 +180,7 @@ public void testTextIOWriteThenRead() throws IOException { .apply("Counting element", ParDo.of(new CountingFn<>(READ_ELEMENT_METRIC_NAME))); PipelineLauncher.LaunchConfig writeOptions = - PipelineLauncher.LaunchConfig.builder("test-textio-write") + PipelineLauncher.LaunchConfig.builder("write-textio") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(writePipeline) .addParameter("runner", configuration.runner) @@ -196,7 +194,7 @@ public void testTextIOWriteThenRead() throws IOException { assertThatResult(writeResult).isLaunchFinished(); PipelineLauncher.LaunchConfig readOptions = - PipelineLauncher.LaunchConfig.builder("test-textio-read") + PipelineLauncher.LaunchConfig.builder("read-textio") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(readPipeline) .addParameter("runner", configuration.runner) @@ -222,8 +220,10 @@ public void testTextIOWriteThenRead() throws IOException { // export metrics MetricsConfiguration metricsConfig = MetricsConfiguration.builder() - .setInputPCollection(writePCollection) - .setOutputPCollection(readPCollection) + .setInputPCollection("Map records.out0") + .setInputPCollectionV2("Map records/ParMultiDo(MapKVToString).out0") + .setOutputPCollection("Counting element.out0") + .setOutputPCollectionV2("Counting element/ParMultiDo(Counting).out0") .build(); try { exportMetricsToBigQuery(writeInfo, getMetrics(writeInfo, metricsConfig)); diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/GcsResourceManagerTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/GcsResourceManagerTest.java index 3ec96da81007..0153573feaed 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/GcsResourceManagerTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/GcsResourceManagerTest.java @@ -71,7 +71,7 @@ public final class GcsResourceManagerTest { @Mock private Blob blob; private GcsResourceManager gcsClient; - private static final String ARTIFACT_NAME = "test-artifact.txt"; + private static final 
String ARTIFACT_NAME = "test-artifact.json"; private static final Path LOCAL_PATH; private static final byte[] TEST_ARTIFACT_CONTENTS; diff --git a/it/google-cloud-platform/src/test/resources/test-artifact.txt b/it/google-cloud-platform/src/test/resources/test-artifact.txt deleted file mode 100644 index 22c4e1d122a7..000000000000 --- a/it/google-cloud-platform/src/test/resources/test-artifact.txt +++ /dev/null @@ -1 +0,0 @@ -This is a test artifact. \ No newline at end of file diff --git a/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MSSQLResourceManager.java b/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MSSQLResourceManager.java index 0bcb16c61095..c515b2c4844f 100644 --- a/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MSSQLResourceManager.java +++ b/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MSSQLResourceManager.java @@ -61,13 +61,14 @@ private MSSQLResourceManager(Builder builder) { } @VisibleForTesting - > MSSQLResourceManager(T container, Builder builder) { + > MSSQLResourceManager( + T container, Builder builder) { super(container, builder); initialized = true; } - public static Builder builder(String testId) { - return new Builder(testId); + public static MSSQLResourceManager.Builder builder(String testId) { + return new MSSQLResourceManager.Builder(testId); } private synchronized void createDatabase(String databaseName) { diff --git a/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MySQLResourceManager.java b/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MySQLResourceManager.java index e1bf3640b53d..688c26dfb56d 100644 --- a/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MySQLResourceManager.java +++ b/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MySQLResourceManager.java @@ -49,8 +49,8 @@ private MySQLResourceManager(Builder builder) { super(container, builder); } - public static Builder builder(String testId) { - return new Builder(testId); + public static MySQLResourceManager.Builder builder(String testId) { + return new MySQLResourceManager.Builder(testId); } @Override diff --git a/it/jdbc/src/main/java/org/apache/beam/it/jdbc/OracleResourceManager.java b/it/jdbc/src/main/java/org/apache/beam/it/jdbc/OracleResourceManager.java index f44e939936d2..8054d26c33f7 100644 --- a/it/jdbc/src/main/java/org/apache/beam/it/jdbc/OracleResourceManager.java +++ b/it/jdbc/src/main/java/org/apache/beam/it/jdbc/OracleResourceManager.java @@ -45,7 +45,7 @@ public class OracleResourceManager extends AbstractJDBCResourceManager( DockerImageName.parse(builder.containerImageName).withTag(builder.containerImageTag)), @@ -46,12 +46,13 @@ private PostgresResourceManager(Builder builder) { } @VisibleForTesting - PostgresResourceManager(PostgreSQLContainer container, Builder builder) { + PostgresResourceManager( + PostgreSQLContainer container, PostgresResourceManager.Builder builder) { super(container, builder); } - public static Builder builder(String testId) { - return new Builder(testId); + public static PostgresResourceManager.Builder builder(String testId) { + return new PostgresResourceManager.Builder(testId); } @Override diff --git a/it/kafka/src/main/java/org/apache/beam/it/kafka/KafkaResourceManager.java b/it/kafka/src/main/java/org/apache/beam/it/kafka/KafkaResourceManager.java index d9a647dbeebd..7f7fb5b69569 100644 --- a/it/kafka/src/main/java/org/apache/beam/it/kafka/KafkaResourceManager.java +++ b/it/kafka/src/main/java/org/apache/beam/it/kafka/KafkaResourceManager.java @@ -71,13 +71,16 @@ public class KafkaResourceManager extends TestContainerResourceManager 0; @@ -102,8 +105,8 @@ 
private KafkaResourceManager(Builder builder) { : AdminClient.create(ImmutableMap.of("bootstrap.servers", this.connectionString)); } - public static Builder builder(String testId) { - return new Builder(testId); + public static KafkaResourceManager.Builder builder(String testId) { + return new KafkaResourceManager.Builder(testId); } /** Returns the kafka bootstrap server connection string. */ diff --git a/it/kafka/src/test/java/org/apache/beam/it/kafka/KafkaIOLT.java b/it/kafka/src/test/java/org/apache/beam/it/kafka/KafkaIOLT.java index a03030664de4..ce6ad877c375 100644 --- a/it/kafka/src/test/java/org/apache/beam/it/kafka/KafkaIOLT.java +++ b/it/kafka/src/test/java/org/apache/beam/it/kafka/KafkaIOLT.java @@ -175,7 +175,7 @@ private PipelineLauncher.LaunchInfo testWrite() throws IOException { .apply("Write to Kafka", writeIO.withTopic(kafkaTopic)); PipelineLauncher.LaunchConfig options = - PipelineLauncher.LaunchConfig.builder("test-kafka-write") + PipelineLauncher.LaunchConfig.builder("write-kafka") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(writePipeline) .addParameter("runner", configuration.getRunner()) @@ -195,7 +195,7 @@ private PipelineLauncher.LaunchInfo testRead() throws IOException { .apply("Counting element", ParDo.of(new CountingFn<>(READ_ELEMENT_METRIC_NAME))); PipelineLauncher.LaunchConfig options = - PipelineLauncher.LaunchConfig.builder("test-kafka-read") + PipelineLauncher.LaunchConfig.builder("read-kafka") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(readPipeline) .addParameter("runner", configuration.getRunner()) diff --git a/it/mongodb/src/main/java/org/apache/beam/it/mongodb/MongoDBResourceManager.java b/it/mongodb/src/main/java/org/apache/beam/it/mongodb/MongoDBResourceManager.java index ed0e556bf0df..80216b14ac0e 100644 --- a/it/mongodb/src/main/java/org/apache/beam/it/mongodb/MongoDBResourceManager.java +++ b/it/mongodb/src/main/java/org/apache/beam/it/mongodb/MongoDBResourceManager.java @@ -69,7 +69,7 @@ public class MongoDBResourceManager extends TestContainerResourceManager( @@ -79,7 +79,10 @@ private Neo4jResourceManager(Builder builder) { @VisibleForTesting @SuppressWarnings("nullness") - Neo4jResourceManager(@Nullable Driver neo4jDriver, Neo4jContainer container, Builder builder) { + Neo4jResourceManager( + @Nullable Driver neo4jDriver, + Neo4jContainer container, + Neo4jResourceManager.Builder builder) { super(container, builder); this.adminPassword = builder.adminPassword; @@ -98,8 +101,8 @@ private Neo4jResourceManager(Builder builder) { } } - public static Builder builder(String testId) { - return new Builder(testId); + public static Neo4jResourceManager.Builder builder(String testId) { + return new Neo4jResourceManager.Builder(testId); } /** Returns the URI connection string to the Neo4j Database. */ diff --git a/it/splunk/src/main/java/org/apache/beam/it/splunk/SplunkResourceManager.java b/it/splunk/src/main/java/org/apache/beam/it/splunk/SplunkResourceManager.java index 0115a791eefe..1ef4726df43a 100644 --- a/it/splunk/src/main/java/org/apache/beam/it/splunk/SplunkResourceManager.java +++ b/it/splunk/src/main/java/org/apache/beam/it/splunk/SplunkResourceManager.java @@ -85,7 +85,7 @@ public class SplunkResourceManager extends TestContainerResourceManagerOptionally, a static resource can be specified by calling the useStaticContainer() method in - * the {@link Builder} class. A static resource is a pre-configured database or other resource that - * is ready to be connected to by the resource manager. 
This could be a pre-existing TestContainer - * that has not been closed, a local database instance, a remote VM, or any other source that can be - * connected to. If a static container is used, the host and port must also be configured using the - * Builder's setHost() and setPort() methods, respectively. + * the {@link TestContainerResourceManager.Builder} class. A static resource is a pre-configured + * database or other resource that is ready to be connected to by the resource manager. This could + * be a pre-existing TestContainer that has not been closed, a local database instance, a remote VM, + * or any other source that can be connected to. If a static container is used, the host and port + * must also be configured using the Builder's setHost() and setPort() methods, respectively. */ public abstract class TestContainerResourceManager> implements ResourceManager { @@ -48,11 +48,12 @@ public abstract class TestContainerResourceManager private final String host; protected int port; - protected > TestContainerResourceManager(T container, B builder) { + protected > TestContainerResourceManager( + T container, B builder) { this(container, builder, null); } - protected > TestContainerResourceManager( + protected > TestContainerResourceManager( T container, B builder, @Nullable Callable setup) { this.container = container; this.usingStaticContainer = builder.useStaticContainer; diff --git a/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/LaunchInfoSubject.java b/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/LaunchInfoSubject.java index a496ecce9448..30a27c9ad259 100644 --- a/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/LaunchInfoSubject.java +++ b/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/LaunchInfoSubject.java @@ -43,7 +43,7 @@ public static Factory launchInfo() { } /** - * Check if the subject reflects succeeded states. A successfully {@link LaunchInfo} does not mean + * Check if the subject reflects succeeded states. A successful {@link LaunchInfo} does not mean * that the pipeline finished and no errors happened, it just means that the job was able to get * itself into an active state (RUNNING, UPDATED). 
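Per the reworded javadoc, wiring a manager to a static resource looks roughly like the following sketch; useStaticContainer(), setHost(), and setPort() come from the javadoc itself, while the concrete values and the build() call are illustrative assumptions:

```java
// Hedged sketch: pointing a TestContainers-based manager at a pre-existing
// ("static") resource instead of starting a fresh container.
MongoDBResourceManager mongo =
    MongoDBResourceManager.builder("my-test")
        .useStaticContainer() // skip container startup, reuse an existing instance
        .setHost("10.0.0.5")  // required when a static container is used
        .setPort(27017)
        .build();
```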
*/ diff --git a/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/RecordsSubject.java b/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/RecordsSubject.java index 75d5ce3a67cd..39a0c0cebedc 100644 --- a/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/RecordsSubject.java +++ b/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/RecordsSubject.java @@ -81,7 +81,7 @@ public void hasRecordSubset(Map subset) { Map expected = convertMapToTreeMap(subset); for (Map candidate : actual) { boolean match = true; - for (Entry entry : subset.entrySet()) { + for (Map.Entry entry : subset.entrySet()) { if (!candidate.containsKey(entry.getKey()) || !candidate.get(entry.getKey()).equals(entry.getValue())) { match = false; diff --git a/learning/tour-of-beam/frontend/pubspec.lock b/learning/tour-of-beam/frontend/pubspec.lock index 5cde8a54211f..49bdc9ef95a8 100644 --- a/learning/tour-of-beam/frontend/pubspec.lock +++ b/learning/tour-of-beam/frontend/pubspec.lock @@ -45,10 +45,10 @@ packages: dependency: transitive description: name: archive - sha256: "0c8368c9b3f0abbc193b9d6133649a614204b528982bebc7026372d61677ce3a" + sha256: "49b1fad315e57ab0bbc15bcbb874e83116a1d78f77ebd500a4af6c9407d6b28e" url: "https://pub.dev" source: hosted - version: "3.3.7" + version: "3.3.8" args: dependency: transitive description: @@ -189,10 +189,10 @@ packages: dependency: "direct main" description: name: collection - sha256: "4a07be6cb69c84d677a6c3096fcf960cc3285a8330b4603e0d463d15d9bd934c" + sha256: f092b211a4319e98e5ff58223576de6c2803db36221657b46c82574721240687 url: "https://pub.dev" source: hosted - version: "1.17.1" + version: "1.17.2" color: dependency: transitive description: @@ -656,10 +656,10 @@ packages: dependency: transitive description: name: intl - sha256: a3715e3bc90294e971cb7dc063fbf3cd9ee0ebf8604ffeafabd9e6f16abbdbe6 + sha256: "3bc132a9dbce73a7e4a21a17d06e1878839ffbf975568bc875c60537824b0c4d" url: "https://pub.dev" source: hosted - version: "0.18.0" + version: "0.18.1" io: dependency: transitive description: @@ -728,18 +728,18 @@ packages: dependency: transitive description: name: matcher - sha256: "6501fbd55da300384b768785b83e5ce66991266cec21af89ab9ae7f5ce1c4cbb" + sha256: "1803e76e6653768d64ed8ff2e1e67bea3ad4b923eb5c56a295c3e634bad5960e" url: "https://pub.dev" source: hosted - version: "0.12.15" + version: "0.12.16" material_color_utilities: dependency: transitive description: name: material_color_utilities - sha256: d92141dc6fe1dad30722f9aa826c7fbc896d021d792f80678280601aff8cf724 + sha256: "9528f2f296073ff54cb9fee677df673ace1218163c3bc7628093e7eed5203d41" url: "https://pub.dev" source: hosted - version: "0.2.0" + version: "0.5.0" meta: dependency: transitive description: @@ -1115,10 +1115,10 @@ packages: dependency: transitive description: name: source_span - sha256: dd904f795d4b4f3b870833847c461801f6750a9fa8e61ea5ac53f9422b31f250 + sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c" url: "https://pub.dev" source: hosted - version: "1.9.1" + version: "1.10.0" stack_trace: dependency: transitive description: @@ -1171,26 +1171,26 @@ packages: dependency: transitive description: name: test - sha256: "3dac9aecf2c3991d09b9cdde4f98ded7b30804a88a0d7e4e7e1678e78d6b97f4" + sha256: "13b41f318e2a5751c3169137103b60c584297353d4b1761b66029bae6411fe46" url: "https://pub.dev" source: hosted - version: "1.24.1" + version: "1.24.3" test_api: dependency: transitive description: name: test_api - sha256: 
eb6ac1540b26de412b3403a163d919ba86f6a973fe6cc50ae3541b80092fdcfb + sha256: "75760ffd7786fffdfb9597c35c5b27eaeec82be8edfb6d71d32651128ed7aab8" url: "https://pub.dev" source: hosted - version: "0.5.1" + version: "0.6.0" test_core: dependency: transitive description: name: test_core - sha256: "5138dbffb77b2289ecb12b81c11ba46036590b72a64a7a90d6ffb880f1a29e93" + sha256: "99806e9e6d95c7b059b7a0fc08f07fc53fabe54a829497f0d9676299f1e8637e" url: "https://pub.dev" source: hosted - version: "0.5.1" + version: "0.5.3" time: dependency: transitive description: @@ -1347,10 +1347,10 @@ packages: dependency: transitive description: name: vm_service - sha256: f6deed8ed625c52864792459709183da231ebf66ff0cf09e69b573227c377efe + sha256: c620a6f783fa22436da68e42db7ebbf18b8c44b9a46ab911f666ff09ffd9153f url: "https://pub.dev" source: hosted - version: "11.3.0" + version: "11.7.1" watcher: dependency: transitive description: @@ -1359,6 +1359,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.1.0" + web: + dependency: transitive + description: + name: web + sha256: dc8ccd225a2005c1be616fe02951e2e342092edf968cf0844220383757ef8f10 + url: "https://pub.dev" + source: hosted + version: "0.1.4-beta" web_socket_channel: dependency: transitive description: diff --git a/playground/frontend/pubspec.lock b/playground/frontend/pubspec.lock index 425e33e4de7d..e49850e7a820 100644 --- a/playground/frontend/pubspec.lock +++ b/playground/frontend/pubspec.lock @@ -45,10 +45,10 @@ packages: dependency: transitive description: name: archive - sha256: "0c8368c9b3f0abbc193b9d6133649a614204b528982bebc7026372d61677ce3a" + sha256: "49b1fad315e57ab0bbc15bcbb874e83116a1d78f77ebd500a4af6c9407d6b28e" url: "https://pub.dev" source: hosted - version: "3.3.7" + version: "3.3.8" args: dependency: transitive description: @@ -189,10 +189,10 @@ packages: dependency: "direct main" description: name: collection - sha256: "4a07be6cb69c84d677a6c3096fcf960cc3285a8330b4603e0d463d15d9bd934c" + sha256: f092b211a4319e98e5ff58223576de6c2803db36221657b46c82574721240687 url: "https://pub.dev" source: hosted - version: "1.17.1" + version: "1.17.2" connectivity_plus: dependency: transitive description: @@ -600,10 +600,10 @@ packages: dependency: "direct main" description: name: intl - sha256: a3715e3bc90294e971cb7dc063fbf3cd9ee0ebf8604ffeafabd9e6f16abbdbe6 + sha256: "3bc132a9dbce73a7e4a21a17d06e1878839ffbf975568bc875c60537824b0c4d" url: "https://pub.dev" source: hosted - version: "0.18.0" + version: "0.18.1" io: dependency: transitive description: @@ -672,18 +672,18 @@ packages: dependency: transitive description: name: matcher - sha256: "6501fbd55da300384b768785b83e5ce66991266cec21af89ab9ae7f5ce1c4cbb" + sha256: "1803e76e6653768d64ed8ff2e1e67bea3ad4b923eb5c56a295c3e634bad5960e" url: "https://pub.dev" source: hosted - version: "0.12.15" + version: "0.12.16" material_color_utilities: dependency: transitive description: name: material_color_utilities - sha256: d92141dc6fe1dad30722f9aa826c7fbc896d021d792f80678280601aff8cf724 + sha256: "9528f2f296073ff54cb9fee677df673ace1218163c3bc7628093e7eed5203d41" url: "https://pub.dev" source: hosted - version: "0.2.0" + version: "0.5.0" meta: dependency: transitive description: @@ -1059,10 +1059,10 @@ packages: dependency: transitive description: name: source_span - sha256: dd904f795d4b4f3b870833847c461801f6750a9fa8e61ea5ac53f9422b31f250 + sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c" url: "https://pub.dev" source: hosted - version: "1.9.1" + version: "1.10.0" stack_trace: dependency: 
transitive description: @@ -1115,26 +1115,26 @@ packages: dependency: transitive description: name: test - sha256: "3dac9aecf2c3991d09b9cdde4f98ded7b30804a88a0d7e4e7e1678e78d6b97f4" + sha256: "13b41f318e2a5751c3169137103b60c584297353d4b1761b66029bae6411fe46" url: "https://pub.dev" source: hosted - version: "1.24.1" + version: "1.24.3" test_api: dependency: transitive description: name: test_api - sha256: eb6ac1540b26de412b3403a163d919ba86f6a973fe6cc50ae3541b80092fdcfb + sha256: "75760ffd7786fffdfb9597c35c5b27eaeec82be8edfb6d71d32651128ed7aab8" url: "https://pub.dev" source: hosted - version: "0.5.1" + version: "0.6.0" test_core: dependency: transitive description: name: test_core - sha256: "5138dbffb77b2289ecb12b81c11ba46036590b72a64a7a90d6ffb880f1a29e93" + sha256: "99806e9e6d95c7b059b7a0fc08f07fc53fabe54a829497f0d9676299f1e8637e" url: "https://pub.dev" source: hosted - version: "0.5.1" + version: "0.5.3" timing: dependency: transitive description: @@ -1299,10 +1299,10 @@ packages: dependency: transitive description: name: vm_service - sha256: f6deed8ed625c52864792459709183da231ebf66ff0cf09e69b573227c377efe + sha256: c620a6f783fa22436da68e42db7ebbf18b8c44b9a46ab911f666ff09ffd9153f url: "https://pub.dev" source: hosted - version: "11.3.0" + version: "11.7.1" watcher: dependency: transitive description: @@ -1311,6 +1311,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.1.0" + web: + dependency: transitive + description: + name: web + sha256: dc8ccd225a2005c1be616fe02951e2e342092edf968cf0844220383757ef8f10 + url: "https://pub.dev" + source: hosted + version: "0.1.4-beta" web_browser_detect: dependency: transitive description: diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java index 02f6f9acd7a6..17aea34045ff 100644 --- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java +++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java @@ -1755,7 +1755,7 @@ void maybeRecordPCollectionWithAutoSharding(PCollection pcol) { options.isEnableStreamingEngine(), "Runner determined sharding not available in Dataflow for GroupIntoBatches for" + " non-Streaming-Engine jobs. 
In order to use runner determined sharding, please use" - + " --streaming --enable_streaming_engine"); + + " --streaming --experiments=enable_streaming_engine"); pCollectionsPreservedKeys.add(pcol); pcollectionsRequiringAutoSharding.add(pcol); } diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java index af6d890cf51b..a34303d92552 100644 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java +++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java @@ -173,4 +173,10 @@ public ExecutorService create(PipelineOptions options) { new ThreadFactoryBuilder().setNameFormat("Process Element Thread-%d").build()); } } + + @Description("Enable/disable late data dropping in GroupByKey/Combine transforms") + @Default.Boolean(false) + boolean getDropLateData(); + + void setDropLateData(boolean dropLateData); } diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java index 3ecd406da615..1b19275dd967 100644 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java +++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java @@ -180,11 +180,19 @@ public TimerInternals timerInternals() { DoFnSchemaInformation.create(), Collections.emptyMap()); + final DoFnRunner, KV> dropLateDataRunner = + pipelineOptions.getDropLateData() + ? DoFnRunners.lateDataDroppingRunner( + doFnRunner, keyedInternals.timerInternals(), windowingStrategy) + : doFnRunner; + final SamzaExecutionContext executionContext = (SamzaExecutionContext) context.getApplicationContainerContext(); - this.fnRunner = + final DoFnRunner, KV> doFnRunnerWithMetrics = DoFnRunnerWithMetrics.wrap( - doFnRunner, executionContext.getMetricsContainer(), transformFullName); + dropLateDataRunner, executionContext.getMetricsContainer(), transformFullName); + + this.fnRunner = new DoFnRunnerWithKeyedInternals<>(doFnRunnerWithMetrics, keyedInternals); } @Override diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java new file mode 100644 index 000000000000..8670d9a46eac --- /dev/null +++ b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
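The new SamzaPipelineOptions accessors are all it takes to surface --dropLateData on the command line: Beam derives the option from the getter/setter pair. A minimal self-contained equivalent (DemoOptions is a stand-in interface, not the Samza one):

```java
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

// Beam generates the --dropLateData flag from this getter/setter pair, exactly
// as it does for the SamzaPipelineOptions methods added above.
public interface DemoOptions extends PipelineOptions {
  @Description("Enable/disable late data dropping in GroupByKey/Combine transforms")
  @Default.Boolean(false)
  boolean getDropLateData();

  void setDropLateData(boolean dropLateData);
}

// Usage:
//   DemoOptions opts =
//       PipelineOptionsFactory.fromArgs("--dropLateData=true").as(DemoOptions.class);
//   assert opts.getDropLateData();
```

GroupByKeyOp then reads the flag and, when set, interposes DoFnRunners.lateDataDroppingRunner between the GBK runner and the metrics wrapper, as the hunk above shows.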
+ */ +package org.apache.beam.runners.samza.runtime; + +import java.io.Serializable; +import java.util.Arrays; +import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.coders.VarIntCoder; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.testing.TestStream; +import org.apache.beam.sdk.transforms.Combine; +import org.apache.beam.sdk.transforms.Sum; +import org.apache.beam.sdk.transforms.windowing.FixedWindows; +import org.apache.beam.sdk.transforms.windowing.Window; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.TimestampedValue; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Rule; +import org.junit.Test; + +/** Tests for GroupByKeyOp. */ +public class GroupByKeyOpTest implements Serializable { + @Rule + public final transient TestPipeline pipeline = + TestPipeline.fromOptions( + PipelineOptionsFactory.fromArgs("--runner=TestSamzaRunner").create()); + + @Rule + public final transient TestPipeline dropLateDataPipeline = + TestPipeline.fromOptions( + PipelineOptionsFactory.fromArgs("--runner=TestSamzaRunner", "--dropLateData=true") + .create()); + + @Test + public void testDefaultGbk() { + TestStream.Builder testStream = + TestStream.create(VarIntCoder.of()) + .addElements(TimestampedValue.of(1, new Instant(1000))) + .addElements(TimestampedValue.of(2, new Instant(2000))) + .advanceWatermarkTo(new Instant(3000)) + .addElements(TimestampedValue.of(10, new Instant(1000))) + .advanceWatermarkTo(new Instant(10000)); + + PCollection aggregated = + pipeline + .apply(testStream.advanceWatermarkToInfinity()) + .apply( + Window.into(FixedWindows.of(Duration.standardSeconds(3))) + .accumulatingFiredPanes()) + .apply(Combine.globally(Sum.ofIntegers()).withoutDefaults()); + + PAssert.that(aggregated).containsInAnyOrder(Arrays.asList(3, 10)); + + pipeline.run().waitUntilFinish(); + } + + @Test + public void testDropLateDataNonKeyed() { + TestStream.Builder testStream = + TestStream.create(VarIntCoder.of()) + .addElements(TimestampedValue.of(1, new Instant(1000))) + .addElements(TimestampedValue.of(2, new Instant(2000))) + .advanceWatermarkTo(new Instant(3000)) + .addElements(TimestampedValue.of(10, new Instant(1000))) + .advanceWatermarkTo(new Instant(10000)); + + PCollection aggregated = + dropLateDataPipeline + .apply(testStream.advanceWatermarkToInfinity()) + .apply( + Window.into(FixedWindows.of(Duration.standardSeconds(3))) + .accumulatingFiredPanes()) + .apply(Combine.globally(Sum.ofIntegers()).withoutDefaults()); + + PAssert.that(aggregated).containsInAnyOrder(3); + + dropLateDataPipeline.run().waitUntilFinish(); + } + + @Test + public void testDropLateDataKeyed() { + TestStream.Builder> testStream = + TestStream.create(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())) + .addElements(TimestampedValue.of(KV.of("a", 1), new Instant(1000))) + .addElements(TimestampedValue.of(KV.of("b", 2), new Instant(2000))) + .addElements(TimestampedValue.of(KV.of("a", 3), new Instant(2500))) + .advanceWatermarkTo(new Instant(3000)) + .addElements(TimestampedValue.of(KV.of("a", 10), new Instant(1000))) + .advanceWatermarkTo(new Instant(10000)); + + PCollection> aggregated = + dropLateDataPipeline + .apply(testStream.advanceWatermarkToInfinity()) + .apply( + 
Window.>into(FixedWindows.of(Duration.standardSeconds(3))) + .accumulatingFiredPanes()) + .apply(Sum.integersPerKey()); + + PAssert.that(aggregated).containsInAnyOrder(Arrays.asList(KV.of("a", 4), KV.of("b", 2))); + + dropLateDataPipeline.run().waitUntilFinish(); + } +} diff --git a/sdks/go.mod b/sdks/go.mod index 5e91aea021f8..53596c5d207d 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -31,9 +31,9 @@ require ( cloud.google.com/go/spanner v1.49.0 cloud.google.com/go/storage v1.33.0 github.com/aws/aws-sdk-go-v2 v1.21.0 - github.com/aws/aws-sdk-go-v2/config v1.18.39 - github.com/aws/aws-sdk-go-v2/credentials v1.13.37 - github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.83 + github.com/aws/aws-sdk-go-v2/config v1.18.40 + github.com/aws/aws-sdk-go-v2/credentials v1.13.38 + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.84 github.com/aws/aws-sdk-go-v2/service/s3 v1.38.5 github.com/aws/smithy-go v1.14.2 github.com/docker/go-connections v0.4.0 @@ -47,7 +47,7 @@ require ( github.com/linkedin/goavro/v2 v2.12.0 github.com/proullon/ramsql v0.1.2 github.com/spf13/cobra v1.7.0 - github.com/testcontainers/testcontainers-go v0.23.0 + github.com/testcontainers/testcontainers-go v0.24.0 github.com/tetratelabs/wazero v1.5.0 github.com/xitongsys/parquet-go v1.6.2 github.com/xitongsys/parquet-go-source v0.0.0-20220315005136-aec0fe3e777c @@ -57,9 +57,9 @@ require ( golang.org/x/sync v0.3.0 golang.org/x/sys v0.12.0 golang.org/x/text v0.13.0 - google.golang.org/api v0.140.0 + google.golang.org/api v0.142.0 google.golang.org/genproto v0.0.0-20230821184602-ccc8af3d0e93 - google.golang.org/grpc v1.58.0 + google.golang.org/grpc v1.58.1 google.golang.org/protobuf v1.31.0 gopkg.in/retry.v1 v1.0.3 gopkg.in/yaml.v2 v2.4.0 @@ -71,7 +71,18 @@ require ( golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 ) -require dario.cat/mergo v1.0.0 // indirect +require ( + dario.cat/mergo v1.0.0 // indirect + github.com/Microsoft/hcsshim v0.11.0 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect + github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect + github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect + github.com/shirou/gopsutil/v3 v3.23.7 // indirect + github.com/shoenig/go-m1cpu v0.1.6 // indirect + github.com/tklauser/go-sysconf v0.3.11 // indirect + github.com/tklauser/numcpus v0.6.0 // indirect + github.com/yusufpapurcu/wmi v1.2.3 // indirect +) require ( cloud.google.com/go v0.110.7 // indirect @@ -96,18 +107,18 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.36 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.35 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.15.4 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.13.6 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.15.6 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.21.5 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.14.0 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.16.0 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.22.0 // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe // indirect github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 // indirect - github.com/containerd/containerd v1.7.3 // indirect + github.com/containerd/containerd v1.7.6 // indirect 
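Stepping back to the GroupByKeyOpTest added above, the expected values follow from window expiry; a worked timeline, assuming the default allowed lateness of zero:

```java
// Worked timeline for the testDropLateData* cases (FixedWindows of 3s; zero
// allowed lateness is an assumption about the test setup, not stated in it):
//
//   t=1000ms: 1   -> window [0s, 3s)
//   t=2000ms: 2   -> window [0s, 3s)
//   watermark -> 3000ms: window [0s, 3s) fires with 1 + 2 = 3 and expires
//   t=1000ms: 10  -> arrives behind the watermark, i.e. late for [0s, 3s)
//
// With --dropLateData=true the late 10 is discarded, so only 3 is emitted. In
// the default pipeline the late element still surfaces, which is why
// testDefaultGbk asserts both 3 and the extra late firing 10.
```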
github.com/cpuguy83/dockercfg v0.3.1 // indirect github.com/docker/distribution v2.8.2+incompatible // indirect - github.com/docker/docker v24.0.5+incompatible // indirect; but required to resolve issue docker has with go1.20 + github.com/docker/docker v24.0.6+incompatible // but required to resolve issue docker has with go1.20 github.com/docker/go-units v0.5.0 // indirect github.com/envoyproxy/go-control-plane v0.11.1 // indirect github.com/envoyproxy/protoc-gen-validate v1.0.2 // indirect @@ -160,5 +171,5 @@ require ( golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20230803162519-f966b187b2e5 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20230911183012-2d3300fd4832 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230913181813-007df8e322eb // indirect ) diff --git a/sdks/go.sum b/sdks/go.sum index c30891294dbd..502fdf1e8892 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ -48,7 +48,7 @@ cloud.google.com/go/storage v1.33.0/go.mod h1:Hhh/dogNRGca7IWv1RC2YqEn0c0G77ctA/ dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -github.com/AdaLogics/go-fuzz-headers v0.0.0-20230106234847-43070de90fa1 h1:EKPd1INOIyr5hWOWhvpmQpY6tKjeG0hT1s3AMC/9fic= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= github.com/Azure/azure-pipeline-go v0.2.3/go.mod h1:x841ezTBIMG6O3lAcl8ATHnsOPVl2bqk7S3ta6S6u4k= github.com/Azure/azure-storage-blob-go v0.14.0/go.mod h1:SMqIBi+SuiQH32bvyjngEewEeXoPfKMgWlBDaYf6fck= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= @@ -64,7 +64,8 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= -github.com/Microsoft/hcsshim v0.10.0-rc.8 h1:YSZVvlIIDD1UxQpJp0h+dnpLUw+TrY0cx8obKsp3bek= +github.com/Microsoft/hcsshim v0.11.0 h1:7EFNIY4igHEXUdj1zXgAyU3fLc7QfOKHbkldRVTBdiM= +github.com/Microsoft/hcsshim v0.11.0/go.mod h1:OEthFdQv/AD2RAdzR6Mm1N1KPCztGKDurW1Z8b8VGMM= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 h1:byKBBF2CKWBjjA4J1ZL2JXttJULvWSl50LegTyRZ728= @@ -85,17 +86,17 @@ github.com/aws/aws-sdk-go-v2 v1.21.0/go.mod h1:/RfNgGmRxI+iFOB1OeJUyxiU+9s88k3pf github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.13 h1:OPLEkmhXf6xFPiz0bLeDArZIDx1NNS4oJyG4nv3Gct0= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.13/go.mod h1:gpAbvyDGQFozTEmlTFO8XcQKHzubdq0LzRyJpG6MiXM= github.com/aws/aws-sdk-go-v2/config v1.5.0/go.mod h1:RWlPOAW3E3tbtNAqTwvSW54Of/yP3oiZXMI0xfUdjyA= -github.com/aws/aws-sdk-go-v2/config v1.18.39 h1:oPVyh6fuu/u4OiW4qcuQyEtk7U7uuNBmHmJSLg1AJsQ= -github.com/aws/aws-sdk-go-v2/config v1.18.39/go.mod h1:+NH/ZigdPckFpgB1TRcRuWCB/Kbbvkxc/iNAKTq5RhE= 
+github.com/aws/aws-sdk-go-v2/config v1.18.40 h1:dbu1llI/nTIL+r6sYHMeVLl99DM8J8/o1I4EPurnhLg= +github.com/aws/aws-sdk-go-v2/config v1.18.40/go.mod h1:JjrCZQwSPGCoZRQzKHyZNNueaKO+kFaEy2sR6mCzd90= github.com/aws/aws-sdk-go-v2/credentials v1.3.1/go.mod h1:r0n73xwsIVagq8RsxmZbGSRQFj9As3je72C2WzUIToc= -github.com/aws/aws-sdk-go-v2/credentials v1.13.37 h1:BvEdm09+ZEh2XtN+PVHPcYwKY3wIeB6pw7vPRM4M9/U= -github.com/aws/aws-sdk-go-v2/credentials v1.13.37/go.mod h1:ACLrdkd4CLZyXOghZ8IYumQbcooAcp2jo/s2xsFH8IM= +github.com/aws/aws-sdk-go-v2/credentials v1.13.38 h1:gDAuCdVlA4lmmgQhvpZlscwicloCqH44vkxLklGkQLA= +github.com/aws/aws-sdk-go-v2/credentials v1.13.38/go.mod h1:sD4G/Ybgp6s89mWIES3Xn97CsRLpxvz9uVSdv0UxY8I= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.3.0/go.mod h1:2LAuqPx1I6jNfaGDucWfA2zqQCYCOMCDHiCOciALyNw= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.11 h1:uDZJF1hu0EVT/4bogChk8DyjSF6fof6uL/0Y26Ma7Fg= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.11/go.mod h1:TEPP4tENqBGO99KwVpV9MlOX4NSrSLP8u3KRy2CDwA8= github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.3.2/go.mod h1:qaqQiHSrOUVOfKe6fhgQ6UzhxjwqVW8aHNegd6Ws4w4= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.83 h1:wcluDLIQ0uYaxv0fCWQRimbXkPdTgWHUD21j1CzXEwc= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.83/go.mod h1:nGCBuon134gW67yAtxHKV73x+tAcY/xG4ZPNPDB1h/I= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.84 h1:LENrVcqnWTyI8fbIUCvxAMe+fXbREIaXzcR8WPwco1U= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.84/go.mod h1:LHxCiYAStsgps4srke7HujyADd504MSkNXjLpOtICTc= github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41 h1:22dGT7PneFMx4+b3pz7lMTRyN8ZKH7M2cW4GP9yUS2g= github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41/go.mod h1:CrObHAuPneJBlfEJ5T3szXOUkLEThaGfvnhTf33buas= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35 h1:SijA0mgjV8E+8G45ltVHs0fvKpTj8xmZJ3VwhGKtUSI= @@ -120,13 +121,13 @@ github.com/aws/aws-sdk-go-v2/service/s3 v1.11.1/go.mod h1:XLAGFrEjbvMCLvAtWLLP32 github.com/aws/aws-sdk-go-v2/service/s3 v1.38.5 h1:A42xdtStObqy7NGvzZKpnyNXvoOmm+FENobZ0/ssHWk= github.com/aws/aws-sdk-go-v2/service/s3 v1.38.5/go.mod h1:rDGMZA7f4pbmTtPOk5v5UM2lmX6UAbRnMDJeDvnH7AM= github.com/aws/aws-sdk-go-v2/service/sso v1.3.1/go.mod h1:J3A3RGUvuCZjvSuZEcOpHDnzZP/sKbhDWV2T1EOzFIM= -github.com/aws/aws-sdk-go-v2/service/sso v1.13.6 h1:2PylFCfKCEDv6PeSN09pC/VUiRd10wi1VfHG5FrW0/g= -github.com/aws/aws-sdk-go-v2/service/sso v1.13.6/go.mod h1:fIAwKQKBFu90pBxx07BFOMJLpRUGu8VOzLJakeY+0K4= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.15.6 h1:pSB560BbVj9ZlJZF4WYj5zsytWHWKxg+NgyGV4B2L58= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.15.6/go.mod h1:yygr8ACQRY2PrEcy3xsUI357stq2AxnFM6DIsR9lij4= +github.com/aws/aws-sdk-go-v2/service/sso v1.14.0 h1:AR/hlTsCyk1CwlyKnPFvIMvnONydRjDDRT9OGb0i+/g= +github.com/aws/aws-sdk-go-v2/service/sso v1.14.0/go.mod h1:fIAwKQKBFu90pBxx07BFOMJLpRUGu8VOzLJakeY+0K4= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.16.0 h1:vbgiXuhtn49+erlPrgIvQ+J32rg1HseaPf8lEpKbkxQ= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.16.0/go.mod h1:yygr8ACQRY2PrEcy3xsUI357stq2AxnFM6DIsR9lij4= github.com/aws/aws-sdk-go-v2/service/sts v1.6.0/go.mod h1:q7o0j7d7HrJk/vr9uUt3BVRASvcU7gYZB9PUgPiByXg= -github.com/aws/aws-sdk-go-v2/service/sts v1.21.5 h1:CQBFElb0LS8RojMJlxRSo/HXipvTZW2S44Lt9Mk2aYQ= -github.com/aws/aws-sdk-go-v2/service/sts v1.21.5/go.mod h1:VC7JDqsqiwXukYEDjoHh9U0fOJtNWh04FPQz4ct4GGU= +github.com/aws/aws-sdk-go-v2/service/sts v1.22.0 h1:s4bioTgjSFRwOoyEFzAVCmFmoowBgjTR8gkrF/sQ4wk= 
+github.com/aws/aws-sdk-go-v2/service/sts v1.22.0/go.mod h1:VC7JDqsqiwXukYEDjoHh9U0fOJtNWh04FPQz4ct4GGU= github.com/aws/smithy-go v1.6.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= github.com/aws/smithy-go v1.14.2 h1:MJU9hqBGbvWZdApzpvoF2WAIJDbtjK2NDJSiJP7HblQ= github.com/aws/smithy-go v1.14.2/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= @@ -151,8 +152,8 @@ github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+g github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c= github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= -github.com/containerd/containerd v1.7.3 h1:cKwYKkP1eTj54bP3wCdXXBymmKRQMrWjkLSWZZJDa8o= -github.com/containerd/containerd v1.7.3/go.mod h1:32FOM4/O0RkNg7AjQj3hDzN9cUGtu+HMvaKUNiqCZB8= +github.com/containerd/containerd v1.7.6 h1:oNAVsnhPoy4BTPQivLgTzI9Oleml9l/+eYIDYXRCYo8= +github.com/containerd/containerd v1.7.6/go.mod h1:SY6lrkkuJT40BVNO37tlYTSnKJnP5AXBc0fhx0q+TJ4= github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cpuguy83/dockercfg v0.3.1 h1:/FpZ+JaygUR/lZP2NlFI2DVfrOEMAIKP5wWEJdoYe9E= github.com/cpuguy83/dockercfg v0.3.1/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc= @@ -160,15 +161,14 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:ma github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= -github.com/cyphar/filepath-securejoin v0.2.3 h1:YX6ebbZCZP7VkM3scTTokDgBL2TY741X51MTk3ycuNI= github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/docker/distribution v2.8.2+incompatible h1:T3de5rq0dB1j30rp0sA2rER+m322EBzniBPB6ZIzuh8= github.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= -github.com/docker/docker v24.0.5+incompatible h1:WmgcE4fxyI6EEXxBRxsHnZXrO1pQ3smi0k/jho4HLeY= -github.com/docker/docker v24.0.5+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker v24.0.6+incompatible h1:hceabKCtUgDqPu+qm0NgsaXf28Ljf4/pWFL7xjWWDgE= +github.com/docker/docker v24.0.6+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= @@ -197,6 +197,8 @@ github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gorp/gorp v2.2.0+incompatible h1:xAUh4QgEeqPPhK3vxZN+bzrim1z5Av6q837gtjUlshc= +github.com/go-ole/go-ole 
v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= @@ -332,6 +334,8 @@ github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/linkedin/goavro/v2 v2.12.0 h1:rIQQSj8jdAUlKQh6DttK8wCRv4t4QO09g1C4aBWXslg= github.com/linkedin/goavro/v2 v2.12.0/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= github.com/mattn/go-ieproxy v0.0.1/go.mod h1:pYabZ6IHcRpFh7vIaLfK7rdcWgFEb3SFJ6/gNWuh88E= @@ -375,6 +379,8 @@ github.com/pkg/xattr v0.4.9 h1:5883YPCtkSd8LFbs13nXplj9g9tlrwoJRjgpgMu1/fE= github.com/pkg/xattr v0.4.9/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/proullon/ramsql v0.1.2 h1:PTtsy2iml/CW3Lsopyr86dlIs7JyYEmfLrfYvQVXD2U= github.com/proullon/ramsql v0.1.2/go.mod h1:CFGqeQHQpdRfWqYmWD3yXqPTEaHkF4zgXy1C6qDWc9E= @@ -390,6 +396,12 @@ github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46/go.mod h1:uAQ5P github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/shabbyrobe/gocovmerge v0.0.0-20180507124511-f6ea450bfb63 h1:J6qvD6rbmOil46orKqJaRPG+zTpoGlBTUdyv8ki63L0= github.com/shabbyrobe/gocovmerge v0.0.0-20180507124511-f6ea450bfb63/go.mod h1:n+VKSARF5y/tS9XFSP7vWDfS+GUC5vs/YT7M5XDTUEM= +github.com/shirou/gopsutil/v3 v3.23.7 h1:C+fHO8hfIppoJ1WdsVm1RoI0RwXoNdfTK7yWXV0wVj4= +github.com/shirou/gopsutil/v3 v3.23.7/go.mod h1:c4gnmoRC0hQuaLqvxnx1//VXQ0Ms/X9UnJF8pddY5z4= +github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM= +github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ= +github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= +github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= @@ -414,11 +426,16 @@ github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO 
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= -github.com/testcontainers/testcontainers-go v0.23.0 h1:ERYTSikX01QczBLPZpqsETTBO7lInqEP349phDOVJVs= -github.com/testcontainers/testcontainers-go v0.23.0/go.mod h1:3gzuZfb7T9qfcH2pHpV4RLlWrPjeWNQah6XlYQ32c4I= +github.com/testcontainers/testcontainers-go v0.24.0 h1:eqkq6nNIPVrqpXNyn/s5jDBqPGuWtND2hOMEBrUULIw= +github.com/testcontainers/testcontainers-go v0.24.0/go.mod h1:MGBiAkCm86yXQoCiipmQCqZLVdk1uFqtMqaU1Or0MRk= github.com/tetratelabs/wazero v1.5.0 h1:Yz3fZHivfDiZFUXnWMPUoiW7s8tC1sjdBtlJn08qYa0= github.com/tetratelabs/wazero v1.5.0/go.mod h1:0U0G41+ochRKoPKCJlh0jMg1CHkyfK8kDqiirMmKY8A= +github.com/tklauser/go-sysconf v0.3.11 h1:89WgdJhk5SNwJfu+GKyYveZ4IaJ7xAkecBo+KdJV0CM= +github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= +github.com/tklauser/numcpus v0.6.0 h1:kebhY2Qt+3U6RNK7UqpYNA+tJ23IBEGKkB7JQBfDYms= +github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= @@ -440,6 +457,8 @@ github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7Jul github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFiw= +github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= @@ -551,6 +570,7 @@ golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191112214154-59a1497f0cea/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -565,6 +585,7 @@ golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200828194041-157a740278f4/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -576,7 +597,9 @@ golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -647,8 +670,8 @@ google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsb google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.140.0 h1:CaXNdYOH5oQQI7l6iKTHHiMTdxZca4/02hRg2U8c2hM= -google.golang.org/api v0.140.0/go.mod h1:aGbCiFgtwb2P6badchFbSBUurV6oR5d50Af4iNJtDdI= +google.golang.org/api v0.142.0 h1:mf+7EJ94fi5ZcnpPy+m0Yv2dkz8bKm+UL0snTCuwXlY= +google.golang.org/api v0.142.0/go.mod h1:zJAN5o6HRqR7O+9qJUFOWrZkYE66RH+efPBdTLA4xBA= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -678,8 +701,8 @@ google.golang.org/genproto v0.0.0-20230821184602-ccc8af3d0e93 h1:zv6ieVm8jNcN33A google.golang.org/genproto v0.0.0-20230821184602-ccc8af3d0e93/go.mod h1:yZTlhN0tQnXo3h00fuXNCxJdLdIdnVFVBaRJ5LWBbw4= google.golang.org/genproto/googleapis/api v0.0.0-20230803162519-f966b187b2e5 h1:nIgk/EEq3/YlnmVVXVnm14rC2oxgs1o0ong4sD/rd44= google.golang.org/genproto/googleapis/api v0.0.0-20230803162519-f966b187b2e5/go.mod h1:5DZzOUPCLYL3mNkQ0ms0F3EuUNZ7py1Bqeq6sxzI7/Q= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230911183012-2d3300fd4832 h1:o4LtQxebKIJ4vkzyhtD2rfUNZ20Zf0ik5YVP5E7G7VE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230911183012-2d3300fd4832/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230913181813-007df8e322eb h1:Isk1sSH7bovx8Rti2wZK0UZF6oraBDK74uoyLEEVFN0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230913181813-007df8e322eb/go.mod 
h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
@@ -689,8 +712,8 @@ google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8
 google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
 google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
 google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
-google.golang.org/grpc v1.58.0 h1:32JY8YpPMSR45K+c3o6b8VL73V+rR8k+DeMIr4vRH8o=
-google.golang.org/grpc v1.58.0/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0=
+google.golang.org/grpc v1.58.1 h1:OL+Vz23DTtrrldqHK49FUOPHyY75rvFqJfXC84NYW58=
+google.golang.org/grpc v1.58.1/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0=
 google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
 google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
 google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
diff --git a/sdks/go/README.md b/sdks/go/README.md
index a3b03c2e6184..7734d58d9eb9 100644
--- a/sdks/go/README.md
+++ b/sdks/go/README.md
@@ -131,6 +131,7 @@ Executing all unit tests for the SDK is possible from the `\sdks\go`
 To test your change as Jenkins would execute it from a PR, from the beam
 root directory, run:
  * `./gradlew :sdks:go:goTest` executes the unit tests.
+ * `./gradlew :sdks:go:test:prismValidatesRunner` validates the SDK against the Go Prism runner as a standalone binary, with containers.
  * `./gradlew :sdks:go:test:ulrValidatesRunner` validates the SDK against the Portable Python runner.
  * `./gradlew :sdks:go:test:flinkValidatesRunner` validates the SDK against the Flink runner.
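For orientation on the ValidatesRunner entries above: a pipeline submitted to a locally running Prism job server is an ordinary Go SDK pipeline; only the runner flags differ. Below is a minimal sketch, not part of this change — the pipeline shape is illustrative, the ports match the `--job_port`/`--web_port` flag defaults added to `prism.go` further down, and depending on your environment you may also need `--environment_type=LOOPBACK`:

```go
// smoke.go - a tiny pipeline for exercising a local job server.
// Start Prism first:  go run ./sdks/go/cmd/prism --job_port=8073 --web_port=8074
// Then submit with:   go run smoke.go --runner=universal --endpoint=localhost:8073
package main

import (
	"context"
	"flag"

	"github.com/apache/beam/sdks/v2/go/pkg/beam"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/stats"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/debug"
)

func main() {
	flag.Parse()
	beam.Init()

	p := beam.NewPipeline()
	s := p.Root()

	// Sum a small bounded collection and print the result.
	nums := beam.CreateList(s, []int{1, 2, 3, 4})
	debug.Print(s, stats.Sum(s, nums))

	// beamx.Run dispatches on the --runner flag (direct, universal, etc.).
	if err := beamx.Run(context.Background(), p); err != nil {
		panic(err)
	}
}
```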
diff --git a/sdks/go/cmd/prism/prism.go b/sdks/go/cmd/prism/prism.go index f00a16c9b2d0..804ae0c2ab2d 100644 --- a/sdks/go/cmd/prism/prism.go +++ b/sdks/go/cmd/prism/prism.go @@ -30,6 +30,8 @@ import ( ) var ( + jobPort = flag.Int("job_port", 8073, "specify the job management service port") + webPort = flag.Int("web_port", 8074, "specify the web ui port") jobManagerEndpoint = flag.String("jm_override", "", "set to only stand up a web ui that refers to a seperate JobManagement endpoint") serveHTTP = flag.Bool("serve_http", true, "enable or disable the web ui") ) @@ -37,12 +39,12 @@ var ( func main() { flag.Parse() ctx := context.Background() - cli, err := makeJobClient(ctx, *jobManagerEndpoint) + cli, err := makeJobClient(ctx, prism.Options{Port: *jobPort}, *jobManagerEndpoint) if err != nil { log.Fatalf("error creating job server: %v", err) } if *serveHTTP { - if err := prism.CreateWebServer(ctx, cli, prism.Options{Port: 8074}); err != nil { + if err := prism.CreateWebServer(ctx, cli, prism.Options{Port: *webPort}); err != nil { log.Fatalf("error creating web server: %v", err) } } else { @@ -51,7 +53,7 @@ func main() { } } -func makeJobClient(ctx context.Context, endpoint string) (jobpb.JobServiceClient, error) { +func makeJobClient(ctx context.Context, opts prism.Options, endpoint string) (jobpb.JobServiceClient, error) { if endpoint != "" { clientConn, err := grpc.DialContext(ctx, endpoint, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) if err != nil { @@ -59,7 +61,7 @@ func makeJobClient(ctx context.Context, endpoint string) (jobpb.JobServiceClient } return jobpb.NewJobServiceClient(clientConn), nil } - cli, err := prism.CreateJobServer(ctx, prism.Options{Port: 8073}) + cli, err := prism.CreateJobServer(ctx, opts) if err != nil { return nil, fmt.Errorf("error creating local job server: %v", err) } diff --git a/sdks/go/pkg/beam/beam.shims.go b/sdks/go/pkg/beam/beam.shims.go index 6653fb0129f7..29ebaf2ca681 100644 --- a/sdks/go/pkg/beam/beam.shims.go +++ b/sdks/go/pkg/beam/beam.shims.go @@ -25,7 +25,6 @@ import ( // Library imports "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec" - "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/graphx/schema" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/sdf" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/util/reflectx" @@ -44,13 +43,6 @@ func init() { runtime.RegisterFunction(schemaDec) runtime.RegisterFunction(schemaEnc) runtime.RegisterFunction(swapKVFn) - runtime.RegisterType(reflect.TypeOf((*createFn)(nil)).Elem()) - schema.RegisterType(reflect.TypeOf((*createFn)(nil)).Elem()) - runtime.RegisterType(reflect.TypeOf((*reflect.Type)(nil)).Elem()) - schema.RegisterType(reflect.TypeOf((*reflect.Type)(nil)).Elem()) - runtime.RegisterType(reflect.TypeOf((*reflectx.Func)(nil)).Elem()) - schema.RegisterType(reflect.TypeOf((*reflectx.Func)(nil)).Elem()) - reflectx.RegisterStructWrapper(reflect.TypeOf((*createFn)(nil)).Elem(), wrapMakerCreateFn) reflectx.RegisterFunc(reflect.TypeOf((*func(reflect.Type, []byte) (typex.T, error))(nil)).Elem(), funcMakerReflect۰TypeSliceOfByteГTypex۰TError) reflectx.RegisterFunc(reflect.TypeOf((*func(reflect.Type, typex.T) ([]byte, error))(nil)).Elem(), funcMakerReflect۰TypeTypex۰TГSliceOfByteError) reflectx.RegisterFunc(reflect.TypeOf((*func([]byte, func(typex.T)) error)(nil)).Elem(), funcMakerSliceOfByteEmitTypex۰TГError) @@ -64,13 +56,6 @@ func init() { 
exec.RegisterEmitter(reflect.TypeOf((*func(typex.T))(nil)).Elem(), emitMakerTypex۰T) } -func wrapMakerCreateFn(fn any) map[string]reflectx.Func { - dfn := fn.(*createFn) - return map[string]reflectx.Func{ - "ProcessElement": reflectx.MakeFunc(func(a0 []byte, a1 func(typex.T)) error { return dfn.ProcessElement(a0, a1) }), - } -} - type callerReflect۰TypeSliceOfByteГTypex۰TError struct { fn func(reflect.Type, []byte) (typex.T, error) } diff --git a/sdks/go/pkg/beam/core/runtime/exec/fullvalue.go b/sdks/go/pkg/beam/core/runtime/exec/fullvalue.go index aaa049510f52..0a9343199a1c 100644 --- a/sdks/go/pkg/beam/core/runtime/exec/fullvalue.go +++ b/sdks/go/pkg/beam/core/runtime/exec/fullvalue.go @@ -251,6 +251,9 @@ func (s *decodeStream) Read() (*FullValue, error) { } err := s.d.DecodeTo(s.r, &s.ret) if err != nil { + if err == io.EOF { + return nil, io.EOF + } return nil, errors.Wrap(err, "decodeStream value decode failed") } s.next++ @@ -342,6 +345,9 @@ func (s *decodeMultiChunkStream) Read() (*FullValue, error) { if s.chunk == 0 && s.next == 0 { chunk, err := coder.DecodeVarInt(s.r.reader) if err != nil { + if err == io.EOF { + return nil, io.EOF + } return nil, errors.Wrap(err, "decodeMultiChunkStream chunk size decoding failed") } s.chunk = chunk diff --git a/sdks/go/pkg/beam/core/runtime/metricsx/metricsx.go b/sdks/go/pkg/beam/core/runtime/metricsx/metricsx.go index 4a872e291c6a..c71ead208364 100644 --- a/sdks/go/pkg/beam/core/runtime/metricsx/metricsx.go +++ b/sdks/go/pkg/beam/core/runtime/metricsx/metricsx.go @@ -24,6 +24,7 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/coder" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/metrics" pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "golang.org/x/exp/slog" ) // FromMonitoringInfos extracts metrics from monitored states and @@ -139,7 +140,7 @@ func groupByType(p *pipepb.Pipeline, minfos []*pipepb.MonitoringInfo) ( } } if len(errs) > 0 { - log.Printf("Warning: %v errors during metrics processing: %v\n", len(errs), errs) + slog.Debug("errors during metrics processing", "count", len(errs), "errors", errs) } return counters, distributions, gauges, msecs, pcols } diff --git a/sdks/go/pkg/beam/create.go b/sdks/go/pkg/beam/create.go index 91e9f335ef87..d2bd554963ee 100644 --- a/sdks/go/pkg/beam/create.go +++ b/sdks/go/pkg/beam/create.go @@ -112,11 +112,6 @@ func createList(s Scope, values []any, t reflect.Type) (PCollection, error) { // TODO(herohde) 6/26/2017: make 'create' a SDF once supported. See BEAM-2421. 
-func init() { - register.DoFn2x1[[]byte, func(T), error]((*createFn)(nil)) - register.Emitter1[T]() -} - type createFn struct { Values [][]byte `json:"values"` Type EncodedType `json:"type"` diff --git a/sdks/go/pkg/beam/io/databaseio/writer.go b/sdks/go/pkg/beam/io/databaseio/writer.go index 4719831581ff..ebd805b080d7 100644 --- a/sdks/go/pkg/beam/io/databaseio/writer.go +++ b/sdks/go/pkg/beam/io/databaseio/writer.go @@ -108,7 +108,7 @@ type valueTemplateGenerator struct { func (v *valueTemplateGenerator) generate(rowCount int, columnColunt int) string { switch v.driver { - case "postgres": + case "postgres", "pgx": // the point is to generate ($1,$2),($3,$4) valueTemplates := make([]string, rowCount) for i := 0; i < rowCount; i++ { diff --git a/sdks/go/pkg/beam/io/databaseio/writer_test.go b/sdks/go/pkg/beam/io/databaseio/writer_test.go index 24f617a9b857..91d7fb9246dd 100644 --- a/sdks/go/pkg/beam/io/databaseio/writer_test.go +++ b/sdks/go/pkg/beam/io/databaseio/writer_test.go @@ -39,6 +39,12 @@ func TestValueTemplateGenerator_generate(t *testing.T) { columnCount: 10, expected: "", }, + { + generator: &valueTemplateGenerator{"pgx"}, + rowCount: 4, + columnCount: 3, + expected: "($1,$2,$3),($4,$5,$6),($7,$8,$9),($10,$11,$12)", + }, { generator: &valueTemplateGenerator{"mysql"}, rowCount: 4, diff --git a/sdks/go/pkg/beam/io/parquetio/parquetio.go b/sdks/go/pkg/beam/io/parquetio/parquetio.go index 9c48d134014b..eb2a611f6836 100644 --- a/sdks/go/pkg/beam/io/parquetio/parquetio.go +++ b/sdks/go/pkg/beam/io/parquetio/parquetio.go @@ -96,7 +96,7 @@ func (a *parquetReadFn) ProcessElement(ctx context.Context, file fileio.Readable } // Write writes a PCollection to .parquet file. -// Write expects a type t of struct with parquet tags +// Write expects elements of a struct type with parquet tags // For example: // // type Student struct { @@ -108,7 +108,8 @@ func (a *parquetReadFn) ProcessElement(ctx context.Context, file fileio.Readable // Day int32 `parquet:"name=day, type=INT32, convertedtype=DATE"` // Ignored int32 //without parquet tag and won't write // } -func Write(s beam.Scope, filename string, t reflect.Type, col beam.PCollection) { +func Write(s beam.Scope, filename string, col beam.PCollection) { + t := col.Type().Type() s = s.Scope("parquetio.Write") filesystem.ValidateScheme(filename) pre := beam.AddFixedKey(s, col) diff --git a/sdks/go/pkg/beam/io/parquetio/parquetio_test.go b/sdks/go/pkg/beam/io/parquetio/parquetio_test.go index 1cceefcef46b..f3c901395609 100644 --- a/sdks/go/pkg/beam/io/parquetio/parquetio_test.go +++ b/sdks/go/pkg/beam/io/parquetio/parquetio_test.go @@ -95,7 +95,7 @@ func TestWrite(t *testing.T) { } p, s, sequence := ptest.CreateList(studentList) parquetFile := "./write_student.parquet" - Write(s, parquetFile, reflect.TypeOf(Student{}), sequence) + Write(s, parquetFile, sequence) t.Cleanup(func() { os.Remove(parquetFile) }) diff --git a/sdks/go/pkg/beam/runners/prism/internal/environments.go b/sdks/go/pkg/beam/runners/prism/internal/environments.go new file mode 100644 index 000000000000..3a429920fb28 --- /dev/null +++ b/sdks/go/pkg/beam/runners/prism/internal/environments.go @@ -0,0 +1,204 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package internal
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"os"
+
+	fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1"
+	pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1"
+	"github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/jobservices"
+	"github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/urns"
+	"github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/worker"
+	"golang.org/x/exp/slog"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+	"google.golang.org/protobuf/proto"
+
+	dtyp "github.com/docker/docker/api/types"
+	"github.com/docker/docker/api/types/container"
+	"github.com/docker/docker/api/types/mount"
+	dcli "github.com/docker/docker/client"
+	"github.com/docker/docker/pkg/stdcopy"
+)
+
+// TODO move environment handling to the worker package.
+
+func runEnvironment(ctx context.Context, j *jobservices.Job, env string, wk *worker.W) error {
+	logger := slog.With(slog.String("envID", wk.Env))
+	// TODO fix broken abstraction.
+	// We're starting a worker pool here, because that's the loopback environment.
+	// It's sort of a mess, largely because of loopback, which has
+	// a different flow from a provisioned docker container.
+	e := j.Pipeline.GetComponents().GetEnvironments()[env]
+	switch e.GetUrn() {
+	case urns.EnvExternal:
+		ep := &pipepb.ExternalPayload{}
+		if err := (proto.UnmarshalOptions{}).Unmarshal(e.GetPayload(), ep); err != nil {
+			logger.Error("unmarshalling external environment payload", "error", err)
+		}
+		go func() {
+			externalEnvironment(ctx, ep, wk)
+			slog.Debug("environment stopped", slog.String("job", j.String()))
+		}()
+		return nil
+	case urns.EnvDocker:
+		dp := &pipepb.DockerPayload{}
+		if err := (proto.UnmarshalOptions{}).Unmarshal(e.GetPayload(), dp); err != nil {
+			logger.Error("unmarshalling docker environment payload", "error", err)
+		}
+		return dockerEnvironment(ctx, logger, dp, wk, j.ArtifactEndpoint())
+	default:
+		return fmt.Errorf("environment %v with urn %v unimplemented", env, e.GetUrn())
+	}
+}
+
+func externalEnvironment(ctx context.Context, ep *pipepb.ExternalPayload, wk *worker.W) {
+	conn, err := grpc.Dial(ep.GetEndpoint().GetUrl(), grpc.WithTransportCredentials(insecure.NewCredentials()))
+	if err != nil {
+		panic(fmt.Sprintf("unable to dial sdk worker %v: %v", ep.GetEndpoint().GetUrl(), err))
+	}
+	defer conn.Close()
+	pool := fnpb.NewBeamFnExternalWorkerPoolClient(conn)
+
+	endpoint := &pipepb.ApiServiceDescriptor{
+		Url: wk.Endpoint(),
+	}
+	pool.StartWorker(ctx, &fnpb.StartWorkerRequest{
+		WorkerId:          wk.ID,
+		ControlEndpoint:   endpoint,
+		LoggingEndpoint:   endpoint,
+		ArtifactEndpoint:  endpoint,
+		ProvisionEndpoint: endpoint,
+		Params:            ep.GetParams(),
+	})
+	// Job processing happens here, but orchestrated by other goroutines
+	// This goroutine blocks until the context is cancelled, signalling
+	// that the pool runner should stop the worker.
+ <-ctx.Done() + + // Previous context cancelled so we need a new one + // for this request. + pool.StopWorker(context.Background(), &fnpb.StopWorkerRequest{ + WorkerId: wk.ID, + }) + wk.Stop() +} + +func dockerEnvironment(ctx context.Context, logger *slog.Logger, dp *pipepb.DockerPayload, wk *worker.W, artifactEndpoint string) error { + logger = logger.With("worker_id", wk.ID, "image", dp.GetContainerImage()) + + // TODO consider preserving client? + cli, err := dcli.NewClientWithOpts(dcli.FromEnv, dcli.WithAPIVersionNegotiation()) + if err != nil { + return fmt.Errorf("couldn't connect to docker:%w", err) + } + + // TODO abstract mounting cloud specific auths better. + const gcloudCredsEnv = "GOOGLE_APPLICATION_CREDENTIALS" + gcloudCredsFile, ok := os.LookupEnv(gcloudCredsEnv) + var mounts []mount.Mount + var envs []string + if ok { + _, err := os.Stat(gcloudCredsFile) + // File exists + if err == nil { + dockerGcloudCredsFile := "/docker_cred_file.json" + mounts = append(mounts, mount.Mount{ + Type: "bind", + Source: gcloudCredsFile, + Target: dockerGcloudCredsFile, + }) + credEnv := fmt.Sprintf("%v=%v", gcloudCredsEnv, dockerGcloudCredsFile) + envs = append(envs, credEnv) + } + } + if _, _, err := cli.ImageInspectWithRaw(ctx, dp.GetContainerImage()); err != nil { + // We don't have a local image, so we should pull it. + if rc, err := cli.ImagePull(ctx, dp.GetContainerImage(), dtyp.ImagePullOptions{}); err == nil { + // Copy the output, but discard it so we can wait until the image pull is finished. + io.Copy(io.Discard, rc) + rc.Close() + } else { + logger.Warn("unable to pull image and it's not local", "error", err) + } + } + + ccr, err := cli.ContainerCreate(ctx, &container.Config{ + Image: dp.GetContainerImage(), + Cmd: []string{ + fmt.Sprintf("--id=%v-%v", wk.JobKey, wk.Env), + fmt.Sprintf("--control_endpoint=%v", wk.Endpoint()), + fmt.Sprintf("--artifact_endpoint=%v", artifactEndpoint), + fmt.Sprintf("--provision_endpoint=%v", wk.Endpoint()), + fmt.Sprintf("--logging_endpoint=%v", wk.Endpoint()), + }, + Env: envs, + Tty: false, + }, &container.HostConfig{ + NetworkMode: "host", + Mounts: mounts, + AutoRemove: true, + }, nil, nil, "") + if err != nil { + cli.Close() + return fmt.Errorf("unable to create container image %v with docker for env %v, err: %w", dp.GetContainerImage(), wk.Env, err) + } + containerID := ccr.ID + logger = logger.With("container", containerID) + + if err := cli.ContainerStart(ctx, containerID, dtyp.ContainerStartOptions{}); err != nil { + cli.Close() + return fmt.Errorf("unable to start container image %v with docker for env %v, err: %w", dp.GetContainerImage(), wk.Env, err) + } + + // Start goroutine to wait on container state. + go func() { + defer cli.Close() + defer wk.Stop() + + statusCh, errCh := cli.ContainerWait(ctx, containerID, container.WaitConditionNotRunning) + select { + case <-ctx.Done(): + // Can't use command context, since it's already canceled here. 
+ err := cli.ContainerKill(context.Background(), containerID, "") + if err != nil { + logger.Error("docker container kill error", "error", err) + } + case err := <-errCh: + if err != nil { + logger.Error("docker container wait error", "error", err) + } + case resp := <-statusCh: + logger.Info("docker container has self terminated", "status_code", resp.StatusCode) + + rc, err := cli.ContainerLogs(ctx, containerID, dtyp.ContainerLogsOptions{Details: true, ShowStdout: true, ShowStderr: true}) + if err != nil { + logger.Error("docker container logs error", "error", err) + } + defer rc.Close() + var buf bytes.Buffer + stdcopy.StdCopy(&buf, &buf, rc) + logger.Error("container self terminated", "log", buf.String()) + } + }() + + return nil +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/execute.go b/sdks/go/pkg/beam/runners/prism/internal/execute.go index b2f9d866603a..c1ac6ea4488c 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/execute.go +++ b/sdks/go/pkg/beam/runners/prism/internal/execute.go @@ -20,11 +20,11 @@ import ( "fmt" "io" "sort" + "sync/atomic" "time" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec" - fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/engine" "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/jobservices" @@ -32,8 +32,6 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/worker" "golang.org/x/exp/maps" "golang.org/x/exp/slog" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" "google.golang.org/protobuf/proto" ) @@ -49,98 +47,59 @@ func RunPipeline(j *jobservices.Job) { // here, we only want and need the go one, operating // in loopback mode. envs := j.Pipeline.GetComponents().GetEnvironments() - if len(envs) != 1 { - j.Failed(fmt.Errorf("unable to execute multi-environment pipelines;\npipeline has environments: %+v", envs)) - return - } - env, _ := getOnlyPair(envs) - wk := worker.New(j.String()+"_"+env, env) // Cheating by having the worker id match the environment id. - go wk.Serve() - timeout := time.Minute - time.AfterFunc(timeout, func() { - if wk.Connected() { + wks := map[string]*worker.W{} + for envID := range envs { + wk, err := makeWorker(envID, j) + if err != nil { + j.Failed(err) return } - err := fmt.Errorf("prism %v didn't get control connection after %v", wk, timeout) - j.Failed(err) - j.CancelFn(err) - }) - + wks[envID] = wk + } // When this function exits, we cancel the context to clear // any related job resources. defer func() { j.CancelFn(fmt.Errorf("runPipeline returned, cleaning up")) }() - go runEnvironment(j.RootCtx, j, env, wk) j.SendMsg("running " + j.String()) j.Running() - err := executePipeline(j.RootCtx, wk, j) - if err != nil { + if err := executePipeline(j.RootCtx, wks, j); err != nil { j.Failed(err) return } j.SendMsg("pipeline completed " + j.String()) - // Stop the worker. - wk.Stop() - j.SendMsg("terminating " + j.String()) j.Done() } -// TODO move environment handling to the worker package. - -func runEnvironment(ctx context.Context, j *jobservices.Job, env string, wk *worker.W) { - // TODO fix broken abstraction. - // We're starting a worker pool here, because that's the loopback environment. - // It's sort of a mess, largely because of loopback, which has - // a different flow from a provisioned docker container. 
-	e := j.Pipeline.GetComponents().GetEnvironments()[env]
-	switch e.GetUrn() {
-	case urns.EnvExternal:
-		ep := &pipepb.ExternalPayload{}
-		if err := (proto.UnmarshalOptions{}).Unmarshal(e.GetPayload(), ep); err != nil {
-			slog.Error("unmarshing environment payload", err, slog.String("envID", wk.Env))
-		}
-		externalEnvironment(ctx, ep, wk)
-		slog.Debug("environment stopped", slog.String("envID", wk.String()), slog.String("job", j.String()))
-	default:
-		panic(fmt.Sprintf("environment %v with urn %v unimplemented", env, e.GetUrn()))
-	}
-}
-
-func externalEnvironment(ctx context.Context, ep *pipepb.ExternalPayload, wk *worker.W) {
-	conn, err := grpc.Dial(ep.GetEndpoint().GetUrl(), grpc.WithTransportCredentials(insecure.NewCredentials()))
-	if err != nil {
-		panic(fmt.Sprintf("unable to dial sdk worker %v: %v", ep.GetEndpoint().GetUrl(), err))
-	}
-	defer conn.Close()
-	pool := fnpb.NewBeamFnExternalWorkerPoolClient(conn)
+// makeWorker creates a worker for the given environment.
+func makeWorker(env string, j *jobservices.Job) (*worker.W, error) {
+	wk := worker.New(j.String()+"_"+env, env)

-	endpoint := &pipepb.ApiServiceDescriptor{
-		Url: wk.Endpoint(),
-	}
-	pool.StartWorker(ctx, &fnpb.StartWorkerRequest{
-		WorkerId:          wk.ID,
-		ControlEndpoint:   endpoint,
-		LoggingEndpoint:   endpoint,
-		ArtifactEndpoint:  endpoint,
-		ProvisionEndpoint: endpoint,
-		Params:            nil,
-	})
+	wk.EnvPb = j.Pipeline.GetComponents().GetEnvironments()[env]
+	wk.PipelineOptions = j.PipelineOptions()
+	wk.JobKey = j.JobKey()
+	wk.ArtifactEndpoint = j.ArtifactEndpoint()

-	// Job processing happens here, but orchestrated by other goroutines
-	// This goroutine blocks until the context is cancelled, signalling
-	// that the pool runner should stop the worker.
-	<-ctx.Done()
+	go wk.Serve()

-	// Previous context cancelled so we need a new one
-	// for this request.
-	pool.StopWorker(context.Background(), &fnpb.StopWorkerRequest{
-		WorkerId: wk.ID,
+	if err := runEnvironment(j.RootCtx, j, env, wk); err != nil {
+		return nil, fmt.Errorf("failed to start environment %v for job %v: %w", env, j, err)
+	}
+	// Check for connection succeeding after we've created the environment successfully.
+	timeout := 1 * time.Minute
+	time.AfterFunc(timeout, func() {
+		if wk.Connected() || wk.Stopped() {
+			return
+		}
+		err := fmt.Errorf("prism %v didn't get control connection to %v after %v", wk, wk.Endpoint(), timeout)
+		j.Failed(err)
+		j.CancelFn(err)
 	})
+	return wk, nil
 }

 type transformExecuter interface {
@@ -153,7 +112,7 @@ type processor struct {
 	transformExecuters map[string]transformExecuter
 }

-func executePipeline(ctx context.Context, wk *worker.W, j *jobservices.Job) error {
+func executePipeline(ctx context.Context, wks map[string]*worker.W, j *jobservices.Job) error {
 	pipeline := j.Pipeline
 	comps := proto.Clone(pipeline.GetComponents()).(*pipepb.Components)
@@ -196,7 +155,12 @@ func executePipeline(ctx context.Context, wk *worker.W, j *jobservices.Job) erro
 	// TODO move this loop and code into the preprocessor instead.
 	stages := map[string]*stage{}
 	var impulses []string
-	for _, stage := range topo {
+
+	// Initialize the "dataservice cache" to support side inputs.
+	// TODO(https://github.com/apache/beam/issues/28543), remove this concept.
+ ds := &worker.DataService{} + + for i, stage := range topo { tid := stage.transforms[0] t := ts[tid] urn := t.GetSpec().GetUrn() @@ -207,11 +171,11 @@ func executePipeline(ctx context.Context, wk *worker.W, j *jobservices.Job) erro if stage.exe != nil { stage.envID = stage.exe.ExecuteWith(t) } - stage.ID = wk.NextStage() + stage.ID = fmt.Sprintf("stage-%03d", i) + wk := wks[stage.envID] switch stage.envID { case "": // Runner Transforms - var onlyOut string for _, out := range t.GetOutputs() { onlyOut = out @@ -270,10 +234,8 @@ func executePipeline(ctx context.Context, wk *worker.W, j *jobservices.Job) erro em.AddStage(stage.ID, inputs, nil, []string{getOnlyValue(t.GetOutputs())}) } stages[stage.ID] = stage - wk.Descriptors[stage.ID] = stage.desc case wk.Env: - // Great! this is for this environment. // Broken abstraction. - if err := buildDescriptor(stage, comps, wk); err != nil { + if err := buildDescriptor(stage, comps, wk, ds); err != nil { return fmt.Errorf("prism error building stage %v: \n%w", stage.ID, err) } stages[stage.ID] = stage @@ -297,7 +259,12 @@ func executePipeline(ctx context.Context, wk *worker.W, j *jobservices.Job) erro maxParallelism := make(chan struct{}, 8) // Execute stages here bundleFailed := make(chan error) - bundles := em.Bundles(ctx, wk.NextInst) + + var instID uint64 + bundles := em.Bundles(ctx, func() string { + return fmt.Sprintf("inst%03d", atomic.AddUint64(&instID, 1)) + }) + for { select { case <-ctx.Done(): @@ -311,7 +278,8 @@ func executePipeline(ctx context.Context, wk *worker.W, j *jobservices.Job) erro go func(rb engine.RunBundle) { defer func() { <-maxParallelism }() s := stages[rb.StageID] - if err := s.Execute(ctx, j, wk, comps, em, rb); err != nil { + wk := wks[s.envID] + if err := s.Execute(ctx, j, wk, ds, comps, em, rb); err != nil { // Ensure we clean up on bundle failure em.FailBundle(rb) bundleFailed <- err diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go index e66def5b0fe8..99b786d45980 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go @@ -16,11 +16,14 @@ package jobservices import ( + "bytes" + "context" "fmt" "io" jobpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/jobmanagement_v1" "golang.org/x/exp/slog" + "google.golang.org/protobuf/encoding/prototext" ) func (s *Server) ReverseArtifactRetrievalService(stream jobpb.ArtifactStagingService_ReverseArtifactRetrievalServiceServer) error { @@ -47,7 +50,7 @@ func (s *Server) ReverseArtifactRetrievalService(stream jobpb.ArtifactStagingSer }, }, }) - var count int + var buf bytes.Buffer for { in, err := stream.Recv() if err == io.EOF { @@ -56,26 +59,61 @@ func (s *Server) ReverseArtifactRetrievalService(stream jobpb.ArtifactStagingSer if err != nil { return err } - if in.IsLast { - slog.Debug("GetArtifact finish", + if in.GetIsLast() { + slog.Debug("GetArtifact finished", slog.Group("dep", slog.String("urn", dep.GetTypeUrn()), slog.String("payload", string(dep.GetTypePayload()))), - slog.Int("bytesReceived", count)) + slog.Int("bytesReceived", buf.Len()), + slog.String("rtype", fmt.Sprintf("%T", in.GetResponse())), + ) break } // Here's where we go through each environment's artifacts. // We do nothing with them. 
switch req := in.GetResponse().(type) { case *jobpb.ArtifactResponseWrapper_GetArtifactResponse: - count += len(req.GetArtifactResponse.GetData()) + buf.Write(req.GetArtifactResponse.GetData()) + case *jobpb.ArtifactResponseWrapper_ResolveArtifactResponse: err := fmt.Errorf("unexpected ResolveArtifactResponse to GetArtifact: %v", in.GetResponse()) slog.Error("GetArtifact failure", err) return err } } + if len(s.artifacts) == 0 { + s.artifacts = map[string][]byte{} + } + s.artifacts[string(dep.GetTypePayload())] = buf.Bytes() } } return nil } + +func (s *Server) ResolveArtifacts(_ context.Context, req *jobpb.ResolveArtifactsRequest) (*jobpb.ResolveArtifactsResponse, error) { + return &jobpb.ResolveArtifactsResponse{ + Replacements: req.GetArtifacts(), + }, nil +} + +func (s *Server) GetArtifact(req *jobpb.GetArtifactRequest, stream jobpb.ArtifactRetrievalService_GetArtifactServer) error { + info := req.GetArtifact() + buf, ok := s.artifacts[string(info.GetTypePayload())] + if !ok { + pt := prototext.Format(info) + slog.Warn("unable to provide artifact to worker", "artifact_info", pt) + return fmt.Errorf("unable to provide %v to worker", pt) + } + chunk := 128 * 1024 * 1024 // 128 MB + var i int + for i+chunk < len(buf) { + stream.Send(&jobpb.GetArtifactResponse{ + Data: buf[i : i+chunk], + }) + i += chunk + } + stream.Send(&jobpb.GetArtifactResponse{ + Data: buf[i:], + }) + return nil +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go index 10d36066391f..cd302a70fcc0 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go @@ -68,6 +68,8 @@ type Job struct { key string jobName string + artifactEndpoint string + Pipeline *pipepb.Pipeline options *structpb.Struct @@ -88,6 +90,14 @@ type Job struct { metrics metricsStore } +func (j *Job) ArtifactEndpoint() string { + return j.artifactEndpoint +} + +func (j *Job) PipelineOptions() *structpb.Struct { + return j.options +} + // ContributeTentativeMetrics returns the datachannel read index, and any unknown monitoring short ids. func (j *Job) ContributeTentativeMetrics(payloads *fnpb.ProcessBundleProgressResponse) (int64, []string) { return j.metrics.ContributeTentativeMetrics(payloads) @@ -113,6 +123,10 @@ func (j *Job) LogValue() slog.Value { slog.String("name", j.jobName)) } +func (j *Job) JobKey() string { + return j.key +} + func (j *Job) SendMsg(msg string) { j.streamCond.L.Lock() defer j.streamCond.L.Unlock() diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go index e626a05b51e1..0fd7381e17f4 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go @@ -79,6 +79,8 @@ func (s *Server) Prepare(ctx context.Context, req *jobpb.PrepareJobRequest) (*jo streamCond: sync.NewCond(&sync.Mutex{}), RootCtx: rootCtx, CancelFn: cancelFn, + + artifactEndpoint: s.Endpoint(), } // Queue initial state of the job. 
@@ -91,14 +93,18 @@ func (s *Server) Prepare(ctx context.Context, req *jobpb.PrepareJobRequest) (*jo return nil, err } var errs []error - check := func(feature string, got, want any) { - if got != want { - err := unimplementedError{ - feature: feature, - value: got, + check := func(feature string, got any, wants ...any) { + for _, want := range wants { + if got == want { + return } - errs = append(errs, err) } + + err := unimplementedError{ + feature: feature, + value: got, + } + errs = append(errs, err) } // Inspect Transforms for unsupported features. @@ -112,6 +118,8 @@ func (s *Server) Prepare(ctx context.Context, req *jobpb.PrepareJobRequest) (*jo urns.TransformGBK, urns.TransformFlatten, urns.TransformCombinePerKey, + urns.TransformCombineGlobally, // Used by Java SDK + urns.TransformCombineGroupedValues, // Used by Java SDK urns.TransformAssignWindows: // Very few expected transforms types for submitted pipelines. // Most URNs are for the runner to communicate back to the SDK for execution. @@ -152,7 +160,7 @@ func (s *Server) Prepare(ctx context.Context, req *jobpb.PrepareJobRequest) (*jo check("WindowingStrategy.MergeStatus", ws.GetMergeStatus(), pipepb.MergeStatus_NON_MERGING) } if !bypassedWindowingStrategies[wsID] { - check("WindowingStrategy.OnTimeBehavior", ws.GetOnTimeBehavior(), pipepb.OnTimeBehavior_FIRE_IF_NONEMPTY) + check("WindowingStrategy.OnTimeBehavior", ws.GetOnTimeBehavior(), pipepb.OnTimeBehavior_FIRE_IF_NONEMPTY, pipepb.OnTimeBehavior_FIRE_ALWAYS) check("WindowingStrategy.OutputTime", ws.GetOutputTime(), pipepb.OutputTime_END_OF_WINDOW) // Non nil triggers should fail. if ws.GetTrigger().GetDefault() == nil { diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go index e3fb7766b519..bf2db814813c 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go @@ -29,6 +29,7 @@ import ( type Server struct { jobpb.UnimplementedJobServiceServer jobpb.UnimplementedArtifactStagingServiceServer + jobpb.UnimplementedArtifactRetrievalServiceServer fnpb.UnimplementedProvisionServiceServer // Server management @@ -42,6 +43,9 @@ type Server struct { // execute defines how a job is executed. execute func(*Job) + + // Artifact hack + artifacts map[string][]byte } // NewServer acquires the indicated port. @@ -60,6 +64,7 @@ func NewServer(port int, execute func(*Job)) *Server { s.server = grpc.NewServer(opts...) 
jobpb.RegisterJobServiceServer(s.server, s) jobpb.RegisterArtifactStagingServiceServer(s.server, s) + jobpb.RegisterArtifactRetrievalServiceServer(s.server, s) return s } diff --git a/sdks/go/pkg/beam/runners/prism/internal/stage.go b/sdks/go/pkg/beam/runners/prism/internal/stage.go index 4d8d4621168d..4ce3ce7ffeb6 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/stage.go +++ b/sdks/go/pkg/beam/runners/prism/internal/stage.go @@ -75,7 +75,7 @@ type stage struct { OutputsToCoders map[string]engine.PColInfo } -func (s *stage) Execute(ctx context.Context, j *jobservices.Job, wk *worker.W, comps *pipepb.Components, em *engine.ElementManager, rb engine.RunBundle) error { +func (s *stage) Execute(ctx context.Context, j *jobservices.Job, wk *worker.W, ds *worker.DataService, comps *pipepb.Components, em *engine.ElementManager, rb engine.RunBundle) error { slog.Debug("Execute: starting bundle", "bundle", rb) var b *worker.B @@ -204,8 +204,8 @@ progress: md := wk.MonitoringMetadata(ctx, unknownIDs) j.AddMetricShortIDs(md) } - // TODO handle side input data properly. - wk.D.Commit(b.OutputData) + // TODO(https://github.com/apache/beam/issues/28543) handle side input data properly. + ds.Commit(b.OutputData) var residualData [][]byte var minOutputWatermark map[string]mtime.Time for _, rr := range resp.GetResidualRoots() { @@ -270,7 +270,7 @@ func portFor(wInCid string, wk *worker.W) []byte { // It assumes that the side inputs are not sourced from PCollections generated by any transform in this stage. // // Because we need the local ids for routing the sources/sinks information. -func buildDescriptor(stg *stage, comps *pipepb.Components, wk *worker.W) error { +func buildDescriptor(stg *stage, comps *pipepb.Components, wk *worker.W, ds *worker.DataService) error { // Assume stage has an indicated primary input coders := map[string]*pipepb.Coder{} @@ -327,7 +327,7 @@ func buildDescriptor(stg *stage, comps *pipepb.Components, wk *worker.W) error { // Update side inputs to point to new PCollection with any replaced coders. transforms[si.transform].GetInputs()[si.local] = newGlobal } - prepSide, err := handleSideInput(si.transform, si.local, si.global, comps, coders, wk) + prepSide, err := handleSideInput(si.transform, si.local, si.global, comps, coders, ds) if err != nil { slog.Error("buildDescriptor: handleSideInputs", err, slog.String("transformID", si.transform)) return err @@ -392,7 +392,7 @@ func buildDescriptor(stg *stage, comps *pipepb.Components, wk *worker.W) error { } // handleSideInput returns a closure that will look up the data for a side input appropriate for the given watermark. 
-func handleSideInput(tid, local, global string, comps *pipepb.Components, coders map[string]*pipepb.Coder, wk *worker.W) (func(b *worker.B, watermark mtime.Time), error) { +func handleSideInput(tid, local, global string, comps *pipepb.Components, coders map[string]*pipepb.Coder, ds *worker.DataService) (func(b *worker.B, watermark mtime.Time), error) { t := comps.GetTransforms()[tid] sis, err := getSideInputs(t) if err != nil { @@ -412,7 +412,7 @@ func handleSideInput(tid, local, global string, comps *pipepb.Components, coders global, local := global, local return func(b *worker.B, watermark mtime.Time) { - data := wk.D.GetAllData(global) + data := ds.GetAllData(global) if b.IterableSideInputData == nil { b.IterableSideInputData = map[string]map[string]map[typex.Window][][]byte{} @@ -447,7 +447,7 @@ func handleSideInput(tid, local, global string, comps *pipepb.Components, coders global, local := global, local return func(b *worker.B, watermark mtime.Time) { // May be of zero length, but that's OK. Side inputs can be empty. - data := wk.D.GetAllData(global) + data := ds.GetAllData(global) if b.MultiMapSideInputData == nil { b.MultiMapSideInputData = map[string]map[string]map[typex.Window]map[string][][]byte{} } diff --git a/sdks/go/pkg/beam/runners/prism/internal/urns/urns.go b/sdks/go/pkg/beam/runners/prism/internal/urns/urns.go index 9fc2c1a923c5..bf1e36656661 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/urns/urns.go +++ b/sdks/go/pkg/beam/runners/prism/internal/urns/urns.go @@ -57,7 +57,9 @@ var ( // SDK transforms. TransformParDo = ptUrn(pipepb.StandardPTransforms_PAR_DO) TransformCombinePerKey = ctUrn(pipepb.StandardPTransforms_COMBINE_PER_KEY) + TransformCombineGlobally = ctUrn(pipepb.StandardPTransforms_COMBINE_GLOBALLY) TransformReshuffle = ctUrn(pipepb.StandardPTransforms_RESHUFFLE) + TransformCombineGroupedValues = cmbtUrn(pipepb.StandardPTransforms_COMBINE_GROUPED_VALUES) TransformPreCombine = cmbtUrn(pipepb.StandardPTransforms_COMBINE_PER_KEY_PRECOMBINE) TransformMerge = cmbtUrn(pipepb.StandardPTransforms_COMBINE_PER_KEY_MERGE_ACCUMULATORS) TransformExtract = cmbtUrn(pipepb.StandardPTransforms_COMBINE_PER_KEY_EXTRACT_OUTPUTS) diff --git a/sdks/go/pkg/beam/runners/prism/internal/web/assets/style.css b/sdks/go/pkg/beam/runners/prism/internal/web/assets/style.css index 74f4a6958d29..d252dc020e63 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/web/assets/style.css +++ b/sdks/go/pkg/beam/runners/prism/internal/web/assets/style.css @@ -101,10 +101,19 @@ footer { color: var(--beam-white); } +#page-container { + position: relative; + min-height: 100vh; +} + +#content-wrap { + padding-bottom: 2.5rem; /* Footer height */ +} + .container { width: 100%; margin: 0 auto; - padding: 80px 20px 40px; + padding: 40px 20px 0px; } .child { @@ -132,6 +141,53 @@ footer { padding: 12px 15px; } +/* Tooltip container */ +.tooltip { + display: inline-block; + border-bottom: 1px dotted var(--beam-black); +} + +/* Tooltip text */ +.tooltip .tooltiptext { + visibility: hidden; + width: max-content; + max-width: 400px; + background-color: var(--dark-grey); + color: var(--beam-white); + text-align: left; + padding: 5px 10px; + border-radius: 6px; + + /* Position the tooltip text */ + position: absolute; + z-index: 1; + bottom: 125%; + left: 50%; + margin-left: -60px; + + /* Fade in tooltip */ + opacity: 0; + transition: opacity 0.3s; +} + +/* Tooltip arrow */ +.tooltip .tooltiptext::after { + content: ""; + position: absolute; + top: 100%; + left: 18%; + margin-left: -5px; + border-width: 5px; + 
border-style: solid; + border-color: var(--dark-grey) transparent transparent transparent; +} + +/* Show the tooltip text when you mouse over the tooltip container */ +.tooltip:hover .tooltiptext { + visibility: visible; + opacity: 1; +} + @media screen and (max-width: 550px) { header { flex-direction: column;
diff --git a/sdks/go/pkg/beam/runners/prism/internal/web/debugz.go b/sdks/go/pkg/beam/runners/prism/internal/web/debugz.go index b34547e92752..015a9103134a 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/web/debugz.go +++ b/sdks/go/pkg/beam/runners/prism/internal/web/debugz.go @@ -21,6 +21,9 @@ import ( "runtime/metrics" "runtime/pprof" "strings" + "time" + + "github.com/dustin/go-humanize" ) type debugzData struct { @@ -54,16 +57,24 @@ func dumpMetrics() debugzData { name, value := sample.Name, sample.Value m := goRuntimeMetric{ - Name: name, + Name: strings.TrimSpace(name), Description: descs[i].Description, } // Handle each sample. switch value.Kind() { case metrics.KindUint64: - m.Value = fmt.Sprintf("%d", value.Uint64()) + if strings.HasSuffix(name, "bytes") { + m.Value = humanize.Bytes(value.Uint64()) + } else { + m.Value = humanize.FormatInteger("", int(value.Uint64())) + } case metrics.KindFloat64: - m.Value = fmt.Sprintf("%f", value.Float64()) + if strings.HasSuffix(name, "seconds") { + m.Value = time.Duration(float64(time.Second) * value.Float64()).String() + } else { + m.Value = humanize.FormatFloat("", value.Float64()) + } case metrics.KindFloat64Histogram: m.Value = fmt.Sprintf("%f", medianBucket(value.Float64Histogram())) // The histogram may be quite large, so let's just pull out @@ -88,16 +99,16 @@ func dumpMetrics() debugzData { data.Metrics = append(data.Metrics, goRuntimeMetric{ Name: "BUILD INFO", - Value: "n/a", - Description: b.String(), + Value: b.String(), + Description: "result from runtime/debug.ReadBuildInfo()", }) b.Reset() goroutineDump(&b) data.Metrics = append(data.Metrics, goRuntimeMetric{ Name: "GOROUTINES", - Value: "n/a", - Description: b.String(), + Value: b.String(), + Description: "consolidated active goroutines", }) b.Reset()
diff --git a/sdks/go/pkg/beam/runners/prism/internal/web/debugz.html b/sdks/go/pkg/beam/runners/prism/internal/web/debugz.html index ebf37f129ae3..175f44da7447 100644
--- a/sdks/go/pkg/beam/runners/prism/internal/web/debugz.html
+++ b/sdks/go/pkg/beam/runners/prism/internal/web/debugz.html
@@ -30,14 +30,16 @@
       <table>
         <tr>
           <th>Name</th>
           <th>Value</th>
-          <th>Description</th>
         </tr>
       {{ range .Metrics }}
         <tr>
-          <td>{{ .Name }}</td>
+          <td>
+            <div class="tooltip">{{ .Name }}
+              <span class="tooltiptext">{{ .Description }}</span>
+            </div>
+          </td>
           <td>{{ .Value }}</td>
-          <td>{{ .Description }}</td>
         </tr>
       {{ else }}
         <tr>
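The debugz.go change above replaces raw `%d`/`%f` formatting with human-oriented rendering, and the debugz.html change folds each metric's description into a hover tooltip. A small sketch of the formatting calls involved, using `github.com/dustin/go-humanize` with made-up metric values:

```go
package main

import (
	"fmt"
	"time"

	"github.com/dustin/go-humanize"
)

func main() {
	// Byte-valued runtime metrics render as "1.2 GB"-style strings.
	fmt.Println(humanize.Bytes(1234567890))

	// Plain counters gain thousands separators.
	fmt.Println(humanize.FormatInteger("", 1234567))

	// Seconds-valued float metrics read better as Go durations.
	secs := 0.0123
	fmt.Println(time.Duration(float64(time.Second) * secs).String()) // 12.3ms
}
```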
diff --git a/sdks/go/pkg/beam/runners/prism/internal/web/index.html b/sdks/go/pkg/beam/runners/prism/internal/web/index.html index fe9bb056e51c..1aa0ed719d87 100644
--- a/sdks/go/pkg/beam/runners/prism/internal/web/index.html
+++ b/sdks/go/pkg/beam/runners/prism/internal/web/index.html
@@ -22,31 +22,33 @@
-  <div class="container">
-    {{ if .Error}}{{.Error}}{{end}}
-    <table>
-      <tr>
-        <th>ID</th>
-        <th>Name</th>
-        <th>State</th>
-      </tr>
-      {{ range .Jobs }}
-      <tr>
-        <td>{{ .JobId }}</td>
-        <td>{{ .JobName }}</td>
-        <td>{{ .State }}</td>
-      </tr>
-      {{ else }}
-      <tr>
-        <td>No jobs have been run.</td>
-      </tr>
-      {{ end }}
-    </table>
-  </div>
+  <div id="page-container">
+    <div id="content-wrap">
+      <div class="container">
+        {{ if .Error}}{{.Error}}{{end}}
+        <table>
+          <tr>
+            <th>ID</th>
+            <th>Name</th>
+            <th>State</th>
+          </tr>
+          {{ range .Jobs }}
+          <tr>
+            <td>{{ .JobId }}</td>
+            <td>{{ .JobName }}</td>
+            <td>{{ .State }}</td>
+          </tr>
+          {{ else }}
+          <tr>
+            <td>No jobs have been run.</td>
+          </tr>
+          {{ end }}
+        </table>
+      </div>
+    </div>
+  </div>
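For reference, the jobs table in index.html is a plain Go `html/template` range. A toy sketch of how such a page renders (the `job` and `page` structs are invented for illustration; prism's actual handler types differ):

```go
package main

import (
	"html/template"
	"os"
)

// Invented types standing in for the data the prism web handler supplies.
type job struct{ JobId, JobName, State string }
type page struct {
	Error string
	Jobs  []job
}

const tmpl = `{{ if .Error }}<div class="error">{{ .Error }}</div>{{ end }}
<table>
  <tr><th>ID</th><th>Name</th><th>State</th></tr>
{{ range .Jobs }}  <tr><td>{{ .JobId }}</td><td>{{ .JobName }}</td><td>{{ .State }}</td></tr>
{{ else }}  <tr><td>No jobs have been run.</td></tr>
{{ end }}</table>
`

func main() {
	t := template.Must(template.New("index").Parse(tmpl))
	// With an empty Jobs slice, the {{ else }} branch emits the placeholder row.
	if err := t.Execute(os.Stdout, page{Jobs: nil}); err != nil {
		panic(err)
	}
}
```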
diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/bundle.go b/sdks/go/pkg/beam/runners/prism/internal/worker/bundle.go index 98479e3db071..573bdf4aeb9d 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/worker/bundle.go +++ b/sdks/go/pkg/beam/runners/prism/internal/worker/bundle.go @@ -93,7 +93,7 @@ func (b *B) Respond(resp *fnpb.InstructionResponse) { } b.responded = true if resp.GetError() != "" { - b.BundleErr = fmt.Errorf("bundle %v failed:%v", resp.GetInstructionId(), resp.GetError()) + b.BundleErr = fmt.Errorf("bundle %v %v failed:%v", resp.GetInstructionId(), b.PBDID, resp.GetError()) close(b.Resp) return } diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go b/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go index 0ad7ccb37032..4968c9eb433e 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go +++ b/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go @@ -43,6 +43,7 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/encoding/prototext" + "google.golang.org/protobuf/types/known/structpb" ) // A W manages worker environments, sending them work @@ -57,12 +58,16 @@ type W struct { ID, Env string + JobKey, ArtifactEndpoint string + EnvPb *pipepb.Environment + PipelineOptions *structpb.Struct + // Server management lis net.Listener server *grpc.Server // These are the ID sources - inst, bund uint64 + inst uint64 connected, stopped atomic.Bool InstReqs chan *fnpb.InstructionRequest @@ -71,8 +76,6 @@ type W struct { mu sync.Mutex activeInstructions map[string]controlResponder // Active instructions keyed by InstructionID Descriptors map[string]*fnpb.ProcessBundleDescriptor // Stages keyed by PBDID - - D *DataService } type controlResponder interface { @@ -99,14 +102,13 @@ func New(id, env string) *W { activeInstructions: make(map[string]controlResponder), Descriptors: make(map[string]*fnpb.ProcessBundleDescriptor), - - D: &DataService{}, } slog.Debug("Serving Worker components", slog.String("endpoint", wk.Endpoint())) fnpb.RegisterBeamFnControlServer(wk.server, wk) fnpb.RegisterBeamFnDataServer(wk.server, wk) fnpb.RegisterBeamFnLoggingServer(wk.server, wk) fnpb.RegisterBeamFnStateServer(wk.server, wk) + fnpb.RegisterProvisionServiceServer(wk.server, wk) return wk } @@ -143,11 +145,7 @@ func (wk *W) Stop() { } func (wk *W) NextInst() string { - return fmt.Sprintf("inst%03d", atomic.AddUint64(&wk.inst, 1)) -} - -func (wk *W) NextStage() string { - return fmt.Sprintf("stage%03d", atomic.AddUint64(&wk.bund, 1)) + return fmt.Sprintf("inst-%v-%03d", wk.Env, atomic.AddUint64(&wk.inst, 1)) } // TODO set logging level. @@ -159,20 +157,24 @@ func (wk *W) GetProvisionInfo(_ context.Context, _ *fnpb.GetProvisionInfoRequest } resp := &fnpb.GetProvisionInfoResponse{ Info: &fnpb.ProvisionInfo{ - // TODO: Add the job's Pipeline options // TODO: Include runner capabilities with the per job configuration. 
RunnerCapabilities: []string{ urns.CapabilityMonitoringInfoShortIDs, }, - LoggingEndpoint: endpoint, - ControlEndpoint: endpoint, - ArtifactEndpoint: endpoint, - // TODO add this job's RetrievalToken - // TODO add this job's artifact Dependencies + LoggingEndpoint: endpoint, + ControlEndpoint: endpoint, + ArtifactEndpoint: &pipepb.ApiServiceDescriptor{ + Url: wk.ArtifactEndpoint, + }, + + RetrievalToken: wk.JobKey, + Dependencies: wk.EnvPb.GetDependencies(), + PipelineOptions: wk.PipelineOptions, Metadata: map[string]string{ "runner": "prism", "runner_version": core.SdkVersion, + "variant": "test", }, }, } @@ -253,6 +255,11 @@ func (wk *W) Connected() bool { return wk.connected.Load() } +// Stopped indicates that the worker has stopped. +func (wk *W) Stopped() bool { + return wk.stopped.Load() +} + // Control relays instructions to SDKs and back again, coordinated via unique instructionIDs. // // Requests come from the runner, and are sent to the client in the SDK. @@ -302,10 +309,12 @@ func (wk *W) Control(ctrl fnpb.BeamFnControl_ControlServer) error { wk.mu.Lock() // Fail extant instructions slog.Debug("SDK Disconnected", "worker", wk, "ctx_error", ctrl.Context().Err(), "outstanding_instructions", len(wk.activeInstructions)) + + msg := fmt.Sprintf("SDK worker disconnected: %v, %v active instructions", wk.String(), len(wk.activeInstructions)) for instID, b := range wk.activeInstructions { b.Respond(&fnpb.InstructionResponse{ InstructionId: instID, - Error: "SDK Disconnected", + Error: msg, }) } wk.mu.Unlock() @@ -526,7 +535,7 @@ func (wk *W) sendInstruction(ctx context.Context, req *fnpb.InstructionRequest) req.InstructionId = progInst - if wk.stopped.Load() { + if wk.Stopped() { return nil } wk.InstReqs <- req @@ -556,6 +565,7 @@ func (wk *W) MonitoringMetadata(ctx context.Context, unknownIDs []string) *fnpb. // DataService is slated to be deleted in favour of stage based state // management for side inputs. +// TODO(https://github.com/apache/beam/issues/28543), remove this concept. type DataService struct { mu sync.Mutex // TODO actually quick process the data to windows here as well. 
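With the worker.go changes above, prism's provision service now returns a populated artifact endpoint, retrieval token, dependencies, and pipeline options rather than placeholders. A minimal sketch of how an SDK-side client could read those fields (the endpoint address is assumed for the example):

```go
package main

import (
	"context"
	"fmt"

	fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	ctx := context.Background()
	// Assumed worker endpoint for the sketch.
	cc, err := grpc.DialContext(ctx, "localhost:8099", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		panic(err)
	}
	defer cc.Close()

	resp, err := fnpb.NewProvisionServiceClient(cc).GetProvisionInfo(ctx, &fnpb.GetProvisionInfoRequest{})
	if err != nil {
		panic(err)
	}
	info := resp.GetInfo()
	// Fields that were previously TODOs in prism's response.
	fmt.Println("retrieval token:  ", info.GetRetrievalToken())
	fmt.Println("artifact endpoint:", info.GetArtifactEndpoint().GetUrl())
	fmt.Println("dependencies:     ", len(info.GetDependencies()))
}
```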
diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go b/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go index ed61f484481c..6a90b463c45d 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go +++ b/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go @@ -50,18 +50,6 @@ func TestWorker_NextInst(t *testing.T) { } } -func TestWorker_NextStage(t *testing.T) { - w := New("test", "testEnv") - - stageIDs := map[string]struct{}{} - for i := 0; i < 100; i++ { - stageIDs[w.NextStage()] = struct{}{} - } - if got, want := len(stageIDs), 100; got != want { - t.Errorf("calling w.NextStage() got %v unique ids, want %v", got, want) - } -} - func TestWorker_GetProcessBundleDescriptor(t *testing.T) { w := New("test", "testEnv") @@ -189,7 +177,7 @@ func TestWorker_Data_HappyPath(t *testing.T) { b := &B{ InstID: instID, - PBDID: wk.NextStage(), + PBDID: "teststageID", InputData: [][]byte{ {1, 1, 1, 1, 1, 1}, },
diff --git a/sdks/go/pkg/beam/runners/prism/prism.go b/sdks/go/pkg/beam/runners/prism/prism.go index 0be35ad5cc33..bcb7a3fb689f 100644 --- a/sdks/go/pkg/beam/runners/prism/prism.go +++ b/sdks/go/pkg/beam/runners/prism/prism.go @@ -49,9 +49,9 @@ func Execute(ctx context.Context, p *beam.Pipeline) (beam.PipelineResult, error) s := jobservices.NewServer(0, internal.RunPipeline) *jobopts.Endpoint = s.Endpoint() go s.Serve() - } - if !jobopts.IsLoopback() { - *jobopts.EnvironmentType = "loopback" + if !jobopts.IsLoopback() { + *jobopts.EnvironmentType = "loopback" + } } return universal.Execute(ctx, p) }
diff --git a/sdks/go/pkg/beam/runners/universal/runnerlib/stage.go b/sdks/go/pkg/beam/runners/universal/runnerlib/stage.go index 732f4382ab5d..d5cc6aa7327a 100644 --- a/sdks/go/pkg/beam/runners/universal/runnerlib/stage.go +++ b/sdks/go/pkg/beam/runners/universal/runnerlib/stage.go @@ -44,7 +44,7 @@ func Stage(ctx context.Context, id, endpoint, binary, st string) (retrievalToken defer cc.Close() if err := StageViaPortableAPI(ctx, cc, binary, st); err == nil { - return "", nil + return st, nil } log.Warnf(ctx, "unable to stage with PortableAPI: %v; falling back to legacy", err)
diff --git a/sdks/go/pkg/beam/util.go b/sdks/go/pkg/beam/util.go index d591dedd7624..4b24af831134 100644 --- a/sdks/go/pkg/beam/util.go +++ b/sdks/go/pkg/beam/util.go @@ -16,7 +16,7 @@ package beam //go:generate go install github.com/apache/beam/sdks/v2/go/cmd/starcgen -//go:generate starcgen --package=beam --identifiers=addFixedKeyFn,dropKeyFn,dropValueFn,swapKVFn,explodeFn,jsonDec,jsonEnc,protoEnc,protoDec,schemaEnc,schemaDec,makePartitionFn,createFn +//go:generate starcgen --package=beam --identifiers=addFixedKeyFn,dropKeyFn,dropValueFn,swapKVFn,explodeFn,jsonDec,jsonEnc,protoEnc,protoDec,schemaEnc,schemaDec,makePartitionFn //go:generate go fmt // We have some freedom to create various utilities, users can use depending on
diff --git a/sdks/go/test/build.gradle b/sdks/go/test/build.gradle index d53491194753..5b39cf81400f 100644 --- a/sdks/go/test/build.gradle +++ b/sdks/go/test/build.gradle @@ -173,6 +173,30 @@ tasks.register("ulrValidatesRunner") { } } +// ValidatesRunner tests for Prism. Runs tests in the integration directory +// with prism in docker mode to validate that the runner behaves as expected.
+task prismValidatesRunner { + group = "Verification" + + dependsOn ":sdks:go:test:goBuild" + dependsOn ":sdks:go:container:docker" + dependsOn ":sdks:java:container:java8:docker" + dependsOn ":sdks:java:testing:expansion-service:buildTestExpansionServiceJar" + doLast { + def pipelineOptions = [ // Pipeline options piped directly to Go SDK flags. + "--expansion_jar=test:${project(":sdks:java:testing:expansion-service").buildTestExpansionServiceJar.archivePath}", + ] + def options = [ + "--runner prism", + "--pipeline_opts \"${pipelineOptions.join(' ')}\"", + ] + exec { + executable "sh" + args "-c", "./run_validatesrunner_tests.sh ${options.join(' ')}" + } + } +} + // A method for configuring a cross-language validates runner test task, // intended to be used in calls to createCrossLanguageValidatesRunnerTask. ext.goIoValidatesRunnerTask = { proj, name, scriptOpts, pipelineOpts -> diff --git a/sdks/go/test/integration/integration.go b/sdks/go/test/integration/integration.go index dee161dcb2af..f3cffd176110 100644 --- a/sdks/go/test/integration/integration.go +++ b/sdks/go/test/integration/integration.go @@ -38,11 +38,11 @@ package integration import ( "fmt" "math/rand" + "os" "regexp" "strings" "testing" "time" - "os" // common runner flag. "github.com/apache/beam/sdks/v2/go/pkg/beam/options/jobopts" @@ -140,6 +140,8 @@ var portableFilters = []string{ } var prismFilters = []string{ + // The prism runner does not yet support Java's CoGBK. + "TestXLang_CoGroupBy", // The prism runner does not support the TestStream primitive "TestTestStream.*", // The trigger and pane tests uses TestStream diff --git a/sdks/go/test/run_validatesrunner_tests.sh b/sdks/go/test/run_validatesrunner_tests.sh index c25d59bd57b0..60dd0cd97f11 100755 --- a/sdks/go/test/run_validatesrunner_tests.sh +++ b/sdks/go/test/run_validatesrunner_tests.sh @@ -257,8 +257,10 @@ print(s.getsockname()[1]) s.close() " +TMPDIR=$(mktemp -d) + # Set up environment based on runner. -if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || "$RUNNER" == "portable" ]]; then +if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || "$RUNNER" == "portable" || "$RUNNER" == "prism" ]]; then if [[ -z "$ENDPOINT" ]]; then JOB_PORT=$(python3 -c "$SOCKET_SCRIPT") ENDPOINT="localhost:$JOB_PORT" @@ -288,6 +290,14 @@ if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || "$ python3 \ -m apache_beam.runners.portability.local_job_service_main \ --port $JOB_PORT & + elif [[ "$RUNNER" == "prism" ]]; then + PRISMBIN=$TMPDIR/prismbin + cd sdks + ./go/run_with_go_version.sh build -o $PRISMBIN go/cmd/prism/*.go + $PRISMBIN \ + --serve_http=false \ + --job_port $JOB_PORT & + cd .. else echo "Unknown runner: $RUNNER" exit 1; @@ -340,7 +350,6 @@ if [[ "$RUNNER" == "dataflow" ]]; then gcloud --version # ensure gcloud is version 186 or above - TMPDIR=$(mktemp -d) gcloud_ver=$(gcloud -v | head -1 | awk '{print $4}') if [[ "$gcloud_ver" < "186" ]] then @@ -402,6 +411,7 @@ fi ARGS="$ARGS -p $SIMULTANEOUS" # Assemble test arguments and pipeline options. 
+ARGS="$ARGS -v" ARGS="$ARGS -timeout $TIMEOUT" ARGS="$ARGS --runner=$RUNNER" ARGS="$ARGS --project=$DATAFLOW_PROJECT" @@ -449,9 +459,9 @@ if [[ "$RUNNER" == "dataflow" ]]; then docker rmi $JAVA_CONTAINER:$JAVA_TAG || echo "Failed to remove container" gcloud --quiet container images delete $JAVA_CONTAINER:$JAVA_TAG || echo "Failed to delete container" fi - - # Clean up tempdir - rm -rf $TMPDIR fi +# Clean up tempdir +rm -rf $TMPDIR + exit $TEST_EXIT_CODE diff --git a/sdks/java/container/boot.go b/sdks/java/container/boot.go index 0e39d907f075..f7fd7437c88a 100644 --- a/sdks/java/container/boot.go +++ b/sdks/java/container/boot.go @@ -159,8 +159,9 @@ func main() { cp = append(cp, filepath.Join(dir, filepath.FromSlash(name))) } + var setRecommendedMaxXmx = strings.Contains(options, "set_recommended_max_xmx") args := []string{ - "-Xmx" + strconv.FormatUint(heapSizeLimit(info), 10), + "-Xmx" + strconv.FormatUint(heapSizeLimit(info, setRecommendedMaxXmx), 10), // ParallelGC the most adequate for high throughput and lower CPU utilization // It is the default GC in Java 8, but not on newer versions "-XX:+UseParallelGC", @@ -266,9 +267,14 @@ func makePipelineOptionsFile(options string) error { // it returns 70% of the physical memory on the machine. If it cannot determine // that value, it returns 1GB. This is an imperfect heuristic. It aims to // ensure there is memory for non-heap use and other overhead, while also not -// underutilizing the machine. -func heapSizeLimit(info *fnpb.ProvisionInfo) uint64 { - if size, err := syscallx.PhysicalMemorySize(); err == nil { +// underutilizing the machine. if set_recommended_max_xmx experiment is enabled, +// sets xmx to 32G. Under 32G JVM enables CompressedOops. CompressedOops +// utilizes memory more efficiently, and has positive impact on GC performance +// and cache hit rate. 
diff --git a/sdks/java/container/boot.go b/sdks/java/container/boot.go index 0e39d907f075..f7fd7437c88a 100644 --- a/sdks/java/container/boot.go +++ b/sdks/java/container/boot.go @@ -159,8 +159,9 @@ func main() { cp = append(cp, filepath.Join(dir, filepath.FromSlash(name))) } + var setRecommendedMaxXmx = strings.Contains(options, "set_recommended_max_xmx") args := []string{ - "-Xmx" + strconv.FormatUint(heapSizeLimit(info), 10), + "-Xmx" + strconv.FormatUint(heapSizeLimit(info, setRecommendedMaxXmx), 10), // ParallelGC the most adequate for high throughput and lower CPU utilization // It is the default GC in Java 8, but not on newer versions "-XX:+UseParallelGC", @@ -266,9 +267,14 @@ func makePipelineOptionsFile(options string) error { // it returns 70% of the physical memory on the machine. If it cannot determine // that value, it returns 1GB. This is an imperfect heuristic. It aims to // ensure there is memory for non-heap use and other overhead, while also not -// underutilizing the machine. -func heapSizeLimit(info *fnpb.ProvisionInfo) uint64 { - if size, err := syscallx.PhysicalMemorySize(); err == nil { +// underutilizing the machine. If the set_recommended_max_xmx experiment is enabled, +// it sets Xmx to 32G. Under 32G, the JVM enables CompressedOops. CompressedOops +// utilizes memory more efficiently, and has a positive impact on GC performance +// and cache hit rate. +func heapSizeLimit(info *fnpb.ProvisionInfo, setRecommendedMaxXmx bool) uint64 { + if setRecommendedMaxXmx { + return 32 << 30 + } else if size, err := syscallx.PhysicalMemorySize(); err == nil { return (size * 70) / 100 } return 1 << 30
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java index fb6746b9cdd1..c0683ef44616 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java @@ -56,6 +56,7 @@ import org.apache.beam.vendor.grpc.v1p54p0.com.google.protobuf.ByteString; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Maps; @@ -74,7 +75,23 @@ public class SchemaTranslation { private static final Logger LOG = LoggerFactory.getLogger(SchemaTranslation.class); private static final String URN_BEAM_LOGICAL_DECIMAL = FixedPrecisionNumeric.BASE_IDENTIFIER; - private static final String URN_BEAM_LOGICAL_JAVASDK = "beam:logical_type:javasdk:v1"; + + private static String getLogicalTypeUrn(String identifier) { + if (identifier.startsWith("beam:logical_type:")) { + return identifier; + } else { + String filtered = identifier.replaceAll("[^0-9A-Za-z_]", "").toLowerCase(); + if (!Strings.isNullOrEmpty(filtered)) { + // urns for non-standard Java SDK logical types are assigned a javasdk_ prefix + return String.format("beam:logical_type:javasdk_%s:v1", filtered); + } else { + // raw "javasdk" name should only be a last resort. Types defined in Beam should have their + // own URN. + return "beam:logical_type:javasdk:v1"; + } + } + } + private static final String URN_BEAM_LOGICAL_MILLIS_INSTANT = SchemaApi.LogicalTypes.Enum.MILLIS_INSTANT .getValueDescriptor() @@ -84,18 +101,18 @@ public class SchemaTranslation { // TODO(https://github.com/apache/beam/issues/19715): Populate this with a LogicalTypeRegistrar, // which includes a way to construct // the LogicalType given an argument.
- private static final ImmutableMap<String, Class<? extends LogicalType<?, ?>>> - STANDARD_LOGICAL_TYPES = - ImmutableMap.<String, Class<? extends LogicalType<?, ?>>>builder() - .put(FixedPrecisionNumeric.IDENTIFIER, FixedPrecisionNumeric.class) - .put(MicrosInstant.IDENTIFIER, MicrosInstant.class) - .put(SchemaLogicalType.IDENTIFIER, SchemaLogicalType.class) - .put(PythonCallable.IDENTIFIER, PythonCallable.class) - .put(FixedBytes.IDENTIFIER, FixedBytes.class) - .put(VariableBytes.IDENTIFIER, VariableBytes.class) - .put(FixedString.IDENTIFIER, FixedString.class) - .put(VariableString.IDENTIFIER, VariableString.class) - .build(); + @VisibleForTesting + static final ImmutableMap<String, Class<? extends LogicalType<?, ?>>> STANDARD_LOGICAL_TYPES = + ImmutableMap.<String, Class<? extends LogicalType<?, ?>>>builder() + .put(FixedPrecisionNumeric.IDENTIFIER, FixedPrecisionNumeric.class) + .put(MicrosInstant.IDENTIFIER, MicrosInstant.class) + .put(SchemaLogicalType.IDENTIFIER, SchemaLogicalType.class) + .put(PythonCallable.IDENTIFIER, PythonCallable.class) + .put(FixedBytes.IDENTIFIER, FixedBytes.class) + .put(VariableBytes.IDENTIFIER, VariableBytes.class) + .put(FixedString.IDENTIFIER, FixedString.class) + .put(VariableString.IDENTIFIER, VariableString.class) + .build(); public static SchemaApi.Schema schemaToProto(Schema schema, boolean serializeLogicalType) { String uuid = schema.getUUID() != null ? schema.getUUID().toString() : ""; @@ -179,11 +196,7 @@ static SchemaApi.FieldType fieldTypeToProto(FieldType fieldType, boolean seriali fieldValueToProto(logicalType.getArgumentType(), logicalType.getArgument())); } } else { - // TODO(https://github.com/apache/beam/issues/19715): "javasdk" types should only - // be a last resort. Types defined in Beam should have their own URN, and there - // should be a mechanism for users to register their own types by URN. - String urn = - identifier.startsWith("beam:logical_type:") ? identifier : URN_BEAM_LOGICAL_JAVASDK; + String urn = getLogicalTypeUrn(identifier); logicalTypeBuilder = SchemaApi.LogicalType.newBuilder() .setRepresentation( @@ -429,15 +442,22 @@ private static FieldType fieldTypeFromProtoWithoutNullable(SchemaApi.FieldType p } else if (urn.equals(URN_BEAM_LOGICAL_DECIMAL)) { return FieldType.DECIMAL; } else if (urn.startsWith("beam:logical_type:")) { - try { - return FieldType.logicalType( - (LogicalType<?, ?>) - SerializableUtils.deserializeFromByteArray( - logicalType.getPayload().toByteArray(), "logicalType")); - } catch (IllegalArgumentException e) { - LOG.warn( - "Unable to deserialize the logical type {} from proto. Mark as UnknownLogicalType.", - urn); + if (!logicalType.getPayload().isEmpty()) { + // logical type has a payload, try to recover the instance by deserialization + try { + return FieldType.logicalType( + (LogicalType<?, ?>) + SerializableUtils.deserializeFromByteArray( + logicalType.getPayload().toByteArray(), "logicalType")); + } catch (IllegalArgumentException e) { + LOG.warn( + "Unable to deserialize the logical type {} from proto. Mark as UnknownLogicalType.", + urn); + } + } else { + // logical type does not have a payload. This happens when it is passed via xlang. + // TODO(yathu) it appears this path is called heavily, consider caching the instance + LOG.debug("Constructing non-standard logical type {} as UnknownLogicalType", urn); } } // assemble an UnknownLogicalType
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicImpulse.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicImpulse.java index 3679c3eb10f5..db4f141ee624 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicImpulse.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicImpulse.java @@ -17,11 +17,15 @@ */ package org.apache.beam.sdk.transforms; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; + +import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.transforms.windowing.BoundedWindow; import org.apache.beam.sdk.transforms.windowing.FixedWindows; import org.apache.beam.sdk.transforms.windowing.Window; import org.apache.beam.sdk.values.PBegin; import org.apache.beam.sdk.values.PCollection; +import org.checkerframework.checker.nullness.qual.Nullable; import org.joda.time.Duration; import org.joda.time.Instant; @@ -34,28 +38,58 @@ */ public class PeriodicImpulse extends PTransform<PBegin, PCollection<Instant>> { - Instant startTimestamp = Instant.now(); - Instant stopTimestamp = BoundedWindow.TIMESTAMP_MAX_VALUE; - Duration fireInterval = Duration.standardMinutes(1); + Instant startTimestamp; + Instant stopTimestamp; + @Nullable Duration stopDuration; + Duration fireInterval; boolean applyWindowing = false; boolean catchUpToNow = true; - private PeriodicImpulse() {} + private PeriodicImpulse() { + this.startTimestamp = Instant.now(); + this.stopTimestamp = BoundedWindow.TIMESTAMP_MAX_VALUE; + this.fireInterval = Duration.standardMinutes(1); + } public static PeriodicImpulse create() { return new PeriodicImpulse(); } + /** + * Assign a timestamp when the pipeline starts to produce data. + * + *

<p>Cannot be used along with {@link #stopAfter}. + */ public PeriodicImpulse startAt(Instant startTime) { + checkArgument(stopDuration == null, "startAt and stopAfter cannot be set at the same time"); this.startTimestamp = startTime; return this; } + /** + * Assign a timestamp when the pipeline stops producing data. + * + *

<p>Cannot be used along with {@link #stopAfter}. + */ public PeriodicImpulse stopAt(Instant stopTime) { + checkArgument(stopDuration == null, "stopAt and stopAfter cannot be set at the same time"); this.stopTimestamp = stopTime; return this; } + /** + * For internal use only; no backwards-compatibility guarantees. + * + *

<p>Assign a time interval during which the pipeline produces data. This is different from setting + * {@link #startAt} and {@link #stopAt}, as the first timestamp is determined at run time + * (when the pipeline starts processing). + */ + @Internal + public PeriodicImpulse stopAfter(Duration duration) { + this.stopDuration = duration; + return this; + } + public PeriodicImpulse withInterval(Duration interval) { this.fireInterval = interval; return this; @@ -67,10 +101,13 @@ public PeriodicImpulse applyWindowing() { } /** - * The default behavior is that PeriodicImpulse emits all instants until Instant.now(), then + * For internal use only; no backwards-compatibility guarantees. + * + *

<p>The default behavior is that PeriodicImpulse emits all instants until Instant.now(), then * starts firing at the specified interval. If this is set to false, the PeriodicImpulse will * perform the interval wait before firing each instant. */ + @Internal public PeriodicImpulse catchUpToNow(boolean catchUpToNow) { this.catchUpToNow = catchUpToNow; return this; @@ -78,20 +115,51 @@ public PeriodicImpulse catchUpToNow(boolean catchUpToNow) { @Override public PCollection<Instant> expand(PBegin input) { - PCollection<Instant> result = - input - .apply( - Create.of( - new PeriodicSequence.SequenceDefinition( - startTimestamp, stopTimestamp, fireInterval, catchUpToNow))) - .apply(PeriodicSequence.create()); + PCollection<PeriodicSequence.SequenceDefinition> seqDef; + if (stopDuration != null) { + // nonnull guaranteed + Duration d = stopDuration; + seqDef = + input + .apply(Impulse.create()) + .apply(ParDo.of(new RuntimeSequenceFn(d, fireInterval, catchUpToNow))); + } else { + seqDef = + input.apply( + Create.of( + new PeriodicSequence.SequenceDefinition( + startTimestamp, stopTimestamp, fireInterval, catchUpToNow))); + } + PCollection<Instant> result = seqDef.apply(PeriodicSequence.create()); if (this.applyWindowing) { result = - result.apply( - Window.into(FixedWindows.of(Duration.millis(fireInterval.getMillis())))); + result.apply(Window.into(FixedWindows.of(Duration.millis(fireInterval.getMillis())))); } - return result; } + + /** + * A DoFn that generates a SequenceDefinition at run time. This enables setting the first + * element's timestamp when the pipeline starts processing data. + */ + private static class RuntimeSequenceFn extends DoFn<byte[], PeriodicSequence.SequenceDefinition> { + Duration stopDuration; + Duration fireInterval; + boolean catchUpToNow; + + RuntimeSequenceFn(Duration stopDuration, Duration fireInterval, boolean catchUpToNow) { + this.stopDuration = stopDuration; + this.fireInterval = fireInterval; + this.catchUpToNow = catchUpToNow; + } + + @ProcessElement + public void process(ProcessContext c) { + Instant now = Instant.now(); + c.output( + new PeriodicSequence.SequenceDefinition( + now, now.plus(stopDuration), fireInterval, catchUpToNow)); + } + } }
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicSequence.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicSequence.java index b3cd2afde697..12cbecd04b02 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicSequence.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicSequence.java @@ -22,6 +22,7 @@ import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; import java.util.Objects; +import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.io.range.OffsetRange; import org.apache.beam.sdk.schemas.JavaFieldSchema; import org.apache.beam.sdk.schemas.annotations.DefaultSchema; @@ -67,6 +68,8 @@ public SequenceDefinition(Instant first, Instant last, Duration duration) { this.catchUpToNow = true; } + /** catchUpToNow is experimental; no backwards-compatibility guarantees.
*/ + @Internal public SequenceDefinition( Instant first, Instant last, Duration duration, boolean catchUpToNow) { this.first = first; diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTranslationTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTranslationTest.java index bdce452192a4..3020d7e42d05 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTranslationTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTranslationTest.java @@ -17,6 +17,7 @@ */ package org.apache.beam.sdk.schemas; +import static org.apache.beam.sdk.schemas.SchemaTranslation.STANDARD_LOGICAL_TYPES; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -48,6 +49,7 @@ import org.apache.beam.sdk.schemas.logicaltypes.PythonCallable; import org.apache.beam.sdk.schemas.logicaltypes.SchemaLogicalType; import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes; +import org.apache.beam.sdk.schemas.logicaltypes.UnknownLogicalType; import org.apache.beam.sdk.schemas.logicaltypes.VariableBytes; import org.apache.beam.sdk.schemas.logicaltypes.VariableString; import org.apache.beam.sdk.values.Row; @@ -186,7 +188,8 @@ public static Iterable data() { .withOptions(optionsBuilder)) .add( Schema.of( - Field.of("null_argument", FieldType.logicalType(new NullArgumentLogicalType())))) + Field.of( + "null_argument", FieldType.logicalType(new PortableNullArgLogicalType())))) .add(Schema.of(Field.of("logical_argument", FieldType.logicalType(new DateTime())))) .add( Schema.of(Field.of("single_arg_argument", FieldType.logicalType(FixedBytes.of(100))))) @@ -348,14 +351,14 @@ public static Iterable data() { .add(simpleRow(FieldType.row(row.getSchema()), row)) .add(simpleRow(FieldType.DATETIME, new Instant(23L))) .add(simpleRow(FieldType.DECIMAL, BigDecimal.valueOf(100000))) - .add(simpleRow(FieldType.logicalType(new NullArgumentLogicalType()), "str")) + .add(simpleRow(FieldType.logicalType(new PortableNullArgLogicalType()), "str")) .add(simpleRow(FieldType.logicalType(new DateTime()), LocalDateTime.of(2000, 1, 3, 3, 1))) .add(simpleNullRow(FieldType.STRING)) .add(simpleNullRow(FieldType.INT32)) .add(simpleNullRow(FieldType.map(FieldType.STRING, FieldType.INT32))) .add(simpleNullRow(FieldType.array(FieldType.STRING))) .add(simpleNullRow(FieldType.row(row.getSchema()))) - .add(simpleNullRow(FieldType.logicalType(new NullArgumentLogicalType()))) + .add(simpleNullRow(FieldType.logicalType(new PortableNullArgLogicalType()))) .add(simpleNullRow(FieldType.logicalType(new DateTime()))) .add(simpleNullRow(FieldType.DECIMAL)) .add(simpleNullRow(FieldType.DATETIME)) @@ -419,6 +422,8 @@ public static Iterable data() { .add(FieldType.logicalType(FixedString.of(10))) .add(FieldType.logicalType(VariableString.of(10))) .add(FieldType.logicalType(FixedPrecisionNumeric.of(10))) + .add(FieldType.logicalType(new PortableNullArgLogicalType())) + .add(FieldType.logicalType(new NullArgumentLogicalType())) .build(); } @@ -426,7 +431,7 @@ public static Iterable data() { public Schema.FieldType fieldType; @Test - public void testPortableLogicalTypeSerializeDeserilizeCorrectly() { + public void testLogicalTypeSerializeDeserilizeCorrectly() { SchemaApi.FieldType proto = SchemaTranslation.fieldTypeToProto(fieldType, true); Schema.FieldType translated = SchemaTranslation.fieldTypeFromProto(proto); @@ -438,14 +443,64 @@ public void 
testPortableLogicalTypeSerializeDeserilizeCorrectly() { + public void testLogicalTypeSerializeDeserilizeCorrectly() { SchemaApi.FieldType proto = SchemaTranslation.fieldTypeToProto(fieldType, true); Schema.FieldType translated = SchemaTranslation.fieldTypeFromProto(proto); @@ -438,14 +443,64 @@ public void testPortableLogicalTypeSerializeDeserilizeCorrectly() { assertThat( translated.getLogicalType().getArgument(), equalTo(fieldType.getLogicalType().getArgument())); + assertThat( + translated.getLogicalType().getIdentifier(), + equalTo(fieldType.getLogicalType().getIdentifier())); + } + + @Test + public void testLogicalTypeFromToProtoCorrectly() { + SchemaApi.FieldType proto = SchemaTranslation.fieldTypeToProto(fieldType, false); + Schema.FieldType translated = SchemaTranslation.fieldTypeFromProto(proto); + + if (STANDARD_LOGICAL_TYPES.containsKey(translated.getLogicalType().getIdentifier())) { + // standard logical type should be able to fully recover the original type + assertThat( + translated.getLogicalType().getClass(), equalTo(fieldType.getLogicalType().getClass())); + } else { + // non-standard type will get assembled to UnknownLogicalType + assertThat(translated.getLogicalType().getClass(), equalTo(UnknownLogicalType.class)); + } + assertThat( + translated.getLogicalType().getArgumentType(), + equalTo(fieldType.getLogicalType().getArgumentType())); + assertThat( + translated.getLogicalType().getArgument(), + equalTo(fieldType.getLogicalType().getArgument())); + if (fieldType.getLogicalType().getIdentifier().startsWith("beam:logical_type:")) { + // portable logical type should fully recover the urn + assertThat( + translated.getLogicalType().getIdentifier(), + equalTo(fieldType.getLogicalType().getIdentifier())); + } else { + // non-portable logical type would have a "javasdk_" urn + assertThat( + translated.getLogicalType().getIdentifier(), + equalTo( + String.format( + "beam:logical_type:javasdk_%s:v1", + fieldType + .getLogicalType() + .getIdentifier() + .toLowerCase() + .replaceAll("[^0-9A-Za-z_]", "")))); + } } } - /** A simple logical type that has no argument. */ - private static class NullArgumentLogicalType implements Schema.LogicalType<String, String> { + /** A portable logical type that has no argument. */ + private static class PortableNullArgLogicalType extends NullArgumentLogicalType { public static final String IDENTIFIER = "beam:logical_type:null_argument:v1"; - public NullArgumentLogicalType() {} + @Override + public String getIdentifier() { + return IDENTIFIER; + } + } + + /** A non-portable (Java SDK) logical type that has no argument. */ + private static class NullArgumentLogicalType implements Schema.LogicalType<String, String> { public static final String IDENTIFIER = "NULL_ARGUMENT"; @Override public String toBaseType(String input) {
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/BeamSqlSeekableTable.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/BeamSqlSeekableTable.java index 7b924cf6b6da..4dc9bd5777ff 100644 --- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/BeamSqlSeekableTable.java +++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/BeamSqlSeekableTable.java @@ -20,6 +20,7 @@ import java.io.Serializable; import java.util.List; import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.values.Row; @@ -28,8 +29,12 @@ * FROM FACT_TABLE JOIN LOOKUP_TABLE ON ...}. */ public interface BeamSqlSeekableTable extends Serializable { - /** prepare the instance. */ - default void setUp() {} + /** + * Prepare the instance.
+ * + * @param joinSubsetType joining subset schema + */ + default void setUp(Schema joinSubsetType) {} default void startBundle( DoFn<Row, Row>.StartBundleContext context, PipelineOptions pipelineOptions) {}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamJoinTransforms.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamJoinTransforms.java index e4d62c2b5de7..d25f98729bd4 100644 --- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamJoinTransforms.java +++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamJoinTransforms.java @@ -153,7 +153,7 @@ public PCollection<Row> expand(PCollection<Row> input) { new DoFn<Row, Row>() { @Setup public void setup() { - seekableTable.setUp(); + seekableTable.setUp(joinSubsetType); } @StartBundle
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamSideInputLookupJoinRelTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamSideInputLookupJoinRelTest.java index 2e2971ebd6e9..b5fd03045cbc 100644 --- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamSideInputLookupJoinRelTest.java +++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamSideInputLookupJoinRelTest.java @@ -34,6 +34,7 @@ import org.apache.beam.sdk.values.POutput; import org.apache.beam.sdk.values.Row; import org.hamcrest.core.StringContains; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; @@ -47,11 +48,18 @@ public class BeamSideInputLookupJoinRelTest extends BaseRelTest { /** Test table for JOIN-AS-LOOKUP.
*/ public static class SiteLookupTable extends SchemaBaseBeamTable implements BeamSqlSeekableTable { + private Schema joinSubsetType; public SiteLookupTable(Schema schema) { super(schema); } + @Override + public void setUp(Schema joinSubsetType) { + this.joinSubsetType = joinSubsetType; + Assert.assertNotNull(joinSubsetType); + } + @Override public PCollection.IsBounded isBounded() { return PCollection.IsBounded.BOUNDED; @@ -69,6 +77,7 @@ public POutput buildIOWriter(PCollection<Row> input) { @Override public List<Row> seekRow(Row lookupSubRow) { + Assert.assertEquals(joinSubsetType, lookupSubRow.getSchema()); if (lookupSubRow.getInt32("site_id") == 2) { return Arrays.asList(Row.withSchema(getSchema()).addValues(2, "SITE1").build()); }
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java index 3f018c376f08..e103da4d6007 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java @@ -97,7 +97,6 @@ public class FnHarness { private static final String PIPELINE_OPTIONS_FILE = "PIPELINE_OPTIONS_FILE"; private static final String PIPELINE_OPTIONS = "PIPELINE_OPTIONS"; private static final String RUNNER_CAPABILITIES = "RUNNER_CAPABILITIES"; - private static final String ENABLE_DATA_SAMPLING_EXPERIMENT = "enable_data_sampling"; private static final Logger LOG = LoggerFactory.getLogger(FnHarness.class); private static Endpoints.ApiServiceDescriptor getApiServiceDescriptor(String descriptor) @@ -248,7 +247,8 @@ public static void main( options.as(ExecutorOptions.class).getScheduledExecutorService(); ExecutionStateSampler executionStateSampler = new ExecutionStateSampler(options, System::currentTimeMillis); - final DataSampler dataSampler = new DataSampler(); + + final @Nullable DataSampler dataSampler = DataSampler.create(options); // The logging client variable is not used per se, but during its lifetime (until close()) it // intercepts logging and sends it to the logging service. @@ -276,10 +276,6 @@ public static void main( FinalizeBundleHandler finalizeBundleHandler = new FinalizeBundleHandler(executorService); - // Create the sampler, if the experiment is enabled. - boolean shouldSample = - ExperimentalOptions.hasExperiment(options, ENABLE_DATA_SAMPLING_EXPERIMENT); - // Retrieves the ProcessBundleDescriptor from cache. Requests the PBD from the Runner if it // doesn't exist. Additionally, runs any graph modifications. Function<String, BeamFnApi.ProcessBundleDescriptor> getProcessBundleDescriptor = @@ -314,8 +310,7 @@ private BeamFnApi.ProcessBundleDescriptor loadDescriptor(String id) { metricsShortIds, executionStateSampler, processWideCache, - shouldSample ? dataSampler : null); - logging.setProcessBundleHandler(processBundleHandler); + dataSampler); BeamFnStatusClient beamFnStatusClient = null; if (statusApiServiceDescriptor != null) { @@ -363,7 +358,12 @@ private BeamFnApi.ProcessBundleDescriptor loadDescriptor(String id) { InstructionRequest.RequestCase.HARNESS_MONITORING_INFOS, processWideHandler::harnessMonitoringInfos); handlers.put( - InstructionRequest.RequestCase.SAMPLE_DATA, dataSampler::handleDataSampleRequest); + InstructionRequest.RequestCase.SAMPLE_DATA, + request -> + dataSampler == null + ?
BeamFnApi.InstructionResponse.newBuilder() + .setSampleData(BeamFnApi.SampleDataResponse.newBuilder()) + : dataSampler.handleDataSampleRequest(request)); JvmInitializers.runBeforeProcessing(options); diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/BeamFnControlClient.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/BeamFnControlClient.java index 0509a26c76bb..876a838f1662 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/BeamFnControlClient.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/BeamFnControlClient.java @@ -118,11 +118,11 @@ public void onNext(BeamFnApi.InstructionRequest request) { sendErrorResponse(e); throw e; } finally { - BeamFnLoggingMDC.setInstructionId(null); + BeamFnLoggingMDC.reset(); } }); } finally { - BeamFnLoggingMDC.setInstructionId(null); + BeamFnLoggingMDC.reset(); } } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java index 313bbf5b4fa2..a82ce9276820 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java @@ -32,6 +32,7 @@ import java.util.concurrent.atomic.AtomicReference; import javax.annotation.concurrent.GuardedBy; import org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessor; +import org.apache.beam.fn.harness.logging.BeamFnLoggingMDC; import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo; import org.apache.beam.runners.core.metrics.MetricsContainerStepMap; import org.apache.beam.runners.core.metrics.MonitoringInfoEncodings; @@ -120,6 +121,14 @@ public interface ExecutionState { *

<p>Must only be invoked by the bundle processing thread. */ void deactivate(); + + /** + * Sets the error state to the currently executing state. Returns true if this was the first + * time the error was set. Returns false otherwise. + * + *

<p>This can only be set once. */ + boolean error(); } /** Stops the execution of the state sampler. */ @@ -250,6 +259,8 @@ public class ExecutionStateTracker implements BundleProgressReporter { private @Nullable ExecutionStateImpl currentState; // Read by multiple threads, written by the bundle processing thread lazily. private final AtomicReference<@Nullable ExecutionStateImpl> currentStateLazy; + // If an exception occurs, this will be set to the state at the time of the exception. + private boolean inErrorState = false; // Read and written by the ExecutionStateSampler thread private long transitionsAtLastSample; @@ -465,6 +476,15 @@ public void deactivate() { numTransitions += 1; numTransitionsLazy.lazySet(numTransitions); } + + @Override + public boolean error() { + if (!inErrorState) { + inErrorState = true; + return true; + } + return false; + } } /** @@ -473,6 +493,7 @@ public void deactivate() { *

<p>Only invoked by the bundle processing thread. */ public void start(String processBundleId) { + BeamFnLoggingMDC.setStateTracker(this); this.processBundleId.lazySet(processBundleId); this.lastTransitionTime.lazySet(clock.getMillis()); this.trackedThread.lazySet(Thread.currentThread()); @@ -514,6 +535,8 @@ public void reset() { this.numTransitionsLazy.lazySet(0); this.lastTransitionTime.lazySet(0); this.metricsContainerRegistry.reset(); + this.inErrorState = false; + BeamFnLoggingMDC.setStateTracker(null); } }
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java index a7a8766ffc7b..e27df577779c 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java @@ -53,6 +53,8 @@ import org.apache.beam.sdk.util.WindowedValue; import org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder; import org.apache.beam.sdk.util.common.ElementByteSizeObserver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * The {@code PCollectionConsumerRegistry} is used to maintain a collection of consuming @@ -97,6 +99,7 @@ public static ConsumerAndMetadata forConsumer( private final ProcessBundleDescriptor processBundleDescriptor; private final RehydratedComponents rehydratedComponents; private final @Nullable DataSampler dataSampler; + private static final Logger LOG = LoggerFactory.getLogger(PCollectionConsumerRegistry.class); public PCollectionConsumerRegistry( ExecutionStateTracker stateTracker, @@ -242,6 +245,26 @@ public FnDataReceiver<WindowedValue<?>> getMultiplexingConsumer(String pCollecti }); } + private static <T> void logAndRethrow( + Exception e, + ExecutionState executionState, + ExecutionStateTracker executionStateTracker, + String ptransformId, + @Nullable OutputSampler<T> outputSampler, + @Nullable ElementSample<T> elementSample) + throws Exception { + ExecutionStateSampler.ExecutionStateTrackerStatus status = executionStateTracker.getStatus(); + String processBundleId = status == null ? null : status.getProcessBundleId(); + if (outputSampler != null) { + outputSampler.exception(elementSample, e, ptransformId, processBundleId); + } + + if (executionState.error()) { + LOG.error("Failed to process element for bundle \"{}\"", processBundleId, e); + } + throw e; + } + /** * A wrapping {@code FnDataReceiver<WindowedValue<T>>} which counts the number of elements * consumed by the original {@code FnDataReceiver<WindowedValue<T>> consumer} and sets up metrics @@ -324,13 +347,8 @@ public void accept(WindowedValue<T> input) throws Exception { try { this.delegate.accept(input); } catch (Exception e) { - if (outputSampler != null) { - ExecutionStateSampler.ExecutionStateTrackerStatus status = - executionStateTracker.getStatus(); - String processBundleId = status == null ?
null : status.getProcessBundleId(); - outputSampler.exception(elementSample, e, ptransformId, processBundleId); - } - throw e; + logAndRethrow( + e, executionState, executionStateTracker, ptransformId, outputSampler, elementSample); } finally { executionState.deactivate(); } @@ -419,14 +437,13 @@ public void accept(WindowedValue<T> input) throws Exception { try { consumerAndMetadata.getConsumer().accept(input); } catch (Exception e) { - if (outputSampler != null) { - ExecutionStateSampler.ExecutionStateTrackerStatus status = - consumerAndMetadata.getExecutionStateTracker().getStatus(); - String processBundleId = status == null ? null : status.getProcessBundleId(); - outputSampler.exception( - elementSample, e, consumerAndMetadata.getPTransformId(), processBundleId); - } - throw e; + logAndRethrow( + e, + state, + consumerAndMetadata.getExecutionStateTracker(), + consumerAndMetadata.getPTransformId(), + outputSampler, + elementSample); } finally { state.deactivate(); }
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/DataSampler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/DataSampler.java index b03c453475bd..29011b82a4dc 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/DataSampler.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/DataSampler.java @@ -23,9 +23,12 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import javax.annotation.Nullable; import org.apache.beam.model.fnexecution.v1.BeamFnApi; import org.apache.beam.model.fnexecution.v1.BeamFnApi.SampleDataResponse.ElementList; import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.options.ExperimentalOptions; +import org.apache.beam.sdk.options.PipelineOptions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,19 +40,66 @@ */ public class DataSampler { private static final Logger LOG = LoggerFactory.getLogger(DataSampler.class); + private static final String ENABLE_DATA_SAMPLING_EXPERIMENT = "enable_data_sampling"; + private static final String ENABLE_ALWAYS_ON_EXCEPTION_SAMPLING_EXPERIMENT = + "enable_always_on_exception_sampling"; + private static final String DISABLE_ALWAYS_ON_EXCEPTION_SAMPLING_EXPERIMENT = + "disable_always_on_exception_sampling"; + + /** + * Optionally returns a DataSampler if the experiment "enable_data_sampling" is present or + * "enable_always_on_exception_sampling" is present. Returns null if data sampling is not enabled + * or the "disable_always_on_exception_sampling" experiment is given. + * + * @param options the pipeline options given to this SDK Harness. + * @return the DataSampler if enabled, or null otherwise. + */ + public static @Nullable DataSampler create(PipelineOptions options) { + boolean disableAlwaysOnExceptionSampling = + ExperimentalOptions.hasExperiment(options, DISABLE_ALWAYS_ON_EXCEPTION_SAMPLING_EXPERIMENT); + boolean enableAlwaysOnExceptionSampling = + ExperimentalOptions.hasExperiment(options, ENABLE_ALWAYS_ON_EXCEPTION_SAMPLING_EXPERIMENT); + boolean enableDataSampling = + ExperimentalOptions.hasExperiment(options, ENABLE_DATA_SAMPLING_EXPERIMENT); + // Enable exception sampling, unless the user specifies for it to be disabled. + enableAlwaysOnExceptionSampling = + enableAlwaysOnExceptionSampling && !disableAlwaysOnExceptionSampling; + + // If no sampling is enabled, don't create the DataSampler.
+ if (enableDataSampling || enableAlwaysOnExceptionSampling) { + // For performance reasons, sampling all elements should only be done when the user requests + // it. Exception sampling, by contrast, doesn't need to worry about performance implications, + // since the SDK is already in a bad state. Thus, enable only exception sampling when the + // user does not request the sampling of all elements. + boolean onlySampleExceptions = enableAlwaysOnExceptionSampling && !enableDataSampling; + return new DataSampler(onlySampleExceptions); + } else { + return null; + } + } /** * Creates a DataSampler to sample every 1000 elements while keeping a maximum of 10 in memory. */ public DataSampler() { - this(10, 1000); + this(10, 1000, false); + } + + /** + * Creates a DataSampler to sample every 1000 elements while keeping a maximum of 10 in memory. + * + * @param onlySampleExceptions If true, only samples elements from exceptions. + */ + public DataSampler(Boolean onlySampleExceptions) { + this(10, 1000, onlySampleExceptions); } /** * @param maxSamples Sets the maximum number of samples held in memory at once. * @param sampleEveryN Sets how often to sample. */ - public DataSampler(int maxSamples, int sampleEveryN) { + public DataSampler(int maxSamples, int sampleEveryN, Boolean onlySampleExceptions) { checkArgument( maxSamples > 0, "Expected positive number of samples, did you mean to disable data sampling?"); @@ -58,6 +108,7 @@ public DataSampler(int maxSamples, int sampleEveryN) { "Expected positive number for sampling period, did you mean to disable data sampling?"); this.maxSamples = maxSamples; this.sampleEveryN = sampleEveryN; + this.onlySampleExceptions = onlySampleExceptions; } // Maximum number of elements in buffer. @@ -66,6 +117,9 @@ public DataSampler(int maxSamples, int sampleEveryN) { // Sampling rate. private final int sampleEveryN; + // If true, only takes samples when exceptions in UDFs occur. + private final Boolean onlySampleExceptions; + // The fully-qualified type is: Map[PCollectionId, OutputSampler]. In order to sample // on a PCollection-basis and not per-bundle, this keeps track of shared samples between states. private final Map<String, OutputSampler<?>> outputSamplers = new ConcurrentHashMap<>(); @@ -86,7 +140,10 @@ public DataSampler(int maxSamples, int sampleEveryN) { public <T> OutputSampler<T> sampleOutput(String pcollectionId, Coder<T> coder) { return (OutputSampler<T>) outputSamplers.computeIfAbsent( - pcollectionId, k -> new OutputSampler<>(coder, this.maxSamples, this.sampleEveryN)); + pcollectionId, + k -> + new OutputSampler<>( + coder, this.maxSamples, this.sampleEveryN, this.onlySampleExceptions)); } /**
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/OutputSampler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/OutputSampler.java index f1e710d3ec7e..f7fabae0cc21 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/OutputSampler.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/OutputSampler.java @@ -60,14 +60,19 @@ public class OutputSampler<T> { // Index into the buffer of where to overwrite samples. private int resampleIndex = 0; + // If true, only takes samples when exceptions in UDFs occur.
+ private final Boolean onlySampleExceptions; + @Nullable private final Coder<T> valueCoder; @Nullable private final Coder<WindowedValue<T>> windowedValueCoder; - public OutputSampler(Coder<T> coder, int maxElements, int sampleEveryN) { + public OutputSampler( + Coder<T> coder, int maxElements, int sampleEveryN, boolean onlySampleExceptions) { this.maxElements = maxElements; this.sampleEveryN = sampleEveryN; this.buffer = new ArrayList<>(this.maxElements); + this.onlySampleExceptions = onlySampleExceptions; // The samples taken and encoded should match exactly to the specification from the // ProcessBundleDescriptor. The coder given can either be a WindowedValueCoder, in which the @@ -103,7 +108,7 @@ public ElementSample<T> sample(WindowedValue<T> element) { ElementSample<T> elementSample = new ElementSample<>(ThreadLocalRandom.current().nextInt(), element); - if (samples > 10 && samples % sampleEveryN != 0) { + if (onlySampleExceptions || (samples > 10 && samples % sampleEveryN != 0)) { return elementSample; }
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClient.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClient.java index 2d3e168eab50..8fa074b04768 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClient.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClient.java @@ -40,8 +40,7 @@ import java.util.logging.LogRecord; import java.util.logging.Logger; import java.util.logging.SimpleFormatter; -import org.apache.beam.fn.harness.control.ProcessBundleHandler; -import org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessor; +import org.apache.beam.fn.harness.control.ExecutionStateSampler; import org.apache.beam.model.fnexecution.v1.BeamFnApi; import org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry; import org.apache.beam.model.fnexecution.v1.BeamFnLoggingGrpc; @@ -105,8 +104,6 @@ public class BeamFnLoggingClient implements AutoCloseable { * so if they are garbage collected, our hierarchical configuration will be lost. */ private final Collection<Logger> configuredLoggers = new ArrayList<>(); - private @Nullable ProcessBundleHandler processBundleHandler; - private final BlockingQueue<LogEntry> bufferedLogEntries = new ArrayBlockingQueue<>(MAX_BUFFERED_LOG_ENTRY_COUNT); @@ -347,10 +344,6 @@ public void close() throws Exception { } } - public void setProcessBundleHandler(ProcessBundleHandler processBundleHandler) { - this.processBundleHandler = processBundleHandler; - } - // Reset the logging configuration to what it is at startup.
@RequiresNonNull("configuredLoggers") @RequiresNonNull("logRecordHandler") @@ -440,14 +433,12 @@ public void publish(LogRecord record) { if (loggerName != null) { builder.setLogLocation(loggerName); } - if (instructionId != null && processBundleHandler != null) { - BundleProcessor bundleProcessor = - processBundleHandler.getBundleProcessorCache().find(instructionId); - if (bundleProcessor != null) { - String transformId = bundleProcessor.getStateTracker().getCurrentThreadsPTransformId(); - if (transformId != null) { - builder.setTransformId(transformId); - } + + ExecutionStateSampler.ExecutionStateTracker stateTracker = BeamFnLoggingMDC.getStateTracker(); + if (stateTracker != null) { + String transformId = stateTracker.getCurrentThreadsPTransformId(); + if (transformId != null) { + builder.setTransformId(transformId); } } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingMDC.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingMDC.java index bcfcd4b34ea5..68b03a484904 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingMDC.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingMDC.java @@ -17,12 +17,16 @@ */ package org.apache.beam.fn.harness.logging; +import org.apache.beam.fn.harness.control.ExecutionStateSampler.ExecutionStateTracker; import org.checkerframework.checker.nullness.qual.Nullable; /** Mapped diagnostic context to be consumed and set on LogEntry protos in BeamFnLoggingClient. */ public class BeamFnLoggingMDC { private static final ThreadLocal<@Nullable String> instructionId = new ThreadLocal<>(); + private static final ThreadLocal<@Nullable ExecutionStateTracker> stateTracker = + new ThreadLocal<>(); + /** Sets the Instruction ID of the current thread, which will be inherited by child threads. */ public static void setInstructionId(@Nullable String newInstructionId) { instructionId.set(newInstructionId); @@ -32,4 +36,20 @@ public static void setInstructionId(@Nullable String newInstructionId) { public static @Nullable String getInstructionId() { return instructionId.get(); } + + /** Sets the State Tracker of the current thread, which will be inherited by child threads. */ + public static void setStateTracker(@Nullable ExecutionStateTracker newStateTracker) { + stateTracker.set(newStateTracker); + } + + /** Gets the State Tracker of the current thread. */ + public static @Nullable ExecutionStateTracker getStateTracker() { + return stateTracker.get(); + } + + /** Resets to a default state. 
*/ + public static void reset() { + instructionId.set(null); + stateTracker.set(null); + } } diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ExecutionStateSamplerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ExecutionStateSamplerTest.java index 9d79de0fa153..47866adc892b 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ExecutionStateSamplerTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ExecutionStateSamplerTest.java @@ -22,6 +22,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; @@ -648,4 +649,28 @@ public Long answer(InvocationOnMock invocation) throws Throwable { sampler.stop(); expectedLogs.verifyWarn("Operation ongoing in bundle bundleId for PTransform"); } + + @Test + public void testErrorState() throws Exception { + MillisProvider clock = mock(MillisProvider.class); + ExecutionStateSampler sampler = + new ExecutionStateSampler( + PipelineOptionsFactory.fromArgs("--experiments=state_sampling_period_millis=10") + .create(), + clock); + ExecutionStateTracker tracker = sampler.create(); + ExecutionState state1 = + tracker.create("shortId1", "ptransformId1", "ptransformIdName1", "process"); + ExecutionState state2 = + tracker.create("shortId2", "ptransformId2", "ptransformIdName2", "process"); + + state1.activate(); + state2.activate(); + assertTrue(state2.error()); + assertFalse(state2.error()); + state2.deactivate(); + assertFalse(state2.error()); + tracker.reset(); + assertTrue(state1.error()); + } } diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistryTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistryTest.java index b3e2788e378d..f75b84e76ad5 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistryTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistryTest.java @@ -33,17 +33,26 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import org.apache.beam.fn.harness.HandlesSplits; import org.apache.beam.fn.harness.control.BundleProgressReporter; import org.apache.beam.fn.harness.control.ExecutionStateSampler; import org.apache.beam.fn.harness.control.ExecutionStateSampler.ExecutionStateTracker; import org.apache.beam.fn.harness.debug.DataSampler; +import org.apache.beam.fn.harness.logging.BeamFnLoggingClient; +import org.apache.beam.fn.harness.logging.BeamFnLoggingMDC; import org.apache.beam.model.fnexecution.v1.BeamFnApi; import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor; +import org.apache.beam.model.fnexecution.v1.BeamFnLoggingGrpc; +import org.apache.beam.model.pipeline.v1.Endpoints; import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo; import org.apache.beam.model.pipeline.v1.RunnerApi.PCollection; import 
org.apache.beam.runners.core.construction.SdkComponents; @@ -56,6 +65,7 @@ import org.apache.beam.sdk.coders.IterableCoder; import org.apache.beam.sdk.coders.StringUtf8Coder; import org.apache.beam.sdk.fn.data.FnDataReceiver; +import org.apache.beam.sdk.fn.test.TestStreams; import org.apache.beam.sdk.metrics.Counter; import org.apache.beam.sdk.metrics.Metrics; import org.apache.beam.sdk.metrics.MetricsEnvironment; @@ -65,6 +75,12 @@ import org.apache.beam.sdk.util.common.ElementByteSizeObservableIterable; import org.apache.beam.sdk.util.common.ElementByteSizeObservableIterator; import org.apache.beam.vendor.grpc.v1p54p0.com.google.protobuf.ByteString; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.ManagedChannel; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.Server; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.inprocess.InProcessChannelBuilder; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.inprocess.InProcessServerBuilder; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.stub.CallStreamObserver; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.stub.StreamObserver; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; import org.junit.After; import org.junit.Before; @@ -567,6 +583,124 @@ public void dataSampling() throws Exception { assertTrue(elementList.getElementsList().containsAll(expectedSamples)); } + @Test + public void logsExceptionWithTransformId() throws Exception { + final String pTransformId = "pTransformId"; + final String message = "testException"; + final String instructionId = "instruction"; + final Exception thrownException = new Exception(message); + + // The following is a bunch of boiler-plate to set up a local FnApiLoggingService to catch any + // logs for later test + // expectations. + AtomicBoolean clientClosedStream = new AtomicBoolean(); + Collection values = new ConcurrentLinkedQueue<>(); + AtomicReference> outboundServerObserver = + new AtomicReference<>(); + CallStreamObserver inboundServerObserver = + TestStreams.withOnNext( + (BeamFnApi.LogEntry.List logEntries) -> + values.addAll(logEntries.getLogEntriesList())) + .withOnCompleted( + () -> { + // Remember that the client told us that this stream completed + clientClosedStream.set(true); + outboundServerObserver.get().onCompleted(); + }) + .build(); + + Endpoints.ApiServiceDescriptor apiServiceDescriptor = + Endpoints.ApiServiceDescriptor.newBuilder() + .setUrl(this.getClass().getName() + "-" + UUID.randomUUID().toString()) + .build(); + Server server = + InProcessServerBuilder.forName(apiServiceDescriptor.getUrl()) + .addService( + new BeamFnLoggingGrpc.BeamFnLoggingImplBase() { + @Override + public StreamObserver logging( + StreamObserver outboundObserver) { + outboundServerObserver.set(outboundObserver); + return inboundServerObserver; + } + }) + .build(); + server.start(); + ManagedChannel channel = InProcessChannelBuilder.forName(apiServiceDescriptor.getUrl()).build(); + // End logging boiler-plate... + + // This section is to set up the StateSampler with the expected metadata. + ExecutionStateSampler sampler = + new ExecutionStateSampler(PipelineOptionsFactory.create(), System::currentTimeMillis); + ExecutionStateSampler.ExecutionStateTracker stateTracker = sampler.create(); + stateTracker.start("process-bundle"); + ExecutionStateSampler.ExecutionState state = + stateTracker.create("shortId", pTransformId, pTransformId, "process"); + state.activate(); + + // Track the instruction and state in the logging system. 
In a real run, this is set when a + // ProcessBundleHandler + // starts processing. + BeamFnLoggingMDC.setInstructionId(instructionId); + BeamFnLoggingMDC.setStateTracker(stateTracker); + + // Start the test within the logging context. This reroutes logging through to the boiler-plate + // that was set up + // earlier. + try (BeamFnLoggingClient ignored = + BeamFnLoggingClient.createAndStart( + PipelineOptionsFactory.create(), + apiServiceDescriptor, + (Endpoints.ApiServiceDescriptor descriptor) -> channel)) { + + // Set up the component under test, the FnDataReceiver, to emit an exception when it starts. + ShortIdMap shortIds = new ShortIdMap(); + BundleProgressReporter.InMemory reporterAndRegistrar = new BundleProgressReporter.InMemory(); + PCollectionConsumerRegistry consumers = + new PCollectionConsumerRegistry( + stateTracker, shortIds, reporterAndRegistrar, TEST_DESCRIPTOR); + FnDataReceiver> consumer = mock(FnDataReceiver.class); + + consumers.register(P_COLLECTION_A, pTransformId, pTransformId + "Name", consumer); + + FnDataReceiver> wrapperConsumer = + (FnDataReceiver>) + (FnDataReceiver) consumers.getMultiplexingConsumer(P_COLLECTION_A); + + doThrow(thrownException).when(consumer).accept(any()); + expectedException.expectMessage(message); + expectedException.expect(Exception.class); + + // Run the test. + wrapperConsumer.accept(valueInGlobalWindow("elem")); + + } finally { + // The actual log entry has a lot of metadata that can't easily be controlled. So set the + // entries that are needed + // for this test and cull everything else. + final BeamFnApi.LogEntry expectedEntry = + BeamFnApi.LogEntry.newBuilder() + .setInstructionId(instructionId) + .setTransformId(pTransformId) + .setMessage("Failed to process element for bundle \"process-bundle\"") + .build(); + + List entries = new ArrayList<>(values); + assertEquals(1, entries.size()); + BeamFnApi.LogEntry actualEntry = entries.get(0); + BeamFnApi.LogEntry actualEntryCulled = + BeamFnApi.LogEntry.newBuilder() + .setInstructionId(actualEntry.getInstructionId()) + .setTransformId(actualEntry.getTransformId()) + .setMessage(actualEntry.getMessage()) + .build(); + + assertEquals(expectedEntry, actualEntryCulled); + + server.shutdownNow(); + } + } + private static class TestElementByteSizeObservableIterable extends ElementByteSizeObservableIterable> { private List elements; diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/DataSamplerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/DataSamplerTest.java index a05efca120ab..1cad5210380b 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/DataSamplerTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/DataSamplerTest.java @@ -21,6 +21,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; @@ -32,10 +33,13 @@ import java.util.Map; import java.util.concurrent.CountDownLatch; import org.apache.beam.model.fnexecution.v1.BeamFnApi; +import org.apache.beam.model.fnexecution.v1.BeamFnApi.SampledElement; import org.apache.beam.sdk.coders.ByteArrayCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.StringUtf8Coder; import org.apache.beam.sdk.coders.VarIntCoder; +import org.apache.beam.sdk.options.ExperimentalOptions; +import 
org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.util.WindowedValue; import org.apache.beam.vendor.grpc.v1p54p0.com.google.protobuf.ByteString; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; @@ -117,6 +121,21 @@ void assertHasSamples( assertTrue(elementList.getElementsList().containsAll(expectedSamples)); } + void assertHasSamples( + BeamFnApi.InstructionResponse response, + String pcollection, + List elements) { + Map elementSamplesMap = + response.getSampleData().getElementSamplesMap(); + + assertFalse(elementSamplesMap.isEmpty()); + + BeamFnApi.SampleDataResponse.ElementList elementList = elementSamplesMap.get(pcollection); + assertNotNull(elementList); + + assertTrue(elementList.getElementsList().containsAll(elements)); + } + /** * Smoke test that samples show in the output map. * @@ -203,7 +222,7 @@ void generateStringSamples(DataSampler sampler) { */ @Test public void testFiltersSinglePCollectionId() throws Exception { - DataSampler sampler = new DataSampler(10, 10); + DataSampler sampler = new DataSampler(10, 10, false); generateStringSamples(sampler); BeamFnApi.InstructionResponse samples = getSamplesForPCollection(sampler, "a"); @@ -219,7 +238,7 @@ public void testFiltersMultiplePCollectionIds() throws Exception { List pcollectionIds = ImmutableList.of("a", "c"); - DataSampler sampler = new DataSampler(10, 10); + DataSampler sampler = new DataSampler(10, 10, false); generateStringSamples(sampler); BeamFnApi.InstructionResponse samples = getSamplesForPCollections(sampler, pcollectionIds); @@ -275,4 +294,87 @@ public void testConcurrentNewSampler() throws Exception { sampleThread.join(); } } + + /** + * Tests that including the "enable_always_on_exception_sampling" experiment enables sampling. + * + * @throws Exception + */ + @Test + public void testEnableAlwaysOnExceptionSampling() throws Exception { + ExperimentalOptions experimentalOptions = PipelineOptionsFactory.as(ExperimentalOptions.class); + experimentalOptions.setExperiments( + Collections.singletonList("enable_always_on_exception_sampling")); + DataSampler sampler = DataSampler.create(experimentalOptions); + assertNotNull(sampler); + + VarIntCoder coder = VarIntCoder.of(); + OutputSampler outputSampler = sampler.sampleOutput("pcollection-id", coder); + ElementSample elementSample = outputSampler.sample(globalWindowedValue(1)); + outputSampler.exception(elementSample, new RuntimeException(), "", ""); + + outputSampler.sample(globalWindowedValue(2)); + + BeamFnApi.InstructionResponse samples = getAllSamples(sampler); + List expectedSamples = + ImmutableList.of( + SampledElement.newBuilder() + .setElement(ByteString.copyFrom(encodeInt(1))) + .setException( + SampledElement.Exception.newBuilder() + .setError(new RuntimeException().toString())) + .build()); + assertHasSamples(samples, "pcollection-id", expectedSamples); + } + + /** + * Tests that "disable_always_on_exception_sampling" overrides the always-on experiment. 
+ * + * @throws Exception + */ + @Test + public void testDisableAlwaysOnExceptionSampling() throws Exception { + ExperimentalOptions experimentalOptions = PipelineOptionsFactory.as(ExperimentalOptions.class); + experimentalOptions.setExperiments( + ImmutableList.of( + "enable_always_on_exception_sampling", "disable_always_on_exception_sampling")); + DataSampler sampler = DataSampler.create(experimentalOptions); + assertNull(sampler); + } + + /** + * Tests that the "enable_data_sampling" experiment overrides + * "disable_always_on_exception_sampling". + * + * @throws Exception + */ + @Test + public void testDisableAlwaysOnExceptionSamplingWithEnableDataSampling() throws Exception { + ExperimentalOptions experimentalOptions = PipelineOptionsFactory.as(ExperimentalOptions.class); + experimentalOptions.setExperiments( + ImmutableList.of( + "enable_data_sampling", + "enable_always_on_exception_sampling", + "disable_always_on_exception_sampling")); + DataSampler sampler = DataSampler.create(experimentalOptions); + assertNotNull(sampler); + + VarIntCoder coder = VarIntCoder.of(); + OutputSampler outputSampler = sampler.sampleOutput("pcollection-id", coder); + ElementSample elementSample = outputSampler.sample(globalWindowedValue(1)); + outputSampler.exception(elementSample, new RuntimeException(), "", ""); + + outputSampler.sample(globalWindowedValue(2)); + + BeamFnApi.InstructionResponse samples = getAllSamples(sampler); + List expectedSamples = + ImmutableList.of( + SampledElement.newBuilder() + .setElement(ByteString.copyFrom(encodeInt(1))) + .setException( + SampledElement.Exception.newBuilder() + .setError(new RuntimeException().toString())) + .build()); + assertHasSamples(samples, "pcollection-id", expectedSamples); + } } diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/OutputSamplerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/OutputSamplerTest.java index 5ca562e1c241..26285205bd34 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/OutputSamplerTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/OutputSamplerTest.java @@ -90,7 +90,7 @@ public BeamFnApi.SampledElement encodeException( @Test public void testSamplesFirstN() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler outputSampler = new OutputSampler<>(coder, 10, 10); + OutputSampler outputSampler = new OutputSampler<>(coder, 10, 10, false); // Purposely go over maxSamples and sampleEveryN. This helps to increase confidence. for (int i = 0; i < 15; ++i) { @@ -112,7 +112,7 @@ public void testWindowedValueSample() throws IOException { WindowedValue.WindowedValueCoder coder = WindowedValue.FullWindowedValueCoder.of(VarIntCoder.of(), GlobalWindow.Coder.INSTANCE); - OutputSampler outputSampler = new OutputSampler<>(coder, 10, 10); + OutputSampler outputSampler = new OutputSampler<>(coder, 10, 10, false); outputSampler.sample(WindowedValue.valueInGlobalWindow(0)); // The expected list is only 0..9 inclusive. @@ -125,7 +125,7 @@ public void testWindowedValueSample() throws IOException { public void testNonWindowedValueSample() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler outputSampler = new OutputSampler<>(coder, 10, 10); + OutputSampler outputSampler = new OutputSampler<>(coder, 10, 10, false); outputSampler.sample(WindowedValue.valueInGlobalWindow(0)); // The expected list is only 0..9 inclusive. 
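The constructor updates in this file all thread the new onlySampleExceptions flag through the tests. For reference, the predicate in OutputSampler.sample() keeps the first ten elements and then every sampleEveryN-th one, and suppresses ordinary samples entirely when the flag is set; here is a small sketch replaying it (the running element counter is assumed to start at 1):

```java
/** Replays the buffering predicate from OutputSampler.sample() for sampleEveryN = 20. */
public class SamplePredicateSketch {
  public static void main(String[] args) {
    int sampleEveryN = 20;
    boolean onlySampleExceptions = false; // Flip to true and nothing below is buffered.
    for (int samples = 1; samples <= 60; samples++) {
      // Mirrors: if (onlySampleExceptions || (samples > 10 && samples % sampleEveryN != 0)) skip.
      boolean buffered = !(onlySampleExceptions || (samples > 10 && samples % sampleEveryN != 0));
      if (buffered) {
        System.out.println("buffered element #" + samples);
      }
    }
    // Prints elements 1..10, then 20, 40, 60: the first ten plus every twentieth.
  }
}
```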
@@ -142,7 +142,7 @@ public void testNonWindowedValueSample() throws IOException { @Test public void testActsLikeCircularBuffer() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler outputSampler = new OutputSampler<>(coder, 5, 20); + OutputSampler outputSampler = new OutputSampler<>(coder, 5, 20, false); for (int i = 0; i < 100; ++i) { outputSampler.sample(WindowedValue.valueInGlobalWindow(i)); @@ -171,7 +171,7 @@ public void testActsLikeCircularBuffer() throws IOException { @Test public void testCanSampleExceptions() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler outputSampler = new OutputSampler<>(coder, 5, 20); + OutputSampler outputSampler = new OutputSampler<>(coder, 5, 20, false); WindowedValue windowedValue = WindowedValue.valueInGlobalWindow(1); ElementSample elementSample = outputSampler.sample(windowedValue); @@ -197,7 +197,7 @@ public void testCanSampleExceptions() throws IOException { @Test public void testNoDuplicateExceptions() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler outputSampler = new OutputSampler<>(coder, 5, 20); + OutputSampler outputSampler = new OutputSampler<>(coder, 5, 20, false); ElementSample elementSampleA = outputSampler.sample(WindowedValue.valueInGlobalWindow(1)); @@ -227,7 +227,7 @@ public void testNoDuplicateExceptions() throws IOException { @Test public void testExceptionOnlySampledIfNonNullProcessBundle() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler outputSampler = new OutputSampler<>(coder, 5, 20); + OutputSampler outputSampler = new OutputSampler<>(coder, 5, 20, false); WindowedValue windowedValue = WindowedValue.valueInGlobalWindow(1); ElementSample elementSample = outputSampler.sample(windowedValue); @@ -244,15 +244,14 @@ public void testExceptionOnlySampledIfNonNullProcessBundle() throws IOException } /** - * Tests that multiple samples don't push out exception samples. TODO: test that the exception - * metadata is set. + * Tests that multiple samples don't push out exception samples. * * @throws IOException when encoding fails (shouldn't happen). */ @Test public void testExceptionSamplesAreNotRemoved() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler outputSampler = new OutputSampler<>(coder, 5, 20); + OutputSampler outputSampler = new OutputSampler<>(coder, 5, 20, false); WindowedValue windowedValue = WindowedValue.valueInGlobalWindow(0); ElementSample elementSample = outputSampler.sample(windowedValue); @@ -281,6 +280,32 @@ public void testExceptionSamplesAreNotRemoved() throws IOException { assertThat(samples, containsInAnyOrder(expected.toArray())); } + /** + * Test that the onlySampleExceptions flag works. + * + * @throws IOException when encoding fails (shouldn't happen). 
+ */ + @Test + public void testOnlySampleExceptions() throws IOException { + VarIntCoder coder = VarIntCoder.of(); + OutputSampler outputSampler = new OutputSampler<>(coder, 5, 20, true); + + WindowedValue windowedValue = WindowedValue.valueInGlobalWindow(1); + outputSampler.sample(WindowedValue.valueInGlobalWindow(2)); + ElementSample elementSample = outputSampler.sample(windowedValue); + + Exception exception = new RuntimeException("Test exception"); + String ptransformId = "ptransform"; + String processBundleId = "processBundle"; + outputSampler.exception(elementSample, exception, ptransformId, processBundleId); + + List expected = new ArrayList<>(); + expected.add(encodeException(1, exception.toString(), ptransformId, processBundleId)); + + List samples = outputSampler.samples(); + assertThat(samples, containsInAnyOrder(expected.toArray())); + } + /** * Test that sampling a PCollection while retrieving samples from multiple threads is ok. * @@ -289,7 +314,7 @@ public void testExceptionSamplesAreNotRemoved() throws IOException { @Test public void testConcurrentSamples() throws IOException, InterruptedException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler outputSampler = new OutputSampler<>(coder, 10, 2); + OutputSampler outputSampler = new OutputSampler<>(coder, 10, 2, false); CountDownLatch startSignal = new CountDownLatch(1); CountDownLatch doneSignal = new CountDownLatch(2); diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClientTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClientTest.java index 47b59bc68b07..8c7a40f8db90 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClientTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClientTest.java @@ -38,6 +38,7 @@ import java.util.logging.LogRecord; import java.util.logging.Logger; import java.util.logging.SimpleFormatter; +import org.apache.beam.fn.harness.control.ExecutionStateSampler; import org.apache.beam.model.fnexecution.v1.BeamFnApi; import org.apache.beam.model.fnexecution.v1.BeamFnLoggingGrpc; import org.apache.beam.model.pipeline.v1.Endpoints; @@ -95,6 +96,7 @@ public class BeamFnLoggingClientTest { .setInstructionId("instruction-1") .setSeverity(BeamFnApi.LogEntry.Severity.Enum.DEBUG) .setMessage("Message") + .setTransformId("ptransformId") .setThread("12345") .setTimestamp(Timestamp.newBuilder().setSeconds(1234567).setNanos(890000000).build()) .setLogLocation("LoggerName") @@ -104,6 +106,7 @@ public class BeamFnLoggingClientTest { .setInstructionId("instruction-1") .setSeverity(BeamFnApi.LogEntry.Severity.Enum.DEBUG) .setMessage("testMdcValue:Message") + .setTransformId("ptransformId") .setCustomData( Struct.newBuilder() .putFields( @@ -117,6 +120,7 @@ public class BeamFnLoggingClientTest { .setInstructionId("instruction-1") .setSeverity(BeamFnApi.LogEntry.Severity.Enum.WARN) .setMessage("MessageWithException") + .setTransformId("errorPtransformId") .setTrace(getStackTraceAsString(TEST_RECORD_WITH_EXCEPTION.getThrown())) .setThread("12345") .setTimestamp(Timestamp.newBuilder().setSeconds(1234567).setNanos(890000000).build()) @@ -126,7 +130,16 @@ public class BeamFnLoggingClientTest { @Test public void testLogging() throws Exception { + ExecutionStateSampler sampler = + new ExecutionStateSampler(PipelineOptionsFactory.create(), null); + ExecutionStateSampler.ExecutionStateTracker stateTracker = sampler.create(); + 
ExecutionStateSampler.ExecutionState state = + stateTracker.create("shortId", "ptransformId", "ptransformIdName", "process"); + state.activate(); + BeamFnLoggingMDC.setInstructionId("instruction-1"); + BeamFnLoggingMDC.setStateTracker(stateTracker); + AtomicBoolean clientClosedStream = new AtomicBoolean(); Collection values = new ConcurrentLinkedQueue<>(); AtomicReference> outboundServerObserver = @@ -188,7 +201,14 @@ public StreamObserver logging( rootLogger.log(FILTERED_RECORD); // Should not be filtered because the default log level override for ConfiguredLogger is DEBUG configuredLogger.log(TEST_RECORD); + + // Simulate an exception. This sets an internal error state that the logged PTransform id + // comes from. + ExecutionStateSampler.ExecutionState errorState = + stateTracker.create("shortId", "errorPtransformId", "errorPtransformIdName", "process"); + errorState.activate(); configuredLogger.log(TEST_RECORD_WITH_EXCEPTION); + errorState.deactivate(); // Ensure that configuring a custom formatter on the logging handler will be honored. for (Handler handler : rootLogger.getHandlers()) { diff --git a/sdks/java/io/google-ads/build.gradle b/sdks/java/io/google-ads/build.gradle index e874c0b3cb1b..f3dbb19e04f4 100644 --- a/sdks/java/io/google-ads/build.gradle +++ b/sdks/java/io/google-ads/build.gradle @@ -27,7 +27,6 @@ dependencies { implementation project(path: ":sdks:java:extensions:google-cloud-platform-core") implementation library.java.jackson_annotations implementation library.java.gax - implementation library.java.google_ads implementation library.java.google_auth_library_credentials implementation library.java.google_auth_library_oauth2_http implementation library.java.protobuf_java diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableConfig.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableConfig.java index 018e538f552c..15230c8adef9 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableConfig.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableConfig.java @@ -25,6 +25,7 @@ import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.extensions.gcp.auth.CredentialFactory; import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; +import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.BigtableClientOverride; import org.apache.beam.sdk.options.ValueProvider; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.transforms.display.DisplayData; @@ -49,6 +50,9 @@ public abstract class BigtableConfig implements Serializable { /** Returns the app profile being read from. */ public abstract @Nullable ValueProvider getAppProfileId(); + /** Returns the Bigtable client override. */ + public abstract @Nullable BigtableClientOverride getBigtableClientOverride(); + /** * Returns the Google Cloud Bigtable instance being written to, and other parameters. 
* @@ -113,6 +117,8 @@ abstract Builder setBigtableOptionsConfigurator( abstract Builder setChannelCount(int count); + abstract Builder setBigtableClientOverride(BigtableClientOverride clientOverride); + abstract BigtableConfig build(); } @@ -156,6 +162,12 @@ public BigtableConfig withEmulator(String emulatorHost) { return toBuilder().setEmulatorHost(emulatorHost).build(); } + @VisibleForTesting + BigtableConfig withBigtableClientOverride(BigtableClientOverride clientOverride) { + checkArgument(clientOverride != null, "clientOverride cannot be null"); + return toBuilder().setBigtableClientOverride(clientOverride).build(); + } + void validate() { checkArgument( (getProjectId() != null diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java index a655a29e92b2..9f3c627a89ef 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java @@ -49,6 +49,7 @@ import org.apache.beam.sdk.io.gcp.bigtable.changestreams.UniqueIdGenerator; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.action.ActionFactory; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.BigtableChangeStreamAccessor; +import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.BigtableClientOverride; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.DaoFactory; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.MetadataTableAdminDao; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dofn.DetectNewPartitionsDoFn; @@ -1988,6 +1989,24 @@ public ReadChangeStream withMetadataTableAppProfileId(String appProfileId) { .build(); } + /** + * Returns a new {@link BigtableIO.ReadChangeStream} that overrides the config of the data and/or + * admin client for streaming changes and for managing the metadata. For testing purposes only. + * Not intended for external use. + * + *
<p>
Does not modify this object. + */ + @VisibleForTesting + ReadChangeStream withBigtableClientOverride(BigtableClientOverride clientOverride) { + BigtableConfig config = getBigtableConfig(); + BigtableConfig metadataTableConfig = getMetadataTableBigtableConfig(); + return toBuilder() + .setBigtableConfig(config.withBigtableClientOverride(clientOverride)) + .setMetadataTableBigtableConfig( + metadataTableConfig.withBigtableClientOverride(clientOverride)) + .build(); + } + /** * Returns a new {@link BigtableIO.ReadChangeStream} that, if set to true, will create or update * metadata table before launching pipeline. Otherwise, it is expected that a metadata table diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableChangeStreamAccessor.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableChangeStreamAccessor.java index ecf9a7039598..cb296aef6c28 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableChangeStreamAccessor.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableChangeStreamAccessor.java @@ -210,6 +210,13 @@ private static BigtableChangeStreamAccessor createAccessor(@NonNull BigtableConf .setMaxAttempts(10) .build()); + final BigtableClientOverride clientOverride = bigtableConfig.getBigtableClientOverride(); + if (clientOverride != null) { + clientOverride.updateTableAdminClientSettings(tableAdminSettingsBuilder); + clientOverride.updateInstanceAdminClientSettings(instanceAdminSettingsBuilder); + clientOverride.updateDataClientSettings(dataSettingsBuilder); + } + BigtableDataClient dataClient = BigtableDataClient.create(dataSettingsBuilder.build()); BigtableTableAdminClient tableAdminClient = BigtableTableAdminClient.create(tableAdminSettingsBuilder.build()); diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableClientOverride.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableClientOverride.java new file mode 100644 index 000000000000..72b3e39871ef --- /dev/null +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableClientOverride.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao; + +import com.google.cloud.bigtable.admin.v2.BigtableInstanceAdminSettings; +import com.google.cloud.bigtable.admin.v2.BigtableTableAdminSettings; +import com.google.cloud.bigtable.data.v2.BigtableDataSettings; +import java.io.IOException; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; + +/** Override the configuration of Cloud Bigtable data and admin client. */ +@VisibleForTesting +public interface BigtableClientOverride { + /** + * Update {@link BigtableInstanceAdminSettings.Builder} with custom configurations. + * + *
<p>
For example, to update the admin api endpoint. + * + * @param builder builds the instance admin client + * @throws IOException when dependency initialization fails + */ + void updateInstanceAdminClientSettings(BigtableInstanceAdminSettings.Builder builder) + throws IOException; + + /** + * Update {@link BigtableTableAdminSettings.Builder} with custom configurations. + * + *
<p>
For example, to update the admin api endpoint. + * + * @param builder builds the table admin client + * @throws IOException when dependency initialization fails + */ + void updateTableAdminClientSettings(BigtableTableAdminSettings.Builder builder) + throws IOException; + + /** + * Update {@link BigtableDataSettings.Builder} with custom configurations. + * + * @param builder builds the data client + * @throws IOException when dependency initialization fails + */ + void updateDataClientSettings(BigtableDataSettings.Builder builder) throws IOException; +} diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/healthcare/HealthcareApiClient.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/healthcare/HealthcareApiClient.java index a72b55bd7209..39c30b949425 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/healthcare/HealthcareApiClient.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/healthcare/HealthcareApiClient.java @@ -36,60 +36,68 @@ import org.checkerframework.checker.nullness.qual.Nullable; import org.joda.time.Instant; -/** Defines a client that talks to the Cloud Healthcare API (version v1). */ +/** Defines a client to communicate with the GCP HCLS API (version v1). */ public interface HealthcareApiClient { /** - * Fetches an Hl7v2 message by its name from a Hl7v2 store. + * Gets a Hl7v2 message by its name from a Hl7v2 store. * - * @param msgName the msg name - * @return HL7v2 message - * @throws IOException the io exception - * @throws ParseException the parse exception + * @param msgName The message name to be retrieved. + * @return The HL7v2 message. + * @throws IOException The IO Exception. + * @throws ParseException The Parse Exception. */ Message getHL7v2Message(String msgName) throws IOException, ParseException; /** - * Delete hl 7 v 2 message empty. + * Deletes an HL7v2 message. * - * @param msgName the msg name - * @return the empty - * @throws IOException the io exception + * @param msgName The message name to be deleted. + * @return Empty. + * @throws IOException The IO Exception. */ Empty deleteHL7v2Message(String msgName) throws IOException; /** - * Gets HL7v2 store. + * Gets an HL7v2 store. * - * @param storeName the store name - * @return the HL7v2 store - * @throws IOException the io exception + * @param storeName The store name to be retrieved. + * @return The HL7v2 store. + * @throws IOException The IO Exception. */ Hl7V2Store getHL7v2Store(String storeName) throws IOException; /** - * Gets earliest hl 7 v 2 send time. + * Gets the earliest HL7v2 send time. * - * @param hl7v2Store the hl 7 v 2 store - * @param filter the filter - * @return the earliest hl 7 v 2 send time - * @throws IOException the io exception + * @param hl7v2Store The HL7v2 store. + * @param filter the filter to be matched on. + * @return The earliest HL7v2 send time. + * @throws IOException The IO Exception. */ Instant getEarliestHL7v2SendTime(String hl7v2Store, @Nullable String filter) throws IOException; + /** + * Gets the latest HL7v2 send time. + * + * @param hl7v2Store The HL7v2 store. + * @param filter The filter to be matched on. + * @return The latest HL7v2 send time. + * @throws IOException The IO Exception. + */ Instant getLatestHL7v2SendTime(String hl7v2Store, @Nullable String filter) throws IOException; /** - * Make send time bound hl 7 v 2 list request. + * Time Bound HL7v2 list request. 
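Since BigtableClientOverride only mutates settings builders, a test implementation can be as small as pointing each of the three clients at a custom endpoint. A hypothetical sketch; the stubSettings()/setEndpoint() calls are assumptions about the Cloud Bigtable client builders, not something this patch adds:

```java
import com.google.cloud.bigtable.admin.v2.BigtableInstanceAdminSettings;
import com.google.cloud.bigtable.admin.v2.BigtableTableAdminSettings;
import com.google.cloud.bigtable.data.v2.BigtableDataSettings;
import java.io.IOException;

/** Hypothetical override that points all three Bigtable clients at one endpoint. */
class EndpointOverride implements BigtableClientOverride {
  private final String endpoint; // e.g. "localhost:8086" for an emulator

  EndpointOverride(String endpoint) {
    this.endpoint = endpoint;
  }

  @Override
  public void updateInstanceAdminClientSettings(BigtableInstanceAdminSettings.Builder builder)
      throws IOException {
    builder.stubSettings().setEndpoint(endpoint);
  }

  @Override
  public void updateTableAdminClientSettings(BigtableTableAdminSettings.Builder builder)
      throws IOException {
    builder.stubSettings().setEndpoint(endpoint);
  }

  @Override
  public void updateDataClientSettings(BigtableDataSettings.Builder builder) throws IOException {
    builder.stubSettings().setEndpoint(endpoint);
  }
}
```

Such an override would be wired in through the new ReadChangeStream.withBigtableClientOverride shown earlier, which applies it to both the change-stream config and the metadata-table config.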
* - * @param hl7v2Store the hl 7 v 2 store - * @param start the start - * @param end the end - * @param otherFilter the other filter - * @param orderBy the order by - * @param pageToken the page token - * @return the list messages response - * @throws IOException the io exception + * @param hl7v2Store The HL7v2 store. + * @param start Start time. + * @param end End time. + * @param otherFilter The filter outside of the sendTime. + * @param orderBy Order by. + * @param pageToken The page token. + * @return HTTP List response. + * @throws IOException The IO Exception. */ ListMessagesResponse makeSendTimeBoundHL7v2ListRequest( String hl7v2Store, @@ -103,12 +111,12 @@ ListMessagesResponse makeSendTimeBoundHL7v2ListRequest( /** * Make hl 7 v 2 list request list messages response. * - * @param hl7v2Store the hl 7 v 2 store - * @param filter the filter - * @param orderBy the order by - * @param pageToken the page token - * @return the list messages response - * @throws IOException the io exception + * @param hl7v2Store The HL7v2 Store. + * @param filter The Filter. + * @param orderBy Order by. + * @param pageToken The Page Token. + * @return HTTP List response. + * @throws IOException The IO Exception. */ ListMessagesResponse makeHL7v2ListRequest( String hl7v2Store, @@ -118,38 +126,89 @@ ListMessagesResponse makeHL7v2ListRequest( throws IOException; /** - * Ingest hl 7 v 2 message ingest message response. + * Ingest an HL7v2 message. * - * @param hl7v2Store the hl 7 v 2 store - * @param msg the msg - * @return the ingest message response - * @throws IOException the io exception + * @param hl7v2Store The HL7v2 store of the message. + * @param msg The message. + * @return Empty. + * @throws IOException The IO Exception. */ IngestMessageResponse ingestHL7v2Message(String hl7v2Store, Message msg) throws IOException; /** - * Create hl 7 v 2 message message. + * Creates an HL7v2 message. * - * @param hl7v2Store the hl 7 v 2 store - * @param msg the msg - * @return the message - * @throws IOException the io exception + * @param hl7v2Store The HL7v2 store to create a message in. + * @param msg The message to create. + * @return Empty. + * @throws IOException The IO Exception. */ Message createHL7v2Message(String hl7v2Store, Message msg) throws IOException; + /** + * Deletes an HL7v2 store. + * + * @param store The HL7v2 store to be deleted. + * @return Empty. + * @throws IOException The IO Exception. + */ + Empty deleteHL7v2Store(String store) throws IOException; + + /** + * Importing a FHIR resource from GCS. + * + * @param fhirStore the FhirStore to import into. + * @param gcsSourcePath the GCS Path of resource. + * @param contentStructure The content structure. + * @return Empty. + * @throws IOException the io exception + */ Operation importFhirResource( String fhirStore, String gcsSourcePath, @Nullable String contentStructure) throws IOException; + /** + * Export a FHIR Resource to GCS. + * + * @param fhirStore the FhirStore of the resource. + * @param gcsDestinationPrefix GCS Destination Prefix to export to. + * @return Empty. + * @throws IOException The IO Exception. + */ Operation exportFhirResourceToGcs(String fhirStore, String gcsDestinationPrefix) throws IOException; + /** + * Export a FHIR Resource to BigQuery. + * + * @param fhirStore the FhirStore of the resource. + * @param bigQueryDatasetUri The BQ Dataset URI to export to. + * @return Empty. + * @throws IOException The IO Exception. 
+ */ Operation exportFhirResourceToBigQuery(String fhirStore, String bigQueryDatasetUri) throws IOException; + /** + * Deidentify a GCP FHIR Store and write the result into a new FHIR Store. + * + * @param sourceFhirStore the FhirStore to be deidentified. + * @param destinationFhirStore the FhirStore that the deidentified data will be written to. + * @param deidConfig the deidCongig specifying form of deidentification. + * @return Empty. + * @throws IOException The IO Exception. + */ Operation deidentifyFhirStore( String sourceFhirStore, String destinationFhirStore, DeidentifyConfig deidConfig) throws IOException; + /** + * Poll operation. + * + * @param operation to be polled. + * @param sleepMs length of time to wait between requests. + * @return HTTP Request (that returns status of operation). + * @throws IOException The IO Exception. + */ Operation pollOperation(Operation operation, Long sleepMs) throws InterruptedException, IOException; @@ -159,7 +218,7 @@ Operation pollOperation(Operation operation, Long sleepMs) * @param fhirStore the fhir store * @param bundle the bundle * @return the http body - * @throws IOException the io exception + * @throws IOException The IO Exception. */ HttpBody executeFhirBundle(String fhirStore, String bundle) throws IOException, HealthcareHttpException; @@ -170,7 +229,7 @@ HttpBody executeFhirBundle(String fhirStore, String bundle) * @param resourceName the resource name, in format * projects/{p}/locations/{l}/datasets/{d}/fhirStores/{f}/fhir/{resourceType}/{id} * @return the http body - * @throws IOException the io exception + * @throws IOException The IO Exception. */ HttpBody readFhirResource(String resourceName) throws IOException; @@ -179,9 +238,9 @@ HttpBody executeFhirBundle(String fhirStore, String bundle) * * @param fhirStore the fhir store * @param resourceType the resource type - * @param parameters the parameters + * @param parameters The parameters (in the form of key-value pairs). * @return the http body - * @throws IOException + * @throws IOException The IO Exception. */ HttpBody searchFhirResource( String fhirStore, @@ -195,9 +254,9 @@ HttpBody searchFhirResource( * * @param resourceName the resource name, in format * projects/{p}/locations/{l}/datasets/{d}/fhirStores/{f}/fhir/{resourceType}/{id} - * @param filters optional request filters + * @param filters optional request filters (in key value pairs). * @return the http body - * @throws IOException + * @throws IOException The IO Exception. */ HttpBody getPatientEverything( String resourceName, @Nullable Map filters, String pageToken) @@ -206,46 +265,101 @@ HttpBody getPatientEverything( /** * Create hl 7 v 2 store hl 7 v 2 store. * - * @param dataset the dataset - * @param name the name - * @return the hl 7 v 2 store - * @throws IOException the io exception + * @param dataset The dataset to create the HL7v2 store in. + * @param name The name of the store to be created. + * @return Empty. + * @throws IOException The IO Exception. */ Hl7V2Store createHL7v2Store(String dataset, String name) throws IOException; + /** + * Create FHIR Store with a PubSub topic listener. + * + * @param dataset The name of Dataset for the FHIR store to be created in. + * @param name The name of the FHIR store. + * @param version The version of the FHIR store (DSTU2, STU3, R4). + * @param pubsubTopic The pubsub topic listening to the FHIR store. + * @throws IOException The IO Exception. 
+ */ FhirStore createFhirStore(String dataset, String name, String version, String pubsubTopic) throws IOException; - + /** + * Create FHIR Store. + * + * @param dataset The name of the Dataset for the FHIR store to be created in. + * @param name The name of the FHIR store. + * @param version The version of the FHIR store (DSTU2, STU3, R4). + * @throws IOException The IO Exception. + */ FhirStore createFhirStore(String dataset, String name, String version) throws IOException; /** * List all FHIR stores in a dataset. * - * @param dataset the dataset, in the format: + * @param dataset The dataset, in the format: * projects/project_id/locations/location_id/datasets/dataset_id - * @return a list of FhirStore - * @throws IOException + * @return A list of all FHIR stores in the dataset. + * @throws IOException The IO Exception. */ List listAllFhirStores(String dataset) throws IOException; /** - * Delete hl 7 v 2 store empty. + * Delete Fhir store. * - * @param store the store - * @return the empty - * @throws IOException the io exception + * @param store The FHIR store to be deleted. + * @return Empty. + * @throws IOException The IO Exception. */ - Empty deleteHL7v2Store(String store) throws IOException; - Empty deleteFhirStore(String store) throws IOException; + /** + * Retrieve DicomStudyMetadata. + * + * @param dicomWebPath The Dicom Web Path to retrieve the metadata from. + * @return The study metadata. + * @throws IOException The IO Exception. + */ String retrieveDicomStudyMetadata(String dicomWebPath) throws IOException; + /** + * Create a DicomStore. + * + * @param dataset The dataset that the Dicom Store should be in, in the format: + * projects/project_id/locations/location_id/datasets/dataset_id. + * @param name The name of the Dicom Store to be created. + * @return Empty. + * @throws IOException The IO Exception. + */ DicomStore createDicomStore(String dataset, String name) throws IOException; + /** + * Create a DicomStore with a PubSub listener. + * + * @param dataset The dataset that the Dicom Store should be in, in the format: + * projects/project_id/locations/location_id/datasets/dataset_id + * @param name The name of the Dicom Store to be created. + * @param pubsubTopic Name of PubSub topic connected with the Dicom store. + * @return Empty. + * @throws IOException The IO Exception. + */ DicomStore createDicomStore(String dataset, String name, String pubsubTopic) throws IOException; + /** + * Delete a Dicom Store. + * + * @param name The name of the Dicom Store to be deleted. + * @return Empty. + * @throws IOException The IO Exception. + */ Empty deleteDicomStore(String name) throws IOException; + /** + * Upload to a Dicom Store. + * + * @param webPath String format of webPath to upload into. + * @param filePath filePath of file to upload. + * @return Empty. + * @throws IOException The IO Exception. 
+ */ Empty uploadToDicomStore(String webPath, String filePath) throws IOException, URISyntaxException; } diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageWriteIT.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageWriteIT.java index 81de67f38502..fc3ce0be4b69 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageWriteIT.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageWriteIT.java @@ -33,9 +33,16 @@ import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.MapElements; +import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.PeriodicImpulse; +import org.apache.beam.sdk.transforms.SimpleFunction; +import org.apache.beam.sdk.values.PBegin; +import org.apache.beam.sdk.values.PCollection; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; import org.joda.time.Duration; +import org.joda.time.Instant; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -81,11 +88,32 @@ public void processElement(ProcessContext c) { } } - private GenerateSequence stream(int rowCount) { - int timestampIntervalInMilliseconds = 10; - return GenerateSequence.from(0) - .to(rowCount) - .withRate(1, Duration.millis(timestampIntervalInMilliseconds)); + static class UnboundedStream extends PTransform> { + + private final int rowCount; + + public UnboundedStream(int rowCount) { + this.rowCount = rowCount; + } + + @Override + public PCollection expand(PBegin input) { + int timestampIntervalInMillis = 10; + PeriodicImpulse impulse = + PeriodicImpulse.create() + .stopAfter(Duration.millis((long) timestampIntervalInMillis * rowCount - 1)) + .withInterval(Duration.millis(timestampIntervalInMillis)); + return input + .apply(impulse) + .apply( + MapElements.via( + new SimpleFunction() { + @Override + public Long apply(Instant input) { + return input.getMillis(); + } + })); + } } private void runBigQueryIOStorageWritePipeline( @@ -102,7 +130,9 @@ private void runBigQueryIOStorageWritePipeline( new TableFieldSchema().setName("str").setType("STRING"))); Pipeline p = Pipeline.create(bqOptions); - p.apply("Input", isStreaming ? stream(rowCount) : GenerateSequence.from(0).to(rowCount)) + p.apply( + "Input", + isStreaming ? 
new UnboundedStream(rowCount) : GenerateSequence.from(0).to(rowCount)) .apply("GenerateMessage", ParDo.of(new FillRowFn())) .apply( "WriteToBQ", diff --git a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java index e49ef05974fe..6e7ad865cc35 100644 --- a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java +++ b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java @@ -786,7 +786,7 @@ public PCollection expand(PBegin input) { // Spotbugs seems to not understand the multi-statement try-with-resources @SuppressFBWarnings("OBL_UNSATISFIED_OBLIGATION") - private static Schema inferBeamSchema(DataSource ds, String query) { + public static Schema inferBeamSchema(DataSource ds, String query) { try (Connection conn = ds.getConnection(); PreparedStatement statement = conn.prepareStatement( diff --git a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/LogicalTypes.java b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/LogicalTypes.java index 674b274f907b..6e8e46b7afa2 100644 --- a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/LogicalTypes.java +++ b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/LogicalTypes.java @@ -19,7 +19,6 @@ import java.sql.JDBCType; import java.time.Instant; -import java.util.Objects; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.Schema.FieldType; import org.apache.beam.sdk.schemas.logicaltypes.FixedBytes; @@ -30,11 +29,11 @@ import org.apache.beam.sdk.schemas.logicaltypes.VariableBytes; import org.apache.beam.sdk.schemas.logicaltypes.VariableString; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.checkerframework.checker.nullness.qual.Nullable; /** Beam {@link org.apache.beam.sdk.schemas.Schema.LogicalType} implementations of JDBC types. */ class LogicalTypes { + // Logical types of the following static members are not portable and are preserved for + // compatibility reason. Consider using portable logical types when adding new ones. static final Schema.FieldType JDBC_BIT_TYPE = Schema.FieldType.logicalType( new PassThroughLogicalType( @@ -110,69 +109,4 @@ static Schema.LogicalType fixedOrVariableBytes(String name, int return FixedBytes.of(name, length); } } - - /** Base class for JDBC logical types. 
*/ - abstract static class JdbcLogicalType - implements Schema.LogicalType { - protected final String identifier; - protected final Schema.FieldType argumentType; - protected final Schema.FieldType baseType; - protected final Object argument; - - protected JdbcLogicalType( - String identifier, - Schema.FieldType argumentType, - Schema.FieldType baseType, - Object argument) { - this.identifier = identifier; - this.argumentType = argumentType; - this.baseType = baseType; - this.argument = argument; - } - - @Override - public String getIdentifier() { - return identifier; - } - - @Override - public FieldType getArgumentType() { - return argumentType; - } - - @Override - @SuppressWarnings("TypeParameterUnusedInFormals") - public ArgumentT getArgument() { - return (ArgumentT) argument; - } - - @Override - public Schema.FieldType getBaseType() { - return baseType; - } - - @Override - public T toBaseType(T input) { - return input; - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) { - return true; - } - if (!(o instanceof JdbcLogicalType)) { - return false; - } - JdbcLogicalType that = (JdbcLogicalType) o; - return Objects.equals(identifier, that.identifier) - && Objects.equals(baseType, that.baseType) - && Objects.equals(argument, that.argument); - } - - @Override - public int hashCode() { - return Objects.hash(identifier, baseType, argument); - } - } } diff --git a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/SchemaUtil.java b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/SchemaUtil.java index 234b60cd3879..65f21308ea32 100644 --- a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/SchemaUtil.java +++ b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/SchemaUtil.java @@ -354,7 +354,7 @@ private static ResultSetFieldExtractor createObjectExtractor() { * A {@link org.apache.beam.sdk.io.jdbc.JdbcIO.RowMapper} implementation that converts JDBC * results into Beam {@link Row} objects. */ - static final class BeamRowMapper implements JdbcIO.RowMapper { + public static final class BeamRowMapper implements JdbcIO.RowMapper { private final Schema schema; private final List fieldExtractors; diff --git a/sdks/python/apache_beam/dataframe/frame_base.py b/sdks/python/apache_beam/dataframe/frame_base.py index 1da12ececff6..4e89e473b730 100644 --- a/sdks/python/apache_beam/dataframe/frame_base.py +++ b/sdks/python/apache_beam/dataframe/frame_base.py @@ -500,6 +500,8 @@ def wrap(func): removed_arg_names = removed_args if removed_args is not None else [] + # We would need to add position only arguments if they ever become a thing + # in Pandas (as of 2.1 currently they aren't). base_arg_spec = getfullargspec(unwrap(getattr(base_type, func.__name__))) base_arg_names = base_arg_spec.args # Some arguments are keyword only and we still want to check against those. @@ -514,6 +516,9 @@ def wrap(func): @functools.wraps(func) def wrapper(*args, **kwargs): + if len(args) > len(base_arg_names): + raise TypeError(f"{func.__name__} got too many positioned arguments.") + for name, value in zip(base_arg_names, args): if name in kwargs: raise TypeError( @@ -523,7 +528,7 @@ def wrapper(*args, **kwargs): # Still have to populate these for the Beam function signature. 
if removed_args: for name in removed_args: - if not name in kwargs: + if name not in kwargs: kwargs[name] = None return func(**kwargs) @@ -646,13 +651,18 @@ def wrap(func): return func base_argspec = getfullargspec(unwrap(getattr(base_type, func.__name__))) - if not base_argspec.defaults: + if not base_argspec.defaults and not base_argspec.kwonlydefaults: return func - arg_to_default = dict( - zip( - base_argspec.args[-len(base_argspec.defaults):], - base_argspec.defaults)) + arg_to_default = {} + if base_argspec.defaults: + arg_to_default.update( + zip( + base_argspec.args[-len(base_argspec.defaults):], + base_argspec.defaults)) + + if base_argspec.kwonlydefaults: + arg_to_default.update(base_argspec.kwonlydefaults) unwrapped_func = unwrap(func) # args that do not have defaults in func, but do have defaults in base @@ -664,11 +674,19 @@ def wrap(func): if removed_args: defaults_to_populate -= set(removed_args) + # In pandas 2, many methods rely on the default copy=None + # to mean that copy is the value of copy_on_write. Since + # copy_on_write will always be true for Beam, just fill it + # in here. In pandas 1, the default was True anyway. + if 'copy' in arg_to_default and arg_to_default['copy'] is None: + arg_to_default['copy'] = True + @functools.wraps(func) def wrapper(**kwargs): for name in defaults_to_populate: if name not in kwargs: kwargs[name] = arg_to_default[name] + return func(**kwargs) return wrapper diff --git a/sdks/python/apache_beam/dataframe/frame_base_test.py b/sdks/python/apache_beam/dataframe/frame_base_test.py index 2d16d02ba1ea..0a73905339fd 100644 --- a/sdks/python/apache_beam/dataframe/frame_base_test.py +++ b/sdks/python/apache_beam/dataframe/frame_base_test.py @@ -72,7 +72,7 @@ def add_one(frame): def test_args_to_kwargs(self): class Base(object): - def func(self, a=1, b=2, c=3): + def func(self, a=1, b=2, c=3, *, kw_only=4): pass class Proxy(object): @@ -87,6 +87,9 @@ def func(self, **kwargs): self.assertEqual(proxy.func(2, 4, 6), {'a': 2, 'b': 4, 'c': 6}) self.assertEqual(proxy.func(2, c=6), {'a': 2, 'c': 6}) self.assertEqual(proxy.func(c=6, a=2), {'a': 2, 'c': 6}) + self.assertEqual(proxy.func(2, kw_only=20), {'a': 2, 'kw_only': 20}) + with self.assertRaises(TypeError): # got too many positional arguments + proxy.func(2, 4, 6, 8) def test_args_to_kwargs_populates_defaults(self): class Base(object): @@ -129,6 +132,63 @@ def func_removed_args(self, a, c, **kwargs): proxy.func_removed_args() self.assertEqual(proxy.func_removed_args(12, d=100), {'a': 12, 'd': 100}) + def test_args_to_kwargs_populates_default_handles_kw_only(self): + class Base(object): + def func(self, a, b=2, c=3, *, kw_only=4): + pass + + class ProxyUsesKwOnly(object): + @frame_base.args_to_kwargs(Base) + @frame_base.populate_defaults(Base) + def func(self, a, kw_only, **kwargs): + return dict(kwargs, a=a, kw_only=kw_only) + + proxy = ProxyUsesKwOnly() + + # pylint: disable=too-many-function-args,no-value-for-parameter + with self.assertRaises(TypeError): # missing 1 required positional argument + proxy.func() + + self.assertEqual(proxy.func(100), {'a': 100, 'kw_only': 4}) + self.assertEqual( + proxy.func(2, 4, 6, kw_only=8), { + 'a': 2, 'b': 4, 'c': 6, 'kw_only': 8 + }) + with self.assertRaises(TypeError): + proxy.func(2, 4, 6, 8) # got too many positional arguments + + class ProxyDoesntUseKwOnly(object): + @frame_base.args_to_kwargs(Base) + @frame_base.populate_defaults(Base) + def func(self, a, **kwargs): + return dict(kwargs, a=a) + + proxy = ProxyDoesntUseKwOnly() + + # pylint: 
disable=too-many-function-args,no-value-for-parameter + with self.assertRaises(TypeError): # missing 1 required positional argument + proxy.func() + self.assertEqual(proxy.func(100), {'a': 100}) + self.assertEqual( + proxy.func(2, 4, 6, kw_only=8), { + 'a': 2, 'b': 4, 'c': 6, 'kw_only': 8 + }) + + def test_populate_defaults_overwrites_copy(self): + class Base(object): + def func(self, a=1, b=2, c=3, *, copy=None): + pass + + class Proxy(object): + @frame_base.args_to_kwargs(Base) + @frame_base.populate_defaults(Base) + def func(self, a, copy, **kwargs): + return dict(kwargs, a=a, copy=copy) + + proxy = Proxy() + self.assertEqual(proxy.func(), {'a': 1, 'copy': True}) + self.assertEqual(proxy.func(copy=False), {'a': 1, 'copy': False}) + if __name__ == '__main__': unittest.main() diff --git a/sdks/python/apache_beam/dataframe/frames.py b/sdks/python/apache_beam/dataframe/frames.py index e2390bda28be..a74ccbba041a 100644 --- a/sdks/python/apache_beam/dataframe/frames.py +++ b/sdks/python/apache_beam/dataframe/frames.py @@ -907,7 +907,7 @@ def sort_index(self, axis, **kwargs): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( 'sort_index', - lambda df: df.sort_index(axis, **kwargs), + lambda df: df.sort_index(axis=axis, **kwargs), [self._expr], requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Arbitrary(), @@ -1471,8 +1471,10 @@ def compute_idx(s): index = pd.Index([], dtype=index_dtype) proxy = self._expr.proxy().copy() proxy.index = index - proxy = proxy.append( - pd.Series([1], index=np.asarray(['0']).astype(proxy.index.dtype))) + proxy = pd.concat([ + proxy, + pd.Series([1], index=np.asarray(['0']).astype(proxy.index.dtype)) + ]) idx_func = expressions.ComputedExpression( 'idx_func', @@ -1899,7 +1901,8 @@ def dropna(self, **kwargs): @frame_base.with_docs_from(pd.Series) @frame_base.args_to_kwargs(pd.Series) - @frame_base.populate_defaults(pd.Series) + @frame_base.populate_defaults( + pd.Series, removed_args=['inplace'] if PD_VERSION >= (2, 0) else None) @frame_base.maybe_inplace def set_axis(self, labels, **kwargs): # TODO: assigning the index is generally order-sensitive, but we could @@ -2345,8 +2348,13 @@ def value_counts( result = column.groupby(column, dropna=dropna).size() - # groupby.size() names the index, which we don't need - result.index.name = None + # Pandas 2 introduces new naming for the results. 
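For context before the version branch that follows: assuming pandas >= 2.0, the renaming the comment above refers to looks like this (a standalone illustration, not Beam code; names differ under pandas 1.x):

```python
import pandas as pd  # behavior shown assumes pandas >= 2.0

s = pd.Series([1, 1, 2], name='foo')
counts = s.value_counts()
print(counts.index.name)  # 'foo' (the index takes the series name)
print(counts.name)        # 'count'
print(s.value_counts(normalize=True).name)  # 'proportion'
```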
+ if PD_VERSION >= (2, 0): + result.index.name = getattr(self, "name", None) + result.name = "proportion" if normalize else "count" + else: + # groupby.size() names the index, which we don't need + result.index.name = None if normalize: return result / column.length() @@ -2673,7 +2681,9 @@ def set_index(self, keys, **kwargs): @frame_base.with_docs_from(pd.DataFrame) @frame_base.args_to_kwargs(pd.DataFrame) - @frame_base.populate_defaults(pd.DataFrame) + @frame_base.populate_defaults( + pd.DataFrame, + removed_args=['inplace'] if PD_VERSION >= (2, 0) else None) @frame_base.maybe_inplace def set_axis(self, labels, axis, **kwargs): if axis in ('index', 0): @@ -2687,7 +2697,7 @@ def set_axis(self, labels, axis, **kwargs): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( 'set_axis', - lambda df: df.set_axis(labels, axis, **kwargs), + lambda df: df.set_axis(labels, axis=axis, **kwargs), [self._expr], requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Arbitrary())) @@ -4002,12 +4012,18 @@ def value_counts(self, subset=None, sort=False, normalize=False, columns = subset or list(self.columns) if dropna: - dropped = self.dropna() + # Must include subset here because otherwise we spuriously drop NAs due + # to columns outside our subset. + dropped = self.dropna(subset=subset) else: dropped = self result = dropped.groupby(columns, dropna=dropna).size() + # Pandas 2 introduces new naming for the results. + if PD_VERSION >= (2,0): + result.name = "proportion" if normalize else "count" + if normalize: return result/dropped.length() else: @@ -4915,9 +4931,9 @@ def __setitem__(self, index, value): class _DeferredStringMethods(frame_base.DeferredBase): - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) - @frame_base.populate_defaults(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) + @frame_base.populate_defaults(pd.Series.str) def cat(self, others, join, **kwargs): """If defined, ``others`` must be a :class:`DeferredSeries` or a ``list`` of ``DeferredSeries``.""" @@ -4957,8 +4973,8 @@ def func(*args): requires_partition_by=requires, preserves_partition_by=partitionings.Arbitrary())) - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) def repeat(self, repeats): """``repeats`` must be an ``int`` or a :class:`DeferredSeries`. Lists are not supported because they make this operation order-sensitive.""" @@ -4995,8 +5011,8 @@ def repeat(self, repeats): raise TypeError("str.repeat(repeats=) value must be an int or a " f"DeferredSeries (encountered {type(repeats)}).") - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) def get_dummies(self, **kwargs): """ Series must be categorical dtype. 
Please cast to ``CategoricalDtype`` @@ -5078,9 +5094,9 @@ def func(s): requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Arbitrary())) - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) - @frame_base.populate_defaults(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) + @frame_base.populate_defaults(pd.Series.str) def split(self, **kwargs): """ Like other non-deferred methods, dtype must be CategoricalDtype. @@ -5089,9 +5105,9 @@ def split(self, **kwargs): """ return self._split_helper(rsplit=False, **kwargs) - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) - @frame_base.populate_defaults(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) + @frame_base.populate_defaults(pd.Series.str) def rsplit(self, **kwargs): """ Like other non-deferred methods, dtype must be CategoricalDtype. @@ -5169,17 +5185,17 @@ def func(df, *args, **kwargs): return func for method in ELEMENTWISE_STRING_METHODS: - if not hasattr(pd.core.strings.StringMethods, method): + if not hasattr(pd.Series.str, method): # older versions (1.0.x) don't support some of these methods continue setattr(_DeferredStringMethods, method, frame_base._elementwise_method(make_str_func(method), name=method, - base=pd.core.strings.StringMethods)) + base=pd.Series.str)) for method in NON_ELEMENTWISE_STRING_METHODS: - if not hasattr(pd.core.strings.StringMethods, method): + if not hasattr(pd.Series.str, method): # older versions (1.0.x) don't support some of these methods continue setattr(_DeferredStringMethods, @@ -5187,7 +5203,7 @@ def func(df, *args, **kwargs): frame_base._proxy_method( make_str_func(method), name=method, - base=pd.core.strings.StringMethods, + base=pd.Series.str, requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Singleton())) @@ -5361,11 +5377,12 @@ def func(df, *args, **kwargs): 'second', 'time', 'timetz', - 'week', 'weekday', - 'weekofyear', 'year', ] +# Pandas 2 removed these. +if PD_VERSION < (2, 0): + ELEMENTWISE_DATETIME_PROPERTIES += ['week', 'weekofyear'] for method in ELEMENTWISE_DATETIME_PROPERTIES: setattr(_DeferredDatetimeMethods, diff --git a/sdks/python/apache_beam/dataframe/frames_test.py b/sdks/python/apache_beam/dataframe/frames_test.py index 4e59d1da5de4..e3555b50187b 100644 --- a/sdks/python/apache_beam/dataframe/frames_test.py +++ b/sdks/python/apache_beam/dataframe/frames_test.py @@ -17,9 +17,11 @@ import re import unittest import warnings +from typing import Dict import numpy as np import pandas as pd +import pytest from parameterized import parameterized import apache_beam as beam @@ -44,6 +46,10 @@ 'str': [str(i) for i in range(100)], }) +if PD_VERSION < (2, 0): + # All these are things that are fixed in the Pandas 2 transition. 
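Related background for the aggregation tests further down: pandas 2 flipped the default for many aggregations from `numeric_only=True` to `False`, so mixed-dtype frames now need it passed explicitly. A quick illustration (assumes pandas >= 2.0; the frame is made up):

```python
import pandas as pd

df = pd.DataFrame({
    'group': ['a', 'a', 'b'],
    'foo': [1, 2, 3],
    'str': ['x', 'y', 'z'],
})
# pandas 1.x silently dropped 'str' here; pandas 2 raises a TypeError
# unless numeric_only is passed explicitly.
print(df.groupby('group').mean(numeric_only=True))
```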
+ pytestmark = pytest.mark.filterwarnings("ignore::FutureWarning") + def _get_deferred_args(*args): return [ @@ -187,6 +193,9 @@ def _run_test( if expected.index.is_unique: expected = expected.sort_index() actual = actual.sort_index() + elif isinstance(expected, pd.Series): + expected = expected.sort_values() + actual = actual.sort_values() else: expected = expected.sort_values(list(expected.columns)) actual = actual.sort_values(list(actual.columns)) @@ -688,6 +697,8 @@ def test_value_counts_with_nans(self): self._run_test(lambda df: df.value_counts(), df) self._run_test(lambda df: df.value_counts(normalize=True), df) + # Ensure we don't drop rows due to nan values in unused columns. + self._run_test(lambda df: df.value_counts('num_wings'), df) if PD_VERSION >= (1, 3): # dropna=False is new in pandas 1.3 @@ -1634,6 +1645,30 @@ def test_pivot_no_index_provided_on_multiindex(self): # https://github.com/pandas-dev/pandas/issues/40139 ALL_GROUPING_AGGREGATIONS = sorted( set(frames.ALL_AGGREGATIONS) - set(('kurt', 'kurtosis'))) +AGGREGATIONS_WHERE_NUMERIC_ONLY_DEFAULTS_TO_TRUE_IN_PANDAS_1 = set( + frames.ALL_AGGREGATIONS) - set(( + 'nunique', + 'size', + 'count', + 'idxmin', + 'idxmax', + 'mode', + 'rank', + 'all', + 'any', + 'describe')) + + +def numeric_only_kwargs_for_pandas_2(agg_type: str) -> Dict[str, bool]: + """Get proper arguments for numeric_only. + + Behavior for numeric_only in these methods changed in Pandas 2 to default + to False instead of True, so explicitly make it True in Pandas 2.""" + if PD_VERSION >= (2, 0) and ( + agg_type in AGGREGATIONS_WHERE_NUMERIC_ONLY_DEFAULTS_TO_TRUE_IN_PANDAS_1): + return {'numeric_only': True} + else: + return {} class GroupByTest(_AbstractFrameTest): @@ -1650,8 +1685,9 @@ def test_groupby_agg(self, agg_type): self.skipTest( "https://github.com/apache/beam/issues/20967: proxy generation of " "DataFrameGroupBy.describe fails in pandas < 1.2") + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) self._run_test( - lambda df: df.groupby('group').agg(agg_type), + lambda df: df.groupby('group').agg(agg_type, **kwargs), GROUPBY_DF, check_proxy=False) @@ -1661,8 +1697,10 @@ def test_groupby_with_filter(self, agg_type): self.skipTest( "https://github.com/apache/beam/issues/20967: proxy generation of " "DataFrameGroupBy.describe fails in pandas < 1.2") + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) self._run_test( - lambda df: getattr(df[df.foo > 30].groupby('group'), agg_type)(), + lambda df: getattr(df[df.foo > 30].groupby('group'), agg_type) + (**kwargs), GROUPBY_DF, check_proxy=False) @@ -1673,8 +1711,9 @@ def test_groupby(self, agg_type): "https://github.com/apache/beam/issues/20967: proxy generation of " "DataFrameGroupBy.describe fails in pandas < 1.2") + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) self._run_test( - lambda df: getattr(df.groupby('group'), agg_type)(), + lambda df: getattr(df.groupby('group'), agg_type)(**kwargs), GROUPBY_DF, check_proxy=False) @@ -1685,8 +1724,10 @@ def test_groupby_series(self, agg_type): "https://github.com/apache/beam/issues/20967: proxy generation of " "DataFrameGroupBy.describe fails in pandas < 1.2") + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) self._run_test( - lambda df: getattr(df[df.foo > 40].groupby(df.group), agg_type)(), + lambda df: getattr(df[df.foo > 40].groupby(df.group), agg_type) + (**kwargs), GROUPBY_DF, check_proxy=False) @@ -1717,12 +1758,15 @@ def test_groupby_project_series(self, agg_type): "https://github.com/apache/beam/issues/20895: " "SeriesGroupBy.{corr, cov} do 
not raise the expected error.") - self._run_test(lambda df: getattr(df.groupby('group').foo, agg_type)(), df) - self._run_test(lambda df: getattr(df.groupby('group').bar, agg_type)(), df) + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) + self._run_test( + lambda df: getattr(df.groupby('group').foo, agg_type)(**kwargs), df) + self._run_test( + lambda df: getattr(df.groupby('group').bar, agg_type)(**kwargs), df) self._run_test( - lambda df: getattr(df.groupby('group')['foo'], agg_type)(), df) + lambda df: getattr(df.groupby('group')['foo'], agg_type)(**kwargs), df) self._run_test( - lambda df: getattr(df.groupby('group')['bar'], agg_type)(), df) + lambda df: getattr(df.groupby('group')['bar'], agg_type)(**kwargs), df) @parameterized.expand(ALL_GROUPING_AGGREGATIONS) def test_groupby_project_dataframe(self, agg_type): @@ -1730,8 +1774,10 @@ def test_groupby_project_dataframe(self, agg_type): self.skipTest( "https://github.com/apache/beam/issues/20967: proxy generation of " "DataFrameGroupBy.describe fails in pandas < 1.2") + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) self._run_test( - lambda df: getattr(df.groupby('group')[['bar', 'baz']], agg_type)(), + lambda df: getattr(df.groupby('group')[['bar', 'baz']], agg_type) + (**kwargs), GROUPBY_DF, check_proxy=False) @@ -1760,9 +1806,10 @@ def test_groupby_errors_non_existent_label(self): def test_groupby_callable(self): df = GROUPBY_DF - - self._run_test(lambda df: df.groupby(lambda x: x % 2).foo.sum(), df) - self._run_test(lambda df: df.groupby(lambda x: x % 5).median(), df) + kwargs = numeric_only_kwargs_for_pandas_2('sum') + self._run_test(lambda df: df.groupby(lambda x: x % 2).foo.sum(**kwargs), df) + kwargs = numeric_only_kwargs_for_pandas_2('median') + self._run_test(lambda df: df.groupby(lambda x: x % 5).median(**kwargs), df) def test_groupby_apply(self): df = GROUPBY_DF @@ -1790,8 +1837,8 @@ def test_groupby_apply_preserves_column_order(self): df = GROUPBY_DF self._run_test( - lambda df: df[['foo', 'group', 'bar']].groupby('group').apply( - lambda x: x), + lambda df: df[['foo', 'group', 'bar']].groupby( + 'group', group_keys=False).apply(lambda x: x), df) def test_groupby_transform(self): @@ -1817,8 +1864,9 @@ def test_groupby_transform(self): def test_groupby_pipe(self): df = GROUPBY_DF - - self._run_test(lambda df: df.groupby('group').pipe(lambda x: x.sum()), df) + kwargs = numeric_only_kwargs_for_pandas_2('sum') + self._run_test( + lambda df: df.groupby('group').pipe(lambda x: x.sum(**kwargs)), df) self._run_test( lambda df: df.groupby('group')['bool'].pipe(lambda x: x.any()), df) self._run_test( @@ -1900,14 +1948,14 @@ def test_dataframe_groupby_series(self, agg_type): self.skipTest( "https://github.com/apache/beam/issues/20967: proxy generation of " "DataFrameGroupBy.describe fails in pandas < 1.2") + + def agg(df, group_by): + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) + return df[df.foo > 40].groupby(group_by).agg(agg_type, **kwargs) + + self._run_test(lambda df: agg(df, df.group), GROUPBY_DF, check_proxy=False) self._run_test( - lambda df: df[df.foo > 40].groupby(df.group).agg(agg_type), - GROUPBY_DF, - check_proxy=False) - self._run_test( - lambda df: df[df.foo > 40].groupby(df.foo % 3).agg(agg_type), - GROUPBY_DF, - check_proxy=False) + lambda df: agg(df, df.foo % 3), GROUPBY_DF, check_proxy=False) @parameterized.expand(ALL_GROUPING_AGGREGATIONS) def test_series_groupby_series(self, agg_type): @@ -2941,7 +2989,7 @@ class DocstringTest(unittest.TestCase): (frames.DeferredDataFrame, pd.DataFrame), 
(frames.DeferredSeries, pd.Series), #(frames._DeferredIndex, pd.Index), - (frames._DeferredStringMethods, pd.core.strings.StringMethods), + (frames._DeferredStringMethods, pd.Series.str), ( frames._DeferredCategoricalMethods, pd.core.arrays.categorical.CategoricalAccessor), diff --git a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py index ed8745ec2ac1..54a473d1b52b 100644 --- a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py +++ b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py @@ -17,6 +17,7 @@ # pytype: skip-file +import datetime import logging import time import typing @@ -60,7 +61,8 @@ "JdbcTestRow", [("f_id", int), ("f_float", float), ("f_char", str), ("f_varchar", str), ("f_bytes", bytes), ("f_varbytes", bytes), ("f_timestamp", Timestamp), - ("f_decimal", Decimal)], + ("f_decimal", Decimal), ("f_date", datetime.date), + ("f_time", datetime.time)], ) coders.registry.register_coder(JdbcTestRow, coders.RowCoder) @@ -132,7 +134,7 @@ def test_xlang_jdbc_write_read(self, database): "f_float DOUBLE PRECISION, " + "f_char CHAR(10), " + "f_varchar VARCHAR(10), " + f"f_bytes {binary_type[0]}, " + f"f_varbytes {binary_type[1]}, " + "f_timestamp TIMESTAMP(3), " + - "f_decimal DECIMAL(10, 2))") + "f_decimal DECIMAL(10, 2), " + "f_date DATE, " + "f_time TIME(3))") inserted_rows = [ JdbcTestRow( i, @@ -144,7 +146,11 @@ # In alignment with Java Instant which supports milli precision. Timestamp.of(seconds=round(time.time(), 3)), # Test both positive and negative numbers. - Decimal(f'{i-1}.23')) for i in range(ROW_COUNT) + Decimal(f'{i-1}.23'), + # Test dates both before and after the epoch + datetime.date(1969 + i, i % 12 + 1, i % 31 + 1), + datetime.time(i % 24, i % 60, i % 60, (i * 1000) % 1_000_000)) + for i in range(ROW_COUNT) ] expected_row = [] for row in inserted_rows: @@ -163,7 +169,9 @@ def test_xlang_jdbc_write_read(self, database): f_bytes, row.f_bytes, row.f_timestamp, - row.f_decimal)) + row.f_decimal, + row.f_date, + row.f_time)) with TestPipeline() as p: p.not_use_test_runner_api = True diff --git a/sdks/python/apache_beam/io/gcp/bigquery_test.py b/sdks/python/apache_beam/io/gcp/bigquery_test.py index 3a3033dfcaf4..7e9c1e634748 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_test.py @@ -77,7 +77,6 @@ from apache_beam.testing.util import equal_to from apache_beam.transforms.display import DisplayData from apache_beam.transforms.display_test import DisplayDataItemMatcher -from apache_beam.utils import retry # Protect against environments where bigquery library is not available. # pylint: disable=wrong-import-order, wrong-import-position @@ -931,77 +930,269 @@ def test_copy_load_job_exception(self, exception_type, error_message): 'GCP dependencies are not installed') class BigQueryStreamingInsertsErrorHandling(unittest.TestCase): - # Using https://cloud.google.com/bigquery/docs/error-messages and - # https://googleapis.dev/python/google-api-core/latest/_modules/google - # /api_core/exceptions.html - # to determine error types and messages to try for retriables. + # Running tests with a variety of exceptions from https://googleapis.dev + # /python/google-api-core/latest/_modules/google/api_core/exceptions.html. 
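A note on the mechanics of the exception-based tests that follow: the retry logic inspects the HTTP reason attached to the raised exception, which the tests simulate by handing the real `google.api_core` exception classes a mock response, roughly like this (a sketch; only the reason string is made up):

```python
from unittest import mock

from google.api_core import exceptions

# api_core exceptions accept an optional response object; the tests give
# them a mock whose .reason carries the HTTP status text.
mock_response = mock.Mock()
mock_response.reason = 'Too Many Requests'
exc = exceptions.TooManyRequests('some exception', response=mock_response)
print(exc.response.reason)  # 'Too Many Requests'
```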
+ # Choosing some exceptions that produce reasons included in + # bigquery_tools._NON_TRANSIENT_ERRORS and some that are not @parameterized.expand([ + # reason not in _NON_TRANSIENT_ERRORS for row 1 on first attempt + # transient error retried and succeeds on second attempt, 0 rows sent to + # failed rows param( - exception_type=exceptions.Forbidden if exceptions else None, - error_reason='rateLimitExceeded'), + insert_response=[ + exceptions.TooManyRequests if exceptions else None, + None], + error_reason='Too Many Requests', # not in _NON_TRANSIENT_ERRORS + failed_rows=[]), + # reason not in _NON_TRANSIENT_ERRORS for row 1 on both attempts, sent to + # failed rows after hitting max_retries + param( + insert_response=[ + exceptions.InternalServerError if exceptions else None, + exceptions.InternalServerError if exceptions else None], + error_reason='Internal Server Error', # not in _NON_TRANSIENT_ERRORS + failed_rows=['value1', 'value3', 'value5']), + # reason in _NON_TRANSIENT_ERRORS for row 1 on both attempts, sent to + # failed_rows after hitting max_retries + param( + insert_response=[ + exceptions.Forbidden if exceptions else None, + exceptions.Forbidden if exceptions else None], + error_reason='Forbidden', # in _NON_TRANSIENT_ERRORS + failed_rows=['value1', 'value3', 'value5']), + ]) + def test_insert_rows_json_exception_retry_always( + self, insert_response, error_reason, failed_rows): + # In this test, a pipeline will always retry all caught exception types + # since RetryStrategy is not set and defaults to RETRY_ALWAYS + with mock.patch('time.sleep'): + call_counter = 0 + mock_response = mock.Mock() + mock_response.reason = error_reason + + def store_callback(table, **kwargs): + nonlocal call_counter + # raise exception if insert_response element is an exception + if insert_response[call_counter]: + exception_type = insert_response[call_counter] + call_counter += 1 + raise exception_type('some exception', response=mock_response) + # return empty list if not insert_response element, indicating + # successful call to insert_rows_json + else: + call_counter += 1 + return [] + + client = mock.Mock() + client.insert_rows_json.side_effect = store_callback + + # Using the bundle based direct runner to avoid pickling problems + # with mocks. + with beam.Pipeline(runner='BundleBasedDirectRunner') as p: + bq_write_out = ( + p + | beam.Create([{ + 'columnA': 'value1', 'columnB': 'value2' + }, { + 'columnA': 'value3', 'columnB': 'value4' + }, { + 'columnA': 'value5', 'columnB': 'value6' + }]) + # Using _StreamToBigQuery in order to be able to pass max_retries + # in order to limit run time of test with RETRY_ALWAYS + | _StreamToBigQuery( + table_reference='project:dataset.table', + table_side_inputs=[], + schema_side_inputs=[], + schema='anyschema', + batch_size=None, + triggering_frequency=None, + create_disposition='CREATE_NEVER', + write_disposition=None, + kms_key=None, + retry_strategy=RetryStrategy.RETRY_ALWAYS, + additional_bq_parameters=[], + ignore_insert_ids=False, + ignore_unknown_columns=False, + with_auto_sharding=False, + test_client=client, + max_retries=len(insert_response) - 1, + num_streaming_keys=500)) + + failed_values = ( + bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS] + | beam.Map(lambda x: x[1]['columnA'])) + + assert_that(failed_values, equal_to(failed_rows)) + + # Running tests with a variety of exceptions from https://googleapis.dev + # /python/google-api-core/latest/_modules/google/api_core/exceptions.html. 
+ # Choosing some exceptions that produce reasons that are included in + # bigquery_tools._NON_TRANSIENT_ERRORS and some that are not + @parameterized.expand([ + param( + # not in _NON_TRANSIENT_ERRORS + exception_type=exceptions.BadGateway if exceptions else None, + error_reason='Bad Gateway'), + param( + # in _NON_TRANSIENT_ERRORS + exception_type=exceptions.Unauthorized if exceptions else None, + error_reason='Unauthorized'), + ]) + @mock.patch('time.sleep') + @mock.patch('google.cloud.bigquery.Client.insert_rows_json') + def test_insert_rows_json_exception_retry_never( + self, mock_send, unused_mock_sleep, exception_type, error_reason): + # In this test, a pipeline will never retry caught exception types + # since RetryStrategy is set to RETRY_NEVER + mock_response = mock.Mock() + mock_response.reason = error_reason + mock_send.side_effect = [ + exception_type('some exception', response=mock_response) + ] + + with beam.Pipeline(runner='BundleBasedDirectRunner') as p: + bq_write_out = ( + p + | beam.Create([{ + 'columnA': 'value1' + }, { + 'columnA': 'value2' + }]) + | WriteToBigQuery( + table='project:dataset.table', + schema={ + 'fields': [{ + 'name': 'columnA', 'type': 'STRING', 'mode': 'NULLABLE' + }] + }, + create_disposition='CREATE_NEVER', + method='STREAMING_INSERTS', + insert_retry_strategy=RetryStrategy.RETRY_NEVER)) + failed_values = ( + bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS_WITH_ERRORS] + | beam.Map(lambda x: x[1]['columnA'])) + + assert_that(failed_values, equal_to(['value1', 'value2'])) + + self.assertEqual(1, mock_send.call_count) + + # Running tests with a variety of exceptions from https://googleapis.dev + # /python/google-api-core/latest/_modules/google/api_core/exceptions.html. + # Choosing some exceptions that produce reasons that are included in + # bigquery_tools._NON_TRANSIENT_ERRORS and some that are not + @parameterized.expand([ param( exception_type=exceptions.DeadlineExceeded if exceptions else None, - error_reason='somereason'), + error_reason='Deadline Exceeded', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), param( - exception_type=exceptions.ServiceUnavailable if exceptions else None, - error_reason='backendError'), + exception_type=exceptions.Conflict if exceptions else None, + error_reason='Conflict', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), param( - exception_type=exceptions.InternalServerError if exceptions else None, - error_reason='internalError'), + exception_type=exceptions.TooManyRequests if exceptions else None, + error_reason='Too Many Requests', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), param( exception_type=exceptions.InternalServerError if exceptions else None, - error_reason='backendError'), + error_reason='Internal Server Error', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), + param( + exception_type=exceptions.BadGateway if exceptions else None, + error_reason='Bad Gateway', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), + param( + exception_type=exceptions.ServiceUnavailable if exceptions else None, + error_reason='Service Unavailable', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), + param( + exception_type=exceptions.GatewayTimeout if exceptions else None, + error_reason='Gateway Timeout', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), + param( + exception_type=exceptions.BadRequest if exceptions else None, 
+ error_reason='Bad Request', # in _NON_TRANSIENT_ERRORS + failed_values=['value1', 'value2'], + expected_call_count=1), + param( + exception_type=exceptions.Unauthorized if exceptions else None, + error_reason='Unauthorized', # in _NON_TRANSIENT_ERRORS + failed_values=['value1', 'value2'], + expected_call_count=1), + param( + exception_type=exceptions.Forbidden if exceptions else None, + error_reason='Forbidden', # in _NON_TRANSIENT_ERRORS + failed_values=['value1', 'value2'], + expected_call_count=1), + param( + exception_type=exceptions.NotFound if exceptions else None, + error_reason='Not Found', # in _NON_TRANSIENT_ERRORS + failed_values=['value1', 'value2'], + expected_call_count=1), + param( + exception_type=exceptions.MethodNotImplemented + if exceptions else None, + error_reason='Not Implemented', # in _NON_TRANSIENT_ERRORS + failed_values=['value1', 'value2'], + expected_call_count=1), ]) @mock.patch('time.sleep') @mock.patch('google.cloud.bigquery.Client.insert_rows_json') - def test_insert_all_retries_if_structured_retriable( + def test_insert_rows_json_exception_retry_on_transient_error( self, mock_send, unused_mock_sleep, - exception_type=None, - error_reason=None): - # In this test, a BATCH pipeline will retry the known RETRIABLE errors. + exception_type, + error_reason, + failed_values, + expected_call_count): + # In this test, a pipeline will only retry caught exception types + # with reasons that are not in _NON_TRANSIENT_ERRORS since RetryStrategy is + # set to RETRY_ON_TRANSIENT_ERROR + mock_response = mock.Mock() + mock_response.reason = error_reason mock_send.side_effect = [ - exception_type( - 'some retriable exception', errors=[{ - 'reason': error_reason - }]), - exception_type( - 'some retriable exception', errors=[{ - 'reason': error_reason - }]), - exception_type( - 'some retriable exception', errors=[{ - 'reason': error_reason - }]), - exception_type( - 'some retriable exception', errors=[{ - 'reason': error_reason - }]), + exception_type('some exception', response=mock_response), + # Return no exception and no errors on 2nd call, if there is a 2nd call + [] ] - with self.assertRaises(Exception) as exc: - with beam.Pipeline() as p: - _ = ( - p - | beam.Create([{ - 'columnA': 'value1' - }]) - | WriteToBigQuery( - table='project:dataset.table', - schema={ - 'fields': [{ - 'name': 'columnA', 'type': 'STRING', 'mode': 'NULLABLE' - }] - }, - create_disposition='CREATE_NEVER', - method='STREAMING_INSERTS')) - self.assertEqual(4, mock_send.call_count) - self.assertIn('some retriable exception', exc.exception.args[0]) + with beam.Pipeline(runner='BundleBasedDirectRunner') as p: + bq_write_out = ( + p + | beam.Create([{ + 'columnA': 'value1' + }, { + 'columnA': 'value2' + }]) + | WriteToBigQuery( + table='project:dataset.table', + schema={ + 'fields': [{ + 'name': 'columnA', 'type': 'STRING', 'mode': 'NULLABLE' + }] + }, + create_disposition='CREATE_NEVER', + method='STREAMING_INSERTS', + insert_retry_strategy=RetryStrategy.RETRY_ON_TRANSIENT_ERROR)) + failed_values_out = ( + bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS] + | beam.Map(lambda x: x[1]['columnA'])) + + assert_that(failed_values_out, equal_to(failed_values)) + self.assertEqual(expected_call_count, mock_send.call_count) - # Using https://googleapis.dev/python/google-api-core/latest/_modules/google - # /api_core/exceptions.html - # to determine error types and messages to try for retriables. 
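For readers tracking the three strategies exercised here, a hedged sketch of the decision they reduce to (the real logic is `bigquery_tools.RetryStrategy.should_retry`; the set below matches the `_NON_TRANSIENT_ERRORS` expansion later in this diff):

```python
_NON_TRANSIENT_ERRORS = {
    'invalid', 'invalidQuery', 'notImplemented', 'Bad Request',
    'Unauthorized', 'Forbidden', 'Not Found', 'Not Implemented',
}

def should_retry(strategy, error_reason):
    # Sketch only: RETRY_ALWAYS retries everything, RETRY_NEVER nothing,
    # RETRY_ON_TRANSIENT_ERROR anything not known to be permanent.
    if strategy == 'RETRY_ALWAYS':
        return True
    if strategy == 'RETRY_NEVER':
        return False
    return error_reason not in _NON_TRANSIENT_ERRORS

assert should_retry('RETRY_ON_TRANSIENT_ERROR', 'internalError')
assert not should_retry('RETRY_ON_TRANSIENT_ERROR', 'Not Found')
```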
+ # Running tests with persistent exceptions with exception types not + # caught in BigQueryWrapper._insert_all_rows but retriable by + # retry.with_exponential_backoff @parameterized.expand([ param( exception_type=requests.exceptions.ConnectionError, @@ -1009,28 +1200,18 @@ def test_insert_all_retries_if_structured_retriable( param( exception_type=requests.exceptions.Timeout, error_message='some timeout error'), - param( - exception_type=ConnectionError, - error_message='some py connection error'), - param( - exception_type=exceptions.BadGateway if exceptions else None, - error_message='some badgateway error'), ]) @mock.patch('time.sleep') @mock.patch('google.cloud.bigquery.Client.insert_rows_json') - def test_insert_all_retries_if_unstructured_retriable( - self, - mock_send, - unused_mock_sleep, - exception_type=None, - error_message=None): - # In this test, a BATCH pipeline will retry the unknown RETRIABLE errors. - mock_send.side_effect = [ - exception_type(error_message), - exception_type(error_message), - exception_type(error_message), - exception_type(error_message), - ] + def test_insert_rows_json_persistent_retriable_exception( + self, mock_send, unused_mock_sleep, exception_type, error_message): + # In this test, each insert_rows_json call will result in an exception + # and be retried with retry.with_exponential_backoff until MAX_RETRIES is + # reached + mock_send.side_effect = exception_type(error_message) + + # Expecting 1 initial call plus maximum number of retries + expected_call_count = 1 + bigquery_tools.MAX_RETRIES with self.assertRaises(Exception) as exc: with beam.Pipeline() as p: @@ -1038,6 +1219,8 @@ def test_insert_all_retries_if_unstructured_retriable( p | beam.Create([{ 'columnA': 'value1' + }, { + 'columnA': 'value2' }]) | WriteToBigQuery( table='project:dataset.table', @@ -1048,138 +1231,390 @@ def test_insert_all_retries_if_unstructured_retriable( }, create_disposition='CREATE_NEVER', method='STREAMING_INSERTS')) - self.assertEqual(4, mock_send.call_count) + + self.assertEqual(expected_call_count, mock_send.call_count) self.assertIn(error_message, exc.exception.args[0]) - # Using https://googleapis.dev/python/google-api-core/latest/_modules/google - # /api_core/exceptions.html - # to determine error types and messages to try for retriables. 
+ # Running tests with intermittent exceptions with exception types not + # caught in BigQueryWrapper._insert_all_rows but retriable by + # retry.with_exponential_backoff @parameterized.expand([ param( - exception_type=retry.PermanentException, - error_args=('nonretriable', )), - param( - exception_type=exceptions.BadRequest if exceptions else None, - error_args=( - 'forbidden morbidden', [{ - 'reason': 'nonretriablereason' - }])), - param( - exception_type=exceptions.BadRequest if exceptions else None, - error_args=('BAD REQUEST!', [{ - 'reason': 'nonretriablereason' - }])), - param( - exception_type=exceptions.MethodNotAllowed if exceptions else None, - error_args=( - 'method not allowed!', [{ - 'reason': 'nonretriablereason' - }])), - param( - exception_type=exceptions.MethodNotAllowed if exceptions else None, - error_args=('method not allowed!', 'args')), - param( - exception_type=exceptions.Unknown if exceptions else None, - error_args=('unknown!', 'args')), + exception_type=requests.exceptions.ConnectionError, + error_message='some connection error'), param( - exception_type=exceptions.Aborted if exceptions else None, - error_args=('abortet!', 'abort')), + exception_type=requests.exceptions.Timeout, + error_message='some timeout error'), ]) @mock.patch('time.sleep') @mock.patch('google.cloud.bigquery.Client.insert_rows_json') - def test_insert_all_unretriable_errors( - self, mock_send, unused_mock_sleep, exception_type=None, error_args=None): - # In this test, a BATCH pipeline will retry the unknown RETRIABLE errors. + def test_insert_rows_json_intermittent_retriable_exception( + self, mock_send, unused_mock_sleep, exception_type, error_message): + # In this test, the first 2 insert_rows_json calls will result in an + # exception and be retried with retry.with_exponential_backoff. The last + # call will not raise an exception and will succeed. mock_send.side_effect = [ - exception_type(*error_args), - exception_type(*error_args), - exception_type(*error_args), - exception_type(*error_args), + exception_type(error_message), exception_type(error_message), [] ] - with self.assertRaises(Exception): - with beam.Pipeline() as p: - _ = ( + with beam.Pipeline() as p: + _ = ( + p + | beam.Create([{ + 'columnA': 'value1' + }, { + 'columnA': 'value2' + }]) + | WriteToBigQuery( + table='project:dataset.table', + schema={ + 'fields': [{ + 'name': 'columnA', 'type': 'STRING', 'mode': 'NULLABLE' + }] + }, + create_disposition='CREATE_NEVER', + method='STREAMING_INSERTS')) + + self.assertEqual(3, mock_send.call_count) + + # Running tests with a variety of error reasons from + # https://cloud.google.com/bigquery/docs/error-messages + # This covers the scenario when + # the google.cloud.bigquery.Client.insert_rows_json call returns an error list + # rather than raising an exception. 
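To make the simulated responses in these tests easier to read, this is the per-row shape `insert_rows_json` returns on partial failure, together with the transient/permanent split applied to it (a sketch; the reasons shown are illustrative):

```python
errors = [
    {'index': 0, 'errors': [{'reason': 'invalid'}]},        # permanent
    {'index': 1, 'errors': [{'reason': 'internalError'}]},  # transient
]

_NON_TRANSIENT_ERRORS = {'invalid', 'invalidQuery', 'notImplemented'}

# Rows with at least one retriable reason are retried; rows whose reasons
# are all non-transient go to FAILED_ROWS.
retriable = [
    e['index'] for e in errors
    if any(err['reason'] not in _NON_TRANSIENT_ERRORS for err in e['errors'])
]
print(retriable)  # [1]
```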
+ # Choosing some error reasons that are included in + # bigquery_tools._NON_TRANSIENT_ERRORS and some that are not + @parameterized.expand([ + # reason in _NON_TRANSIENT_ERRORS for row 1, sent to failed_rows + param( + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }], + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }], + ], + failed_rows=['value1']), + # reason in _NON_TRANSIENT_ERRORS for row 1 + # reason not in _NON_TRANSIENT_ERRORS for row 2 on 1st run + # row 1 sent to failed_rows + param( + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }, { + 'index': 1, 'errors': [{ + 'reason': 'internalError' + }] + }], + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }], + ], + failed_rows=['value1']), + # reason not in _NON_TRANSIENT_ERRORS for row 1 on first attempt + # transient error succeeds on second attempt, 0 rows sent to failed rows + param( + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'internalError' + }] + }], + [], + ], + failed_rows=[]), + ]) + def test_insert_rows_json_errors_retry_always( + self, insert_response, failed_rows, unused_sleep_mock=None): + # In this test, a pipeline will always retry all errors + # since RetryStrategy is not set and defaults to RETRY_ALWAYS + with mock.patch('time.sleep'): + call_counter = 0 + + def store_callback(table, **kwargs): + nonlocal call_counter + response = insert_response[call_counter] + call_counter += 1 + return response + + client = mock.Mock() + client.insert_rows_json = mock.Mock(side_effect=store_callback) + + # Using the bundle based direct runner to avoid pickling problems + # with mocks. + with beam.Pipeline(runner='BundleBasedDirectRunner') as p: + bq_write_out = ( p | beam.Create([{ - 'columnA': 'value1' + 'columnA': 'value1', 'columnB': 'value2' + }, { + 'columnA': 'value3', 'columnB': 'value4' + }, { + 'columnA': 'value5', 'columnB': 'value6' }]) - | WriteToBigQuery( - table='project:dataset.table', - schema={ - 'fields': [{ - 'name': 'columnA', 'type': 'STRING', 'mode': 'NULLABLE' - }] - }, + # Using _StreamToBigQuery in order to be able to pass max_retries + # in order to limit run time of test with RETRY_ALWAYS + | _StreamToBigQuery( + table_reference='project:dataset.table', + table_side_inputs=[], + schema_side_inputs=[], + schema='anyschema', + batch_size=None, + triggering_frequency=None, create_disposition='CREATE_NEVER', - method='STREAMING_INSERTS')) - self.assertEqual(1, mock_send.call_count) + write_disposition=None, + kms_key=None, + retry_strategy=RetryStrategy.RETRY_ALWAYS, + additional_bq_parameters=[], + ignore_insert_ids=False, + ignore_unknown_columns=False, + with_auto_sharding=False, + test_client=client, + max_retries=len(insert_response) - 1, + num_streaming_keys=500)) - # Using https://googleapis.dev/python/google-api-core/latest/_modules/google - # /api_core/exceptions.html - # to determine error types and messages to try for retriables. + failed_values = ( + bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS] + | beam.Map(lambda x: x[1]['columnA'])) + + assert_that(failed_values, equal_to(failed_rows)) + + # Running tests with a variety of error reasons from + # https://cloud.google.com/bigquery/docs/error-messages + # This covers the scenario when + # the google.cloud.bigquery.Client.insert_rows_json call returns an error list + # rather than raising an exception. 
+ # Choosing some error reasons that are included in + # bigquery_tools._NON_TRANSIENT_ERRORS and some that are not @parameterized.expand([ + # reason in _NON_TRANSIENT_ERRORS for row 1, sent to failed_rows param( - exception_type=retry.PermanentException, - error_args=('nonretriable', )), - param( - exception_type=exceptions.BadRequest if exceptions else None, - error_args=( - 'forbidden morbidden', [{ - 'reason': 'nonretriablereason' - }])), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalidQuery' + }] + }], + ], + streaming=False), + # reason not in _NON_TRANSIENT_ERRORS for row 1, sent to failed_rows param( - exception_type=exceptions.BadRequest if exceptions else None, - error_args=('BAD REQUEST!', [{ - 'reason': 'nonretriablereason' - }])), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'internalError' + }] + }], + ], + streaming=False), param( - exception_type=exceptions.MethodNotAllowed if exceptions else None, - error_args=( - 'method not allowed!', [{ - 'reason': 'nonretriablereason' - }])), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }], + ], + streaming=True), + # reason not in _NON_TRANSIENT_ERRORS for row 1, sent to failed_rows param( - exception_type=exceptions.MethodNotAllowed if exceptions else None, - error_args=('method not allowed!', 'args')), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'internalError' + }] + }], + ], + streaming=True), + ]) + @mock.patch('time.sleep') + @mock.patch('google.cloud.bigquery.Client.insert_rows_json') + def test_insert_rows_json_errors_retry_never( + self, mock_send, unused_mock_sleep, insert_response, streaming): + # In this test, a pipeline will never retry errors since RetryStrategy is + # set to RETRY_NEVER + mock_send.side_effect = insert_response + opt = StandardOptions() + opt.streaming = streaming + with beam.Pipeline(runner='BundleBasedDirectRunner', options=opt) as p: + bq_write_out = ( + p + | beam.Create([{ + 'columnA': 'value1' + }, { + 'columnA': 'value2' + }]) + | WriteToBigQuery( + table='project:dataset.table', + schema={ + 'fields': [{ + 'name': 'columnA', 'type': 'STRING', 'mode': 'NULLABLE' + }] + }, + create_disposition='CREATE_NEVER', + method='STREAMING_INSERTS', + insert_retry_strategy=RetryStrategy.RETRY_NEVER)) + failed_values = ( + bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS_WITH_ERRORS] + | beam.Map(lambda x: x[1]['columnA'])) + + assert_that(failed_values, equal_to(['value1'])) + + self.assertEqual(1, mock_send.call_count) + + # Running tests with a variety of error reasons from + # https://cloud.google.com/bigquery/docs/error-messages + # This covers the scenario when + # the google.cloud.bigquery.Client.insert_rows_json call returns an error list + # rather than raising an exception. 
+ # Choosing some error reasons that are included in + # bigquery_tools._NON_TRANSIENT_ERRORS and some that are not + @parameterized.expand([ + # reason in _NON_TRANSIENT_ERRORS for row 1, sent to failed_rows param( - exception_type=exceptions.Unknown if exceptions else None, - error_args=('unknown!', 'args')), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }], + ], + failed_rows=['value1'], + streaming=False), + # reason not in _NON_TRANSIENT_ERRORS for row 1 on 1st attempt + # transient error succeeds on 2nd attempt, 0 rows sent to failed rows param( - exception_type=exceptions.Aborted if exceptions else None, - error_args=('abortet!', 'abort')), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'internalError' + }] + }], + [], + ], + failed_rows=[], + streaming=False), + # reason in _NON_TRANSIENT_ERRORS for row 1 + # reason not in _NON_TRANSIENT_ERRORS for row 2 on 1st and 2nd attempt + # all rows with errors are retried when any row has a retriable error + # row 1 sent to failed_rows after final attempt param( - exception_type=requests.exceptions.ConnectionError, - error_args=('some connection error', )), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }, { + 'index': 1, 'errors': [{ + 'reason': 'internalError' + }] + }], + [ + { + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }, + ], + ], + failed_rows=['value1'], + streaming=False), + # reason in _NON_TRANSIENT_ERRORS for row 1, sent to failed_rows param( - exception_type=requests.exceptions.Timeout, - error_args=('some timeout error', )), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }], + ], + failed_rows=['value1'], + streaming=True), + # reason not in _NON_TRANSIENT_ERRORS for row 1 on 1st attempt + # transient error succeeds on 2nd attempt, 0 rows sent to failed rows param( - exception_type=ConnectionError, - error_args=('some py connection error', )), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'internalError' + }] + }], + [], + ], + failed_rows=[], + streaming=True), + # reason in _NON_TRANSIENT_ERRORS for row 1 + # reason not in _NON_TRANSIENT_ERRORS for row 2 on 1st and 2nd attempt + # all rows with errors are retried when any row has a retriable error + # row 1 sent to failed_rows after final attempt param( - exception_type=exceptions.BadGateway if exceptions else None, - error_args=('some badgateway error', )), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }, { + 'index': 1, 'errors': [{ + 'reason': 'internalError' + }] + }], + [ + { + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }, + ], + ], + failed_rows=['value1'], + streaming=True), ]) @mock.patch('time.sleep') @mock.patch('google.cloud.bigquery.Client.insert_rows_json') - def test_insert_all_unretriable_errors_streaming( - self, mock_send, unused_mock_sleep, exception_type=None, error_args=None): - # In this test, a STREAMING pipeline will retry ALL errors, and never throw - # an exception. 
- mock_send.side_effect = [ - exception_type(*error_args), - exception_type(*error_args), - [] # Errors thrown twice, and then succeeded - ] + def test_insert_rows_json_errors_retry_on_transient_error( + self, + mock_send, + unused_mock_sleep, + insert_response, + failed_rows, + streaming=False): + # In this test, a pipeline will only retry errors with reasons that are not + # in _NON_TRANSIENT_ERRORS since RetryStrategy is set to + # RETRY_ON_TRANSIENT_ERROR + call_counter = 0 + + def store_callback(table, **kwargs): + nonlocal call_counter + response = insert_response[call_counter] + call_counter += 1 + return response + + mock_send.side_effect = store_callback opt = StandardOptions() - opt.streaming = True + opt.streaming = streaming + + # Using the bundle based direct runner to avoid pickling problems + # with mocks. with beam.Pipeline(runner='BundleBasedDirectRunner', options=opt) as p: - _ = ( + bq_write_out = ( p | beam.Create([{ 'columnA': 'value1' + }, { + 'columnA': 'value2' + }, { + 'columnA': 'value3' }]) | WriteToBigQuery( table='project:dataset.table', @@ -1189,8 +1624,14 @@ def test_insert_all_unretriable_errors_streaming( }] }, create_disposition='CREATE_NEVER', - method='STREAMING_INSERTS')) - self.assertEqual(3, mock_send.call_count) + method='STREAMING_INSERTS', + insert_retry_strategy=RetryStrategy.RETRY_ON_TRANSIENT_ERROR)) + + failed_values = ( + bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS] + | beam.Map(lambda x: x[1]['columnA'])) + + assert_that(failed_values, equal_to(failed_rows)) @unittest.skipIf(HttpError is None, 'GCP dependencies are not installed') @@ -1499,76 +1940,6 @@ def store_callback(table, **kwargs): result) self.assertEqual(len(data1['colA_values']), 1) - @parameterized.expand([ - param(retry_strategy=RetryStrategy.RETRY_ALWAYS), - param(retry_strategy=RetryStrategy.RETRY_NEVER), - param(retry_strategy=RetryStrategy.RETRY_ON_TRANSIENT_ERROR), - ]) - def test_permanent_failure_in_some_rows_does_not_duplicate( - self, unused_sleep_mock=None, retry_strategy=None): - with mock.patch('time.sleep'): - - def store_callback(table, **kwargs): - return [ - { - 'index': 0, - 'errors': [{ - 'reason': 'invalid' - }, { - 'reason': 'its bad' - }] - }, - ] - - client = mock.Mock() - client.insert_rows_json = mock.Mock(side_effect=store_callback) - - # The expected rows to be inserted according to the insert strategy - if retry_strategy == RetryStrategy.RETRY_NEVER: - inserted_rows = ['value3', 'value5'] - else: # RETRY_ALWAYS and RETRY_ON_TRANSIENT_ERRORS should insert all rows - inserted_rows = ['value3', 'value5'] - - # Using the bundle based direct runner to avoid pickling problems - # with mocks. 
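One aside before the `bigquery_tools` hunk further down: instead of re-raising a `GoogleAPICallError`, `_insert_all_rows` now folds it into the same per-row error shape shown above, roughly:

```python
def package_exception_as_errors(e, rows):
    # e is a caught google.api_core GoogleAPICallError carrying a response;
    # attribute its reason to every row in the batch so the normal per-row
    # retry / failed-rows handling can take over.
    error = {'message': e.message, 'reason': e.response.reason}
    return [{'index': i, 'errors': [error]} for i, _ in enumerate(rows)]
```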
- with beam.Pipeline(runner='BundleBasedDirectRunner') as p: - bq_write_out = ( - p - | beam.Create([{ - 'columnA': 'value1', 'columnB': 'value2' - }, { - 'columnA': 'value3', 'columnB': 'value4' - }, { - 'columnA': 'value5', 'columnB': 'value6' - }]) - | _StreamToBigQuery( - table_reference='project:dataset.table', - table_side_inputs=[], - schema_side_inputs=[], - schema='anyschema', - batch_size=None, - triggering_frequency=None, - create_disposition='CREATE_NEVER', - write_disposition=None, - kms_key=None, - retry_strategy=retry_strategy, - additional_bq_parameters=[], - ignore_insert_ids=False, - ignore_unknown_columns=False, - with_auto_sharding=False, - test_client=client, - max_retries=10, - num_streaming_keys=500)) - - failed_values = ( - bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS] - | beam.Map(lambda x: x[1]['columnA'])) - - assert_that( - failed_values, - equal_to( - list({'value1', 'value3', 'value5'}.difference(inserted_rows)))) - @parameterized.expand([ param(with_auto_sharding=False), param(with_auto_sharding=True), diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py index 07d711f8fc92..2f9420795288 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py @@ -732,11 +732,13 @@ def _insert_all_rows( except (ClientError, GoogleAPICallError) as e: # e.code contains the numeric http status code. service_call_metric.call(e.code) - # Re-reise the exception so that we re-try appropriately. - raise + # Package exception with required fields + error = {'message': e.message, 'reason': e.response.reason} + # Add all rows to the errors list along with the error + errors = [{"index": i, "errors": [error]} for i, _ in enumerate(rows)] except HttpError as e: service_call_metric.call(e) - # Re-reise the exception so that we re-try appropriately. + # Re-raise the exception so that we re-try appropriately. raise finally: self._latency_histogram_metric.update( @@ -1491,7 +1493,19 @@ class RetryStrategy(object): RETRY_NEVER = 'RETRY_NEVER' RETRY_ON_TRANSIENT_ERROR = 'RETRY_ON_TRANSIENT_ERROR' - _NON_TRANSIENT_ERRORS = {'invalid', 'invalidQuery', 'notImplemented'} + # Values below may be found in reasons provided either in an + # error returned by a client method or by an http response as + # defined in google.api_core.exceptions + _NON_TRANSIENT_ERRORS = { + 'invalid', + 'invalidQuery', + 'notImplemented', + 'Bad Request', + 'Unauthorized', + 'Forbidden', + 'Not Found', + 'Not Implemented', + } @staticmethod def should_retry(strategy, error_message): diff --git a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py index c73d3ff7e53e..4b728fe7ec1f 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py @@ -379,7 +379,7 @@ def test_big_query_write_without_schema(self): def test_big_query_write_insert_errors_reporting(self): """ Test that errors returned by beam.io.WriteToBigQuery - contain both the failed rows amd the reason for it failing. + contain both the failed rows and the reason for it failing. 
""" table_name = 'python_write_table' table_id = '{}.{}'.format(self.dataset_id, table_name) @@ -454,6 +454,55 @@ def test_big_query_write_insert_errors_reporting(self): | 'ParseErrors' >> beam.Map(lambda err: (err[1], err[2])), equal_to(bq_result_errors)) + @pytest.mark.it_postcommit + def test_big_query_write_insert_non_transient_api_call_error(self): + """ + Test that non-transient GoogleAPICallError errors returned + by beam.io.WriteToBigQuery are not retried and result in + FAILED_ROWS containing both the failed rows and the reason + for failure. + """ + table_name = 'this_table_does_not_exist' + table_id = '{}.{}'.format(self.dataset_id, table_name) + + input_data = [{ + 'number': 1, + 'str': 'some_string', + }] + + table_schema = { + "fields": [{ + "name": "number", "type": "INTEGER", 'mode': 'NULLABLE' + }, { + "name": "str", "type": "STRING", 'mode': 'NULLABLE' + }] + } + + bq_result_errors = [({ + 'number': 1, + 'str': 'some_string', + }, "Not Found")] + + args = self.test_pipeline.get_full_options_as_args() + + with beam.Pipeline(argv=args) as p: + # pylint: disable=expression-not-assigned + errors = ( + p | 'create' >> beam.Create(input_data) + | 'write' >> beam.io.WriteToBigQuery( + table_id, + schema=table_schema, + method='STREAMING_INSERTS', + insert_retry_strategy='RETRY_ON_TRANSIENT_ERROR', + create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER, + write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND)) + + assert_that( + errors[BigQueryWriteFn.FAILED_ROWS_WITH_ERRORS] + | + 'ParseErrors' >> beam.Map(lambda err: (err[1], err[2][0]["reason"])), + equal_to(bq_result_errors)) + @pytest.mark.it_postcommit @parameterized.expand([ param(file_format=FileFormat.AVRO), diff --git a/sdks/python/apache_beam/io/gcp/gcsio.py b/sdks/python/apache_beam/io/gcp/gcsio.py index 2fdbce73170a..d75af4fe6ac1 100644 --- a/sdks/python/apache_beam/io/gcp/gcsio.py +++ b/sdks/python/apache_beam/io/gcp/gcsio.py @@ -825,5 +825,4 @@ def finish(self): # Check for exception since the last put() call. if self._upload_thread.last_error is not None: e = self._upload_thread.last_error - raise type(self._upload_thread.last_error)( - "Error while uploading file %s" % self._path) from e # pylint: disable=raising-bad-type + raise RuntimeError("Error while uploading file %s" % self._path) from e diff --git a/sdks/python/apache_beam/io/jdbc.py b/sdks/python/apache_beam/io/jdbc.py index f8f24ddeb8d2..903b0d1b0fef 100644 --- a/sdks/python/apache_beam/io/jdbc.py +++ b/sdks/python/apache_beam/io/jdbc.py @@ -86,6 +86,7 @@ # pytype: skip-file +import datetime import typing import numpy as np @@ -94,7 +95,10 @@ from apache_beam.transforms.external import BeamJarExpansionService from apache_beam.transforms.external import ExternalTransform from apache_beam.transforms.external import NamedTupleBasedPayloadBuilder +from apache_beam.typehints.schemas import LogicalType +from apache_beam.typehints.schemas import MillisInstant from apache_beam.typehints.schemas import typing_to_runner_api +from apache_beam.utils.timestamp import Timestamp __all__ = [ 'WriteToJdbc', @@ -355,3 +359,99 @@ def __init__( ), expansion_service or default_io_expansion_service(classpath), ) + + +@LogicalType.register_logical_type +class JdbcDateType(LogicalType[datetime.date, MillisInstant, str]): + """ + For internal use only; no backwards-compatibility guarantees. + + Support of Legacy JdbcIO DATE logical type. Deemed to change when Java JDBCIO + has been migrated to Beam portable logical types. 
+ """ + def __init__(self, argument=""): + pass + + @classmethod + def representation_type(cls): + # type: () -> type + return Timestamp + + @classmethod + def urn(cls): + return "beam:logical_type:javasdk_date:v1" + + @classmethod + def language_type(cls): + return datetime.date + + def to_representation_type(self, value): + # type: (datetime.date) -> Timestamp + return Timestamp.from_utc_datetime( + datetime.datetime.combine( + value, datetime.datetime.min.time(), tzinfo=datetime.timezone.utc)) + + def to_language_type(self, value): + # type: (Timestamp) -> datetime.date + + return value.to_utc_datetime().date() + + @classmethod + def argument_type(cls): + return str + + def argument(self): + return "" + + @classmethod + def _from_typing(cls, typ): + return cls() + + +@LogicalType.register_logical_type +class JdbcTimeType(LogicalType[datetime.time, MillisInstant, str]): + """ + For internal use only; no backwards-compatibility guarantees. + + Support of Legacy JdbcIO TIME logical type. . Deemed to change when Java + JDBCIO has been migrated to Beam portable logical types. + """ + def __init__(self, argument=""): + pass + + @classmethod + def representation_type(cls): + # type: () -> type + return Timestamp + + @classmethod + def urn(cls): + return "beam:logical_type:javasdk_time:v1" + + @classmethod + def language_type(cls): + return datetime.time + + def to_representation_type(self, value): + # type: (datetime.date) -> Timestamp + return Timestamp.from_utc_datetime( + datetime.datetime.combine( + datetime.datetime.utcfromtimestamp(0), + value, + tzinfo=datetime.timezone.utc)) + + def to_language_type(self, value): + # type: (Timestamp) -> datetime.date + + return value.to_utc_datetime().time() + + @classmethod + def argument_type(cls): + return str + + def argument(self): + return "" + + @classmethod + def _from_typing(cls, typ): + return cls() diff --git a/sdks/python/apache_beam/ml/inference/huggingface_inference.py b/sdks/python/apache_beam/ml/inference/huggingface_inference.py index aee613363781..3ec063808ae3 100644 --- a/sdks/python/apache_beam/ml/inference/huggingface_inference.py +++ b/sdks/python/apache_beam/ml/inference/huggingface_inference.py @@ -98,6 +98,7 @@ class PipelineTask(str, Enum): TextClassification = 'text-classification' TextGeneration = 'text-generation' Text2TextGeneration = 'text2text-generation' + TextToAudio = 'text-to-audio' TokenClassification = 'token-classification' Translation = 'translation' VideoClassification = 'video-classification' @@ -570,7 +571,7 @@ class HuggingFacePipelineModelHandler(ModelHandler[str, def __init__( self, task: Union[str, PipelineTask] = "", - model=None, + model: str = "", *, inference_fn: PipelineInferenceFn = _default_pipeline_inference_fn, load_pipeline_args: Optional[Dict[str, Any]] = None, @@ -593,9 +594,18 @@ def __init__( Args: task (str or enum.Enum): task supported by HuggingFace Pipelines. Accepts a string task or an enum.Enum from PipelineTask. - model : path to pretrained model on Hugging Face Models Hub to use custom - model for the chosen task. If the model already defines the task then - no need to specify the task parameter. + model (str): path to the pretrained *model-id* on Hugging Face Models Hub + to use custom model for the chosen task. If the `model` already defines + the task then no need to specify the `task` parameter. + Use the *model-id* string instead of an actual model here. 
+ Model-specific kwargs for `from_pretrained(..., **model_kwargs)` can be + specified with `model_kwargs` using `load_pipeline_args`. + + Example Usage:: + model_handler = HuggingFacePipelineModelHandler( + task="text-generation", model="meta-llama/Llama-2-7b-hf", + load_pipeline_args={'model_kwargs':{'quantization_map':config}}) + inference_fn: the inference function to use during RunInference. Default is _default_pipeline_inference_fn. load_pipeline_args (Dict[str, Any]): keyword arguments to provide load diff --git a/sdks/python/apache_beam/ml/inference/vertex_ai_inference_it_test.py b/sdks/python/apache_beam/ml/inference/vertex_ai_inference_it_test.py index 4ef42fb10a70..8ba315ba00ca 100644 --- a/sdks/python/apache_beam/ml/inference/vertex_ai_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/vertex_ai_inference_it_test.py @@ -26,6 +26,7 @@ from apache_beam.io.filesystems import FileSystems from apache_beam.testing.test_pipeline import TestPipeline +# pylint: disable=ungrouped-imports try: from apache_beam.examples.inference import vertex_ai_image_classification from apache_beam.examples.inference import vertex_ai_llm_text_classification diff --git a/sdks/python/apache_beam/py.typed b/sdks/python/apache_beam/py.typed new file mode 100644 index 000000000000..1aea47b9e7dd --- /dev/null +++ b/sdks/python/apache_beam/py.typed @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Marker file for PEP 561. +# The apache-beam package uses inline types. 
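The new `py.typed` file is the PEP 561 marker that tells type checkers to trust the package's inline annotations rather than treating `apache_beam` as an untyped module. A sketch of the effect on user code; exact diagnostics depend on the checker and on which SDK signatures are actually annotated:

```python
# pipeline_types.py -- check with `mypy pipeline_types.py` against a Beam
# installation that ships the py.typed marker.
import apache_beam as beam


def run() -> None:
    with beam.Pipeline() as p:
        lines = p | beam.Create(["a", "b"])
        # Without py.typed, every apache_beam expression is typed as Any and
        # mistakes pass silently; with the marker, the SDK's inline hints
        # flow through and real type errors can surface statically.
        _ = lines | beam.Map(lambda s: s.upper())
```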
diff --git a/sdks/python/apache_beam/runners/common.py b/sdks/python/apache_beam/runners/common.py index c3de9c190434..99cd26cc4098 100644 --- a/sdks/python/apache_beam/runners/common.py +++ b/sdks/python/apache_beam/runners/common.py @@ -24,6 +24,7 @@ # pytype: skip-file +import logging import sys import threading import traceback @@ -81,6 +82,8 @@ ENCODED_IMPULSE_VALUE = IMPULSE_VALUE_CODER_IMPL.encode_nested( GlobalWindows.windowed_value(b'')) +_LOGGER = logging.getLogger(__name__) + class NameContext(object): """Holds the name information for a step.""" @@ -1538,6 +1541,7 @@ def _reraise_augmented(self, exn, windowed_value=None): new_exn = new_exn.with_traceback(tb) self._maybe_sample_exception(exc_info, windowed_value) + _LOGGER.exception(new_exn) raise new_exn diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py index 11e2e1218659..3d612bd6ec0f 100644 --- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py +++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py @@ -359,6 +359,8 @@ def run_pipeline(self, pipeline, options, pipeline_proto=None): 'Google Cloud Dataflow runner not available, ' 'please install apache_beam[gcp]') + _check_and_add_missing_options(options) + # Convert all side inputs into a form acceptable to Dataflow. if pipeline: pipeline.visit(self.combinefn_visitor()) diff --git a/sdks/python/apache_beam/runners/dataflow/internal/names.py b/sdks/python/apache_beam/runners/dataflow/internal/names.py index 8bb39940e484..57c0bcdff201 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/names.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/names.py @@ -34,6 +34,6 @@ # Unreleased sdks use container image tag specified below. # Update this tag whenever there is a change that # requires changes to SDK harness container or SDK harness launcher. 
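The one-line `runners/common.py` change logs the augmented exception, with traceback, before re-raising it, so failures reach the worker's log stream even if a caller later swallows the error. A loose sketch of the pattern, not the actual `DoFnRunner` internals:

```python
import logging

_LOGGER = logging.getLogger(__name__)


def invoke(process_fn, element, step_name):
    try:
        return process_fn(element)
    except Exception as exn:
        new_exn = RuntimeError("%s [while running %r]" % (exn, step_name))
        # logging.exception captures the active traceback; called here, the
        # log entry carries the user-code stack at the point of failure.
        _LOGGER.exception(new_exn)
        raise new_exn from exn
```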
-BEAM_DEV_SDK_CONTAINER_TAG = 'beam-master-20230912' +BEAM_DEV_SDK_CONTAINER_TAG = 'beam-master-20230915' DATAFLOW_CONTAINER_IMAGE_REPOSITORY = 'gcr.io/cloud-dataflow/v1beta3' diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py index 13f1dc635965..cc1494fc7ae2 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py @@ -1346,6 +1346,7 @@ def lifted_stages(stage): payload=transform.spec.payload), inputs=transform.inputs, outputs={'out': precombined_pcoll_id}, + annotations=transform.annotations, environment_id=transform.environment_id)) yield make_stage( @@ -1355,6 +1356,7 @@ def lifted_stages(stage): spec=beam_runner_api_pb2.FunctionSpec( urn=common_urns.primitives.GROUP_BY_KEY.urn), inputs={'in': precombined_pcoll_id}, + annotations=transform.annotations, outputs={'out': grouped_pcoll_id})) yield make_stage( @@ -1367,6 +1369,7 @@ def lifted_stages(stage): payload=transform.spec.payload), inputs={'in': grouped_pcoll_id}, outputs={'out': merged_pcoll_id}, + annotations=transform.annotations, environment_id=transform.environment_id)) yield make_stage( @@ -1379,6 +1382,7 @@ def lifted_stages(stage): payload=transform.spec.payload), inputs={'in': merged_pcoll_id}, outputs=transform.outputs, + annotations=transform.annotations, environment_id=transform.environment_id)) def unlifted_stages(stage): diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py index 144f067900f3..3ff2421e6265 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py @@ -384,6 +384,36 @@ def expand(self, pcoll): 'multiple-small-combines/min-4-globally/CombinePerKey', optimized_stage_names) + def test_combineperkey_annotation_propagation(self): + """ + Test that the CPK component transforms inherit annotations from the + source CPK + """ + class MyCombinePerKey(beam.CombinePerKey): + def annotations(self): + return {"my_annotation": b""} + + with TestPipeline() as pipeline: + _ = pipeline | beam.Create([(1, 2)]) | MyCombinePerKey(min) + + # Verify the annotations are propagated to the split up + # CPK transforms + proto = pipeline.to_runner_api( + default_environment=environments.EmbeddedPythonEnvironment( + capabilities=environments.python_sdk_capabilities())) + optimized = translations.optimize_pipeline( + proto, + phases=[translations.lift_combiners], + known_runner_urns=frozenset(), + partial=True) + for transform_id in ['MyCombinePerKey(min)/Precombine', + 'MyCombinePerKey(min)/Group', + 'MyCombinePerKey(min)/Merge', + 'MyCombinePerKey(min)/ExtractOutputs']: + assert ( + "my_annotation" in + optimized.components.transforms[transform_id].annotations) + def test_conditionally_packed_combiners(self): class RecursiveCombine(beam.PTransform): def __init__(self, labels): diff --git a/sdks/python/apache_beam/runners/render.py b/sdks/python/apache_beam/runners/render.py index 306bf8c2090b..da153d25a4bd 100644 --- a/sdks/python/apache_beam/runners/render.py +++ b/sdks/python/apache_beam/runners/render.py @@ -78,6 +78,8 @@ except ImportError: gcsio = None # type: ignore +_LOGGER = logging.getLogger(__name__) + # From the Beam site, circa November 2022. 
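The `translations.py` edits thread transform `annotations` onto each stage that combiner lifting produces; previously they were dropped during optimization. Mirroring the new test, a custom `CombinePerKey` whose annotation should now survive on the Precombine, Group, Merge, and ExtractOutputs stages:

```python
import apache_beam as beam


class MyCombinePerKey(beam.CombinePerKey):
    def annotations(self):
        # Annotation values are bytes; with the change above, this entry is
        # copied onto every stage the lifter splits this transform into.
        return {"my_annotation": b""}


with beam.Pipeline() as p:
    _ = p | beam.Create([(1, 2), (1, 3)]) | MyCombinePerKey(min)
```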
DEFAULT_EDGE_STYLE = 'color="#ff570b"' DEFAULT_TRANSFORM_STYLE = ( @@ -129,12 +131,6 @@ def _add_argparse_args(cls, parser): help='Set to also log input pipeline proto to stdout.') return parser - def __init__(self, *args, render_testing=False, **kwargs): - super().__init__(*args, **kwargs) - if self.render_port < 0 and not self.render_output and not render_testing: - raise ValueError( - 'At least one of --render_port or --render_output must be provided.') - class PipelineRenderer: def __init__(self, pipeline, options): @@ -342,7 +338,7 @@ def page_callback_data(self, layout): } def render_data(self): - logging.info("Re-rendering pipeline...") + _LOGGER.info("Re-rendering pipeline...") layout = self.layout_dot() if self.options.render_output: for path in self.options.render_output: @@ -352,10 +348,10 @@ def render_data(self): input=layout, check=False) if result.returncode: - logging.error( + _LOGGER.error( "Failed render pipeline as %r: exit %s", path, result.returncode) else: - logging.info("Rendered pipeline as %r", path) + _LOGGER.info("Rendered pipeline as %r", path) return self.page_callback_data(layout) def render_json(self): @@ -404,15 +400,19 @@ class RenderRunner(runner.PipelineRunner): # (such as counters, stage completion status, or possibly even PCollection # samples) queryable and/or displayed. This could evolve into a full Beam # UI. - def run_pipeline(self, pipeline_object, options, pipeline_proto=None): - if not pipeline_proto: - pipeline_proto = pipeline_object.to_runner_api() + def run_pipeline(self, pipeline_object, options): + return self.run_portable_pipeline(pipeline_object.to_runner_api(), options) + + def run_portable_pipeline(self, pipeline_proto, options): render_options = options.view_as(RenderOptions) + if render_options.render_port < 0 and not render_options.render_output: + raise ValueError( + 'At least one of --render_port or --render_output must be provided.') if render_options.log_proto: - logging.info(pipeline_proto) + _LOGGER.info(pipeline_proto) renderer = PipelineRenderer(pipeline_proto, render_options) try: - subprocess.run(['dotX', '-V'], capture_output=True, check=True) + subprocess.run(['dot', '-V'], capture_output=True, check=True) except FileNotFoundError as exn: # If dot is not available, we can at least output the raw .dot files. 
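Note the small bug fix buried in `render.py` above: the availability probe ran `dotX -V`, which effectively never succeeds, so rendering always took the raw-`.dot` fallback path. The corrected probe, as a standalone helper:

```python
import subprocess


def dot_available() -> bool:
    # Graphviz installs its binary as `dot`; the old probe looked for a
    # nonexistent `dotX` executable and so always raised FileNotFoundError.
    try:
        subprocess.run(["dot", "-V"], capture_output=True, check=True)
        return True
    except FileNotFoundError:
        return False
```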
dot_files = [ @@ -422,7 +422,7 @@ def run_pipeline(self, pipeline_object, options, pipeline_proto=None): for output in dot_files: with open(output, 'w') as fout: fout.write(renderer.to_dot()) - logging.info("Wrote pipeline as %s", output) + _LOGGER.info("Wrote pipeline as %s", output) non_dot_files = set(render_options.render_output) - set(dot_files) if non_dot_files: @@ -543,17 +543,16 @@ def render_one(options): pipeline_proto = beam_runner_api_pb2.Pipeline() pipeline_proto.ParseFromString(content) - RenderRunner().run_pipeline( - None, pipeline_options.PipelineOptions(**vars(options)), pipeline_proto) + RenderRunner().run_portable_pipeline( + pipeline_proto, pipeline_options.PipelineOptions(**vars(options))) def run_server(options): class RenderBeamJob(local_job_service.BeamJob): def _invoke_runner(self): - return RenderRunner().run_pipeline( - None, - pipeline_options.PipelineOptions(**vars(options)), - self._pipeline_proto) + return RenderRunner().run_portable_pipeline( + self._pipeline_proto, + pipeline_options.PipelineOptions(**vars(options))) with tempfile.TemporaryDirectory() as staging_dir: job_servicer = local_job_service.LocalJobServicer( diff --git a/sdks/python/apache_beam/runners/render_test.py b/sdks/python/apache_beam/runners/render_test.py index 4dca2b8b5221..67e7afc1c7b9 100644 --- a/sdks/python/apache_beam/runners/render_test.py +++ b/sdks/python/apache_beam/runners/render_test.py @@ -16,10 +16,13 @@ # # pytype: skip-file +import os import argparse import logging import subprocess import unittest +import tempfile +import pytest import apache_beam as beam from apache_beam.runners import render @@ -39,6 +42,16 @@ def test_basic_graph(self): self.assertIn('CustomName', dot) self.assertEqual(dot.count('->'), 2) + def test_render_config_validation(self): + p = beam.Pipeline() + _ = ( + p | beam.Impulse() | beam.Map(lambda _: 2) + | 'CustomName' >> beam.Map(lambda x: x * x)) + pipeline_proto = p.to_runner_api() + with pytest.raises(ValueError): + render.RenderRunner().run_portable_pipeline( + pipeline_proto, render.RenderOptions()) + def test_side_input(self): p = beam.Pipeline() pcoll = p | beam.Impulse() | beam.FlatMap(lambda x: [1, 2, 3]) @@ -65,11 +78,35 @@ def test_composite_collapse(self): renderer.update(toggle=[create_transform_id]) self.assertEqual(renderer.to_dot().count('->'), 1) - def test_dot_well_formed(self): + +class DotRequiringRenderingTest(unittest.TestCase): + @classmethod + def setUpClass(cls): try: subprocess.run(['dot', '-V'], capture_output=True, check=True) except FileNotFoundError: + cls._dot_installed = False + else: + cls._dot_installed = True + + def setUp(self) -> None: + if not self._dot_installed: # type: ignore[attr-defined] self.skipTest('dot executable not installed') + + def test_run_portable_pipeline(self): + p = beam.Pipeline() + _ = ( + p | beam.Impulse() | beam.Map(lambda _: 2) + | 'CustomName' >> beam.Map(lambda x: x * x)) + pipeline_proto = p.to_runner_api() + + with tempfile.TemporaryDirectory() as tmpdir: + svg_path = os.path.join(tmpdir, "my_output.svg") + render.RenderRunner().run_portable_pipeline( + pipeline_proto, render.RenderOptions(render_output=[svg_path])) + assert os.path.exists(svg_path) + + def test_dot_well_formed(self): p = beam.Pipeline() _ = p | beam.Create([1, 2, 3]) | beam.Map(lambda x: x * x) pipeline_proto = p.to_runner_api() @@ -84,16 +121,12 @@ def test_dot_well_formed(self): renderer.render_data() def test_leaf_composite_filter(self): - try: - subprocess.run(['dot', '-V'], capture_output=True, check=True) - 
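With this refactor, rendering is driven by `run_portable_pipeline`, and the port/output validation moves there from `RenderOptions.__init__` (which is why the `render_testing` escape hatch is removed from the tests). Usage in the spirit of the new `test_run_portable_pipeline`; non-`.dot` outputs such as SVG require Graphviz's `dot` on `PATH`:

```python
import apache_beam as beam
from apache_beam.runners import render

p = beam.Pipeline()
_ = p | beam.Create([1, 2, 3]) | beam.Map(lambda x: x * x)

# Renders the pipeline graph to an SVG file; omitting both render_output
# and render_port now raises ValueError here rather than at options time.
render.RenderRunner().run_portable_pipeline(
    p.to_runner_api(), render.RenderOptions(render_output=["pipeline.svg"]))
```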
except FileNotFoundError: - self.skipTest('dot executable not installed') p = beam.Pipeline() _ = p | beam.Create([1, 2, 3]) | beam.Map(lambda x: x * x) dot = render.PipelineRenderer( p.to_runner_api(), - render.RenderOptions(['--render_leaf_composite_nodes=Create'], - render_testing=True)).to_dot() + render.RenderOptions(['--render_leaf_composite_nodes=Create' + ])).to_dot() self.assertEqual(dot.count('->'), 1) diff --git a/sdks/python/apache_beam/runners/worker/log_handler_test.py b/sdks/python/apache_beam/runners/worker/log_handler_test.py index 4adae90edceb..9eb9299cac39 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler_test.py +++ b/sdks/python/apache_beam/runners/worker/log_handler_test.py @@ -23,17 +23,97 @@ import grpc +import apache_beam as beam +from apache_beam.coders.coders import FastPrimitivesCoder +from apache_beam.portability import common_urns from apache_beam.portability.api import beam_fn_api_pb2 from apache_beam.portability.api import beam_fn_api_pb2_grpc from apache_beam.portability.api import endpoints_pb2 +from apache_beam.runners import common from apache_beam.runners.common import NameContext +from apache_beam.runners.worker import bundle_processor from apache_beam.runners.worker import log_handler +from apache_beam.runners.worker import operations from apache_beam.runners.worker import statesampler +from apache_beam.runners.worker.bundle_processor import BeamTransformFactory +from apache_beam.runners.worker.bundle_processor import BundleProcessor +from apache_beam.transforms.window import GlobalWindow from apache_beam.utils import thread_pool_executor +from apache_beam.utils.windowed_value import WindowedValue _LOGGER = logging.getLogger(__name__) +@BeamTransformFactory.register_urn('beam:internal:testexn:v1', bytes) +def create_exception_dofn( + factory, transform_id, transform_proto, payload, consumers): + """Returns a test DoFn that raises the given exception.""" + class RaiseException(beam.DoFn): + def __init__(self, msg): + self.msg = msg.decode() + + def process(self, _): + raise RuntimeError(self.msg) + + return bundle_processor._create_simple_pardo_operation( + factory, + transform_id, + transform_proto, + consumers, + RaiseException(payload)) + + +class TestOperation(operations.Operation): + """Test operation that forwards its payload to consumers.""" + class Spec: + def __init__(self, transform_proto): + self.output_coders = [ + FastPrimitivesCoder() for _ in transform_proto.outputs + ] + + def __init__( + self, + transform_proto, + name_context, + counter_factory, + state_sampler, + consumers, + payload, + ): + super().__init__( + name_context, + self.Spec(transform_proto), + counter_factory, + state_sampler) + self.payload = payload + + for _, consumer_ops in consumers.items(): + for consumer in consumer_ops: + self.add_receiver(consumer, 0) + + def start(self): + super().start() + + # Not using windowing logic, so just using simple defaults here. 
+ if self.payload: + self.process( + WindowedValue(self.payload, timestamp=0, windows=[GlobalWindow()])) + + def process(self, windowed_value): + self.output(windowed_value) + + +@BeamTransformFactory.register_urn('beam:internal:testop:v1', bytes) +def create_test_op(factory, transform_id, transform_proto, payload, consumers): + return TestOperation( + transform_proto, + common.NameContext(transform_proto.unique_name, transform_id), + factory.counter_factory, + factory.state_sampler, + consumers, + payload) + + class BeamFnLoggingServicer(beam_fn_api_pb2_grpc.BeamFnLoggingServicer): def __init__(self): self.log_records_received = [] @@ -153,6 +233,77 @@ def test_context(self): finally: statesampler.set_current_tracker(None) + def test_extracts_transform_id_during_exceptions(self): + """Tests that transform ids are captured during user code exceptions.""" + descriptor = beam_fn_api_pb2.ProcessBundleDescriptor() + + # Boiler plate for the DoFn. + WINDOWING_ID = 'window' + WINDOW_CODER_ID = 'cw' + window = descriptor.windowing_strategies[WINDOWING_ID] + window.window_fn.urn = common_urns.global_windows.urn + window.window_coder_id = WINDOW_CODER_ID + window.trigger.default.SetInParent() + window_coder = descriptor.coders[WINDOW_CODER_ID] + window_coder.spec.urn = common_urns.StandardCoders.Enum.GLOBAL_WINDOW.urn + + # Input collection to the exception raising DoFn. + INPUT_PCOLLECTION_ID = 'pc-in' + INPUT_CODER_ID = 'c-in' + descriptor.pcollections[ + INPUT_PCOLLECTION_ID].unique_name = INPUT_PCOLLECTION_ID + descriptor.pcollections[INPUT_PCOLLECTION_ID].coder_id = INPUT_CODER_ID + descriptor.pcollections[ + INPUT_PCOLLECTION_ID].windowing_strategy_id = WINDOWING_ID + descriptor.coders[ + INPUT_CODER_ID].spec.urn = common_urns.StandardCoders.Enum.BYTES.urn + + # Output collection to the exception raising DoFn. + OUTPUT_PCOLLECTION_ID = 'pc-out' + OUTPUT_CODER_ID = 'c-out' + descriptor.pcollections[ + OUTPUT_PCOLLECTION_ID].unique_name = OUTPUT_PCOLLECTION_ID + descriptor.pcollections[OUTPUT_PCOLLECTION_ID].coder_id = OUTPUT_CODER_ID + descriptor.pcollections[ + OUTPUT_PCOLLECTION_ID].windowing_strategy_id = WINDOWING_ID + descriptor.coders[ + OUTPUT_CODER_ID].spec.urn = common_urns.StandardCoders.Enum.BYTES.urn + + # Add a simple transform to inject an element into the fake pipeline. + TEST_OP_TRANSFORM_ID = 'test_op' + test_transform = descriptor.transforms[TEST_OP_TRANSFORM_ID] + test_transform.outputs['None'] = INPUT_PCOLLECTION_ID + test_transform.spec.urn = 'beam:internal:testop:v1' + test_transform.spec.payload = b'hello, world!' + + # Add the DoFn to create an exception. + TEST_EXCEPTION_TRANSFORM_ID = 'test_transform' + test_transform = descriptor.transforms[TEST_EXCEPTION_TRANSFORM_ID] + test_transform.inputs['0'] = INPUT_PCOLLECTION_ID + test_transform.outputs['None'] = OUTPUT_PCOLLECTION_ID + test_transform.spec.urn = 'beam:internal:testexn:v1' + test_transform.spec.payload = b'expected exception' + + # Create and process a fake bundle. The instruction id doesn't matter + # here. 
+ processor = BundleProcessor(descriptor, None, None) + + with self.assertRaisesRegex(RuntimeError, 'expected exception'): + processor.process_bundle('instruction_id') + + self.fn_log_handler.close() + logs = [ + log for logs in self.test_logging_service.log_records_received + for log in logs.log_entries + ] + + actual_log = logs[0] + + self.assertEqual( + actual_log.severity, beam_fn_api_pb2.LogEntry.Severity.ERROR) + self.assertTrue('expected exception' in actual_log.message) + self.assertEqual(actual_log.transform_id, 'test_transform') + # Test cases. data = { diff --git a/sdks/python/apache_beam/transforms/external.py b/sdks/python/apache_beam/transforms/external.py index 4b8e708bfc5c..44bf2398a6dd 100644 --- a/sdks/python/apache_beam/transforms/external.py +++ b/sdks/python/apache_beam/transforms/external.py @@ -185,6 +185,14 @@ def __init__(self, identifier, **kwargs): self._identifier = identifier self._kwargs = kwargs + def identifier(self): + """ + The URN referencing this SchemaTransform + + :return: str + """ + return self._identifier + def build(self): schema_proto, payload = self._get_schema_proto_and_payload(**self._kwargs) payload = external_transforms_pb2.SchemaTransformPayload( @@ -194,7 +202,7 @@ def build(self): return payload -class ExplicitSchemaTransformPayloadBuilder(PayloadBuilder): +class ExplicitSchemaTransformPayloadBuilder(SchemaTransformPayloadBuilder): def __init__(self, identifier, schema_proto, **kwargs): self._identifier = identifier self._schema_proto = schema_proto @@ -414,7 +422,7 @@ def __init__( def expand(self, pcolls): # Expand the transform using the expansion service. - return pcolls | ExternalTransform( + return pcolls | self._payload_builder.identifier() >> ExternalTransform( common_urns.schematransform_based_expand.urn, self._payload_builder, self._expansion_service) diff --git a/sdks/python/apache_beam/yaml/standard_io.yaml b/sdks/python/apache_beam/yaml/standard_io.yaml index e60f0026fd25..1738110539ce 100644 --- a/sdks/python/apache_beam/yaml/standard_io.yaml +++ b/sdks/python/apache_beam/yaml/standard_io.yaml @@ -51,6 +51,8 @@ 'ReadFromBigQuery': 'apache_beam.yaml.yaml_io.read_from_bigquery' # Disable until https://github.com/apache/beam/issues/28162 is resolved. # 'WriteToBigQuery': 'apache_beam.yaml.yaml_io.write_to_bigquery' + 'ReadFromText': 'apache_beam.yaml.yaml_io.read_from_text' + 'WriteToText': 'apache_beam.yaml.yaml_io.write_to_text' # Declared as a renaming transform to avoid exposing all # (implementation-specific) pandas arguments and aligning with possible Java diff --git a/sdks/python/apache_beam/yaml/yaml_io.py b/sdks/python/apache_beam/yaml/yaml_io.py index 646d5e1fbff1..297c07e9abb5 100644 --- a/sdks/python/apache_beam/yaml/yaml_io.py +++ b/sdks/python/apache_beam/yaml/yaml_io.py @@ -28,12 +28,38 @@ import yaml import apache_beam as beam +import apache_beam.io as beam_io from apache_beam.io import ReadFromBigQuery from apache_beam.io import WriteToBigQuery from apache_beam.io.gcp.bigquery import BigQueryDisposition +from apache_beam.typehints.schemas import named_fields_from_element_type from apache_beam.yaml import yaml_provider +def read_from_text(path: str): + # TODO(yaml): Consider passing the filename and offset, possibly even + # by default. 
+ return beam_io.ReadFromText(path) | beam.Map(lambda s: beam.Row(line=s)) + + +@beam.ptransform_fn +def write_to_text(pcoll, path: str): + try: + field_names = [ + name for name, _ in named_fields_from_element_type(pcoll.element_type) + ] + except Exception as exn: + raise ValueError( + "WriteToText requires an input schema with exactly one field.") from exn + if len(field_names) != 1: + raise ValueError( + "WriteToText requires an input schema with exactly one field, got %s" % + field_names) + sole_field_name, = field_names + return pcoll | beam.Map( + lambda x: str(getattr(x, sole_field_name))) | beam.io.WriteToText(path) + + def read_from_bigquery( query=None, table=None, row_restriction=None, fields=None): if query is None: @@ -99,18 +125,4 @@ def raise_exception(failed_row_with_error): def io_providers(): with open(os.path.join(os.path.dirname(__file__), 'standard_io.yaml')) as fin: - explicit_ios = yaml_provider.parse_providers( - yaml.load(fin, Loader=yaml.SafeLoader)) - - # TOOD(yaml): We should make all top-level IOs explicit. - # This will be a chance to clean up the APIs and align them with their - # Java implementations. - # PythonTransform can be used to get the "raw" transforms for any others. - implicit_ios = yaml_provider.InlineProvider({ - key: getattr(beam.io, key) - for key in dir(beam.io) - if (key.startswith('ReadFrom') or key.startswith('WriteTo')) and - key not in explicit_ios - }) - - return yaml_provider.merge_providers(explicit_ios, implicit_ios) + return yaml_provider.parse_providers(yaml.load(fin, Loader=yaml.SafeLoader)) diff --git a/sdks/python/apache_beam/yaml/yaml_mapping.py b/sdks/python/apache_beam/yaml/yaml_mapping.py index 38d31d8bfa9f..889f7f1ee309 100644 --- a/sdks/python/apache_beam/yaml/yaml_mapping.py +++ b/sdks/python/apache_beam/yaml/yaml_mapping.py @@ -297,16 +297,20 @@ def normalize_fields(pcoll, fields, drop=(), append=False, language='generic'): if append: for name in fields: if name in input_schema and name not in drop: - raise ValueError(f'Redefinition of field "{name}"') + raise ValueError( + f'Redefinition of field "{name}". ' + 'Cannot append a field that already exists in original input.') if language == 'generic': for expr in fields.values(): if not isinstance(expr, str): - raise ValueError("Missing language specification.") + raise ValueError( + "Missing language specification. 
" + "Must specify a language when using a map with custom logic.") missing = set(fields.values()) - set(input_schema.keys()) if missing: raise ValueError( - f"Missing language specification or unkown input fields: {missing}") + f"Missing language specification or unknown input fields: {missing}") if append: return input_schema, { @@ -335,8 +339,6 @@ def _PyJsMapToFields(pcoll, language='generic', **mapping_args): class SqlMappingProvider(yaml_provider.Provider): def __init__(self, sql_provider=None): if sql_provider is None: - print('yaml_provider', yaml_provider) - print('yaml_provider.beam_jar', yaml_provider.beam_jar) sql_provider = yaml_provider.beam_jar( urns={'Sql': 'beam:external:java:sql:v1'}, gradle_target='sdks:java:extensions:sql:expansion-service:shadowJar') diff --git a/sdks/python/apache_beam/yaml/yaml_provider.py b/sdks/python/apache_beam/yaml/yaml_provider.py index 964877ac2a2e..c8398b0fcb01 100644 --- a/sdks/python/apache_beam/yaml/yaml_provider.py +++ b/sdks/python/apache_beam/yaml/yaml_provider.py @@ -66,6 +66,16 @@ def provided_transforms(self) -> Iterable[str]: """Returns a list of transform type names this provider can handle.""" raise NotImplementedError(type(self)) + def requires_inputs(self, typ: str, args: Mapping[str, Any]) -> bool: + """Returns whether this transform requires inputs. + + Specifically, if this returns True and inputs are not provided than an error + will be thrown. + + This is best-effort, primarily for better and earlier error messages. + """ + return not typ.startswith('Read') + def create_transform( self, typ: str, @@ -130,9 +140,7 @@ def __init__(self, urns, service): def provided_transforms(self): return self._urns.keys() - def create_transform(self, type, args, yaml_create_transform): - if callable(self._service): - self._service = self._service() + def schema_transforms(self): if self._schema_transforms is None: try: self._schema_transforms = { @@ -143,8 +151,19 @@ def create_transform(self, type, args, yaml_create_transform): except Exception: # It's possible this service doesn't vend schema transforms. 
self._schema_transforms = {} + return self._schema_transforms + + def requires_inputs(self, typ, args): + if self._urns[type] in self.schema_transforms(): + return bool(self.schema_transforms()[self._urns[type]].inputs) + else: + return super().requires_inputs(typ, args) + + def create_transform(self, type, args, yaml_create_transform): + if callable(self._service): + self._service = self._service() urn = self._urns[type] - if urn in self._schema_transforms: + if urn in self.schema_transforms(): return external.SchemaAwareExternalTransform( urn, self._service, rearrange_based_on_discovery=True, **args) else: @@ -361,8 +380,9 @@ def fn_takes_side_inputs(fn): class InlineProvider(Provider): - def __init__(self, transform_factories): + def __init__(self, transform_factories, no_input_transforms=()): self._transform_factories = transform_factories + self._no_input_transforms = set(no_input_transforms) def available(self): return True @@ -379,6 +399,14 @@ def create_transform(self, type, args, yaml_create_transform): def to_json(self): return {'type': "InlineProvider"} + def requires_inputs(self, typ, args): + if typ in self._no_input_transforms: + return False + elif hasattr(self._transform_factories[typ], '_yaml_requires_inputs'): + return self._transform_factories[typ]._yaml_requires_inputs + else: + return super().requires_inputs(typ, args) + class MetaInlineProvider(InlineProvider): def create_transform(self, type, args, yaml_create_transform): @@ -514,21 +542,15 @@ def log_and_return(x): logging.info(x) return x - return InlineProvider( - dict({ - 'Create': create, - 'LogForTesting': lambda: beam.Map(log_and_return), - 'PyTransform': fully_qualified_named_transform, - 'PyToRow': lambda fields: beam.Select( - **{ - name: python_callable.PythonCallableWithSource(fn) - for (name, fn) in fields.items() - }), - 'WithSchema': with_schema, - 'Flatten': Flatten, - 'WindowInto': WindowInto, - 'GroupByKey': beam.GroupByKey, - })) + return InlineProvider({ + 'Create': create, + 'LogForTesting': lambda: beam.Map(log_and_return), + 'PyTransform': fully_qualified_named_transform, + 'WithSchemaExperimental': with_schema, + 'Flatten': Flatten, + 'WindowInto': WindowInto, + }, + no_input_transforms=('Create', )) class PypiExpansionService: @@ -636,6 +658,9 @@ def available(self) -> bool: def provided_transforms(self) -> Iterable[str]: return self._transforms.keys() + def requires_inputs(self, typ, args): + return self._underlying_provider.requires_inputs(typ, args) + def create_transform( self, typ: str, diff --git a/sdks/python/apache_beam/yaml/yaml_transform.py b/sdks/python/apache_beam/yaml/yaml_transform.py index f30a2ea26335..0deed8ac8913 100644 --- a/sdks/python/apache_beam/yaml/yaml_transform.py +++ b/sdks/python/apache_beam/yaml/yaml_transform.py @@ -76,6 +76,28 @@ def only_element(xs): return x +# These allow a user to explicitly pass no input to a transform (i.e. use it +# as a root transform) without an error even if the transform is not known to +# handle it. 
+def explicitly_empty(): + return {'__explicitly_empty__': None} + + +def is_explicitly_empty(io): + return io == explicitly_empty() + + +def is_empty(io): + return not io or is_explicitly_empty(io) + + +def empty_if_explicitly_empty(io): + if is_explicitly_empty(io): + return {} + else: + return io + + class SafeLineLoader(SafeLoader): """A yaml loader that attaches line information to mappings and strings.""" class TaggedString(str): @@ -186,7 +208,7 @@ def followers(self, transform_name): # TODO(yaml): Also trace through outputs and composites. for transform in self._transforms: if transform['type'] != 'composite': - for input in transform.get('input').values(): + for input in empty_if_explicitly_empty(transform['input']).values(): transform_id, _ = self.get_transform_id_and_output_name(input) self._all_followers[transform_id].append(transform['__uuid__']) return self._all_followers[self.get_transform_id(transform_name)] @@ -324,6 +346,12 @@ def create_ptransform(self, spec, input_pcolls): raise ValueError( 'Config for transform at %s must be a mapping.' % identify_object(spec)) + + if (not input_pcolls and not is_explicitly_empty(spec.get('input', {})) and + provider.requires_inputs(spec['type'], config)): + raise ValueError( + f'Missing inputs for transform at {identify_object(spec)}') + try: # pylint: disable=undefined-loop-variable ptransform = provider.create_transform( @@ -402,7 +430,7 @@ def expand_leaf_transform(spec, scope): spec = normalize_inputs_outputs(spec) inputs_dict = { key: scope.get_pcollection(value) - for (key, value) in spec['input'].items() + for (key, value) in empty_if_explicitly_empty(spec['input']).items() } input_type = spec.get('input_type', 'default') if input_type == 'list': @@ -442,10 +470,10 @@ def expand_composite_transform(spec, scope): spec = normalize_inputs_outputs(normalize_source_sink(spec)) inner_scope = Scope( - scope.root, { + scope.root, + { key: scope.get_pcollection(value) - for key, - value in spec['input'].items() + for (key, value) in empty_if_explicitly_empty(spec['input']).items() }, spec['transforms'], yaml_provider.merge_providers( @@ -470,8 +498,7 @@ def expand(inputs): _LOGGER.info("Expanding %s ", identify_object(spec)) return ({ key: scope.get_pcollection(value) - for key, - value in spec['input'].items() + for (key, value) in empty_if_explicitly_empty(spec['input']).items() } or scope.root) | scope.unique_name(spec, None) >> CompositePTransform() @@ -496,12 +523,25 @@ def is_not_output_of_last_transform(new_transforms, value): composite_spec = normalize_inputs_outputs(spec) new_transforms = [] for ix, transform in enumerate(composite_spec['transforms']): - if any(io in transform for io in ('input', 'output', 'input', 'output')): - raise ValueError( - f'Transform {identify_object(transform)} is part of a chain, ' - 'must have implicit inputs and outputs.') + if any(io in transform for io in ('input', 'output')): + if (ix == 0 and 'input' in transform and 'output' not in transform and + is_explicitly_empty(transform['input'])): + # This is OK as source clause sets an explicitly empty input. 
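The `__explicitly_empty__` sentinel gives YAML authors an escape hatch: writing `input: {}` marks a transform as an intentional root even when its provider cannot tell that it needs no inputs. Mirroring `test_empty_inputs_ok_if_explicit` further below:

```python
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.yaml.yaml_transform import YamlTransform

with beam.Pipeline(options=PipelineOptions(
    pickle_library='cloudpickle')) as p:
    _ = p | YamlTransform(
        '''
        type: composite
        transforms:
          - type: PyTransform
            name: EmptyInputOkButYamlDoesntKnow
            input: {}
            config:
              constructor: apache_beam.Impulse
        ''')
```

A `source:` clause gets this sentinel attached automatically by `normalize_source_sink` below.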
+ pass + else: + raise ValueError( + f'Transform {identify_object(transform)} is part of a chain, ' + 'must have implicit inputs and outputs.') if ix == 0: - transform['input'] = {key: key for key in composite_spec['input'].keys()} + if is_explicitly_empty(transform.get('input', None)): + pass + elif is_explicitly_empty(composite_spec['input']): + transform['input'] = composite_spec['input'] + else: + transform['input'] = { + key: key + for key in composite_spec['input'].keys() + } else: transform['input'] = new_transforms[-1]['__uuid__'] new_transforms.append(transform) @@ -554,6 +594,8 @@ def normalize_source_sink(spec): spec = dict(spec) spec['transforms'] = list(spec.get('transforms', [])) if 'source' in spec: + if 'input' not in spec['source']: + spec['source']['input'] = explicitly_empty() spec['transforms'].insert(0, spec.pop('source')) if 'sink' in spec: spec['transforms'].append(spec.pop('sink')) @@ -567,6 +609,13 @@ def preprocess_source_sink(spec): return spec +def tag_explicit_inputs(spec): + if 'input' in spec and not SafeLineLoader.strip_metadata(spec['input']): + return dict(spec, input=explicitly_empty()) + else: + return spec + + def normalize_inputs_outputs(spec): spec = dict(spec) @@ -611,7 +660,7 @@ def push_windowing_to_roots(spec): scope = LightweightScope(spec['transforms']) consumed_outputs_by_transform = collections.defaultdict(set) for transform in spec['transforms']: - for _, input_ref in transform['input'].items(): + for _, input_ref in empty_if_explicitly_empty(transform['input']).items(): try: transform_id, output = scope.get_transform_id_and_output_name(input_ref) consumed_outputs_by_transform[transform_id].add(output) @@ -620,7 +669,7 @@ def push_windowing_to_roots(spec): pass for transform in spec['transforms']: - if not transform['input'] and 'windowing' not in transform: + if is_empty(transform['input']) and 'windowing' not in transform: transform['windowing'] = spec['windowing'] transform['__consumed_outputs'] = consumed_outputs_by_transform[ transform['__uuid__']] @@ -647,7 +696,7 @@ def preprocess_windowing(spec): spec = push_windowing_to_roots(spec) windowing = spec.pop('windowing') - if spec['input']: + if not is_empty(spec['input']): # Apply the windowing to all inputs by wrapping it in a transform that # first applies windowing and then applies the original transform. 
original_inputs = spec['input'] @@ -778,7 +827,7 @@ def ensure_errors_consumed(spec): raise ValueError( f'Missing output in error_handling of {identify_object(t)}') to_handle[t['__uuid__'], config['error_handling']['output']] = t - for _, input in t['input'].items(): + for _, input in empty_if_explicitly_empty(t['input']).items(): if input not in spec['input']: consumed.add(scope.get_transform_id_and_output_name(input)) for error_pcoll, t in to_handle.items(): @@ -815,7 +864,7 @@ def preprocess(spec, verbose=False, known_transforms=None): def apply(phase, spec): spec = phase(spec) - if spec['type'] in {'composite', 'chain'}: + if spec['type'] in {'composite', 'chain'} and 'transforms' in spec: spec = dict( spec, transforms=[apply(phase, t) for t in spec['transforms']]) return spec @@ -851,6 +900,7 @@ def preprocess_langauges(spec): ensure_transforms_have_providers, preprocess_source_sink, preprocess_chain, + tag_explicit_inputs, normalize_inputs_outputs, preprocess_flattened_inputs, ensure_errors_consumed, diff --git a/sdks/python/apache_beam/yaml/yaml_transform_test.py b/sdks/python/apache_beam/yaml/yaml_transform_test.py index 118b399b772f..63f2e0e7facd 100644 --- a/sdks/python/apache_beam/yaml/yaml_transform_test.py +++ b/sdks/python/apache_beam/yaml/yaml_transform_test.py @@ -31,6 +31,8 @@ class CreateTimestamped(beam.PTransform): + _yaml_requires_inputs = False + def __init__(self, elements): self._elements = elements @@ -117,7 +119,8 @@ def test_chain_with_input(self): - type: PyMap config: fn: "lambda x: x + 41" - ''', providers=TEST_PROVIDERS) + ''', + providers=TEST_PROVIDERS) assert_that(result, equal_to([41, 43, 47, 53, 61, 71, 83, 97, 113, 131])) def test_chain_with_source_sink(self): @@ -138,7 +141,8 @@ def test_chain_with_source_sink(self): type: PyMap config: fn: "lambda x: x + 41" - ''', providers=TEST_PROVIDERS) + ''', + providers=TEST_PROVIDERS) assert_that(result, equal_to([41, 43, 47, 53, 61, 71, 83, 97, 113, 131])) def test_chain_with_root(self): @@ -157,7 +161,8 @@ def test_chain_with_root(self): - type: PyMap config: fn: "lambda x: x + 41" - ''', providers=TEST_PROVIDERS) + ''', + providers=TEST_PROVIDERS) assert_that(result, equal_to([41, 43, 47, 53, 61, 71, 83, 97, 113, 131])) def create_has_schema(self): @@ -185,11 +190,11 @@ def test_implicit_flatten(self): ''' type: composite transforms: - - type: CreateInts + - type: Create name: CreateSmall config: elements: [1, 2, 3] - - type: CreateInts + - type: Create name: CreateBig config: elements: [100, 200] @@ -198,7 +203,8 @@ def test_implicit_flatten(self): config: fn: "lambda x: x * x" output: PyMap - ''', providers=TEST_PROVIDERS) + ''', + providers=TEST_PROVIDERS) assert_that(result, equal_to([1, 4, 9, 10000, 40000])) def test_csv_to_json(self): @@ -260,7 +266,8 @@ def test_name_is_not_ambiguous(self): fn: "lambda elem: elem * elem" input: Create output: PyMap - ''', providers=TEST_PROVIDERS) + ''', + providers=TEST_PROVIDERS) # No exception raised assert_that(result, equal_to([0, 1, 9, 16])) @@ -288,7 +295,53 @@ def test_name_is_ambiguous(self): fn: "lambda elem: elem + 3" input: PyMap output: AnotherMap - ''', providers=TEST_PROVIDERS) + ''', + providers=TEST_PROVIDERS) + + def test_empty_inputs_throws_error(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + with self.assertRaisesRegex(ValueError, + 'Missing inputs for transform at ' + '"EmptyInputOkButYamlDoesntKnow" at line .*'): + _ = p | YamlTransform( + ''' + type: composite + 
transforms: + - type: PyTransform + name: EmptyInputOkButYamlDoesntKnow + config: + constructor: apache_beam.Impulse + ''') + + def test_empty_inputs_ok_in_source(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + # Does not throw an error like it does above. + _ = p | YamlTransform( + ''' + type: composite + source: + type: PyTransform + name: EmptyInputOkButYamlDoesntKnow + config: + constructor: apache_beam.Impulse + ''') + + def test_empty_inputs_ok_if_explicit(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + # Does not throw an error like it does above. + _ = p | YamlTransform( + ''' + type: composite + transforms: + - type: PyTransform + name: EmptyInputOkButYamlDoesntKnow + input: {} + config: + constructor: apache_beam.Impulse + ''') def test_annotations(self): t = LinearTransform(5, b=100) @@ -365,12 +418,12 @@ def test_mapping_errors(self): ''' type: composite transforms: - - type: CreateInts + - type: Create config: elements: [0, 1, 2, 4] - type: PyMap name: ToRow - input: CreateInts + input: Create config: fn: "lambda x: beam.Row(num=x, str='a' * x or 'bbb')" - type: Filter diff --git a/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py b/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py index d68c029be559..5d5e5850fd73 100644 --- a/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py +++ b/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py @@ -343,6 +343,7 @@ def test_normalize_source_sink(self): expected = ''' transforms: - type: Create + input: {'__explicitly_empty__': null} config: elements: [0,1,2] - type: PyMap @@ -372,6 +373,7 @@ def test_normalize_source_sink_only_source(self): expected = ''' transforms: - type: Create + input: {'__explicitly_empty__': null} config: elements: [0,1,2] - type: PyMap @@ -441,6 +443,7 @@ def test_preprocess_source_sink_composite(self): type: composite transforms: - type: Create + input: {'__explicitly_empty__': null} config: elements: [0,1,2] - type: PyMap @@ -468,6 +471,7 @@ def test_preprocess_source_sink_chain(self): type: chain transforms: - type: Create + input: {'__explicitly_empty__': null} config: elements: [0,1,2] - type: PyMap diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 88d0abae83bd..762bed268d63 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -96,7 +96,7 @@ platform_identifiers_map.each { platform, idsuffix -> exec { environment CIBW_BUILD: "cp${pyversion}-${idsuffix}" environment CIBW_ENVIRONMENT: "SETUPTOOLS_USE_DISTUTILS=stdlib" - environment CIBW_BEFORE_BUILD: "pip install cython==0.29.36 numpy && pip install --upgrade setuptools" + environment CIBW_BEFORE_BUILD: "pip install cython==0.29.36 numpy --config-settings=setup-args='-Dallow-noblas=true' && pip install --upgrade setuptools" // note: sync cibuildwheel version with GitHub Action // .github/workflow/build_wheel.yml:build_wheels "Install cibuildwheel" step executable 'sh' diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index eee6e9d21887..8b8a56808996 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -418,7 +418,8 @@ task vertexAIInferenceTest { "test_opts": testOpts, "suite": "VertexAITests-df-py${pythonVersionSuffix}", "collect": "uses_vertex_ai and it_postcommit" , - "runner": "TestDataflowRunner" + "runner": 
"TestDataflowRunner", + "requirements_file": "$requirementsFile" ] def cmdArgs = mapToArgString(argMap) exec { diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 1ce1c23c0600..1caf25caf080 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -31,7 +31,7 @@ select = E3 # https://github.com/apache/beam/issues/25668 pip_pre = True # allow apps that support color to use it. -passenv=TERM +passenv=TERM,CLOUDSDK_CONFIG # Set [] options for pip installation of apache-beam tarball. extras = test,dataframe # Don't warn that these commands aren't installed. diff --git a/sdks/typescript/package-lock.json b/sdks/typescript/package-lock.json index 51a10d2b4a8f..e4556449fde4 100644 --- a/sdks/typescript/package-lock.json +++ b/sdks/typescript/package-lock.json @@ -1,15 +1,15 @@ { "name": "apache-beam", - "version": "2.50.0-SNAPSHOT", + "version": "2.50.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "apache-beam", - "version": "2.50.0-SNAPSHOT", + "version": "2.50.0", "dependencies": { "@google-cloud/pubsub": "^2.19.4", - "@grpc/grpc-js": "^1.8.8", + "@grpc/grpc-js": "^1.4.6", "@protobuf-ts/grpc-transport": "^2.1.0", "@protobuf-ts/plugin": "^2.1.0", "bson": "^4.6.0", @@ -19,7 +19,7 @@ "fast-deep-equal": "^3.1.3", "find-git-root": "^1.0.4", "long": "^4.0.0", - "protobufjs": "^7.2.4", + "protobufjs": "^6.11.3", "queue-typescript": "^1.0.1", "serialize-closures": "^0.2.7", "ts-closure-transform": "^0.1.7", @@ -190,73 +190,17 @@ } }, "node_modules/@grpc/grpc-js": { - "version": "1.8.8", - "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.8.8.tgz", - "integrity": "sha512-4gfDqMLXTrorvYTKA1jL22zLvVwiHJ73t6Re1OHwdCFRjdGTDOVtSJuaWhtHaivyeDGg0LeCkmU77MTKoV3wPA==", + "version": "1.4.6", + "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.4.6.tgz", + "integrity": "sha512-Byau4xiXfIixb1PnW30V/P9mkrZ05lknyNqiK+cVY9J5hj3gecxd/anwaUbAM8j834zg1x78NvAbwGnMfWEu7A==", "dependencies": { - "@grpc/proto-loader": "^0.7.0", + "@grpc/proto-loader": "^0.6.4", "@types/node": ">=12.12.47" }, "engines": { "node": "^8.13.0 || >=10.10.0" } }, - "node_modules/@grpc/grpc-js/node_modules/@grpc/proto-loader": { - "version": "0.7.7", - "resolved": "https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.7.7.tgz", - "integrity": "sha512-1TIeXOi8TuSCQprPItwoMymZXxWT0CPxUhkrkeCUH+D8U7QDwQ6b7SUz2MaLuWM2llT+J/TVFLmQI5KtML3BhQ==", - "dependencies": { - "@types/long": "^4.0.1", - "lodash.camelcase": "^4.3.0", - "long": "^4.0.0", - "protobufjs": "^7.0.0", - "yargs": "^17.7.2" - }, - "bin": { - "proto-loader-gen-types": "build/bin/proto-loader-gen-types.js" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/@grpc/grpc-js/node_modules/cliui": { - "version": "8.0.1", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", - "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", - "dependencies": { - "string-width": "^4.2.0", - "strip-ansi": "^6.0.1", - "wrap-ansi": "^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/@grpc/grpc-js/node_modules/yargs": { - "version": "17.7.2", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", - "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", - "dependencies": { - "cliui": "^8.0.1", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.3", - "y18n": "^5.0.5", - "yargs-parser": "^21.1.1" - }, - "engines": 
{ - "node": ">=12" - } - }, - "node_modules/@grpc/grpc-js/node_modules/yargs-parser": { - "version": "21.1.1", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", - "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", - "engines": { - "node": ">=12" - } - }, "node_modules/@grpc/proto-loader": { "version": "0.6.9", "resolved": "https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.6.9.tgz", @@ -275,31 +219,6 @@ "node": ">=6" } }, - "node_modules/@grpc/proto-loader/node_modules/protobufjs": { - "version": "6.11.3", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz", - "integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==", - "hasInstallScript": true, - "dependencies": { - "@protobufjs/aspromise": "^1.1.2", - "@protobufjs/base64": "^1.1.2", - "@protobufjs/codegen": "^2.0.4", - "@protobufjs/eventemitter": "^1.1.0", - "@protobufjs/fetch": "^1.1.0", - "@protobufjs/float": "^1.0.2", - "@protobufjs/inquire": "^1.1.0", - "@protobufjs/path": "^1.1.2", - "@protobufjs/pool": "^1.1.0", - "@protobufjs/utf8": "^1.1.0", - "@types/long": "^4.0.1", - "@types/node": ">=13.7.0", - "long": "^4.0.0" - }, - "bin": { - "pbjs": "bin/pbjs", - "pbts": "bin/pbts" - } - }, "node_modules/@humanwhocodes/config-array": { "version": "0.9.5", "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.9.5.tgz", @@ -3293,7 +3212,7 @@ "protobufjs": "^6.11.2" } }, - "node_modules/proto3-json-serializer/node_modules/protobufjs": { + "node_modules/protobufjs": { "version": "6.11.3", "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz", "integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==", @@ -3318,34 +3237,6 @@ "pbts": "bin/pbts" } }, - "node_modules/protobufjs": { - "version": "7.2.4", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.4.tgz", - "integrity": "sha512-AT+RJgD2sH8phPmCf7OUZR8xGdcJRga4+1cOaXJ64hvcSkVhNcRHOwIxUatPH15+nj59WAGTDv3LSGZPEQbJaQ==", - "hasInstallScript": true, - "dependencies": { - "@protobufjs/aspromise": "^1.1.2", - "@protobufjs/base64": "^1.1.2", - "@protobufjs/codegen": "^2.0.4", - "@protobufjs/eventemitter": "^1.1.0", - "@protobufjs/fetch": "^1.1.0", - "@protobufjs/float": "^1.0.2", - "@protobufjs/inquire": "^1.1.0", - "@protobufjs/path": "^1.1.2", - "@protobufjs/pool": "^1.1.0", - "@protobufjs/utf8": "^1.1.0", - "@types/node": ">=13.7.0", - "long": "^5.0.0" - }, - "engines": { - "node": ">=12.0.0" - } - }, - "node_modules/protobufjs/node_modules/long": { - "version": "5.2.3", - "resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz", - "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==" - }, "node_modules/punycode": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", @@ -4296,55 +4187,12 @@ } }, "@grpc/grpc-js": { - "version": "1.8.8", - "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.8.8.tgz", - "integrity": "sha512-4gfDqMLXTrorvYTKA1jL22zLvVwiHJ73t6Re1OHwdCFRjdGTDOVtSJuaWhtHaivyeDGg0LeCkmU77MTKoV3wPA==", + "version": "1.4.6", + "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.4.6.tgz", + "integrity": "sha512-Byau4xiXfIixb1PnW30V/P9mkrZ05lknyNqiK+cVY9J5hj3gecxd/anwaUbAM8j834zg1x78NvAbwGnMfWEu7A==", "requires": { - "@grpc/proto-loader": "^0.7.0", + 
"@grpc/proto-loader": "^0.6.4", "@types/node": ">=12.12.47" - }, - "dependencies": { - "@grpc/proto-loader": { - "version": "0.7.7", - "resolved": "https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.7.7.tgz", - "integrity": "sha512-1TIeXOi8TuSCQprPItwoMymZXxWT0CPxUhkrkeCUH+D8U7QDwQ6b7SUz2MaLuWM2llT+J/TVFLmQI5KtML3BhQ==", - "requires": { - "@types/long": "^4.0.1", - "lodash.camelcase": "^4.3.0", - "long": "^4.0.0", - "protobufjs": "^7.0.0", - "yargs": "^17.7.2" - } - }, - "cliui": { - "version": "8.0.1", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", - "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", - "requires": { - "string-width": "^4.2.0", - "strip-ansi": "^6.0.1", - "wrap-ansi": "^7.0.0" - } - }, - "yargs": { - "version": "17.7.2", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", - "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", - "requires": { - "cliui": "^8.0.1", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.3", - "y18n": "^5.0.5", - "yargs-parser": "^21.1.1" - } - }, - "yargs-parser": { - "version": "21.1.1", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", - "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==" - } } }, "@grpc/proto-loader": { @@ -4357,28 +4205,6 @@ "long": "^4.0.0", "protobufjs": "^6.10.0", "yargs": "^16.2.0" - }, - "dependencies": { - "protobufjs": { - "version": "6.11.3", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz", - "integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==", - "requires": { - "@protobufjs/aspromise": "^1.1.2", - "@protobufjs/base64": "^1.1.2", - "@protobufjs/codegen": "^2.0.4", - "@protobufjs/eventemitter": "^1.1.0", - "@protobufjs/fetch": "^1.1.0", - "@protobufjs/float": "^1.0.2", - "@protobufjs/inquire": "^1.1.0", - "@protobufjs/path": "^1.1.2", - "@protobufjs/pool": "^1.1.0", - "@protobufjs/utf8": "^1.1.0", - "@types/long": "^4.0.1", - "@types/node": ">=13.7.0", - "long": "^4.0.0" - } - } } }, "@humanwhocodes/config-array": { @@ -6583,34 +6409,12 @@ "integrity": "sha512-A60IisqvnuI45qNRygJjrnNjX2TMdQGMY+57tR3nul3ZgO2zXkR9OGR8AXxJhkqx84g0FTnrfi3D5fWMSdANdQ==", "requires": { "protobufjs": "^6.11.2" - }, - "dependencies": { - "protobufjs": { - "version": "6.11.3", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz", - "integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==", - "requires": { - "@protobufjs/aspromise": "^1.1.2", - "@protobufjs/base64": "^1.1.2", - "@protobufjs/codegen": "^2.0.4", - "@protobufjs/eventemitter": "^1.1.0", - "@protobufjs/fetch": "^1.1.0", - "@protobufjs/float": "^1.0.2", - "@protobufjs/inquire": "^1.1.0", - "@protobufjs/path": "^1.1.2", - "@protobufjs/pool": "^1.1.0", - "@protobufjs/utf8": "^1.1.0", - "@types/long": "^4.0.1", - "@types/node": ">=13.7.0", - "long": "^4.0.0" - } - } } }, "protobufjs": { - "version": "7.2.4", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.4.tgz", - "integrity": "sha512-AT+RJgD2sH8phPmCf7OUZR8xGdcJRga4+1cOaXJ64hvcSkVhNcRHOwIxUatPH15+nj59WAGTDv3LSGZPEQbJaQ==", + "version": "6.11.3", + "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz", + 
"integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==", "requires": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", @@ -6622,15 +6426,9 @@ "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.0", + "@types/long": "^4.0.1", "@types/node": ">=13.7.0", - "long": "^5.0.0" - }, - "dependencies": { - "long": { - "version": "5.2.3", - "resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz", - "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==" - } + "long": "^4.0.0" } }, "punycode": { diff --git a/sdks/typescript/package.json b/sdks/typescript/package.json index 041121bd41ea..35a1e8134e29 100644 --- a/sdks/typescript/package.json +++ b/sdks/typescript/package.json @@ -36,7 +36,7 @@ }, "dependencies": { "@google-cloud/pubsub": "^2.19.4", - "@grpc/grpc-js": "^1.8.8", + "@grpc/grpc-js": "~1.4.6", "@protobuf-ts/grpc-transport": "^2.1.0", "@protobuf-ts/plugin": "^2.1.0", "bson": "^4.6.0", @@ -46,7 +46,7 @@ "fast-deep-equal": "^3.1.3", "find-git-root": "^1.0.4", "long": "^4.0.0", - "protobufjs": "^7.2.4", + "protobufjs": "~6.11.3", "queue-typescript": "^1.0.1", "serialize-closures": "^0.2.7", "ts-closure-transform": "^0.1.7", diff --git a/sdks/typescript/src/apache_beam/runners/dataflow.ts b/sdks/typescript/src/apache_beam/runners/dataflow.ts index 950e630d82d9..e7da1f7ada51 100644 --- a/sdks/typescript/src/apache_beam/runners/dataflow.ts +++ b/sdks/typescript/src/apache_beam/runners/dataflow.ts @@ -33,6 +33,8 @@ export function dataflowRunner(runnerOptions: { options: Object = {} ): Promise { var augmentedOptions = { experiments: [] as string[], ...options }; + augmentedOptions.experiments.push("use_runner_v2"); + augmentedOptions.experiments.push("use_portable_job_submission"); augmentedOptions.experiments.push("use_sibling_sdk_workers"); return new PortableRunner( runnerOptions as any, diff --git a/website/www/site/content/en/documentation/io/built-in/google-bigquery.md b/website/www/site/content/en/documentation/io/built-in/google-bigquery.md index 24314dc11800..eae98b84d2c1 100644 --- a/website/www/site/content/en/documentation/io/built-in/google-bigquery.md +++ b/website/www/site/content/en/documentation/io/built-in/google-bigquery.md @@ -788,6 +788,8 @@ BigQuery Storage Write API for Python SDK currently has some limitations on supp {{< paragraph class="language-py" >}} **Note:** If you want to run WriteToBigQuery with Storage Write API from the source code, you need to run `./gradlew :sdks:java:io:google-cloud-platform:expansion-service:build` to build the expansion-service jar. If you are running from a released Beam SDK, the jar will already be included. +**Note:** Auto sharding is not currently supported for Python's Storage Write API. + {{< /paragraph >}} #### Exactly-once semantics @@ -877,6 +879,8 @@ explicitly enable this using [`withAutoSharding`](https://beam.apache.org/releas ***Note:*** `STORAGE_WRITE_API` will default to dynamic sharding when `numStorageWriteApiStreams` is set to 0 or is unspecified. 
+ +***Note:*** Auto sharding with `STORAGE_WRITE_API` is supported on Dataflow's legacy runner, but **not** on Runner V2 {{< /paragraph >}} When using `STORAGE_WRITE_API`, the PCollection returned by diff --git a/website/www/site/content/en/documentation/sdks/python-dependencies.md b/website/www/site/content/en/documentation/sdks/python-dependencies.md index a96d722d05c9..09c56adac430 100644 --- a/website/www/site/content/en/documentation/sdks/python-dependencies.md +++ b/website/www/site/content/en/documentation/sdks/python-dependencies.md @@ -32,7 +32,7 @@ Dependencies for your Beam SDK version are listed in `setup.py` in the Beam repo https://raw.githubusercontent.com/apache/beam/v/sdks/python/setup.py ``` -

-Replace `<VERSION_NUMBER>` with the major.minor.patch version of the SDK. For example, https://raw.githubusercontent.com/apache/beam/v{{< param release_latest >}}/sdks/python/setup.py will provide the dependencies for the {{< param release_latest >}} release.
+Replace `<VERSION_NUMBER>` with the major.minor.patch version of the SDK. For example, https://raw.githubusercontent.com/apache/beam/v{{< param release_latest >}}/sdks/python/setup.py will provide the dependencies for the {{< param release_latest >}} release.
2. Review the core dependency list under `REQUIRED_PACKAGES`. diff --git a/website/www/site/layouts/partials/header.html b/website/www/site/layouts/partials/header.html index 3fa8bdc9455c..957e3de2b1ed 100644 --- a/website/www/site/layouts/partials/header.html +++ b/website/www/site/layouts/partials/header.html @@ -208,9 +208,9 @@
diff --git a/website/www/site/static/images/college_2023_banner_desktop.png b/website/www/site/static/images/college_2023_banner_desktop.png new file mode 100644 index 000000000000..09e0cdfe24b9 Binary files /dev/null and b/website/www/site/static/images/college_2023_banner_desktop.png differ diff --git a/website/www/site/static/images/college_2023_banner_mobile.png b/website/www/site/static/images/college_2023_banner_mobile.png new file mode 100644 index 000000000000..e6b733a7e2ef Binary files /dev/null and b/website/www/site/static/images/college_2023_banner_mobile.png differ