diff --git a/.test-infra/pipelines/README.md b/.test-infra/pipelines/README.md deleted file mode 100644 index e2512e6fec65..000000000000 --- a/.test-infra/pipelines/README.md +++ /dev/null @@ -1,34 +0,0 @@ - - -# Overview - -This directory holds pipelines that support testing infrastructure and are meant for internal use. - -# Infrastructure - -Some pipelines require provisioning of resources prior to execution. See -[infrastructure](infrastructure) for details. - -# Usage - -Pipelines depend on -[Dataflow Flex Templates](https://cloud.google.com/dataflow/docs/guides/templates/using-flex-templates) -for execution. See [infrastructure/04.templates](infrastructure/04.templates) -for details. diff --git a/.test-infra/pipelines/build.gradle b/.test-infra/pipelines/build.gradle deleted file mode 100644 index a073b6c087ba..000000000000 --- a/.test-infra/pipelines/build.gradle +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar - -plugins { - id 'org.apache.beam.module' -} - -applyJavaNature( - exportJavadoc: false, - publish: false, - // ShadowJar has a naming dependency with: - // .test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/template.tf - validateShadowJar: false, - shadowClosure: { - // TODO: determine how to create separate shadowJar tasks for each pipeline - manifest { - attributes 'Main-Class': 'org.apache.beam.testinfra.pipelines.ReadDataflowApiWriteBigQuery' - } - mergeServiceFiles() - archiveVersion = 'latest' - } -) - -description = "Apache Beam :: Test Infra :: Pipelines" -ext.summary = "Various Beam pipelines to support testing" - -def googleCloudEventsVersion = "0.8.1" -def grpcDataflowProtoVersion = "0.17.0" -def ioGrpcApiVersion = "1.53.0" -def jupiterVersion = "5.9.3" -def nettyVersion = "1.53.0" - -dependencies { - implementation enforcedPlatform(library.java.google_cloud_platform_libraries_bom) - implementation library.java.google_api_services_bigquery - implementation library.java.jackson_annotations - implementation library.java.jackson_core - implementation library.java.jackson_databind - implementation library.java.vendored_guava_32_1_2_jre - implementation library.java.google_auth_library_credentials - implementation library.java.grpc_auth - implementation library.java.protobuf_java - implementation library.java.protobuf_java_util - implementation library.java.joda_time - implementation library.java.slf4j_api - implementation "com.google.api.grpc:proto-google-cloud-dataflow-v1beta3:${grpcDataflowProtoVersion}" - implementation "io.grpc:grpc-api:${ioGrpcApiVersion}" - implementation "io.grpc:grpc-netty:${nettyVersion}" - implementation "com.google.cloud:google-cloudevent-types:${googleCloudEventsVersion}" - 
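// Version omitted on the next line: the enforcedPlatform google_cloud_platform_libraries_bom above is assumed to pin grpc-google-cloud-dataflow-v1beta3. -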
implementation 'com.google.api.grpc:grpc-google-cloud-dataflow-v1beta3' - implementation project(":sdks:java:io:google-cloud-platform") - implementation project(":sdks:java:extensions:google-cloud-platform-core") - implementation project(path: ":sdks:java:core", configuration: "shadow") - runtimeOnly project(":runners:google-cloud-dataflow-java") - runtimeOnly project(path: ":runners:direct-java", configuration: "shadow") - - testImplementation(platform("org.junit:junit-bom:${jupiterVersion}")) - testImplementation('org.junit.jupiter:junit-jupiter') - testImplementation project(":sdks:java:extensions:google-cloud-platform-core") - testImplementation library.java.commons_lang3 -} - -test { - useJUnitPlatform() - testLogging { - events "passed", "skipped", "failed" - } -} diff --git a/.test-infra/pipelines/go.mod b/.test-infra/pipelines/go.mod deleted file mode 100644 index 0204d037a4cc..000000000000 --- a/.test-infra/pipelines/go.mod +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This module contains all Go code for internal use by .test-infra pipelines. -module github.com/apache/beam/test-infra/pipelines - -go 1.20 - -require github.com/google/go-cmp v0.5.9 // indirect diff --git a/.test-infra/pipelines/go.sum b/.test-infra/pipelines/go.sum deleted file mode 100644 index 62841cdb151d..000000000000 --- a/.test-infra/pipelines/go.sum +++ /dev/null @@ -1,2 +0,0 @@ -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= diff --git a/.test-infra/pipelines/infrastructure/01.setup/.terraform.lock.hcl b/.test-infra/pipelines/infrastructure/01.setup/.terraform.lock.hcl deleted file mode 100644 index 48c94d2fa44b..000000000000 --- a/.test-infra/pipelines/infrastructure/01.setup/.terraform.lock.hcl +++ /dev/null @@ -1,40 +0,0 @@ -# This file is maintained automatically by "terraform init". -# Manual edits may be lost in future updates. 
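-# To refresh these pinned provider versions, re-run "terraform init -upgrade" rather than editing this file by hand.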
- -provider "registry.terraform.io/hashicorp/google" { - version = "4.66.0" - hashes = [ - "h1:ykmsArGX1/JTEbqMMUXA9s1H+IdtXnKanl5dh4YsaXo=", - "zh:141cddc714dec246957a47cb4103b34302222fc93a87b64de88116b22ebb0ea1", - "zh:276ebd75cb7c265d12b2c611a5f8d38fd6b892ef3edec1b845a934721db794e5", - "zh:574ae7b4808c1560b5a55a75ca2ad5d8ff6b5fb9dad6dffce3fae7ff8ccf78a9", - "zh:65309953f79827c23cc800fc093619a1e0e51a53e2429e9b04e537a11012f989", - "zh:6d67d3edea47767a873c38f1ff519d4450d8e1189a971bda7b0ffde9c9c65a86", - "zh:7fb116be869e30ee155c27f122d415f34d1d5de735d1fa9c4280cac71a42e8f4", - "zh:8a95ed92bb4547f4a40c953a6bd1db659b739f67adcacd798b11fafaec55ee67", - "zh:94f0179e84eb74823d8be4781b0a15f7f34ee39a7b158075504c882459f1ab23", - "zh:a58a7c5ace957cb4395f4b3bb11687e3a5c79362a744107f16623118cffc9370", - "zh:ab38b66f3c5c00df64c86fb4e47caef8cf451d5ed1f76845fd8b2c59628dc18a", - "zh:cc6bb1799e38912affc2a5b6f1c52b08f286d3751206532c04482b5ca0418eb6", - "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - ] -} - -provider "registry.terraform.io/hashicorp/random" { - version = "3.5.1" - hashes = [ - "h1:VSnd9ZIPyfKHOObuQCaKfnjIHRtR7qTw19Rz8tJxm+k=", - "zh:04e3fbd610cb52c1017d282531364b9c53ef72b6bc533acb2a90671957324a64", - "zh:119197103301ebaf7efb91df8f0b6e0dd31e6ff943d231af35ee1831c599188d", - "zh:4d2b219d09abf3b1bb4df93d399ed156cadd61f44ad3baf5cf2954df2fba0831", - "zh:6130bdde527587bbe2dcaa7150363e96dbc5250ea20154176d82bc69df5d4ce3", - "zh:6cc326cd4000f724d3086ee05587e7710f032f94fc9af35e96a386a1c6f2214f", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:b6d88e1d28cf2dfa24e9fdcc3efc77adcdc1c3c3b5c7ce503a423efbdd6de57b", - "zh:ba74c592622ecbcef9dc2a4d81ed321c4e44cddf7da799faa324da9bf52a22b2", - "zh:c7c5cde98fe4ef1143bd1b3ec5dc04baf0d4cc3ca2c5c7d40d17c0e9b2076865", - "zh:dac4bad52c940cd0dfc27893507c1e92393846b024c5a9db159a93c534a3da03", - "zh:de8febe2a2acd9ac454b844a4106ed295ae9520ef54dc8ed2faf29f12716b602", - "zh:eab0d0495e7e711cca367f7d4df6e322e6c562fc52151ec931176115b83ed014", - ] -} diff --git a/.test-infra/pipelines/infrastructure/01.setup/README.md b/.test-infra/pipelines/infrastructure/01.setup/README.md deleted file mode 100644 index 1f11e154cb4a..000000000000 --- a/.test-infra/pipelines/infrastructure/01.setup/README.md +++ /dev/null @@ -1,83 +0,0 @@ - - -# Overview - -This directory sets up the Google Cloud project environment for Dataflow usage. - -# List of all provision GCP resources - -The following table lists all provisioned resources and their rationale. - -| Resource | Reason | -|---------------------------------|--------------------------------------------| -| API services | Required by GCP to provision resources | -| Dataflow Worker Service Account | Use GCP service account other than default | -| Worker IAM Roles | Follow principle of least privilege | -| Artifact Registry Repository | Required to store template artifacts | -| Google Cloud Storage bucket | Required for various storage needs | - -# Usage - -Follow terraform workflow convention to apply this module. It assumes the -working directory is at -[.test-infra/pipelines](../..). - -## Terraform Init - -This module uses a Google Cloud Storage bucket backend. 
- -Initialize the terraform workspace for the `apache-beam-testing` project: - -``` -DIR=infrastructure/01.setup -terraform -chdir=$DIR init -backend-config=apache-beam-testing.tfbackend -``` - -or for your own Google Cloud project: - -``` -DIR=infrastructure/01.setup -terraform -chdir=$DIR init -backend-config=path/to/your/backend-config-file.tfbackend -``` - -where your `backend-config-file.tfbackend` contains: - -``` -bucket = -``` - -## Terraform Apply - -Notice the `-var-file` flag referencing [common.tfvars](common.tfvars) that -provides opinionated variable defaults. - -For `apache-beam-testing`: - -``` -DIR=infrastructure/01.setup -terraform -chdir=$DIR apply -var-file=common.tfvars -var-file=apache-beam-testing.tfvars -``` - -or for your own Google Cloud project: - -``` -DIR=infrastructure/01.setup -terraform -chdir=$DIR apply -var-file=common.tfvars -``` diff --git a/.test-infra/pipelines/infrastructure/01.setup/apache-beam-testing.tfbackend b/.test-infra/pipelines/infrastructure/01.setup/apache-beam-testing.tfbackend deleted file mode 100644 index 76be44de6453..000000000000 --- a/.test-infra/pipelines/infrastructure/01.setup/apache-beam-testing.tfbackend +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -bucket = "b507e468-52e9-4e72-83e5-ecbf563eda12" diff --git a/.test-infra/pipelines/infrastructure/01.setup/apache-beam-testing.tfvars b/.test-infra/pipelines/infrastructure/01.setup/apache-beam-testing.tfvars deleted file mode 100644 index ee56a64480c1..000000000000 --- a/.test-infra/pipelines/infrastructure/01.setup/apache-beam-testing.tfvars +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -project = "apache-beam-testing" diff --git a/.test-infra/pipelines/infrastructure/01.setup/artifactregistry.tf b/.test-infra/pipelines/infrastructure/01.setup/artifactregistry.tf deleted file mode 100644 index 0fe0644f383d..000000000000 --- a/.test-infra/pipelines/infrastructure/01.setup/artifactregistry.tf +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -resource "google_artifact_registry_repository" "default" { - description = "Stores artifacts related to github.com/apache/beam/.test-infra/pipelines" - format = "DOCKER" - repository_id = var.artifact_registry_id - location = var.region -} - -resource "google_artifact_registry_repository_iam_member" "dataflow_worker" { - member = "serviceAccount:${google_service_account.dataflow_worker.email}" - repository = google_artifact_registry_repository.default.id - role = "roles/artifactregistry.reader" -} \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/01.setup/common.tfvars b/.test-infra/pipelines/infrastructure/01.setup/common.tfvars deleted file mode 100644 index 51a45e7a6cb9..000000000000 --- a/.test-infra/pipelines/infrastructure/01.setup/common.tfvars +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -artifact_registry_id = "infra-pipelines" -region = "us-central1" -dataflow_worker_service_account_id = "infra-pipelines-worker" \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/01.setup/iam.tf b/.test-infra/pipelines/infrastructure/01.setup/iam.tf deleted file mode 100644 index 23d1e90b8cdc..000000000000 --- a/.test-infra/pipelines/infrastructure/01.setup/iam.tf +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Provision a service account that will be bound to the Dataflow pipeline -resource "google_service_account" "dataflow_worker" { - depends_on = [google_project_service.required_services] - account_id = var.dataflow_worker_service_account_id - display_name = var.dataflow_worker_service_account_id - description = "The service account bound to the compute engine instance provisioned to run Dataflow Jobs" -} - -// Provision IAM roles for the Dataflow runner service account -resource "google_project_iam_member" "dataflow_worker_service_account_roles" { - depends_on = [google_project_service.required_services] - for_each = toset([ - "roles/dataflow.worker", - "roles/dataflow.viewer" - ]) - role = each.key - member = "serviceAccount:${google_service_account.dataflow_worker.email}" - project = var.project -} diff --git a/.test-infra/pipelines/infrastructure/01.setup/provider.tf b/.test-infra/pipelines/infrastructure/01.setup/provider.tf deleted file mode 100644 index eca4e05b72d2..000000000000 --- a/.test-infra/pipelines/infrastructure/01.setup/provider.tf +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Setup Google Cloud provider -provider "google" { - project = var.project -} \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/01.setup/services.tf b/.test-infra/pipelines/infrastructure/01.setup/services.tf deleted file mode 100644 index cc303dcac9ec..000000000000 --- a/.test-infra/pipelines/infrastructure/01.setup/services.tf +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Provision the required Google Cloud services -resource "google_project_service" "required_services" { - for_each = toset([ - "artifactregistry", - "bigquery", - "cloudbuild", - "compute", - "dataflow", - "eventarc", - "iam", - "pubsub", - "workflows", - ]) - - service = "${each.key}.googleapis.com" - disable_on_destroy = false -} diff --git a/.test-infra/pipelines/infrastructure/01.setup/state.tf b/.test-infra/pipelines/infrastructure/01.setup/state.tf deleted file mode 100644 index e09cf6b345c3..000000000000 --- a/.test-infra/pipelines/infrastructure/01.setup/state.tf +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Partial GCS based terraform backend configuration meant for use with a -// tfbackend file. -terraform { - backend "gcs" { - # bucket configured in *.tfbackend file - prefix = "terraform/state/github.com/apache/beam/.test-infra/pipelines/infrastructure/01.setup" - } -} diff --git a/.test-infra/pipelines/infrastructure/01.setup/storage.tf b/.test-infra/pipelines/infrastructure/01.setup/storage.tf deleted file mode 100644 index 82eb201736af..000000000000 --- a/.test-infra/pipelines/infrastructure/01.setup/storage.tf +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// Generate random string to name Storage bucket -resource "random_string" "default" { - length = 8 - special = false - upper = false - lower = true - numeric = true -} - -// Provision Storage Bucket for use by Dataflow Worker as temporary storage -resource "google_storage_bucket" "default" { - location = var.region - name = "infra-pipelines-${random_string.default.result}" - labels = { - purpose = "infra-pipelines" - } - uniform_bucket_level_access = true -} - -// Enable Dataflow Worker Service Account to manage objects in temporary storage -resource "google_storage_bucket_iam_member" "default" { - bucket = google_storage_bucket.default.id - member = "serviceAccount:${google_service_account.dataflow_worker.email}" - role = "roles/storage.objectAdmin" -} diff --git a/.test-infra/pipelines/infrastructure/01.setup/variables.tf b/.test-infra/pipelines/infrastructure/01.setup/variables.tf deleted file mode 100644 index 05d910159b2f..000000000000 --- a/.test-infra/pipelines/infrastructure/01.setup/variables.tf +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -variable "project" { - type = string - description = "The Google Cloud Platform (GCP) project within which resources are provisioned" -} - -variable "region" { - type = string - description = "The Google Cloud Platform (GCP) region in which to provision resources" -} - -variable "dataflow_worker_service_account_id" { - type = string - description = "The Dataflow Worker Service Account ID" -} - -variable "artifact_registry_id" { - type = string - description = "The ID of the artifact registry repository" -} diff --git a/.test-infra/pipelines/infrastructure/02.network/.terraform.lock.hcl b/.test-infra/pipelines/infrastructure/02.network/.terraform.lock.hcl deleted file mode 100644 index aa3d8d0039ea..000000000000 --- a/.test-infra/pipelines/infrastructure/02.network/.terraform.lock.hcl +++ /dev/null @@ -1,22 +0,0 @@ -# This file is maintained automatically by "terraform init". -# Manual edits may be lost in future updates. 
- -provider "registry.terraform.io/hashicorp/google" { - version = "4.64.0" - hashes = [ - "h1:e9YVOqH5JQTR0LbT+VkOlJb1pDoZEvzXkqaA0Xsn5Mo=", - "h1:oT2shsj9Mb4dGGwzlbWQPMTGSex6yDtJZcF5xQJ7rdE=", - "zh:097fcb0a45fa41c2476deeb7a9adeadf5142e35e4d1a9eeb7b1720900a06807c", - "zh:177e6e34f10efb5cec16b4106af5aef5240f20c33d91d40f3ea73fdc6ce9a24a", - "zh:3331b0f62f900f8f1447e654a7318f3db03723739ac5dcdc446f1a1b1bf5fd0b", - "zh:39e5a19693f8d598d35968660837d1b55ca82d7c314cd433fd957d1c2a5b6616", - "zh:44d09cb871e7ec242610d84f93367755d0c532f744e5871a032cdba430e39ec7", - "zh:77769c0f8ace0be3f85b702b7d4cc0fd43d89bfbea1493166c4f288338222f0a", - "zh:a83ca3e204a85d1d04ee7a6432fdabc7b7e2ef7f46513b6309d8e30ea9e855a3", - "zh:bbf1e983d24877a690886aacd48085b37c8c61dc65e128707f36b7ae6de11abf", - "zh:c359fcf8694af0ec490a1784575eeb355d6e5a922b225f49d5307a06e9715ad0", - "zh:f0df551e19cf8cc9a021a4148518a610b856a50a55938710837fa55b4fbd252f", - "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - "zh:fb171d37178d46d711f3e09107492343f8356c1237bc6df23114920dc23c4528", - ] -} diff --git a/.test-infra/pipelines/infrastructure/02.network/README.md b/.test-infra/pipelines/infrastructure/02.network/README.md deleted file mode 100644 index 56977d6cb014..000000000000 --- a/.test-infra/pipelines/infrastructure/02.network/README.md +++ /dev/null @@ -1,81 +0,0 @@ - - -# Overview - -This directory provisions Google Cloud project networking for Dataflow usage. - -# List of all provision GCP resources - -The following table lists all provisioned resources and their rationale. - -| resource | reason | -|----------------|---------------------------------------------| -| Network | Run workload in its isolated GCP VPC | -| Subnetwork | Worker needs at least one subnetwork | -| Firewall Rules | Limit traffic to Worker service account VMS | - -# Usage - -Follow terraform workflow convention to apply this module. It assumes the -working directory is at -[.test-infra/pipelines](../..) - -## Terraform Init - -This module uses a Google Cloud Storage bucket backend. - -Initialize the terraform workspace for the `apache-beam-testing` project: - -``` -DIR=infrastructure/02.network -terraform -chdir=$DIR init -backend-config=apache-beam-testing.tfbackend -``` - -or for your own Google Cloud project: - -``` -DIR=infrastructure/02.network -terraform init -backend-config=path/to/your/backend-config-file.tfbackend -``` - -where your `backend-config-file.tfbackend` contains: - -``` -bucket = -``` - -## Terraform Apply - -Notice the `-var-file` flag referencing [common.tfvars](common.tfvars) that -provides opinionated variable defaults. - -For `apache-beam-testing`: - -``` -DIR=infrastructure/02.network -terraform -chdir=$DIR apply -var-file=common.tfvars -var-file=apache-beam-testing.tfvars -``` - -or for your own Google Cloud project: - -``` -DIR=infrastructure/02.network -terraform -chdir=$DIR apply -var-file=common.tfvars -``` diff --git a/.test-infra/pipelines/infrastructure/02.network/apache-beam-testing.tfbackend b/.test-infra/pipelines/infrastructure/02.network/apache-beam-testing.tfbackend deleted file mode 100644 index 76be44de6453..000000000000 --- a/.test-infra/pipelines/infrastructure/02.network/apache-beam-testing.tfbackend +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -bucket = "b507e468-52e9-4e72-83e5-ecbf563eda12" diff --git a/.test-infra/pipelines/infrastructure/02.network/apache-beam-testing.tfvars b/.test-infra/pipelines/infrastructure/02.network/apache-beam-testing.tfvars deleted file mode 100644 index ee56a64480c1..000000000000 --- a/.test-infra/pipelines/infrastructure/02.network/apache-beam-testing.tfvars +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -project = "apache-beam-testing" diff --git a/.test-infra/pipelines/infrastructure/02.network/common.tfvars b/.test-infra/pipelines/infrastructure/02.network/common.tfvars deleted file mode 100644 index 5263bd82abfa..000000000000 --- a/.test-infra/pipelines/infrastructure/02.network/common.tfvars +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -region = "us-central1" -dataflow_worker_service_account_id = "infra-pipelines-worker" -network_name_base = "infra-pipelines" -subnetwork_cidr_range = "10.0.0.0/24" diff --git a/.test-infra/pipelines/infrastructure/02.network/data.tf b/.test-infra/pipelines/infrastructure/02.network/data.tf deleted file mode 100644 index 8cb074ab2981..000000000000 --- a/.test-infra/pipelines/infrastructure/02.network/data.tf +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Query the Dataflow Worker Service Account -data "google_service_account" "dataflow_worker" { - account_id = var.dataflow_worker_service_account_id -} diff --git a/.test-infra/pipelines/infrastructure/02.network/network.tf b/.test-infra/pipelines/infrastructure/02.network/network.tf deleted file mode 100644 index 8abd62aa2afa..000000000000 --- a/.test-infra/pipelines/infrastructure/02.network/network.tf +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Provision virtual custom network -resource "google_compute_network" "default" { - name = var.network_name_base - auto_create_subnetworks = false -} - -// Provision subnetwork of the virtual custom network -resource "google_compute_subnetwork" "default" { - name = var.network_name_base - ip_cidr_range = var.subnetwork_cidr_range - network = google_compute_network.default.name - private_ip_google_access = true - region = var.region -} - -// Provision firewall rule for internal network traffic. -resource "google_compute_firewall" "default" { - name = "allow-data-pipelines-internal" - network = google_compute_network.default.name - - allow { - protocol = "tcp" - } - - source_service_accounts = [ - data.google_service_account.dataflow_worker.email - ] -} diff --git a/.test-infra/pipelines/infrastructure/02.network/provider.tf b/.test-infra/pipelines/infrastructure/02.network/provider.tf deleted file mode 100644 index e070d8408d96..000000000000 --- a/.test-infra/pipelines/infrastructure/02.network/provider.tf +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Setup Google Cloud provider -provider "google" { - project = var.project -} diff --git a/.test-infra/pipelines/infrastructure/02.network/state.tf b/.test-infra/pipelines/infrastructure/02.network/state.tf deleted file mode 100644 index cffe530cf903..000000000000 --- a/.test-infra/pipelines/infrastructure/02.network/state.tf +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Partial GCS based terraform backend configuration meant for use with a -// tfbackend file. -terraform { - backend "gcs" { - # bucket configured in *.tfbackend file - prefix = "terraform/state/github.com/apache/beam/.test-infra/pipelines/infrastructure/02.network" - } -} diff --git a/.test-infra/pipelines/infrastructure/02.network/variables.tf b/.test-infra/pipelines/infrastructure/02.network/variables.tf deleted file mode 100644 index de53c845dc65..000000000000 --- a/.test-infra/pipelines/infrastructure/02.network/variables.tf +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -variable "project" { - type = string - description = "The Google Cloud Platform (GCP) project within which resources are provisioned" -} - -variable "region" { - type = string - description = "The Google Cloud Platform (GCP) region in which to provision resources" -} - -variable "dataflow_worker_service_account_id" { - type = string - description = "The Dataflow Worker Service Account ID" -} - -variable "network_name_base" { - type = string - description = "The name of the Google Cloud Platform (GCP) name basis from which we name network related resources" -} - -variable "subnetwork_cidr_range" { - type = string - description = "The address range for this subnet, in CIDR notation. Use a standard private VPC network address range: for example, 10.0.0.0/9." -} diff --git a/.test-infra/pipelines/infrastructure/03.io/README.md b/.test-infra/pipelines/infrastructure/03.io/README.md deleted file mode 100644 index f72fe07c1bbf..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/README.md +++ /dev/null @@ -1,24 +0,0 @@ - - -# Overview - -This directory holds different terraform modules to provision resources required -for different pipeline needs. -See subdirectories for more details. diff --git a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/README.md b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/README.md deleted file mode 100644 index 9ea3a98ee777..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/README.md +++ /dev/null @@ -1,46 +0,0 @@ - - -# Overview - -This directory provisions a redis cluster in Kubernetes. - -# Usage - -Follow terraform workflow convention to apply this module. It assumes the -working directory is at -[.test-infra/pipelines/infrastructure/03.io/api-overuse-study](..). - -## Terraform Init - -Initialize the terraform workspace. - -``` -DIR=02.redis -terraform -chdir=$DIR init -``` - -## Terraform Apply - -Apply the terraform module. - -``` -DIR=02.redis -terraform -chdir=$DIR apply -var-file=common.tfvars -``` \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/common.tfvars b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/common.tfvars deleted file mode 100644 index f71b496b5a20..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/common.tfvars +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -namespace = "api-overuse-study" \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/data.tf b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/data.tf deleted file mode 100644 index 32cb7434d300..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/data.tf +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Query the Kubernetes namespace to verify existence. -data "kubernetes_namespace" "default" { - metadata { - name = var.namespace - } -} \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/provider.tf b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/provider.tf deleted file mode 100644 index 20bdbac74ea2..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/provider.tf +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -provider "kubernetes" { - config_path = "~/.kube/config" -} - -provider "helm" { - kubernetes { - config_path = "~/.kube/config" - } -} \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/redis.tf b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/redis.tf deleted file mode 100644 index 93b74a28782e..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/redis.tf +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Spin up a Redis cluster within the Kubernetes cluster using the bitnami -// helm chart. -resource "helm_release" "redis" { - wait = false - repository = "https://charts.bitnami.com/bitnami" - chart = "redis" - name = "redis" - namespace = data.kubernetes_namespace.default.metadata[0].name - set { - name = "auth.enabled" - value = false - } - set { - name = "auth.sentinel" - value = false - } -} \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/variables.tf b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/variables.tf deleted file mode 100644 index b55ce279654a..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/02.redis/variables.tf +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -variable "namespace" { - type = string - description = "The Kubernetes namespace" -} \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/README.md b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/README.md deleted file mode 100644 index 19c1a5fe7db6..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/README.md +++ /dev/null @@ -1,46 +0,0 @@ - - -# Overview - -This directory sets up the Kubernetes environment for subsequent modules. - -# Usage - -Follow terraform workflow convention to apply this module. -The following assumes the working directory is at -[.test-infra/pipelines/infrastructure/03.io/api-overuse-study](..). - -## Terraform Init - -Initialize the terraform workspace. - -``` -DIR=01.setup -terraform -chdir=$DIR init -``` - -## Terraform Apply - -Apply the terraform module. - -``` -DIR=01.setup -terraform -chdir=$DIR apply -var-file=common.tfvars -``` \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/common.tfvars b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/common.tfvars deleted file mode 100644 index f71b496b5a20..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/common.tfvars +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements.
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -namespace = "api-overuse-study" \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/namespace.tf b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/namespace.tf deleted file mode 100644 index f72e08dc08db..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/namespace.tf +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Provisions the namespace shared by all resources. -resource "kubernetes_namespace" "default" { - metadata { - name = var.namespace - } -} \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/provider.tf b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/provider.tf deleted file mode 100644 index 1846a8717469..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/provider.tf +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -provider "kubernetes" { - config_path = "~/.kube/config" -} \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/variables.tf b/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/variables.tf deleted file mode 100644 index 2ae6cc65410d..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/api-overuse-study/variables.tf +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -variable "namespace" { - type = string - description = "The Kubernetes namespace to provision resources" -} \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/.terraform.lock.hcl b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/.terraform.lock.hcl deleted file mode 100644 index b427fc0d2a5d..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/.terraform.lock.hcl +++ /dev/null @@ -1,21 +0,0 @@ -# This file is maintained automatically by "terraform init". -# Manual edits may be lost in future updates. - -provider "registry.terraform.io/hashicorp/google" { - version = "4.65.2" - hashes = [ - "h1:cnZqZeDJdQzW+JvJ+yCzM5h2dAyZt85Srugts7ymqzc=", - "zh:00152f7b8882ed26502965d60a7c1fc897f59db43ee483b4fbdf1bc3387ea2ea", - "zh:4db0f1ed8df34628808de27ea3f2747323ddac3d1919c6e1a9a54a437a93f809", - "zh:5f34a52fa2708350f4b31a325cf2a231327d0d54002bb62f689d3d095927ae9f", - "zh:804d2c73dd09362f6218075a8729c0bab2250f78b24638d683e8a292c076eccc", - "zh:91a4fa7b890d99e5b5a285028c8707c57d6562c3d14230d3ce426a5087a1099a", - "zh:a3eee907fd007348d518074670a894a656fdb2e7f3c27d7a00d2350eea9cbae6", - "zh:c4c32d40cffc780aa3ff1f996110462eca594186f8acd7ca0238b7990d8e7508", - "zh:ccd072f8f9c74d0e18baf8a09d64660924bb34c5f071e8480d1d91440ce3e434", - "zh:dcadca9ab46f8202d8a76b65ab4f5c8167c43bdeb6f149586a67718c3bef6185", - "zh:e38514a5ba4f5acb0e95b6babd953ca5192616c9f9fbe2da4cdf83c3a07468d3", - "zh:edbe487cfb51a9605c097eab47000f9cc92b9dbdeec55b3fbb048b714cf26031", - "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - ] -} diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/README.md b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/README.md deleted file mode 100644 index 612a164399c6..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/README.md +++ /dev/null @@ -1,86 +0,0 @@ - - -# Overview - -This directory holds terraform code to provision resources that -[org.apache.beam.testinfra.pipelines.ReadDataflowApiWriteBigQuery](../../../src/main/java/org/apache/beam/testinfra/pipelines/ReadDataflowApiWriteBigQuery.java) -reads from and writes to. 
- -# List of all provisioned GCP resources - -The following lists all provisioned resources and their rationale -categorized by GCP service. - -| resource | reason | -|-----------------------------|-------------------------------------------------------| -| Eventarc Workflow | Intended for listening to Dataflow Status Changes | -| Pub/Sub topic | Required for Eventarc Workflow | -| Pub/Sub subscription | Intended as a source; subscribes to Eventarc Workflow | -| Google Cloud Storage Bucket | Intended for temporary storage | -| BigQuery Datasets | Intended as sink | - -# Usage - -Follow terraform workflow convention to apply this module. It assumes the -working directory is at -[.test-infra/pipelines](../../..). - -## Terraform Init - -This module uses a Google Cloud Storage bucket backend. - -Initialize the terraform workspace for the `apache-beam-testing` project: - -``` -DIR=infrastructure/03.io/dataflow-to-bigquery -terraform -chdir=$DIR init -backend-config=apache-beam-testing.tfbackend -``` - -or for your own Google Cloud project: - -``` -DIR=infrastructure/03.io/dataflow-to-bigquery -terraform -chdir=$DIR init -backend-config=path/to/your/backend-config-file.tfbackend -``` - -where your `backend-config-file.tfbackend` contains: - -``` -bucket = -``` - -## Terraform Apply - -Notice the `-var-file` flag referencing [common.tfvars](common.tfvars) that -provides opinionated variable defaults. - -For `apache-beam-testing`: - -``` -DIR=infrastructure/03.io/dataflow-to-bigquery -terraform -chdir=$DIR apply -var-file=common.tfvars -var-file=apache-beam-testing.tfvars -``` - -or for your own Google Cloud project: - -``` -DIR=infrastructure/03.io/dataflow-to-bigquery -terraform -chdir=$DIR apply -var-file=common.tfvars -``` diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/apache-beam-testing.tfbackend b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/apache-beam-testing.tfbackend deleted file mode 100644 index 76be44de6453..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/apache-beam-testing.tfbackend +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -bucket = "b507e468-52e9-4e72-83e5-ecbf563eda12" diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/apache-beam-testing.tfvars b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/apache-beam-testing.tfvars deleted file mode 100644 index ee56a64480c1..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/apache-beam-testing.tfvars +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements.
diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/apache-beam-testing.tfbackend b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/apache-beam-testing.tfbackend deleted file mode 100644 index 76be44de6453..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/apache-beam-testing.tfbackend +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -bucket = "b507e468-52e9-4e72-83e5-ecbf563eda12" diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/apache-beam-testing.tfvars b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/apache-beam-testing.tfvars deleted file mode 100644 index ee56a64480c1..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/apache-beam-testing.tfvars +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -project = "apache-beam-testing" diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/bigquery.tf b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/bigquery.tf deleted file mode 100644 index 1b11ab799281..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/bigquery.tf +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Provision BigQuery dataset sink to store Dataflow API Job data -resource "google_bigquery_dataset" "sink" { - dataset_id = replace(var.workflow_resource_name_base, "-", "_") - description = "Stores Dataflow API Jobs data" -} - -// Provision IAM roles to write to BigQuery sink -resource "google_bigquery_dataset_iam_member" "dataflow_worker_roles" { - dataset_id = google_bigquery_dataset.sink.dataset_id - member = "serviceAccount:${data.google_service_account.dataflow_worker.email}" - role = "roles/bigquery.dataEditor" -} diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/common.tfvars b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/common.tfvars deleted file mode 100644 index 1f6b731240f1..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/common.tfvars +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -region = "us-central1" -eventarc_workflow_service_account_id = "eventarc-workflow-sa" -dataflow_worker_service_account_id = "infra-pipelines-worker" -workflow_resource_name_base = "dataflow-job-v1beta3-status-changed" diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/data.tf b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/data.tf deleted file mode 100644 index 19b50a00aa49..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/data.tf +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Query the Dataflow Worker service account -data "google_service_account" "dataflow_worker" { - account_id = var.dataflow_worker_service_account_id -} - -// Query the GCP project. Needed to acquire the project number. -data "google_project" "default" { - project_id = var.project -} diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/iam.tf b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/iam.tf deleted file mode 100644 index 199ccde1cdb6..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/iam.tf +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// Provision Eventarc and Workflow service account -resource "google_service_account" "default" { - account_id = var.eventarc_workflow_service_account_id - description = "Executes and processes Eventarc and Workflows" -} - -// Add IAM roles to Eventarc and Workflow service account -resource "google_project_iam_member" "default" { - for_each = toset([ - "roles/pubsub.publisher", - "roles/workflows.invoker", - "roles/eventarc.eventReceiver", - ]) - member = "serviceAccount:${google_service_account.default.email}" - role = each.key - project = var.project -} - -resource "google_project_iam_member" "gcp-sa-pubsub" { - member = "serviceAccount:service-${data.google_project.default.number}@gcp-sa-pubsub.iam.gserviceaccount.com" - project = var.project - role = "roles/iam.serviceAccountTokenCreator" -} diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/provider.tf b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/provider.tf deleted file mode 100644 index bbbdd5d5f9e6..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/provider.tf +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -provider "google" { - project = var.project -} \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/pubsub.tf b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/pubsub.tf deleted file mode 100644 index dad4af934abf..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/pubsub.tf +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// Provision a Pub/Sub topic to forward Eventarc Workflow event payloads -resource "google_pubsub_topic" "workflow_topic" { - name = var.workflow_resource_name_base -} - -// Provision a Pub/Sub subscription to the Eventarc Workflow event topic -resource "google_pubsub_subscription" "source" { - name = "${var.workflow_resource_name_base}-sub" - topic = google_pubsub_topic.workflow_topic.name -} - -// Allow Dataflow Worker Service Account to subscribe to Pub/Sub subscription -resource "google_pubsub_subscription_iam_member" "source" { - for_each = toset([ - "roles/pubsub.viewer", - "roles/pubsub.subscriber", - ]) - member = "serviceAccount:${data.google_service_account.dataflow_worker.email}" - role = each.key - subscription = google_pubsub_subscription.source.id -} diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/state.tf b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/state.tf deleted file mode 100644 index ad0b0d1d4bab..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/state.tf +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Partial GCS based terraform backend configuration meant for use with a -// tfbackend file. -terraform { - backend "gcs" { - # bucket configured in *.tfbackend file - prefix = "terraform/state/github.com/apache/beam/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery" - } -} diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/variables.tf b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/variables.tf deleted file mode 100644 index f1937f95ec45..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/variables.tf +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -variable "project" { - type = string - description = "The Google Cloud Platform (GCP) project within which resources are provisioned" -} - -variable "region" { - type = string - description = "The Google Cloud Platform (GCP) region in which to provision resources" -} - -variable "workflow_resource_name_base" { - type = string - description = "The basis from which to derive GCP Workflow related resource names, such as the Pub/Sub topic and the Workflow itself" -} - -variable "eventarc_workflow_service_account_id" { - type = string - description = "The Eventarc Workflow Service Account ID" -} - -variable "dataflow_worker_service_account_id" { - type = string - description = "The Dataflow Worker Service Account ID" -} diff --git a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/workflow.tf b/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/workflow.tf deleted file mode 100644 index 4b3b214fa38b..000000000000 --- a/.test-infra/pipelines/infrastructure/03.io/dataflow-to-bigquery/workflow.tf +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Creates a GCP Eventarc trigger matching Dataflow Job Status Changed events -// See the following resources for more details: -// https://cloud.google.com/eventarc -// https://cloud.google.com/eventarc/docs/reference/supported-events -resource "google_eventarc_trigger" "default" { - depends_on = [google_project_iam_member.default] - location = var.region - name = var.workflow_resource_name_base - service_account = google_service_account.default.email - - matching_criteria { - attribute = "type" - // matches 'type' property of the following command's output: - // gcloud eventarc providers describe dataflow.googleapis.com --location=us-central1 - value = "google.cloud.dataflow.job.v1beta3.statusChanged" - } - - destination { - workflow = google_workflows_workflow.default.id - } -} - -// Provisions a Workflow to which Dataflow Job Status Eventarc events are sent -// Forwards event payload to PubSub.
See the following for the expected schema: -// https://github.com/googleapis/google-cloudevents/tree/main/proto/google/events/cloud/dataflow -resource "google_workflows_workflow" "default" { - depends_on = [google_project_iam_member.default] - name = var.workflow_resource_name_base - description = "Consumes Dataflow Job Status Eventarc events and publishes to Pub/Sub" - region = var.region - service_account = google_service_account.default.email - source_contents = < - -# Overview - -This directory holds terraform code to build the -[org.apache.beam.testinfra.pipelines.ReadDataflowApiWriteBigQuery](../../../src/main/java/org/apache/beam/testinfra/pipelines/ReadDataflowApiWriteBigQuery.java) -pipeline for use as a [Dataflow Flex Template](https://cloud.google.com/dataflow/docs/guides/templates/using-flex-templates). - -# Why terraform? - -As of this README's writing, there is no resource block for the Google Cloud -terraform provider to provision Dataflow Templates. Therefore, this solution -makes use of the -[null_resource](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) -along with the -[local-exec](https://developer.hashicorp.com/terraform/language/resources/provisioners/local-exec) -provisioner. - -The benefit of using terraform is that it provides clean resource lookups -within its workflow; submitting the Dataflow job through other means, such as -a gradle command or a bash script, is known to be cumbersome. - -# Usage - -Follow the terraform workflow convention to apply this module. It assumes the -working directory is at -[.test-infra/pipelines](../../..). - -This module does not use a state backend. - -Notice the `-var-file` flag referencing [common.tfvars](common.tfvars) that -provides opinionated variable defaults. - -For `apache-beam-testing`: - -``` -DIR=infrastructure/04.template/dataflow-to-bigquery -terraform -chdir=$DIR init -terraform -chdir=$DIR apply -var-file=common.tfvars -var-file=apache-beam-testing.tfvars -``` - -or for your own Google Cloud project: - -``` -DIR=infrastructure/04.template/dataflow-to-bigquery -terraform -chdir=$DIR init -terraform -chdir=$DIR apply -var-file=common.tfvars -``` diff --git a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/apache-beam-testing.tfvars b/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/apache-beam-testing.tfvars deleted file mode 100644 index 289150180c01..000000000000 --- a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/apache-beam-testing.tfvars +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -project = "apache-beam-testing" -storage_bucket_name = "infra-pipelines-19brjqq5" diff --git a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/common.tfvars b/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/common.tfvars deleted file mode 100644 index a3ea620c47bc..000000000000 --- a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/common.tfvars +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -artifact_registry_id = "infra-pipelines" -dataflow_worker_service_account_id = "infra-pipelines-worker" -gradle_project = ":beam-test-infra-pipelines" -network_name_base = "infra-pipelines" -region = "us-central1" -template_image_prefix = "dataflow-to-bigquery" diff --git a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/data.tf b/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/data.tf deleted file mode 100644 index 9df9b360e316..000000000000 --- a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/data.tf +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Query the Artifact Registry repository. -data "google_artifact_registry_repository" "default" { - location = var.region - repository_id = var.artifact_registry_id -} - -// Query the Dataflow Worker service account. -data "google_service_account" "dataflow_worker" { - account_id = var.dataflow_worker_service_account_id -} - -// Query the GCP project. -data "google_project" "default" { - project_id = var.project -} - -// Query the GCP Network. -data "google_compute_network" "default" { - name = var.network_name_base -} - -// Query the GCP Subnetwork. -data "google_compute_subnetwork" "default" { - region = var.region - name = var.network_name_base -} - -// Query the Storage bucket. 
-data "google_storage_bucket" "default" { - name = var.storage_bucket_name -} - -locals { - template_file_gcs_path = "gs://${data.google_storage_bucket.default.name}/templates/dataflow-to-bigquery.json" -} diff --git a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/dataflow-template.json b/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/dataflow-template.json deleted file mode 100644 index 8d9e7ab64e06..000000000000 --- a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/dataflow-template.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "description": "Reads from Dataflow Eventarc Pub/Sub Channel and writes to Big/Query", - "name": "Dataflow Jobs data to Big/Query", - "parameters": [ - { - "name": "subscription", - "helpText": "Format: projects/project/subscriptions/subscription. The source Pub/Sub subscription to receive Eventarc events.", - "isOptional": false, - "label": "Pub/Sub Eventarc Subscription", - "paramType": "TEXT" - }, - { - "name": "dataset", - "helpText": "Format: project.dataset. The BigQuery dataset to which to write Dataflow Jobs data.", - "isOptional": false, - "label": "BigQuery Dataset", - "paramType": "TEXT" - } - ] -} \ No newline at end of file diff --git a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/output.tf b/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/output.tf deleted file mode 100644 index 1ad90b21d31a..000000000000 --- a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/output.tf +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Output the path to the generated Dataflow Template File GCS path. -output "template_file_gcs_path" { - value = local.template_file_gcs_path -} diff --git a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/provider.tf b/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/provider.tf deleted file mode 100644 index 208d543f1ee8..000000000000 --- a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/provider.tf +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
diff --git a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/provider.tf b/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/provider.tf deleted file mode 100644 index 208d543f1ee8..000000000000 --- a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/provider.tf +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -provider "google" { - project = var.project -} diff --git a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/template.tf b/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/template.tf deleted file mode 100644 index 9a1296091c57..000000000000 --- a/.test-infra/pipelines/infrastructure/04.template/dataflow-to-bigquery/template.tf +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -locals { - beam_root = "${path.module}/../../../../.." -} - -// Builds the Shadow jar via the gradle command. -resource "null_resource" "shadowjar" { - triggers = { - id = uuid() - } - provisioner "local-exec" { - working_dir = local.beam_root - command = "./gradlew ${var.gradle_project}:shadowJar" - } -} - -// Builds the Dataflow Flex template by invoking the gcloud command. -resource "null_resource" "build_template" { - triggers = { - id = uuid() - } - depends_on = [null_resource.shadowjar] - provisioner "local-exec" { - command = < - -# Overview - -This directory holds all the terraform modules for setting up the Google Cloud -(GCP) resources necessary to execute [:beam-test-infra-pipelines](../) -pipelines using the Dataflow runner. - -# Code organization - -Folders are named according to recommended order of execution. For example, -[01.setup](01.setup) is intended to be used prior to [02.network](02.network). - -# Common Terraform Modules - -The following terraform modules apply to all executable -[:beam-test-infra-pipelines](../) pipelines. - -| Path | Purpose | -|--------------------------|-------------------| -| [01.setup](01.setup) | Setup GCP project | -| [02.network](02.network) | Provision network | - -# Specific Terraform Modules - -The following modules apply to specific pipelines. When creating a new -executable pipeline and its supported terraform, please consider updating this -documentation. - -## org.apache.beam.testinfra.pipelines.ReadDataflowApiWriteBigQuery - -The following modules provision resources related to -[`org.apache.beam.testinfra.pipelines.ReadDataflowApiWriteBigQuery`](../src/main/java/org/apache/beam/testinfra/pipelines/ReadDataflowApiWriteBigQuery.java).
- -| Path | Purpose | Required/Optional | -|----------------------------------------------------------------------|--------------------------------------------------------------------------|-------------------| -| [03.io/dataflow-to-bigquery](03.io/dataflow-to-bigquery) | Provisions resources to read from the Dataflow API and write to BigQuery | required | -| [04.template/dataflow-to-bigquery](04.template/dataflow-to-bigquery) | Builds a Dataflow Flex Template that executes the pipelines | optional | - -Therefore, to run -[`org.apache.beam.testinfra.pipelines.ReadDataflowApiWriteBigQuery`](../src/main/java/org/apache/beam/testinfra/pipelines/ReadDataflowApiWriteBigQuery.java), -apply the following recommended order of terraform modules. See their respective -READMEs for more details. - -1. [01.setup](01.setup) -2. [02.network](02.network) -3. [03.io/dataflow-to-bigquery](03.io/dataflow-to-bigquery) -4. [04.template/dataflow-to-bigquery](04.template/dataflow-to-bigquery) (_if you want to use Dataflow Flex Templates_) diff --git a/.test-infra/pipelines/src/main/go/internal/environment/variable.go b/.test-infra/pipelines/src/main/go/internal/environment/variable.go deleted file mode 100644 index 1e81a2e2ce83..000000000000 --- a/.test-infra/pipelines/src/main/go/internal/environment/variable.go +++ /dev/null @@ -1,81 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package environment provides helpers for interacting with environment variables. -package environment - -import ( - "fmt" - "os" - "strings" -) - -// Variable defines an environment variable via a string type alias. -// A Variable's string value is the key of the system environment variable. -type Variable string - -// Default assigns value to the system environment variable if it is not -// already set. -func (v Variable) Default(value string) error { - if v.Missing() { - return os.Setenv((string)(v), value) - } - return nil -} - -// Missing reports whether the system environment variable is an empty string. -func (v Variable) Missing() bool { - return v.Value() == "" -} - -// Key returns the system environment variable key. -func (v Variable) Key() string { - return (string)(v) -} - -// Value returns the system environment variable value. -func (v Variable) Value() string { - return os.Getenv((string)(v)) -} - -// KeyValue returns a concatenated string of the system environment variable's -// key=value pair. -func (v Variable) KeyValue() string { - return fmt.Sprintf("%s=%s", (string)(v), v.Value()) -} - -// Missing returns an error listing all Variables among vars that are -// not assigned in the system environment.
-func Missing(vars ...Variable) error { - var missing []string - for _, v := range vars { - if v.Missing() { - missing = append(missing, v.KeyValue()) - } - } - if len(missing) > 0 { - return fmt.Errorf("variables empty but expected from environment: %s", strings.Join(missing, "; ")) - } - return nil -} - -// Map converts a slice of Variable into a map. -// Its usage is for logging purposes. -func Map(vars ...Variable) map[string]interface{} { - result := map[string]interface{}{} - for _, v := range vars { - result[(string)(v)] = v.Value() - } - return result -} diff --git a/.test-infra/pipelines/src/main/go/internal/environment/variable_test.go b/.test-infra/pipelines/src/main/go/internal/environment/variable_test.go deleted file mode 100644 index 6675b39ab26c..000000000000 --- a/.test-infra/pipelines/src/main/go/internal/environment/variable_test.go +++ /dev/null @@ -1,358 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package environment - -import ( - "errors" - "github.com/google/go-cmp/cmp" - "os" - "testing" -) - -func TestMap(t *testing.T) { - type args struct { - vars []Variable - values []string - } - tests := []struct { - name string - args args - want map[string]interface{} - }{ - { - name: "{}", - args: args{}, - want: map[string]interface{}{}, - }, - { - name: "{A=1; B=2; C=3}", - args: args{ - vars: []Variable{ - "A", - "B", - "C", - }, - values: []string{ - "1", - "2", - "3", - }, - }, - want: map[string]interface{}{ - "A": "1", - "B": "2", - "C": "3", - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - clear(tt.args.vars...) - set(t, tt.args.vars, tt.args.values) - got := Map(tt.args.vars...) - if diff := cmp.Diff(got, tt.want); diff != "" { - t.Errorf("Map() = %v, want %v, diff:\n%v", got, tt.want, diff) - } - }) - } -} - -func TestMissing(t *testing.T) { - type args struct { - vars []Variable - values []string - } - tests := []struct { - name string - args args - want error - }{ - { - name: "{}", - args: args{}, - }, - { - name: "{A=}", - args: args{ - vars: []Variable{ - "A", - }, - values: []string{ - "", - }, - }, - want: errors.New("variables empty but expected from environment: A="), - }, - { - name: "{A=1}", - args: args{ - vars: []Variable{ - "A", - }, - values: []string{ - "1", - }, - }, - want: nil, - }, - { - name: "{A=; B=}", - args: args{ - vars: []Variable{ - "A", - "B", - }, - values: []string{ - "", - "", - }, - }, - want: errors.New("variables empty but expected from environment: A=; B="), - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - var got, want string - clear(tt.args.vars...) - set(t, tt.args.vars, tt.args.values) - err := Missing(tt.args.vars...) 
- if err != nil { - got = err.Error() - } - if tt.want != nil { - want = tt.want.Error() - } - if diff := cmp.Diff(got, want); diff != "" { - t.Errorf("Missing() error = %v, want %v, diff:\n%s", err, tt.want, diff) - } - }) - } -} - -func TestVariable_Default(t *testing.T) { - type args struct { - setValue string - defaultValue string - } - tests := []struct { - name string - v Variable - args args - want string - }{ - { - name: "environment variable not set", - v: "A", - args: args{ - defaultValue: "1", - }, - want: "1", - }, - { - name: "environment variable default is overridden by set value", - v: "A", - args: args{ - setValue: "2", - defaultValue: "1", - }, - want: "2", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - clear(tt.v) - if tt.args.setValue != "" { - set(t, []Variable{tt.v}, []string{tt.args.setValue}) - } - if err := tt.v.Default(tt.args.defaultValue); err != nil { - t.Fatalf("could not set default environment variable value during test execution: %v", err) - } - got := os.Getenv(tt.v.Key()) - if diff := cmp.Diff(got, tt.want); diff != "" { - t.Errorf("Default() = %s, want %s, diff:\n%s", got, tt.want, diff) - } - }) - } -} - -func TestVariable_KeyValue(t *testing.T) { - tests := []struct { - name string - v Variable - value string - want string - }{ - { - name: "environment variable not set", - v: "A", - want: "A=", - }, - { - name: "environment variable is set", - v: "A", - value: "1", - want: "A=1", - }, - } - for _, tt := range tests { - clear(tt.v) - t.Run(tt.name, func(t *testing.T) { - set(t, []Variable{tt.v}, []string{tt.value}) - got := tt.v.KeyValue() - if diff := cmp.Diff(got, tt.want); diff != "" { - t.Errorf("KeyValue() = %v, want %v, diff:\n%s", got, tt.want, diff) - } - }) - } -} - -func TestVariable_Missing(t *testing.T) { - type args struct { - setValue string - defaultValue string - } - tests := []struct { - name string - args args - v Variable - want bool - }{ - { - name: "no default and not set", - args: args{}, - v: "A", - want: true, - }, - { - name: "has default but not set", - args: args{ - defaultValue: "1", - }, - v: "A", - want: false, - }, - { - name: "no default but set", - args: args{ - setValue: "1", - }, - v: "A", - want: false, - }, - { - name: "has default and set", - args: args{ - setValue: "2", - defaultValue: "1", - }, - v: "A", - want: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - clear(tt.v) - if tt.args.defaultValue != "" { - if err := tt.v.Default(tt.args.defaultValue); err != nil { - t.Fatalf("could not set default environment variable value during test execution: %v", err) - } - } - if tt.args.setValue != "" { - set(t, []Variable{tt.v}, []string{tt.args.setValue}) - } - if got := tt.v.Missing(); got != tt.want { - t.Errorf("Missing() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestVariable_Value(t *testing.T) { - type args struct { - setValue string - defaultValue string - } - tests := []struct { - name string - args args - v Variable - want string - }{ - { - name: "no default and not set", - args: args{}, - v: "A", - want: "", - }, - { - name: "has default but not set", - args: args{ - defaultValue: "1", - }, - v: "A", - want: "1", - }, - { - name: "no default but set", - args: args{ - setValue: "1", - }, - v: "A", - want: "1", - }, - { - name: "has default and set", - args: args{ - setValue: "2", - defaultValue: "1", - }, - v: "A", - want: "2", - }, - } - for _, tt := range tests { - clear(tt.v) - if tt.args.defaultValue != "" { - if err := 
tt.v.Default(tt.args.defaultValue); err != nil { - t.Fatalf("could not set default environment variable value during test execution: %v", err) - } - } - if tt.args.setValue != "" { - set(t, []Variable{tt.v}, []string{tt.args.setValue}) - } - t.Run(tt.name, func(t *testing.T) { - if got := tt.v.Value(); got != tt.want { - t.Errorf("Value() = %v, want %v", got, tt.want) - } - }) - } -} - -func clear(vars ...Variable) { - for _, k := range vars { - _ = os.Setenv(k.Key(), "") - } -} - -func set(t *testing.T, vars []Variable, values []string) { - if len(vars) != len(values) { - t.Fatalf("test cases should be configured with matching args.vars and args.values: len(tt.args.vars): %v != len(tt.args.values): %v", len(vars), len(values)) - } - for i := range vars { - key := vars[i].Key() - value := values[i] - _ = os.Setenv(key, value) - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/ReadDataflowApiWriteBigQuery.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/ReadDataflowApiWriteBigQuery.java deleted file mode 100644 index fc1699cc44f2..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/ReadDataflowApiWriteBigQuery.java +++ /dev/null @@ -1,362 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.testinfra.pipelines; - -import static org.apache.beam.sdk.values.TypeDescriptors.rows; - -import com.google.dataflow.v1beta3.GetJobExecutionDetailsRequest; -import com.google.dataflow.v1beta3.GetJobMetricsRequest; -import com.google.dataflow.v1beta3.GetJobRequest; -import com.google.dataflow.v1beta3.Job; -import com.google.events.cloud.dataflow.v1beta3.JobState; -import com.google.events.cloud.dataflow.v1beta3.JobType; -import com.google.protobuf.GeneratedMessageV3; -import java.util.Arrays; -import java.util.stream.Collectors; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.io.gcp.bigquery.WriteResult; -import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.Flatten; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.WithFailures; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionList; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.testinfra.pipelines.bigquery.BigQueryWriteOptions; -import org.apache.beam.testinfra.pipelines.bigquery.BigQueryWrites; -import org.apache.beam.testinfra.pipelines.conversions.ConversionError; -import org.apache.beam.testinfra.pipelines.conversions.EventarcConversions; -import org.apache.beam.testinfra.pipelines.conversions.JobsToRow; -import org.apache.beam.testinfra.pipelines.conversions.RowConversionResult; -import org.apache.beam.testinfra.pipelines.conversions.WithAppendedDetailsToRow; -import org.apache.beam.testinfra.pipelines.dataflow.DataflowClientFactoryConfiguration; -import org.apache.beam.testinfra.pipelines.dataflow.DataflowFilterEventarcJobs; -import org.apache.beam.testinfra.pipelines.dataflow.DataflowGetJobExecutionDetails; -import org.apache.beam.testinfra.pipelines.dataflow.DataflowGetJobMetrics; -import org.apache.beam.testinfra.pipelines.dataflow.DataflowGetJobs; -import org.apache.beam.testinfra.pipelines.dataflow.DataflowJobsOptions; -import org.apache.beam.testinfra.pipelines.dataflow.DataflowReadResult; -import org.apache.beam.testinfra.pipelines.dataflow.DataflowRequestError; -import org.apache.beam.testinfra.pipelines.dataflow.DataflowRequests; -import org.apache.beam.testinfra.pipelines.dataflow.JobMetricsWithAppendedDetails; -import org.apache.beam.testinfra.pipelines.dataflow.StageSummaryWithAppendedDetails; -import org.apache.beam.testinfra.pipelines.pubsub.PubsubReadOptions; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.checkerframework.checker.nullness.qual.NonNull; - -/** - * Constructs and executes a {@link Pipeline} that reads from the Dataflow API and writes to - * BigQuery. For internal use only. 
- */ -@Internal -public class ReadDataflowApiWriteBigQuery { - - public interface Options extends DataflowJobsOptions, PubsubReadOptions, BigQueryWriteOptions {} - - public static void main(String[] args) { - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - Pipeline pipeline = Pipeline.create(options); - DataflowClientFactoryConfiguration configuration = - DataflowClientFactoryConfiguration.builder(options).build(); - - // Retrieve Jobs calling the JobsV1Beta3.GetJob rpc. - PCollection jobs = readEvents(options, pipeline); - - // Retrieve JobMetrics calling the MetricsV1Beta3.GetJobMetrics rpc. - readDetailsFromDataflowJobs( - options, - BigQueryWrites.JOB_METRICS_ERRORS, - GetJobMetricsRequest.class, - JobMetricsWithAppendedDetails.class, - jobs, - DataflowGetJobMetrics.create(configuration), - WithAppendedDetailsToRow.jobMetricsWithAppendedDetailsToRow(), - BigQueryWrites.dataflowJobMetrics(options)); - - // Retrieve StageSummary entries (from JobExecutionDetails) calling the - // MetricsV1Beta3.GetJobExecutionDetails rpc. - readDetailsFromDataflowJobs( - options, - BigQueryWrites.JOB_EXECUTION_DETAILS_ERRORS, - GetJobExecutionDetailsRequest.class, - StageSummaryWithAppendedDetails.class, - jobs, - DataflowGetJobExecutionDetails.create(configuration), - WithAppendedDetailsToRow.stageSummaryWithAppendedDetailsToRow(), - BigQueryWrites.dataflowJobExecutionDetails(options)); - - // Retrieve WorkerDetails (from StageExecutionDetails) calling the - // MetricsV1Beta3.GetStageExecutionDetails rpc. - // Not working at this time and left commented out to revisit in the future. - // jobDetails( - // options, - // BigQueryWrites.STAGE_EXECUTION_DETAILS_ERRORS, - // GetStageExecutionDetailsRequest.class, - // WorkerDetailsWithAppendedDetails.class, - // jobs, - // DataflowGetStageExecutionDetails.create(configuration), - // WithAppendedDetailsToRow.workerDetailsWithAppendedDetailsToRow(), - // BigQueryWrites.dataflowStageExecutionDetails(options)); - - pipeline.run(); - } - - private static void writeErrors( - Options options, - String requestErrorTableIdPrefix, - Class requestTClass, - Class responseTClass, - PCollection requestErrors, - PCollection responseConversionErrors) { - - // Write Dataflow API errors to BigQuery. - PCollection requestErrorRows = - requestErrors - .apply( - tagOf(ConversionError.class, DataflowRequestError.class, requestTClass), - MapElements.into(rows()).via(DataflowRequestError.TO_ROW_FN)) - .setRowSchema(DataflowRequestError.SCHEMA); - - requestErrorRows.apply( - tagOf(BigQueryWrites.class, requestTClass.getSimpleName(), "errors"), - BigQueryWrites.writeDataflowRequestErrors(options, requestErrorTableIdPrefix)); - - // Write conversion errors to BigQuery. - PCollection conversionErrors = - responseConversionErrors - .apply( - tagOf(ConversionError.class, responseTClass), - MapElements.into(rows()).via(ConversionError.TO_ROW_FN)) - .setRowSchema(ConversionError.SCHEMA); - - conversionErrors.apply( - tagOf(BigQueryWrites.class, responseTClass.getSimpleName(), "errors"), - BigQueryWrites.writeConversionErrors(options)); - } - - private static PCollection readEvents(Options options, Pipeline pipeline) { - - // Read from Eventarc published Pub/Sub events. 
- PCollection json = - pipeline.apply( - tagOf(PubsubIO.Read.class, "Eventarc"), - PubsubIO.readStrings() - .fromSubscription(options.getSubscription().getValue().getPath())); - - json.apply( - "Count Pub/Sub messages", ParDo.of(countFn(PubsubIO.Read.class, "pulled_pubsub_messages"))); - - // Encode Eventarc JSON payloads into Eventarc Dataflow Jobs. - WithFailures.Result< - @NonNull PCollection, ConversionError> - events = - json.apply( - tagOf(EventarcConversions.class, "fromJson"), EventarcConversions.fromJson()); - - events - .output() - .apply( - "Count Encoded Events", - ParDo.of(countFn(EventarcConversions.class, "encode_events_success"))); - events - .failures() - .apply( - "Count Encoded Failures", - ParDo.of(countFn(EventarcConversions.class, "encoded_events_failure"))); - - // Write Eventarc encoding errors to BigQuery. - PCollection eventConversionErrorRows = - events - .failures() - .apply( - "Event Conversion Errors To Row", - MapElements.into(rows()).via(ConversionError.TO_ROW_FN)) - .setRowSchema(ConversionError.SCHEMA); - - eventConversionErrorRows.apply( - tagOf(BigQueryWrites.class, com.google.events.cloud.dataflow.v1beta3.Job.class.getName()), - BigQueryWrites.writeConversionErrors(options)); - - return readDataflowJobsFromEvents(options, events.output()); - } - - private static PCollection readDataflowJobsFromEvents( - Options options, PCollection events) { - DataflowClientFactoryConfiguration configuration = - DataflowClientFactoryConfiguration.builder(options).build(); - - // Filter Done Batch Jobs. - PCollection getBatchJobRequests = - events - .apply( - tagOf(DataflowFilterEventarcJobs.class, "Done Batch Jobs"), - DataflowFilterEventarcJobs.builder() - .setIncludeJobStates(ImmutableList.of(JobState.JOB_STATE_DONE)) - .setIncludeJobType(JobType.JOB_TYPE_BATCH) - .build()) - .apply( - tagOf(DataflowRequests.class, "Batch GetJobRequests"), - DataflowRequests.jobRequestsFromEventsViewAll()); - - getBatchJobRequests.apply( - "Count Done Batch Jobs", ParDo.of(countFn(GetJobRequest.class, "done_batch_jobs"))); - - // Filter Canceled Streaming Jobs. - PCollection getStreamCanceledJobRequests = - events - .apply( - tagOf(DataflowFilterEventarcJobs.class, "Canceled Streaming Jobs"), - DataflowFilterEventarcJobs.builder() - .setIncludeJobStates(ImmutableList.of(JobState.JOB_STATE_CANCELLED)) - .setIncludeJobType(JobType.JOB_TYPE_STREAMING) - .build()) - .apply( - tagOf(DataflowRequests.class, "Canceled Stream GetJobRequests"), - DataflowRequests.jobRequestsFromEventsViewAll()); - - getStreamCanceledJobRequests.apply( - "Count Canceled Streaming Jobs", - ParDo.of(countFn(GetJobRequest.class, "canceled_streaming_jobs"))); - - // Filter Drained Streaming Jobs. - PCollection getStreamDrainedJobRequests = - events - .apply( - tagOf(DataflowFilterEventarcJobs.class, "Drained Streaming Jobs"), - DataflowFilterEventarcJobs.builder() - .setIncludeJobStates(ImmutableList.of(JobState.JOB_STATE_DRAINED)) - .setIncludeJobType(JobType.JOB_TYPE_STREAMING) - .build()) - .apply( - tagOf(DataflowRequests.class, "Drained Stream GetJobRequests"), - DataflowRequests.jobRequestsFromEventsViewAll()); - - getStreamDrainedJobRequests.apply( - "Count Drained Streaming Jobs", - ParDo.of(countFn(GetJobRequest.class, "drained_streaming_jobs"))); - - // Merge Batch and Streaming Jobs. 
- PCollectionList getJobRequestList = - PCollectionList.of(getBatchJobRequests) - .and(getStreamCanceledJobRequests) - .and(getStreamDrainedJobRequests); - PCollection getJobRequests = - getJobRequestList.apply("Merge Batch and Streaming Jobs", Flatten.pCollections()); - - // Call the Dataflow GetJobs endpoint. - DataflowReadResult getJobsResult = - getJobRequests.apply( - tagOf(DataflowGetJobs.class, "Read"), DataflowGetJobs.create(configuration)); - - // Convert Jobs to Rows. - RowConversionResult jobsToRowResult = - getJobsResult.getSuccess().apply(tagOf(JobsToRow.class, "Job"), JobsToRow.create()); - - jobsToRowResult - .getSuccess() - .apply( - "Count JobsToRow Success", ParDo.of(countFn(JobsToRow.class, "jobs_to_row_success"))); - jobsToRowResult - .getFailure() - .apply( - "Count JobsToRow Failure", ParDo.of(countFn(JobsToRow.class, "jobs_to_row_failure"))); - - // Write Job Rows to BigQuery. - jobsToRowResult - .getSuccess() - .apply(tagOf(BigQueryWrites.class, "Job"), BigQueryWrites.dataflowJobs(options)); - - writeErrors( - options, - BigQueryWrites.JOB_ERRORS, - GetJobRequest.class, - Job.class, - getJobsResult.getFailure(), - jobsToRowResult.getFailure()); - - return getJobsResult.getSuccess(); - } - - private static void readDetailsFromDataflowJobs( - Options options, - String requestErrorTableIdPrefix, - Class requestTClass, - Class responseTClass, - PCollection jobs, - PTransform, DataflowReadResult> - callAPITransform, - PTransform, RowConversionResult> - detailsToRowTransform, - PTransform<@NonNull PCollection, @NonNull WriteResult> bigQueryWriteTransform) { - - // Call the Dataflow API to get more Job details. - DataflowReadResult readResult = - jobs.apply(tagOf(callAPITransform.getClass(), responseTClass), callAPITransform); - - // Convert the Job details result to Beam Rows. - RowConversionResult toRowResult = - readResult - .getSuccess() - .apply(tagOf(detailsToRowTransform.getClass(), responseTClass), detailsToRowTransform); - - // Write result to BigQuery. - toRowResult - .getSuccess() - .apply(tagOf(bigQueryWriteTransform.getClass(), responseTClass), bigQueryWriteTransform); - - // Write errors to BigQuery. - writeErrors( - options, - requestErrorTableIdPrefix, - requestTClass, - responseTClass, - readResult.getFailure(), - toRowResult.getFailure()); - } - - private static String tagOf(Class clazz, String... addl) { - return clazz.getSimpleName() + " " + String.join(" ", addl); - } - - private static String tagOf(Class clazz, Class... addl) { - return String.join( - " ", - ImmutableList.builder() - .add(clazz.getSimpleName()) - .addAll(Arrays.stream(addl).map(Class::getSimpleName).collect(Collectors.toList())) - .build()); - } - - private static DoFn countFn(Class clazz, String name) { - return new DoFn() { - final Counter counter = Metrics.counter(clazz, name); - - @ProcessElement - public void process(@Element T ignored) { - counter.inc(); - } - }; - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/bigquery/BigQueryWriteOptions.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/bigquery/BigQueryWriteOptions.java deleted file mode 100644 index e76bceaeba68..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/bigquery/BigQueryWriteOptions.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.bigquery; - -import com.fasterxml.jackson.annotation.JsonIgnore; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.Validation.Required; - -/** Options for writing to BigQuery. */ -@Internal -public interface BigQueryWriteOptions extends PipelineOptions { - @Description("BigQuery Dataset") - @Required - @JsonIgnore - DatasetReferenceOptionValue getDataset(); - - void setDataset(DatasetReferenceOptionValue value); -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/bigquery/BigQueryWrites.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/bigquery/BigQueryWrites.java deleted file mode 100644 index 4f60d03efc32..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/bigquery/BigQueryWrites.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.testinfra.pipelines.bigquery; - -import com.google.api.services.bigquery.model.Clustering; -import com.google.api.services.bigquery.model.DatasetReference; -import com.google.api.services.bigquery.model.TableReference; -import com.google.api.services.bigquery.model.TimePartitioning; -import com.google.dataflow.v1beta3.Job; -import java.time.Instant; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; -import org.apache.beam.sdk.io.gcp.bigquery.WriteResult; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.testinfra.pipelines.conversions.ConversionError; -import org.apache.beam.testinfra.pipelines.dataflow.DataflowRequestError; -import org.apache.beam.testinfra.pipelines.dataflow.JobMetricsWithAppendedDetails; -import org.apache.beam.testinfra.pipelines.dataflow.StageSummaryWithAppendedDetails; -import org.apache.beam.testinfra.pipelines.dataflow.WorkerDetailsWithAppendedDetails; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** Convenience methods for {@link BigQueryIO.Write}s. */ -@Internal -public class BigQueryWrites { - - public static final String JOB_EXECUTION_DETAILS_ERRORS = "errors_job_execution_details_requests"; - - public static final String JOB_ERRORS = "errors_jobs_requests"; - - public static final String JOB_METRICS_ERRORS = "errors_job_metrics_requests"; - - private static final TimePartitioning JOB_TIME_PARTITIONING = - new TimePartitioning().setType("HOUR").setField("create_time"); - - private static final TimePartitioning ENRICHED_TIME_PARTITIONING = - new TimePartitioning().setType("HOUR").setField("job_create_time"); - - private static final TimePartitioning OBSERVED_TIME_PARTITIONING = - new TimePartitioning().setType("HOUR").setField("observed_time"); - - private static final Clustering JOB_CLUSTERING = - new Clustering().setFields(ImmutableList.of("type", "location")); - - private static final String CONVERSION_ERRORS_TABLE_ID_PREFIX = "errors_conversions"; - - private static final String JOB_EXECUTION_DETAILS = "job_execution_details"; - - private static final String JOB_METRICS = "job_metrics"; - - private static final String JOBS = "jobs"; - - private static final String STAGE_EXECUTION_DETAILS = "stage_execution_details"; - - /** Write {@link Row}s of {@link ConversionError}s with {@link #OBSERVED_TIME_PARTITIONING}. */ - public static PTransform<@NonNull PCollection<Row>, @NonNull WriteResult> writeConversionErrors( - BigQueryWriteOptions options) { - return withPartitioning( - options, tableIdFrom(CONVERSION_ERRORS_TABLE_ID_PREFIX), OBSERVED_TIME_PARTITIONING); - } - - /** - * Write {@link Row}s of {@link StageSummaryWithAppendedDetails} with {@link - * #ENRICHED_TIME_PARTITIONING}. - */ - public static PTransform<@NonNull PCollection<Row>, @NonNull WriteResult> - dataflowJobExecutionDetails(BigQueryWriteOptions options) { - return withPartitioning( - options, tableIdFrom(JOB_EXECUTION_DETAILS), ENRICHED_TIME_PARTITIONING); - } - - /** - * Write {@link Row}s of {@link JobMetricsWithAppendedDetails} with {@link - * #ENRICHED_TIME_PARTITIONING}.
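- * - * <p>A usage sketch, assuming {@code metricRows} already holds the converted {@link Row}s: - * - * <pre>{@code - * WriteResult result = metricRows.apply(BigQueryWrites.dataflowJobMetrics(options)); - * }</pre>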
- */ - public static PTransform<@NonNull PCollection<Row>, @NonNull WriteResult> dataflowJobMetrics( - BigQueryWriteOptions options) { - return withPartitioning(options, tableIdFrom(JOB_METRICS), ENRICHED_TIME_PARTITIONING); - } - - /** - * Write {@link Row}s of {@link Job}s with {@link #JOB_TIME_PARTITIONING} clustered by {@link - * #JOB_CLUSTERING}. - */ - public static PTransform<@NonNull PCollection<Row>, @NonNull WriteResult> dataflowJobs( - BigQueryWriteOptions options) { - return withPartitioningAndOptionalClustering( - options, tableIdFrom(JOBS), JOB_TIME_PARTITIONING, JOB_CLUSTERING); - } - - /** - * Write {@link Row}s of {@link WorkerDetailsWithAppendedDetails} with {@link - * #ENRICHED_TIME_PARTITIONING}. - */ - public static PTransform<@NonNull PCollection<Row>, @NonNull WriteResult> - dataflowStageExecutionDetails(BigQueryWriteOptions options) { - return withPartitioning( - options, tableIdFrom(STAGE_EXECUTION_DETAILS), ENRICHED_TIME_PARTITIONING); - } - - /** - * Write {@link Row}s of {@link DataflowRequestError}s with {@link #OBSERVED_TIME_PARTITIONING}. - */ - public static PTransform<@NonNull PCollection<Row>, @NonNull WriteResult> - writeDataflowRequestErrors(BigQueryWriteOptions options, String tableIdPrefix) { - return withPartitioning(options, tableIdFrom(tableIdPrefix), OBSERVED_TIME_PARTITIONING); - } - - private static String tableIdFrom(String prefix) { - return String.format("%s_%s", prefix, Instant.now().getEpochSecond()); - } - - private static PTransform<@NonNull PCollection<Row>, @NonNull WriteResult> withPartitioning( - BigQueryWriteOptions options, String tableId, TimePartitioning timePartitioning) { - return withPartitioningAndOptionalClustering(options, tableId, timePartitioning, null); - } - - private static - PTransform<@NonNull PCollection<Row>, @NonNull WriteResult> - withPartitioningAndOptionalClustering( - BigQueryWriteOptions options, - String tableId, - TimePartitioning timePartitioning, - @Nullable Clustering clustering) { - - DatasetReference datasetReference = options.getDataset().getValue(); - TableReference tableReference = - new TableReference() - .setProjectId(datasetReference.getProjectId()) - .setDatasetId(datasetReference.getDatasetId()) - .setTableId(tableId); - - BigQueryIO.Write<Row> write = - BigQueryIO.<Row>write() - .to(tableReference) - .useBeamSchema() - .withTimePartitioning(timePartitioning) - .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) - .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND) - .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS); - - if (clustering != null) { - write = write.withClustering(clustering); - } - - return write; - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/bigquery/DatasetReferenceOptionValue.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/bigquery/DatasetReferenceOptionValue.java deleted file mode 100644 index 7988a11d3adb..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/bigquery/DatasetReferenceOptionValue.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.bigquery; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; - -import com.google.api.services.bigquery.model.DatasetReference; -import java.io.Serializable; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import org.apache.beam.sdk.annotations.Internal; - -/** Parses a Pipeline option value into a {@link DatasetReference}. */ -@Internal -public class DatasetReferenceOptionValue implements Serializable { - - // The expected format when parsing a String into a dataset reference: - // "{project_id}:{dataset_id}" or - // "{project_id}.{dataset_id}" - private static final Pattern DATASET_PATTERN = - Pattern.compile("^(?<PROJECT>[^\\.:]+)[\\.:](?<DATASET>[^\\.:]+)$"); - - private final String project; - - private final String dataset; - - DatasetReferenceOptionValue(String input) { - Matcher m = DATASET_PATTERN.matcher(input); - checkArgument( - m.matches(), - "input does not match BigQuery dataset pattern, " - + "expected 'project_id.dataset_id' or 'project_id:dataset_id', got: %s", - input); - this.project = checkStateNotNull(m.group("PROJECT"), "PROJECT not found in %s", input); - this.dataset = checkStateNotNull(m.group("DATASET"), "DATASET not found in %s", input); - } - - /** Get the parsed String as a {@link DatasetReference}. */ - public DatasetReference getValue() { - return new DatasetReference().setProjectId(this.project).setDatasetId(this.dataset); - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/bigquery/package-info.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/bigquery/package-info.java deleted file mode 100644 index 1298cbd4d690..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/bigquery/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Defines how testinfra pipelines write to BigQuery.
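- * - * <p>For example, a {@code --dataset} option value such as {@code my-project:my_dataset} (values hypothetical) parses into a {@code DatasetReference}: - * - * <pre>{@code - * DatasetReference ref = new DatasetReferenceOptionValue("my-project:my_dataset").getValue(); - * // ref.getProjectId() returns "my-project"; ref.getDatasetId() returns "my_dataset" - * }</pre>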
*/ -package org.apache.beam.testinfra.pipelines.bigquery; diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/ConversionError.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/ConversionError.java deleted file mode 100644 index f3f42e33440d..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/ConversionError.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.conversions; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; - -import com.google.auto.value.AutoValue; -import java.io.Serializable; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.schemas.AutoValueSchema; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.annotations.DefaultSchema; -import org.apache.beam.sdk.schemas.annotations.SchemaCaseFormat; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.CaseFormat; -import org.joda.time.Instant; - -/** Stores errors related to conversions. 
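- * - * <p>A construction sketch, as the conversion transforms in this package build instances (the message text is illustrative and {@code e} stands for a caught exception): - * - * <pre>{@code - * ConversionError error = - * ConversionError.builder() - * .setObservedTime(Instant.now()) - * .setMessage(Optional.ofNullable(e.getMessage()).orElse("")) - * .setStackTrace(Throwables.getStackTraceAsString(e)) - * .build(); - * }</pre>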
*/ -@Internal -@DefaultSchema(AutoValueSchema.class) -@AutoValue -@SchemaCaseFormat(CaseFormat.LOWER_UNDERSCORE) -public abstract class ConversionError implements Serializable { - - private static final AutoValueSchema SCHEMA_PROVIDER = new AutoValueSchema(); - - private static final TypeDescriptor<ConversionError> TYPE = - TypeDescriptor.of(ConversionError.class); - - public static final Schema SCHEMA = checkStateNotNull(SCHEMA_PROVIDER.schemaFor(TYPE)); - - public static final SerializableFunction<ConversionError, Row> TO_ROW_FN = - SCHEMA_PROVIDER.toRowFunction(TYPE); - - public static Builder builder() { - return new AutoValue_ConversionError.Builder(); - } - - public abstract Instant getObservedTime(); - - public abstract String getMessage(); - - public abstract String getStackTrace(); - - @AutoValue.Builder - public abstract static class Builder { - - public abstract Builder setObservedTime(Instant value); - - public abstract Builder setMessage(String value); - - public abstract Builder setStackTrace(String value); - - public abstract ConversionError build(); - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/EventarcConversions.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/EventarcConversions.java deleted file mode 100644 index 856f64d0da72..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/EventarcConversions.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.conversions; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.events.cloud.dataflow.v1beta3.Job; -import com.google.protobuf.InvalidProtocolBufferException; -import com.google.protobuf.util.JsonFormat; -import java.util.Optional; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Throwables; -import org.joda.time.Instant; - -/** Methods for converting from Eventarc JSON payloads.
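- * - * <p>A usage sketch; the {@code json} collection of Eventarc payload strings is assumed to come from a Pub/Sub source: - * - * <pre>{@code - * WithFailures.Result<PCollection<Job>, ConversionError> result = - * json.apply("Parse Eventarc", EventarcConversions.fromJson()); - * PCollection<Job> jobs = result.output(); - * }</pre>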
*/ -@Internal -public final class EventarcConversions { - - private static final String DATA_NODE_KEY = "data"; - private static final String TYPE_NODE_KEY = "@type"; - private static final String JOB_EVENT_DATA_TYPE = - "type.googleapis.com/google.events.cloud.dataflow.v1beta3.JobEventData"; - private static final String PAYLOAD_NODE_KEY = "payload"; - - /** Parses Eventarc JSON strings to {@link Job}s. */ - public static MapElements.MapWithFailures<String, Job, ConversionError> fromJson() { - return MapElements.into(TypeDescriptor.of(Job.class)) - .via(new JsonToJobFn()) - .exceptionsInto(new TypeDescriptor<ConversionError>() {}) - .exceptionsVia( - exceptionElement -> - ConversionError.builder() - .setObservedTime(Instant.now()) - .setMessage( - Optional.ofNullable(exceptionElement.exception().getMessage()).orElse("")) - .setStackTrace(Throwables.getStackTraceAsString(exceptionElement.exception())) - .build()); - } - - /** {@link SerializableFunction} that parses an Eventarc JSON string into a {@link Job}. */ - static class JsonToJobFn implements SerializableFunction<String, Job> { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public Job apply(String json) { - - String safeJson = checkStateNotNull(json, "null json string input for %s", JsonToJobFn.class); - - Job.Builder builder = Job.newBuilder(); - - try { - - JsonNode eventNode = - checkStateNotNull( - OBJECT_MAPPER.readTree(safeJson), - "could not parse json input into %s", - JsonNode.class); - - JsonNode dataNode = - checkStateNotNull( - eventNode.get(DATA_NODE_KEY), "json input missing path: $.%s", DATA_NODE_KEY); - - JsonNode typeNode = - checkStateNotNull( - dataNode.get(TYPE_NODE_KEY), - "json input missing path: $.%s.%s", - DATA_NODE_KEY, - TYPE_NODE_KEY); - - JsonNode payloadNode = - checkStateNotNull( - dataNode.get(PAYLOAD_NODE_KEY), - "json input missing path: $.%s.%s", - DATA_NODE_KEY, - PAYLOAD_NODE_KEY); - - checkState( - typeNode.asText().equals(JOB_EVENT_DATA_TYPE), - "expected %s=%s at json path: $.%s.%s, got: %s", - TYPE_NODE_KEY, - JOB_EVENT_DATA_TYPE, - DATA_NODE_KEY, - TYPE_NODE_KEY, - typeNode.asText()); - - JsonFormat.parser().merge(payloadNode.toString(), builder); - return builder.build(); - - } catch (InvalidProtocolBufferException | JsonProcessingException e) { - throw new IllegalStateException(e); - } - } - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/JobsToRow.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/JobsToRow.java deleted file mode 100644 index 5bbe9e04ac69..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/JobsToRow.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ -package org.apache.beam.testinfra.pipelines.conversions; - -import static org.apache.beam.sdk.values.TypeDescriptors.rows; - -import com.google.dataflow.v1beta3.Job; -import java.util.Optional; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.transforms.WithFailures.Result; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionTuple; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.apache.beam.testinfra.pipelines.schemas.DescriptorSchemaRegistry; -import org.apache.beam.testinfra.pipelines.schemas.GeneratedMessageV3RowBuilder; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Throwables; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.Instant; - -/** {@link PTransform} that converts {@link Job}s to {@link Row}s. */ -@Internal -public class JobsToRow - extends PTransform< - @NonNull PCollection<Job>, @NonNull RowConversionResult<Job, ConversionError>> { - - public static JobsToRow create() { - return new JobsToRow(); - } - - private static final DescriptorSchemaRegistry SCHEMA_REGISTRY = DescriptorSchemaRegistry.INSTANCE; - - static { - SCHEMA_REGISTRY.build(Job.getDescriptor()); - } - - @Override - public @NonNull RowConversionResult<Job, ConversionError> expand( - @NonNull PCollection<Job> input) { - TupleTag<Row> success = new TupleTag<Row>() {}; - TupleTag<ConversionError> failure = new TupleTag<ConversionError>() {}; - Schema successSchema = SCHEMA_REGISTRY.getOrBuild(Job.getDescriptor()); - Result<@NonNull PCollection<Row>, ConversionError> result = - input.apply( - "Jobs To Row", - MapElements.into(rows()) - .via(jobsToRowFn()) - .exceptionsInto(new TypeDescriptor<ConversionError>() {}) - .exceptionsVia( - error -> - ConversionError.builder() - .setObservedTime(Instant.now()) - .setMessage( - Optional.ofNullable(error.exception().getMessage()).orElse("")) - .setStackTrace(Throwables.getStackTraceAsString(error.exception())) - .build())); - - PCollectionTuple pct = - PCollectionTuple.of(success, result.output()).and(failure, result.failures()); - return new RowConversionResult<>(successSchema, success, failure, pct); - } - - private static SerializableFunction<Job, Row> jobsToRowFn() { - return job -> { - GeneratedMessageV3RowBuilder<Job> builder = GeneratedMessageV3RowBuilder.of(job); - return builder.build(); - }; - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/RowConversionResult.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/RowConversionResult.java deleted file mode 100644 index 0f9bafff4d3e..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/RowConversionResult.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.conversions; - -import java.util.Map; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionTuple; -import org.apache.beam.sdk.values.PInput; -import org.apache.beam.sdk.values.POutput; -import org.apache.beam.sdk.values.PValue; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; - -/** Convenience class for bundling {@link Row} conversion successes and failures. */ -@Internal -public class RowConversionResult<SuccessT, FailureT> implements POutput { - - private final Pipeline pipeline; - - private final TupleTag<Row> successTag; - private final PCollection<Row> success; - - private final TupleTag<FailureT> failureTag; - private final PCollection<FailureT> failure; - - RowConversionResult( - Schema successRowSchema, - TupleTag<Row> successTag, - TupleTag<FailureT> failureTag, - PCollectionTuple pct) { - this.pipeline = pct.getPipeline(); - this.successTag = successTag; - this.success = pct.get(successTag).setRowSchema(successRowSchema); - this.failureTag = failureTag; - this.failure = pct.get(failureTag); - } - - public PCollection<Row> getSuccess() { - return success; - } - - public PCollection<FailureT> getFailure() { - return failure; - } - - @Override - public Pipeline getPipeline() { - return pipeline; - } - - @Override - public Map<TupleTag<?>, PValue> expand() { - return ImmutableMap.of( - successTag, success, - failureTag, failure); - } - - @Override - public void finishSpecifyingOutput( - String transformName, PInput input, PTransform<?, ?> transform) {} -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/WithAppendedDetailsToRow.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/WithAppendedDetailsToRow.java deleted file mode 100644 index c62de6deb80a..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/WithAppendedDetailsToRow.java +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ -package org.apache.beam.testinfra.pipelines.conversions; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; - -import com.google.dataflow.v1beta3.JobMetrics; -import com.google.dataflow.v1beta3.StageSummary; -import com.google.dataflow.v1beta3.WorkerDetails; -import com.google.protobuf.Descriptors.Descriptor; -import com.google.protobuf.GeneratedMessageV3; -import java.util.Optional; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.Schema.Field; -import org.apache.beam.sdk.schemas.Schema.FieldType; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionTuple; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.TupleTagList; -import org.apache.beam.testinfra.pipelines.dataflow.JobMetricsWithAppendedDetails; -import org.apache.beam.testinfra.pipelines.dataflow.StageSummaryWithAppendedDetails; -import org.apache.beam.testinfra.pipelines.dataflow.WorkerDetailsWithAppendedDetails; -import org.apache.beam.testinfra.pipelines.schemas.DescriptorSchemaRegistry; -import org.apache.beam.testinfra.pipelines.schemas.GeneratedMessageV3RowBuilder; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Throwables; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.Instant; - -/** - * Convenience methods for converting enriched types such as {@link JobMetricsWithAppendedDetails} - * and {@link StageSummaryWithAppendedDetails} into {@link Row}s.
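- * - * <p>A usage sketch; {@code enrichedMetrics} is a hypothetical {@code PCollection<JobMetricsWithAppendedDetails>}: - * - * <pre>{@code - * RowConversionResult<JobMetricsWithAppendedDetails, ConversionError> rows = - * enrichedMetrics.apply(WithAppendedDetailsToRow.jobMetricsWithAppendedDetailsToRow()); - * rows.getSuccess().apply(BigQueryWrites.dataflowJobMetrics(options)); - * }</pre>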
- */ -@Internal -public class WithAppendedDetailsToRow<AppendedDetailsT, EmbeddedT extends GeneratedMessageV3> - extends PTransform< - PCollection<AppendedDetailsT>, RowConversionResult<AppendedDetailsT, ConversionError>> { - - public static WithAppendedDetailsToRow<JobMetricsWithAppendedDetails, JobMetrics> - jobMetricsWithAppendedDetailsToRow() { - return new WithAppendedDetailsToRow<>( - JobMetricsWithAppendedDetails.class, - JobMetrics.class, - new TupleTag<ConversionError>() {}, - "job_metrics", - clazz -> JobMetrics.getDescriptor(), - element -> - checkStateNotNull( - element, - "%s element null for jobId supplier in %s", - JobMetricsWithAppendedDetails.class, - WithAppendedDetailsToRow.class) - .getJobId(), - element -> - checkStateNotNull( - element, - "%s element null for jobCreateTime supplier in %s", - JobMetricsWithAppendedDetails.class, - WithAppendedDetailsToRow.class) - .getJobCreateTime(), - element -> - checkStateNotNull( - element, - "%s element null for %s supplier in %s", - JobMetricsWithAppendedDetails.class, - JobMetrics.class, - WithAppendedDetailsToRow.class) - .getJobMetrics()); - } - - public static WithAppendedDetailsToRow<StageSummaryWithAppendedDetails, StageSummary> - stageSummaryWithAppendedDetailsToRow() { - return new WithAppendedDetailsToRow<>( - StageSummaryWithAppendedDetails.class, - StageSummary.class, - new TupleTag<ConversionError>() {}, - "stage_summary", - clazz -> StageSummary.getDescriptor(), - element -> - checkStateNotNull( - element, - "%s element null for jobId supplier in %s", - StageSummaryWithAppendedDetails.class, - WithAppendedDetailsToRow.class) - .getJobId(), - element -> - checkStateNotNull( - element, - "%s element null for jobCreateTime supplier in %s", - StageSummaryWithAppendedDetails.class, - WithAppendedDetailsToRow.class) - .getJobCreateTime(), - element -> - checkStateNotNull( - element, - "%s element null for %s supplier in %s", - StageSummaryWithAppendedDetails.class, - StageSummary.class, - WithAppendedDetailsToRow.class) - .getStageSummary()); - } - - public static WithAppendedDetailsToRow<WorkerDetailsWithAppendedDetails, WorkerDetails> - workerDetailsWithAppendedDetailsToRow() { - return new WithAppendedDetailsToRow<>( - WorkerDetailsWithAppendedDetails.class, - WorkerDetails.class, - new TupleTag<ConversionError>() {}, - "worker_details", - clazz -> WorkerDetails.getDescriptor(), - element -> - checkStateNotNull( - element, - "%s element null for jobId supplier in %s", - WorkerDetailsWithAppendedDetails.class, - WithAppendedDetailsToRow.class) - .getJobId(), - element -> - checkStateNotNull( - element, - "%s element null for jobCreateTime supplier in %s", - WorkerDetailsWithAppendedDetails.class, - WithAppendedDetailsToRow.class) - .getJobCreateTime(), - element -> - checkStateNotNull( - element, - "%s element null for %s supplier in %s", - WorkerDetailsWithAppendedDetails.class, - WorkerDetails.class, - WithAppendedDetailsToRow.class) - .getWorkerDetails()); - } - - private static final TupleTag<Row> SUCCESS = new TupleTag<Row>() {}; - - static final Field JOB_ID_FIELD = Field.of("job_id", FieldType.STRING); - - static final Field JOB_CREATE_TIME = Field.of("job_create_time", FieldType.DATETIME); - - private final Class<AppendedDetailsT> containerClass; - - private final Class<EmbeddedT> embeddedTClass; - - private final TupleTag<ConversionError> failureTag; - - private final String embeddedFieldName; - - private final SerializableFunction<@NonNull Class<EmbeddedT>, @NonNull Descriptor> - descriptorSupplier; - - private final SerializableFunction<@NonNull AppendedDetailsT, @NonNull String> jobIdSupplier; - - private final SerializableFunction<@NonNull AppendedDetailsT, @NonNull Instant> - jobCreateTimeSupplier; - - private final SerializableFunction<@NonNull AppendedDetailsT, @NonNull EmbeddedT> - embeddedInstanceSupplier; - - private WithAppendedDetailsToRow( - Class<AppendedDetailsT>
containerClass, - Class<EmbeddedT> embeddedTClass, - TupleTag<ConversionError> failureTag, - String embeddedFieldName, - SerializableFunction<@NonNull Class<EmbeddedT>, @NonNull Descriptor> descriptorSupplier, - SerializableFunction<@NonNull AppendedDetailsT, @NonNull String> jobIdSupplier, - SerializableFunction<@NonNull AppendedDetailsT, @NonNull Instant> jobCreateTimeSupplier, - SerializableFunction<@NonNull AppendedDetailsT, @NonNull EmbeddedT> - embeddedInstanceSupplier) { - this.containerClass = containerClass; - this.embeddedTClass = embeddedTClass; - this.failureTag = failureTag; - this.embeddedFieldName = embeddedFieldName; - this.descriptorSupplier = descriptorSupplier; - this.jobIdSupplier = jobIdSupplier; - this.jobCreateTimeSupplier = jobCreateTimeSupplier; - this.embeddedInstanceSupplier = embeddedInstanceSupplier; - } - - @Override - public RowConversionResult<AppendedDetailsT, ConversionError> expand( - PCollection<AppendedDetailsT> input) { - Descriptor descriptor = descriptorSupplier.apply(embeddedTClass); - Schema embeddedSchema = - checkStateNotNull( - DescriptorSchemaRegistry.INSTANCE.getOrBuild(descriptor), - "%s null from %s: %s", - Schema.class, - Descriptor.class, - descriptor.getFullName()); - FieldType embeddedType = FieldType.row(embeddedSchema); - Field embeddedField = Field.of(embeddedFieldName, embeddedType); - Schema schema = Schema.of(JOB_ID_FIELD, JOB_CREATE_TIME, embeddedField); - - PCollectionTuple pct = - input.apply( - containerClass.getSimpleName() + " ToRowFn", - ParDo.of(new ToRowFn<>(schema, this)) - .withOutputTags(SUCCESS, TupleTagList.of(failureTag))); - - return new RowConversionResult<>(schema, SUCCESS, failureTag, pct); - } - - private static class ToRowFn<AppendedDetailsT, EmbeddedT extends GeneratedMessageV3> - extends DoFn<AppendedDetailsT, Row> { - private final @NonNull Schema schema; - private final WithAppendedDetailsToRow<AppendedDetailsT, EmbeddedT> spec; - - private ToRowFn( - @NonNull Schema schema, WithAppendedDetailsToRow<AppendedDetailsT, EmbeddedT> spec) { - this.schema = schema; - this.spec = spec; - } - - @ProcessElement - public void process(@Element @NonNull AppendedDetailsT element, MultiOutputReceiver receiver) { - String id = spec.jobIdSupplier.apply(element); - Instant createTime = spec.jobCreateTimeSupplier.apply(element); - EmbeddedT embeddedInstance = spec.embeddedInstanceSupplier.apply(element); - GeneratedMessageV3RowBuilder<EmbeddedT> builder = - GeneratedMessageV3RowBuilder.of(embeddedInstance); - try { - Row embeddedRow = - checkStateNotNull( - builder.build(), "null Row from build of %s type", embeddedInstance.getClass()); - Row result = - Row.withSchema(schema) - .withFieldValue(JOB_ID_FIELD.getName(), id) - .withFieldValue(JOB_CREATE_TIME.getName(), createTime) - .withFieldValue(spec.embeddedFieldName, embeddedRow) - .build(); - receiver.get(SUCCESS).output(result); - } catch (IllegalStateException e) { - receiver - .get(spec.failureTag) - .output( - ConversionError.builder() - .setObservedTime(Instant.now()) - .setMessage(Optional.ofNullable(e.getMessage()).orElse("")) - .setStackTrace(Throwables.getStackTraceAsString(e)) - .build()); - } - } - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/package-info.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/package-info.java deleted file mode 100644 index bc8605d322dc..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/conversions/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements.
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Defines procedures for converting between various types. */ -package org.apache.beam.testinfra.pipelines.conversions; diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowClientFactory.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowClientFactory.java deleted file mode 100644 index fcda6190cde1..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowClientFactory.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import com.google.dataflow.v1beta3.JobsV1Beta3Grpc; -import com.google.dataflow.v1beta3.MetricsV1Beta3Grpc; -import io.grpc.ManagedChannel; -import io.grpc.auth.MoreCallCredentials; -import io.grpc.netty.NettyChannelBuilder; -import java.util.concurrent.TimeUnit; -import org.apache.beam.sdk.annotations.Internal; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** Produces and caches blocking stub gRPC clients for the Dataflow API. 
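- * - * <p>A call sketch; the configuration is assumed to be built from pipeline options via {@code DataflowClientFactoryConfiguration.builder(options).build()}, and callers are assumed to be in this package: - * - * <pre>{@code - * JobsV1Beta3Grpc.JobsV1Beta3BlockingStub jobs = - * DataflowClientFactory.INSTANCE.getOrCreateJobsClient(configuration); - * }</pre>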
*/ -@SuppressWarnings("ForbidNonVendoredGrpcProtobuf") -@Internal -final class DataflowClientFactory { - - static final DataflowClientFactory INSTANCE = new DataflowClientFactory(); - - private DataflowClientFactory() { - Thread closeChannelsHook = new Thread(this::closeAllChannels); - Runtime.getRuntime().addShutdownHook(closeChannelsHook); - } - - private JobsV1Beta3Grpc.@Nullable JobsV1Beta3BlockingStub cachedJobsClient; - private @Nullable ManagedChannel cachedJobsClientChannel; - private MetricsV1Beta3Grpc.@Nullable MetricsV1Beta3BlockingStub cachedMetricsClient; - private @Nullable ManagedChannel cachedMetricsClientChannel; - - JobsV1Beta3Grpc.JobsV1Beta3BlockingStub getOrCreateJobsClient( - DataflowClientFactoryConfiguration configuration) { - if (cachedJobsClient == null) { - cachedJobsClient = - JobsV1Beta3Grpc.newBlockingStub(getOrCreateJobsClientChannel(configuration)) - .withCallCredentials(MoreCallCredentials.from(configuration.getCredentials())); - } - return cachedJobsClient; - } - - MetricsV1Beta3Grpc.MetricsV1Beta3BlockingStub getOrCreateMetricsClient( - DataflowClientFactoryConfiguration configuration) { - if (cachedMetricsClient == null) { - cachedMetricsClient = - MetricsV1Beta3Grpc.newBlockingStub(getOrCreateMetricsClientChannel(configuration)) - .withCallCredentials(MoreCallCredentials.from(configuration.getCredentials())); - } - return cachedMetricsClient; - } - - private @NonNull ManagedChannel getOrCreateJobsClientChannel( - DataflowClientFactoryConfiguration configuration) { - if (cachedJobsClientChannel == null) { - cachedJobsClientChannel = channel(configuration); - } - return cachedJobsClientChannel; - } - - private @NonNull ManagedChannel getOrCreateMetricsClientChannel( - DataflowClientFactoryConfiguration configuration) { - if (cachedMetricsClientChannel == null) { - cachedMetricsClientChannel = channel(configuration); - } - return cachedMetricsClientChannel; - } - - private static ManagedChannel channel(DataflowClientFactoryConfiguration configuration) { - return NettyChannelBuilder.forTarget(configuration.getDataflowTarget()).build(); - } - - void closeAllChannels() { - close(INSTANCE.cachedJobsClientChannel); - close(INSTANCE.cachedMetricsClientChannel); - } - - private static void close(@Nullable ManagedChannel channel) { - if (channel == null) { - return; - } - channel.shutdown(); - try { - boolean ignored = channel.awaitTermination(1L, TimeUnit.SECONDS); - } catch (InterruptedException ignored) { - } - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowClientFactoryConfiguration.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowClientFactoryConfiguration.java deleted file mode 100644 index c8aebb16c527..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowClientFactoryConfiguration.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import com.google.auth.Credentials; -import com.google.auto.value.AutoValue; -import java.io.Serializable; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; - -/** Configures the Dataflow API client. */ -@Internal -@AutoValue -public abstract class DataflowClientFactoryConfiguration implements Serializable { - - public static Builder builder(DataflowJobsOptions options) { - Credentials credentials = credentialsFrom(options); - return new AutoValue_DataflowClientFactoryConfiguration.Builder() - .setCredentials(credentials) - .setDataflowTarget(options.getDataflowTarget()); - } - - static Credentials credentialsFrom(DataflowJobsOptions options) { - return options.as(GcpOptions.class).getGcpCredential(); - } - - abstract Credentials getCredentials(); - - abstract String getDataflowTarget(); - - @AutoValue.Builder - public abstract static class Builder { - abstract Builder setCredentials(Credentials newCredentials); - - abstract Builder setDataflowTarget(String newDataflowRootUrl); - - public abstract DataflowClientFactoryConfiguration build(); - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowFilterEventarcJobs.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowFilterEventarcJobs.java deleted file mode 100644 index 1fe03ed04d68..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowFilterEventarcJobs.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; - -import com.google.auto.value.AutoValue; -import com.google.events.cloud.dataflow.v1beta3.Job; -import com.google.events.cloud.dataflow.v1beta3.JobState; -import com.google.events.cloud.dataflow.v1beta3.JobType; -import java.util.Collections; -import java.util.List; -import java.util.Optional; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.checkerframework.checker.nullness.qual.NonNull; - -/** Filters Eventarc {@link Job}s. */ -@Internal -@AutoValue -public abstract class DataflowFilterEventarcJobs - extends PTransform<@NonNull PCollection<Job>, @NonNull PCollection<Job>> { - - public static Builder builder() { - return new AutoValue_DataflowFilterEventarcJobs.Builder(); - } - - public abstract List<JobState> getIncludeJobStates(); - - public abstract List<JobState> getExcludeJobStates(); - - public abstract JobType getIncludeJobType(); - - abstract Builder toBuilder(); - - @Override - public @NonNull PCollection<Job> expand(PCollection<Job> input) { - return input.apply( - Filter.by( - job -> { - Job safeJob = - checkStateNotNull(job, "null Job input in %s", DataflowFilterEventarcJobs.class); - boolean matchesIncludes = - !getIncludeJobStates().isEmpty() - && getIncludeJobStates().contains(safeJob.getCurrentState()); - boolean matchesExcludes = - !getExcludeJobStates().isEmpty() - && getExcludeJobStates().contains(safeJob.getCurrentState()); - boolean matchesJobType = getIncludeJobType().equals(safeJob.getType()); - return matchesIncludes && !matchesExcludes && matchesJobType; - })); - } - - @AutoValue.Builder - public abstract static class Builder { - - public abstract Builder setIncludeJobStates(List<JobState> newIncludeJobStates); - - abstract Optional<List<JobState>> getIncludeJobStates(); - - public abstract Builder setExcludeJobStates(List<JobState> newExcludeJobStates); - - abstract Optional<List<JobState>> getExcludeJobStates(); - - public abstract Builder setIncludeJobType(JobType newIncludeJobType); - - abstract JobType getIncludeJobType(); - - public abstract DataflowFilterEventarcJobs autoBuild(); - - public Builder terminatedOnly() { - return setIncludeJobStates( - ImmutableList.of(JobState.JOB_STATE_DONE, JobState.JOB_STATE_CANCELLED)); - } - - public final DataflowFilterEventarcJobs build() { - if (!getIncludeJobStates().isPresent()) { - setIncludeJobStates(Collections.emptyList()); - } - if (!getExcludeJobStates().isPresent()) { - setExcludeJobStates(Collections.emptyList()); - } - if (getIncludeJobType().equals(JobType.JOB_TYPE_UNKNOWN)) { - throw new IllegalStateException( - String.format( - "illegal %s: %s", JobType.class.getSimpleName(), JobType.JOB_TYPE_UNKNOWN)); - } - - return autoBuild(); - } - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowGetJobExecutionDetails.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowGetJobExecutionDetails.java deleted file mode 100644 index fcb5518a1105..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowGetJobExecutionDetails.java +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements.
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; - -import com.google.dataflow.v1beta3.GetJobExecutionDetailsRequest; -import com.google.dataflow.v1beta3.Job; -import com.google.dataflow.v1beta3.JobExecutionDetails; -import com.google.dataflow.v1beta3.MetricsV1Beta3Grpc; -import com.google.dataflow.v1beta3.StageSummary; -import io.grpc.StatusRuntimeException; -import java.util.Optional; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionTuple; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.TupleTagList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Throwables; -import org.checkerframework.checker.nullness.qual.MonotonicNonNull; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.Duration; -import org.joda.time.Instant; - -/** - * {@link PTransform} for executing {@link GetJobExecutionDetailsRequest}s using the {@link - * MetricsV1Beta3Grpc} client. Emits {@link StageSummaryWithAppendedDetails} or {@link - * DataflowRequestError}s. 
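- * - * <p>A usage sketch; {@code jobs} is a {@code PCollection<Job>} such as the success output of {@link DataflowGetJobs}: - * - * <pre>{@code - * DataflowReadResult<StageSummaryWithAppendedDetails, DataflowRequestError> details = - * jobs.apply(DataflowGetJobExecutionDetails.create(configuration)); - * }</pre>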
- */ -@Internal -public class DataflowGetJobExecutionDetails - extends PTransform< - @NonNull PCollection<Job>, - @NonNull DataflowReadResult<StageSummaryWithAppendedDetails, DataflowRequestError>> { - - private static final TupleTag<StageSummaryWithAppendedDetails> SUCCESS = - new TupleTag<StageSummaryWithAppendedDetails>() {}; - - private static final TupleTag<DataflowRequestError> FAILURE = - new TupleTag<DataflowRequestError>() {}; - - private final DataflowClientFactoryConfiguration configuration; - - public static DataflowGetJobExecutionDetails create( - DataflowClientFactoryConfiguration configuration) { - return new DataflowGetJobExecutionDetails(configuration); - } - - private DataflowGetJobExecutionDetails(DataflowClientFactoryConfiguration configuration) { - this.configuration = configuration; - } - - @Override - public @NonNull DataflowReadResult<StageSummaryWithAppendedDetails, DataflowRequestError> expand( - PCollection<Job> input) { - - PCollectionTuple pct = - input - .apply( - Throttle.class.getSimpleName() - + " " - + DataflowGetJobExecutionDetails.class.getSimpleName(), - Throttle.of( - DataflowGetJobExecutionDetails.class.getName(), Duration.standardSeconds(1L))) - .apply( - DataflowGetJobExecutionDetails.class.getSimpleName(), - ParDo.of(new GetJobExecutionDetailsFn(this)) - .withOutputTags(SUCCESS, TupleTagList.of(FAILURE))); - - return DataflowReadResult.of(SUCCESS, FAILURE, pct); - } - - private static class GetJobExecutionDetailsFn - extends DoFn<Job, StageSummaryWithAppendedDetails> { - final Counter success = - Metrics.counter(GetJobExecutionDetailsRequest.class, "get_job_execution_details_success"); - final Counter failure = - Metrics.counter(GetJobExecutionDetailsRequest.class, "get_job_execution_details_failure"); - final Counter items = Metrics.counter(StageSummary.class, "job_execution_details_items"); - private final DataflowGetJobExecutionDetails spec; - private transient MetricsV1Beta3Grpc.@MonotonicNonNull MetricsV1Beta3BlockingStub client; - - private GetJobExecutionDetailsFn(DataflowGetJobExecutionDetails spec) { - this.spec = spec; - } - - @Setup - public void setup() { - client = DataflowClientFactory.INSTANCE.getOrCreateMetricsClient(spec.configuration); - } - - @ProcessElement - public void process(@Element Job job, MultiOutputReceiver receiver) { - GetJobExecutionDetailsRequest request = - GetJobExecutionDetailsRequest.newBuilder() - .setJobId(job.getId()) - .setProjectId(job.getProjectId()) - .setLocation(job.getLocation()) - .build(); - try { - JobExecutionDetails response = checkStateNotNull(client).getJobExecutionDetails(request); - success.inc(); - items.inc(response.getStagesCount()); - emitResponse(job, response, receiver.get(SUCCESS)); - while (!Strings.isNullOrEmpty(response.getNextPageToken())) { - GetJobExecutionDetailsRequest requestWithPageToken = - request.toBuilder().setPageToken(response.getNextPageToken()).build(); - response = client.getJobExecutionDetails(requestWithPageToken); - success.inc(); - items.inc(response.getStagesCount()); - emitResponse(job, response, receiver.get(SUCCESS)); - } - } catch (StatusRuntimeException e) { - failure.inc(); - receiver - .get(FAILURE) - .output( - DataflowRequestError.fromRequest(request, GetJobExecutionDetailsRequest.class) - .setObservedTime(Instant.now()) - .setMessage(Optional.ofNullable(e.getMessage()).orElse("")) - .setStackTrace(Throwables.getStackTraceAsString(e)) - .build()); - } - } - } - - private static void emitResponse( - @NonNull Job job, - @NonNull JobExecutionDetails response, - DoFn.OutputReceiver<StageSummaryWithAppendedDetails> receiver) { - Instant createTime = Instant.ofEpochSecond(job.getCreateTime().getSeconds()); - for (StageSummary summary : response.getStagesList()) { - StageSummaryWithAppendedDetails result = new StageSummaryWithAppendedDetails(); -
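// Append the parent Job's id and create time so each StageSummary row can be - // hour-partitioned on job_create_time and joined back to the jobs table downstream. -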
result.setJobId(job.getId()); - result.setJobCreateTime(createTime); - result.setStageSummary(summary); - receiver.output(result); - } - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowGetJobMetrics.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowGetJobMetrics.java deleted file mode 100644 index 3429f88949a1..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowGetJobMetrics.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; - -import com.google.dataflow.v1beta3.GetJobMetricsRequest; -import com.google.dataflow.v1beta3.Job; -import com.google.dataflow.v1beta3.JobMetrics; -import com.google.dataflow.v1beta3.MetricsV1Beta3Grpc; -import io.grpc.StatusRuntimeException; -import java.util.Optional; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionTuple; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.TupleTagList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Throwables; -import org.checkerframework.checker.nullness.qual.MonotonicNonNull; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.Duration; -import org.joda.time.Instant; - -/** - * {@link PTransform} for executing {@link GetJobMetricsRequest}s using the {@link - * MetricsV1Beta3Grpc} client. Emits {@link JobMetricsWithAppendedDetails} or {@link - * DataflowRequestError}s. 
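- * - * <p>A usage sketch, mirroring {@link DataflowGetJobExecutionDetails}; {@code jobs} is a {@code PCollection<Job>}: - * - * <pre>{@code - * DataflowReadResult<JobMetricsWithAppendedDetails, DataflowRequestError> metrics = - * jobs.apply(DataflowGetJobMetrics.create(configuration)); - * }</pre>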
- */ -@Internal -public class DataflowGetJobMetrics - extends PTransform< - @NonNull PCollection<Job>, - @NonNull DataflowReadResult<JobMetricsWithAppendedDetails, DataflowRequestError>> { - - public static DataflowGetJobMetrics create(DataflowClientFactoryConfiguration configuration) { - return new DataflowGetJobMetrics(configuration); - } - - private static final TupleTag<JobMetricsWithAppendedDetails> SUCCESS = - new TupleTag<JobMetricsWithAppendedDetails>() {}; - - private static final TupleTag<DataflowRequestError> FAILURE = - new TupleTag<DataflowRequestError>() {}; - - private final DataflowClientFactoryConfiguration configuration; - - private DataflowGetJobMetrics(DataflowClientFactoryConfiguration configuration) { - this.configuration = configuration; - } - - @Override - public @NonNull DataflowReadResult<JobMetricsWithAppendedDetails, DataflowRequestError> expand( - PCollection<Job> input) { - - PCollectionTuple pct = - input - .apply( - Throttle.class.getSimpleName() + " " + DataflowGetJobMetrics.class.getSimpleName(), - Throttle.of(DataflowGetJobMetrics.class.getName(), Duration.standardSeconds(1L))) - .apply( - DataflowGetJobMetrics.class.getSimpleName(), - ParDo.of(new GetJobMetricsFn(this)) - .withOutputTags(SUCCESS, TupleTagList.of(FAILURE))); - - return DataflowReadResult.of(SUCCESS, FAILURE, pct); - } - - private static class GetJobMetricsFn extends DoFn<Job, JobMetricsWithAppendedDetails> { - - final Counter success = Metrics.counter(GetJobMetricsRequest.class, "get_jobs_metrics_success"); - final Counter failure = Metrics.counter(GetJobMetricsRequest.class, "get_jobs_metrics_failure"); - final Counter items = Metrics.counter(JobMetrics.class, "job_metrics_items"); - private final DataflowGetJobMetrics spec; - private transient MetricsV1Beta3Grpc.@MonotonicNonNull MetricsV1Beta3BlockingStub client; - - private GetJobMetricsFn(DataflowGetJobMetrics spec) { - this.spec = spec; - } - - @Setup - public void setup() { - client = DataflowClientFactory.INSTANCE.getOrCreateMetricsClient(spec.configuration); - } - - @ProcessElement - public void process(@Element Job job, MultiOutputReceiver receiver) { - GetJobMetricsRequest request = - GetJobMetricsRequest.newBuilder() - .setJobId(job.getId()) - .setProjectId(job.getProjectId()) - .setLocation(job.getLocation()) - .build(); - try { - - JobMetrics response = checkStateNotNull(client).getJobMetrics(request); - success.inc(); - items.inc(response.getMetricsCount()); - com.google.protobuf.Timestamp timestamp = job.getCreateTime(); - JobMetricsWithAppendedDetails result = new JobMetricsWithAppendedDetails(); - result.setJobId(request.getJobId()); - result.setJobCreateTime(Instant.ofEpochSecond(timestamp.getSeconds())); - result.setJobMetrics(response); - - receiver.get(SUCCESS).output(result); - - } catch (StatusRuntimeException e) { - failure.inc(); - receiver - .get(FAILURE) - .output( - DataflowRequestError.fromRequest(request, GetJobMetricsRequest.class) - .setObservedTime(Instant.now()) - .setMessage(Optional.ofNullable(e.getMessage()).orElse("")) - .setStackTrace(Throwables.getStackTraceAsString(e)) - .build()); - } - } - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowGetJobs.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowGetJobs.java deleted file mode 100644 index 728c6bd6d6b9..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowGetJobs.java +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership.
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; - -import com.google.dataflow.v1beta3.GetJobRequest; -import com.google.dataflow.v1beta3.Job; -import com.google.dataflow.v1beta3.JobsV1Beta3Grpc; -import io.grpc.StatusRuntimeException; -import java.util.Optional; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionTuple; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.TupleTagList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Throwables; -import org.checkerframework.checker.nullness.qual.MonotonicNonNull; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.Duration; -import org.joda.time.Instant; - -/** - * {@link PTransform} for executing {@link GetJobRequest}s using the {@link JobsV1Beta3Grpc}. Emits - * {@link Job}s or {@link DataflowRequestError}s. 
- */ -@Internal -public class DataflowGetJobs - extends PTransform< - @NonNull PCollection, - @NonNull DataflowReadResult> { - - private static final TupleTag SUCCESS = new TupleTag() {}; - - private static final TupleTag FAILURE = - new TupleTag() {}; - - public static DataflowGetJobs create(DataflowClientFactoryConfiguration configuration) { - return new DataflowGetJobs(configuration); - } - - private final DataflowClientFactoryConfiguration configuration; - - private DataflowGetJobs(@NonNull DataflowClientFactoryConfiguration configuration) { - this.configuration = configuration; - } - - @Override - public @NonNull DataflowReadResult expand( - PCollection input) { - - PCollectionTuple pct = - input - .apply( - Throttle.class.getSimpleName() + " " + DataflowGetJobs.class.getSimpleName(), - Throttle.of(DataflowGetJobs.class.getName(), Duration.standardSeconds(1L))) - .apply( - "GetJobs", - ParDo.of(new GetJobsFn(this)).withOutputTags(SUCCESS, TupleTagList.of(FAILURE))); - - return DataflowReadResult.of(SUCCESS, FAILURE, pct); - } - - private static class GetJobsFn extends DoFn { - final Counter success = Metrics.counter(GetJobRequest.class, "get_jobs_success"); - final Counter failure = Metrics.counter(GetJobRequest.class, "get_jobs_failure"); - private final DataflowGetJobs spec; - private transient JobsV1Beta3Grpc.@MonotonicNonNull JobsV1Beta3BlockingStub client; - - GetJobsFn(DataflowGetJobs spec) { - this.spec = spec; - } - - @Setup - public void setup() { - client = DataflowClientFactory.INSTANCE.getOrCreateJobsClient(spec.configuration); - } - - @ProcessElement - public void process(@Element GetJobRequest request, MultiOutputReceiver receiver) { - try { - - Job job = checkStateNotNull(client).getJob(request); - success.inc(); - receiver.get(SUCCESS).output(job); - - } catch (StatusRuntimeException e) { - - failure.inc(); - receiver - .get(FAILURE) - .output( - DataflowRequestError.fromRequest(request, GetJobRequest.class) - .setObservedTime(Instant.now()) - .setMessage(Optional.ofNullable(e.getMessage()).orElse("")) - .setStackTrace(Throwables.getStackTraceAsString(e)) - .build()); - } - } - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowGetStageExecutionDetails.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowGetStageExecutionDetails.java deleted file mode 100644 index 650631533a0b..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowGetStageExecutionDetails.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
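End to end, the DataflowGetJobs reader above would be wired roughly as follows. This is a sketch only: `configuration` stands for a DataflowClientFactoryConfiguration built from pipeline options elsewhere in this module, and the generic parameters follow the element types of the SUCCESS and FAILURE tags.

// Sketch. Assumes:
//  - `requests` is a PCollection<GetJobRequest>, e.g. produced by DataflowRequests below;
//  - `configuration` is a DataflowClientFactoryConfiguration built elsewhere (not shown here).
DataflowReadResult<Job, DataflowRequestError> readResult =
    requests.apply("GetJobs", DataflowGetJobs.create(configuration));

PCollection<Job> jobs = readResult.getSuccess();
PCollection<DataflowRequestError> failures = readResult.getFailure();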
- */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; - -import com.google.dataflow.v1beta3.GetStageExecutionDetailsRequest; -import com.google.dataflow.v1beta3.Job; -import com.google.dataflow.v1beta3.MetricsV1Beta3Grpc; -import com.google.dataflow.v1beta3.StageExecutionDetails; -import com.google.dataflow.v1beta3.WorkerDetails; -import io.grpc.StatusRuntimeException; -import java.util.Optional; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionTuple; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.TupleTagList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Throwables; -import org.checkerframework.checker.nullness.qual.MonotonicNonNull; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.Duration; -import org.joda.time.Instant; - -/** - * {@link PTransform} for executing {@link GetStageExecutionDetailsRequest}s using the {@link - * MetricsV1Beta3Grpc} client. Emits {@link WorkerDetailsWithAppendedDetails} or {@link - * DataflowRequestError}s. - */ -@Internal -public class DataflowGetStageExecutionDetails - extends PTransform< - @NonNull PCollection, - @NonNull DataflowReadResult> { - - public static DataflowGetStageExecutionDetails create( - DataflowClientFactoryConfiguration configuration) { - return new DataflowGetStageExecutionDetails(configuration); - } - - private static final TupleTag SUCCESS = - new TupleTag() {}; - - private static final TupleTag FAILURE = - new TupleTag() {}; - - private final DataflowClientFactoryConfiguration configuration; - - private DataflowGetStageExecutionDetails(DataflowClientFactoryConfiguration configuration) { - this.configuration = configuration; - } - - @Override - public @NonNull DataflowReadResult expand( - PCollection input) { - - PCollectionTuple pct = - input - .apply( - Throttle.class.getSimpleName() - + " " - + DataflowGetStageExecutionDetails.class.getSimpleName(), - Throttle.of( - DataflowGetStageExecutionDetails.class.getName(), Duration.standardSeconds(1L))) - .apply( - DataflowGetStageExecutionDetails.class.getSimpleName(), - ParDo.of(new GetStageExecutionDetailsFn(this)) - .withOutputTags(SUCCESS, TupleTagList.of(FAILURE))); - return DataflowReadResult.of(SUCCESS, FAILURE, pct); - } - - private static class GetStageExecutionDetailsFn - extends DoFn { - - final Counter success = - Metrics.counter( - GetStageExecutionDetailsRequest.class, "get_stage_execution_details_success"); - final Counter failure = - Metrics.counter( - GetStageExecutionDetailsRequest.class, "get_stage_execution_details_failure"); - - final Counter items = Metrics.counter(WorkerDetails.class, "stage_execution_details_items"); - private final DataflowGetStageExecutionDetails spec; - private transient MetricsV1Beta3Grpc.@MonotonicNonNull MetricsV1Beta3BlockingStub client; - - private GetStageExecutionDetailsFn(DataflowGetStageExecutionDetails spec) { - this.spec = spec; - } - - @Setup - public void setup() { - client = DataflowClientFactory.INSTANCE.getOrCreateMetricsClient(spec.configuration); - } - - 
@ProcessElement - public void process(@Element Job job, MultiOutputReceiver receiver) { - GetStageExecutionDetailsRequest request = - GetStageExecutionDetailsRequest.getDefaultInstance() - .toBuilder() - .setJobId(job.getId()) - .setProjectId(job.getProjectId()) - .setLocation(job.getLocation()) - .build(); - try { - StageExecutionDetails response = - checkStateNotNull(client).getStageExecutionDetails(request); - success.inc(); - items.inc(response.getWorkersCount()); - emitResponse(job, response, receiver.get(SUCCESS)); - while (!Strings.isNullOrEmpty(response.getNextPageToken())) { - GetStageExecutionDetailsRequest requestWithPageToken = - request.toBuilder().setPageToken(response.getNextPageToken()).build(); - response = client.getStageExecutionDetails(requestWithPageToken); - success.inc(); - emitResponse(job, response, receiver.get(SUCCESS)); - } - } catch (StatusRuntimeException e) { - failure.inc(); - receiver - .get(FAILURE) - .output( - DataflowRequestError.fromRequest(request, GetStageExecutionDetailsRequest.class) - .setObservedTime(Instant.now()) - .setMessage(Optional.ofNullable(e.getMessage()).orElse("")) - .setStackTrace(Throwables.getStackTraceAsString(e)) - .build()); - } - } - - private static void emitResponse( - @NonNull Job job, - @NonNull StageExecutionDetails response, - @NonNull OutputReceiver receiver) { - - for (WorkerDetails details : response.getWorkersList()) { - Instant createTime = Instant.ofEpochSecond(job.getCreateTime().getSeconds()); - WorkerDetailsWithAppendedDetails result = new WorkerDetailsWithAppendedDetails(); - result.setJobId(job.getId()); - result.setJobCreateTime(createTime); - result.setWorkerDetails(details); - receiver.output(result); - } - } - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowJobsOptions.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowJobsOptions.java deleted file mode 100644 index 148dde303d66..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowJobsOptions.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.Hidden; -import org.apache.beam.sdk.options.PipelineOptions; - -/** Options required for calling the Dataflow API. 
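The stage-details call in GetStageExecutionDetailsFn above is the only paginated read in this set; its page-token loop generalizes to any list-style API. In isolation, with illustrative names (`stub` is a blocking client and `consume` is a hypothetical callback):

// Generic page-token pattern, mirroring the loop in GetStageExecutionDetailsFn above.
StageExecutionDetails page = stub.getStageExecutionDetails(request);
consume(page);
while (!Strings.isNullOrEmpty(page.getNextPageToken())) {
  page =
      stub.getStageExecutionDetails(
          request.toBuilder().setPageToken(page.getNextPageToken()).build());
  consume(page);
}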
*/ -@Internal -public interface DataflowJobsOptions extends PipelineOptions { - - @Description("Target for use with the Google Cloud Dataflow API") - @Default.String("dns:///dataflow.googleapis.com") - @Hidden - String getDataflowTarget(); - - void setDataflowTarget(String value); -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowReadResult.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowReadResult.java deleted file mode 100644 index af01eb6abd43..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowReadResult.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import java.util.Map; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionTuple; -import org.apache.beam.sdk.values.PInput; -import org.apache.beam.sdk.values.POutput; -import org.apache.beam.sdk.values.PValue; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.checkerframework.checker.initialization.qual.Initialized; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.checkerframework.checker.nullness.qual.UnknownKeyFor; - -/** Convenience class for bundling Dataflow API successful request responses and failures. 
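As with any PipelineOptions interface, DataflowJobsOptions would be registered and parsed from command-line flags. A minimal sketch:

import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class OptionsSketch {
  public static void main(String[] args) {
    // Registration makes the interface's flags visible to --help and validation.
    PipelineOptionsFactory.register(DataflowJobsOptions.class);
    DataflowJobsOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(DataflowJobsOptions.class);
    // Falls back to the @Default.String value, dns:///dataflow.googleapis.com.
    String target = options.getDataflowTarget();
    System.out.println(target);
  }
}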
*/ -@Internal -public class DataflowReadResult implements POutput { - - public static DataflowReadResult of( - TupleTag successTag, TupleTag failureTag, PCollectionTuple pct) { - return new DataflowReadResult<>(successTag, failureTag, pct); - } - - private final Pipeline pipeline; - - private final TupleTag successTag; - - private final PCollection success; - - private final TupleTag failureTag; - - private final PCollection failure; - - private DataflowReadResult( - TupleTag successTag, TupleTag failureTag, PCollectionTuple pct) { - this.pipeline = pct.getPipeline(); - this.successTag = successTag; - this.success = pct.get(successTag); - this.failureTag = failureTag; - this.failure = pct.get(failureTag); - } - - public PCollection getSuccess() { - return success; - } - - public PCollection getFailure() { - return failure; - } - - @Override - public @UnknownKeyFor @NonNull @Initialized Pipeline getPipeline() { - return pipeline; - } - - @Override - public @NonNull Map, PValue> expand() { - return ImmutableMap.of( - successTag, success, - failureTag, failure); - } - - @Override - public void finishSpecifyingOutput( - @UnknownKeyFor @NonNull @Initialized String transformName, - @UnknownKeyFor @NonNull @Initialized PInput input, - @UnknownKeyFor @NonNull @Initialized - PTransform<@UnknownKeyFor @NonNull @Initialized ?, @UnknownKeyFor @NonNull @Initialized ?> - transform) {} -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowRequestError.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowRequestError.java deleted file mode 100644 index 5d7260c47359..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowRequestError.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; - -import com.google.auto.value.AutoValue; -import com.google.protobuf.GeneratedMessageV3; -import com.google.protobuf.InvalidProtocolBufferException; -import com.google.protobuf.util.JsonFormat; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.schemas.AutoValueSchema; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.annotations.DefaultSchema; -import org.apache.beam.sdk.schemas.annotations.SchemaCaseFormat; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.CaseFormat; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Stores errors related to Dataflow API request executions. */ -@Internal -@DefaultSchema(AutoValueSchema.class) -@AutoValue -@SchemaCaseFormat(CaseFormat.LOWER_UNDERSCORE) -public abstract class DataflowRequestError { - - private static final Logger LOG = LoggerFactory.getLogger(DataflowRequestError.class); - - private static final AutoValueSchema SCHEMA_PROVIDER = new AutoValueSchema(); - - private static final TypeDescriptor TYPE = - TypeDescriptor.of(DataflowRequestError.class); - - public static final Schema SCHEMA = checkStateNotNull(SCHEMA_PROVIDER.schemaFor(TYPE)); - - public static final SerializableFunction TO_ROW_FN = - SCHEMA_PROVIDER.toRowFunction(TYPE); - - public static Builder builder() { - return new AutoValue_DataflowRequestError.Builder(); - } - - public static Builder fromRequest( - RequestT request, Class clazz) { - Builder builder = builder(); - try { - String json = JsonFormat.printer().omittingInsignificantWhitespace().print(request); - builder = builder.setRequest(json); - } catch (InvalidProtocolBufferException e) { - LOG.warn("error converting {} to json: {}", clazz, e.getMessage()); - } - return builder; - } - - public abstract Instant getObservedTime(); - - public abstract String getRequest(); - - public abstract String getMessage(); - - public abstract String getStackTrace(); - - @AutoValue.Builder - public abstract static class Builder { - - public abstract Builder setObservedTime(Instant value); - - public abstract Builder setRequest(String value); - - public abstract Builder setMessage(String value); - - public abstract Builder setStackTrace(String value); - - public abstract DataflowRequestError build(); - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowRequests.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowRequests.java deleted file mode 100644 index 6cc54310df68..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowRequests.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
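Because DataflowRequestError carries an AutoValueSchema-derived schema and a to-Row function, failed requests can be persisted as Rows. A sketch; the downstream sink is not part of this diff:

// Assumes `failures` is a PCollection<DataflowRequestError>,
// e.g. from DataflowReadResult#getFailure().
PCollection<Row> errorRows =
    failures
        .apply(
            "ErrorsToRows",
            MapElements.into(TypeDescriptor.of(Row.class)).via(DataflowRequestError.TO_ROW_FN))
        .setRowSchema(DataflowRequestError.SCHEMA);
// errorRows can then feed a schema-aware sink such as BigQueryIO.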
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; - -import com.google.dataflow.v1beta3.GetJobRequest; -import com.google.dataflow.v1beta3.JobView; -import com.google.events.cloud.dataflow.v1beta3.Job; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.values.TypeDescriptor; - -/** Various methods to create Dataflow API requests. */ -@Internal -public final class DataflowRequests { - - /** Creates {@link GetJobRequest}s from {@link Job}s with {@link JobView#JOB_VIEW_ALL}. */ - public static MapElements jobRequestsFromEventsViewAll() { - return jobRequests(JobView.JOB_VIEW_ALL); - } - - /** Creates {@link GetJobRequest}s from {@link Job}s with the assigned {@link JobView}. */ - public static MapElements jobRequests(JobView view) { - return MapElements.into(TypeDescriptor.of(GetJobRequest.class)) - .via( - event -> { - Job safeEvent = checkStateNotNull(event); - return GetJobRequest.newBuilder() - .setJobId(safeEvent.getId()) - .setLocation(safeEvent.getLocation()) - .setProjectId(safeEvent.getProjectId()) - .setView(view) - .build(); - }); - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/JobMetricsWithAppendedDetails.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/JobMetricsWithAppendedDetails.java deleted file mode 100644 index e2472d2e42f1..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/JobMetricsWithAppendedDetails.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import com.google.dataflow.v1beta3.Job; -import com.google.dataflow.v1beta3.JobMetrics; -import java.io.Serializable; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Objects; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Instant; - -/** - * A {@link JobMetrics} enrichment with a {@link Job}'s id and create time. 
The purpose of this - * enrichment is to join metrics with its Job while partitioning on the Job's create time. - */ -@Internal -public class JobMetricsWithAppendedDetails implements Serializable { - - private String jobId = ""; - - private Instant jobCreateTime = Instant.EPOCH; - - private JobMetrics jobMetrics = JobMetrics.getDefaultInstance(); - - public String getJobId() { - return jobId; - } - - public void setJobId(@NonNull String jobId) { - this.jobId = jobId; - } - - public Instant getJobCreateTime() { - return jobCreateTime; - } - - public void setJobCreateTime(@NonNull Instant jobCreateTime) { - this.jobCreateTime = jobCreateTime; - } - - public JobMetrics getJobMetrics() { - return jobMetrics; - } - - public void setJobMetrics(@NonNull JobMetrics jobMetrics) { - this.jobMetrics = jobMetrics; - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - JobMetricsWithAppendedDetails that = (JobMetricsWithAppendedDetails) o; - return Objects.equal(jobId, that.jobId) - && Objects.equal(jobCreateTime, that.jobCreateTime) - && Objects.equal(jobMetrics, that.jobMetrics); - } - - @Override - public int hashCode() { - return Objects.hashCode(jobId, jobCreateTime, jobMetrics); - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/StageSummaryWithAppendedDetails.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/StageSummaryWithAppendedDetails.java deleted file mode 100644 index 0cdca9accb9c..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/StageSummaryWithAppendedDetails.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import com.google.dataflow.v1beta3.Job; -import com.google.dataflow.v1beta3.StageSummary; -import java.io.Serializable; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Objects; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Instant; - -/** - * A {@link StageSummary} enrichment with a {@link Job}'s id and create time. The purpose of this - * enrichment is to join stage summaries with their Job while partitioning on the Job's create time. 
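The DataflowRequests helper a little further up converts Eventarc job events into Dataflow API requests; its use is a one-liner. Sketch only; the decoding of `jobEvents` from Pub/Sub payloads is assumed, not shown in this diff:

// `jobEvents` is a PCollection of com.google.events.cloud.dataflow.v1beta3.Job
// decoded from Pub/Sub messages.
PCollection<GetJobRequest> requests =
    jobEvents.apply("EventsToRequests", DataflowRequests.jobRequestsFromEventsViewAll());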
- */ -@Internal -public class StageSummaryWithAppendedDetails implements Serializable { - - private String jobId = ""; - - private Instant jobCreateTime = Instant.EPOCH; - - private StageSummary stageSummary = StageSummary.getDefaultInstance(); - - public String getJobId() { - return jobId; - } - - public void setJobId(@NonNull String jobId) { - this.jobId = jobId; - } - - public Instant getJobCreateTime() { - return jobCreateTime; - } - - public void setJobCreateTime(@NonNull Instant jobCreateTime) { - this.jobCreateTime = jobCreateTime; - } - - public StageSummary getStageSummary() { - return stageSummary; - } - - public void setStageSummary(@NonNull StageSummary stageSummary) { - this.stageSummary = stageSummary; - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - StageSummaryWithAppendedDetails that = (StageSummaryWithAppendedDetails) o; - return Objects.equal(jobId, that.jobId) - && Objects.equal(jobCreateTime, that.jobCreateTime) - && Objects.equal(stageSummary, that.stageSummary); - } - - @Override - public int hashCode() { - return Objects.hashCode(jobId, jobCreateTime, stageSummary); - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/Throttle.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/Throttle.java deleted file mode 100644 index 3a062a9a8345..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/Throttle.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.Flatten; -import org.apache.beam.sdk.transforms.GroupIntoBatches; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.Values; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.joda.time.Duration; - -/** Controls the rate of elements. */ -@Internal -class Throttle extends PTransform, PCollection> { - - /** Control the rate of elements, emitting each element per {@link Duration}. 
*/ - static Throttle of(String name, Duration duration) { - return new Throttle<>(name, duration); - } - - private final String tagName; - private final Duration duration; - - public Throttle(String tagName, Duration duration) { - this.tagName = tagName; - this.duration = duration; - } - - @Override - public PCollection expand(PCollection input) { - return input - .apply( - "Throttle/Window " + tagName, - Window.into(FixedWindows.of(Duration.standardSeconds(1L)))) - .apply( - "Throttle/Assign To Bounded Window " + tagName, - ParDo.of(new AssignToBoundedWindowFn<>())) - .apply( - "Throttle/GroupIntoBatches " + tagName, - GroupIntoBatches.ofSize(1L).withMaxBufferingDuration(duration)) - .apply("Throttle/Extract Values " + tagName, Values.create()) - .apply("Throttle/Flatten " + tagName, Flatten.iterables()); - } - - private static class AssignToBoundedWindowFn extends DoFn> { - @ProcessElement - public void process( - @Element T element, BoundedWindow boundedWindow, OutputReceiver> receiver) { - receiver.output(KV.of(boundedWindow.maxTimestamp().getMillis(), element)); - } - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/WorkerDetailsWithAppendedDetails.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/WorkerDetailsWithAppendedDetails.java deleted file mode 100644 index 4b6d0a32d367..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/WorkerDetailsWithAppendedDetails.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import com.google.dataflow.v1beta3.Job; -import com.google.dataflow.v1beta3.WorkerDetails; -import java.io.Serializable; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Objects; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Instant; - -/** - * A {@link WorkerDetails} enrichment with a {@link Job}'s id and create time. The purpose of this - * enrichment is to join worker details with their Job while partitioning on the Job's create time. 
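Throttle, shown above, achieves its pacing purely with public primitives: fixed windows, a key per window, GroupIntoBatches of size one with a max buffering duration, then un-keying and flattening. A simplified standalone version of the same idea follows; it keys everything to a single constant, which serializes the stream more aggressively than the per-window keys used above, and it assumes a streaming pipeline:

import org.apache.beam.sdk.transforms.Flatten;
import org.apache.beam.sdk.transforms.GroupIntoBatches;
import org.apache.beam.sdk.transforms.Values;
import org.apache.beam.sdk.transforms.WithKeys;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptors;
import org.joda.time.Duration;

public final class ThrottleSketch {
  static PCollection<String> throttle(PCollection<String> input, Duration interval) {
    return input
        // Bound the stream into small windows so GroupIntoBatches can fire.
        .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(1))))
        // Single constant key: simpler than the per-window key above, but fully serialized.
        .apply(WithKeys.<Long, String>of((String s) -> 0L).withKeyType(TypeDescriptors.longs()))
        // Emit at most one element per buffering interval.
        .apply(GroupIntoBatches.<Long, String>ofSize(1L).withMaxBufferingDuration(interval))
        .apply(Values.create())
        .apply(Flatten.iterables());
  }
}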
- */ -@Internal -public class WorkerDetailsWithAppendedDetails implements Serializable { - - private String jobId = ""; - - private Instant jobCreateTime = Instant.EPOCH; - - private WorkerDetails workerDetails = WorkerDetails.getDefaultInstance(); - - public String getJobId() { - return jobId; - } - - public void setJobId(@NonNull String jobId) { - this.jobId = jobId; - } - - public Instant getJobCreateTime() { - return jobCreateTime; - } - - public void setJobCreateTime(@NonNull Instant jobCreateTime) { - this.jobCreateTime = jobCreateTime; - } - - public WorkerDetails getWorkerDetails() { - return workerDetails; - } - - public void setWorkerDetails(@NonNull WorkerDetails workerDetails) { - this.workerDetails = workerDetails; - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - WorkerDetailsWithAppendedDetails that = (WorkerDetailsWithAppendedDetails) o; - return Objects.equal(jobId, that.jobId) - && Objects.equal(jobCreateTime, that.jobCreateTime) - && Objects.equal(workerDetails, that.workerDetails); - } - - @Override - public int hashCode() { - return Objects.hashCode(jobId, jobCreateTime, workerDetails); - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/package-info.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/package-info.java deleted file mode 100644 index 3d359087172c..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/dataflow/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Defines how to read from the Dataflow API. */ -package org.apache.beam.testinfra.pipelines.dataflow; diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/package-info.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/package-info.java deleted file mode 100644 index f61cdfa26331..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/package-info.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** Pipelines for use with test infrastructure. */ -package org.apache.beam.testinfra.pipelines; diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/pubsub/PubsubReadOptions.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/pubsub/PubsubReadOptions.java deleted file mode 100644 index 1be06256efef..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/pubsub/PubsubReadOptions.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.pubsub; - -import com.fasterxml.jackson.annotation.JsonIgnore; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.Validation.Required; - -/** Options for reading from Pub/Sub. */ -@Internal -public interface PubsubReadOptions extends PipelineOptions { - @Description("Pub/Sub subscription") - @Required - @JsonIgnore - SubscriptionPathOptionValue getSubscription(); - - void setSubscription(SubscriptionPathOptionValue value); -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/pubsub/SubscriptionPathOptionValue.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/pubsub/SubscriptionPathOptionValue.java deleted file mode 100644 index c27ec16f57c5..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/pubsub/SubscriptionPathOptionValue.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.pubsub; - -import java.io.Serializable; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient; -import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.SubscriptionPath; - -/** Converts a String Pub/Sub subscription path into a {@link SubscriptionPath}. */ -@Internal -public class SubscriptionPathOptionValue implements Serializable { - private final SubscriptionPath subscriptionPath; - - public SubscriptionPathOptionValue(String input) { - SubscriptionPath parsedResult = null; - try { - parsedResult = PubsubClient.subscriptionPathFromPath(input); - } catch (IllegalStateException e) { - throw new IllegalArgumentException( - String.format( - "error parsing '%s' into %s: %s", input, SubscriptionPath.class, e.getMessage())); - } - this.subscriptionPath = parsedResult; - } - - public SubscriptionPath getValue() { - return subscriptionPath; - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/pubsub/package-info.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/pubsub/package-info.java deleted file mode 100644 index a9694b1d1b3d..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/pubsub/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Defines how to read from Pub/Sub. */ -package org.apache.beam.testinfra.pipelines.pubsub; diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/schemas/DependencyDrivenDescriptorQueue.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/schemas/DependencyDrivenDescriptorQueue.java deleted file mode 100644 index 94007e29650b..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/schemas/DependencyDrivenDescriptorQueue.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
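The SubscriptionPathOptionValue wrapper above exists so that an invalid subscription flag fails fast at options-parsing time rather than mid-pipeline. Constructed directly, with a made-up project and subscription name:

// Hypothetical path; PubsubClient.subscriptionPathFromPath requires the full
// "projects/<project>/subscriptions/<name>" form, and the wrapper rethrows its
// IllegalStateException as an IllegalArgumentException with context.
SubscriptionPathOptionValue subscription =
    new SubscriptionPathOptionValue("projects/example-project/subscriptions/example-sub");
SubscriptionPath path = subscription.getValue();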
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.schemas; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; - -import com.google.protobuf.Descriptors.Descriptor; -import com.google.protobuf.Descriptors.FieldDescriptor; -import com.google.protobuf.Descriptors.FieldDescriptor.JavaType; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.apache.beam.sdk.annotations.Internal; -import org.checkerframework.checker.nullness.qual.NonNull; - -/** - * Prevents stack overflow errors when parsing a {@link Descriptor}. Orders {@link Descriptor} by - * their interdependencies such that the first has no dependencies i.e. only primitive types and - * subsequent {@link Descriptor}s contain {@link JavaType#MESSAGE} types. - */ -@Internal -class DependencyDrivenDescriptorQueue implements Iterable, Comparator { - private final Map<@NonNull String, @NonNull Descriptor> descriptorMap = new HashMap<>(); - private final Map<@NonNull String, @NonNull Set> dependencyMap = new HashMap<>(); - - private final Map<@NonNull String, @NonNull Integer> messageFieldCounts = new HashMap<>(); - - /** - * Enqueues a {@link Descriptor}. Walks down its dependency tree of any nested {@link - * JavaType#MESSAGE} types, further enqueuing these types' {@link Descriptor}s. - */ - void enqueue(@NonNull Descriptor descriptor) { - List<@NonNull Descriptor> descriptorStack = new ArrayList<>(); - descriptorStack.add(descriptor); - while (!descriptorStack.isEmpty()) { - Descriptor fromStack = descriptorStack.remove(0); - if (descriptorMap.containsKey(fromStack.getFullName())) { - checkState(dependencyMap.containsKey(fromStack.getFullName())); - checkState(messageFieldCounts.containsKey(fromStack.getFullName())); - continue; - } - int messageFieldCounts = 0; - for (FieldDescriptor field : fromStack.getFields()) { - if (!field.getJavaType().equals(JavaType.MESSAGE)) { - continue; - } - messageFieldCounts++; - Descriptor fieldDescriptor = field.getMessageType(); - if (!dependencyMap.containsKey(fieldDescriptor.getFullName())) { - dependencyMap.put(fieldDescriptor.getFullName(), new HashSet<>()); - } - Set dependents = dependencyMap.get(fieldDescriptor.getFullName()); - dependents.add(descriptor.getFullName()); - descriptorStack.add(fieldDescriptor); - } - descriptorMap.put(fromStack.getFullName(), fromStack); - this.messageFieldCounts.put(fromStack.getFullName(), messageFieldCounts); - if (!dependencyMap.containsKey(fromStack.getFullName())) { - dependencyMap.put(fromStack.getFullName(), new HashSet<>()); - } - } - } - - /** - * Returns an iteration of {@link Descriptor}s ordered by increasing dependency such that the - * first has no nested {@link JavaType#MESSAGE} types. 
- */ - @Override - public Iterator iterator() { - return descriptorMap.values().stream().sorted(this).iterator(); - } - - @Override - public int compare(@NonNull Descriptor a, @NonNull Descriptor b) { - boolean aDependsOnB = - checkStateNotNull(dependencyMap.get(b.getFullName())).contains(a.getFullName()); - boolean bDependsOnA = - checkStateNotNull(dependencyMap.get(a.getFullName())).contains(b.getFullName()); - if (aDependsOnB) { - return 1; - } - if (bDependsOnA) { - return -1; - } - Integer aMessageFieldCount = checkStateNotNull(messageFieldCounts.get(a.getFullName())); - Integer bMessageFieldCount = checkStateNotNull(messageFieldCounts.get(b.getFullName())); - return aMessageFieldCount.compareTo(bMessageFieldCount); - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/schemas/DescriptorSchemaRegistry.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/schemas/DescriptorSchemaRegistry.java deleted file mode 100644 index a8e60f403058..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/schemas/DescriptorSchemaRegistry.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.schemas; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; - -import com.google.protobuf.Descriptors.Descriptor; -import com.google.protobuf.Descriptors.FieldDescriptor; -import com.google.protobuf.Descriptors.FieldDescriptor.JavaType; -import java.util.HashMap; -import java.util.Map; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.Schema.Field; -import org.apache.beam.sdk.schemas.Schema.FieldType; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.checkerframework.checker.nullness.qual.NonNull; - -/** Registers and builds {@link Schema}s of {@link Descriptor} based types. 
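Within this package, the queue would be exercised as below; per its Javadoc, iteration yields message types with no message-typed fields first, so every schema a type depends on is built before the type itself. `Job.getDescriptor()` is the standard protobuf-generated accessor:

// Package-private usage sketch (the queue class is not public).
DependencyDrivenDescriptorQueue queue = new DependencyDrivenDescriptorQueue();
queue.enqueue(com.google.dataflow.v1beta3.Job.getDescriptor());
for (Descriptor descriptor : queue) {
  // Leaf-most descriptors appear before the descriptors that reference them.
  System.out.println(descriptor.getFullName());
}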
*/ -@Internal -public class DescriptorSchemaRegistry { - - public static final DescriptorSchemaRegistry INSTANCE = new DescriptorSchemaRegistry(); - - private DescriptorSchemaRegistry() {} - - static final String KEY_FIELD_NAME = "key"; - static final String VALUE_FIELD_NAME = "value"; - private static final Map<@NonNull String, @NonNull Schema> SCHEMA_CACHE = new HashMap<>(); - - boolean hasNoCachedBuild(Descriptor descriptor) { - return !SCHEMA_CACHE.containsKey(descriptor.getFullName()); - } - - public @NonNull Schema getOrBuild(Descriptor descriptor) { - if (hasNoCachedBuild(descriptor)) { - build(descriptor); - } - return checkStateNotNull(SCHEMA_CACHE.get(descriptor.getFullName())); - } - - public void build(Descriptor descriptor) { - DependencyDrivenDescriptorQueue queue = new DependencyDrivenDescriptorQueue(); - queue.enqueue(descriptor); - for (Descriptor queuedDescriptor : queue) { - Builder builder = new Builder(); - Schema schema = builder.build(queuedDescriptor); - SCHEMA_CACHE.put(queuedDescriptor.getFullName(), schema); - } - } - - static class Builder { - private static final Map FIELD_TYPE_MAP = - ImmutableMap.builder() - .put(JavaType.BOOLEAN, FieldType.BOOLEAN) - .put(JavaType.INT, FieldType.INT32) - .put(JavaType.LONG, FieldType.INT64) - .put(JavaType.FLOAT, FieldType.FLOAT) - .put(JavaType.DOUBLE, FieldType.DOUBLE) - .put(JavaType.ENUM, FieldType.STRING) - .put(JavaType.STRING, FieldType.STRING) - .build(); - - private static final Map<@NonNull String, @NonNull FieldType> FULL_NAME_TYPE_MAP = - ImmutableMap.builder() - .put("google.protobuf.Value", FieldType.STRING) - .put("google.protobuf.Timestamp", FieldType.DATETIME) - .put("google.protobuf.Any", FieldType.STRING) - .build(); - - private final Schema.Builder schemaBuilder = Schema.builder(); - - Schema build(Descriptor descriptor) { - parse(descriptor); - Schema schema = schemaBuilder.build(); - SCHEMA_CACHE.put(descriptor.getFullName(), schema); - return schema; - } - - void parse(Descriptor descriptor) { - for (FieldDescriptor fieldDescriptor : descriptor.getFields()) { - if (fieldDescriptor.getJavaType().equals(JavaType.BYTE_STRING)) { - continue; - } - FieldType type = build(fieldDescriptor); - schemaBuilder.addField(Field.of(fieldDescriptor.getName(), type)); - } - } - - FieldType build(FieldDescriptor fieldDescriptor) { - if (fieldDescriptor.isMapField()) { - return buildMapType(fieldDescriptor); - } - if (fieldDescriptor.getJavaType().equals(JavaType.MESSAGE)) { - return buildNestedType(fieldDescriptor); - } - FieldType type = checkStateNotNull(FIELD_TYPE_MAP.get(fieldDescriptor.getJavaType())); - if (fieldDescriptor.isRepeated()) { - type = FieldType.array(type); - } - return type; - } - - FieldType buildMapType(FieldDescriptor fieldDescriptor) { - Descriptor mapDescriptor = fieldDescriptor.getMessageType(); - FieldDescriptor keyField = checkStateNotNull(mapDescriptor.findFieldByName(KEY_FIELD_NAME)); - FieldType keyType = checkStateNotNull(FIELD_TYPE_MAP.get(keyField.getJavaType())); - FieldDescriptor valueField = - checkStateNotNull(mapDescriptor.findFieldByName(VALUE_FIELD_NAME)); - FieldType valueType = build(valueField); - return FieldType.array( - FieldType.row( - Schema.of(Field.of(KEY_FIELD_NAME, keyType), Field.of(VALUE_FIELD_NAME, valueType)))); - } - - FieldType buildNestedType(FieldDescriptor fieldDescriptor) { - Descriptor messageDescriptor = fieldDescriptor.getMessageType(); - if (FULL_NAME_TYPE_MAP.containsKey(messageDescriptor.getFullName())) { - return 
checkStateNotNull(FULL_NAME_TYPE_MAP.get(messageDescriptor.getFullName())); - } - if (!SCHEMA_CACHE.containsKey(messageDescriptor.getFullName())) { - Builder builder = new Builder(); - Schema schema = builder.build(messageDescriptor); - SCHEMA_CACHE.put(messageDescriptor.getFullName(), schema); - } - Schema schema = - checkStateNotNull( - SCHEMA_CACHE.get(messageDescriptor.getFullName()), - "nested type not cached: %s", - messageDescriptor.getFullName()); - FieldType type = FieldType.row(schema); - if (fieldDescriptor.isRepeated()) { - type = FieldType.array(type); - } - return type; - } - } -} diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/schemas/GeneratedMessageV3RowBuilder.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/schemas/GeneratedMessageV3RowBuilder.java deleted file mode 100644 index 666c787efb80..000000000000 --- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/schemas/GeneratedMessageV3RowBuilder.java +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
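Downstream code obtains a Beam Schema for any supported proto from the registry above in one call:

// Builds (and caches) schemas for Job and everything it transitively references.
Schema jobSchema =
    DescriptorSchemaRegistry.INSTANCE.getOrBuild(com.google.dataflow.v1beta3.Job.getDescriptor());
System.out.println(jobSchema.getFieldCount());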
- */ -package org.apache.beam.testinfra.pipelines.schemas; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; -import static org.apache.beam.testinfra.pipelines.schemas.DescriptorSchemaRegistry.KEY_FIELD_NAME; -import static org.apache.beam.testinfra.pipelines.schemas.DescriptorSchemaRegistry.VALUE_FIELD_NAME; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; - -import com.google.protobuf.AbstractMessage; -import com.google.protobuf.Any; -import com.google.protobuf.Descriptors.Descriptor; -import com.google.protobuf.Descriptors.FieldDescriptor; -import com.google.protobuf.Descriptors.FieldDescriptor.JavaType; -import com.google.protobuf.GeneratedMessageV3; -import com.google.protobuf.InvalidProtocolBufferException; -import com.google.protobuf.Message; -import com.google.protobuf.Timestamp; -import com.google.protobuf.Value; -import com.google.protobuf.util.JsonFormat; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.function.Consumer; -import java.util.function.Function; -import org.apache.beam.sdk.annotations.Internal; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.Schema.TypeName; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Throwables; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.ListeningExecutorService; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.MoreExecutors; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.Instant; - -/** Converts a {@link GeneratedMessageV3} type to a {@link Row}. */ -@Internal -public class GeneratedMessageV3RowBuilder { - public static GeneratedMessageV3RowBuilder of(T source) { - return new GeneratedMessageV3RowBuilder<>(source); - } - - private static final Map DEFAULT_VALUES = - ImmutableMap.builder() - .put(JavaType.BOOLEAN, false) - .put(JavaType.INT, 0) - .put(JavaType.LONG, 0L) - .put(JavaType.FLOAT, 0f) - .put(JavaType.DOUBLE, 0.0) - .put(JavaType.ENUM, "") - .put(JavaType.STRING, "") - .build(); - - private final ExecutorService threadPool = Executors.newSingleThreadExecutor(); - private final ListeningExecutorService service = MoreExecutors.listeningDecorator(threadPool); - - private static final Map<@NonNull String, Function> CONVERTERS_MAP = - ImmutableMap.>builder() - .put("google.protobuf.Value", o -> convert((Value) o)) - .put("google.protobuf.Any", o -> convert((Any) o)) - .put("google.protobuf.Timestamp", o -> convert((Timestamp) o)) - .build(); - private final @NonNull T source; - - private Row.@NonNull FieldValueBuilder builder; - - GeneratedMessageV3RowBuilder(@NonNull T source) { - this.source = source; - Schema schema = - checkStateNotNull( - DescriptorSchemaRegistry.INSTANCE.getOrBuild(source.getDescriptorForType())); - builder = Row.withSchema(schema).withFieldValues(ImmutableMap.of()); - } - - /** - * Builds a {@link Row} from a {@link GeneratedMessageV3} type submitting nested types to an - * {@link ExecutorService} to prevent stack overflow errors. 
-   */
-  public Row build() {
-    for (FieldDescriptor fieldDescriptor : source.getDescriptorForType().getFields()) {
-      if (shouldSkip(fieldDescriptor)) {
-        continue;
-      }
-      Object value = getValue(fieldDescriptor);
-      builder.withFieldValue(fieldDescriptor.getName(), value);
-    }
-
-    Row result = builder.build();
-    shutdownAwaitTermination();
-    return result;
-  }
-
-  Object getValue(FieldDescriptor fieldDescriptor) {
-    return getValue(this.source, fieldDescriptor);
-  }
-
-  <MessageT extends AbstractMessage> Object getValue(
-      MessageT message, FieldDescriptor fieldDescriptor) {
-    if (fieldDescriptor.isMapField()) {
-      return mapOf(message, fieldDescriptor);
-    }
-    if (fieldDescriptor.isRepeated()) {
-      return listOf(message, fieldDescriptor);
-    }
-    Object value = message.getField(fieldDescriptor);
-    return convert(fieldDescriptor, value);
-  }
-
-  <MessageT extends AbstractMessage> Object listOf(
-      MessageT message, FieldDescriptor fieldDescriptor) {
-    List<Object> result = new ArrayList<>();
-    int size = message.getRepeatedFieldCount(fieldDescriptor);
-    for (int i = 0; i < size; i++) {
-      Object value = getValue(message, fieldDescriptor, i);
-      result.add(value);
-    }
-    return result;
-  }
-
-  <MessageT extends AbstractMessage> Object getValue(
-      MessageT message, FieldDescriptor fieldDescriptor, int i) {
-    Object value = message.getRepeatedField(fieldDescriptor, i);
-    return convert(fieldDescriptor, value);
-  }
-
-  <MessageT extends AbstractMessage> Object mapOf(
-      MessageT message, FieldDescriptor fieldDescriptor) {
-    List<Row> result = new ArrayList<>();
-    Descriptor mapDescriptor = fieldDescriptor.getMessageType();
-    FieldDescriptor keyType = checkStateNotNull(mapDescriptor.findFieldByName(KEY_FIELD_NAME));
-    FieldDescriptor valueType = checkStateNotNull(mapDescriptor.findFieldByName(VALUE_FIELD_NAME));
-    int size = message.getRepeatedFieldCount(fieldDescriptor);
-    Schema messageSchema =
-        checkStateNotNull(
-            DescriptorSchemaRegistry.INSTANCE.getOrBuild(message.getDescriptorForType()));
-    Schema.Field mapField = messageSchema.getField(fieldDescriptor.getName());
-    checkState(mapField.getType().getTypeName().equals(TypeName.ARRAY));
-    Schema.FieldType mapEntryFieldType =
-        checkStateNotNull(mapField.getType().getCollectionElementType());
-    checkState(mapEntryFieldType.getTypeName().equals(TypeName.ROW));
-    Schema entrySchema = checkStateNotNull(mapEntryFieldType.getRowSchema());
-
-    for (int i = 0; i < size; i++) {
-      Object entryObj = message.getRepeatedField(fieldDescriptor, i);
-      checkState(
-          entryObj instanceof AbstractMessage,
-          "%s is not an instance of %s, found: %s",
-          fieldDescriptor.getName(),
-          AbstractMessage.class,
-          entryObj.getClass());
-      AbstractMessage entry = (AbstractMessage) entryObj;
-      Object key = getValue(entry, keyType);
-      Object value = getValue(entry, valueType);
-      Row entryRow =
-          Row.withSchema(entrySchema)
-              .withFieldValue(KEY_FIELD_NAME, key)
-              .withFieldValue(VALUE_FIELD_NAME, value)
-              .build();
-      result.add(entryRow);
-    }
-    return result;
-  }
-
-  Object convert(FieldDescriptor fieldDescriptor, Object originalValue) {
-
-    if (originalValue == null && DEFAULT_VALUES.containsKey(fieldDescriptor.getJavaType())) {
-      return checkStateNotNull(DEFAULT_VALUES.get(fieldDescriptor.getJavaType()));
-    }
-
-    if (fieldDescriptor.getJavaType().equals(JavaType.ENUM)) {
-      return checkStateNotNull(originalValue).toString();
-    }
-
-    if (!fieldDescriptor.getJavaType().equals(JavaType.MESSAGE)) {
-      return originalValue;
-    }
-
-    Descriptor descriptor = fieldDescriptor.getMessageType();
-    if (CONVERTERS_MAP.containsKey(descriptor.getFullName())) {
-      Function<Object, Object> converter =
-          checkStateNotNull(CONVERTERS_MAP.get(descriptor.getFullName()));
-
      return converter.apply(originalValue);
-    }
-
-    checkState(
-        originalValue instanceof GeneratedMessageV3,
-        "%s is not instance of %s, found: %s",
-        fieldDescriptor.getName(),
-        GeneratedMessageV3.class,
-        originalValue.getClass());
-
-    GeneratedMessageV3 message = (GeneratedMessageV3) originalValue;
-    GeneratedMessageV3RowBuilder<GeneratedMessageV3> rowBuilder =
-        new GeneratedMessageV3RowBuilder<>(message);
-
-    try {
-      return service.submit(rowBuilder::build).get();
-    } catch (InterruptedException | ExecutionException e) {
-      throw new IllegalStateException(
-          String.format(
-              "error building Row of %s type for field: %s of %s: %s %s",
-              fieldDescriptor.getMessageType().getFullName(),
-              fieldDescriptor.getName(),
-              source.getDescriptorForType().getFullName(),
-              e.getMessage(),
-              Throwables.getStackTraceAsString(e)));
-    }
-  }
-
-  boolean shouldSkip(FieldDescriptor fieldDescriptor) {
-    return fieldDescriptor.getJavaType().equals(JavaType.BYTE_STRING);
-  }
-
-  private static Object convert(Timestamp timestamp) {
-    return Instant.ofEpochMilli(timestamp.getSeconds() * 1000);
-  }
-
-  private static Object convert(Message value) {
-    try {
-      return JsonFormat.printer().omittingInsignificantWhitespace().print(value);
-    } catch (InvalidProtocolBufferException e) {
-      throw new IllegalStateException(e);
-    }
-  }
-
-  private void shutdownAwaitTermination() {
-    if (shutdownConsumeTerminationInterruptIfNeeded(
-        service,
-        e -> {
-          shutdownAndIgnoreInterruptIfNeeded(threadPool);
-        })) {
-      shutdownAndIgnoreInterruptIfNeeded(threadPool);
-    }
-  }
-
-  private boolean shutdownConsumeTerminationInterruptIfNeeded(
-      ExecutorService executorService, Consumer<InterruptedException> handleInterrupt) {
-    try {
-      if (executorService.isShutdown()) {
-        return true;
-      }
-      executorService.shutdown();
-      return executorService.awaitTermination(1L, TimeUnit.SECONDS);
-    } catch (InterruptedException e) {
-      handleInterrupt.accept(e);
-      return true;
-    }
-  }
-
-  private void shutdownAndIgnoreInterruptIfNeeded(ExecutorService executorService) {
-    try {
-      if (executorService.isShutdown()) {
-        return;
-      }
-      executorService.shutdown();
-      boolean ignored = executorService.awaitTermination(1L, TimeUnit.SECONDS);
-    } catch (InterruptedException ignored) {
-    }
-  }
-}
diff --git a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/schemas/package-info.java b/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/schemas/package-info.java
deleted file mode 100644
index 3fd4ebda4899..000000000000
--- a/.test-infra/pipelines/src/main/java/org/apache/beam/testinfra/pipelines/schemas/package-info.java
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/** Defines how to handle Beam Schemas.
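The builder deleted above sidesteps StackOverflowError on deeply nested messages by running each nested conversion through a fresh single-thread executor: every nesting level gets its own shallow call stack, at the cost of one (mostly blocked) thread per level. A minimal, self-contained sketch of the same trick, with a hypothetical Node type rather than Beam or protobuf classes:

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

final class NestedBuildSketch {
  static final class Node {
    final Node child;
    Node(Node child) { this.child = child; }
  }

  static int depthOf(Node node) throws Exception {
    if (node.child == null) {
      return 1;
    }
    // Each level owns its own executor, mirroring how every nested
    // GeneratedMessageV3RowBuilder instance owns its own thread pool;
    // the recursive step starts on a fresh stack, so no single thread
    // ever holds more than a couple of frames of this recursion.
    ExecutorService pool = Executors.newSingleThreadExecutor();
    try {
      return 1 + pool.submit(() -> depthOf(node.child)).get();
    } finally {
      pool.shutdown();
    }
  }

  public static void main(String[] args) throws Exception {
    Node deep = null;
    for (int i = 0; i < 1_000; i++) {
      deep = new Node(deep);
    }
    System.out.println(depthOf(deep)); // 1000, without StackOverflowError
  }
}
```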
*/ -package org.apache.beam.testinfra.pipelines.schemas; diff --git a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/EventarcConversionsTest.java b/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/EventarcConversionsTest.java deleted file mode 100644 index 8ead238eb109..000000000000 --- a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/EventarcConversionsTest.java +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.conversions; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; -import static org.apache.beam.sdk.values.TypeDescriptors.booleans; -import static org.apache.beam.sdk.values.TypeDescriptors.strings; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -import com.google.events.cloud.dataflow.v1beta3.Job; -import com.google.events.cloud.dataflow.v1beta3.JobState; -import com.google.events.cloud.dataflow.v1beta3.JobType; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.WithFailures; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.junit.jupiter.api.Test; - -/** Tests for {@link org.apache.beam.testinfra.pipelines.conversions.EventarcConversions}. 
*/ -class EventarcConversionsTest { - - private static final EventarcConversions.JsonToJobFn FROM_JSON_FN = - new EventarcConversions.JsonToJobFn(); - - @Test - void fromJson_emptyStrings_emitsAsConversionErrors() { - Pipeline pipeline = Pipeline.create(); - WithFailures.Result<@NonNull PCollection, ConversionError> result = - pipeline.apply(Create.of("")).apply(EventarcConversions.fromJson()); - PAssert.that(result.output()).empty(); - PAssert.thatSingleton( - result - .failures() - .apply( - "get error message", - MapElements.into(strings()) - .via(error -> checkStateNotNull(error).getMessage()))) - .isEqualTo("json input missing path: $.data"); - - pipeline.run(); - } - - @Test - void fromJson_malformedJson_emitsAsConversionErrors() { - Pipeline pipeline = Pipeline.create(); - WithFailures.Result<@NonNull PCollection, ConversionError> result = - pipeline.apply(Create.of("{\"foo\", \"bar\"")).apply(EventarcConversions.fromJson()); - PAssert.that(result.output()).empty(); - PAssert.thatSingleton( - result - .failures() - .apply( - "contains message part", - MapElements.into(booleans()) - .via( - error -> - checkStateNotNull(error) - .getMessage() - .contains("JsonParseException")))) - .isEqualTo(true); - - pipeline.run(); - } - - @Test - void fromJson_missingDataKey_emitsAsConversionErrors() { - Pipeline pipeline = Pipeline.create(); - WithFailures.Result<@NonNull PCollection, ConversionError> result = - pipeline.apply(Create.of("{}")).apply(EventarcConversions.fromJson()); - PAssert.that(result.output()).empty(); - PAssert.thatSingleton( - result - .failures() - .apply( - "get error message", - MapElements.into(strings()) - .via(error -> checkStateNotNull(error).getMessage()))) - .isEqualTo("json input missing path: $.data"); - - pipeline.run(); - } - - @Test - void fromJson_missingTypeKey_emitsAsConversionErrors() { - Pipeline pipeline = Pipeline.create(); - WithFailures.Result<@NonNull PCollection, ConversionError> result = - pipeline.apply(Create.of("{\"data\":{}}")).apply(EventarcConversions.fromJson()); - PAssert.that(result.output()).empty(); - PAssert.thatSingleton( - result - .failures() - .apply( - "get error message", - MapElements.into(strings()) - .via(error -> checkStateNotNull(error).getMessage()))) - .isEqualTo("json input missing path: $.data.@type"); - - pipeline.run(); - } - - @Test - void fromJson_typeMismatch_emitsAsConversionErrors() { - Pipeline pipeline = Pipeline.create(); - WithFailures.Result<@NonNull PCollection, ConversionError> result = - pipeline - .apply(Create.of("{\"data\":{\"payload\": {},\"@type\": \"bad.type\"}}")) - .apply(EventarcConversions.fromJson()); - PAssert.that(result.output()).empty(); - PAssert.thatSingleton( - result - .failures() - .apply( - "get error message", - MapElements.into(strings()) - .via(error -> checkStateNotNull(error).getMessage()))) - .isEqualTo( - "expected @type=type.googleapis.com/google.events.cloud.dataflow.v1beta3.JobEventData at json path: $.data.@type, got: bad.type"); - - pipeline.run(); - } - - @Test - void fromJson_missingPayloadKey_emitsAsConversionErrors() { - Pipeline pipeline = Pipeline.create(); - WithFailures.Result<@NonNull PCollection, ConversionError> result = - pipeline - .apply( - Create.of( - "{\"data\":{\"@type\": \"type.googleapis.com/google.events.cloud.dataflow.v1beta3.JobEventData\"}}")) - .apply(EventarcConversions.fromJson()); - PAssert.that(result.output()).empty(); - PAssert.thatSingleton( - result - .failures() - .apply( - "get error message", - MapElements.into(strings()) - .via(error -> 
checkStateNotNull(error).getMessage()))) - .isEqualTo("json input missing path: $.data.payload"); - - pipeline.run(); - } - - @Test - void fromJson_hasUnexpectedProperty_emitsAsConversionErrors() throws IOException { - String resourceName = "eventarc_data/has_extra_data_payload_foo_property.json"; - Pipeline pipeline = Pipeline.create(); - WithFailures.Result<@NonNull PCollection, ConversionError> result = - readJsonThenApplyConversion(resourceName, pipeline); - PAssert.that(result.output()).empty(); - PAssert.thatSingleton( - result - .failures() - .apply( - "get error message", - MapElements.into(strings()) - .via(error -> checkStateNotNull(error).getMessage()))) - .isEqualTo( - "com.google.protobuf.InvalidProtocolBufferException: Cannot find field: foo in message google.events.cloud.dataflow.v1beta3.Job"); - - pipeline.run(); - } - - @Test - void fromJson_JobStateCanceledStreaming_emitsJob() throws IOException { - String resourceName = "eventarc_data/job_state_canceled_streaming.json"; - Pipeline pipeline = Pipeline.create(); - WithFailures.Result<@NonNull PCollection, ConversionError> result = - readJsonThenApplyConversion(resourceName, pipeline); - - PAssert.thatSingleton( - result - .output() - .apply( - "get current state", - MapElements.into(TypeDescriptor.of(JobState.class)) - .via(job -> checkStateNotNull(job).getCurrentState()))) - .isEqualTo(JobState.JOB_STATE_CANCELLED); - - PAssert.thatSingleton( - result - .output() - .apply( - "get job type", - MapElements.into(TypeDescriptor.of(JobType.class)) - .via(job -> checkStateNotNull(job).getType()))) - .isEqualTo(JobType.JOB_TYPE_STREAMING); - - PAssert.thatSingleton( - result - .output() - .apply( - "get job id", - MapElements.into(strings()).via(job -> checkStateNotNull(job).getId()))) - .isEqualTo("2023-05-09_13_23_50-11065941757886660214"); - - PAssert.that(result.failures()).empty(); - - pipeline.run(); - } - - @Test - void jobStateCanceledStreaming_JsonToDataFn() throws IOException { - String payload = loadResource("eventarc_data/job_state_canceled_streaming.json"); - Job actual = FROM_JSON_FN.apply(payload); - assertNotNull(actual); - assertEquals(JobState.JOB_STATE_CANCELLED, actual.getCurrentState()); - assertEquals(JobType.JOB_TYPE_STREAMING, actual.getType()); - assertEquals("2023-05-09_13_23_50-11065941757886660214", actual.getId()); - } - - @Test - void jobStateCancelingStreaming_JsonToDataFn() throws IOException { - String payload = loadResource("eventarc_data/job_state_canceling_streaming.json"); - Job actual = FROM_JSON_FN.apply(payload); - assertNotNull(actual); - assertEquals(JobState.JOB_STATE_CANCELLING, actual.getCurrentState()); - assertEquals(JobType.JOB_TYPE_STREAMING, actual.getType()); - assertEquals("2023-05-09_13_23_50-11065941757886660214", actual.getId()); - } - - @Test - void jobStateDoneBatch_JsonToDataFn() throws IOException { - String payload = loadResource("eventarc_data/job_state_done_batch.json"); - Job actual = FROM_JSON_FN.apply(payload); - assertNotNull(actual); - assertEquals(JobState.JOB_STATE_DONE, actual.getCurrentState()); - assertEquals(JobType.JOB_TYPE_BATCH, actual.getType()); - assertEquals("2023-05-09_13_39_13-18226864771788319755", actual.getId()); - } - - private static WithFailures.Result<@NonNull PCollection, ConversionError> - readJsonThenApplyConversion(String resourcePath, Pipeline pipeline) throws IOException { - - String payload = loadResource(resourcePath); - PCollection json = pipeline.apply(Create.of(payload)); - - return json.apply(EventarcConversions.fromJson()); - } 
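All of the failure-path tests above lean on Beam's WithFailures dead-letter pattern: well-formed inputs flow to the Result's output() collection, while conversion failures are captured as values on failures() instead of crashing the pipeline. A self-contained sketch of the same pattern using Beam's stock exceptionsInto/exceptionsVia (not the deleted EventarcConversions transform itself):

```java
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.WithFailures;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptors;

public class DeadLetterSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create();
    WithFailures.Result<PCollection<Integer>, String> result =
        pipeline
            .apply(Create.of("1", "2", "oops"))
            .apply(
                MapElements.into(TypeDescriptors.integers())
                    .via((String s) -> Integer.parseInt(s))
                    // Failed parses become failure elements instead of errors.
                    .exceptionsInto(TypeDescriptors.strings())
                    .exceptionsVia(ee -> ee.exception().getMessage()));
    PAssert.that(result.output()).containsInAnyOrder(1, 2);
    PAssert.thatSingleton(result.failures().apply(Count.globally())).isEqualTo(1L);
    pipeline.run();
  }
}
```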
- - private static String loadResource(String resourceName) throws IOException { - Path resourcePath = Paths.get("build", "resources", "test", resourceName); - byte[] bytes = Files.readAllBytes(resourcePath); - return new String(bytes, StandardCharsets.UTF_8); - } -} diff --git a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/JobMetricsWithAppendedDetailsTest.java b/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/JobMetricsWithAppendedDetailsTest.java deleted file mode 100644 index ec0e8e7d2832..000000000000 --- a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/JobMetricsWithAppendedDetailsTest.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.conversions; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; -import static org.apache.beam.sdk.values.TypeDescriptors.rows; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import com.google.dataflow.v1beta3.JobMetrics; -import com.google.dataflow.v1beta3.MetricUpdate; -import com.google.protobuf.Descriptors; -import com.google.protobuf.Timestamp; -import com.google.protobuf.Value; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.function.Function; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.testinfra.pipelines.dataflow.JobMetricsWithAppendedDetails; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.Instant; -import org.joda.time.ReadableDateTime; -import org.junit.jupiter.api.Test; - -/** Tests for {@link WithAppendedDetailsToRow} of {@link JobMetricsWithAppendedDetails}. 
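The update_time assertions in the tests below (10,000,000 ms for an update_time of 10,000 s) hold because the deleted row builder converts protobuf Timestamps at whole-second precision: seconds times 1000, nanos dropped. A minimal restatement of that conversion:

```java
import com.google.protobuf.Timestamp;
import org.joda.time.Instant;

final class TimestampConversionSketch {
  // protobuf Timestamps carry seconds + nanos; the Beam Row keeps only
  // millis derived from whole seconds, so nanos are discarded.
  static Instant toJodaInstant(Timestamp ts) {
    return Instant.ofEpochMilli(ts.getSeconds() * 1000);
  }

  public static void main(String[] args) {
    Timestamp ts = Timestamp.newBuilder().setSeconds(10_000L).setNanos(500).build();
    System.out.println(toJodaInstant(ts).getMillis()); // 10000000
  }
}
```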
*/ -class JobMetricsWithAppendedDetailsTest - extends WithAppendedDetailsToRowTest { - - @Override - WithAppendedDetailsToRow transform() { - return WithAppendedDetailsToRow.jobMetricsWithAppendedDetailsToRow(); - } - - @Override - Descriptors.Descriptor embeddedTypeDescriptor() { - return JobMetrics.getDescriptor(); - } - - @Override - String embeddedTypeFieldName() { - return "job_metrics"; - } - - @Override - Function jobIdGetter() { - return JobMetricsWithAppendedDetails::getJobId; - } - - @Override - Function createTimeGetter() { - return JobMetricsWithAppendedDetails::getJobCreateTime; - } - - @Override - @NonNull - List<@NonNull JobMetricsWithAppendedDetails> input() { - JobMetricsWithAppendedDetails details = new JobMetricsWithAppendedDetails(); - details.setJobId("job_id_value"); - details.setJobCreateTime(Instant.ofEpochSecond(1000L)); - details.setJobMetrics( - JobMetrics.getDefaultInstance() - .toBuilder() - .addMetrics( - MetricUpdate.getDefaultInstance() - .toBuilder() - .setUpdateTime(Timestamp.newBuilder().setSeconds(10000L).build()) - .setScalar(Value.newBuilder().setNumberValue(1.23456)) - .build()) - .build()); - return ImmutableList.of(details); - } - - @Test - void jobMetrics() { - - Schema jobMetricsSchema = expectedEmbeddedSchema(); - - Pipeline pipeline = Pipeline.create(); - - PCollection input = pipeline.apply(Create.of(input())); - - RowConversionResult result = - input.apply(transform()); - - PAssert.thatSingleton(result.getFailure().apply("count errors", Count.globally())) - .isEqualTo(0L); - - PCollection jobMetrics = - result - .getSuccess() - .apply( - "job_metrics", - MapElements.into(rows()) - .via(row -> checkStateNotNull(row.getRow(embeddedTypeFieldName())))) - .setRowSchema(jobMetricsSchema); - - PAssert.thatSingleton(jobMetrics.apply("count job_metrics", Count.globally())).isEqualTo(1L); - - jobMetrics.apply( - "metrics", - ParDo.of( - new DoFn() { - @ProcessElement - public void process(@Element Row row) { - Collection metrics = checkStateNotNull(row.getArray("metrics")); - assertEquals(1, metrics.size()); - Row metricsUpdate = checkStateNotNull(new ArrayList<>(metrics).get(0)); - ReadableDateTime timestamp = - checkStateNotNull(metricsUpdate.getDateTime("update_time")); - assertEquals(10000000L, timestamp.getMillis()); - String scaler = checkStateNotNull(metricsUpdate.getString("scalar")); - assertEquals("1.23456", scaler); - } - })); - - pipeline.run(); - } -} diff --git a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/StageSummaryWithAppendedDetailsTest.java b/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/StageSummaryWithAppendedDetailsTest.java deleted file mode 100644 index 1f40f78ca63d..000000000000 --- a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/StageSummaryWithAppendedDetailsTest.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.conversions; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; -import static org.apache.beam.sdk.values.TypeDescriptors.rows; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import com.google.dataflow.v1beta3.MetricUpdate; -import com.google.dataflow.v1beta3.StageSummary; -import com.google.protobuf.Descriptors; -import com.google.protobuf.Timestamp; -import com.google.protobuf.Value; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.function.Function; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.testinfra.pipelines.dataflow.StageSummaryWithAppendedDetails; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.Instant; -import org.joda.time.ReadableDateTime; -import org.junit.jupiter.api.Test; - -/** Tests for {@link WithAppendedDetailsToRow} of {@link StageSummaryWithAppendedDetails}. 
*/ -class StageSummaryWithAppendedDetailsTest - extends WithAppendedDetailsToRowTest { - @Override - WithAppendedDetailsToRow transform() { - return WithAppendedDetailsToRow.stageSummaryWithAppendedDetailsToRow(); - } - - @Override - Descriptors.Descriptor embeddedTypeDescriptor() { - return StageSummary.getDescriptor(); - } - - @Override - String embeddedTypeFieldName() { - return "stage_summary"; - } - - @Override - Function jobIdGetter() { - return StageSummaryWithAppendedDetails::getJobId; - } - - @Override - Function createTimeGetter() { - return StageSummaryWithAppendedDetails::getJobCreateTime; - } - - @Override - @NonNull - List<@NonNull StageSummaryWithAppendedDetails> input() { - StageSummaryWithAppendedDetails details = new StageSummaryWithAppendedDetails(); - details.setJobId("job_id_value"); - details.setJobCreateTime(Instant.ofEpochSecond(1000L)); - details.setStageSummary( - StageSummary.getDefaultInstance() - .toBuilder() - .setStageId("stage_id_a") - .addMetrics( - MetricUpdate.getDefaultInstance() - .toBuilder() - .setUpdateTime(Timestamp.newBuilder().setSeconds(10000L).build()) - .setScalar(Value.newBuilder().setNumberValue(1.23456)) - .build()) - .build()); - return ImmutableList.of(details); - } - - @Test - void stageSummary() { - - Schema stageSummarySchema = expectedEmbeddedSchema(); - - Pipeline pipeline = Pipeline.create(); - - PCollection input = pipeline.apply(Create.of(input())); - - RowConversionResult result = - input.apply(transform()); - - PAssert.thatSingleton(result.getFailure().apply("count errors", Count.globally())) - .isEqualTo(0L); - - PCollection stageSummary = - result - .getSuccess() - .apply( - "stage_summary", - MapElements.into(rows()) - .via(row -> checkStateNotNull(row.getRow(embeddedTypeFieldName())))) - .setRowSchema(stageSummarySchema); - - PAssert.thatSingleton(stageSummary.apply("count stage_summary", Count.globally())) - .isEqualTo(1L); - - stageSummary.apply( - "iterate stage_summary", - ParDo.of( - new DoFn() { - @ProcessElement - public void process(@Element Row row) { - String stageId = checkStateNotNull(row.getString("stage_id")); - assertEquals("stage_id_a", stageId); - Collection metrics = checkStateNotNull(row.getArray("metrics")); - assertEquals(1, metrics.size()); - Row metricsUpdate = checkStateNotNull(new ArrayList<>(metrics).get(0)); - ReadableDateTime timestamp = - checkStateNotNull(metricsUpdate.getDateTime("update_time")); - assertEquals(10000L, timestamp.getMillis() / 1000); - String scalar = checkStateNotNull(metricsUpdate.getString("scalar")); - assertEquals("1.23456", scalar); - } - })); - - pipeline.run(); - } -} diff --git a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/WithAppendedDetailsToRowTest.java b/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/WithAppendedDetailsToRowTest.java deleted file mode 100644 index 9898781b15e5..000000000000 --- a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/WithAppendedDetailsToRowTest.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.conversions; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; -import static org.apache.beam.sdk.values.TypeDescriptors.longs; -import static org.apache.beam.sdk.values.TypeDescriptors.strings; -import static org.apache.beam.testinfra.pipelines.conversions.WithAppendedDetailsToRow.JOB_CREATE_TIME; -import static org.apache.beam.testinfra.pipelines.conversions.WithAppendedDetailsToRow.JOB_ID_FIELD; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import com.google.protobuf.Descriptors.Descriptor; -import com.google.protobuf.GeneratedMessageV3; -import java.io.Serializable; -import java.util.List; -import java.util.function.Function; -import java.util.stream.Collectors; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.testinfra.pipelines.schemas.DescriptorSchemaRegistry; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.Instant; -import org.junit.jupiter.api.Test; - -/** Base class for testing {@link WithAppendedDetailsToRow} transforms. 
*/ -abstract class WithAppendedDetailsToRowTest - implements Serializable { - - private static final DescriptorSchemaRegistry SCHEMA_REGISTRY = DescriptorSchemaRegistry.INSTANCE; - - abstract WithAppendedDetailsToRow transform(); - - abstract Descriptor embeddedTypeDescriptor(); - - abstract String embeddedTypeFieldName(); - - abstract Function jobIdGetter(); - - abstract Function createTimeGetter(); - - abstract @NonNull List<@NonNull AppendedDetailsT> input(); - - @Test - void emittedRowMatchesExpectedSchema() { - Pipeline pipeline = Pipeline.create(); - - PCollection input = pipeline.apply(Create.of(input())); - - RowConversionResult result = input.apply(transform()); - - PAssert.thatSingleton(result.getFailure().apply(Count.globally())).isEqualTo(0L); - - Schema actualSchema = result.getSuccess().getSchema(); - - assertEquals(expectedSchema(), actualSchema); - - pipeline.run(); - } - - @Test - void emittedRowsMatchesJobIds() { - Pipeline pipeline = Pipeline.create(); - - PCollection input = pipeline.apply(Create.of(input())); - - RowConversionResult result = input.apply(transform()); - - PAssert.thatSingleton(result.getFailure().apply(Count.globally())).isEqualTo(0L); - - List expectedJobIds = input().stream().map(jobIdGetter()).collect(Collectors.toList()); - - PAssert.that(jobIdsFrom(result)).containsInAnyOrder(expectedJobIds); - - pipeline.run(); - } - - @Test - void emittedRowsMatchesCreateTimes() { - Pipeline pipeline = Pipeline.create(); - - PCollection input = pipeline.apply(Create.of(input())); - - RowConversionResult result = input.apply(transform()); - - PAssert.thatSingleton(result.getFailure().apply(Count.globally())).isEqualTo(0L); - - List expectedCreateTimes = - input().stream() - .map(createTimeGetter()) - .map(Instant::getMillis) - .collect(Collectors.toList()); - - PAssert.that(createTimesFrom(result)).containsInAnyOrder(expectedCreateTimes); - - pipeline.run(); - } - - protected @NonNull Schema expectedEmbeddedSchema() { - return SCHEMA_REGISTRY.getOrBuild(embeddedTypeDescriptor()); - } - - private @NonNull Schema expectedSchema() { - Schema embeddedSchema = expectedEmbeddedSchema(); - return Schema.of( - JOB_ID_FIELD, - JOB_CREATE_TIME, - Schema.Field.of(embeddedTypeFieldName(), Schema.FieldType.row(embeddedSchema))); - } - - private PCollection jobIdsFrom( - RowConversionResult result) { - return result - .getSuccess() - .apply( - "get job ids", - MapElements.into(strings()) - .via(row -> checkStateNotNull(row).getString(JOB_ID_FIELD.getName()))); - } - - private PCollection createTimesFrom( - RowConversionResult result) { - return result - .getSuccess() - .apply( - "get create times", - MapElements.into(longs()) - .via( - row -> - checkStateNotNull(row) - .getLogicalTypeValue(JOB_CREATE_TIME.getName(), Instant.class) - .getMillis())); - } -} diff --git a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/WorkerDetailsWithAppendedDetailsTest.java b/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/WorkerDetailsWithAppendedDetailsTest.java deleted file mode 100644 index b466343837bc..000000000000 --- a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/conversions/WorkerDetailsWithAppendedDetailsTest.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
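The base class above is a template-method test harness: the shared @Test methods drive abstract hooks (transform(), input(), jobIdGetter(), and so on) that each subclass fills in for its payload type, so every WithAppendedDetails variant inherits the same schema, job-id, and create-time checks for free. A stripped-down sketch of the same layout with hypothetical names:

```java
import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

// Base class owns the test; subclasses supply the type-specific pieces.
abstract class RoundTripTestBase<T> {
  abstract T input();

  abstract String expectedName();

  abstract String nameOf(T value);

  @Test
  void nameRoundTrips() {
    assertEquals(expectedName(), nameOf(input()));
  }
}

class StringRoundTripTest extends RoundTripTestBase<StringBuilder> {
  @Override
  StringBuilder input() {
    return new StringBuilder("stage_id_a");
  }

  @Override
  String expectedName() {
    return "stage_id_a";
  }

  @Override
  String nameOf(StringBuilder value) {
    return value.toString();
  }
}
```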
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.conversions; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; -import static org.apache.beam.sdk.values.TypeDescriptors.rows; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import com.google.dataflow.v1beta3.MetricUpdate; -import com.google.dataflow.v1beta3.WorkItemDetails; -import com.google.dataflow.v1beta3.WorkerDetails; -import com.google.protobuf.Descriptors; -import com.google.protobuf.Timestamp; -import com.google.protobuf.Value; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.function.Function; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.testinfra.pipelines.dataflow.WorkerDetailsWithAppendedDetails; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.Instant; -import org.joda.time.ReadableDateTime; -import org.junit.jupiter.api.Test; - -/** Tests for {@link WithAppendedDetailsToRow} of {@link WorkerDetailsWithAppendedDetails}. 
*/ -class WorkerDetailsWithAppendedDetailsTest - extends WithAppendedDetailsToRowTest { - - @Override - WithAppendedDetailsToRow transform() { - return WithAppendedDetailsToRow.workerDetailsWithAppendedDetailsToRow(); - } - - @Override - Descriptors.Descriptor embeddedTypeDescriptor() { - return WorkerDetails.getDescriptor(); - } - - @Override - String embeddedTypeFieldName() { - return "worker_details"; - } - - @Override - Function jobIdGetter() { - return WorkerDetailsWithAppendedDetails::getJobId; - } - - @Override - Function createTimeGetter() { - return WorkerDetailsWithAppendedDetails::getJobCreateTime; - } - - @Override - @NonNull - List<@NonNull WorkerDetailsWithAppendedDetails> input() { - WorkerDetailsWithAppendedDetails details = new WorkerDetailsWithAppendedDetails(); - details.setJobId("job_id_value"); - details.setJobCreateTime(Instant.ofEpochSecond(1000L)); - details.setWorkerDetails( - WorkerDetails.getDefaultInstance() - .toBuilder() - .setWorkerName("worker_name_value") - .addWorkItems( - WorkItemDetails.getDefaultInstance() - .toBuilder() - .addMetrics( - MetricUpdate.getDefaultInstance() - .toBuilder() - .setUpdateTime(Timestamp.newBuilder().setSeconds(10000L).build()) - .setScalar(Value.newBuilder().setNumberValue(1.23456)) - .build()) - .build()) - .build()); - return ImmutableList.of(details); - } - - @Test - void workerDetails() { - - Schema embeddedSchema = expectedEmbeddedSchema(); - - Pipeline pipeline = Pipeline.create(); - - PCollection input = pipeline.apply(Create.of(input())); - - RowConversionResult result = - input.apply(transform()); - - PAssert.thatSingleton(result.getFailure().apply("count errors", Count.globally())) - .isEqualTo(0L); - - PCollection workDetails = - result - .getSuccess() - .apply( - "work_details", - MapElements.into(rows()) - .via(row -> checkStateNotNull(row).getRow(embeddedTypeFieldName()))) - .setRowSchema(embeddedSchema); - - PAssert.thatSingleton(workDetails.apply("count work_details", Count.globally())).isEqualTo(1L); - - workDetails.apply( - "iterate work_details", - ParDo.of( - new DoFn() { - @ProcessElement - public void process(@Element Row row) { - String workerName = checkStateNotNull(row.getString("worker_name")); - assertEquals("worker_name_value", workerName); - Collection workItems = checkStateNotNull(row.getArray("work_items")); - assertEquals(1, workItems.size()); - Row workItem = checkStateNotNull(new ArrayList<>(workItems).get(0)); - Collection metrics = checkStateNotNull(workItem.getArray("metrics")); - Row metricsUpdate = checkStateNotNull(new ArrayList<>(metrics).get(0)); - ReadableDateTime timestamp = - checkStateNotNull(metricsUpdate.getDateTime("update_time")); - assertEquals(10000L, timestamp.getMillis() / 1000); - String scalar = checkStateNotNull(metricsUpdate.getString("scalar")); - assertEquals("1.23456", scalar); - } - })); - - pipeline.run(); - } -} diff --git a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowFilterEventarcJobsTest.java b/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowFilterEventarcJobsTest.java deleted file mode 100644 index 151e0f9fbd71..000000000000 --- a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/dataflow/DataflowFilterEventarcJobsTest.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.dataflow; - -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import com.google.events.cloud.dataflow.v1beta3.Job; -import com.google.events.cloud.dataflow.v1beta3.JobState; -import com.google.events.cloud.dataflow.v1beta3.JobType; -import java.nio.file.Path; -import java.nio.file.Paths; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.WithFailures; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.apache.beam.testinfra.pipelines.conversions.ConversionError; -import org.apache.beam.testinfra.pipelines.conversions.EventarcConversions; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.junit.jupiter.api.Test; - -/** Tests for {@link DataflowFilterEventarcJobs}. 
*/ -class DataflowFilterEventarcJobsTest { - - private static final String JSON_RESOURCE_PATH = "eventarc_data/job_state*.json"; - - @Test - void filterBatchJobTypeOnly_excludesStreamingJobs() { - Pipeline pipeline = Pipeline.create(); - - PCollection json = readJsonAndCheckNotEmpty(pipeline); - WithFailures.Result<@NonNull PCollection, ConversionError> jobs = - json.apply(EventarcConversions.fromJson()); - PCollection result = - jobs.output() - .apply( - DataflowFilterEventarcJobs.builder() - .setIncludeJobType(JobType.JOB_TYPE_BATCH) - .build()); - PAssert.that( - result.apply( - "get job type", - MapElements.into(TypeDescriptor.of(JobType.class)) - .via(job -> checkStateNotNull(job).getType()))) - .satisfies( - itr -> { - itr.forEach(jobType -> assertEquals(JobType.JOB_TYPE_BATCH, jobType)); - return null; - }); - - pipeline.run(); - } - - @Test - void filterBatchTerminatedOnly_includesDoneJobs() { - Pipeline pipeline = Pipeline.create(); - - PCollection json = readJsonAndCheckNotEmpty(pipeline); - WithFailures.Result<@NonNull PCollection, ConversionError> jobs = - json.apply(EventarcConversions.fromJson()); - PCollection result = - jobs.output() - .apply( - DataflowFilterEventarcJobs.builder() - .setIncludeJobType(JobType.JOB_TYPE_BATCH) - .terminatedOnly() - .build()); - - PAssert.thatSingleton( - result.apply( - "filter batch", - MapElements.into(TypeDescriptor.of(JobType.class)) - .via(job -> checkStateNotNull(job).getType()))) - .isEqualTo(JobType.JOB_TYPE_BATCH); - - PAssert.thatSingleton( - result.apply( - "filter done", - MapElements.into(TypeDescriptor.of(JobState.class)) - .via(job -> checkStateNotNull(job).getCurrentState()))) - .isEqualTo(JobState.JOB_STATE_DONE); - - pipeline.run(); - } - - @Test - void filterStreamTerminatedOnly_includesCanceledJobs() { - Pipeline pipeline = Pipeline.create(); - - PCollection json = readJsonAndCheckNotEmpty(pipeline); - WithFailures.Result<@NonNull PCollection, ConversionError> jobs = - json.apply(EventarcConversions.fromJson()); - PCollection result = - jobs.output() - .apply( - DataflowFilterEventarcJobs.builder() - .setIncludeJobType(JobType.JOB_TYPE_STREAMING) - .terminatedOnly() - .build()); - - PAssert.thatSingleton( - result.apply( - "get type", - MapElements.into(TypeDescriptor.of(JobType.class)) - .via(job -> checkStateNotNull(job).getType()))) - .isEqualTo(JobType.JOB_TYPE_STREAMING); - - PAssert.thatSingleton( - result.apply( - "get current state", - MapElements.into(TypeDescriptor.of(JobState.class)) - .via(job -> checkStateNotNull(job).getCurrentState()))) - .isEqualTo(JobState.JOB_STATE_CANCELLED); - - pipeline.run(); - } - - private static PCollection readJsonAndCheckNotEmpty(Pipeline pipeline) { - Path resourcePath = Paths.get("build", "resources", "test", JSON_RESOURCE_PATH); - PCollection json = - pipeline.apply(TextIO.read().from(resourcePath.toAbsolutePath().toString())); - PAssert.thatSingleton(json.apply(Count.globally())).notEqualTo(0L); - - return json; - } -} diff --git a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/AbstractGeneratedMessageV3RowBuilderTest.java b/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/AbstractGeneratedMessageV3RowBuilderTest.java deleted file mode 100644 index 95e041419ddf..000000000000 --- a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/AbstractGeneratedMessageV3RowBuilderTest.java +++ /dev/null @@ -1,409 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or 
more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.schemas; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import com.google.dataflow.v1beta3.DisplayData; -import com.google.dataflow.v1beta3.ExecutionStageSummary.ComponentSource; -import com.google.dataflow.v1beta3.ExecutionStageSummary.ComponentTransform; -import com.google.dataflow.v1beta3.ExecutionStageSummary.StageSource; -import com.google.protobuf.Descriptors.Descriptor; -import com.google.protobuf.Descriptors.EnumDescriptor; -import com.google.protobuf.Descriptors.FieldDescriptor; -import com.google.protobuf.Descriptors.FieldDescriptor.JavaType; -import com.google.protobuf.Duration; -import com.google.protobuf.GeneratedMessageV3; -import com.google.protobuf.ListValue; -import com.google.protobuf.Message; -import com.google.protobuf.Struct; -import com.google.protobuf.Timestamp; -import com.google.protobuf.Value; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.apache.commons.lang3.tuple.Pair; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.joda.time.ReadableDateTime; -import org.junit.jupiter.api.Test; - -/** Base class for {@link GeneratedMessageV3RowBuilder} based tests of various types. 
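The harness deleted below exercises fields through protobuf reflection rather than generated setters: it looks each field up by name on the Descriptor, sets it generically on a builder, converts, and reads the value back out of the Row. A small illustration of that reflection step, using StageSummary from the same proto dependency:

```java
import com.google.dataflow.v1beta3.StageSummary;
import com.google.protobuf.Descriptors.FieldDescriptor;
import com.google.protobuf.Message;

public class ProtoReflectionSketch {
  public static void main(String[] args) {
    // Resolve the field by name instead of calling setStageId directly.
    FieldDescriptor stageId = StageSummary.getDescriptor().findFieldByName("stage_id");
    Message message =
        StageSummary.getDefaultInstance().toBuilder().setField(stageId, "stage_id_a").build();
    System.out.println(message.getField(stageId)); // stage_id_a
  }
}
```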
*/ -abstract class AbstractGeneratedMessageV3RowBuilderTest { - private static final Map<@NonNull String, GeneratedMessageV3> DEFAULT_INSTANCE_MAP = - ImmutableMap.<@NonNull String, GeneratedMessageV3>builder() - .put(StageSource.getDescriptor().getFullName(), StageSource.getDefaultInstance()) - .put( - ComponentTransform.getDescriptor().getFullName(), - ComponentTransform.getDefaultInstance()) - .put(ComponentSource.getDescriptor().getFullName(), ComponentSource.getDefaultInstance()) - .build(); - - protected abstract @NonNull Descriptor getDescriptorForType(); - - protected abstract @NonNull T getDefaultInstance(); - - protected abstract @NonNull Class getDefaultInstanceClass(); - - protected abstract @NonNull Set<@NonNull String> getStringFields(); - - protected abstract @NonNull Set<@NonNull String> getStringArrayFields(); - - protected abstract @NonNull Set<@NonNull String> getBooleanFields(); - - protected abstract @NonNull Set<@NonNull String> getStructFields(); - - protected abstract @NonNull Set<@NonNull String> getEnumFields(); - - protected abstract @NonNull Set<@NonNull String> getDisplayDataFields(); - - protected abstract @NonNull Set<@NonNull String> getRepeatedMessageFields(); - - @Test - void defaultInstance() { - GeneratedMessageV3RowBuilder builder = builderOf(getDefaultInstance()); - Row row = builder.build(); - assertNotNull(row); - assertEquals( - getDescriptorForType().getFields().stream() - .map(FieldDescriptor::getName) - .collect(Collectors.toSet()), - new HashSet<>(row.getSchema().getFieldNames())); - } - - @Test - void strings() { - Descriptor descriptor = getDescriptorForType(); - for (String fieldName : getStringFields()) { - FieldDescriptor field = descriptor.findFieldByName(fieldName); - assertNotNull(field); - String value = fieldName + "_value"; - Message message = getDefaultInstance().toBuilder().setField(field, value).build(); - T source = cast(message); - Row row = builderOf(source).build(); - assertRowValueEquals(row, fieldName, value); - } - } - - @Test - void booleans() { - Descriptor descriptor = getDescriptorForType(); - for (String fieldName : getBooleanFields()) { - FieldDescriptor field = descriptor.findFieldByName(fieldName); - assertNotNull(field); - Boolean value = true; - Message message = getDefaultInstance().toBuilder().setField(field, value).build(); - T source = cast(message); - Row row = builderOf(source).build(); - assertRowValueEquals(row, fieldName, value); - } - } - - @Test - void stringArrays() { - Descriptor descriptor = getDescriptorForType(); - for (String fieldName : getStringArrayFields()) { - FieldDescriptor field = descriptor.findFieldByName(fieldName); - assertNotNull(field); - - for (int n = 0; n < 3; n++) { - Message.Builder messageBuilder = getDefaultInstance().toBuilder(); - List expected = new ArrayList<>(); - for (int i = 0; i < n; i++) { - String value = String.format("%s_%s_value", fieldName, i); - expected.add(value); - messageBuilder.addRepeatedField(field, value); - } - Message message = messageBuilder.build(); - T source = cast(message); - Row row = builderOf(source).build(); - assertRowRepeatedValueEquals(row, fieldName, expected); - } - } - } - - @Test - void structs() { - Descriptor descriptor = getDescriptorForType(); - for (String fieldName : getStructFields()) { - Row defaultRow = builderOf(getDefaultInstance()).build(); - Row defaultStruct = defaultRow.getRow(fieldName); - assertNotNull(defaultStruct); - Collection defaultFields = defaultStruct.getArray("fields"); - assertNotNull(defaultFields); - 
assertTrue(defaultFields.isEmpty()); - - FieldDescriptor fieldDescriptor = descriptor.findFieldByName(fieldName); - assertNotNull(fieldDescriptor); - Message message = - getDefaultInstance().toBuilder().setField(fieldDescriptor, structOfAllTypes()).build(); - T source = cast(message); - Row row = builderOf(source).build(); - assertNotNull(row); - Row struct = row.getRow(fieldName); - assertNotNull(struct); - Collection fields = struct.getArray("fields"); - assertNotNull(fields); - Map<@NonNull String, @NonNull Object> expectedFields = - ImmutableMap.of( - "struct", - "{\"bool\":true,\"string\":\"string_value\",\"number\":1.234567,\"list\":[true,\"string_value\",1.234567]}"); - Map<@NonNull String, @NonNull Object> actualFields = new HashMap<>(); - for (Row field : fields) { - String key = field.getString("key"); - assertNotNull(key); - Object value = field.getValue("value"); - assertNotNull(value); - actualFields.put(key, value); - } - assertEquals(expectedFields, actualFields); - } - } - - @Test - void enums() { - Descriptor descriptor = getDescriptorForType(); - for (String fieldName : getEnumFields()) { - Row defaultRow = builderOf(getDefaultInstance()).build(); - assertNotNull(defaultRow); - FieldDescriptor fieldDescriptor = descriptor.findFieldByName(fieldName); - assertNotNull(fieldDescriptor); - assertEquals(JavaType.ENUM, fieldDescriptor.getJavaType()); - EnumDescriptor enumDescriptor = fieldDescriptor.getEnumType(); - String defaultEnum = defaultRow.getString(fieldName); - assertEquals(enumDescriptor.findValueByNumber(0).getName(), defaultEnum); - - Message message = - getDefaultInstance() - .toBuilder() - .setField(fieldDescriptor, enumDescriptor.findValueByNumber(1)) - .build(); - T source = cast(message); - Row row = builderOf(source).build(); - assertNotNull(row); - assertEquals(enumDescriptor.findValueByNumber(1).getName(), row.getString(fieldName)); - } - } - - @Test - void displayData() { - Descriptor descriptor = getDescriptorForType(); - for (String fieldName : getDisplayDataFields()) { - FieldDescriptor fieldDescriptor = descriptor.findFieldByName(fieldName); - assertNotNull(fieldDescriptor); - - T defaultInstance = getDefaultInstance(); - Row defaultRow = builderOf(defaultInstance).build(); - assertNotNull(defaultRow); - Collection defaultCollection = defaultRow.getArray(fieldName); - assertNotNull(defaultCollection); - assertTrue(defaultCollection.isEmpty()); - - Map expected = new HashMap<>(); - for (DisplayData data : displayDataOf()) { - expected.put(data.getKey(), data); - } - - Message message = - getDefaultInstance().toBuilder().setField(fieldDescriptor, displayDataOf()).build(); - - T instance = cast(message); - - Row row = builderOf(instance).build(); - assertNotNull(row); - Collection displayData = row.getArray(fieldName); - assertNotNull(displayData); - assertEquals(expected.size(), displayData.size()); - for (Row actual : displayData) { - String key = actual.getString("key"); - assertNotNull(key); - DisplayData expectedData = expected.get(key); - assertNotNull(expectedData); - assertEquals(expectedData.getNamespace(), actual.getString("namespace")); - assertEquals(expectedData.getShortStrValue(), actual.getString("short_str_value")); - assertEquals(expectedData.getUrl(), actual.getString("url")); - assertEquals(expectedData.getLabel(), actual.getString("label")); - assertEquals(expectedData.getBoolValue(), actual.getBoolean("bool_value")); - assertEquals(expectedData.getInt64Value(), actual.getInt64("int64_value")); - 
assertEquals(expectedData.getFloatValue(), actual.getFloat("float_value"));
-        assertEquals(expectedData.getJavaClassValue(), actual.getString("java_class_value"));
-        if (expectedData.hasTimestampValue()) {
-          Timestamp expectedTimestamp = expectedData.getTimestampValue();
-          ReadableDateTime actualTimestamp = actual.getDateTime("timestamp_value");
-          assertNotNull(actualTimestamp);
-          assertEquals(expectedTimestamp.getSeconds() * 1000, actualTimestamp.getMillis());
-        }
-      }
-    }
-  }
-
-  @Test
-  void repeatedMessages() {
-    Descriptor descriptor = getDescriptorForType();
-    for (String fieldName : getRepeatedMessageFields()) {
-      FieldDescriptor fieldDescriptor = descriptor.findFieldByName(fieldName);
-      assertNotNull(fieldDescriptor);
-
-      Row defaultRow = builderOf(getDefaultInstance()).build();
-      assertNotNull(defaultRow);
-      Collection<Row> defaultCollection = defaultRow.getArray(fieldName);
-      assertNotNull(defaultCollection);
-
-      GeneratedMessageV3 member =
-          DEFAULT_INSTANCE_MAP.get(fieldDescriptor.getMessageType().getFullName());
-      assertNotNull(member);
-
-      Message message =
-          getDefaultInstance().toBuilder().addRepeatedField(fieldDescriptor, member).build();
-
-      T instance = cast(message);
-      Row row = builderOf(instance).build();
-      assertNotNull(row);
-      Collection<Row> collection = row.getArray(fieldName);
-      assertNotNull(collection);
-      assertEquals(1, collection.size());
-      Set<String> actualFieldNames =
-          new HashSet<>(new ArrayList<>(collection).get(0).getSchema().getFieldNames());
-      Set<String> expectedFieldNames =
-          fieldDescriptor.getMessageType().getFields().stream()
-              .map(FieldDescriptor::getName)
-              .collect(Collectors.toSet());
-      assertEquals(expectedFieldNames, actualFieldNames);
-    }
-  }
-
-  private static <ValueT> void assertRowValueEquals(
-      Row source, String fieldName, ValueT expected) {
-    Object value = source.getValue(fieldName);
-    assertEquals(expected, value);
-  }
-
-  private static <ValueT> void assertRowRepeatedValueEquals(
-      Row source, String fieldName, List<ValueT> expected) {
-    Collection<ValueT> value = source.getArray(fieldName);
-    assertEquals(expected, value);
-  }
-
-  protected GeneratedMessageV3RowBuilder<T> builderOf(@NonNull T instance) {
-    if (DescriptorSchemaRegistry.INSTANCE.hasNoCachedBuild(getDescriptorForType())) {
-      DescriptorSchemaRegistry.INSTANCE.build(getDescriptorForType());
-    }
-    return new GeneratedMessageV3RowBuilder<>(instance);
-  }
-
-  static Struct structOfAllTypes() {
-    Value boolV = Value.newBuilder().setBoolValue(true).build();
-    Value stringV = Value.newBuilder().setStringValue("string_value").build();
-    Value numberV = Value.newBuilder().setNumberValue(1.234567).build();
-    Value listV =
-        Value.newBuilder()
-            .setListValue(
-                ListValue.getDefaultInstance()
-                    .toBuilder()
-                    .addValues(boolV)
-                    .addValues(stringV)
-                    .addValues(numberV)
-                    .build())
-            .build();
-    Struct base =
-        structOf(
-            Pair.of("bool", boolV),
-            Pair.of("string", stringV),
-            Pair.of("number", numberV),
-            Pair.of("list", listV));
-
-    Value baseStruct = Value.newBuilder().setStructValue(base).build();
-
-    return structOf(Pair.of("struct", baseStruct));
-  }
-
-  static Struct structOf(Pair<String, Value>...
pairs) { - Struct.Builder builder = Struct.newBuilder(); - for (Pair<String, Value> pair : pairs) { - builder.putFields(pair.getKey(), pair.getValue()); - } - return builder.build(); - } - - static List<DisplayData> displayDataOf() { - return ImmutableList.of( - withBoolean(true), - withString("display_string"), - withInt64(1000L), - withFloat(2f), - withJavaClass("com.example.JavaBean"), - withTimestamp(Timestamp.getDefaultInstance().toBuilder().setSeconds(1000L).build()), - withDuration(Duration.getDefaultInstance().toBuilder().setSeconds(10L).build())); - } - - static DisplayData emptyDisplayData() { - return DisplayData.getDefaultInstance(); - } - - static DisplayData withoutValue() { - return DisplayData.getDefaultInstance() - .toBuilder() - .setKey("empty") - .setNamespace("namespace_") - .setUrl("https://example.com") - .setLabel("label_") - .setShortStrValue("short_str_value_") - .build(); - } - - static DisplayData withBoolean(boolean value) { - return withoutValue().toBuilder().setKey("boolean").setBoolValue(value).build(); - } - - static DisplayData withString(String value) { - return withoutValue().toBuilder().setKey("string").setStrValue(value).build(); - } - - static DisplayData withInt64(long value) { - return withoutValue().toBuilder().setKey("long").setInt64Value(value).build(); - } - - static DisplayData withFloat(float value) { - return withoutValue().toBuilder().setKey("float").setFloatValue(value).build(); - } - - static DisplayData withJavaClass(String value) { - return withoutValue().toBuilder().setKey("javaClass").setJavaClassValue(value).build(); - } - - static DisplayData withTimestamp(Timestamp value) { - return withoutValue().toBuilder().setKey("timestamp").setTimestampValue(value).build(); - } - - static DisplayData withDuration(Duration value) { - return withoutValue().toBuilder().setKey("duration").setDurationValue(value).build(); - } - - private T cast(Message message) { - assertTrue(getDefaultInstanceClass().isInstance(message)); - return getDefaultInstanceClass().cast(message); - } -} diff --git a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/DependencyDrivenDescriptorQueueTest.java b/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/DependencyDrivenDescriptorQueueTest.java deleted file mode 100644 index c047a0b7271b..000000000000 --- a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/DependencyDrivenDescriptorQueueTest.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements.  See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership.  The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.  You may obtain a copy of the License at - * - *     http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ -package org.apache.beam.testinfra.pipelines.schemas; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import com.google.dataflow.v1beta3.Job; -import com.google.protobuf.Descriptors.Descriptor; -import com.google.protobuf.Descriptors.FieldDescriptor.JavaType; -import java.util.Iterator; -import org.junit.jupiter.api.Test; - -/** Tests for {@link DependencyDrivenDescriptorQueue}. */ -class DependencyDrivenDescriptorQueueTest { - @Test - void iterator_Job_isInDependencyOrder() { - DependencyDrivenDescriptorQueue queue = new DependencyDrivenDescriptorQueue(); - queue.enqueue(Job.getDescriptor()); - Iterator<Descriptor> itr = queue.iterator(); - Descriptor previous = itr.next(); - int previousMessageCount = messageCount(previous); - assertTrue(itr.hasNext()); - while (itr.hasNext()) { - Descriptor current = itr.next(); - int currentMessageCount = messageCount(current); - assertTrue(previousMessageCount <= currentMessageCount); - boolean previousDependsOnCurrent = - previous.getFields().stream() - .anyMatch( - field -> - field.getJavaType().equals(JavaType.MESSAGE) - && field.getMessageType().getFullName().equals(current.getFullName())); - assertFalse(previousDependsOnCurrent); - previous = current; - previousMessageCount = currentMessageCount; - } - } - - private static int messageCount(Descriptor descriptor) { - return (int) - descriptor.getFields().stream() - .filter(field -> field.getJavaType().equals(JavaType.MESSAGE)) - .count(); - } -} diff --git a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/DescriptorSchemaRegistryTest.java b/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/DescriptorSchemaRegistryTest.java deleted file mode 100644 index afa650a33cf1..000000000000 --- a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/DescriptorSchemaRegistryTest.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements.  See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership.  The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.  You may obtain a copy of the License at - * - *     http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.schemas; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import com.google.dataflow.v1beta3.Job; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.schemas.Schema.FieldType; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -/** Tests for {@link DescriptorSchemaRegistry}.
*/ -class DescriptorSchemaRegistryTest { - - private static final DescriptorSchemaRegistry REGISTRY = DescriptorSchemaRegistry.INSTANCE; - - @BeforeAll - public static void setup() { - REGISTRY.build(Job.getDescriptor()); - } - - @Test - void build_Job() { - Schema mapSchema = - Schema.of( - Schema.Field.of("key", FieldType.STRING), Schema.Field.of("value", FieldType.STRING)); - Schema schema = REGISTRY.getOrBuild(Job.getDescriptor()); - assertEquals(Schema.Field.of("id", FieldType.STRING), schema.getField("id")); - assertEquals(Schema.Field.of("type", FieldType.STRING), schema.getField("type")); - assertEquals( - Schema.Field.of("create_time", FieldType.DATETIME), schema.getField("create_time")); - assertEquals( - Schema.Field.of( - "steps", - FieldType.array( - FieldType.row( - Schema.of( - Schema.Field.of("kind", FieldType.STRING), - Schema.Field.of("name", FieldType.STRING), - Schema.Field.of( - "properties", - FieldType.row( - Schema.of( - Schema.Field.of( - "fields", FieldType.array(FieldType.row(mapSchema)))))))))), - schema.getField("steps")); - assertEquals( - Schema.Field.of("transform_name_mapping", FieldType.array(FieldType.row(mapSchema))), - schema.getField("transform_name_mapping")); - assertEquals( - Schema.Field.of( - "stage_states", - FieldType.array( - FieldType.row( - Schema.of( - Schema.Field.of("execution_stage_name", FieldType.STRING), - Schema.Field.of("execution_stage_state", FieldType.STRING), - Schema.Field.of("current_state_time", FieldType.DATETIME))))), - schema.getField("stage_states")); - assertEquals( - Schema.Field.of("satisfies_pzs", FieldType.BOOLEAN), schema.getField("satisfies_pzs")); - } -} diff --git a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/EnvironmentRowBuilderTest.java b/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/EnvironmentRowBuilderTest.java deleted file mode 100644 index a0be8b29568f..000000000000 --- a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/EnvironmentRowBuilderTest.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.testinfra.pipelines.schemas; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import com.google.dataflow.v1beta3.DebugOptions; -import com.google.dataflow.v1beta3.Environment; -import com.google.dataflow.v1beta3.WorkerPool; -import com.google.protobuf.Descriptors.Descriptor; -import java.util.Collection; -import java.util.Collections; -import java.util.Set; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.junit.jupiter.api.Test; - -/** - * Tests for converting an {@link Environment} to a {@link Row} using {@link - * GeneratedMessageV3RowBuilder}. - */ -class EnvironmentRowBuilderTest extends AbstractGeneratedMessageV3RowBuilderTest<Environment> { - - @Override - protected @NonNull Descriptor getDescriptorForType() { - return Environment.getDescriptor(); - } - - @Override - protected @NonNull Environment getDefaultInstance() { - return Environment.getDefaultInstance(); - } - - @Override - protected @NonNull Class<Environment> getDefaultInstanceClass() { - return Environment.class; - } - - @Override - protected @NonNull Set<@NonNull String> getStringFields() { - return ImmutableSet.of( - "temp_storage_prefix", - "cluster_manager_api_service", - "service_kms_key_name", - "dataset", - "service_account_email", - "worker_region", - "worker_zone"); - } - - @Override - protected @NonNull Set<@NonNull String> getStringArrayFields() { - return ImmutableSet.of("experiments", "service_options"); - } - - @Override - protected @NonNull Set<@NonNull String> getBooleanFields() { - return Collections.emptySet(); - } - - @Override - protected @NonNull Set<@NonNull String> getStructFields() { - return ImmutableSet.of("user_agent", "version", "sdk_pipeline_options"); - } - - @Override - protected @NonNull Set<@NonNull String> getEnumFields() { - return ImmutableSet.of("flex_resource_scheduling_goal", "shuffle_mode"); - } - - @Override - protected @NonNull Set<@NonNull String> getDisplayDataFields() { - return ImmutableSet.of(); - } - - @Override - protected @NonNull Set<@NonNull String> getRepeatedMessageFields() { - return ImmutableSet.of(); - } - - @Test - void workerPools() { - String fieldName = "worker_pools"; - Environment defaultInstance = getDefaultInstance(); - Row defaultRow = builderOf(defaultInstance).build(); - assertNotNull(defaultRow); - Collection<Row> defaultCollection = defaultRow.getArray(fieldName); - assertNotNull(defaultCollection); - assertTrue(defaultCollection.isEmpty()); - - Environment instance = - getDefaultInstance().toBuilder().addWorkerPools(WorkerPool.getDefaultInstance()).build(); - Row row = builderOf(instance).build(); - assertNotNull(row); - Collection<Row> collection = row.getArray(fieldName); - assertNotNull(collection); - assertEquals(1, collection.size()); - } - - @Test - void debugOptions() { - String fieldName = "debug_options"; - Environment defaultInstance = getDefaultInstance(); - Row defaultRow = builderOf(defaultInstance).build(); - assertNotNull(defaultRow); - Row defaultDebugOptions = defaultRow.getRow(fieldName); - assertNotNull(defaultDebugOptions); - assertTrue(defaultDebugOptions.getSchema().hasField("enable_hot_key_logging")); - assertEquals(false, defaultDebugOptions.getBoolean("enable_hot_key_logging")); - - Environment instance = - getDefaultInstance() - .toBuilder() - .setDebugOptions( -
DebugOptions.getDefaultInstance().toBuilder().setEnableHotKeyLogging(true).build()) - .build(); - Row row = builderOf(instance).build(); - assertNotNull(row); - Row debugOptions = row.getRow(fieldName); - assertNotNull(debugOptions); - assertEquals(true, debugOptions.getBoolean("enable_hot_key_logging")); - } -} diff --git a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/ExecutionStageSummaryTest.java b/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/ExecutionStageSummaryTest.java deleted file mode 100644 index a0b489cf5745..000000000000 --- a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/ExecutionStageSummaryTest.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.schemas; - -import com.google.dataflow.v1beta3.ExecutionStageSummary; -import com.google.protobuf.Descriptors.Descriptor; -import java.util.Set; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; -import org.checkerframework.checker.nullness.qual.NonNull; - -/** - * Tests for converting an {@link ExecutionStageSummary} to a {@link Row} using {@link - * GeneratedMessageV3RowBuilder}. 
- */ -class ExecutionStageSummaryTest - extends AbstractGeneratedMessageV3RowBuilderTest<ExecutionStageSummary> { - - @Override - protected @NonNull Descriptor getDescriptorForType() { - return ExecutionStageSummary.getDescriptor(); - } - - @Override - protected @NonNull ExecutionStageSummary getDefaultInstance() { - return ExecutionStageSummary.getDefaultInstance(); - } - - @Override - protected @NonNull Class<ExecutionStageSummary> getDefaultInstanceClass() { - return ExecutionStageSummary.class; - } - - @Override - protected @NonNull Set<@NonNull String> getStringFields() { - return ImmutableSet.of("name", "id"); - } - - @Override - protected @NonNull Set<@NonNull String> getStringArrayFields() { - return ImmutableSet.of("prerequisite_stage"); - } - - @Override - protected @NonNull Set<@NonNull String> getBooleanFields() { - return ImmutableSet.of(); - } - - @Override - protected @NonNull Set<@NonNull String> getStructFields() { - return ImmutableSet.of(); - } - - @Override - protected @NonNull Set<@NonNull String> getEnumFields() { - return ImmutableSet.of("kind"); - } - - @Override - protected @NonNull Set<@NonNull String> getDisplayDataFields() { - return ImmutableSet.of(); - } - - @Override - protected @NonNull Set<@NonNull String> getRepeatedMessageFields() { - return ImmutableSet.of( - "input_source", "output_source", "component_transform", "component_source"); - } -} diff --git a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/TransformSummaryRowBuilderTest.java b/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/TransformSummaryRowBuilderTest.java deleted file mode 100644 index 19afba803336..000000000000 --- a/.test-infra/pipelines/src/test/java/org/apache/beam/testinfra/pipelines/schemas/TransformSummaryRowBuilderTest.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements.  See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership.  The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.  You may obtain a copy of the License at - * - *     http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.testinfra.pipelines.schemas; - -import com.google.dataflow.v1beta3.TransformSummary; -import com.google.protobuf.Descriptors.Descriptor; -import java.util.Set; -import org.apache.beam.sdk.values.Row; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; -import org.checkerframework.checker.nullness.qual.NonNull; - -/** - * Tests for converting a {@link TransformSummary} to a {@link Row} using {@link - * GeneratedMessageV3RowBuilder}.
- */ -class TransformSummaryRowBuilderTest - extends AbstractGeneratedMessageV3RowBuilderTest<TransformSummary> { - - @Override - protected @NonNull Descriptor getDescriptorForType() { - return TransformSummary.getDescriptor(); - } - - @Override - protected @NonNull TransformSummary getDefaultInstance() { - return TransformSummary.getDefaultInstance(); - } - - @Override - protected @NonNull Class<TransformSummary> getDefaultInstanceClass() { - return TransformSummary.class; - } - - @Override - protected @NonNull Set<@NonNull String> getStringFields() { - return ImmutableSet.of("id", "name"); - } - - @Override - protected @NonNull Set<@NonNull String> getStringArrayFields() { - return ImmutableSet.of("output_collection_name", "input_collection_name"); - } - - @Override - protected @NonNull Set<@NonNull String> getBooleanFields() { - return ImmutableSet.of(); - } - - @Override - protected @NonNull Set<@NonNull String> getStructFields() { - return ImmutableSet.of(); - } - - @Override - protected @NonNull Set<@NonNull String> getEnumFields() { - return ImmutableSet.of("kind"); - } - - @Override - protected @NonNull Set<@NonNull String> getDisplayDataFields() { - return ImmutableSet.of("display_data"); - } - - @Override - protected @NonNull Set<@NonNull String> getRepeatedMessageFields() { - return ImmutableSet.of(); - } -} diff --git a/.test-infra/pipelines/src/test/resources/eventarc_data/has_extra_data_payload_foo_property.json b/.test-infra/pipelines/src/test/resources/eventarc_data/has_extra_data_payload_foo_property.json deleted file mode 100644 index 6e41df872ce6..000000000000 --- a/.test-infra/pipelines/src/test/resources/eventarc_data/has_extra_data_payload_foo_property.json +++ /dev/null @@ -1 +0,0 @@ -{"data": {"@type": "type.googleapis.com/google.events.cloud.dataflow.v1beta3.JobEventData", "payload": {"foo": "bar", "createTime": "2023-05-09T20:23:51.386359Z", "currentState": "JOB_STATE_CANCELLED", "currentStateTime": "2023-05-09T20:37:51.386412Z", "environment": {"userAgent": {"container.base_repository": "gcr.io/cloud-dataflow/v1beta3", "fnapi.container.version": "2.46.0", "fnapi.environment.major.version": "8", "java.vendor": "Oracle Corporation", "java.version": "11.0.9", "legacy.container.version": "2.46.0", "legacy.environment.major.version": "8", "name": "Apache Beam SDK for Java", "os.arch": "amd64", "os.name": "Linux", "os.version": "5.10.147+", "version": "2.46.0"}, "version": {"job_type": "STREAMING", "major": "8"}}, "id": "2023-05-09_13_23_50-11065941757886660214", "jobMetadata": {"sdkVersion": {"sdkSupportStatus": "SUPPORTED", "version": "2.46.0", "versionDisplayName": "Apache Beam SDK for Java"}}, "labels": {"goog-dataflow-provided-template-name": "cloud_pubsub_to_gcs_text_flex", "goog-dataflow-provided-template-type": "flex", "goog-dataflow-provided-template-version": "2023-04-25-00_rc00"}, "location": "us-central1", "name": "pubsub-to-gcs", "projectId": "temp-aac6645f", "startTime": "2023-05-09T20:23:51.386359Z", "type": "JOB_TYPE_STREAMING"}}, "datacontenttype": "application/json; charset=utf-8", "dataschema": "https://googleapis.github.io/google-cloudevents/proto/google/events/cloud/dataflow/v1beta3/data.proto#JobEventData", "id": "54464e9920f90a134a807b3668768aa", "job": "pubsub-to-gcs", "location": "us-central1", "project": "641223881136", "source": "//dataflow.googleapis.com/projects/temp-aac6645f/locations/us-central1", "specversion": "1.0", "subject": "jobs/pubsub-to-gcs", "time": "2023-05-09T20:37:51.386412Z", "type": "google.cloud.dataflow.job.v1beta3.statusChanged"} \ No newline at end of
file diff --git a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_canceled_streaming.json b/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_canceled_streaming.json deleted file mode 100644 index 55d8ada3c2a6..000000000000 --- a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_canceled_streaming.json +++ /dev/null @@ -1 +0,0 @@ -{"data": {"@type": "type.googleapis.com/google.events.cloud.dataflow.v1beta3.JobEventData", "payload": {"createTime": "2023-05-09T20:23:51.386359Z", "currentState": "JOB_STATE_CANCELLED", "currentStateTime": "2023-05-09T20:37:51.386412Z", "environment": {"userAgent": {"container.base_repository": "gcr.io/cloud-dataflow/v1beta3", "fnapi.container.version": "2.46.0", "fnapi.environment.major.version": "8", "java.vendor": "Oracle Corporation", "java.version": "11.0.9", "legacy.container.version": "2.46.0", "legacy.environment.major.version": "8", "name": "Apache Beam SDK for Java", "os.arch": "amd64", "os.name": "Linux", "os.version": "5.10.147+", "version": "2.46.0"}, "version": {"job_type": "STREAMING", "major": "8"}}, "id": "2023-05-09_13_23_50-11065941757886660214", "jobMetadata": {"sdkVersion": {"sdkSupportStatus": "SUPPORTED", "version": "2.46.0", "versionDisplayName": "Apache Beam SDK for Java"}}, "labels": {"goog-dataflow-provided-template-name": "cloud_pubsub_to_gcs_text_flex", "goog-dataflow-provided-template-type": "flex", "goog-dataflow-provided-template-version": "2023-04-25-00_rc00"}, "location": "us-central1", "name": "pubsub-to-gcs", "projectId": "temp-aac6645f", "startTime": "2023-05-09T20:23:51.386359Z", "type": "JOB_TYPE_STREAMING"}}, "datacontenttype": "application/json; charset=utf-8", "dataschema": "https://googleapis.github.io/google-cloudevents/proto/google/events/cloud/dataflow/v1beta3/data.proto#JobEventData", "id": "54464e9920f90a134a807b3668768aa", "job": "pubsub-to-gcs", "location": "us-central1", "project": "641223881136", "source": "//dataflow.googleapis.com/projects/temp-aac6645f/locations/us-central1", "specversion": "1.0", "subject": "jobs/pubsub-to-gcs", "time": "2023-05-09T20:37:51.386412Z", "type": "google.cloud.dataflow.job.v1beta3.statusChanged"} \ No newline at end of file diff --git a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_canceling_streaming.json b/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_canceling_streaming.json deleted file mode 100644 index 92ef37045ce9..000000000000 --- a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_canceling_streaming.json +++ /dev/null @@ -1 +0,0 @@ -{"data": {"@type": "type.googleapis.com/google.events.cloud.dataflow.v1beta3.JobEventData", "payload": {"createTime": "2023-05-09T20:23:51.386359Z", "currentState": "JOB_STATE_CANCELLING", "currentStateTime": "2023-05-09T20:37:02.388860Z", "environment": {"userAgent": {"container.base_repository": "gcr.io/cloud-dataflow/v1beta3", "fnapi.container.version": "2.46.0", "fnapi.environment.major.version": "8", "java.vendor": "Oracle Corporation", "java.version": "11.0.9", "legacy.container.version": "2.46.0", "legacy.environment.major.version": "8", "name": "Apache Beam SDK for Java", "os.arch": "amd64", "os.name": "Linux", "os.version": "5.10.147+", "version": "2.46.0"}, "version": {"job_type": "STREAMING", "major": "8"}}, "id": "2023-05-09_13_23_50-11065941757886660214", "jobMetadata": {"sdkVersion": {"sdkSupportStatus": "SUPPORTED", "version": "2.46.0", "versionDisplayName": "Apache Beam SDK for Java"}}, "labels": 
{"goog-dataflow-provided-template-name": "cloud_pubsub_to_gcs_text_flex", "goog-dataflow-provided-template-type": "flex", "goog-dataflow-provided-template-version": "2023-04-25-00_rc00"}, "location": "us-central1", "name": "pubsub-to-gcs", "projectId": "temp-aac6645f", "requestedState": "JOB_STATE_CANCELLED", "startTime": "2023-05-09T20:23:51.386359Z", "type": "JOB_TYPE_STREAMING"}}, "datacontenttype": "application/json; charset=utf-8", "dataschema": "https://googleapis.github.io/google-cloudevents/proto/google/events/cloud/dataflow/v1beta3/data.proto#JobEventData", "id": "9031810c622ceae915d1f303d0440216", "job": "pubsub-to-gcs", "location": "us-central1", "project": "641223881136", "source": "//dataflow.googleapis.com/projects/temp-aac6645f/locations/us-central1", "specversion": "1.0", "subject": "jobs/pubsub-to-gcs", "time": "2023-05-09T20:37:02.388860Z", "type": "google.cloud.dataflow.job.v1beta3.statusChanged"} \ No newline at end of file diff --git a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_done_batch.json b/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_done_batch.json deleted file mode 100644 index 5ec88ac2647b..000000000000 --- a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_done_batch.json +++ /dev/null @@ -1 +0,0 @@ -{"data": {"@type": "type.googleapis.com/google.events.cloud.dataflow.v1beta3.JobEventData", "payload": {"createTime": "2023-05-09T20:39:13.690602Z", "currentState": "JOB_STATE_DONE", "currentStateTime": "2023-05-09T20:42:29.054956Z", "environment": {"shuffleMode": "SERVICE_BASED", "userAgent": {"container.base_repository": "gcr.io/cloud-dataflow/v1beta3", "fnapi.container.version": "2.46.0", "fnapi.environment.major.version": "8", "java.vendor": "Google Inc.", "java.version": "11.0.15", "legacy.container.version": "2.46.0", "legacy.environment.major.version": "8", "name": "Apache Beam SDK for Java", "os.arch": "amd64", "os.name": "Linux", "os.version": "4.15.0-smp-928.26.0.0", "version": "2.46.0"}, "version": {"job_type": "JAVA_BATCH_AUTOSCALING", "major": "8"}}, "id": "2023-05-09_13_39_13-18226864771788319755", "jobMetadata": {"sdkVersion": {"sdkSupportStatus": "SUPPORTED", "version": "2.46.0", "versionDisplayName": "Apache Beam SDK for Java"}}, "labels": {"goog-dataflow-provided-template-name": "word_count", "goog-dataflow-provided-template-type": "legacy", "goog-dataflow-provided-template-version": "2023-04-25-00_rc00"}, "location": "us-central1", "name": "wordcount", "projectId": "temp-aac6645f", "startTime": "2023-05-09T20:39:13.690602Z", "type": "JOB_TYPE_BATCH"}}, "datacontenttype": "application/json; charset=utf-8", "dataschema": "https://googleapis.github.io/google-cloudevents/proto/google/events/cloud/dataflow/v1beta3/data.proto#JobEventData", "id": "1f73d7cdba847a3c35963f3354833fba", "job": "wordcount", "location": "us-central1", "project": "641223881136", "source": "//dataflow.googleapis.com/projects/temp-aac6645f/locations/us-central1", "specversion": "1.0", "subject": "jobs/wordcount", "time": "2023-05-09T20:42:29.054956Z", "type": "google.cloud.dataflow.job.v1beta3.statusChanged"} \ No newline at end of file diff --git a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_pending_batch.json b/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_pending_batch.json deleted file mode 100644 index 83e49c6f0a46..000000000000 --- a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_pending_batch.json +++ /dev/null @@ -1 +0,0 @@ -{"data": {"@type": 
"type.googleapis.com/google.events.cloud.dataflow.v1beta3.JobEventData", "payload": {"createTime": "2023-05-09T20:39:13.690602Z", "currentState": "JOB_STATE_PENDING", "currentStateTime": "2023-05-09T20:39:13.690602Z", "environment": {"shuffleMode": "SERVICE_BASED", "userAgent": {"container.base_repository": "gcr.io/cloud-dataflow/v1beta3", "fnapi.container.version": "2.46.0", "fnapi.environment.major.version": "8", "java.vendor": "Google Inc.", "java.version": "11.0.15", "legacy.container.version": "2.46.0", "legacy.environment.major.version": "8", "name": "Apache Beam SDK for Java", "os.arch": "amd64", "os.name": "Linux", "os.version": "4.15.0-smp-928.26.0.0", "version": "2.46.0"}, "version": {"job_type": "JAVA_BATCH_AUTOSCALING", "major": "8"}}, "id": "2023-05-09_13_39_13-18226864771788319755", "jobMetadata": {"sdkVersion": {"sdkSupportStatus": "SUPPORTED", "version": "2.46.0", "versionDisplayName": "Apache Beam SDK for Java"}}, "labels": {"goog-dataflow-provided-template-name": "word_count", "goog-dataflow-provided-template-type": "legacy", "goog-dataflow-provided-template-version": "2023-04-25-00_rc00"}, "location": "us-central1", "name": "wordcount", "projectId": "temp-aac6645f", "startTime": "2023-05-09T20:39:13.690602Z", "type": "JOB_TYPE_BATCH"}}, "datacontenttype": "application/json; charset=utf-8", "dataschema": "https://googleapis.github.io/google-cloudevents/proto/google/events/cloud/dataflow/v1beta3/data.proto#JobEventData", "id": "e2a0f3fc7b45602bce23b4cc2c9150bf", "job": "wordcount", "location": "us-central1", "project": "641223881136", "source": "//dataflow.googleapis.com/projects/temp-aac6645f/locations/us-central1", "specversion": "1.0", "subject": "jobs/wordcount", "time": "2023-05-09T20:39:13.690602Z", "type": "google.cloud.dataflow.job.v1beta3.statusChanged"} \ No newline at end of file diff --git a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_pending_streaming.json b/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_pending_streaming.json deleted file mode 100644 index 5a53cfe466ac..000000000000 --- a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_pending_streaming.json +++ /dev/null @@ -1 +0,0 @@ -{"data": {"@type": "type.googleapis.com/google.events.cloud.dataflow.v1beta3.JobEventData", "payload": {"createTime": "2023-05-09T20:23:51.386359Z", "currentState": "JOB_STATE_PENDING", "currentStateTime": "2023-05-09T20:25:38.932271Z", "environment": {"userAgent": {"container.base_repository": "gcr.io/cloud-dataflow/v1beta3", "fnapi.container.version": "2.46.0", "fnapi.environment.major.version": "8", "java.vendor": "Oracle Corporation", "java.version": "11.0.9", "legacy.container.version": "2.46.0", "legacy.environment.major.version": "8", "name": "Apache Beam SDK for Java", "os.arch": "amd64", "os.name": "Linux", "os.version": "5.10.147+", "version": "2.46.0"}, "version": {"job_type": "STREAMING", "major": "8"}}, "id": "2023-05-09_13_23_50-11065941757886660214", "jobMetadata": {"sdkVersion": {"sdkSupportStatus": "SUPPORTED", "version": "2.46.0", "versionDisplayName": "Apache Beam SDK for Java"}}, "labels": {"goog-dataflow-provided-template-name": "cloud_pubsub_to_gcs_text_flex", "goog-dataflow-provided-template-type": "flex", "goog-dataflow-provided-template-version": "2023-04-25-00_rc00"}, "location": "us-central1", "name": "pubsub-to-gcs", "projectId": "temp-aac6645f", "startTime": "2023-05-09T20:23:51.386359Z", "type": "JOB_TYPE_STREAMING"}}, "datacontenttype": "application/json; charset=utf-8", "dataschema": 
"https://googleapis.github.io/google-cloudevents/proto/google/events/cloud/dataflow/v1beta3/data.proto#JobEventData", "id": "48ecdd08fed35e33bac95c0aa95f7298", "job": "pubsub-to-gcs", "location": "us-central1", "project": "641223881136", "source": "//dataflow.googleapis.com/projects/temp-aac6645f/locations/us-central1", "specversion": "1.0", "subject": "jobs/pubsub-to-gcs", "time": "2023-05-09T20:25:38.932271Z", "type": "google.cloud.dataflow.job.v1beta3.statusChanged"} \ No newline at end of file diff --git a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_queued_streaming.json b/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_queued_streaming.json deleted file mode 100644 index c6a8b72caa54..000000000000 --- a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_queued_streaming.json +++ /dev/null @@ -1 +0,0 @@ -{"data": {"@type": "type.googleapis.com/google.events.cloud.dataflow.v1beta3.JobEventData", "payload": {"createTime": "2023-05-09T20:23:51.386359Z", "currentState": "JOB_STATE_QUEUED", "currentStateTime": "2023-05-09T20:23:51.386359Z", "environment": {}, "id": "2023-05-09_13_23_50-11065941757886660214", "location": "us-central1", "name": "pubsub-to-gcs", "projectId": "temp-aac6645f", "startTime": "2023-05-09T20:23:51.386359Z"}}, "datacontenttype": "application/json; charset=utf-8", "dataschema": "https://googleapis.github.io/google-cloudevents/proto/google/events/cloud/dataflow/v1beta3/data.proto#JobEventData", "id": "d97207126d9202dabe3101eada03b32c", "job": "pubsub-to-gcs", "location": "us-central1", "project": "641223881136", "source": "//dataflow.googleapis.com/projects/temp-aac6645f/locations/us-central1", "specversion": "1.0", "subject": "jobs/pubsub-to-gcs", "time": "2023-05-09T20:23:51.386359Z", "type": "google.cloud.dataflow.job.v1beta3.statusChanged"} \ No newline at end of file diff --git a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_running_batch.json b/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_running_batch.json deleted file mode 100644 index 8f2780bed576..000000000000 --- a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_running_batch.json +++ /dev/null @@ -1 +0,0 @@ -{"data": {"@type": "type.googleapis.com/google.events.cloud.dataflow.v1beta3.JobEventData", "payload": {"createTime": "2023-05-09T20:39:13.690602Z", "currentState": "JOB_STATE_RUNNING", "currentStateTime": "2023-05-09T20:39:17.928139Z", "environment": {"shuffleMode": "SERVICE_BASED", "userAgent": {"container.base_repository": "gcr.io/cloud-dataflow/v1beta3", "fnapi.container.version": "2.46.0", "fnapi.environment.major.version": "8", "java.vendor": "Google Inc.", "java.version": "11.0.15", "legacy.container.version": "2.46.0", "legacy.environment.major.version": "8", "name": "Apache Beam SDK for Java", "os.arch": "amd64", "os.name": "Linux", "os.version": "4.15.0-smp-928.26.0.0", "version": "2.46.0"}, "version": {"job_type": "JAVA_BATCH_AUTOSCALING", "major": "8"}}, "id": "2023-05-09_13_39_13-18226864771788319755", "jobMetadata": {"sdkVersion": {"sdkSupportStatus": "SUPPORTED", "version": "2.46.0", "versionDisplayName": "Apache Beam SDK for Java"}}, "labels": {"goog-dataflow-provided-template-name": "word_count", "goog-dataflow-provided-template-type": "legacy", "goog-dataflow-provided-template-version": "2023-04-25-00_rc00"}, "location": "us-central1", "name": "wordcount", "projectId": "temp-aac6645f", "startTime": "2023-05-09T20:39:13.690602Z", "type": "JOB_TYPE_BATCH"}}, "datacontenttype": 
"application/json; charset=utf-8", "dataschema": "https://googleapis.github.io/google-cloudevents/proto/google/events/cloud/dataflow/v1beta3/data.proto#JobEventData", "id": "ca694e08aabb07465926af232904a941", "job": "wordcount", "location": "us-central1", "project": "641223881136", "source": "//dataflow.googleapis.com/projects/temp-aac6645f/locations/us-central1", "specversion": "1.0", "subject": "jobs/wordcount", "time": "2023-05-09T20:39:17.928139Z", "type": "google.cloud.dataflow.job.v1beta3.statusChanged"} \ No newline at end of file diff --git a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_running_streaming.json b/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_running_streaming.json deleted file mode 100644 index 42075dcfbd45..000000000000 --- a/.test-infra/pipelines/src/test/resources/eventarc_data/job_state_running_streaming.json +++ /dev/null @@ -1 +0,0 @@ -{"data": {"@type": "type.googleapis.com/google.events.cloud.dataflow.v1beta3.JobEventData", "payload": {"createTime": "2023-05-09T20:23:51.386359Z", "currentState": "JOB_STATE_RUNNING", "currentStateTime": "2023-05-09T20:26:05.500274Z", "environment": {"userAgent": {"container.base_repository": "gcr.io/cloud-dataflow/v1beta3", "fnapi.container.version": "2.46.0", "fnapi.environment.major.version": "8", "java.vendor": "Oracle Corporation", "java.version": "11.0.9", "legacy.container.version": "2.46.0", "legacy.environment.major.version": "8", "name": "Apache Beam SDK for Java", "os.arch": "amd64", "os.name": "Linux", "os.version": "5.10.147+", "version": "2.46.0"}, "version": {"job_type": "STREAMING", "major": "8"}}, "id": "2023-05-09_13_23_50-11065941757886660214", "jobMetadata": {"sdkVersion": {"sdkSupportStatus": "SUPPORTED", "version": "2.46.0", "versionDisplayName": "Apache Beam SDK for Java"}}, "labels": {"goog-dataflow-provided-template-name": "cloud_pubsub_to_gcs_text_flex", "goog-dataflow-provided-template-type": "flex", "goog-dataflow-provided-template-version": "2023-04-25-00_rc00"}, "location": "us-central1", "name": "pubsub-to-gcs", "projectId": "temp-aac6645f", "startTime": "2023-05-09T20:23:51.386359Z", "type": "JOB_TYPE_STREAMING"}}, "datacontenttype": "application/json; charset=utf-8", "dataschema": "https://googleapis.github.io/google-cloudevents/proto/google/events/cloud/dataflow/v1beta3/data.proto#JobEventData", "id": "e43563146f083fe9cad14f6006c717f5", "job": "pubsub-to-gcs", "location": "us-central1", "project": "641223881136", "source": "//dataflow.googleapis.com/projects/temp-aac6645f/locations/us-central1", "specversion": "1.0", "subject": "jobs/pubsub-to-gcs", "time": "2023-05-09T20:26:05.500274Z", "type": "google.cloud.dataflow.job.v1beta3.statusChanged"} \ No newline at end of file diff --git a/settings.gradle.kts b/settings.gradle.kts index 65a55885afa7..9701b4dbc06f 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -328,8 +328,6 @@ include("beam-test-infra-metrics") project(":beam-test-infra-metrics").projectDir = file(".test-infra/metrics") include("beam-test-infra-mock-apis") project(":beam-test-infra-mock-apis").projectDir = file(".test-infra/mock-apis") -include("beam-test-infra-pipelines") -project(":beam-test-infra-pipelines").projectDir = file(".test-infra/pipelines") include("beam-test-tools") project(":beam-test-tools").projectDir = file(".test-infra/tools") include("beam-test-jenkins")