Managed BigQueryIO #31486

Merged: 30 commits, Nov 12, 2024

Changes from 22 commits

Commits
ce14b96  managed bigqueryio (ahmedabu98, Jun 3, 2024)
550c1b4  spotless (ahmedabu98, Jun 4, 2024)
c94de3c  move managed dependency to test only (ahmedabu98, Jun 4, 2024)
912dc08  Merge branch 'master' of https://github.com/ahmedabu98/beam into mana… (ahmedabu98, Jun 5, 2024)
f436e62  cleanup after merging snake_case PR (ahmedabu98, Jun 5, 2024)
fe60904  choose write method based on boundedness and pipeline options (ahmedabu98, Jul 9, 2024)
7d405cf  Merge branch 'master' of https://github.com/ahmedabu98/beam into mana… (ahmedabu98, Jul 9, 2024)
d45159f  rename bigquery write config class (ahmedabu98, Jul 9, 2024)
989ad0f  spotless (ahmedabu98, Jul 9, 2024)
b9b49e7  change read output tag to 'output' (ahmedabu98, Jul 9, 2024)
a119bbc  spotless (ahmedabu98, Jul 9, 2024)
74bc178  revert logic that depends on DataflowServiceOptions. switching BQ met… (ahmedabu98, Jul 16, 2024)
528b504  spotless (ahmedabu98, Jul 16, 2024)
dcc398a  fix typo (ahmedabu98, Jul 29, 2024)
36edc38  separate BQ write config to a new class (ahmedabu98, Aug 6, 2024)
f9be86c  fix doc (ahmedabu98, Aug 6, 2024)
bd1e534  Merge branch 'master' of https://github.com/ahmedabu98/beam into mana… (ahmedabu98, Oct 25, 2024)
a26765e  resolve after syncing to HEAD (ahmedabu98, Oct 25, 2024)
725f7bd  spotless (ahmedabu98, Oct 26, 2024)
2631104  fork on batch/streaming (ahmedabu98, Nov 5, 2024)
770cf50  cleanup (ahmedabu98, Nov 5, 2024)
0a70466  spotless (ahmedabu98, Nov 5, 2024)
01a01f7  move forking logic to BQ schematransform side (ahmedabu98, Nov 6, 2024)
697c0b8  add file loads translation and tests; add test checks that the correc… (ahmedabu98, Nov 7, 2024)
105474b  set top-level wrapper to be the underlying managed BQ transform urn; … (ahmedabu98, Nov 8, 2024)
d6b9e69  move unit tests to respective schematransform test classes (ahmedabu98, Nov 8, 2024)
c0767d7  Merge branch 'master' of https://github.com/ahmedabu98/beam into mana… (ahmedabu98, Nov 8, 2024)
a600f62  Merge branch 'master' of https://github.com/ahmedabu98/beam into mana… (ahmedabu98, Nov 8, 2024)
ad4dcd9  expose to Python SDK as well (ahmedabu98, Nov 11, 2024)
6f325ce  Merge branch 'master' of https://github.com/ahmedabu98/beam into mana… (ahmedabu98, Nov 11, 2024)
3 changes: 2 additions & 1 deletion .github/trigger_files/beam_PostCommit_Java_DataflowV2.json

@@ -1,3 +1,4 @@
 {
-  "comment": "Modify this file in a trivial way to cause this test suite to run"
+  "comment": "Modify this file in a trivial way to cause this test suite to run",
+  "modification": 1
 }
@@ -1,4 +1,4 @@
 {
   "comment": "Modify this file in a trivial way to cause this test suite to run",
-  "modification": 2
+  "modification": 1
 }
@@ -70,6 +70,12 @@ message ManagedTransforms {
         "beam:schematransform:org.apache.beam:kafka_read:v1"];
     KAFKA_WRITE = 3 [(org.apache.beam.model.pipeline.v1.beam_urn) =
         "beam:schematransform:org.apache.beam:kafka_write:v1"];
+    BIGQUERY_READ = 4 [(org.apache.beam.model.pipeline.v1.beam_urn) =
+        "beam:schematransform:org.apache.beam:bigquery_storage_read:v1"];
+    BIGQUERY_STORAGE_WRITE = 5 [(org.apache.beam.model.pipeline.v1.beam_urn) =
+        "beam:schematransform:org.apache.beam:bigquery_storage_write:v2"];
+    BIGQUERY_FILE_LOADS = 6 [(org.apache.beam.model.pipeline.v1.beam_urn) =
+        "beam:schematransform:org.apache.beam:bigquery_fileloads:v1"];
   }
 }

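These enum entries map to URN strings through the beam_urn option; the Java SDK resolves them with BeamUrns.getUrn, which is exactly what the updated provider's identifier() does further down. A minimal sketch of that resolution (the wrapper class here is illustrative, not part of this PR):

import org.apache.beam.model.pipeline.v1.ExternalTransforms;
import org.apache.beam.sdk.util.construction.BeamUrns;

public class UrnResolutionExample {
  public static void main(String[] args) {
    // Prints "beam:schematransform:org.apache.beam:bigquery_fileloads:v1",
    // the same value the file-loads provider's identifier() returns below.
    System.out.println(
        BeamUrns.getUrn(ExternalTransforms.ManagedTransforms.Urns.BIGQUERY_FILE_LOADS));
  }
}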
1 change: 1 addition & 0 deletions sdks/java/io/google-cloud-platform/build.gradle
@@ -159,6 +159,7 @@ dependencies {
   testImplementation project(path: ":sdks:java:extensions:google-cloud-platform-core", configuration: "testRuntimeMigration")
   testImplementation project(path: ":sdks:java:extensions:protobuf", configuration: "testRuntimeMigration")
   testImplementation project(path: ":runners:direct-java", configuration: "shadow")
+  testImplementation project(":sdks:java:managed")
   testImplementation project(path: ":sdks:java:io:common")
   testImplementation project(path: ":sdks:java:testing:test-utils")
   testImplementation library.java.commons_math3
@@ -36,6 +36,9 @@ dependencies {
   permitUnusedDeclared project(":sdks:java:io:google-cloud-platform") // BEAM-11761
   implementation project(":sdks:java:extensions:schemaio-expansion-service")
   permitUnusedDeclared project(":sdks:java:extensions:schemaio-expansion-service") // BEAM-11761
+  implementation project(":sdks:java:managed")
+  permitUnusedDeclared project(":sdks:java:managed") // BEAM-11761
Review comment (Member):
Notes, no action required on this PR:

  • This is a link to Jira, so probably there's a github issue it is migrated to
  • This should be equivalent to runtimeOnly because it is "implementation" but no static references to it. I would guess this works the same, or else the uberjar plugin might not treat it right.
  • Putting these deps into a docker container without making an uber jar would honestly be better in the case where it does end up in a container, so we keep the original jar metadata.


   runtimeOnly library.java.slf4j_jdk14
 }


This file was deleted.

@@ -17,34 +17,28 @@
  */
 package org.apache.beam.sdk.io.gcp.bigquery;
 
-import com.google.api.services.bigquery.model.Table;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
+import static org.apache.beam.sdk.util.construction.BeamUrns.getUrn;
 
 import com.google.auto.service.AutoService;
-import java.io.IOException;
 import java.util.Collections;
 import java.util.List;
+import org.apache.beam.model.pipeline.v1.ExternalTransforms;
 import org.apache.beam.sdk.annotations.Internal;
 import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
 import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.schemas.Schema;
-import org.apache.beam.sdk.schemas.io.InvalidConfigurationException;
+import org.apache.beam.sdk.io.gcp.bigquery.providers.BigQueryWriteConfiguration;
 import org.apache.beam.sdk.schemas.transforms.SchemaTransform;
 import org.apache.beam.sdk.schemas.transforms.SchemaTransformProvider;
 import org.apache.beam.sdk.schemas.transforms.TypedSchemaTransformProvider;
-import org.apache.beam.sdk.transforms.MapElements;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PCollectionRowTuple;
 import org.apache.beam.sdk.values.Row;
-import org.apache.beam.sdk.values.TypeDescriptor;
-import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings;
 
 /**
  * An implementation of {@link TypedSchemaTransformProvider} for BigQuery write jobs configured
- * using {@link BigQueryFileLoadsWriteSchemaTransformConfiguration}.
+ * using {@link BigQueryWriteConfiguration}.
  *
  * <p><b>Internal only:</b> This class is actively being worked on, and it will likely change. We
  * provide no backwards compatibility guarantees, and it should not be implemented outside the Beam
@@ -56,201 +50,82 @@
 @Internal
 @AutoService(SchemaTransformProvider.class)
 public class BigQueryFileLoadsWriteSchemaTransformProvider
-    extends TypedSchemaTransformProvider<BigQueryFileLoadsWriteSchemaTransformConfiguration> {
-
-  private static final String IDENTIFIER =
-      "beam:schematransform:org.apache.beam:bigquery_fileloads_write:v1";
-  static final String INPUT_TAG = "INPUT";
+    extends TypedSchemaTransformProvider<BigQueryWriteConfiguration> {
 
-  /** Returns the expected class of the configuration. */
-  @Override
-  protected Class<BigQueryFileLoadsWriteSchemaTransformConfiguration> configurationClass() {
-    return BigQueryFileLoadsWriteSchemaTransformConfiguration.class;
-  }
+  static final String INPUT_TAG = "input";
 
   /** Returns the expected {@link SchemaTransform} of the configuration. */
   @Override
-  protected SchemaTransform from(BigQueryFileLoadsWriteSchemaTransformConfiguration configuration) {
+  protected SchemaTransform from(BigQueryWriteConfiguration configuration) {
     return new BigQueryWriteSchemaTransform(configuration);
   }
 
   /** Implementation of the {@link TypedSchemaTransformProvider} identifier method. */
   @Override
   public String identifier() {
-    return IDENTIFIER;
+    return getUrn(ExternalTransforms.ManagedTransforms.Urns.BIGQUERY_FILE_LOADS);
   }
 
   /**
    * Implementation of the {@link TypedSchemaTransformProvider} inputCollectionNames method. Since a
    * single is expected, this returns a list with a single name.
    */
   @Override
   public List<String> inputCollectionNames() {
     return Collections.singletonList(INPUT_TAG);
   }
 
   /**
    * Implementation of the {@link TypedSchemaTransformProvider} outputCollectionNames method. Since
    * no output is expected, this returns an empty list.
    */
   @Override
   public List<String> outputCollectionNames() {
     return Collections.emptyList();
   }
 
-  /**
-   * A {@link SchemaTransform} that performs {@link BigQueryIO.Write}s based on a {@link
-   * BigQueryFileLoadsWriteSchemaTransformConfiguration}.
-   */
   protected static class BigQueryWriteSchemaTransform extends SchemaTransform {
-    /** An instance of {@link BigQueryServices} used for testing. */
-    private BigQueryServices testBigQueryServices = null;
-
-    private final BigQueryFileLoadsWriteSchemaTransformConfiguration configuration;
+    private final BigQueryWriteConfiguration configuration;
 
-    BigQueryWriteSchemaTransform(BigQueryFileLoadsWriteSchemaTransformConfiguration configuration) {
+    BigQueryWriteSchemaTransform(BigQueryWriteConfiguration configuration) {
+      configuration.validate();
       this.configuration = configuration;
     }

     @Override
-    public void validate(PipelineOptions options) {
-      if (!configuration.getCreateDisposition().equals(CreateDisposition.CREATE_NEVER.name())) {
-        return;
-      }
+    public PCollectionRowTuple expand(PCollectionRowTuple input) {
+      PCollection<Row> rowPCollection = input.getSinglePCollection();
+      BigQueryIO.Write<Row> write = toWrite();
+      rowPCollection.apply(write);
 
-      BigQueryOptions bigQueryOptions = options.as(BigQueryOptions.class);
+      return PCollectionRowTuple.empty(input.getPipeline());
+    }
 
-      BigQueryServices bigQueryServices = new BigQueryServicesImpl();
-      if (testBigQueryServices != null) {
-        bigQueryServices = testBigQueryServices;
-      }
+    BigQueryIO.Write<Row> toWrite() {
+      BigQueryIO.Write<Row> write =
+          BigQueryIO.<Row>write()
+              .to(configuration.getTable())
+              .withMethod(BigQueryIO.Write.Method.FILE_LOADS)
+              .withFormatFunction(BigQueryUtils.toTableRow())
+              .useBeamSchema();
 
+      if (!Strings.isNullOrEmpty(configuration.getCreateDisposition())) {
+        CreateDisposition createDisposition =
+            CreateDisposition.valueOf(configuration.getCreateDisposition().toUpperCase());
Review thread:

Contributor: As a larger point, I think we should do any transform overriding in job submission (BQ modes for batch/streaming etc.) so that we can just upgrade in the backend (at least in the first version).

Contributor (author): Do you mean making this switch in the SDK (i.e. at construction time)? I assumed we had settled on making it a runner-side decision. Some decisions are actually dependent on the runner (e.g. at least one streaming mode in Dataflow).

Contributor: Yeah. Added some comments to the relevant doc.
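The forking this thread refers to lands in the commits "fork on batch/streaming" and "move forking logic to BQ schematransform side". As a hypothetical sketch of a construction-time decision of that kind (this helper is illustrative and not part of this PR's diff):

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;

// Illustrative only: pick the BigQuery write method from the input's
// boundedness, in the spirit of the "fork on batch/streaming" commit.
class WriteMethodChooser {
  static BigQueryIO.Write.Method choose(PCollection<Row> input) {
    // Unbounded (streaming) inputs use the Storage Write API; bounded
    // (batch) inputs can use batch file loads instead.
    return input.isBounded() == PCollection.IsBounded.UNBOUNDED
        ? BigQueryIO.Write.Method.STORAGE_WRITE_API
        : BigQueryIO.Write.Method.FILE_LOADS;
  }
}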

+        write = write.withCreateDisposition(createDisposition);
+      }
 
-      DatasetService datasetService = bigQueryServices.getDatasetService(bigQueryOptions);
-      TableReference tableReference = BigQueryUtils.toTableReference(configuration.getTableSpec());
-
-      try {
-        Table table = datasetService.getTable(tableReference);
-        if (table == null) {
-          throw new NullPointerException();
-        }
-
-        if (table.getSchema() == null) {
-          throw new InvalidConfigurationException(
-              String.format("could not fetch schema for table: %s", configuration.getTableSpec()));
-        }
-
-      } catch (NullPointerException | InterruptedException | IOException ex) {
-        throw new InvalidConfigurationException(
-            String.format(
-                "could not fetch table %s, error: %s",
-                configuration.getTableSpec(), ex.getMessage()));
-      }
-    }
+      if (!Strings.isNullOrEmpty(configuration.getWriteDisposition())) {
+        WriteDisposition writeDisposition =
+            WriteDisposition.valueOf(configuration.getWriteDisposition().toUpperCase());
+        write = write.withWriteDisposition(writeDisposition);
+      }
+      if (!Strings.isNullOrEmpty(configuration.getKmsKey())) {
+        write = write.withKmsKey(configuration.getKmsKey());
+      }

-    @Override
-    public PCollectionRowTuple expand(PCollectionRowTuple input) {
-      validate(input);
-      PCollection<Row> rowPCollection = input.get(INPUT_TAG);
-      Schema schema = rowPCollection.getSchema();
-      BigQueryIO.Write<TableRow> write = toWrite(schema);
-      if (testBigQueryServices != null) {
-        write = write.withTestServices(testBigQueryServices);
-      }
-
-      PCollection<TableRow> tableRowPCollection =
-          rowPCollection.apply(
-              MapElements.into(TypeDescriptor.of(TableRow.class)).via(BigQueryUtils::toTableRow));
-      tableRowPCollection.apply(write);
-      return PCollectionRowTuple.empty(input.getPipeline());
-    }
-
-    /** Instantiates a {@link BigQueryIO.Write<TableRow>} from a {@link Schema}. */
-    BigQueryIO.Write<TableRow> toWrite(Schema schema) {
-      TableSchema tableSchema = BigQueryUtils.toTableSchema(schema);
-      CreateDisposition createDisposition =
-          CreateDisposition.valueOf(configuration.getCreateDisposition());
-      WriteDisposition writeDisposition =
-          WriteDisposition.valueOf(configuration.getWriteDisposition());
-
-      return BigQueryIO.writeTableRows()
-          .to(configuration.getTableSpec())
-          .withCreateDisposition(createDisposition)
-          .withWriteDisposition(writeDisposition)
-          .withSchema(tableSchema);
+      return write;
     }
 
-    /** Setter for testing using {@link BigQueryServices}. */
-    @VisibleForTesting
-    void setTestBigQueryServices(BigQueryServices testBigQueryServices) {
-      this.testBigQueryServices = testBigQueryServices;
-    }
-
-    /** Validate a {@link PCollectionRowTuple} input. */
-    void validate(PCollectionRowTuple input) {
-      if (!input.has(INPUT_TAG)) {
-        throw new IllegalArgumentException(
-            String.format(
-                "%s %s is missing expected tag: %s",
-                getClass().getSimpleName(), input.getClass().getSimpleName(), INPUT_TAG));
-      }
-
-      PCollection<Row> rowInput = input.get(INPUT_TAG);
-      Schema sourceSchema = rowInput.getSchema();
-
-      if (sourceSchema == null) {
-        throw new IllegalArgumentException(
-            String.format("%s is null for input of tag: %s", Schema.class, INPUT_TAG));
-      }
-
-      if (!configuration.getCreateDisposition().equals(CreateDisposition.CREATE_NEVER.name())) {
-        return;
-      }
-
-      BigQueryOptions bigQueryOptions = input.getPipeline().getOptions().as(BigQueryOptions.class);
-
-      BigQueryServices bigQueryServices = new BigQueryServicesImpl();
-      if (testBigQueryServices != null) {
-        bigQueryServices = testBigQueryServices;
-      }
-
-      DatasetService datasetService = bigQueryServices.getDatasetService(bigQueryOptions);
-      TableReference tableReference = BigQueryUtils.toTableReference(configuration.getTableSpec());
-
-      try {
-        Table table = datasetService.getTable(tableReference);
-        if (table == null) {
-          throw new NullPointerException();
-        }
-
-        TableSchema tableSchema = table.getSchema();
-        if (tableSchema == null) {
-          throw new NullPointerException();
-        }
-
-        Schema destinationSchema = BigQueryUtils.fromTableSchema(tableSchema);
-        if (destinationSchema == null) {
-          throw new NullPointerException();
-        }
-
-        validateMatching(sourceSchema, destinationSchema);
-
-      } catch (NullPointerException | InterruptedException | IOException e) {
-        throw new InvalidConfigurationException(
-            String.format(
-                "could not validate input for create disposition: %s and table: %s, error: %s",
-                configuration.getCreateDisposition(),
-                configuration.getTableSpec(),
-                e.getMessage()));
-      }
-    }
-
-    void validateMatching(Schema sourceSchema, Schema destinationSchema) {
-      if (!sourceSchema.equals(destinationSchema)) {
-        throw new IllegalArgumentException(
-            String.format(
-                "source and destination schema mismatch for table: %s",
-                configuration.getTableSpec()));
-      }
-    }
   }
 }
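Taken together, these changes make the file-loads provider reachable through Beam's Managed API. A minimal end-to-end sketch, assuming the Managed.BIGQUERY entry point this PR wires up (constant name assumed) and a placeholder table spec:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.managed.Managed;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollectionRowTuple;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;

public class ManagedBigQueryWriteExample {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create();

    Schema schema = Schema.builder().addStringField("name").addInt64Field("number").build();
    Row row = Row.withSchema(schema).addValues("a", 1L).build();

    // "input" matches the provider's INPUT_TAG; "table" is the
    // BigQueryWriteConfiguration key (create_disposition, write_disposition,
    // and kms_key are also accepted). The table spec is a placeholder.
    PCollectionRowTuple.of("input", pipeline.apply(Create.of(row).withRowSchema(schema)))
        .apply(
            Managed.write(Managed.BIGQUERY)
                .withConfig(
                    ImmutableMap.<String, Object>of("table", "my-project:my_dataset.my_table")));

    // pipeline.run() would require GCP credentials and an existing dataset.
  }
}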