Fix: Migrate Travel Impact Model and Visual clean-up in pipeline.yaml
nlarge-google committed Nov 11, 2024
1 parent 5d8ed02 commit 5e5f1c6
Showing 4 changed files with 2 additions and 39 deletions.
@@ -1,4 +1,4 @@
-# Copyright 2021 Google LLC
+# Copyright 2022 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,7 +31,6 @@ resources:
 dag:
   # [Required] Specify the Airflow version of the operators used by the DAG.
   airflow_version: 2
-
   initialize:
     dag_id: flights_impact_data
     default_args:
@@ -45,32 +44,15 @@ dag:
 
   tasks:
     - operator: "GoogleCloudStorageToBigQueryOperator"
-      # Initializes GCS to BQ task for the DAG. This operator is used to load a
-      # JSON, CSV, Avro, ORC, or Parquet data from GCS into a BigQuery table.
-
-      # Task description
       description: "Task to load CSV data to a BigQuery table"
-
-      # Arguments supported by this operator:
-      # http://airflow.apache.org/docs/apache-airflow/stable/howto/operator/gcp/gcs.html#googlecloudstoragetobigqueryoperator
       args:
         task_id: "flights_impact_data_gcs_to_bq"
-
-        # The GCS bucket where the CSV file is located in.
         bucket: "{{ var.json.travel_impact_model.source_bucket }}"
-
-        # Use the CSV file containing data from today
         source_objects: ["flights_impact_data.csv"]
         source_format: "CSV"
         destination_project_dataset_table: "travel_impact_model.flights_impact_data"
-
-        # Use this if your CSV file contains a header row
         skip_leading_rows: 1
-
-        # How to write data to the table: overwrite, append, or write if empty
-        # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition
         write_disposition: "WRITE_TRUNCATE"
-
         schema_fields:
           - name: "carrier"
             type: "STRING"
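Once this YAML is rendered into an Airflow DAG, the surviving keys map directly onto the operator's constructor arguments, so the deleted comments mostly duplicated what the field names already say. A minimal sketch of the equivalent Airflow 2 task, assuming the provider package's GCSToBigQueryOperator (the successor to the contrib-era class name used in the YAML); the start date and schedule are placeholders, since the real values sit in parts of pipeline.yaml the diff does not show:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.transfers.gcs_to_bigquery import GCSToBigQueryOperator

# start_date and schedule_interval are illustrative placeholders; the real
# values come from parts of pipeline.yaml that this diff does not show.
with DAG(
    dag_id="flights_impact_data",
    start_date=datetime(2022, 1, 1),
    schedule_interval="@once",
) as dag:
    flights_impact_data_gcs_to_bq = GCSToBigQueryOperator(
        task_id="flights_impact_data_gcs_to_bq",
        # Jinja template resolved at runtime from the Airflow Variable
        # "travel_impact_model" (see the sketch after the last file below).
        bucket="{{ var.json.travel_impact_model.source_bucket }}",
        source_objects=["flights_impact_data.csv"],
        source_format="CSV",
        destination_project_dataset_table="travel_impact_model.flights_impact_data",
        skip_leading_rows=1,  # the source CSV has a header row
        write_disposition="WRITE_TRUNCATE",  # replace table contents on each run
        # First schema field from the diff; the remaining fields are truncated.
        schema_fields=[{"name": "carrier", "type": "STRING"}],
    )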
@@ -1,4 +1,4 @@
-# Copyright 2021 Google LLC
+# Copyright 2022 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
datasets/travel_impact_model/pipelines/metadata/pipeline.yaml (19 changes: 0 additions & 19 deletions)
@@ -29,9 +29,7 @@ resources:
     description: "Metadata about the dataset"
 
 dag:
-  # [Required] Specify the Airflow version of the operators used by the DAG.
   airflow_version: 2
-
   initialize:
     dag_id: metadata
     default_args:
@@ -45,32 +43,15 @@ dag:
 
   tasks:
     - operator: "GoogleCloudStorageToBigQueryOperator"
-      # Initializes GCS to BQ task for the DAG. This operator is used to load a
-      # JSON, CSV, Avro, ORC, or Parquet data from GCS into a BigQuery table.
-
-      # Task description
       description: "Task to load CSV data to a BigQuery table"
-
-      # Arguments supported by this operator:
-      # http://airflow.apache.org/docs/apache-airflow/stable/howto/operator/gcp/gcs.html#googlecloudstoragetobigqueryoperator
       args:
         task_id: "metadata_gcs_to_bq"
-
-        # The GCS bucket where the CSV file is located in.
         bucket: "{{ var.json.travel_impact_model.source_bucket }}"
-
-        # Use the CSV file containing data from today
         source_objects: ["metadata.csv"]
         source_format: "CSV"
         destination_project_dataset_table: "travel_impact_model.metadata"
-
-        # Use this if your CSV file contains a header row
         skip_leading_rows: 1
-
-        # How to write data to the table: overwrite, append, or write if empty
-        # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition
         write_disposition: "WRITE_TRUNCATE"
-
         schema_fields:
           - name: "key"
             type: "STRING"
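Both tasks read their source bucket through the Jinja template {{ var.json.travel_impact_model.source_bucket }}: Airflow deserializes the Variable named travel_impact_model as JSON and picks out its source_bucket key. A minimal sketch of seeding that Variable, with a hypothetical bucket name standing in for the real deployment value:

import json

from airflow.models import Variable

# "example-source-bucket" is a hypothetical placeholder; the actual bucket
# is configured in the production Airflow environment.
Variable.set("travel_impact_model", json.dumps({"source_bucket": "example-source-bucket"}))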
