From 5e5f1c66f598e7b92647edcb9b646275f3bb0134 Mon Sep 17 00:00:00 2001 From: nlarge-google Date: Mon, 11 Nov 2024 19:47:25 +0000 Subject: [PATCH] Fix: Migrate Travel Impact Model and Visual clean-up in pipeline.yaml --- .../flights_impact_data_dag.py | 2 +- .../flights_impact_data/pipeline.yaml | 18 ------------------ .../pipelines/metadata/metadata_dag.py | 2 +- .../pipelines/metadata/pipeline.yaml | 19 ------------------- 4 files changed, 2 insertions(+), 39 deletions(-) diff --git a/datasets/travel_impact_model/pipelines/flights_impact_data/flights_impact_data_dag.py b/datasets/travel_impact_model/pipelines/flights_impact_data/flights_impact_data_dag.py index a4b374423..f4b79d267 100644 --- a/datasets/travel_impact_model/pipelines/flights_impact_data/flights_impact_data_dag.py +++ b/datasets/travel_impact_model/pipelines/flights_impact_data/flights_impact_data_dag.py @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/datasets/travel_impact_model/pipelines/flights_impact_data/pipeline.yaml b/datasets/travel_impact_model/pipelines/flights_impact_data/pipeline.yaml index 3ca3b9984..f69db4907 100644 --- a/datasets/travel_impact_model/pipelines/flights_impact_data/pipeline.yaml +++ b/datasets/travel_impact_model/pipelines/flights_impact_data/pipeline.yaml @@ -31,7 +31,6 @@ resources: dag: # [Required] Specify the Airflow version of the operators used by the DAG. airflow_version: 2 - initialize: dag_id: flights_impact_data default_args: @@ -45,32 +44,15 @@ dag: tasks: - operator: "GoogleCloudStorageToBigQueryOperator" - # Initializes GCS to BQ task for the DAG. This operator is used to load a - # JSON, CSV, Avro, ORC, or Parquet data from GCS into a BigQuery table. - - # Task description description: "Task to load CSV data to a BigQuery table" - - # Arguments supported by this operator: - # http://airflow.apache.org/docs/apache-airflow/stable/howto/operator/gcp/gcs.html#googlecloudstoragetobigqueryoperator args: task_id: "flights_impact_data_gcs_to_bq" - - # The GCS bucket where the CSV file is located in. bucket: "{{ var.json.travel_impact_model.source_bucket }}" - - # Use the CSV file containing data from today source_objects: ["flights_impact_data.csv"] source_format: "CSV" destination_project_dataset_table: "travel_impact_model.flights_impact_data" - - # Use this if your CSV file contains a header row skip_leading_rows: 1 - - # How to write data to the table: overwrite, append, or write if empty - # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition write_disposition: "WRITE_TRUNCATE" - schema_fields: - name: "carrier" type: "STRING" diff --git a/datasets/travel_impact_model/pipelines/metadata/metadata_dag.py b/datasets/travel_impact_model/pipelines/metadata/metadata_dag.py index 52a357fca..7d78b6554 100644 --- a/datasets/travel_impact_model/pipelines/metadata/metadata_dag.py +++ b/datasets/travel_impact_model/pipelines/metadata/metadata_dag.py @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/datasets/travel_impact_model/pipelines/metadata/pipeline.yaml b/datasets/travel_impact_model/pipelines/metadata/pipeline.yaml index afb8ca3a1..e97072071 100644 --- a/datasets/travel_impact_model/pipelines/metadata/pipeline.yaml +++ b/datasets/travel_impact_model/pipelines/metadata/pipeline.yaml @@ -29,9 +29,7 @@ resources: description: "Metadata about the dataset" dag: - # [Required] Specify the Airflow version of the operators used by the DAG. airflow_version: 2 - initialize: dag_id: metadata default_args: @@ -45,32 +43,15 @@ dag: tasks: - operator: "GoogleCloudStorageToBigQueryOperator" - # Initializes GCS to BQ task for the DAG. This operator is used to load a - # JSON, CSV, Avro, ORC, or Parquet data from GCS into a BigQuery table. - - # Task description description: "Task to load CSV data to a BigQuery table" - - # Arguments supported by this operator: - # http://airflow.apache.org/docs/apache-airflow/stable/howto/operator/gcp/gcs.html#googlecloudstoragetobigqueryoperator args: task_id: "metadata_gcs_to_bq" - - # The GCS bucket where the CSV file is located in. bucket: "{{ var.json.travel_impact_model.source_bucket }}" - - # Use the CSV file containing data from today source_objects: ["metadata.csv"] source_format: "CSV" destination_project_dataset_table: "travel_impact_model.metadata" - - # Use this if your CSV file contains a header row skip_leading_rows: 1 - - # How to write data to the table: overwrite, append, or write if empty - # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition write_disposition: "WRITE_TRUNCATE" - schema_fields: - name: "key" type: "STRING"