Fix: Migrate Travel Impact Model and Visual clean-up in pipeline.yaml
nlarge-google committed Nov 11, 2024
1 parent 5d8ed02 commit 5e5f1c6
Showing 4 changed files with 2 additions and 39 deletions.
@@ -1,4 +1,4 @@
-# Copyright 2021 Google LLC
+# Copyright 2022 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,7 +31,6 @@ resources:
 dag:
   # [Required] Specify the Airflow version of the operators used by the DAG.
   airflow_version: 2
-
   initialize:
     dag_id: flights_impact_data
     default_args:
@@ -45,32 +44,15 @@ dag:
 
   tasks:
     - operator: "GoogleCloudStorageToBigQueryOperator"
-      # Initializes GCS to BQ task for the DAG. This operator is used to load a
-      # JSON, CSV, Avro, ORC, or Parquet data from GCS into a BigQuery table.
-
-      # Task description
       description: "Task to load CSV data to a BigQuery table"
-
-      # Arguments supported by this operator:
-      # http://airflow.apache.org/docs/apache-airflow/stable/howto/operator/gcp/gcs.html#googlecloudstoragetobigqueryoperator
       args:
         task_id: "flights_impact_data_gcs_to_bq"
-
-        # The GCS bucket where the CSV file is located in.
         bucket: "{{ var.json.travel_impact_model.source_bucket }}"
-
-        # Use the CSV file containing data from today
         source_objects: ["flights_impact_data.csv"]
         source_format: "CSV"
         destination_project_dataset_table: "travel_impact_model.flights_impact_data"
-
-        # Use this if your CSV file contains a header row
         skip_leading_rows: 1
-
-        # How to write data to the table: overwrite, append, or write if empty
-        # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition
         write_disposition: "WRITE_TRUNCATE"
-
         schema_fields:
           - name: "carrier"
             type: "STRING"
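Once this YAML is rendered into an Airflow DAG, the surviving keys map directly onto the operator's constructor arguments, so the deleted comments mostly duplicated what the field names already say. A minimal sketch of the equivalent Airflow 2 task, assuming the provider package's GCSToBigQueryOperator (the successor to the contrib-era class name used in the YAML); the start date and schedule are placeholders, since the real values sit in parts of pipeline.yaml the diff does not show:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.transfers.gcs_to_bigquery import GCSToBigQueryOperator

# start_date and schedule_interval are illustrative placeholders; the real
# values come from parts of pipeline.yaml that this diff does not show.
with DAG(
    dag_id="flights_impact_data",
    start_date=datetime(2022, 1, 1),
    schedule_interval="@once",
) as dag:
    flights_impact_data_gcs_to_bq = GCSToBigQueryOperator(
        task_id="flights_impact_data_gcs_to_bq",
        # Jinja template resolved at runtime from the Airflow Variable
        # "travel_impact_model" (see the sketch after the last file below).
        bucket="{{ var.json.travel_impact_model.source_bucket }}",
        source_objects=["flights_impact_data.csv"],
        source_format="CSV",
        destination_project_dataset_table="travel_impact_model.flights_impact_data",
        skip_leading_rows=1,  # the source CSV has a header row
        write_disposition="WRITE_TRUNCATE",  # replace table contents on each run
        # First schema field from the diff; the remaining fields are truncated.
        schema_fields=[{"name": "carrier", "type": "STRING"}],
    )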
@@ -1,4 +1,4 @@
-# Copyright 2021 Google LLC
+# Copyright 2022 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
datasets/travel_impact_model/pipelines/metadata/pipeline.yaml (19 changes: 0 additions & 19 deletions)
@@ -29,9 +29,7 @@ resources:
     description: "Metadata about the dataset"
 
 dag:
-  # [Required] Specify the Airflow version of the operators used by the DAG.
   airflow_version: 2
-
   initialize:
     dag_id: metadata
     default_args:
@@ -45,32 +43,15 @@ dag:
 
   tasks:
     - operator: "GoogleCloudStorageToBigQueryOperator"
-      # Initializes GCS to BQ task for the DAG. This operator is used to load a
-      # JSON, CSV, Avro, ORC, or Parquet data from GCS into a BigQuery table.
-
-      # Task description
       description: "Task to load CSV data to a BigQuery table"
-
-      # Arguments supported by this operator:
-      # http://airflow.apache.org/docs/apache-airflow/stable/howto/operator/gcp/gcs.html#googlecloudstoragetobigqueryoperator
       args:
         task_id: "metadata_gcs_to_bq"
-
-        # The GCS bucket where the CSV file is located in.
         bucket: "{{ var.json.travel_impact_model.source_bucket }}"
-
-        # Use the CSV file containing data from today
         source_objects: ["metadata.csv"]
         source_format: "CSV"
         destination_project_dataset_table: "travel_impact_model.metadata"
-
-        # Use this if your CSV file contains a header row
         skip_leading_rows: 1
-
-        # How to write data to the table: overwrite, append, or write if empty
-        # See https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition
         write_disposition: "WRITE_TRUNCATE"
-
         schema_fields:
           - name: "key"
             type: "STRING"
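Both tasks read their source bucket through the Jinja template {{ var.json.travel_impact_model.source_bucket }}: Airflow deserializes the Variable named travel_impact_model as JSON and picks out its source_bucket key. A minimal sketch of seeding that Variable, with a hypothetical bucket name standing in for the real deployment value:

import json

from airflow.models import Variable

# "example-source-bucket" is a hypothetical placeholder; the actual bucket
# is configured in the production Airflow environment.
Variable.set("travel_impact_model", json.dumps({"source_bucket": "example-source-bucket"}))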
