From a8bb22ce5c59c450d59f3a7f568559584563d04d Mon Sep 17 00:00:00 2001 From: nlarge-google Date: Fri, 29 Sep 2023 20:00:26 +0000 Subject: [PATCH] fix: Added field HRK to spend fields in 6 pipelines and updated list of numerical fields in csv_transform. --- .../run_csv_transform_kub/csv_transform.py | 9 ++++- .../process_csvs_and_load_to_bq/pipeline.yaml | 24 +++++++++++ .../process_csvs_and_load_to_bq_dag.py | 40 +++++++++++++++---- 3 files changed, 63 insertions(+), 10 deletions(-) diff --git a/datasets/google_political_ads/pipelines/_images/run_csv_transform_kub/csv_transform.py b/datasets/google_political_ads/pipelines/_images/run_csv_transform_kub/csv_transform.py index e0bb1d926..c6f8b1722 100644 --- a/datasets/google_political_ads/pipelines/_images/run_csv_transform_kub/csv_transform.py +++ b/datasets/google_political_ads/pipelines/_images/run_csv_transform_kub/csv_transform.py @@ -46,16 +46,21 @@ "spend_eur", "spend_inr", "spend_bgn", - "spend_hrk", "spend_czk", "spend_dkk", "spend_huf", "spend_pln", "spend_ron", - "spend_gbp", "spend_sek", + "spend_gbp", "spend_nzd", + "spend_ils", + "spend_aud", + "spend_twd", "spend_brl", + "spend_ars", + "spend_zar", + "spend_clp" ] diff --git a/datasets/google_political_ads/pipelines/process_csvs_and_load_to_bq/pipeline.yaml b/datasets/google_political_ads/pipelines/process_csvs_and_load_to_bq/pipeline.yaml index bc756ce36..c63778939 100644 --- a/datasets/google_political_ads/pipelines/process_csvs_and_load_to_bq/pipeline.yaml +++ b/datasets/google_political_ads/pipelines/process_csvs_and_load_to_bq/pipeline.yaml @@ -192,6 +192,7 @@ dag: "spend_eur", "spend_inr", "spend_bgn", + "spend_hrk", "spend_czk", "spend_dkk", "spend_huf", @@ -218,6 +219,7 @@ dag: "Spend_EUR": "spend_eur", "Spend_INR": "spend_inr", "Spend_BGN": "spend_bgn", + "Spend_HRK": "spend_hrk", "Spend_CZK": "spend_czk", "Spend_DKK": "spend_dkk", "Spend_HUF": "spend_huf", @@ -282,6 +284,10 @@ dag: type: "integer" description: "Total amount in BGN spent on 
election ads in this region." mode: "nullable" + - name: "spend_hrk" + type: "integer" + description: "Total amount in HRK spent on election ads in this region." + mode: "nullable" - name: "spend_czk" type: "integer" description: "Total amount in CZK spent on election ads in this region." @@ -375,6 +381,7 @@ dag: "spend_eur", "spend_inr", "spend_bgn", + "spend_hrk", "spend_czk", "spend_dkk", "spend_huf", @@ -403,6 +410,7 @@ dag: "Spend_EUR": "spend_eur", "Spend_INR": "spend_inr", "Spend_BGN": "spend_bgn", + "Spend_HRK": "spend_hrk", "Spend_CZK": "spend_czk", "Spend_DKK": "spend_dkk", "Spend_HUF": "spend_huf", @@ -475,6 +483,10 @@ dag: type: "integer" description: "Total amount in BGN spent on election ads by the advertiser." mode: "nullable" + - name: "spend_hrk" + type: "integer" + description: "Total amount in HRK spent on election ads by the advertiser." + mode: "nullable" - name: "spend_czk" type: "integer" description: "Total amount in CZK spent on election ads by the advertiser." @@ -566,6 +578,7 @@ dag: "spend_eur", "spend_inr", "spend_bgn", + "spend_hrk", "spend_czk", "spend_dkk", "spend_huf", @@ -592,6 +605,7 @@ dag: "Spend_EUR": "spend_eur", "Spend_INR": "spend_inr", "Spend_BGN": "spend_bgn", + "Spend_HRK": "spend_hrk", "Spend_CZK": "spend_czk", "Spend_DKK": "spend_dkk", "Spend_HUF": "spend_huf", @@ -656,6 +670,10 @@ dag: type: "integer" description: "The amount in BGN spent on election ads during the given week by the advertiser." mode: "nullable" + - name: "spend_hrk" + type: "integer" + description: "The amount in HRK spent on election ads during the given week by the advertiser." + mode: "nullable" - name: "spend_czk" type: "integer" description: "The amount in CZK spent on election ads during the given week by the advertiser." 
@@ -1226,6 +1244,7 @@ dag: "spend_eur", "spend_inr", "spend_bgn", + "spend_hrk", "spend_czk", "spend_dkk", "spend_huf", @@ -1251,6 +1270,7 @@ dag: "Spend_EUR": "spend_eur", "Spend_INR": "spend_inr", "Spend_BGN": "spend_bgn", + "Spend_HRK": "spend_hrk", "Spend_CZK": "spend_czk", "Spend_DKK": "spend_dkk", "Spend_HUF": "spend_huf", @@ -1309,6 +1329,10 @@ dag: type: "integer" description: "Total amount in BGN spent on election ads in this region." mode: "nullable" + - name: "spend_hrk" + type: "integer" + description: "Total amount in HRK spent on election ads in this region." + mode: "nullable" - name: "spend_czk" type: "integer" description: "Total amount in CZK spent on election ads in this region." diff --git a/datasets/google_political_ads/pipelines/process_csvs_and_load_to_bq/process_csvs_and_load_to_bq_dag.py b/datasets/google_political_ads/pipelines/process_csvs_and_load_to_bq/process_csvs_and_load_to_bq_dag.py index ae563a9bf..d09b7e74b 100644 --- a/datasets/google_political_ads/pipelines/process_csvs_and_load_to_bq/process_csvs_and_load_to_bq_dag.py +++ b/datasets/google_political_ads/pipelines/process_csvs_and_load_to_bq/process_csvs_and_load_to_bq_dag.py @@ -143,8 +143,8 @@ "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", "TARGET_GCS_PATH": "data/google_political_ads/advertiser_geo_spend/data_output.csv", "TABLE_NAME": "advertiser_geo_spend", - "CSV_HEADERS": '[\n "advertiser_id",\n "advertiser_name",\n "country",\n "country_subdivision_primary",\n "spend_usd",\n "spend_eur",\n "spend_inr",\n "spend_bgn",\n "spend_czk",\n "spend_dkk",\n "spend_huf",\n "spend_pln",\n "spend_ron",\n "spend_sek",\n "spend_gbp",\n "spend_nzd",\n "spend_ils",\n "spend_aud",\n "spend_twd",\n "spend_brl",\n "spend_ars",\n "spend_zar",\n "spend_clp"\n]', - "RENAME_MAPPINGS": '{\n "Advertiser_ID": "advertiser_id",\n "Advertiser_Name": "advertiser_name",\n "Country": "country",\n "Country_Subdivision_Primary": "country_subdivision_primary",\n "Spend_USD": "spend_usd",\n 
"Spend_EUR": "spend_eur",\n "Spend_INR": "spend_inr",\n "Spend_BGN": "spend_bgn",\n "Spend_CZK": "spend_czk",\n "Spend_DKK": "spend_dkk",\n "Spend_HUF": "spend_huf",\n "Spend_PLN": "spend_pln",\n "Spend_RON": "spend_ron",\n "Spend_SEK": "spend_sek",\n "Spend_GBP": "spend_gbp",\n "Spend_NZD": "spend_nzd",\n "Spend_ILS": "spend_ils",\n "Spend_AUD": "spend_aud",\n "Spend_TWD": "spend_twd",\n "Spend_BRL": "spend_brl",\n "Spend_ARS": "spend_ars",\n "Spend_ZAR": "spend_zar",\n "Spend_CLP": "spend_clp"\n}', + "CSV_HEADERS": '[\n "advertiser_id",\n "advertiser_name",\n "country",\n "country_subdivision_primary",\n "spend_usd",\n "spend_eur",\n "spend_inr",\n "spend_bgn",\n "spend_hrk",\n "spend_czk",\n "spend_dkk",\n "spend_huf",\n "spend_pln",\n "spend_ron",\n "spend_sek",\n "spend_gbp",\n "spend_nzd",\n "spend_ils",\n "spend_aud",\n "spend_twd",\n "spend_brl",\n "spend_ars",\n "spend_zar",\n "spend_clp"\n]', + "RENAME_MAPPINGS": '{\n "Advertiser_ID": "advertiser_id",\n "Advertiser_Name": "advertiser_name",\n "Country": "country",\n "Country_Subdivision_Primary": "country_subdivision_primary",\n "Spend_USD": "spend_usd",\n "Spend_EUR": "spend_eur",\n "Spend_INR": "spend_inr",\n "Spend_BGN": "spend_bgn",\n "Spend_HRK": "spend_hrk",\n "Spend_CZK": "spend_czk",\n "Spend_DKK": "spend_dkk",\n "Spend_HUF": "spend_huf",\n "Spend_PLN": "spend_pln",\n "Spend_RON": "spend_ron",\n "Spend_SEK": "spend_sek",\n "Spend_GBP": "spend_gbp",\n "Spend_NZD": "spend_nzd",\n "Spend_ILS": "spend_ils",\n "Spend_AUD": "spend_aud",\n "Spend_TWD": "spend_twd",\n "Spend_BRL": "spend_brl",\n "Spend_ARS": "spend_ars",\n "Spend_ZAR": "spend_zar",\n "Spend_CLP": "spend_clp"\n}', }, resources={"request_memory": "1G", "request_cpu": "200m"}, ) @@ -209,6 +209,12 @@ "description": "Total amount in BGN spent on election ads in this region.", "mode": "nullable", }, + { + "name": "spend_hrk", + "type": "integer", + "description": "Total amount in HRK spent on election ads in this region.", + "mode": "nullable", 
+ }, { "name": "spend_czk", "type": "integer", @@ -320,8 +326,8 @@ "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", "TARGET_GCS_PATH": "data/google_political_ads/advertiser_stats/data_output.csv", "TABLE_NAME": "advertiser_stats", - "CSV_HEADERS": '[\n "advertiser_id",\n "advertiser_name",\n "public_ids_list",\n "regions",\n "elections",\n "total_creatives",\n "spend_usd",\n "spend_eur",\n "spend_inr",\n "spend_bgn",\n "spend_czk",\n "spend_dkk",\n "spend_huf",\n "spend_pln",\n "spend_ron",\n "spend_sek",\n "spend_gbp",\n "spend_nzd",\n "spend_ils",\n "spend_aud",\n "spend_twd",\n "spend_brl"\n "spend_ars",\n "spend_zar",\n "spend_clp"\n]', - "RENAME_MAPPINGS": '{\n "Advertiser_ID": "advertiser_id",\n "Advertiser_Name": "advertiser_name",\n "Public_IDs_List": "public_ids_list",\n "Regions": "regions",\n "Elections": "elections",\n "Total_Creatives": "total_creatives",\n "Spend_USD": "spend_usd",\n "Spend_EUR": "spend_eur",\n "Spend_INR": "spend_inr",\n "Spend_BGN": "spend_bgn",\n "Spend_CZK": "spend_czk",\n "Spend_DKK": "spend_dkk",\n "Spend_HUF": "spend_huf",\n "Spend_PLN": "spend_pln",\n "Spend_RON": "spend_ron",\n "Spend_SEK": "spend_sek",\n "Spend_GBP": "spend_gbp",\n "Spend_NZD": "spend_nzd",\n "Spend_ILS": "spend_ils",\n "Spend_AUD": "spend_aud",\n "Spend_TWD": "spend_twd",\n "Spend_BRL": "spend_brl",\n "Spend_ARS": "spend_ars",\n "Spend_ZAR": "spend_zar",\n "Spend_CLP": "spend_clp"\n}', + "CSV_HEADERS": '[\n "advertiser_id",\n "advertiser_name",\n "public_ids_list",\n "regions",\n "elections",\n "total_creatives",\n "spend_usd",\n "spend_eur",\n "spend_inr",\n "spend_bgn",\n "spend_hrk",\n "spend_czk",\n "spend_dkk",\n "spend_huf",\n "spend_pln",\n "spend_ron",\n "spend_sek",\n "spend_gbp",\n "spend_nzd",\n "spend_ils",\n "spend_aud",\n "spend_twd",\n "spend_brl",\n "spend_ars",\n "spend_zar",\n "spend_clp"\n]', + "RENAME_MAPPINGS": '{\n "Advertiser_ID": "advertiser_id",\n "Advertiser_Name": "advertiser_name",\n "Public_IDs_List": "public_ids_list",\n
"Regions": "regions",\n "Elections": "elections",\n "Total_Creatives": "total_creatives",\n "Spend_USD": "spend_usd",\n "Spend_EUR": "spend_eur",\n "Spend_INR": "spend_inr",\n "Spend_BGN": "spend_bgn",\n "Spend_HRK": "spend_hrk",\n "Spend_CZK": "spend_czk",\n "Spend_DKK": "spend_dkk",\n "Spend_HUF": "spend_huf",\n "Spend_PLN": "spend_pln",\n "Spend_RON": "spend_ron",\n "Spend_SEK": "spend_sek",\n "Spend_GBP": "spend_gbp",\n "Spend_NZD": "spend_nzd",\n "Spend_ILS": "spend_ils",\n "Spend_AUD": "spend_aud",\n "Spend_TWD": "spend_twd",\n "Spend_BRL": "spend_brl",\n "Spend_ARS": "spend_ars",\n "Spend_ZAR": "spend_zar",\n "Spend_CLP": "spend_clp"\n}', }, resources={"request_memory": "1G", "request_cpu": "200m"}, ) @@ -396,6 +402,12 @@ "description": "Total amount in BGN spent on election ads by the advertiser.", "mode": "nullable", }, + { + "name": "spend_hrk", + "type": "integer", + "description": "Total amount in HRK spent on election ads by the advertiser.", + "mode": "nullable", + }, { "name": "spend_czk", "type": "integer", @@ -507,8 +519,8 @@ "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", "TARGET_GCS_PATH": "data/google_political_ads/advertiser_weekly_spend/data_output.csv", "TABLE_NAME": "advertiser_weekly_spend", - "CSV_HEADERS": '[\n "advertiser_id",\n "advertiser_name",\n "election_cycle",\n "week_start_date",\n "spend_usd",\n "spend_eur",\n "spend_inr",\n "spend_bgn",\n "spend_czk",\n "spend_dkk",\n "spend_huf",\n "spend_pln",\n "spend_ron",\n "spend_sek",\n "spend_gbp",\n "spend_nzd",\n "spend_ils",\n "spend_aud",\n "spend_twd",\n "spend_brl",\n "spend_ars",\n "spend_zar",\n "spend_clp"\n]', - "RENAME_MAPPINGS": '{\n "Advertiser_ID": "advertiser_id",\n "Advertiser_Name": "advertiser_name",\n "Election_Cycle": "election_cycle",\n "Week_Start_Date": "week_start_date",\n "Spend_USD": "spend_usd",\n "Spend_EUR": "spend_eur",\n "Spend_INR": "spend_inr",\n "Spend_BGN": "spend_bgn",\n "Spend_CZK": "spend_czk",\n "Spend_DKK": "spend_dkk",\n "Spend_HUF":
"spend_huf",\n "Spend_PLN": "spend_pln",\n "Spend_RON": "spend_ron",\n "Spend_SEK": "spend_sek",\n "Spend_GBP": "spend_gbp",\n "Spend_NZD": "spend_nzd",\n "Spend_NZD": "spend_ils",\n "Spend_AUD": "spend_aud",\n "Spend_TWD": "spend_twd",\n "Spend_BRL": "spend_brl",\n "Spend_ARS": "spend_ars",\n "Spend_ZAR": "spend_zar",\n "Spend_CLP": "spend_clp"\n}', + "CSV_HEADERS": '[\n "advertiser_id",\n "advertiser_name",\n "election_cycle",\n "week_start_date",\n "spend_usd",\n "spend_eur",\n "spend_inr",\n "spend_bgn",\n "spend_hrk",\n "spend_czk",\n "spend_dkk",\n "spend_huf",\n "spend_pln",\n "spend_ron",\n "spend_sek",\n "spend_gbp",\n "spend_nzd",\n "spend_ils",\n "spend_aud",\n "spend_twd",\n "spend_brl",\n "spend_ars",\n "spend_zar",\n "spend_clp"\n]', + "RENAME_MAPPINGS": '{\n "Advertiser_ID": "advertiser_id",\n "Advertiser_Name": "advertiser_name",\n "Election_Cycle": "election_cycle",\n "Week_Start_Date": "week_start_date",\n "Spend_USD": "spend_usd",\n "Spend_EUR": "spend_eur",\n "Spend_INR": "spend_inr",\n "Spend_BGN": "spend_bgn",\n "Spend_HRK": "spend_hrk",\n "Spend_CZK": "spend_czk",\n "Spend_DKK": "spend_dkk",\n "Spend_HUF": "spend_huf",\n "Spend_PLN": "spend_pln",\n "Spend_RON": "spend_ron",\n "Spend_SEK": "spend_sek",\n "Spend_GBP": "spend_gbp",\n "Spend_NZD": "spend_nzd",\n "Spend_ILS": "spend_ils",\n "Spend_AUD": "spend_aud",\n "Spend_TWD": "spend_twd",\n "Spend_BRL": "spend_brl",\n "Spend_ARS": "spend_ars",\n "Spend_ZAR": "spend_zar",\n "Spend_CLP": "spend_clp"\n}', }, resources={"request_memory": "1G", "request_cpu": "200m"}, ) @@ -573,6 +585,12 @@ "description": "The amount in BGN spent on election ads during the given week by the advertiser.", "mode": "nullable", }, + { + "name": "spend_hrk", + "type": "integer", + "description": "The amount in HRK spent on election ads during the given week by the advertiser.", + "mode": "nullable", + }, { "name": "spend_czk", "type": "integer", @@ -1158,8 +1176,8 @@ "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket
}}", "TARGET_GCS_PATH": "data/google_political_ads/geo_spend/data_output.csv", "TABLE_NAME": "geo_spend", - "CSV_HEADERS": '[\n "country",\n "country_subdivision_primary",\n "country_subdivision_secondary",\n "spend_usd",\n "spend_eur",\n "spend_inr",\n "spend_bgn",\n "spend_czk",\n "spend_dkk",\n "spend_huf",\n "spend_pln",\n "spend_ron",\n "spend_sek",\n "spend_gbp",\n "spend_nzd",\n "spend_ils",\n "spend_aud",\n "spend_twd",\n "spend_brl",\n "spend_ars",\n "spend_zar",\n "spend_clp"\n]', - "RENAME_MAPPINGS": '{\n "Country": "country",\n "Country_Subdivision_Primary": "country_subdivision_primary",\n "Country_Subdivision_Secondary": "country_subdivision_secondary",\n "Spend_USD": "spend_usd",\n "Spend_EUR": "spend_eur",\n "Spend_INR": "spend_inr",\n "Spend_BGN": "spend_bgn",\n "Spend_CZK": "spend_czk",\n "Spend_DKK": "spend_dkk",\n "Spend_HUF": "spend_huf",\n "Spend_PLN": "spend_pln",\n "Spend_RON": "spend_ron",\n "Spend_SEK": "spend_sek",\n "Spend_GBP": "spend_gbp",\n "Spend_NZD": "spend_nzd",\n "Spend_ILS": "spend_ils",\n "Spend_TWD": "spend_twd",\n "Spend_BRL": "spend_brl",\n "Spend_ARS": "spend_ars",\n "Spend_ZAR": "spend_zar",\n "Spend_CLP": "spend_clp",\n}', + "CSV_HEADERS": '[\n "country",\n "country_subdivision_primary",\n "country_subdivision_secondary",\n "spend_usd",\n "spend_eur",\n "spend_inr",\n "spend_bgn",\n "spend_hrk",\n "spend_czk",\n "spend_dkk",\n "spend_huf",\n "spend_pln",\n "spend_ron",\n "spend_sek",\n "spend_gbp",\n "spend_nzd",\n "spend_ils",\n "spend_aud",\n "spend_twd",\n "spend_brl",\n "spend_ars",\n "spend_zar",\n "spend_clp"\n]', + "RENAME_MAPPINGS": '{\n "Country": "country",\n "Country_Subdivision_Primary": "country_subdivision_primary",\n "Country_Subdivision_Secondary": "country_subdivision_secondary",\n "Spend_USD": "spend_usd",\n "Spend_EUR": "spend_eur",\n "Spend_INR": "spend_inr",\n "Spend_BGN": "spend_bgn",\n "Spend_HRK": "spend_hrk",\n "Spend_CZK": "spend_czk",\n "Spend_DKK": "spend_dkk",\n "Spend_HUF": "spend_huf",\n
"Spend_PLN": "spend_pln",\n "Spend_RON": "spend_ron",\n "Spend_SEK": "spend_sek",\n "Spend_GBP": "spend_gbp",\n "Spend_NZD": "spend_nzd",\n "Spend_ILS": "spend_ils",\n "Spend_AUD": "spend_aud",\n "Spend_TWD": "spend_twd",\n "Spend_BRL": "spend_brl",\n "Spend_ARS": "spend_ars",\n "Spend_ZAR": "spend_zar",\n "Spend_CLP": "spend_clp"\n}', }, resources={"request_memory": "1G", "request_cpu": "200m"}, ) @@ -1216,6 +1234,12 @@ "description": "Total amount in BGN spent on election ads in this region.", "mode": "nullable", }, + { + "name": "spend_hrk", + "type": "integer", + "description": "Total amount in HRK spent on election ads in this region.", + "mode": "nullable", + }, { "name": "spend_czk", "type": "integer",