From 97fede7ce9120afdbbfd006f71e9847c7f417ddc Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Fri, 29 Dec 2023 07:47:32 -0500 Subject: [PATCH 1/7] use target.project for destination --- macros/combine_property_data.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/macros/combine_property_data.sql b/macros/combine_property_data.sql index a76870dd..32b9fb0c 100644 --- a/macros/combine_property_data.sql +++ b/macros/combine_property_data.sql @@ -4,7 +4,7 @@ {% macro default__combine_property_data() %} - create schema if not exists `{{var('project')}}.{{var('dataset')}}`; + create schema if not exists `{{target.project}}.{{var('combined_dataset')}}`; {# If incremental, then use static_incremental_days variable to find earliest shard to copy #} {% if not should_full_refresh() %} @@ -22,7 +22,7 @@ {% for relation in relations %} {%- set relation_suffix = relation.identifier|replace('events_intraday_', '') -%} {%- if relation_suffix|int >= earliest_shard_to_retrieve|int -%} - CREATE OR REPLACE TABLE `{{var('project')}}.{{var('dataset')}}.events_intraday_{{relation_suffix}}{{property_id}}` CLONE `{{var('project')}}.analytics_{{property_id}}.events_intraday_{{relation_suffix}}`; + CREATE OR REPLACE TABLE `{{target.project}}.{{var('combined_dataset')}}.events_intraday_{{relation_suffix}}{{property_id}}` CLONE `{{var('project')}}.analytics_{{property_id}}.events_intraday_{{relation_suffix}}`; {%- endif -%} {% endfor %} {# Copy daily tables and drop old intraday table #} @@ -30,8 +30,8 @@ {% for relation in relations %} {%- set relation_suffix = relation.identifier|replace('events_', '') -%} {%- if relation_suffix|int >= earliest_shard_to_retrieve|int -%} - CREATE OR REPLACE TABLE `{{var('project')}}.{{var('dataset')}}.events_{{relation_suffix}}{{property_id}}` CLONE `{{var('project')}}.analytics_{{property_id}}.events_{{relation_suffix}}`; - DROP TABLE IF EXISTS 
`{{var('project')}}.{{var('dataset')}}.events_intraday_{{relation_suffix}}{{property_id}}`; + CREATE OR REPLACE TABLE `{{target.project}}.{{var('combined_dataset')}}.events_{{relation_suffix}}{{property_id}}` CLONE `{{var('project')}}.analytics_{{property_id}}.events_{{relation_suffix}}`; + DROP TABLE IF EXISTS `{{target.project}}.{{var('combined_dataset')}}.events_intraday_{{relation_suffix}}{{property_id}}`; {%- endif -%} {% endfor %} {% endfor %} From eadd7a6668238aaf0f5526972770ebd025f66d98 Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Fri, 29 Dec 2023 07:49:56 -0500 Subject: [PATCH 2/7] update project var to source_project --- macros/combine_property_data.sql | 8 ++++---- models/staging/src_ga4.yml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/macros/combine_property_data.sql b/macros/combine_property_data.sql index 32b9fb0c..67ef31cc 100644 --- a/macros/combine_property_data.sql +++ b/macros/combine_property_data.sql @@ -18,19 +18,19 @@ {% for property_id in var('property_ids') %} {%- set schema_name = "analytics_" + property_id|string -%} {# Copy intraday tables #} - {%- set relations = dbt_utils.get_relations_by_pattern(schema_pattern=schema_name, table_pattern='events_intraday_%', database=var('project')) -%} + {%- set relations = dbt_utils.get_relations_by_pattern(schema_pattern=schema_name, table_pattern='events_intraday_%', database=var('source_project')) -%} {% for relation in relations %} {%- set relation_suffix = relation.identifier|replace('events_intraday_', '') -%} {%- if relation_suffix|int >= earliest_shard_to_retrieve|int -%} - CREATE OR REPLACE TABLE `{{target.project}}.{{var('combined_dataset')}}.events_intraday_{{relation_suffix}}{{property_id}}` CLONE `{{var('project')}}.analytics_{{property_id}}.events_intraday_{{relation_suffix}}`; + CREATE OR REPLACE TABLE `{{target.project}}.{{var('combined_dataset')}}.events_intraday_{{relation_suffix}}{{property_id}}` CLONE 
`{{var('source_project')}}.analytics_{{property_id}}.events_intraday_{{relation_suffix}}`; {%- endif -%} {% endfor %} {# Copy daily tables and drop old intraday table #} - {%- set relations = dbt_utils.get_relations_by_pattern(schema_pattern=schema_name, table_pattern='events_%', exclude='events_intraday_%', database=var('project')) -%} + {%- set relations = dbt_utils.get_relations_by_pattern(schema_pattern=schema_name, table_pattern='events_%', exclude='events_intraday_%', database=var('source_project')) -%} {% for relation in relations %} {%- set relation_suffix = relation.identifier|replace('events_', '') -%} {%- if relation_suffix|int >= earliest_shard_to_retrieve|int -%} - CREATE OR REPLACE TABLE `{{target.project}}.{{var('combined_dataset')}}.events_{{relation_suffix}}{{property_id}}` CLONE `{{var('project')}}.analytics_{{property_id}}.events_{{relation_suffix}}`; + CREATE OR REPLACE TABLE `{{target.project}}.{{var('combined_dataset')}}.events_{{relation_suffix}}{{property_id}}` CLONE `{{var('source_project')}}.analytics_{{property_id}}.events_{{relation_suffix}}`; DROP TABLE IF EXISTS `{{target.project}}.{{var('combined_dataset')}}.events_intraday_{{relation_suffix}}{{property_id}}`; {%- endif -%} {% endfor %} diff --git a/models/staging/src_ga4.yml b/models/staging/src_ga4.yml index 4055323d..67b6e356 100644 --- a/models/staging/src_ga4.yml +++ b/models/staging/src_ga4.yml @@ -2,7 +2,7 @@ version: 2 sources: - name: ga4 - database: "{{var('project')}}" + database: "{{var('source_project')}}" schema: "{{var('dataset')}}" tables: - name: events From 1bfe30c2517256d1706f3f30a97503689852f0e3 Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Fri, 29 Dec 2023 08:00:00 -0500 Subject: [PATCH 3/7] dynamic source based on combined_dataset var --- README.md | 19 ++++++++++++++++--- models/staging/src_ga4.yml | 5 ++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 495f0efe..4f749205 100644 --- a/README.md +++ b/README.md @@ 
-77,12 +77,25 @@ This package assumes that you have an existing DBT project with a BigQuery profi ``` vars: ga4: - project: "your_gcp_project" - dataset: "your_ga4_dataset" + source_project: "my_source_gcp_project" # Project that contains raw GA4 data + property_ids: [11111111] # Array of properties to process + start_date: "YYYYMMDD" # Earliest date to load + static_incremental_days: 3 # Number of days to scan and reprocess on each run +``` + +## Required Variables (Multi-Property Instance) + +When processing multiple properties at a time, the required variables change slightly. See [Multi-Property Support](#multi-property-support) section for details on configuring multiple GA4 properties as a source. + +``` +vars: + ga4: + source_project: "my_source_gcp_project" # Project that contains raw GA4 data + combined_dataset: "my_combined_data" # Dataset where multi-property data is cloned + property_ids: [11111111,2222222] # Array of properties to process start_date: "YYYYMMDD" # Earliest date to load static_incremental_days: 3 # Number of days to scan and reprocess on each run ``` -See [Multi-Property Support](#multi-property-support) section for details on configuring multiple GA4 properties as a source. ## Optional Variables diff --git a/models/staging/src_ga4.yml b/models/staging/src_ga4.yml index 67b6e356..6b6b71db 100644 --- a/models/staging/src_ga4.yml +++ b/models/staging/src_ga4.yml @@ -3,7 +3,10 @@ version: 2 sources: - name: ga4 database: "{{var('source_project')}}" - schema: "{{var('dataset')}}" + schema: | # Source from combined property dataset if set, otherwise source from source property + {%- if var('combined_dataset', false) != false -%} {{var('combined_dataset')}} + {%- else -%} "analytics_{{var('property_ids')[0]}}" + {%- endif -%} tables: - name: events identifier: events_* # Scan across all sharded event tables.
Use the 'start_date' variable to limit this scan From 38cc55b518243a73ae8de4eec6f706be1eb4f12b Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Fri, 29 Dec 2023 08:01:27 -0500 Subject: [PATCH 4/7] doc updates --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4f749205..c8302f85 100644 --- a/README.md +++ b/README.md @@ -308,14 +308,14 @@ Overriding the package's default channel mapping makes use of dbt's dispatch ove # Multi-Property Support -Multiple GA4 properties are supported by listing out the project IDs in the `property_ids` variable. In this scenario, the `static_incremental_days` variable is required and the `dataset` variable will define the target dataset where source data will be copied. +Multiple GA4 properties are supported by listing out the property IDs in the `property_ids` variable. In this scenario, the `static_incremental_days` variable is required and the `combined_dataset` variable will define the dataset (in your profile's target project) where source data will be copied. ``` vars: ga4: property_ids: [11111111, 22222222, 33333333] static_incremental_days: 3 - dataset: "my_combined_dataset" + combined_dataset: "my_combined_dataset" ``` With these variables set, the `combine_property_data` macro will run as a pre-hook to `base_ga4_events` and clone shards to the target dataset. The number of days' worth of data to clone during incremental runs will be based on the `static_incremental_days` variable.
From a53174e14b5dd58532b76e67e9f851a70933bb60 Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Fri, 29 Dec 2023 08:04:12 -0500 Subject: [PATCH 5/7] conditionally apply prehook --- models/staging/base/base_ga4__events.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 95033cf0..2b06e315 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -5,7 +5,7 @@ {{ config( - pre_hook="{{ ga4.combine_property_data() }}" if var('property_ids', false) else "", + pre_hook="{{ ga4.combine_property_data() }}" if var('combined_dataset', false) else "", materialized = 'incremental', incremental_strategy = 'insert_overwrite', partition_by={ From 7737681b9d88de2e17aba054ea481d6a0a0b4751 Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Fri, 29 Dec 2023 08:04:17 -0500 Subject: [PATCH 6/7] fix quotes --- models/staging/src_ga4.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/staging/src_ga4.yml b/models/staging/src_ga4.yml index 6b6b71db..c0ddc003 100644 --- a/models/staging/src_ga4.yml +++ b/models/staging/src_ga4.yml @@ -5,7 +5,7 @@ sources: database: "{{var('source_project')}}" schema: | # Source from combined property dataset if set, otherwise source from source property {%- if var('combined_dataset', false) != false -%} {{var('combined_dataset')}} - {%- else -%} "analytics_{{var('property_ids')[0]}}" + {%- else -%} analytics_{{var('property_ids')[0]}} {%- endif -%} tables: - name: events From dafeb4415fff42de21711e26b2f1fa277ef66d33 Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Fri, 29 Dec 2023 08:21:05 -0500 Subject: [PATCH 7/7] dynamically swap source project as well as dataset --- models/staging/src_ga4.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/models/staging/src_ga4.yml b/models/staging/src_ga4.yml index c0ddc003..29104767 100644 --- 
a/models/staging/src_ga4.yml +++ b/models/staging/src_ga4.yml @@ -2,8 +2,11 @@ version: 2 sources: - name: ga4 - database: "{{var('source_project')}}" - schema: | # Source from combined property dataset if set, otherwise source from source property + database: | # Source from target.project if multi-property, otherwise source from source_project + {%- if var('combined_dataset', false) != false -%} {{target.project}} + {%- else -%} {{var('source_project')}} + {%- endif -%} + schema: | # Source from combined property dataset if set, otherwise source from original GA4 property {%- if var('combined_dataset', false) != false -%} {{var('combined_dataset')}} {%- else -%} analytics_{{var('property_ids')[0]}} {%- endif -%}