From 003f9506bf70b0c2c317cd154b0dea0ced4c1fca Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Thu, 18 Apr 2024 09:29:19 +0200 Subject: [PATCH 01/64] Cluster base table by stream_id to save cost --- models/staging/base/base_ga4__events.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 533dbc0f..5ad001ea 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -13,7 +13,7 @@ "data_type": "date", }, partitions = partitions_to_replace, - cluster_by=['event_name'] + cluster_by=['event_name', 'stream_id'] ) }} From 02cb8d9c662a54593cb8018d0e2ab2f74278c789 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Thu, 18 Apr 2024 14:40:49 +0200 Subject: [PATCH 02/64] Added client_id to staging table --- .../staging/stg_ga4__sessions_traffic_sources_daily.sql | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql index 1847d8d8..8f274407 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql @@ -19,7 +19,8 @@ with session_events as ( select - client_key + client_id + ,client_key ,session_partition_key ,event_date_dt as session_partition_date ,event_timestamp @@ -47,7 +48,8 @@ set_default_channel_grouping as ( ), first_session_source as ( select - client_key + client_id + ,client_key ,session_partition_key ,session_partition_date ,event_timestamp @@ -69,7 +71,7 @@ find_non_direct_session_partition_key as ( from first_session_source ) -select +select client_id client_key ,session_partition_key ,session_partition_date From fa1b0d0733e422d4af0025578e91a18ca77b8aca Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Thu, 18 Apr 2024 14:57:45 +0200 Subject: [PATCH 03/64] Fixed type-o --- models/staging/stg_ga4__sessions_traffic_sources_daily.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql index 8f274407..a19f4a85 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql @@ -19,7 +19,7 @@ with session_events as ( select - client_id + stream_id ,client_key ,session_partition_key ,event_date_dt as session_partition_date @@ -71,8 +71,8 @@ find_non_direct_session_partition_key as ( from first_session_source ) -select client_id - client_key +select stream_id + ,client_key ,session_partition_key ,session_partition_date ,session_source From c970c6a3ed43afb7957f1a02c49f554685590a96 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Thu, 18 Apr 2024 15:00:34 +0200 Subject: [PATCH 04/64] Forgot one reference --- models/staging/stg_ga4__sessions_traffic_sources_daily.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql index a19f4a85..aa949e93 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql @@ -48,7 +48,7 @@ set_default_channel_grouping as ( ), first_session_source as ( select - client_id + stream_id ,client_key ,session_partition_key ,session_partition_date From ca16d32fa2cfbf333164ab176831d53f95b8af6f Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Thu, 18 Apr 2024 15:03:45 +0200 Subject: [PATCH 05/64] Fixed grouping issue: --- models/staging/stg_ga4__sessions_traffic_sources_daily.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql index aa949e93..f1357843 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql @@ -85,4 +85,4 @@ select stream_id ,non_direct_session_partition_key ,min(event_timestamp) as session_partition_timestamp from find_non_direct_session_partition_key -group by 1,2,3,4,5,6,7,8,9,10,11 \ No newline at end of file +group by all \ No newline at end of file From ac57735366b8ad9ca93926e4caec9f1f0375ec9a Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Fri, 19 Apr 2024 16:29:30 +0200 Subject: [PATCH 06/64] Added Client ID to user_id_mapping --- models/staging/stg_ga4__user_id_mapping.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/models/staging/stg_ga4__user_id_mapping.sql b/models/staging/stg_ga4__user_id_mapping.sql index 75786898..fe27e1f8 100644 --- a/models/staging/stg_ga4__user_id_mapping.sql +++ b/models/staging/stg_ga4__user_id_mapping.sql @@ -1,5 +1,6 @@ with events_with_user_id as ( select + stream_id, user_id, client_key, event_timestamp @@ -9,6 +10,7 @@ with events_with_user_id as ( ), include_last_seen_timestamp as ( select + stream_id, user_id, client_key, max(event_timestamp) as last_seen_user_id_timestamp @@ -17,6 +19,7 @@ include_last_seen_timestamp as ( ), pick_latest_timestamp as ( select + stream_id, user_id as last_seen_user_id, client_key, last_seen_user_id_timestamp From 22901a31a9887aa979bc8efafd8436241da3e0b3 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Fri, 19 Apr 2024 16:39:43 +0200 Subject: [PATCH 07/64] Fixed grouping --- models/staging/stg_ga4__user_id_mapping.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/staging/stg_ga4__user_id_mapping.sql b/models/staging/stg_ga4__user_id_mapping.sql index fe27e1f8..ff5150fc 100644 --- a/models/staging/stg_ga4__user_id_mapping.sql +++ b/models/staging/stg_ga4__user_id_mapping.sql @@ -15,7 +15,7 @@ include_last_seen_timestamp as ( client_key, max(event_timestamp) as last_seen_user_id_timestamp from events_with_user_id - group by 1,2 + group by 1,2,3 ), pick_latest_timestamp as ( select From a0adf6eae81d25cc386c93c56903900417272b53 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Tue, 23 Apr 2024 16:49:55 +0200 Subject: [PATCH 08/64] Added Client ID to Derived User Properties --- models/staging/stg_ga4__derived_user_properties.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/models/staging/stg_ga4__derived_user_properties.sql b/models/staging/stg_ga4__derived_user_properties.sql index ec1fd6b6..1803ea51 100644 --- a/models/staging/stg_ga4__derived_user_properties.sql +++ b/models/staging/stg_ga4__derived_user_properties.sql @@ -11,6 +11,7 @@ with events_from_valid_users as ( unnest_user_properties as ( select + stream_id, client_key, event_timestamp {% for up in var('derived_user_properties', []) %} @@ -20,6 +21,7 @@ unnest_user_properties as ) SELECT DISTINCT + stream_id, client_key {% for up in var('derived_user_properties', []) %} , LAST_VALUE({{ up.event_parameter }} IGNORE NULLS) OVER (user_window) AS {{ up.user_property_name }} From aaf95635ffbd52d336e7b5ad4eec338392326bf2 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Wed, 24 Apr 2024 13:00:34 +0200 Subject: [PATCH 09/64] Added stream id --- models/staging/stg_ga4__client_key_first_last_pageviews.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/models/staging/stg_ga4__client_key_first_last_pageviews.sql b/models/staging/stg_ga4__client_key_first_last_pageviews.sql index d46f1c5a..1a51d550 100644 --- a/models/staging/stg_ga4__client_key_first_last_pageviews.sql +++ b/models/staging/stg_ga4__client_key_first_last_pageviews.sql @@ -4,6 +4,7 @@ with page_views_first_last as ( select + stream_id, client_key, FIRST_VALUE(event_key) OVER (PARTITION BY client_key ORDER BY event_timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS first_page_view_event_key, LAST_VALUE(event_key) OVER (PARTITION BY client_key ORDER BY event_timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS last_page_view_event_key @@ -12,6 +13,7 @@ with page_views_first_last as ( ), page_views_by_client_key as ( select distinct + stream_id, client_key, first_page_view_event_key, last_page_view_event_key From 9811f8faa0c27485b429b6ccab8ab004cc583c10 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Fri, 26 Apr 2024 13:32:54 +0200 Subject: [PATCH 10/64] Added stream_id --- models/staging/stg_ga4__sessions_traffic_sources.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/models/staging/stg_ga4__sessions_traffic_sources.sql b/models/staging/stg_ga4__sessions_traffic_sources.sql index b0f55c40..afb132cc 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources.sql @@ -1,6 +1,7 @@ with session_events as ( select - session_key + stream_id + ,session_key ,event_timestamp ,events.event_source ,event_medium @@ -22,7 +23,8 @@ set_default_channel_grouping as ( ), session_source as ( select - session_key + stream_id + ,session_key ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN event_source END) IGNORE NULLS) OVER (session_window), '(direct)') AS session_source ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_medium, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_medium ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(source_category, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_source_category From bbf50d93b5e1b42b471e6403ec6a14ce21a57883 Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 6 May 2024 12:54:36 +0200 Subject: [PATCH 11/64] date range --- README.md | 7 +++++++ macros/schema.yml | 15 +++++++++++++++ macros/select_date_range.sql | 7 +++++++ models/staging/base/base_ga4__events.sql | 3 +++ 4 files changed, 32 insertions(+) create mode 100644 macros/schema.yml create mode 100644 macros/select_date_range.sql diff --git a/README.md b/README.md index 68bcfb66..88215d3c 100644 --- a/README.md +++ b/README.md @@ -260,6 +260,13 @@ vars: ga4: session_attribution_lookback_window_days: 90 ``` +# Select Date Range + +To select a date range in a where statement you can use `select_date_range` macro. You can use it in your where statement like this: + +``` WHERE (or TRUE) and {{ ga4.select_date_range(start_date, end_date, date_column) }}``` + + # Custom Events diff --git a/macros/schema.yml b/macros/schema.yml new file mode 100644 index 00000000..4cb10a38 --- /dev/null +++ b/macros/schema.yml @@ -0,0 +1,15 @@ +version: 2 + +macros: + - name: date + description: A macro to convert cents to dollars + arguments: + - name: date_column + type: string + description: The name of the date column you want to use for filtering + - name: start_date + type: string + description: the start date you want to use to filter the date_column (format 'YYYYMMDD') + - name: end_date + type: string + description: the end date you want to use to filter the date_column (format 'YYYYMMDD') \ No newline at end of file diff --git a/macros/select_date_range.sql b/macros/select_date_range.sql new file mode 100644 index 00000000..56c7acce --- /dev/null +++ b/macros/select_date_range.sql @@ -0,0 +1,7 @@ +{% macro select_date_range(start_date, end_date, date_column) %} + {% if start_date is not none and end_date is not none %} + date_column >= start_date and date_column <= end_date + {% else %} + date_column >= CURRENT_DATE - var("lookback_window") + {% endif %} +{% endmacro %} \ No newline at end of file diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 5ad001ea..1a719f58 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -22,6 +22,9 @@ with source as ( {{ ga4.base_select_source() }} from {{ source('ga4', 'events') }} where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{var('start_date')}} + {% if end_date is not none %} + and cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) <= {{ end_date }} + {% endif %} {% if is_incremental() %} and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }}) {% endif %} From 715339c3833d665c91527eb1be2f189bbdd75246 Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 6 May 2024 14:13:22 +0200 Subject: [PATCH 12/64] Update base_ga4__events.sql --- models/staging/base/base_ga4__events.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 1a719f58..2f0cf23a 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -23,7 +23,7 @@ with source as ( from {{ source('ga4', 'events') }} where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{var('start_date')}} {% if end_date is not none %} - and cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) <= {{ end_date }} + and cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) <= {{ var('end_date') }} {% endif %} {% if is_incremental() %} and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }}) From 6e5c212b975f50602df8c9c4eca1bf7b2fa77b6e Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 6 May 2024 14:18:58 +0200 Subject: [PATCH 13/64] Update base_ga4__events.sql --- models/staging/base/base_ga4__events.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 2f0cf23a..8db733b1 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -22,7 +22,7 @@ with source as ( {{ ga4.base_select_source() }} from {{ source('ga4', 'events') }} where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{var('start_date')}} - {% if end_date is not none %} + {% if {{ var('end_date') }} is not none %} and cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) <= {{ var('end_date') }} {% endif %} {% if is_incremental() %} From a7f9885ccf6cff66eaf0f0aebb88a0384f54422a Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 6 May 2024 14:27:53 +0200 Subject: [PATCH 14/64] Update base_ga4__events.sql --- models/staging/base/base_ga4__events.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 8db733b1..719f52d1 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -22,8 +22,8 @@ with source as ( {{ ga4.base_select_source() }} from {{ source('ga4', 'events') }} where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{var('start_date')}} - {% if {{ var('end_date') }} is not none %} - and cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) <= {{ var('end_date') }} + {% if var('end_date') is not none %} + and cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) <= {{ var('end_date')}} {% endif %} {% if is_incremental() %} and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }}) From 33f1f2c522ec4f54055f118a86ab169d31420f43 Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 6 May 2024 14:47:16 +0200 Subject: [PATCH 15/64] Update schema.yml --- macros/schema.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macros/schema.yml b/macros/schema.yml index 4cb10a38..6557e55f 100644 --- a/macros/schema.yml +++ b/macros/schema.yml @@ -1,7 +1,7 @@ version: 2 macros: - - name: date + - name: select_date_range description: A macro to convert cents to dollars arguments: - name: date_column @@ -12,4 +12,4 @@ macros: description: the start date you want to use to filter the date_column (format 'YYYYMMDD') - name: end_date type: string - description: the end date you want to use to filter the date_column (format 'YYYYMMDD') \ No newline at end of file + description: the end date you want to use to filter the date_column (format 'YYYYMMDD') From 3ba4ed7a1c99c306f211f58a12f81694dcac767f Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Mon, 6 May 2024 14:52:49 +0200 Subject: [PATCH 16/64] Added logging --- models/staging/base/base_ga4__events.sql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 719f52d1..3945839c 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -3,6 +3,11 @@ {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} +{{ log("Running with start_date: " ~ var('start_date'), info=True) }} +{% if var('end_date') is not none %} +{{ log("Running with end_date: " ~ var('end_date'), info=True) }} +{% endif %} + {{ config( pre_hook="{{ ga4.combine_property_data() }}" if var('combined_dataset', false) else "", From 5c7af5dfdff06dc0c057cdc4083d45c95b626521 Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 6 May 2024 15:08:05 +0200 Subject: [PATCH 17/64] Update base_ga4__events.sql --- models/staging/base/base_ga4__events.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 3945839c..a8002275 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -30,7 +30,7 @@ with source as ( {% if var('end_date') is not none %} and cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) <= {{ var('end_date')}} {% endif %} - {% if is_incremental() %} + {% if is_incremental() and var('end_date') is none %} and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }}) {% endif %} ), From 07ece654397ac4b6edcbe2671d092b9b5e39e916 Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 6 May 2024 15:55:59 +0200 Subject: [PATCH 18/64] Update select_date_range.sql --- macros/select_date_range.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/macros/select_date_range.sql b/macros/select_date_range.sql index 56c7acce..8dabf6ad 100644 --- a/macros/select_date_range.sql +++ b/macros/select_date_range.sql @@ -1,7 +1,7 @@ {% macro select_date_range(start_date, end_date, date_column) %} {% if start_date is not none and end_date is not none %} - date_column >= start_date and date_column <= end_date + REPLACE(CAST({{date_column}} AS STRING), "-", "") >= CAST({{ start_date }} AS STRING) and REPLACE(CAST({{date_column}} AS STRING), "-", "") <= CAST({{ end_date }} AS STRING) {% else %} - date_column >= CURRENT_DATE - var("lookback_window") + {{ date_column }} >= CURRENT_DATE - {{ var("lookback_window") }} {% endif %} -{% endmacro %} \ No newline at end of file +{% endmacro %} From 317e12d033d568d00c0c03cc3a33cb99b7a9220c Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Thu, 30 May 2024 18:04:38 +0200 Subject: [PATCH 19/64] Changed Macro --- macros/select_date_range.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/macros/select_date_range.sql b/macros/select_date_range.sql index 8dabf6ad..783f2b4e 100644 --- a/macros/select_date_range.sql +++ b/macros/select_date_range.sql @@ -1,7 +1,7 @@ {% macro select_date_range(start_date, end_date, date_column) %} {% if start_date is not none and end_date is not none %} - REPLACE(CAST({{date_column}} AS STRING), "-", "") >= CAST({{ start_date }} AS STRING) and REPLACE(CAST({{date_column}} AS STRING), "-", "") <= CAST({{ end_date }} AS STRING) + CAST({{ date_column }} AS DATE) BETWEEN DATE '{{ start_date }}' AND DATE '{{ end_date }}' {% else %} - {{ date_column }} >= CURRENT_DATE - {{ var("lookback_window") }} + CAST({{ date_column }} AS DATE) >= CURRENT_DATE() - INTERVAL {{ var("lookback_window") }} DAY {% endif %} -{% endmacro %} +{% endmacro %} \ No newline at end of file From 43ca797faae797bcf73b3c32e59c98767350c4a0 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Thu, 30 May 2024 18:46:38 +0200 Subject: [PATCH 20/64] Moved macro to our own project --- macros/schema.yml | 15 --------------- macros/select_date_range.sql | 7 ------- 2 files changed, 22 deletions(-) delete mode 100644 macros/schema.yml delete mode 100644 macros/select_date_range.sql diff --git a/macros/schema.yml b/macros/schema.yml deleted file mode 100644 index 6557e55f..00000000 --- a/macros/schema.yml +++ /dev/null @@ -1,15 +0,0 @@ -version: 2 - -macros: - - name: select_date_range - description: A macro to convert cents to dollars - arguments: - - name: date_column - type: string - description: The name of the date column you want to use for filtering - - name: start_date - type: string - description: the start date you want to use to filter the date_column (format 'YYYYMMDD') - - name: end_date - type: string - description: the end date you want to use to filter the date_column (format 'YYYYMMDD') diff --git a/macros/select_date_range.sql b/macros/select_date_range.sql deleted file mode 100644 index 783f2b4e..00000000 --- a/macros/select_date_range.sql +++ /dev/null @@ -1,7 +0,0 @@ -{% macro select_date_range(start_date, end_date, date_column) %} - {% if start_date is not none and end_date is not none %} - CAST({{ date_column }} AS DATE) BETWEEN DATE '{{ start_date }}' AND DATE '{{ end_date }}' - {% else %} - CAST({{ date_column }} AS DATE) >= CURRENT_DATE() - INTERVAL {{ var("lookback_window") }} DAY - {% endif %} -{% endmacro %} \ No newline at end of file From 43684c982ec0fd9cfc02c1b3be90947303936bcc Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Fri, 31 May 2024 08:16:22 +0200 Subject: [PATCH 21/64] Generate partitions to replace on start and end date --- models/staging/base/base_ga4__events.sql | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index a8002275..ae30f4aa 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -1,12 +1,15 @@ -{% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} - {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} +{% set date_range = (range( + (end_date|date).toordinal() - (start_date|date).toordinal() + 1 +)) %} + +{% for i in date_range %} + {% set partition_date = (start_date|date + timedelta(days=i)).strftime('%Y-%m-%d') %} + {% set partitions_to_replace = partitions_to_replace.append(partition_date) %} + {{ log("Adding partition: " ~ partition_date, info=True) }} {% endfor %} -{{ log("Running with start_date: " ~ var('start_date'), info=True) }} -{% if var('end_date') is not none %} -{{ log("Running with end_date: " ~ var('end_date'), info=True) }} -{% endif %} +{{ log("Partitions to replace: " ~ partitions_to_replace | join(', '), info=True) }} + {{ config( From ac46229591ddf892be8bb61359e0844b20caff7e Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Fri, 31 May 2024 11:42:55 +0200 Subject: [PATCH 22/64] Generate partitions to replace based on start and end date --- models/staging/base/base_ga4__events.sql | 44 +++++++++++++++++++----- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index ae30f4aa..11f31427 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -1,15 +1,41 @@ -{% set date_range = (range( - (end_date|date).toordinal() - (start_date|date).toordinal() + 1 -)) %} - -{% for i in date_range %} - {% set partition_date = (start_date|date + timedelta(days=i)).strftime('%Y-%m-%d') %} - {% set partitions_to_replace = partitions_to_replace.append(partition_date) %} - {{ log("Adding partition: " ~ partition_date, info=True) }} +{% set partitions_to_replace = ['current_date'] %} +{% for i in range(var('static_incremental_days')) %} + {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} -{{ log("Partitions to replace: " ~ partitions_to_replace | join(', '), info=True) }} +{% set start_date = var('start_date', none) %} +{% set end_date = var('end_date', none) %} + +{{ log("Initial start_date: " ~ start_date, info=True) }} +{{ log("Initial end_date: " ~ end_date, info=True) }} + + +{% if execute %} +{% if start_date and end_date %} + {{ log("Running with start_date: " ~ start_date, info=True) }} + {{ log("Running with end_date: " ~ end_date, info=True) }} + + {% set formatted_start_date = start_date[:4] ~ '-' ~ start_date[4:6] ~ '-' ~ start_date[6:] %} + {% set formatted_end_date = end_date[:4] ~ '-' ~ end_date[4:6] ~ '-' ~ end_date[6:] %} + + {{ log("Formatted start_date: " ~ formatted_start_date, info=True) }} + {{ log("Formatted end_date: " ~ formatted_end_date, info=True) }} + + {% set date_array_query %} + {{ generate_date_array(formatted_start_date, formatted_end_date) }} + {% endset %} + + {% set results = run_query(date_array_query) %} + {% set partitions_to_replace = [] %} + + {% set partitions_to_replace = results[0]['date_array'] %} + +{% endif %} +{% endif %} + + +{{ log("Partitions to replace: " ~ partitions_to_replace, info=True) }} {{ config( From 7aa7d28241df1ea91bc56ab3c1adf8e6767570c0 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Fri, 31 May 2024 11:43:22 +0200 Subject: [PATCH 23/64] Generate partitions to replace based on start and end date --- macros/generate_date_array.sql | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 macros/generate_date_array.sql diff --git a/macros/generate_date_array.sql b/macros/generate_date_array.sql new file mode 100644 index 00000000..f85520e5 --- /dev/null +++ b/macros/generate_date_array.sql @@ -0,0 +1,14 @@ +{% macro generate_date_array(start_date, end_date) %} +{% if execute %} + ( + SELECT + ARRAY_AGG(date) AS date_array + FROM + UNNEST(GENERATE_DATE_ARRAY( + DATE('{{ start_date }}'), + DATE('{{ end_date }}'), + INTERVAL 1 DAY + )) AS date + ) + {% endif %} +{% endmacro %} From b2930f54366bb6a1ab9e6bb4402608740d559bf0 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Mon, 3 Jun 2024 10:13:42 +0200 Subject: [PATCH 24/64] Added stream_id --- ...a4__sessions_traffic_sources_last_non_direct_daily.sql | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql index 5c7fc69f..1288b2e5 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql @@ -18,7 +18,8 @@ with last_non_direct_session_partition_key as ( select - client_key + stream_id + , client_key ,session_partition_key ,session_partition_date ,session_source @@ -49,7 +50,8 @@ with last_non_direct_session_partition_key as ( ) ,join_last_non_direct_session_source as ( select - last_non_direct_session_partition_key.client_key + last_non_direct_source.stream_id + , last_non_direct_session_partition_key.client_key ,last_non_direct_session_partition_key.session_partition_key ,last_non_direct_session_partition_key.session_partition_date ,last_non_direct_session_partition_key.session_source @@ -69,7 +71,7 @@ with last_non_direct_session_partition_key as ( ,coalesce(last_non_direct_source.session_default_channel_grouping, 'Direct') as last_non_direct_default_channel_grouping from last_non_direct_session_partition_key left join {{ref('stg_ga4__sessions_traffic_sources_daily')}} last_non_direct_source on - last_non_direct_session_partition_key.session_partition_key_last_non_direct = last_non_direct_source.session_partition_key + last_non_direct_session_partition_key.session_partition_key_last_non_direct = last_non_direct_source.session_partition_key and last_non_direct_session_partition_key.stream_id = last_non_direct_source.stream_id {% if is_incremental() %} -- Only keep the records in the partitions we wish to replace (as opposed to the whole 30 day lookback window) where last_non_direct_session_partition_key.session_partition_date in ({{ partitions_to_replace | join(',') }}) From fda0221ad02fa6bfb509d8da3621785660172297 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Mon, 3 Jun 2024 10:14:25 +0200 Subject: [PATCH 25/64] Added clustering --- .../stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql index 1288b2e5..53c34d4b 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql @@ -13,6 +13,7 @@ "granularity": "day" }, partitions = partitions_to_replace + cluster_by=['stream_id'] ) }} From abd6d75fb6b6c5f3b363ec46952a82eecf6fb172 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Mon, 3 Jun 2024 10:16:10 +0200 Subject: [PATCH 26/64] Fixed Type-O --- .../stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql index 53c34d4b..6dddc095 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql @@ -12,7 +12,7 @@ "data_type": "date", "granularity": "day" }, - partitions = partitions_to_replace + partitions = partitions_to_replace, cluster_by=['stream_id'] ) }} From 49ea67ad7a2dded0442992a11c25d7da55e9ef6f Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Mon, 3 Jun 2024 10:39:07 +0200 Subject: [PATCH 27/64] Added stream ID --- models/staging/stg_ga4__session_conversions_daily.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/models/staging/stg_ga4__session_conversions_daily.sql b/models/staging/stg_ga4__session_conversions_daily.sql index f33ea6d5..308f9337 100644 --- a/models/staging/stg_ga4__session_conversions_daily.sql +++ b/models/staging/stg_ga4__session_conversions_daily.sql @@ -13,7 +13,8 @@ "data_type": "date", "granularity": "day" }, - partitions = partitions_to_replace + partitions = partitions_to_replace, + cluster_by = ['stream_id'] ) }} @@ -21,6 +22,7 @@ with event_counts as ( select + stream_id, session_key, session_partition_key, min(event_date_dt) as session_partition_date -- The date of this session partition @@ -32,7 +34,7 @@ with event_counts as ( {% if is_incremental() %} and event_date_dt in ({{ partitions_to_replace | join(',') }}) {% endif %} - group by 1,2 + group by all ) select * from event_counts From d4916832697923c148494c7df9230e829c33f3a0 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Mon, 3 Jun 2024 10:40:48 +0200 Subject: [PATCH 28/64] From 5729b6c6317605d57fae3d734f8d5848081ec19f Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Mon, 3 Jun 2024 10:40:59 +0200 Subject: [PATCH 29/64] From a57e8c4e1bbc348f9b2755d08e043b24a03b2c58 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Mon, 3 Jun 2024 11:26:35 +0200 Subject: [PATCH 30/64] Updated --- macros/generate_date_array.sql | 14 -------------- models/staging/base/base_ga4__events.sql | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 17 deletions(-) delete mode 100644 macros/generate_date_array.sql diff --git a/macros/generate_date_array.sql b/macros/generate_date_array.sql deleted file mode 100644 index f85520e5..00000000 --- a/macros/generate_date_array.sql +++ /dev/null @@ -1,14 +0,0 @@ -{% macro generate_date_array(start_date, end_date) %} -{% if execute %} - ( - SELECT - ARRAY_AGG(date) AS date_array - FROM - UNNEST(GENERATE_DATE_ARRAY( - DATE('{{ start_date }}'), - DATE('{{ end_date }}'), - INTERVAL 1 DAY - )) AS date - ) - {% endif %} -{% endmacro %} diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 11f31427..8c4e5450 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -11,7 +11,6 @@ {{ log("Initial end_date: " ~ end_date, info=True) }} -{% if execute %} {% if start_date and end_date %} {{ log("Running with start_date: " ~ start_date, info=True) }} {{ log("Running with end_date: " ~ end_date, info=True) }} @@ -23,15 +22,25 @@ {{ log("Formatted end_date: " ~ formatted_end_date, info=True) }} {% set date_array_query %} - {{ generate_date_array(formatted_start_date, formatted_end_date) }} + SELECT + ARRAY_AGG(date) AS date_array + FROM + UNNEST(GENERATE_DATE_ARRAY( + DATE('{{ formatted_start_date }}'), + DATE('{{ formatted_end_date }}'), + INTERVAL 1 DAY + )) AS date {% endset %} + + {% if execute %} {% set results = run_query(date_array_query) %} {% set partitions_to_replace = [] %} {% set partitions_to_replace = results[0]['date_array'] %} + {% endif %} + -{% endif %} {% endif %} From ba6f44dbba6c2b42ce303338e04ff4845f8aa950 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Mon, 10 Jun 2024 11:20:18 +0200 Subject: [PATCH 31/64] Fixed partitions to replace --- macros/generate_date_array.sql | 15 ++++++++++++++ models/staging/base/base_ga4__events.sql | 25 +++++++----------------- 2 files changed, 22 insertions(+), 18 deletions(-) create mode 100644 macros/generate_date_array.sql diff --git a/macros/generate_date_array.sql b/macros/generate_date_array.sql new file mode 100644 index 00000000..66833772 --- /dev/null +++ b/macros/generate_date_array.sql @@ -0,0 +1,15 @@ +{% macro generate_date_array(start_date_str, end_date_str) %} + {% set date_format_input = '%Y%m%d' %} + {% set date_format_output = '%Y-%m-%d' %} + {% set dates = [] %} + {% set start_date = modules.datetime.datetime.strptime(start_date_str, date_format_input) %} + {% set end_date = modules.datetime.datetime.strptime(end_date_str, date_format_input) %} + {% set diff_days = (end_date - start_date).days %} + + {% for i in range(diff_days + 1) %} + {% set current_date = start_date + modules.datetime.timedelta(days=i) %} + {% set dates = dates.append(current_date.strftime(date_format_output)) %} + {% endfor %} + + {{ return(dates) }} +{% endmacro %} \ No newline at end of file diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 8c4e5450..319e25ea 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -21,31 +21,20 @@ {{ log("Formatted start_date: " ~ formatted_start_date, info=True) }} {{ log("Formatted end_date: " ~ formatted_end_date, info=True) }} - {% set date_array_query %} - SELECT - ARRAY_AGG(date) AS date_array - FROM - UNNEST(GENERATE_DATE_ARRAY( - DATE('{{ formatted_start_date }}'), - DATE('{{ formatted_end_date }}'), - INTERVAL 1 DAY - )) AS date - {% endset %} + {% set date_array = generate_date_array(start_date, end_date) %} - - {% if execute %} - {% set results = run_query(date_array_query) %} - {% set partitions_to_replace = [] %} - {% set partitions_to_replace = results[0]['date_array'] %} - {% endif %} - + {% set partitions_to_replace = [] %} + {% for date in date_array %} + {% set formatted_date = "date('" ~ date ~ "')" %} + {% do partitions_to_replace.append(formatted_date) %} + {% endfor %} {% endif %} - {{ log("Partitions to replace: " ~ partitions_to_replace, info=True) }} + {{ config( pre_hook="{{ ga4.combine_property_data() }}" if var('combined_dataset', false) else "", From 663613632451c579c3227aaaa1a96edfd434ce95 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Mon, 10 Jun 2024 15:39:35 +0200 Subject: [PATCH 32/64] Added test --- tests/10_weeks_dates.sql | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 tests/10_weeks_dates.sql diff --git a/tests/10_weeks_dates.sql b/tests/10_weeks_dates.sql new file mode 100644 index 00000000..976cff0b --- /dev/null +++ b/tests/10_weeks_dates.sql @@ -0,0 +1,39 @@ + +{% test ten_weeks_dates(model, column_name) %} + + +{% set environment = env_var('DBT_ENVIRONMENT', 'elmyra-dev') %} +{% set backfill = env_var('backfill', 'false') %} + +{% if environment == 'elmyra-prod' and backfill != 'true' %} + {{ config(severity = 'error') }} +{% else %} + {{ config(severity = 'warn') }} +{% endif %} + + +{% if execute %} +{{ log("Running 10_weeks_dates test against environment: " ~ environment, info=True) }} +{% endif %} + + + +with validation as ( + select + {{ column_name }} as date_column + from {{ model }} +), + +missing_dates AS ( + SELECT + date AS expected_date + FROM UNNEST(GENERATE_DATE_ARRAY(DATE_SUB(CURRENT_DATE(), INTERVAL 10 WEEK), DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))) AS date + EXCEPT DISTINCT + SELECT date_column FROM validation +) + +select + expected_date +from missing_dates + +{% endtest %} \ No newline at end of file From 548a209287955dac2cd955d391807f4317d92744 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Mon, 10 Jun 2024 15:39:55 +0200 Subject: [PATCH 33/64] Added test to model --- models/staging/base/base_ga4__events.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/models/staging/base/base_ga4__events.yml b/models/staging/base/base_ga4__events.yml index f46a9355..df90efba 100644 --- a/models/staging/base/base_ga4__events.yml +++ b/models/staging/base/base_ga4__events.yml @@ -11,6 +11,8 @@ models: columns: - name: event_date_dt description: Date of the event converted to Date type. Time zone is the time zone configured in the GA4 property. + tests: + - ten_weeks_dates - name: event_timestamp description: > Timestamp (in microseconds) indicating when the event's batch was received (as opposed to when the event actually occurred). From f4584a793c7d59c1c6a9ee890f3ec2ece391b10b Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Mon, 10 Jun 2024 15:46:54 +0200 Subject: [PATCH 34/64] Backfill is var --- tests/10_weeks_dates.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/10_weeks_dates.sql b/tests/10_weeks_dates.sql index 976cff0b..9ea9dd63 100644 --- a/tests/10_weeks_dates.sql +++ b/tests/10_weeks_dates.sql @@ -3,7 +3,7 @@ {% set environment = env_var('DBT_ENVIRONMENT', 'elmyra-dev') %} -{% set backfill = env_var('backfill', 'false') %} +{% set backfill = var('backfill', 'false') %} {% if environment == 'elmyra-prod' and backfill != 'true' %} {{ config(severity = 'error') }} From 50e38bf08c7995d978a86341adb4f3160f6910e5 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Mon, 10 Jun 2024 16:03:15 +0200 Subject: [PATCH 35/64] Changed var reference --- tests/10_weeks_dates.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/10_weeks_dates.sql b/tests/10_weeks_dates.sql index 9ea9dd63..f4697990 100644 --- a/tests/10_weeks_dates.sql +++ b/tests/10_weeks_dates.sql @@ -2,10 +2,10 @@ {% test ten_weeks_dates(model, column_name) %} -{% set environment = env_var('DBT_ENVIRONMENT', 'elmyra-dev') %} +{% set environment = env_var('DBT_DEPLOYMENT_ENV', 'dev') %} {% set backfill = var('backfill', 'false') %} -{% if environment == 'elmyra-prod' and backfill != 'true' %} +{% if environment == 'prod' and backfill != 'true' %} {{ config(severity = 'error') }} {% else %} {{ config(severity = 'warn') }} From 73796efbacd605b0d9131ad9a37f2b8a0de10a63 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Wed, 19 Jun 2024 13:58:54 +0200 Subject: [PATCH 36/64] Fixed 10 week test so it works better with Airflow --- tests/10_weeks_dates.sql | 42 +++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/tests/10_weeks_dates.sql b/tests/10_weeks_dates.sql index f4697990..3bdf2cba 100644 --- a/tests/10_weeks_dates.sql +++ b/tests/10_weeks_dates.sql @@ -1,20 +1,26 @@ - {% test ten_weeks_dates(model, column_name) %} -{% set environment = env_var('DBT_DEPLOYMENT_ENV', 'dev') %} -{% set backfill = var('backfill', 'false') %} + {% set environment = env_var('DBT_DEPLOYMENT_ENV', 'dev') %} + {% set backfill = var('backfill', 'false') %} + {% set start_date = var('start_date') %} + {% set end_date = var('end_date') %} + + {% if environment == 'prod' and backfill != 'true' %} + {{ config(severity = 'error') }} + {% else %} + {{ config(severity = 'warn') }} + {% endif %} + + + {% if execute %} + {{ log("Running 10_weeks_dates test against environment: " ~ environment, info=True) }} + {{ log("Test start_date: " ~ start_date, info=True) }} + {{ log("Test end_date: " ~ end_date, info=True) }} + {% endif %} -{% if environment == 'prod' and backfill != 'true' %} - {{ config(severity = 'error') }} -{% else %} - {{ config(severity = 'warn') }} -{% endif %} -{% if execute %} -{{ log("Running 10_weeks_dates test against environment: " ~ environment, info=True) }} -{% endif %} @@ -27,7 +33,17 @@ with validation as ( missing_dates AS ( SELECT date AS expected_date - FROM UNNEST(GENERATE_DATE_ARRAY(DATE_SUB(CURRENT_DATE(), INTERVAL 10 WEEK), DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))) AS date + FROM UNNEST( + GENERATE_DATE_ARRAY( + {%- if start_date is not none and end_date is not none -%} + DATE_SUB(PARSE_DATE("%Y%m%d", cast({{ start_date }} as string)), INTERVAL 10 WEEK) + , DATE_SUB(PARSE_DATE("%Y%m%d", cast({{ start_date }} as string)), INTERVAL 1 DAY) + {%- else -%} + DATE_SUB(CURRENT_DATE(), INTERVAL 10 WEEK) + , DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY) + {%- endif -%} + ) + ) AS date EXCEPT DISTINCT SELECT date_column FROM validation ) @@ -36,4 +52,4 @@ select expected_date from missing_dates -{% endtest %} \ No newline at end of file +{% endtest %} From 82f8e20dc7f443b28134a96a784bc70fa13090ca Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Wed, 26 Jun 2024 08:45:46 +0200 Subject: [PATCH 37/64] Made stg_ga4__sessions_traffic_sources_daily compatible with start and end date --- ...tg_ga4__sessions_traffic_sources_daily.sql | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql index f1357843..387801a4 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql @@ -2,6 +2,36 @@ {% for i in range(var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} + + +{% set start_date = var('start_date', none) %} +{% set end_date = var('end_date', none) %} + +{{ log("Initial start_date: " ~ start_date, info=True) }} +{{ log("Initial end_date: " ~ end_date, info=True) }} + + +{% if start_date and end_date %} + {{ log("Running with start_date: " ~ start_date, info=True) }} + {{ log("Running with end_date: " ~ end_date, info=True) }} + + {% set formatted_start_date = start_date[:4] ~ '-' ~ start_date[4:6] ~ '-' ~ start_date[6:] %} + {% set formatted_end_date = end_date[:4] ~ '-' ~ end_date[4:6] ~ '-' ~ end_date[6:] %} + + {{ log("Formatted start_date: " ~ formatted_start_date, info=True) }} + {{ log("Formatted end_date: " ~ formatted_end_date, info=True) }} + + {% set date_array = generate_date_array(start_date, end_date) %} + + + {% set partitions_to_replace = [] %} + {% for date in date_array %} + {% set formatted_date = "date('" ~ date ~ "')" %} + {% do partitions_to_replace.append(formatted_date) %} + {% endfor %} + +{% endif %} + {{ config( materialized = 'incremental', From de1cdb3583994c7eec4727f76bdcf73f47433c38 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Wed, 26 Jun 2024 09:00:13 +0200 Subject: [PATCH 38/64] Added backfill compatibility start and end date --- .../stg_ga4__session_conversions_daily.sql | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/models/staging/stg_ga4__session_conversions_daily.sql b/models/staging/stg_ga4__session_conversions_daily.sql index 7247a0f9..c5449d9b 100644 --- a/models/staging/stg_ga4__session_conversions_daily.sql +++ b/models/staging/stg_ga4__session_conversions_daily.sql @@ -2,6 +2,36 @@ {% for i in range(var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} + + +{% set start_date = var('start_date', none) %} +{% set end_date = var('end_date', none) %} + +{{ log("Initial start_date: " ~ start_date, info=True) }} +{{ log("Initial end_date: " ~ end_date, info=True) }} + + +{% if start_date and end_date %} + {{ log("Running with start_date: " ~ start_date, info=True) }} + {{ log("Running with end_date: " ~ end_date, info=True) }} + + {% set formatted_start_date = start_date[:4] ~ '-' ~ start_date[4:6] ~ '-' ~ start_date[6:] %} + {% set formatted_end_date = end_date[:4] ~ '-' ~ end_date[4:6] ~ '-' ~ end_date[6:] %} + + {{ log("Formatted start_date: " ~ formatted_start_date, info=True) }} + {{ log("Formatted end_date: " ~ formatted_end_date, info=True) }} + + {% set date_array = generate_date_array(start_date, end_date) %} + + + {% set partitions_to_replace = [] %} + {% for date in date_array %} + {% set formatted_date = "date('" ~ date ~ "')" %} + {% do partitions_to_replace.append(formatted_date) %} + {% endfor %} + +{% endif %} + {{ config( enabled= var('conversion_events', false) != false, From a731e1e06d02b80ff95ad97e26fd67177ce723ca Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Tue, 23 Jul 2024 11:47:19 +0200 Subject: [PATCH 39/64] Remove 10 weeks dates --- tests/10_weeks_dates.sql | 55 ---------------------------------------- 1 file changed, 55 deletions(-) delete mode 100644 tests/10_weeks_dates.sql diff --git a/tests/10_weeks_dates.sql b/tests/10_weeks_dates.sql deleted file mode 100644 index 3bdf2cba..00000000 --- a/tests/10_weeks_dates.sql +++ /dev/null @@ -1,55 +0,0 @@ -{% test ten_weeks_dates(model, column_name) %} - - - {% set environment = env_var('DBT_DEPLOYMENT_ENV', 'dev') %} - {% set backfill = var('backfill', 'false') %} - {% set start_date = var('start_date') %} - {% set end_date = var('end_date') %} - - {% if environment == 'prod' and backfill != 'true' %} - {{ config(severity = 'error') }} - {% else %} - {{ config(severity = 'warn') }} - {% endif %} - - - {% if execute %} - {{ log("Running 10_weeks_dates test against environment: " ~ environment, info=True) }} - {{ log("Test start_date: " ~ start_date, info=True) }} - {{ log("Test end_date: " ~ end_date, info=True) }} - {% endif %} - - - - - - -with validation as ( - select - {{ column_name }} as date_column - from {{ model }} -), - -missing_dates AS ( - SELECT - date AS expected_date - FROM UNNEST( - GENERATE_DATE_ARRAY( - {%- if start_date is not none and end_date is not none -%} - DATE_SUB(PARSE_DATE("%Y%m%d", cast({{ start_date }} as string)), INTERVAL 10 WEEK) - , DATE_SUB(PARSE_DATE("%Y%m%d", cast({{ start_date }} as string)), INTERVAL 1 DAY) - {%- else -%} - DATE_SUB(CURRENT_DATE(), INTERVAL 10 WEEK) - , DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY) - {%- endif -%} - ) - ) AS date - EXCEPT DISTINCT - SELECT date_column FROM validation -) - -select - expected_date -from missing_dates - -{% endtest %} From e00ca7a8d93bb1ee5893cb4628a01fcae5fff8c2 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Tue, 23 Jul 2024 17:17:10 +0200 Subject: [PATCH 40/64] Added custom test to save cost --- macros/select_date_range.sql | 13 +++++++++++++ models/staging/stg_ga4__events.yml | 2 +- ..._unique_stg_ga4__events_event_key_custom.sql | 17 +++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 macros/select_date_range.sql create mode 100644 tests/test_unique_stg_ga4__events_event_key_custom.sql diff --git a/macros/select_date_range.sql b/macros/select_date_range.sql new file mode 100644 index 00000000..93015af0 --- /dev/null +++ b/macros/select_date_range.sql @@ -0,0 +1,13 @@ +{% macro select_date_range(start_date, end_date, date_column, parse=true) %} + {% if start_date is not none and end_date is not none %} + {% if parse %} + {{ date_column }} between + PARSE_DATE('%Y%m%d', '{{ start_date }}') and + PARSE_DATE('%Y%m%d', '{{ end_date }}') + {% else %} + {{ date_column }} between '{{ start_date }}' and '{{ end_date }}' + {% endif %} + {% else %} + {{ date_column }} >= DATE_SUB(CURRENT_DATE(), INTERVAL {{ var("lookback_window") }} DAY ) + {% endif %} +{% endmacro %} diff --git a/models/staging/stg_ga4__events.yml b/models/staging/stg_ga4__events.yml index 4d4a8b4d..50e6a7c9 100644 --- a/models/staging/stg_ga4__events.yml +++ b/models/staging/stg_ga4__events.yml @@ -8,7 +8,7 @@ models: description: Surrogate key created from stream_id and user_pseudo_id. Provides a way to uniquely identify a user's device within a stream. Important when using the package to combine data across properties and streams. - name: event_key tests: - - unique + - test_unique_stg_ga4__events_event_key_custom - name: page_path description: This field contains the page_location with the query string portion removed. Uses macro remove_query_string - name: page_engagement_key diff --git a/tests/test_unique_stg_ga4__events_event_key_custom.sql b/tests/test_unique_stg_ga4__events_event_key_custom.sql new file mode 100644 index 00000000..63cd06db --- /dev/null +++ b/tests/test_unique_stg_ga4__events_event_key_custom.sql @@ -0,0 +1,17 @@ +{% test ten_weeks_dates(model, column_name) %} + +select * +from ( + + select + {{ column_name }} + + from {{ model }} + where {{ column_name }} is not null + and {{ select_date_range(var("start_date"), var("end_date"), "event_date_dt") }} + group by {{ column_name }} + having count(*) > 1 + +) validation_errors + +{% endtest %} \ No newline at end of file From 231003f79143e9e7b0a4d5743e42675c0b231200 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Tue, 23 Jul 2024 17:20:59 +0200 Subject: [PATCH 41/64] Custom name --- ..._date_range.sql => select_date_range_ga4_package_custom.sql} | 2 +- tests/test_unique_stg_ga4__events_event_key_custom.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename macros/{select_date_range.sql => select_date_range_ga4_package_custom.sql} (83%) diff --git a/macros/select_date_range.sql b/macros/select_date_range_ga4_package_custom.sql similarity index 83% rename from macros/select_date_range.sql rename to macros/select_date_range_ga4_package_custom.sql index 93015af0..bf9d05ee 100644 --- a/macros/select_date_range.sql +++ b/macros/select_date_range_ga4_package_custom.sql @@ -1,4 +1,4 @@ -{% macro select_date_range(start_date, end_date, date_column, parse=true) %} +{% macro select_date_range_ga4_package_custom(start_date, end_date, date_column, parse=true) %} {% if start_date is not none and end_date is not none %} {% if parse %} {{ date_column }} between diff --git a/tests/test_unique_stg_ga4__events_event_key_custom.sql b/tests/test_unique_stg_ga4__events_event_key_custom.sql index 63cd06db..5b796723 100644 --- a/tests/test_unique_stg_ga4__events_event_key_custom.sql +++ b/tests/test_unique_stg_ga4__events_event_key_custom.sql @@ -8,7 +8,7 @@ from ( from {{ model }} where {{ column_name }} is not null - and {{ select_date_range(var("start_date"), var("end_date"), "event_date_dt") }} + and {{ select_date_range_ga4_package_custom(var("start_date"), var("end_date"), "event_date_dt") }} group by {{ column_name }} having count(*) > 1 From a9ecd8f65a32f86a2fdc50ea35b4cf39a9bc95ab Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Tue, 23 Jul 2024 17:25:02 +0200 Subject: [PATCH 42/64] Macro causes issues, skip for now --- macros/select_date_range_ga4_package_custom.sql | 13 ------------- ...test_unique_stg_ga4__events_event_key_custom.sql | 9 ++++++++- 2 files changed, 8 insertions(+), 14 deletions(-) delete mode 100644 macros/select_date_range_ga4_package_custom.sql diff --git a/macros/select_date_range_ga4_package_custom.sql b/macros/select_date_range_ga4_package_custom.sql deleted file mode 100644 index bf9d05ee..00000000 --- a/macros/select_date_range_ga4_package_custom.sql +++ /dev/null @@ -1,13 +0,0 @@ -{% macro select_date_range_ga4_package_custom(start_date, end_date, date_column, parse=true) %} - {% if start_date is not none and end_date is not none %} - {% if parse %} - {{ date_column }} between - PARSE_DATE('%Y%m%d', '{{ start_date }}') and - PARSE_DATE('%Y%m%d', '{{ end_date }}') - {% else %} - {{ date_column }} between '{{ start_date }}' and '{{ end_date }}' - {% endif %} - {% else %} - {{ date_column }} >= DATE_SUB(CURRENT_DATE(), INTERVAL {{ var("lookback_window") }} DAY ) - {% endif %} -{% endmacro %} diff --git a/tests/test_unique_stg_ga4__events_event_key_custom.sql b/tests/test_unique_stg_ga4__events_event_key_custom.sql index 5b796723..be318e7b 100644 --- a/tests/test_unique_stg_ga4__events_event_key_custom.sql +++ b/tests/test_unique_stg_ga4__events_event_key_custom.sql @@ -1,4 +1,6 @@ {% test ten_weeks_dates(model, column_name) %} +{% set start_date = var('start_date') %} +{% set end_date = var('end_date') %} select * from ( @@ -8,7 +10,12 @@ from ( from {{ model }} where {{ column_name }} is not null - and {{ select_date_range_ga4_package_custom(var("start_date"), var("end_date"), "event_date_dt") }} + {% if start_date is not none and start_date != '' and end_date is not none and end_date != '' %} + and event_date_dt between '{{ start_date }}' and '{{ end_date }}' + {% elif start_date is not none and start_date != '' %} + and event_date_dt >= '{{ start_date }}' + {% endif %} + group by {{ column_name }} having count(*) > 1 From 41258db427e3271dbffcd36cc1800f02d9ff4f17 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Tue, 23 Jul 2024 17:31:30 +0200 Subject: [PATCH 43/64] Actually give the good name --- tests/test_unique_stg_ga4__events_event_key_custom.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_unique_stg_ga4__events_event_key_custom.sql b/tests/test_unique_stg_ga4__events_event_key_custom.sql index be318e7b..7a5d61a3 100644 --- a/tests/test_unique_stg_ga4__events_event_key_custom.sql +++ b/tests/test_unique_stg_ga4__events_event_key_custom.sql @@ -1,4 +1,4 @@ -{% test ten_weeks_dates(model, column_name) %} +{% test test_test_unique_stg_ga4__events_event_key_custom(model, column_name) %} {% set start_date = var('start_date') %} {% set end_date = var('end_date') %} From 7e4106dd1923b04580fd1cf443dfd5e93a9d4935 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Tue, 23 Jul 2024 17:31:49 +0200 Subject: [PATCH 44/64] Working too fast --- tests/test_unique_stg_ga4__events_event_key_custom.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_unique_stg_ga4__events_event_key_custom.sql b/tests/test_unique_stg_ga4__events_event_key_custom.sql index 7a5d61a3..5cdb73d4 100644 --- a/tests/test_unique_stg_ga4__events_event_key_custom.sql +++ b/tests/test_unique_stg_ga4__events_event_key_custom.sql @@ -1,4 +1,4 @@ -{% test test_test_unique_stg_ga4__events_event_key_custom(model, column_name) %} +{% test test_unique_stg_ga4__events_event_key_custom(model, column_name) %} {% set start_date = var('start_date') %} {% set end_date = var('end_date') %} From 41d370288fde47161b6ed0732500c1b095519065 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Tue, 23 Jul 2024 17:37:45 +0200 Subject: [PATCH 45/64] Proper naming --- models/staging/stg_ga4__events.yml | 2 +- ..._ga4__events_event_key_custom.sql => test_unique_custom.sql} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename tests/{test_unique_stg_ga4__events_event_key_custom.sql => test_unique_custom.sql} (88%) diff --git a/models/staging/stg_ga4__events.yml b/models/staging/stg_ga4__events.yml index 50e6a7c9..d23dd5d3 100644 --- a/models/staging/stg_ga4__events.yml +++ b/models/staging/stg_ga4__events.yml @@ -8,7 +8,7 @@ models: description: Surrogate key created from stream_id and user_pseudo_id. Provides a way to uniquely identify a user's device within a stream. Important when using the package to combine data across properties and streams. - name: event_key tests: - - test_unique_stg_ga4__events_event_key_custom + - test_unique_custom - name: page_path description: This field contains the page_location with the query string portion removed. Uses macro remove_query_string - name: page_engagement_key diff --git a/tests/test_unique_stg_ga4__events_event_key_custom.sql b/tests/test_unique_custom.sql similarity index 88% rename from tests/test_unique_stg_ga4__events_event_key_custom.sql rename to tests/test_unique_custom.sql index 5cdb73d4..ab821e2c 100644 --- a/tests/test_unique_stg_ga4__events_event_key_custom.sql +++ b/tests/test_unique_custom.sql @@ -1,4 +1,4 @@ -{% test test_unique_stg_ga4__events_event_key_custom(model, column_name) %} +{% test test_unique_custom(model, column_name) %} {% set start_date = var('start_date') %} {% set end_date = var('end_date') %} From 4a64671a15a94475f32c93d5c3222b6a21ba4452 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Tue, 23 Jul 2024 17:41:57 +0200 Subject: [PATCH 46/64] Renamed --- models/staging/stg_ga4__events.yml | 2 +- tests/{test_unique_custom.sql => unique_custom.sql} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename tests/{test_unique_custom.sql => unique_custom.sql} (92%) diff --git a/models/staging/stg_ga4__events.yml b/models/staging/stg_ga4__events.yml index d23dd5d3..ebd2f9e0 100644 --- a/models/staging/stg_ga4__events.yml +++ b/models/staging/stg_ga4__events.yml @@ -8,7 +8,7 @@ models: description: Surrogate key created from stream_id and user_pseudo_id. Provides a way to uniquely identify a user's device within a stream. Important when using the package to combine data across properties and streams. - name: event_key tests: - - test_unique_custom + - unique_custom - name: page_path description: This field contains the page_location with the query string portion removed. Uses macro remove_query_string - name: page_engagement_key diff --git a/tests/test_unique_custom.sql b/tests/unique_custom.sql similarity index 92% rename from tests/test_unique_custom.sql rename to tests/unique_custom.sql index ab821e2c..ef0e1cb0 100644 --- a/tests/test_unique_custom.sql +++ b/tests/unique_custom.sql @@ -1,4 +1,4 @@ -{% test test_unique_custom(model, column_name) %} +{% test unique_custom(model, column_name) %} {% set start_date = var('start_date') %} {% set end_date = var('end_date') %} From 550e6560ca526fb537cf59da9bd94ae2260f679a Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Tue, 23 Jul 2024 19:34:04 +0200 Subject: [PATCH 47/64] Removed tests --- models/staging/stg_ga4__events.yml | 2 -- tests/unique_custom.sql | 24 ------------------------ 2 files changed, 26 deletions(-) delete mode 100644 tests/unique_custom.sql diff --git a/models/staging/stg_ga4__events.yml b/models/staging/stg_ga4__events.yml index ebd2f9e0..e2e4b60b 100644 --- a/models/staging/stg_ga4__events.yml +++ b/models/staging/stg_ga4__events.yml @@ -7,8 +7,6 @@ models: - name: client_key description: Surrogate key created from stream_id and user_pseudo_id. Provides a way to uniquely identify a user's device within a stream. Important when using the package to combine data across properties and streams. - name: event_key - tests: - - unique_custom - name: page_path description: This field contains the page_location with the query string portion removed. Uses macro remove_query_string - name: page_engagement_key diff --git a/tests/unique_custom.sql b/tests/unique_custom.sql deleted file mode 100644 index ef0e1cb0..00000000 --- a/tests/unique_custom.sql +++ /dev/null @@ -1,24 +0,0 @@ -{% test unique_custom(model, column_name) %} -{% set start_date = var('start_date') %} -{% set end_date = var('end_date') %} - -select * -from ( - - select - {{ column_name }} - - from {{ model }} - where {{ column_name }} is not null - {% if start_date is not none and start_date != '' and end_date is not none and end_date != '' %} - and event_date_dt between '{{ start_date }}' and '{{ end_date }}' - {% elif start_date is not none and start_date != '' %} - and event_date_dt >= '{{ start_date }}' - {% endif %} - - group by {{ column_name }} - having count(*) > 1 - -) validation_errors - -{% endtest %} \ No newline at end of file From bf2af62872e228d67b00fdb6c77606ead32efc09 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Wed, 24 Jul 2024 09:56:13 +0200 Subject: [PATCH 48/64] no test --- models/staging/events/stg_ga4__event_page_view.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/models/staging/events/stg_ga4__event_page_view.yml b/models/staging/events/stg_ga4__event_page_view.yml index 8f567fc6..a74d3065 100644 --- a/models/staging/events/stg_ga4__event_page_view.yml +++ b/models/staging/events/stg_ga4__event_page_view.yml @@ -2,9 +2,4 @@ version: 2 models: - name: stg_ga4__event_page_view - description: GA4 events filtered to only show 'page_view' events. Pivots common event parameters into separate columns. Identifies the first and last pageview in the 'is_entrance' and 'is_exit' columns. - columns: - - name: page_location - tests: - - not_null: - severity: warn \ No newline at end of file + description: GA4 events filtered to only show 'page_view' events. Pivots common event parameters into separate columns. Identifies the first and last pageview in the 'is_entrance' and 'is_exit' columns. \ No newline at end of file From 5f9cc1f81432f28ab080fcade1dd6213e86dccce Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Wed, 24 Jul 2024 17:55:16 +0200 Subject: [PATCH 49/64] Removed test --- models/staging/stg_ga4__sessions_traffic_sources.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/models/staging/stg_ga4__sessions_traffic_sources.yml b/models/staging/stg_ga4__sessions_traffic_sources.yml index fa5a54eb..259bdea0 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources.yml +++ b/models/staging/stg_ga4__sessions_traffic_sources.yml @@ -8,9 +8,6 @@ models: The session_start and first_visit events are ignored for this purpose as they contain no acquisition data. Aggregated by session_key. columns: - - name: session_key - tests: - - unique - name: session_source description: First non-null source value of the session tests: From 031e67ebc0205640ea91c6b99d762a6ec1150fe9 Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Wed, 24 Jul 2024 17:56:06 +0200 Subject: [PATCH 50/64] Removed test, we do this in our own project now --- models/staging/stg_ga4__sessions_traffic_sources.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/models/staging/stg_ga4__sessions_traffic_sources.yml b/models/staging/stg_ga4__sessions_traffic_sources.yml index 259bdea0..ec3c246f 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources.yml +++ b/models/staging/stg_ga4__sessions_traffic_sources.yml @@ -9,6 +9,4 @@ models: Aggregated by session_key. columns: - name: session_source - description: First non-null source value of the session - tests: - - not_null \ No newline at end of file + description: First non-null source value of the session \ No newline at end of file From 429e7cb847633464f40f5ee84ebe6681ec0315bf Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Wed, 24 Jul 2024 18:02:53 +0200 Subject: [PATCH 51/64] Disabled test --- tests/page_location_with_gclid_is_cpc.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/page_location_with_gclid_is_cpc.sql b/tests/page_location_with_gclid_is_cpc.sql index 8f38b7fb..3d5d0480 100644 --- a/tests/page_location_with_gclid_is_cpc.sql +++ b/tests/page_location_with_gclid_is_cpc.sql @@ -1,6 +1,6 @@ -- Google has changed the combination of parameters that are used to identify a CPC source in the past. -- In order to detect new changes, this test checks that a page_location with a gclid is classified as cpc. - +{# {{config( severity = 'warn' )}} @@ -12,4 +12,4 @@ where original_page_location like '%gclid%' and event_source != 'google' and event_medium != 'cpc' having sources > 0 - or mediums > 0 \ No newline at end of file + or mediums > 0 #} \ No newline at end of file From f47820647cd40d9708c01eb905f9878951b237cf Mon Sep 17 00:00:00 2001 From: JumboDVDH0 Date: Wed, 24 Jul 2024 18:06:27 +0200 Subject: [PATCH 52/64] Dummy test --- tests/page_location_with_gclid_is_cpc.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/page_location_with_gclid_is_cpc.sql b/tests/page_location_with_gclid_is_cpc.sql index 3d5d0480..40867adf 100644 --- a/tests/page_location_with_gclid_is_cpc.sql +++ b/tests/page_location_with_gclid_is_cpc.sql @@ -12,4 +12,6 @@ where original_page_location like '%gclid%' and event_source != 'google' and event_medium != 'cpc' having sources > 0 - or mediums > 0 #} \ No newline at end of file + or mediums > 0 #} + +select null limit 0 \ No newline at end of file From 7057b3aebf8f9849f2793203070791c2b49229fd Mon Sep 17 00:00:00 2001 From: Davy van der Horst Date: Thu, 12 Dec 2024 16:31:58 +0100 Subject: [PATCH 53/64] Property ID filter --- models/staging/base/base_ga4__events.sql | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 319e25ea..6ca2bc1c 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -60,6 +60,14 @@ with source as ( {% if is_incremental() and var('end_date') is none %} and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }}) {% endif %} + -- Add property ID filter + {% set property_ids = var('brand_properties')[var('brands')[0]] %} + and ( + {% for property_id in property_ids %} + _table_suffix like '%{{ property_id }}' + {%- if not loop.last %} or {% endif -%} + {% endfor %} + ) ), renamed as ( select From 55fcd3870f8e69aa106ba6bf6bfddd8c5fc2d79a Mon Sep 17 00:00:00 2001 From: Davy van der Horst Date: Thu, 12 Dec 2024 16:57:17 +0100 Subject: [PATCH 54/64] Select all selected brands --- models/staging/base/base_ga4__events.sql | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 6ca2bc1c..aa463bdf 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -60,10 +60,18 @@ with source as ( {% if is_incremental() and var('end_date') is none %} and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }}) {% endif %} - -- Add property ID filter - {% set property_ids = var('brand_properties')[var('brands')[0]] %} + -- Add property ID filter for multiple brands + {% set selected_properties = [] %} + {% if 'all' in var('brands') %} + {% do selected_properties.extend(var('brand_properties')['all']) %} + {% else %} + {% for brand in var('brands') %} + {% do selected_properties.extend(var('brand_properties')[brand]) %} + {% endfor %} + {% endif %} + {{ log("Processing properties: " ~ selected_properties, info=True) }} and ( - {% for property_id in property_ids %} + {% for property_id in selected_properties %} _table_suffix like '%{{ property_id }}' {%- if not loop.last %} or {% endif -%} {% endfor %} From bb108ebfec841f4a2ea028d86878982caeadcb28 Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 16 Dec 2024 12:50:23 +0100 Subject: [PATCH 55/64] Update base_select.sql --- macros/base_select.sql | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/macros/base_select.sql b/macros/base_select.sql index 8919388f..27859cc6 100644 --- a/macros/base_select.sql +++ b/macros/base_select.sql @@ -140,7 +140,17 @@ )) from unnest(items) as unnested_items ) items , property_id - , {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }} + , COALESCE( + -- Use int_value if available + {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }}, + -- Extract the second section (numeric part) from the string_value format + CAST( + REGEXP_EXTRACT( + {{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }}, + r'^GS\d\.\d\.(\d+)' + ) AS INT64 + ) +) AS session_id , {{ ga4.unnest_key('event_params', 'page_location') }} , {{ ga4.unnest_key('event_params', 'ga_session_number', 'int_value', 'session_number') }} , COALESCE( @@ -163,4 +173,4 @@ WHEN event_name = 'purchase' THEN 1 ELSE 0 END AS is_purchase -{% endmacro %} \ No newline at end of file +{% endmacro %} From e514ebcc0b8772149ded0d4d000274ea7ba586de Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 16 Dec 2024 13:08:55 +0100 Subject: [PATCH 56/64] Update base_select.sql --- macros/base_select.sql | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/macros/base_select.sql b/macros/base_select.sql index 27859cc6..907185f4 100644 --- a/macros/base_select.sql +++ b/macros/base_select.sql @@ -140,13 +140,19 @@ )) from unnest(items) as unnested_items ) items , property_id - , COALESCE( - -- Use int_value if available - {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }}, - -- Extract the second section (numeric part) from the string_value format + , COALESCE( + -- Use int_value if available and not None + NULLIF( + {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }}, + 'None' + ), + -- Extract the second section (numeric part) from the string_value format, ensure it's not None CAST( REGEXP_EXTRACT( - {{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }}, + COALESCE( + {{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }}, + '' + ), r'^GS\d\.\d\.(\d+)' ) AS INT64 ) From decf8b0febcd892fb9c0273bd3efc44ad1677cfd Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 16 Dec 2024 13:19:38 +0100 Subject: [PATCH 57/64] Update base_select.sql --- macros/base_select.sql | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/macros/base_select.sql b/macros/base_select.sql index 907185f4..81d49fd7 100644 --- a/macros/base_select.sql +++ b/macros/base_select.sql @@ -140,23 +140,7 @@ )) from unnest(items) as unnested_items ) items , property_id - , COALESCE( - -- Use int_value if available and not None - NULLIF( - {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }}, - 'None' - ), - -- Extract the second section (numeric part) from the string_value format, ensure it's not None - CAST( - REGEXP_EXTRACT( - COALESCE( - {{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }}, - '' - ), - r'^GS\d\.\d\.(\d+)' - ) AS INT64 - ) -) AS session_id + , COALESCE({{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }}, COALESCE(CAST(REGEXP_EXTRACT({{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }}, r'^GS\d\.\d\.(\d+)') AS INT64), NULL)) as session_id , {{ ga4.unnest_key('event_params', 'page_location') }} , {{ ga4.unnest_key('event_params', 'ga_session_number', 'int_value', 'session_number') }} , COALESCE( From e55eea35d9248f7e1daee93a52027a185d13789f Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 16 Dec 2024 13:24:48 +0100 Subject: [PATCH 58/64] Update base_select.sql --- macros/base_select.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/macros/base_select.sql b/macros/base_select.sql index 81d49fd7..8320b328 100644 --- a/macros/base_select.sql +++ b/macros/base_select.sql @@ -140,7 +140,8 @@ )) from unnest(items) as unnested_items ) items , property_id - , COALESCE({{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }}, COALESCE(CAST(REGEXP_EXTRACT({{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }}, r'^GS\d\.\d\.(\d+)') AS INT64), NULL)) as session_id + , {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }} + , CAST(REGEXP_EXTRACT({{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }}, r'^GS\d\.\d\.(\d+)') AS INT64) as fallback_session_id , {{ ga4.unnest_key('event_params', 'page_location') }} , {{ ga4.unnest_key('event_params', 'ga_session_number', 'int_value', 'session_number') }} , COALESCE( From 36a4fd87683b441d2422291a1fd538e7c278123f Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 16 Dec 2024 13:29:36 +0100 Subject: [PATCH 59/64] Update base_select.sql --- macros/base_select.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/base_select.sql b/macros/base_select.sql index 8320b328..ecd4597d 100644 --- a/macros/base_select.sql +++ b/macros/base_select.sql @@ -141,7 +141,7 @@ ) items , property_id , {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }} - , CAST(REGEXP_EXTRACT({{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }}, r'^GS\d\.\d\.(\d+)') AS INT64) as fallback_session_id + , CAST(REGEXP_EXTRACT(COALESCE({{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }}, ""), r'^GS\d\.\d\.(\d+)') AS INT64) as fallback_session_id , {{ ga4.unnest_key('event_params', 'page_location') }} , {{ ga4.unnest_key('event_params', 'ga_session_number', 'int_value', 'session_number') }} , COALESCE( From 99c3f975dd65a791d6fe286b0050fb87f3a98954 Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 16 Dec 2024 13:35:48 +0100 Subject: [PATCH 60/64] Update base_select.sql --- macros/base_select.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/base_select.sql b/macros/base_select.sql index ecd4597d..45c2a2f6 100644 --- a/macros/base_select.sql +++ b/macros/base_select.sql @@ -141,7 +141,7 @@ ) items , property_id , {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }} - , CAST(REGEXP_EXTRACT(COALESCE({{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }}, ""), r'^GS\d\.\d\.(\d+)') AS INT64) as fallback_session_id + , {{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }} , {{ ga4.unnest_key('event_params', 'page_location') }} , {{ ga4.unnest_key('event_params', 'ga_session_number', 'int_value', 'session_number') }} , COALESCE( From 1dfb82e1d6b1139fbe86b228f6bf01e9955028a8 Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 16 Dec 2024 13:37:57 +0100 Subject: [PATCH 61/64] Update base_select.sql --- macros/base_select.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/macros/base_select.sql b/macros/base_select.sql index 45c2a2f6..6f7075db 100644 --- a/macros/base_select.sql +++ b/macros/base_select.sql @@ -141,7 +141,6 @@ ) items , property_id , {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }} - , {{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }} , {{ ga4.unnest_key('event_params', 'page_location') }} , {{ ga4.unnest_key('event_params', 'ga_session_number', 'int_value', 'session_number') }} , COALESCE( From 739b47bf76e7a8902c785853529e54e58241b9ec Mon Sep 17 00:00:00 2001 From: tessa-beijloos Date: Mon, 16 Dec 2024 13:52:54 +0100 Subject: [PATCH 62/64] Update base_select.sql --- macros/base_select.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/macros/base_select.sql b/macros/base_select.sql index 6f7075db..bf76b3dc 100644 --- a/macros/base_select.sql +++ b/macros/base_select.sql @@ -140,7 +140,8 @@ )) from unnest(items) as unnested_items ) items , property_id - , {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }} + , COALESCE({{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }}, + CAST(REGEXP_EXTRACT(COALESCE((select value.string_value from unnest(event_params) where key = 'ga_session_id') , ""), r'^GS\d\.\d\.(\d+)') AS INT64)) as session_id , {{ ga4.unnest_key('event_params', 'page_location') }} , {{ ga4.unnest_key('event_params', 'ga_session_number', 'int_value', 'session_number') }} , COALESCE( From 1442e394e25f2a5a02b04244fac22d274b67e85d Mon Sep 17 00:00:00 2001 From: tessa-beijloos-jumbo Date: Mon, 16 Dec 2024 14:12:11 +0100 Subject: [PATCH 63/64] base select --- macros/base_select.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/macros/base_select.sql b/macros/base_select.sql index 8919388f..4b34b98b 100644 --- a/macros/base_select.sql +++ b/macros/base_select.sql @@ -141,6 +141,7 @@ ) items , property_id , {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }} + , (select value.string_value from unnest(event_params) where key = 'ga_session_id') as session_id_string , {{ ga4.unnest_key('event_params', 'page_location') }} , {{ ga4.unnest_key('event_params', 'ga_session_number', 'int_value', 'session_number') }} , COALESCE( From 4d49df00e6a93cd664563b446c6b8380cc9fbaea Mon Sep 17 00:00:00 2001 From: tessa-beijloos-jumbo Date: Mon, 16 Dec 2024 14:26:30 +0100 Subject: [PATCH 64/64] new sessionid --- macros/base_select.sql | 2 +- models/staging/base/base_ga4__events.sql | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/macros/base_select.sql b/macros/base_select.sql index 4b34b98b..300fde62 100644 --- a/macros/base_select.sql +++ b/macros/base_select.sql @@ -141,7 +141,7 @@ ) items , property_id , {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }} - , (select value.string_value from unnest(event_params) where key = 'ga_session_id') as session_id_string + , CAST(REGEXP_EXTRACT(COALESCE((select value.string_value from unnest(event_params) where key = 'ga_session_id'), ""), r'^GS\d\.\d\.(\d+)') AS INT64) as mp_session_id , {{ ga4.unnest_key('event_params', 'page_location') }} , {{ ga4.unnest_key('event_params', 'ga_session_number', 'int_value', 'session_number') }} , COALESCE( diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 319e25ea..0eb27cda 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -63,7 +63,8 @@ with source as ( ), renamed as ( select - {{ ga4.base_select_renamed() }} + {{ ga4.base_select_renamed() }}, + COALESCE(session_id, mp_session_id) as session_id -- if session_id is empty fill in the measurement protocol session_id from source )