From e21ceec20e0a5f427b65b3ad4419c093c1cd5b97 Mon Sep 17 00:00:00 2001 From: Damon Gudaitis Date: Mon, 22 Jan 2024 15:22:56 -0800 Subject: [PATCH] fix fct_ga4__pages model and add tests to catch similar issues (#295) * fix fct_ga4__pages model and add tests to catch issue * remove page_path and page_title from fct_ga4__pages --- models/marts/core/core.yml | 7 ------- models/marts/core/fct_ga4__pages.sql | 22 +++++++++++++--------- models/marts/core/fct_ga4__pages.yml | 13 +++++++++++++ 3 files changed, 26 insertions(+), 16 deletions(-) create mode 100644 models/marts/core/fct_ga4__pages.yml diff --git a/models/marts/core/core.yml b/models/marts/core/core.yml index 3cf254b8..32095107 100644 --- a/models/marts/core/core.yml +++ b/models/marts/core/core.yml @@ -14,13 +14,6 @@ models: description: Hashed combination of user_pseudo_id and stream_id tests: - unique - - name: fct_ga4__pages - description: Incremental model with page metrics such as visits, users, new_users, entrances and exits as well as configurable conversion counts. - columns: - - name: total_engagement_time_msec - description: The total engagement time for that page_location. - - name: avg_engagement_time_denominator - description: Use avg_engagement_time_denominator to calculate the average engagement time, which is derived by dividing the sum of total engagement time by the product of the sum of the denominator and 1000 to get the average engagement time in seconds (average_engagement_time = sum(total_engagement_time_msec)/(sum(avg_engagement_time_denominator) *1000 )). The denominator excludes page_view events where no engagement time is recorded for the page_location within a session. However, it includes subsequent page_view events to a page_location that has previously recorded a page_view event in the same session, even if the subsequent event has no recorded engagement time. - name: fct_ga4__client_keys description: Fact table with aggregate metrics at the level of the user's device (as indicated by the client_key). Metrics are aggregated from fct_ga4__sessions. columns: diff --git a/models/marts/core/fct_ga4__pages.sql b/models/marts/core/fct_ga4__pages.sql index 08df23a7..ca37ac0b 100644 --- a/models/marts/core/fct_ga4__pages.sql +++ b/models/marts/core/fct_ga4__pages.sql @@ -22,8 +22,6 @@ with page_view as ( stream_id, page_location, -- includes query string parameters not listed in query_parameter_exclusions variable page_key, - page_path, - page_title, -- would like to move this to dim_ga4__pages but need to think how to handle page_title changing over time page_engagement_key, count(event_name) as page_views, count(distinct client_key ) as distinct_client_keys, @@ -33,26 +31,32 @@ from {{ref('stg_ga4__event_page_view')}} {% if is_incremental() %} where event_date_dt in ({{ partitions_to_replace | join(',') }}) {% endif %} - group by 1,2,3,4,5,6,7 + group by 1,2,3,4,5 ), page_engagement as ( select - page_view.* except(page_engagement_key), + page_view.event_date_dt, + page_view.stream_id, + page_view.page_location, + page_view.page_key, + sum(page_view.page_views) as page_views, -- page_engagement_key references the page_referrer; need to re-aggregate metrics + sum(page_view.distinct_client_keys) as distinct_client_keys, + sum(page_view.new_client_keys) as new_client_keys, + sum(page_view.entrances) as entrances, sum(page_engagement_time_msec) as total_engagement_time_msec, sum( page_engagement_denominator) as avg_engagement_time_denominator from {{ ref('stg_ga4__page_engaged_time') }} right join page_view using (page_engagement_key) - group by 1,2,3,4,5,6,7,8,9,10 + group by 1,2,3,4 ), scroll as ( select event_date_dt, page_location, - page_title, count(event_name) as scroll_events from {{ref('stg_ga4__event_scroll')}} {% if is_incremental() %} where event_date_dt in ({{ partitions_to_replace | join(',') }}) {% endif %} - group by 1,2,3 + group by 1,2 ) {% if var('conversion_events',false) %} , @@ -66,11 +70,11 @@ select join_conversions.* except (page_key), ifnull(scroll.scroll_events, 0) as scroll_events from join_conversions -left join scroll using (event_date_dt, page_location, page_title) +left join scroll using (event_date_dt, page_location) {% else %} select page_engagement.* except (page_key), ifnull(scroll.scroll_events, 0) as scroll_events from page_engagement -left join scroll using (event_date_dt, page_location, page_title) +left join scroll using (event_date_dt, page_location) {% endif %} \ No newline at end of file diff --git a/models/marts/core/fct_ga4__pages.yml b/models/marts/core/fct_ga4__pages.yml new file mode 100644 index 00000000..3c604173 --- /dev/null +++ b/models/marts/core/fct_ga4__pages.yml @@ -0,0 +1,13 @@ +version: 2 + +models: + - name: fct_ga4__pages + description: Incremental model with page metrics such as visits, users, new_users, entrances and exits as well as configurable conversion counts grouped by page_location. + tests: + - unique: + column_name: "(page_location || event_date_dt)" + columns: + - name: total_engagement_time_msec + description: The total engagement time for that page_location. + - name: avg_engagement_time_denominator + description: Use avg_engagement_time_denominator to calculate the average engagement time, which is derived by dividing the sum of total engagement time by the product of the sum of the denominator and 1000 to get the average engagement time in seconds (average_engagement_time = sum(total_engagement_time_msec)/(sum(avg_engagement_time_denominator) *1000 )). The denominator excludes page_view events where no engagement time is recorded for the page_location within a session. However, it includes subsequent page_view events to a page_location that has previously recorded a page_view event in the same session, even if the subsequent event has no recorded engagement time.