Skip to content

Commit

Permalink
fix fct_ga4__pages model and add tests to catch similar issues (#295)
Browse files Browse the repository at this point in the history
* fix fct_ga4__pages model and add tests to catch issue

* remove page_path and page_title from fct_ga4__pages
  • Loading branch information
dgitis authored Jan 22, 2024
1 parent 6c6d756 commit e21ceec
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 16 deletions.
7 changes: 0 additions & 7 deletions models/marts/core/core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,6 @@ models:
description: Hashed combination of user_pseudo_id and stream_id
tests:
- unique
- name: fct_ga4__pages
description: Incremental model with page metrics such as visits, users, new_users, entrances and exits as well as configurable conversion counts.
columns:
- name: total_engagement_time_msec
description: The total engagement time for that page_location.
- name: avg_engagement_time_denominator
description: Use avg_engagement_time_denominator to calculate the average engagement time, which is derived by dividing the sum of total engagement time by the product of the sum of the denominator and 1000 to get the average engagement time in seconds (average_engagement_time = sum(total_engagement_time_msec)/(sum(avg_engagement_time_denominator) *1000 )). The denominator excludes page_view events where no engagement time is recorded for the page_location within a session. However, it includes subsequent page_view events to a page_location that has previously recorded a page_view event in the same session, even if the subsequent event has no recorded engagement time.
- name: fct_ga4__client_keys
description: Fact table with aggregate metrics at the level of the user's device (as indicated by the client_key). Metrics are aggregated from fct_ga4__sessions.
columns:
Expand Down
22 changes: 13 additions & 9 deletions models/marts/core/fct_ga4__pages.sql
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ with page_view as (
stream_id,
page_location, -- includes query string parameters not listed in query_parameter_exclusions variable
page_key,
page_path,
page_title, -- would like to move this to dim_ga4__pages but need to think how to handle page_title changing over time
page_engagement_key,
count(event_name) as page_views,
count(distinct client_key ) as distinct_client_keys,
Expand All @@ -33,26 +31,32 @@ from {{ref('stg_ga4__event_page_view')}}
{% if is_incremental() %}
where event_date_dt in ({{ partitions_to_replace | join(',') }})
{% endif %}
group by 1,2,3,4,5,6,7
group by 1,2,3,4,5
), page_engagement as (
select
page_view.* except(page_engagement_key),
page_view.event_date_dt,
page_view.stream_id,
page_view.page_location,
page_view.page_key,
sum(page_view.page_views) as page_views, -- page_engagement_key references the page_referrer; need to re-aggregate metrics
sum(page_view.distinct_client_keys) as distinct_client_keys,
sum(page_view.new_client_keys) as new_client_keys,
sum(page_view.entrances) as entrances,
sum(page_engagement_time_msec) as total_engagement_time_msec,
sum( page_engagement_denominator) as avg_engagement_time_denominator
from {{ ref('stg_ga4__page_engaged_time') }}
right join page_view using (page_engagement_key)
group by 1,2,3,4,5,6,7,8,9,10
group by 1,2,3,4
), scroll as (
select
event_date_dt,
page_location,
page_title,
count(event_name) as scroll_events
from {{ref('stg_ga4__event_scroll')}}
{% if is_incremental() %}
where event_date_dt in ({{ partitions_to_replace | join(',') }})
{% endif %}
group by 1,2,3
group by 1,2
)
{% if var('conversion_events',false) %}
,
Expand All @@ -66,11 +70,11 @@ select
join_conversions.* except (page_key),
ifnull(scroll.scroll_events, 0) as scroll_events
from join_conversions
left join scroll using (event_date_dt, page_location, page_title)
left join scroll using (event_date_dt, page_location)
{% else %}
select
page_engagement.* except (page_key),
ifnull(scroll.scroll_events, 0) as scroll_events
from page_engagement
left join scroll using (event_date_dt, page_location, page_title)
left join scroll using (event_date_dt, page_location)
{% endif %}
13 changes: 13 additions & 0 deletions models/marts/core/fct_ga4__pages.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
version: 2

models:
  # Properties and tests for the fct_ga4__pages model.
  - name: fct_ga4__pages
    description: >-
      Incremental model with page metrics such as visits, users, new_users,
      entrances and exits as well as configurable conversion counts grouped by
      page_location.
    tests:
      # Model-level test: the expression below (page_location concatenated
      # with event_date_dt) must be unique across all rows of the model.
      - unique:
          column_name: "(page_location || event_date_dt)"
    columns:
      - name: total_engagement_time_msec
        description: The total engagement time for that page_location.
      - name: avg_engagement_time_denominator
        description: >-
          Use avg_engagement_time_denominator to calculate the average engagement
          time, which is derived by dividing the sum of total engagement time by the
          product of the sum of the denominator and 1000 to get the average
          engagement time in seconds (average_engagement_time =
          sum(total_engagement_time_msec)/(sum(avg_engagement_time_denominator)
          *1000 )). The denominator excludes page_view events where no engagement
          time is recorded for the page_location within a session. However, it
          includes subsequent page_view events to a page_location that has
          previously recorded a page_view event in the same session, even if the
          subsequent event has no recorded engagement time.

0 comments on commit e21ceec

Please sign in to comment.