Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change session #350

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions macros/base_select.sql
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,17 @@
, unnested_items.item_params
)) from unnest(items) as unnested_items
) items
, property_id
, {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }}
, COALESCE(
-- Use int_value if available
{{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }},
-- Extract the second section (numeric part) from the string_value format
CAST(
REGEXP_EXTRACT(
{{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }},
r'^GS\d\.\d\.(\d+)'
) AS INT64
)
) AS session_id
, {{ ga4.unnest_key('event_params', 'page_location') }}
, {{ ga4.unnest_key('event_params', 'ga_session_number', 'int_value', 'session_number') }}
, COALESCE(
Expand Down
7 changes: 7 additions & 0 deletions macros/select_date_range.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{% macro select_date_range(start_date, end_date, date_column) %}
{% if start_date is not none and end_date is not none %}
date_column >= start_date and date_column <= end_date
{% else %}
date_column >= CURRENT_DATE - var("lookback_window")
{% endif %}
{% endmacro %}
5 changes: 4 additions & 1 deletion models/staging/base/base_ga4__events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"data_type": "date",
},
partitions = partitions_to_replace,
cluster_by=['event_name']
cluster_by=['event_name', 'stream_id']
)
}}

Expand All @@ -22,6 +22,9 @@ with source as (
{{ ga4.base_select_source() }}
from {{ source('ga4', 'events') }}
where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{var('start_date')}}
{% if end_date is not none %}
and cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) <= {{ end_date }}
{% endif %}
{% if is_incremental() %}
and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }})
{% endif %}
Expand Down
2 changes: 2 additions & 0 deletions models/staging/stg_ga4__client_key_first_last_pageviews.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

with page_views_first_last as (
select
stream_id,
client_key,
FIRST_VALUE(event_key) OVER (PARTITION BY client_key ORDER BY event_timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS first_page_view_event_key,
LAST_VALUE(event_key) OVER (PARTITION BY client_key ORDER BY event_timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS last_page_view_event_key
Expand All @@ -12,6 +13,7 @@ with page_views_first_last as (
),
page_views_by_client_key as (
select distinct
stream_id,
client_key,
first_page_view_event_key,
last_page_view_event_key
Expand Down
2 changes: 2 additions & 0 deletions models/staging/stg_ga4__derived_user_properties.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ with events_from_valid_users as (
unnest_user_properties as
(
select
stream_id,
client_key,
event_timestamp
{% for up in var('derived_user_properties', []) %}
Expand All @@ -20,6 +21,7 @@ unnest_user_properties as
)

SELECT DISTINCT
stream_id,
client_key
{% for up in var('derived_user_properties', []) %}
, LAST_VALUE({{ up.event_parameter }} IGNORE NULLS) OVER (user_window) AS {{ up.user_property_name }}
Expand Down
6 changes: 4 additions & 2 deletions models/staging/stg_ga4__sessions_traffic_sources.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
with session_events as (
select
session_key
stream_id
,session_key
,event_timestamp
,events.event_source
,event_medium
Expand All @@ -22,7 +23,8 @@ set_default_channel_grouping as (
),
session_source as (
select
session_key
stream_id
,session_key
,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN event_source END) IGNORE NULLS) OVER (session_window), '(direct)') AS session_source
,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_medium, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_medium
,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(source_category, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_source_category
Expand Down
12 changes: 7 additions & 5 deletions models/staging/stg_ga4__sessions_traffic_sources_daily.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@

with session_events as (
select
client_key
stream_id
,client_key
,session_partition_key
,event_date_dt as session_partition_date
,event_timestamp
Expand Down Expand Up @@ -47,7 +48,8 @@ set_default_channel_grouping as (
),
first_session_source as (
select
client_key
stream_id
,client_key
,session_partition_key
,session_partition_date
,event_timestamp
Expand All @@ -69,8 +71,8 @@ find_non_direct_session_partition_key as (
from first_session_source
)

select
client_key
select stream_id
,client_key
,session_partition_key
,session_partition_date
,session_source
Expand All @@ -83,4 +85,4 @@ select
,non_direct_session_partition_key
,min(event_timestamp) as session_partition_timestamp
from find_non_direct_session_partition_key
group by 1,2,3,4,5,6,7,8,9,10,11
group by all
5 changes: 4 additions & 1 deletion models/staging/stg_ga4__user_id_mapping.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
with events_with_user_id as (
select
stream_id,
user_id,
client_key,
event_timestamp
Expand All @@ -9,14 +10,16 @@ with events_with_user_id as (
),
include_last_seen_timestamp as (
select
stream_id,
user_id,
client_key,
max(event_timestamp) as last_seen_user_id_timestamp
from events_with_user_id
group by 1,2
group by 1,2,3
),
pick_latest_timestamp as (
select
stream_id,
user_id as last_seen_user_id,
client_key,
last_seen_user_id_timestamp
Expand Down
Loading