diff --git a/macros/base_select.sql b/macros/base_select.sql index d4e7356e..96c28485 100644 --- a/macros/base_select.sql +++ b/macros/base_select.sql @@ -162,8 +162,17 @@ , unnested_items.item_params )) from unnest(items) as unnested_items ) items - , property_id - , {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }} + , COALESCE( + -- Use int_value if available + {{ ga4.unnest_key('event_params', 'ga_session_id', 'int_value', 'session_id') }}, + -- Extract the second section (numeric part) from the string_value format + CAST( + REGEXP_EXTRACT( + {{ ga4.unnest_key('event_params', 'ga_session_id', 'string_value', 'session_id_string') }}, + r'^GS\d\.\d\.(\d+)' + ) AS INT64 + ) +) AS session_id , {{ ga4.unnest_key('event_params', 'page_location') }} , {{ ga4.unnest_key('event_params', 'ga_session_number', 'int_value', 'session_number') }} , COALESCE( diff --git a/macros/select_date_range.sql b/macros/select_date_range.sql new file mode 100644 index 00000000..56c7acce --- /dev/null +++ b/macros/select_date_range.sql @@ -0,0 +1,7 @@ +{% macro select_date_range(start_date, end_date, date_column) %} + {% if start_date is not none and end_date is not none %} + date_column >= start_date and date_column <= end_date + {% else %} + date_column >= CURRENT_DATE - var("lookback_window") + {% endif %} +{% endmacro %} \ No newline at end of file diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 533dbc0f..1a719f58 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -13,7 +13,7 @@ "data_type": "date", }, partitions = partitions_to_replace, - cluster_by=['event_name'] + cluster_by=['event_name', 'stream_id'] ) }} @@ -22,6 +22,9 @@ with source as ( {{ ga4.base_select_source() }} from {{ source('ga4', 'events') }} where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{var('start_date')}} + {% if end_date is not none %} + and cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) <= {{ end_date }} + {% endif %} {% if is_incremental() %} and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }}) {% endif %} diff --git a/models/staging/stg_ga4__client_key_first_last_pageviews.sql b/models/staging/stg_ga4__client_key_first_last_pageviews.sql index d46f1c5a..1a51d550 100644 --- a/models/staging/stg_ga4__client_key_first_last_pageviews.sql +++ b/models/staging/stg_ga4__client_key_first_last_pageviews.sql @@ -4,6 +4,7 @@ with page_views_first_last as ( select + stream_id, client_key, FIRST_VALUE(event_key) OVER (PARTITION BY client_key ORDER BY event_timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS first_page_view_event_key, LAST_VALUE(event_key) OVER (PARTITION BY client_key ORDER BY event_timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS last_page_view_event_key @@ -12,6 +13,7 @@ with page_views_first_last as ( ), page_views_by_client_key as ( select distinct + stream_id, client_key, first_page_view_event_key, last_page_view_event_key diff --git a/models/staging/stg_ga4__derived_user_properties.sql b/models/staging/stg_ga4__derived_user_properties.sql index ec1fd6b6..1803ea51 100644 --- a/models/staging/stg_ga4__derived_user_properties.sql +++ b/models/staging/stg_ga4__derived_user_properties.sql @@ -11,6 +11,7 @@ with events_from_valid_users as ( unnest_user_properties as ( select + stream_id, client_key, event_timestamp {% for up in var('derived_user_properties', []) %} @@ -20,6 +21,7 @@ unnest_user_properties as ) SELECT DISTINCT + stream_id, client_key {% for up in var('derived_user_properties', []) %} , LAST_VALUE({{ up.event_parameter }} IGNORE NULLS) OVER (user_window) AS {{ up.user_property_name }} diff --git a/models/staging/stg_ga4__sessions_traffic_sources.sql b/models/staging/stg_ga4__sessions_traffic_sources.sql index b0f55c40..afb132cc 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources.sql @@ -1,6 +1,7 @@ with session_events as ( select - session_key + stream_id + ,session_key ,event_timestamp ,events.event_source ,event_medium @@ -22,7 +23,8 @@ set_default_channel_grouping as ( ), session_source as ( select - session_key + stream_id + ,session_key ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN event_source END) IGNORE NULLS) OVER (session_window), '(direct)') AS session_source ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_medium, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_medium ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(source_category, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_source_category diff --git a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql index 1847d8d8..f1357843 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql @@ -19,7 +19,8 @@ with session_events as ( select - client_key + stream_id + ,client_key ,session_partition_key ,event_date_dt as session_partition_date ,event_timestamp @@ -47,7 +48,8 @@ set_default_channel_grouping as ( ), first_session_source as ( select - client_key + stream_id + ,client_key ,session_partition_key ,session_partition_date ,event_timestamp @@ -69,8 +71,8 @@ find_non_direct_session_partition_key as ( from first_session_source ) -select - client_key +select stream_id + ,client_key ,session_partition_key ,session_partition_date ,session_source @@ -83,4 +85,4 @@ select ,non_direct_session_partition_key ,min(event_timestamp) as session_partition_timestamp from find_non_direct_session_partition_key -group by 1,2,3,4,5,6,7,8,9,10,11 \ No newline at end of file +group by all \ No newline at end of file diff --git a/models/staging/stg_ga4__user_id_mapping.sql b/models/staging/stg_ga4__user_id_mapping.sql index 75786898..ff5150fc 100644 --- a/models/staging/stg_ga4__user_id_mapping.sql +++ b/models/staging/stg_ga4__user_id_mapping.sql @@ -1,5 +1,6 @@ with events_with_user_id as ( select + stream_id, user_id, client_key, event_timestamp @@ -9,14 +10,16 @@ with events_with_user_id as ( ), include_last_seen_timestamp as ( select + stream_id, user_id, client_key, max(event_timestamp) as last_seen_user_id_timestamp from events_with_user_id - group by 1,2 + group by 1,2,3 ), pick_latest_timestamp as ( select + stream_id, user_id as last_seen_user_id, client_key, last_seen_user_id_timestamp