From 9d8f0c8ce82ae9013998b54b1e1b236ba92ff91e Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Fri, 7 Jun 2024 07:18:26 -0400 Subject: [PATCH 01/47] Replaced python unit test with dbt 1.8 unit test For stg_ga4__page_conversions --- models/staging/stg_ga4__page_conversions.yml | 42 ++++++++++ unit_tests/test_stg_ga4__page_conversions.py | 85 -------------------- 2 files changed, 42 insertions(+), 85 deletions(-) create mode 100644 models/staging/stg_ga4__page_conversions.yml delete mode 100644 unit_tests/test_stg_ga4__page_conversions.py diff --git a/models/staging/stg_ga4__page_conversions.yml b/models/staging/stg_ga4__page_conversions.yml new file mode 100644 index 00000000..4793b4ca --- /dev/null +++ b/models/staging/stg_ga4__page_conversions.yml @@ -0,0 +1,42 @@ +version: 2 + +models: + - name: stg_ga4__page_conversions + description: Model that calculates the number of conversions per page. Conversions are defined as variables in the project configurations. +unit_tests: +# dbt test --select test_page_conversion_count --vars "{conversion_events: ['page_view']}" + - name: test_page_conversion_count + description: Test whether the page-level count of conversions is correct + model: stg_ga4__page_conversions + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + event_name,page_key + page_view,A + page_view,A + page_view,B + expect: + format: csv + rows: | + page_key,page_view_count + A,2 + B,1 +# dbt test --select test_page_conversion_count_non_standard_column --vars "{conversion_events: ['page-view']}" + - name: test_page_conversion_count_non_standard_column + description: Test whether the page-level count of conversions is correct + model: stg_ga4__page_conversions + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + event_name,page_key + page-view,A + page-view,A + page-view,B + expect: + format: csv + rows: | + page_key,page_view_count + A,2 + B,1 \ No newline at end of file diff --git 
a/unit_tests/test_stg_ga4__page_conversions.py b/unit_tests/test_stg_ga4__page_conversions.py deleted file mode 100644 index 6d3cd7da..00000000 --- a/unit_tests/test_stg_ga4__page_conversions.py +++ /dev/null @@ -1,85 +0,0 @@ -import pytest -from dbt.tests.util import check_relations_equal, read_file, run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -mock_stg_ga4__events_csv = """event_name,page_key -page_view,A -page_view,A -page_view,B -""".lstrip() - -mock_stg_ga4__nonstandard_events_csv = """event_name,page_key -page-view,A -page-view,A -page-view,B -""".lstrip() - -expected_csv = """page_key,page_view_count -A,2 -B,1 -""".lstrip() - -actual = read_file("../models/staging/stg_ga4__page_conversions.sql") - - -class TestPageConversions: - # Update project name to ga4 so we can call macros with ga4.macro_name - @pytest.fixture(scope="class") - def project_config_update(self): - return {"name": "ga4"} - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "valid_column_name.sql": read_file("../macros/valid_column_name.sql"), - } - - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "stg_ga4__events.csv": mock_stg_ga4__events_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - def test_mock_run_and_check(self, project): - run_dbt(["build", "--vars", "conversion_events: ['page_view']"]) - # breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) - - -class TestPageConversionsNonStandardEventName: - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "stg_ga4__events.csv": mock_stg_ga4__nonstandard_events_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "macros" - 
@pytest.fixture(scope="class") - def macros(self): - return { - "valid_column_name.sql": read_file("../macros/valid_column_name.sql"), - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - def test_mock_run_and_check(self, project): - run_dbt(["build", "--vars", "conversion_events: ['page-view']"]) - # breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) From 298f3738cebbf6fe0c804b74ad517de8dc623872 Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Fri, 7 Jun 2024 07:39:53 -0400 Subject: [PATCH 02/47] refactored unit tests for stg_ga4__session_conversions_daily --- .../stg_ga4__session_conversions_daily.sql | 4 + .../stg_ga4__session_conversions_daily.yml | 55 ++++++++++- ...test_stg_ga4__session_conversions_daily.py | 93 ------------------- 3 files changed, 58 insertions(+), 94 deletions(-) delete mode 100644 unit_tests/test_stg_ga4__session_conversions_daily.py diff --git a/models/staging/stg_ga4__session_conversions_daily.sql b/models/staging/stg_ga4__session_conversions_daily.sql index 49b0ed85..983657c5 100644 --- a/models/staging/stg_ga4__session_conversions_daily.sql +++ b/models/staging/stg_ga4__session_conversions_daily.sql @@ -1,7 +1,11 @@ {% set partitions_to_replace = ['current_date'] %} + +{% if is_incremental() %} {% for i in range(var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} +{% endif %} + {{ config( enabled= var('conversion_events', false) != false, diff --git a/models/staging/stg_ga4__session_conversions_daily.yml b/models/staging/stg_ga4__session_conversions_daily.yml index 2f26a7c6..93b7aa18 100644 --- a/models/staging/stg_ga4__session_conversions_daily.yml +++ b/models/staging/stg_ga4__session_conversions_daily.yml @@ -9,4 +9,57 @@ models: columns: - name: session_partition_key tests: - - 
unique \ No newline at end of file + - unique +unit_tests: +# dbt test --select test_session_conversion_count --vars "{conversion_events: ['my_conversion']}" + - name: test_session_conversion_count + description: Test whether the session-level count of conversions is correct + model: stg_ga4__session_conversions_daily + overrides: + macros: + is_incremental: false + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + session_key,session_partition_key,event_name,event_date_dt + A,A2022-01-01,page_view,2022-01-01 + A,A2022-01-01,my_conversion,2022-01-01 + A,A2022-01-01,my_conversion,2022-01-01 + B,B2022-01-01,my_conversion,2022-01-01 + C,C2022-01-01,some_other_event,2022-01-01 + A,A2022-01-02,my_conversion,2022-01-02 + expect: + format: csv + rows: | + session_key,session_partition_key,session_partition_date,my_conversion_count + A,A2022-01-01,2022-01-01,2 + B,B2022-01-01,2022-01-01,1 + C,C2022-01-01,2022-01-01,0 + A,A2022-01-02,2022-01-02,1 +# dbt test --select test_session_conversion_count_non_standard_event_name --vars "{conversion_events: ['my-conversion']}" + - name: test_session_conversion_count_non_standard_event_name + description: Test whether the session-level count of conversions is correct + model: stg_ga4__session_conversions_daily + overrides: + macros: + is_incremental: false + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + session_key,session_partition_key,event_name,event_date_dt + A,A2022-01-01,page_view,2022-01-01 + A,A2022-01-01,my-conversion,2022-01-01 + A,A2022-01-01,my-conversion,2022-01-01 + B,B2022-01-01,my-conversion,2022-01-01 + C,C2022-01-01,some_other_event,2022-01-01 + A,A2022-01-02,my-conversion,2022-01-02 + expect: + format: csv + rows: | + session_key,session_partition_key,session_partition_date,my_conversion_count + A,A2022-01-01,2022-01-01,2 + B,B2022-01-01,2022-01-01,1 + C,C2022-01-01,2022-01-01,0 + A,A2022-01-02,2022-01-02,1 \ No newline at end of file diff --git 
a/unit_tests/test_stg_ga4__session_conversions_daily.py b/unit_tests/test_stg_ga4__session_conversions_daily.py deleted file mode 100644 index 8ad1e7ae..00000000 --- a/unit_tests/test_stg_ga4__session_conversions_daily.py +++ /dev/null @@ -1,93 +0,0 @@ -import pytest -from dbt.tests.util import check_relations_equal, read_file, run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -mock_stg_ga4__events_csv = """session_key,session_partition_key,event_name,event_date_dt -A,A2022-01-01,page_view,2022-01-01 -A,A2022-01-01,my_conversion,2022-01-01 -A,A2022-01-01,my_conversion,2022-01-01 -B,B2022-01-01,my_conversion,2022-01-01 -C,C2022-01-01,some_other_event,2022-01-01 -A,A2022-01-02,my_conversion,2022-01-02 -""".lstrip() - -mock_stg_ga4__nonstandard_events_csv = """session_key,session_partition_key,event_name,event_date_dt -A,A2022-01-01,page_view,2022-01-01 -A,A2022-01-01,my-conversion,2022-01-01 -A,A2022-01-01,my-conversion,2022-01-01 -B,B2022-01-01,my-conversion,2022-01-01 -C,C2022-01-01,some_other_event,2022-01-01 -A,A2022-01-02,my-conversion,2022-01-02 -""".lstrip() - -expected_csv = """session_key,session_partition_key,session_partition_date,my_conversion_count -A,A2022-01-01,2022-01-01,2 -B,B2022-01-01,2022-01-01,1 -C,C2022-01-01,2022-01-01,0 -A,A2022-01-02,2022-01-02,1 -""".lstrip() - -actual = read_file("../models/staging/stg_ga4__session_conversions_daily.sql") - - -class TestUsersFirstLastEvents: - # Update project name to ga4 so we can call macros with ga4.macro_name - @pytest.fixture(scope="class") - def project_config_update(self): - return {"name": "ga4", "vars": {"static_incremental_days": 3}} - - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "stg_ga4__events.csv": mock_stg_ga4__events_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "valid_column_name.sql": 
read_file("../macros/valid_column_name.sql"), - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - def test_mock_run_and_check(self, project): - run_dbt(["build", "--vars", "conversion_events: ['my_conversion']"]) - # breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) - - -class TestUsersNonStandardEventName: - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "stg_ga4__events.csv": mock_stg_ga4__nonstandard_events_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "valid_column_name.sql": read_file("../macros/valid_column_name.sql"), - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - def test_mock_run_and_check(self, project): - run_dbt(["build", "--vars", "conversion_events: ['my-conversion']"]) - # breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) From 906bec2ae6f6da76f5fce1826083e171463363a0 Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Fri, 7 Jun 2024 07:40:11 -0400 Subject: [PATCH 03/47] update test name --- models/staging/stg_ga4__page_conversions.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/staging/stg_ga4__page_conversions.yml b/models/staging/stg_ga4__page_conversions.yml index 4793b4ca..1d2f5e04 100644 --- a/models/staging/stg_ga4__page_conversions.yml +++ b/models/staging/stg_ga4__page_conversions.yml @@ -22,8 +22,8 @@ unit_tests: page_key,page_view_count A,2 B,1 -# dbt test --select test_page_conversion_count_non_standard_column --vars "{conversion_events: ['page-view']}" - - name: test_page_conversion_count_non_standard_column +# dbt test --select 
test_page_conversion_count_non_event_name --vars "{conversion_events: ['page-view']}" + - name: test_page_conversion_count_non_event_name description: Test whether the page-level count of conversions is correct model: stg_ga4__page_conversions given: From e994408bd14b6fee8edae585e0f8d543748334b5 Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Sat, 22 Jun 2024 07:11:54 -0400 Subject: [PATCH 04/47] Replaced Python unit test with dbt unit test --- .../stg_ga4__derived_session_properties.yml | 37 +++++++++- ...est_stg_ga4__derived_session_properties.py | 74 ------------------- 2 files changed, 36 insertions(+), 75 deletions(-) delete mode 100644 unit_tests/test_stg_ga4__derived_session_properties.py diff --git a/models/staging/stg_ga4__derived_session_properties.yml b/models/staging/stg_ga4__derived_session_properties.yml index 0ecffcf9..ea2f8c0d 100644 --- a/models/staging/stg_ga4__derived_session_properties.yml +++ b/models/staging/stg_ga4__derived_session_properties.yml @@ -8,4 +8,39 @@ models: columns: - name: session_key tests: - - unique \ No newline at end of file + - unique +unit_tests: +# dbt test --select test_derived_session_properties --vars "derived_session_properties: [{'event_parameter':'my_param','session_property_name':'my_derived_property','value_type':'int_value'},{'user_property':'my_property','session_property_name':'my_derived_property2','value_type':'string_value'}]" +# TODO variable override in the unit test is not wokring as expected + - name: test_derived_session_properties + description: Test whether a derived property is successfully retrieved from multiple event payloads + model: stg_ga4__derived_session_properties + given: + - input: ref('stg_ga4__events') + format: sql + rows: | + select + 'AAA' as session_key + , 1617691790431476 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params + , ARRAY[STRUCT('my_property' as key, STRUCT('value1' as string_value) 
as value)] as user_properties + union all + select + 'AAA' as session_key + , 1617691790431477 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(2 as int_value) as value)] as event_params + , ARRAY[] as user_properties + union all + select + 'BBB' as session_key + , 1617691790431477 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params + , ARRAY[STRUCT('my_property' as key, STRUCT('value2' as string_value) as value)] as user_properties + expect: + format: dict + rows: + - {session_key: AAA, my_derived_property: 2, my_derived_property2: value1} + - {session_key: BBB, my_derived_property: 1, my_derived_property2: value2} diff --git a/unit_tests/test_stg_ga4__derived_session_properties.py b/unit_tests/test_stg_ga4__derived_session_properties.py deleted file mode 100644 index 16c960eb..00000000 --- a/unit_tests/test_stg_ga4__derived_session_properties.py +++ /dev/null @@ -1,74 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -mock_stg_ga4__events_json = """ -{ "session_key": "AAA", "event_timestamp": "1617691790431476", "event_name": "first_visit", "event_params": [{ "key": "my_param", "value": { "string_value": null, "int_value": 1, "float_value": null, "double_value": null }}], "user_properties": [{ "key": "my_property", "value": { "string_value": "value1", "int_value": null, "float_value": null, "double_value": null }}]} -{ "session_key": "AAA", "event_timestamp": "1617691790431477", "event_name": "first_visit", "event_params": [{ "key": "my_param", "value": { "string_value": null, "int_value": 2, "float_value": null, "double_value": null }}]} -{ "session_key": "BBB", "event_timestamp": "1617691790431477", "event_name": "first_visit", "event_params": [{ "key": "my_param", "value": { "string_value": null, "int_value": 1, "float_value": null, "double_value": null }}], "user_properties": [{ "key": 
"my_property", "value": { "string_value": "value2", "int_value": null, "float_value": null, "double_value": null }}]} -""".lstrip() - -expected_csv = """session_key,my_derived_property,my_derived_property2 -AAA,2,value1 -BBB,1,value2 -""".lstrip() - -models__config_yml = """ -version: 2 -sources: - - name: fixture - schema: "{{ target.schema }}" - tables: - - name: mock_stg_ga4__events_json -""" - -class TestDerivedSessionProperties(): - # Update project name to ga4 so we can call macros with ga4.macro_name - @pytest.fixture(scope="class") - def project_config_update(self): - return { - "name": "ga4" - } - - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "config.yml": models__config_yml, - "stg_ga4__events.sql": "select * from {{source('fixture','mock_stg_ga4__events_json')}}", - "actual.sql": read_file('../models/staging/stg_ga4__derived_session_properties.sql') - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "unnest_key.sql": read_file('../macros/unnest_key.sql'), - } - - def upload_json_fixture(self, project, file_name, json, table_name): - local_file_path = file_name - with open(local_file_path, "w") as outfile: - outfile.write(json) - project.adapter.upload_file( - local_file_path = local_file_path, - database = project.database, - table_schema = project.test_schema, - table_name = table_name, - kwargs = { - "source_format": "NEWLINE_DELIMITED_JSON", - "autodetect":"true" - } - ) - - def test_mock_run_and_check(self, project): - self.upload_json_fixture(project, "source.json", mock_stg_ga4__events_json, "mock_stg_ga4__events_json" ) - run_dbt(["build", "--vars", "derived_session_properties: 
[{'event_parameter':'my_param','session_property_name':'my_derived_property','value_type':'int_value'},{'user_property':'my_property','session_property_name':'my_derived_property2','value_type':'string_value'}]"]) - #breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) From 34bbab9bba97566f04b229fb2df6dd8b0416dd5d Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Sat, 22 Jun 2024 10:58:46 -0400 Subject: [PATCH 05/47] variable override working properly --- models/staging/stg_ga4__derived_session_properties.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/models/staging/stg_ga4__derived_session_properties.yml b/models/staging/stg_ga4__derived_session_properties.yml index ea2f8c0d..5d4c0200 100644 --- a/models/staging/stg_ga4__derived_session_properties.yml +++ b/models/staging/stg_ga4__derived_session_properties.yml @@ -10,8 +10,7 @@ models: tests: - unique unit_tests: -# dbt test --select test_derived_session_properties --vars "derived_session_properties: [{'event_parameter':'my_param','session_property_name':'my_derived_property','value_type':'int_value'},{'user_property':'my_property','session_property_name':'my_derived_property2','value_type':'string_value'}]" -# TODO variable override in the unit test is not wokring as expected +# dbt test --select test_derived_session_properties - name: test_derived_session_properties description: Test whether a derived property is successfully retrieved from multiple event payloads model: stg_ga4__derived_session_properties @@ -44,3 +43,7 @@ unit_tests: rows: - {session_key: AAA, my_derived_property: 2, my_derived_property2: value1} - {session_key: BBB, my_derived_property: 1, my_derived_property2: value2} + overrides: + vars: {derived_session_properties: [{event_parameter: 'my_param',session_property_name: 'my_derived_property',value_type: 'int_value'},{user_property: 'my_property',session_property_name: 'my_derived_property2',value_type: 'string_value'}]} + + \ No newline 
at end of file From 4b66d1f8d164ad17e70535657fe66e5e0d7c5842 Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Sat, 22 Jun 2024 11:02:19 -0400 Subject: [PATCH 06/47] using overrides properly --- models/staging/stg_ga4__page_conversions.yml | 8 +++++--- .../stg_ga4__session_conversions_daily.yml | 18 +++++++++--------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/models/staging/stg_ga4__page_conversions.yml b/models/staging/stg_ga4__page_conversions.yml index 1d2f5e04..b285bc2d 100644 --- a/models/staging/stg_ga4__page_conversions.yml +++ b/models/staging/stg_ga4__page_conversions.yml @@ -4,7 +4,6 @@ models: - name: stg_ga4__page_conversions description: Model that calculates the number of conversions per page. Conversions are defined as variables in the project configurations. unit_tests: -# dbt test --select test_page_conversion_count --vars "{conversion_events: ['page_view']}" - name: test_page_conversion_count description: Test whether the page-level count of conversions is correct model: stg_ga4__page_conversions @@ -22,7 +21,8 @@ unit_tests: page_key,page_view_count A,2 B,1 -# dbt test --select test_page_conversion_count_non_event_name --vars "{conversion_events: ['page-view']}" + overrides: + vars: {conversion_events: ['page_view']} - name: test_page_conversion_count_non_event_name description: Test whether the page-level count of conversions is correct model: stg_ga4__page_conversions @@ -39,4 +39,6 @@ unit_tests: rows: | page_key,page_view_count A,2 - B,1 \ No newline at end of file + B,1 + overrides: + vars: {conversion_events: ['page-view']} \ No newline at end of file diff --git a/models/staging/stg_ga4__session_conversions_daily.yml b/models/staging/stg_ga4__session_conversions_daily.yml index 93b7aa18..c233f227 100644 --- a/models/staging/stg_ga4__session_conversions_daily.yml +++ b/models/staging/stg_ga4__session_conversions_daily.yml @@ -11,13 +11,9 @@ models: tests: - unique unit_tests: -# dbt test --select 
test_session_conversion_count --vars "{conversion_events: ['my_conversion']}" - name: test_session_conversion_count description: Test whether the session-level count of conversions is correct model: stg_ga4__session_conversions_daily - overrides: - macros: - is_incremental: false given: - input: ref('stg_ga4__events') format: csv @@ -37,13 +33,13 @@ unit_tests: B,B2022-01-01,2022-01-01,1 C,C2022-01-01,2022-01-01,0 A,A2022-01-02,2022-01-02,1 -# dbt test --select test_session_conversion_count_non_standard_event_name --vars "{conversion_events: ['my-conversion']}" - - name: test_session_conversion_count_non_standard_event_name - description: Test whether the session-level count of conversions is correct - model: stg_ga4__session_conversions_daily overrides: macros: is_incremental: false + vars: {conversion_events: ['my_conversion']} + - name: test_session_conversion_count_non_standard_event_name + description: Test whether the session-level count of conversions is correct + model: stg_ga4__session_conversions_daily given: - input: ref('stg_ga4__events') format: csv @@ -62,4 +58,8 @@ unit_tests: A,A2022-01-01,2022-01-01,2 B,B2022-01-01,2022-01-01,1 C,C2022-01-01,2022-01-01,0 - A,A2022-01-02,2022-01-02,1 \ No newline at end of file + A,A2022-01-02,2022-01-02,1 + overrides: + macros: + is_incremental: false + vars: {conversion_events: ['my-conversion']} \ No newline at end of file From 79f7e27c70674974b46a1e86889dc577b40edc71 Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Sat, 22 Jun 2024 11:06:30 -0400 Subject: [PATCH 07/47] replaced another unit test --- .../stg_ga4__derived_session_properties.yml | 1 - .../stg_ga4__derived_user_properties.yml | 34 ++++++++- .../test_stg_ga4__derived_user_properties.py | 74 ------------------- 3 files changed, 33 insertions(+), 76 deletions(-) delete mode 100644 unit_tests/test_stg_ga4__derived_user_properties.py diff --git a/models/staging/stg_ga4__derived_session_properties.yml 
b/models/staging/stg_ga4__derived_session_properties.yml index 5d4c0200..68d3d295 100644 --- a/models/staging/stg_ga4__derived_session_properties.yml +++ b/models/staging/stg_ga4__derived_session_properties.yml @@ -10,7 +10,6 @@ models: tests: - unique unit_tests: -# dbt test --select test_derived_session_properties - name: test_derived_session_properties description: Test whether a derived property is successfully retrieved from multiple event payloads model: stg_ga4__derived_session_properties diff --git a/models/staging/stg_ga4__derived_user_properties.yml b/models/staging/stg_ga4__derived_user_properties.yml index 3aadf7f4..47d4ef0b 100644 --- a/models/staging/stg_ga4__derived_user_properties.yml +++ b/models/staging/stg_ga4__derived_user_properties.yml @@ -7,4 +7,36 @@ models: - name: client_key description: Hashed combination of user_pseudo_id and stream_id tests: - - unique \ No newline at end of file + - unique +unit_tests: + - name: test_derived_user_properties + description: Test whether a derived user property is successfully retrieved from multiple event payloads + model: stg_ga4__derived_user_properties + given: + - input: ref('stg_ga4__events') + format: sql + rows: | + select + 'AAA' as client_key + , 1617691790431476 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params + union all + select + 'AAA' as client_key + , 1617691790431477 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(2 as int_value) as value)] as event_params + union all + select + 'BBB' as client_key + , 1617691790431477 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params + expect: + format: dict + rows: + - {client_key: AAA, my_derived_property: 2} + - {client_key: BBB, my_derived_property: 1} + overrides: + vars: {derived_user_properties: [{event_parameter: 
'my_param',user_property_name: 'my_derived_property',value_type: 'int_value'}]} \ No newline at end of file diff --git a/unit_tests/test_stg_ga4__derived_user_properties.py b/unit_tests/test_stg_ga4__derived_user_properties.py deleted file mode 100644 index 2c04c34b..00000000 --- a/unit_tests/test_stg_ga4__derived_user_properties.py +++ /dev/null @@ -1,74 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -mock_stg_ga4__events_json = """ -{ "client_key": "AAA", "event_timestamp": "1617691790431476", "event_name": "first_visit", "event_params": [{ "key": "my_param", "value": { "string_value": null, "int_value": 1, "float_value": null, "double_value": null }}]} -{ "client_key": "AAA", "event_timestamp": "1617691790431477", "event_name": "first_visit", "event_params": [{ "key": "my_param", "value": { "string_value": null, "int_value": 2, "float_value": null, "double_value": null }}]} -{ "client_key": "BBB", "event_timestamp": "1617691790431477", "event_name": "first_visit", "event_params": [{ "key": "my_param", "value": { "string_value": null, "int_value": 1, "float_value": null, "double_value": null }}]} -""".lstrip() - -expected_csv = """client_key,my_derived_property -AAA,2 -BBB,1 -""".lstrip() - -models__config_yml = """ -version: 2 -sources: - - name: fixture - schema: "{{ target.schema }}" - tables: - - name: mock_stg_ga4__events_json -""" - -class TestDerivedUserProperties(): - # Update project name to ga4 so we can call macros with ga4.macro_name - @pytest.fixture(scope="class") - def project_config_update(self): - return { - "name": "ga4" - } - - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "config.yml": models__config_yml, - "stg_ga4__events.sql": "select * from 
{{source('fixture','mock_stg_ga4__events_json')}}", - "actual.sql": read_file('../models/staging/stg_ga4__derived_user_properties.sql') - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "unnest_key.sql": read_file('../macros/unnest_key.sql'), - } - - def upload_json_fixture(self, project, file_name, json, table_name): - local_file_path = file_name - with open(local_file_path, "w") as outfile: - outfile.write(json) - project.adapter.upload_file( - local_file_path = local_file_path, - database = project.database, - table_schema = project.test_schema, - table_name = table_name, - kwargs = { - "source_format": "NEWLINE_DELIMITED_JSON", - "autodetect":"true" - } - ) - - def test_mock_run_and_check(self, project): - self.upload_json_fixture(project, "source.json", mock_stg_ga4__events_json, "mock_stg_ga4__events_json" ) - run_dbt(["build", "--vars", "derived_user_properties: [{'event_parameter':'my_param','user_property_name':'my_derived_property','value_type':'int_value'}]"]) - #breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) From cecf337f3c70f016f3c81ac0f63871c53fd53072 Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Sat, 22 Jun 2024 12:39:04 -0400 Subject: [PATCH 08/47] replaced python unit test --- .../stg_ga4__client_key_first_last_events.yml | 18 +++++++++- .../test_stg_ga4__users_first_last_events.py | 35 ------------------- 2 files changed, 17 insertions(+), 36 deletions(-) delete mode 100644 unit_tests/test_stg_ga4__users_first_last_events.py diff --git a/models/staging/stg_ga4__client_key_first_last_events.yml b/models/staging/stg_ga4__client_key_first_last_events.yml index 4e9cc7f4..87876103 100644 --- a/models/staging/stg_ga4__client_key_first_last_events.yml +++ b/models/staging/stg_ga4__client_key_first_last_events.yml @@ -7,4 +7,20 @@ models: - name: client_key description: Hashed combination of user_pseudo_id and stream_id tests: - - unique \ No newline at end 
of file + - unique +unit_tests: + - name: test_stg_ga4__client_key_first_last_events + description: Test pulling the first and last event per client key + model: stg_ga4__client_key_first_last_events + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + stream_id,client_key,event_key,event_timestamp + 1,IX+OyYJBgjwqML19GB/XIQ==,H06dLW6OhNJJ6SoEPFsSyg==,1661339279816517 + 1,IX+OyYJBgjwqML19GB/XIQ==,gt1SoAtrxDv33uDGwVeMVA==,1661339279816518 + expect: + format: csv + rows: | + client_key,first_event,last_event + IX+OyYJBgjwqML19GB/XIQ==,H06dLW6OhNJJ6SoEPFsSyg==,gt1SoAtrxDv33uDGwVeMVA== diff --git a/unit_tests/test_stg_ga4__users_first_last_events.py b/unit_tests/test_stg_ga4__users_first_last_events.py deleted file mode 100644 index 7880aaf3..00000000 --- a/unit_tests/test_stg_ga4__users_first_last_events.py +++ /dev/null @@ -1,35 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -mock_stg_ga4__events_csv = """stream_id,client_key,event_key,event_timestamp,geo_continent,geo_country,geo_region,geo_city,geo_sub_continent,geo_metro,device_category,device_mobile_brand_name,device_mobile_model_name,device_mobile_marketing_name,device_mobile_os_hardware_model,device_operating_system,device_operating_system_version,device_vendor_id,device_advertising_id,device_language,device_is_limited_ad_tracking,device_time_zone_offset_seconds,device_browser,device_browser_version,device_web_info_browser,device_web_info_browser_version,device_web_info_hostname,user_campaign,user_medium,user_source -1,IX+OyYJBgjwqML19GB/XIQ==,H06dLW6OhNJJ6SoEPFsSyg==,1661339279816517,Asia,India,Maharashtra,Mumbai,Southern Asia,(not set),desktop,Google,Chrome,,,Windows,Windows 10,,,en-us,No,,,,Chrome,104.0.0.0,www.velir.com,,, -1,IX+OyYJBgjwqML19GB/XIQ==,gt1SoAtrxDv33uDGwVeMVA==,1661339279816518,USA,Massachusetts,Maharashtra,Mumbai,Southern Asia,(not set),mobile,Google,Chrome,,,Windows,Windows 
10,,,en-us,No,,,,Chrome,104.0.0.0,www.velir.com,,, -""".lstrip() - -expected_csv = """client_key,first_event,last_event,stream_id,first_geo_continent,first_geo_country,first_geo_region,first_geo_city,first_geo_sub_continent,first_geo_metro,first_device_category,first_device_mobile_brand_name,first_device_mobile_model_name,first_device_mobile_marketing_name,first_device_mobile_os_hardware_model,first_device_operating_system,first_device_operating_system_version,first_device_vendor_id,first_device_advertising_id,first_device_language,first_device_is_limited_ad_tracking,first_device_time_zone_offset_seconds,first_device_browser,first_device_browser_version,first_device_web_info_browser,first_device_web_info_browser_version,first_device_web_info_hostname,first_user_campaign,first_user_medium,first_user_source,last_geo_continent,last_geo_country,last_geo_region,last_geo_city,last_geo_sub_continent,last_geo_metro,last_device_category,last_device_mobile_brand_name,last_device_mobile_model_name,last_device_mobile_marketing_name,last_device_mobile_os_hardware_model,last_device_operating_system,last_device_operating_system_version,last_device_vendor_id,last_device_advertising_id,last_device_language,last_device_is_limited_ad_tracking,last_device_time_zone_offset_seconds,last_device_browser,last_device_browser_version,last_device_web_info_browser,last_device_web_info_browser_version,last_device_web_info_hostname,last_user_campaign,last_user_medium,last_user_source -IX+OyYJBgjwqML19GB/XIQ==,H06dLW6OhNJJ6SoEPFsSyg==,gt1SoAtrxDv33uDGwVeMVA==,1,Asia,India,Maharashtra,Mumbai,Southern Asia,(not set),desktop,Google,Chrome,,,Windows,Windows 10,,,en-us,No,,,,Chrome,104.0.0.0,www.velir.com,,,,USA,Massachusetts,Maharashtra,Mumbai,Southern Asia,(not set),mobile,Google,Chrome,,,Windows,Windows 10,,,en-us,No,,,,Chrome,104.0.0.0,www.velir.com,,, -""".lstrip() - -actual = read_file('../models/staging/stg_ga4__client_key_first_last_events.sql') - -class TestUsersFirstLastEvents(): - # 
everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "stg_ga4__events.csv": mock_stg_ga4__events_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - def test_mock_run_and_check(self, project): - run_dbt(["build"]) - #breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) From 63a7d8681e85a27d51676c94562fe2b06d3db1fa Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Sat, 22 Jun 2024 12:42:29 -0400 Subject: [PATCH 09/47] add unit test for stg_ga4__client_key_first_last_pageviews --- ...tg_ga4__client_key_first_last_pageviews.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/models/staging/stg_ga4__client_key_first_last_pageviews.yml b/models/staging/stg_ga4__client_key_first_last_pageviews.yml index 9623fd66..b93e6850 100644 --- a/models/staging/stg_ga4__client_key_first_last_pageviews.yml +++ b/models/staging/stg_ga4__client_key_first_last_pageviews.yml @@ -7,4 +7,20 @@ models: - name: client_key description: Hashed combination of user_pseudo_id and stream_id tests: - - unique \ No newline at end of file + - unique +unit_tests: + - name: test_stg_ga4__client_key_first_last_pageviews + description: Test pulling the first and last page view per client key + model: stg_ga4__client_key_first_last_pageviews + given: + - input: ref('stg_ga4__event_page_view') + format: csv + rows: | + stream_id,client_key,event_key,event_timestamp,page_location + 1,IX+OyYJBgjwqML19GB/XIQ==,H06dLW6OhNJJ6SoEPFsSyg==,1661339279816517,A + 1,IX+OyYJBgjwqML19GB/XIQ==,gt1SoAtrxDv33uDGwVeMVA==,1661339279816518,B + expect: + format: csv + rows: | + client_key,first_page_view_event_key,last_page_view_event_key,first_page_location,last_page_location + 
IX+OyYJBgjwqML19GB/XIQ==,H06dLW6OhNJJ6SoEPFsSyg==,gt1SoAtrxDv33uDGwVeMVA==,A,B From 6e709db81bb4d7fca130eb5957419a8debc40f8a Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Sat, 22 Jun 2024 12:47:03 -0400 Subject: [PATCH 10/47] replace unit test --- .../stg_ga4__event_to_query_string_params.yml | 20 ++++++++- ...t_stg_ga4__event_to_query_string_params.py | 45 ------------------- 2 files changed, 19 insertions(+), 46 deletions(-) delete mode 100644 unit_tests/test_stg_ga4__event_to_query_string_params.py diff --git a/models/staging/stg_ga4__event_to_query_string_params.yml b/models/staging/stg_ga4__event_to_query_string_params.yml index 4b4310f4..bf6ede10 100644 --- a/models/staging/stg_ga4__event_to_query_string_params.yml +++ b/models/staging/stg_ga4__event_to_query_string_params.yml @@ -3,4 +3,22 @@ version: 2 models: - name: stg_ga4__event_to_query_string_params description: This model pivots the query string parameters contained within the event's page_location field to become rows. Each row is a single parameter/value combination contained in a single event's query string. 
- \ No newline at end of file +unit_tests: + - name: test_stg_ga4__event_to_query_string_params + model: stg_ga4__event_to_query_string_params + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + event_key,page_query_string + aaa,param1=value1&param2=value2 + bbb,param1 + ccc,param1= + expect: + format: csv + rows: | + event_key,param,value + aaa,param1,value1 + aaa,param2,value2 + bbb,param1, + ccc,param1, \ No newline at end of file diff --git a/unit_tests/test_stg_ga4__event_to_query_string_params.py b/unit_tests/test_stg_ga4__event_to_query_string_params.py deleted file mode 100644 index ab10eeeb..00000000 --- a/unit_tests/test_stg_ga4__event_to_query_string_params.py +++ /dev/null @@ -1,45 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - - -PARAMS_CSV = """event_key,page_query_string -aaa,param1=value1&param2=value2 -bbb,param1 -ccc,param1= -""".lstrip() - -EXPECTED_CSV = """event_key,param,value -aaa,param1,value1 -aaa,param2,value2 -bbb,param1, -ccc,param1, -""".lstrip() - -actual = read_file('../models/staging/stg_ga4__event_to_query_string_params.sql').replace( - "ref('stg_ga4__events')", - "ref('params')" -) - - - -class TestEventToQueryStringParams(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "params.csv": PARAMS_CSV, - "expected.csv": EXPECTED_CSV, - - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual - } - - def test_mock_run_and_check(self, project): - #self.upload_json_fixture(project, "source.json", SOURCE_JSON, "SOURCE_JSON" ) - run_dbt(["build"]) - check_relations_equal(project.adapter, ["actual", "expected"]) From 9d53c9a953210cd4f67a54929161af1c15eae013 Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Sat, 22 Jun 2024 13:17:56 -0400 Subject: [PATCH 11/47] unit test for
stg_ga4__sessions_traffic_sources_last_non_direct_daily. Not working yet --- .../stg_ga4__session_conversions_daily.yml | 2 +- ..._traffic_sources_last_non_direct_daily.sql | 10 ++++--- ..._traffic_sources_last_non_direct_daily.yml | 26 ++++++++++++++++++- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/models/staging/stg_ga4__session_conversions_daily.yml b/models/staging/stg_ga4__session_conversions_daily.yml index c233f227..464b3249 100644 --- a/models/staging/stg_ga4__session_conversions_daily.yml +++ b/models/staging/stg_ga4__session_conversions_daily.yml @@ -37,7 +37,7 @@ unit_tests: macros: is_incremental: false vars: {conversion_events: ['my_conversion']} - - name: test_session_conversion_count_non_standard_event_name + - name: test_stg_ga4__session_conversions_daily_non_standard_event_name description: Test whether the session-level count of conversions is correct model: stg_ga4__session_conversions_daily given: diff --git a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql index 5c7fc69f..188ad069 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql @@ -1,7 +1,11 @@ {% set partitions_to_replace = ['current_date'] %} + +{% if is_incremental() %} {% for i in range(var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} +{% endif %} + {{ config( materialized = 'incremental', @@ -35,7 +39,7 @@ with last_non_direct_session_partition_key as ( last_value(non_direct_session_partition_key ignore nulls) over( partition by client_key order by - session_partition_timestamp range between {{var('session_attribution_lookback_window_days', 30 ) * 24 * 60 * 60 * 1000000 }} preceding + session_partition_timestamp range between {{ 
var('session_attribution_lookback_window_days', 30 ) * 24 * 60 * 60 * 1000000 }} preceding and current row -- lookback window ) ELSE non_direct_session_partition_key @@ -44,7 +48,7 @@ with last_non_direct_session_partition_key as ( {{ref('stg_ga4__sessions_traffic_sources_daily')}} {% if is_incremental() %} -- Add 30 to static_incremental_days to include the session attribution lookback window - where session_partition_date >= date_sub(current_date, interval ({{var('static_incremental_days',3) + var('session_attribution_lookback_window_days', 30 )}} ) day) + where session_partition_date >= date_sub(current_date, interval ({{ var('static_incremental_days',3) + var('session_attribution_lookback_window_days', 30 ) }} ) day) {% endif %} ) ,join_last_non_direct_session_source as ( @@ -68,7 +72,7 @@ with last_non_direct_session_partition_key as ( ,coalesce(last_non_direct_source.session_term, '(none)') as last_non_direct_term ,coalesce(last_non_direct_source.session_default_channel_grouping, 'Direct') as last_non_direct_default_channel_grouping from last_non_direct_session_partition_key - left join {{ref('stg_ga4__sessions_traffic_sources_daily')}} last_non_direct_source on + left join {{ ref('stg_ga4__sessions_traffic_sources_daily') }} last_non_direct_source on last_non_direct_session_partition_key.session_partition_key_last_non_direct = last_non_direct_source.session_partition_key {% if is_incremental() %} -- Only keep the records in the partitions we wish to replace (as opposed to the whole 30 day lookback window) diff --git a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml index 0b34832b..ff63d64e 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml +++ b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml @@ -21,4 +21,28 @@ models: - name: last_non_direct_default_channel_grouping description: The the most 
recent non-direct channel grouping within a 30-day lookback window. tests: - - not_null \ No newline at end of file + - not_null +unit_tests: + - name: test_stg_ga4__sessions_traffic_sources_last_non_direct_daily + model: stg_ga4__sessions_traffic_sources_last_non_direct_daily + given: + - input : ref('stg_ga4__sessions_traffic_sources_daily') + format: csv + rows: | + client_key,session_partition_key,session_partition_date,session_partition_timestamp,session_source,session_medium,session_source_category,session_campaign,session_content,session_term,session_default_channel_grouping,non_direct_session_partition_key + A,A,20230505,1683321359,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,A + A,B,20230506,1683407759,(direct),,,,,,, + A,C,20230507,1683494159,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,C + A,D,20230508,1683580559,(direct),,,,,,, + expect: + format: csv + rows: + client_key,session_partition_key,session_partition_date,session_source,session_medium,session_source_category,session_campaign,session_content,session_term,session_default_channel_grouping,session_partition_key_last_non_direct,last_non_direct_source,last_non_direct_medium,last_non_direct_source_category,last_non_direct_campaign,last_non_direct_content,last_non_direct_term,last_non_direct_default_channel_grouping + A,A,20230505,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,A,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a + A,B,20230506,(direct),,,,,,,A,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a + A,C,20230507,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,C,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a + 
A,D,20230508,(direct),,,,,,,C,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a + overrides: + macros: + is_incremental: false + vars: {session_attribution_lookback_window_days: 30} From 3425fdf769110f5d22dbf5052fe4f3cfceb9fe46 Mon Sep 17 00:00:00 2001 From: David Booke Date: Tue, 22 Oct 2024 16:17:40 -0500 Subject: [PATCH 12/47] Add package-lock.yml to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 84063871..3bf3158b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ target/ dbt_packages/ logs/ +package-lock.yml google-cloud-sdk/ unit_tests/.env From c3ba7f7aeb7498de64c7da628b98b1bfc0fe3aed Mon Sep 17 00:00:00 2001 From: David Booke Date: Wed, 23 Oct 2024 08:23:15 -0500 Subject: [PATCH 13/47] Add vars to dbt_project.yml for testing --- dbt_project.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/dbt_project.yml b/dbt_project.yml index b758f5e5..5dac60bb 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -8,6 +8,27 @@ seed-paths: ["seeds"] macro-paths: ["macros"] snapshot-paths: ["snapshots"] +profile: 'bq_ga4' + +vars: + start_date: "20230306" # Defines the earliest GA4 _TABLE_SUFFIX to load into base events model. 
+ source_project: "analytics" + property_ids: [id] + frequency: "daily" + conversion_events: ['add_to_cart', 'large_button_clicked'] + static_incremental_days: 3 + derived_session_properties: + - event_parameter: "ga_session_id" + session_property_name: "ga_session_id" + value_type: "int_value" + - event_parameter: "page_title" + session_property_name: "page_title" + value_type: "string_value" + derived_user_properties: + - event_parameter: "page_title" + user_property_name: "page_title" + value_type: "string_value" + target-path: "target" # directory which will store compiled SQL files clean-targets: # directories to be removed by `dbt clean` - "target" From a1f10df3bbc74d730dfa67a479ccf18961134852 Mon Sep 17 00:00:00 2001 From: David Booke Date: Wed, 23 Oct 2024 08:29:40 -0500 Subject: [PATCH 14/47] Add unit tests to stg_ga4__events.yml for the url_parsing macros --- models/staging/stg_ga4__events.yml | 75 ++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/models/staging/stg_ga4__events.yml b/models/staging/stg_ga4__events.yml index 984f606d..82a0bbc0 100644 --- a/models/staging/stg_ga4__events.yml +++ b/models/staging/stg_ga4__events.yml @@ -58,3 +58,78 @@ unit_tests: rows: - {page_location: https://asite.com/page, page_referrer: https://asite.com/previous_page} - {page_location: https://asite.com/anotherpage, page_referrer: https://asite.com/previous_page} + + - name: query_parameter_extraction + description: "Check that query parameters get extracted from the model and added as new fields." 
+ model: stg_ga4__events + given: + - input: ref('base_ga4__events') + rows: + - {page_location: https://asite.com/page?param1=value1} + - {page_location: https://asite.com/anotherpage?param1=value%20with%20encoded%20spaces} + - {page_location: https://sitetwo.com/page?param1=value1&param2=value2} + - {page_location: https://anothersite.com/page?not_included_param=value&param1=val&param=also_not_included} + - {page_location: https://threeparams.com/page&param1=another_val&param2=second_val&param3=value3} + - {page_location: https://noparams.com/} + overrides: + vars: + query_parameter_extraction: ["param1", "param2", "param3"] + expect: + rows: + - {query_param_param1: value1, query_param_param2: null, query_param_param3: null} + - {query_param_param1: value%20with%20encoded%20spaces, query_param_param2: null, query_param_param3: null} + - {query_param_param1: value1, query_param_param2: value2, query_param_param3: null} + - {query_param_param1: val, query_param_param2: null, query_param_param3: null} + - {query_param_param1: another_val, query_param_param2: second_val, query_param_param3: value3} + - {query_param_param1: null, query_param_param2: null, query_param_param3: null} + + - name: hostname_extraction_from_url + description: "Check that the hostname is extracted from the URL" + model: stg_ga4__events + given: + - input: ref('base_ga4__events') + rows: + - {page_location: https://fakesite.com/} + - {page_location: https://www.mock.sitehub.io/} + - {page_location: https://cool-site.com/with/this-path} + - {page_location: https://example.site.app/?parameter=this} + - {page_location: https://madeup.org/page?param=true&other_param=sure} + expect: + rows: + - {page_hostname: fakesite.com} + - {page_hostname: mock.sitehub.io} + - {page_hostname: cool-site.com} + - {page_hostname: example.site.app} + - {page_hostname: madeup.org} + + - name: query_string_extraction_from_url + description: "Check that the query string is extracted from the URL" + model: stg_ga4__events + given: + - input:
ref('base_ga4__events') + rows: + - {page_location: https://fakesite.com/?query_string=something} + - {page_location: https://www.no.query.string/but-has-this-path} + - {page_location: https://cool-site.com/even-cooler-path?utm_term=test-term&utm_source=test-source} + expect: + rows: + - {page_query_string: query_string=something} + - {page_query_string: null} + - {page_query_string: utm_term=test-term&utm_source=test-source} + + - name: page_path_extraction + description: "Check that the page path is extracted from the URL" + model: stg_ga4__events + given: + - input: ref('base_ga4__events') + rows: + - {page_location: https://fakesite.com/} + - {page_location: https://cool-site.com/with/this-path} + - {page_location: https://example.site.app/?parameter=no-path} + - {page_location: https://madeup.org/page?param=true&other_param=sure} + expect: + rows: + - {page_path: /} + - {page_path: /with/this-path} + - {page_path: /} + - {page_path: /page} From 59727888cce77058fb78fe2d17866e5e3a43ed79 Mon Sep 17 00:00:00 2001 From: David Booke Date: Wed, 23 Oct 2024 14:55:18 -0500 Subject: [PATCH 15/47] Add conditions for cases when event_source is null for session parameters --- models/staging/stg_ga4__sessions_traffic_sources.sql | 12 ++++++------ .../stg_ga4__sessions_traffic_sources_daily.sql | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/models/staging/stg_ga4__sessions_traffic_sources.sql b/models/staging/stg_ga4__sessions_traffic_sources.sql index b0f55c40..ffb02f1a 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources.sql @@ -24,12 +24,12 @@ session_source as ( select session_key ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN event_source END) IGNORE NULLS) OVER (session_window), '(direct)') AS session_source - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_medium, '(none)') END) IGNORE NULLS) OVER (session_window), 
'(none)') AS session_medium - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(source_category, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_source_category - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_campaign, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_campaign - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_content, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_content - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_term, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_term - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(default_channel_grouping, 'Direct') END) IGNORE NULLS) OVER (session_window), 'Direct') AS session_default_channel_grouping + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_medium, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_medium + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(source_category, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_source_category + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_campaign, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_campaign + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_content, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_content + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_term, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_term + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN 
COALESCE(default_channel_grouping, 'Direct') END) IGNORE NULLS) OVER (session_window), 'Direct') AS session_default_channel_grouping from set_default_channel_grouping WINDOW session_window AS (PARTITION BY session_key ORDER BY event_timestamp ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ) diff --git a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql index 1847d8d8..97c9763f 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql @@ -52,12 +52,12 @@ first_session_source as ( ,session_partition_date ,event_timestamp ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN event_source END) IGNORE NULLS) OVER (session_window), '(direct)') AS session_source - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_medium, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_medium - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(source_category, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_source_category - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_campaign, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_campaign - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_content, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_content - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_term, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_term - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(default_channel_grouping, 'Direct') END) IGNORE NULLS) OVER (session_window), 'Direct') AS session_default_channel_grouping + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN 
COALESCE(event_medium, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_medium + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(source_category, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_source_category + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_campaign, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_campaign + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_content, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_content + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_term, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_term + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(default_channel_grouping, 'Direct') END) IGNORE NULLS) OVER (session_window), 'Direct') AS session_default_channel_grouping from set_default_channel_grouping WINDOW session_window AS (PARTITION BY session_partition_key ORDER BY event_timestamp ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ), From 20598fb908ed066993f9a62cd7859b93a6cb02ac Mon Sep 17 00:00:00 2001 From: David Booke Date: Wed, 23 Oct 2024 14:56:24 -0500 Subject: [PATCH 16/47] Add unit test to stg_ga4__sessions_traffic_sources_daily for testing the default channel grouping macro --- .../stg_ga4__sessions_traffic_sources.yml | 78 +- tests/fixtures/ga4_source_categories.csv | 820 ++++++++++++++++++ 2 files changed, 897 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/ga4_source_categories.csv diff --git a/models/staging/stg_ga4__sessions_traffic_sources.yml b/models/staging/stg_ga4__sessions_traffic_sources.yml index fa5a54eb..5db85fd0 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources.yml 
+++ b/models/staging/stg_ga4__sessions_traffic_sources.yml @@ -14,4 +14,80 @@ models: - name: session_source description: First non-null source value of the session tests: - - not_null \ No newline at end of file + - not_null + +unit_tests: + - name: test_default_channel_grouping + model: stg_ga4__sessions_traffic_sources + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + session_key,event_timestamp,event_name,event_source,event_medium,event_campaign + A,172000000000000,event,(direct),(none), + B,172000000000000,event,(direct),(not set), + C,172000000000000,event,some-source,some-medium,some-cross-network-campaign + D,172000000000000,event,some-source,some-medium,cross-network + E,172000000000000,event,alibaba,cpc, + F,172000000000000,event,some-source,retargeting,shopping + G,172000000000000,event,google,ppc, + H,172000000000000,event,facebook,retargeting, + I,172000000000000,event,youtube.com,paid-something, + J,172000000000000,event,youtube.com,display, + K,172000000000000,event,some-source,cpc, + L,172000000000000,event,Google Shopping,, + M,172000000000000,event,some-source,,some-shopping-campaign + N,172000000000000,event,facebook,, + O,172000000000000,event,some-source,social, + P,172000000000000,event,youtube.com,, + Q,172000000000000,event,some-source,video, + R,172000000000000,event,bing,, + S,172000000000000,event,some-source,organic, + T,172000000000000,event,some-source,referral, + U,172000000000000,event,email,, + V,172000000000000,event,,e mail, + W,172000000000000,event,some-source,affiliate, + X,172000000000000,event,some-source,audio, + Y,172000000000000,event,sms,, + Z,172000000000000,event,,sms, + AA,172000000000000,event,some-source,something-push, + AB,172000000000000,event,some-source,mobile-notification, + AC,172000000000000,event,firebase,, + AD,172000000000000,event,some-source,some-medium,some-campaign + - input: ref('ga4_source_categories') + format: csv + fixture: ga4_source_categories + expect: + format: csv + rows: | 
+ session_default_channel_grouping + Direct + Direct + Cross-network + Cross-network + Paid Shopping + Paid Shopping + Paid Search + Paid Social + Paid Video + Display + Paid Other + Organic Shopping + Organic Shopping + Organic Social + Organic Social + Organic Video + Organic Video + Organic Search + Organic Search + Referral + Email + Email + Affiliates + Audio + SMS + SMS + Mobile Push Notifications + Mobile Push Notifications + Mobile Push Notifications + Unassigned diff --git a/tests/fixtures/ga4_source_categories.csv b/tests/fixtures/ga4_source_categories.csv new file mode 100644 index 00000000..bb4a7fee --- /dev/null +++ b/tests/fixtures/ga4_source_categories.csv @@ -0,0 +1,820 @@ +source,source_category +360.cn,SOURCE_CATEGORY_SEARCH +43things,SOURCE_CATEGORY_SOCIAL +43things.com,SOURCE_CATEGORY_SOCIAL +51.com,SOURCE_CATEGORY_SOCIAL +5ch.net,SOURCE_CATEGORY_SOCIAL +Google Shopping,SOURCE_CATEGORY_SHOPPING +Hatena,SOURCE_CATEGORY_SOCIAL +IGShopping,SOURCE_CATEGORY_SHOPPING +ImageShack,SOURCE_CATEGORY_SOCIAL +aax-us-east.amazon-adsystem.com,SOURCE_CATEGORY_SHOPPING +aax.amazon-adsystem.com,SOURCE_CATEGORY_SHOPPING +academia.edu,SOURCE_CATEGORY_SOCIAL +activerain,SOURCE_CATEGORY_SOCIAL +activerain.com,SOURCE_CATEGORY_SOCIAL +activeworlds,SOURCE_CATEGORY_SOCIAL +activeworlds.com,SOURCE_CATEGORY_SOCIAL +addthis,SOURCE_CATEGORY_SOCIAL +addthis.com,SOURCE_CATEGORY_SOCIAL +airg.ca,SOURCE_CATEGORY_SOCIAL +alibaba,SOURCE_CATEGORY_SHOPPING +alibaba.com,SOURCE_CATEGORY_SHOPPING +alice,SOURCE_CATEGORY_SEARCH +allnurses.com,SOURCE_CATEGORY_SOCIAL +allrecipes.com,SOURCE_CATEGORY_SOCIAL +alumniclass,SOURCE_CATEGORY_SOCIAL +alumniclass.com,SOURCE_CATEGORY_SOCIAL +amazon,SOURCE_CATEGORY_SHOPPING +amazon.co.uk,SOURCE_CATEGORY_SHOPPING +amazon.com,SOURCE_CATEGORY_SHOPPING +ameba.jp,SOURCE_CATEGORY_SOCIAL +ameblo.jp,SOURCE_CATEGORY_SOCIAL +americantowns,SOURCE_CATEGORY_SOCIAL +americantowns.com,SOURCE_CATEGORY_SOCIAL +amp.reddit.com,SOURCE_CATEGORY_SOCIAL 
+ancestry.com,SOURCE_CATEGORY_SOCIAL +anobii,SOURCE_CATEGORY_SOCIAL +anobii.com,SOURCE_CATEGORY_SOCIAL +answerbag,SOURCE_CATEGORY_SOCIAL +answerbag.com,SOURCE_CATEGORY_SOCIAL +answers.yahoo.com,SOURCE_CATEGORY_SOCIAL +aol,SOURCE_CATEGORY_SEARCH +aolanswers,SOURCE_CATEGORY_SOCIAL +aolanswers.com,SOURCE_CATEGORY_SOCIAL +apps.facebook.com,SOURCE_CATEGORY_SOCIAL +apps.shopify.com,SOURCE_CATEGORY_SHOPPING +ar.pinterest.com,SOURCE_CATEGORY_SOCIAL +ar.search.yahoo.com,SOURCE_CATEGORY_SEARCH +artstation.com,SOURCE_CATEGORY_SOCIAL +ask,SOURCE_CATEGORY_SEARCH +askubuntu,SOURCE_CATEGORY_SOCIAL +askubuntu.com,SOURCE_CATEGORY_SOCIAL +asmallworld.com,SOURCE_CATEGORY_SOCIAL +at.search.yahoo.com,SOURCE_CATEGORY_SEARCH +athlinks,SOURCE_CATEGORY_SOCIAL +athlinks.com,SOURCE_CATEGORY_SOCIAL +au.search.yahoo.com,SOURCE_CATEGORY_SEARCH +auone,SOURCE_CATEGORY_SEARCH +avg,SOURCE_CATEGORY_SEARCH +away.vk.com,SOURCE_CATEGORY_SOCIAL +awe.sm,SOURCE_CATEGORY_SOCIAL +b.hatena.ne.jp,SOURCE_CATEGORY_SOCIAL +baby-gaga,SOURCE_CATEGORY_SOCIAL +baby-gaga.com,SOURCE_CATEGORY_SOCIAL +babyblog.ru,SOURCE_CATEGORY_SOCIAL +babylon,SOURCE_CATEGORY_SEARCH +badoo,SOURCE_CATEGORY_SOCIAL +badoo.com,SOURCE_CATEGORY_SOCIAL +baidu,SOURCE_CATEGORY_SEARCH +bebo,SOURCE_CATEGORY_SOCIAL +bebo.com,SOURCE_CATEGORY_SOCIAL +beforeitsnews,SOURCE_CATEGORY_SOCIAL +beforeitsnews.com,SOURCE_CATEGORY_SOCIAL +bharatstudent,SOURCE_CATEGORY_SOCIAL +bharatstudent.com,SOURCE_CATEGORY_SOCIAL +biglobe,SOURCE_CATEGORY_SEARCH +biglobe.co.jp,SOURCE_CATEGORY_SEARCH +biglobe.ne.jp,SOURCE_CATEGORY_SEARCH +biip.no,SOURCE_CATEGORY_SOCIAL +bing,SOURCE_CATEGORY_SEARCH +biswap.org,SOURCE_CATEGORY_SOCIAL +bit.ly,SOURCE_CATEGORY_SOCIAL +blackcareernetwork.com,SOURCE_CATEGORY_SOCIAL +blackplanet,SOURCE_CATEGORY_SOCIAL +blackplanet.com,SOURCE_CATEGORY_SOCIAL +blip.fm,SOURCE_CATEGORY_SOCIAL +blog.com,SOURCE_CATEGORY_SOCIAL +blog.feedspot.com,SOURCE_CATEGORY_SOCIAL +blog.goo.ne.jp,SOURCE_CATEGORY_SOCIAL +blog.naver.com,SOURCE_CATEGORY_SOCIAL 
+blog.twitch.tv,SOURCE_CATEGORY_VIDEO +blog.yahoo.co.jp,SOURCE_CATEGORY_SOCIAL +blogg.no,SOURCE_CATEGORY_SOCIAL +bloggang.com,SOURCE_CATEGORY_SOCIAL +blogger,SOURCE_CATEGORY_SOCIAL +blogger.com,SOURCE_CATEGORY_SOCIAL +blogher,SOURCE_CATEGORY_SOCIAL +blogher.com,SOURCE_CATEGORY_SOCIAL +bloglines,SOURCE_CATEGORY_SOCIAL +bloglines.com,SOURCE_CATEGORY_SOCIAL +blogs.com,SOURCE_CATEGORY_SOCIAL +blogsome,SOURCE_CATEGORY_SOCIAL +blogsome.com,SOURCE_CATEGORY_SOCIAL +blogspot,SOURCE_CATEGORY_SOCIAL +blogspot.com,SOURCE_CATEGORY_SOCIAL +blogster,SOURCE_CATEGORY_SOCIAL +blogster.com,SOURCE_CATEGORY_SOCIAL +blurtit,SOURCE_CATEGORY_SOCIAL +blurtit.com,SOURCE_CATEGORY_SOCIAL +bookmarks.yahoo.co.jp,SOURCE_CATEGORY_SOCIAL +bookmarks.yahoo.com,SOURCE_CATEGORY_SOCIAL +br.pinterest.com,SOURCE_CATEGORY_SOCIAL +br.search.yahoo.com,SOURCE_CATEGORY_SEARCH +brightkite,SOURCE_CATEGORY_SOCIAL +brightkite.com,SOURCE_CATEGORY_SOCIAL +brizzly,SOURCE_CATEGORY_SOCIAL +brizzly.com,SOURCE_CATEGORY_SOCIAL +business.facebook.com,SOURCE_CATEGORY_SOCIAL +buzzfeed,SOURCE_CATEGORY_SOCIAL +buzzfeed.com,SOURCE_CATEGORY_SOCIAL +buzznet,SOURCE_CATEGORY_SOCIAL +buzznet.com,SOURCE_CATEGORY_SOCIAL +ca.search.yahoo.com,SOURCE_CATEGORY_SEARCH +cafe.naver.com,SOURCE_CATEGORY_SOCIAL +cafemom,SOURCE_CATEGORY_SOCIAL +cafemom.com,SOURCE_CATEGORY_SOCIAL +camospace,SOURCE_CATEGORY_SOCIAL +camospace.com,SOURCE_CATEGORY_SOCIAL +canalblog.com,SOURCE_CATEGORY_SOCIAL +care.com,SOURCE_CATEGORY_SOCIAL +care2,SOURCE_CATEGORY_SOCIAL +care2.com,SOURCE_CATEGORY_SOCIAL +caringbridge.org,SOURCE_CATEGORY_SOCIAL +catster,SOURCE_CATEGORY_SOCIAL +catster.com,SOURCE_CATEGORY_SOCIAL +cbnt.io,SOURCE_CATEGORY_SOCIAL +cellufun,SOURCE_CATEGORY_SOCIAL +cellufun.com,SOURCE_CATEGORY_SOCIAL +centerblog.net,SOURCE_CATEGORY_SOCIAL +centrum.cz,SOURCE_CATEGORY_SEARCH +ch.search.yahoo.com,SOURCE_CATEGORY_SEARCH +chat.zalo.me,SOURCE_CATEGORY_SOCIAL +checkout.shopify.com,SOURCE_CATEGORY_SHOPPING +checkout.stripe.com,SOURCE_CATEGORY_SHOPPING 
+chegg.com,SOURCE_CATEGORY_SOCIAL +chicagonow,SOURCE_CATEGORY_SOCIAL +chicagonow.com,SOURCE_CATEGORY_SOCIAL +chiebukuro.yahoo.co.jp,SOURCE_CATEGORY_SOCIAL +cl.search.yahoo.com,SOURCE_CATEGORY_SEARCH +classmates,SOURCE_CATEGORY_SOCIAL +classmates.com,SOURCE_CATEGORY_SOCIAL +classquest,SOURCE_CATEGORY_SOCIAL +classquest.com,SOURCE_CATEGORY_SOCIAL +cn.bing.com,SOURCE_CATEGORY_SEARCH +cnn,SOURCE_CATEGORY_SEARCH +co.pinterest.com,SOURCE_CATEGORY_SOCIAL +co.search.yahoo.com,SOURCE_CATEGORY_SEARCH +cocolog-nifty,SOURCE_CATEGORY_SOCIAL +cocolog-nifty.com,SOURCE_CATEGORY_SOCIAL +comcast,SOURCE_CATEGORY_SEARCH +conduit,SOURCE_CATEGORY_SEARCH +copainsdavant.linternaute.com,SOURCE_CATEGORY_SOCIAL +couchsurfing.org,SOURCE_CATEGORY_SOCIAL +cozycot,SOURCE_CATEGORY_SOCIAL +cozycot.com,SOURCE_CATEGORY_SOCIAL +cr.shopping.naver.com,SOURCE_CATEGORY_SHOPPING +cr2.shopping.naver.com,SOURCE_CATEGORY_SHOPPING +crackle,SOURCE_CATEGORY_VIDEO +crackle.com,SOURCE_CATEGORY_VIDEO +cross.tv,SOURCE_CATEGORY_SOCIAL +crunchyroll,SOURCE_CATEGORY_SOCIAL +crunchyroll.com,SOURCE_CATEGORY_SOCIAL +curiositystream,SOURCE_CATEGORY_VIDEO +curiositystream.com,SOURCE_CATEGORY_VIDEO +cyworld,SOURCE_CATEGORY_SOCIAL +cyworld.com,SOURCE_CATEGORY_SOCIAL +cz.pinterest.com,SOURCE_CATEGORY_SOCIAL +d.hatena.ne.jp,SOURCE_CATEGORY_SOCIAL +d.tube,SOURCE_CATEGORY_VIDEO +dailymotion,SOURCE_CATEGORY_VIDEO +dailymotion.com,SOURCE_CATEGORY_VIDEO +dailystrength.org,SOURCE_CATEGORY_SOCIAL +dashboard.twitch.tv,SOURCE_CATEGORY_VIDEO +daum,SOURCE_CATEGORY_SEARCH +daum.net,SOURCE_CATEGORY_SEARCH +de.search.yahoo.com,SOURCE_CATEGORY_SEARCH +deluxe.com,SOURCE_CATEGORY_SOCIAL +deviantart,SOURCE_CATEGORY_SOCIAL +deviantart.com,SOURCE_CATEGORY_SOCIAL +dianping,SOURCE_CATEGORY_SOCIAL +dianping.com,SOURCE_CATEGORY_SOCIAL +digg,SOURCE_CATEGORY_SOCIAL +digg.com,SOURCE_CATEGORY_SOCIAL +diigo,SOURCE_CATEGORY_SOCIAL +diigo.com,SOURCE_CATEGORY_SOCIAL +discover.hubpages.com,SOURCE_CATEGORY_SOCIAL +disneyplus,SOURCE_CATEGORY_VIDEO 
+disneyplus.com,SOURCE_CATEGORY_VIDEO +disqus,SOURCE_CATEGORY_SOCIAL +disqus.com,SOURCE_CATEGORY_SOCIAL +dk.search.yahoo.com,SOURCE_CATEGORY_SEARCH +dogpile,SOURCE_CATEGORY_SEARCH +dogpile.com,SOURCE_CATEGORY_SEARCH +dogster,SOURCE_CATEGORY_SOCIAL +dogster.com,SOURCE_CATEGORY_SOCIAL +dol2day,SOURCE_CATEGORY_SOCIAL +dol2day.com,SOURCE_CATEGORY_SOCIAL +doostang,SOURCE_CATEGORY_SOCIAL +doostang.com,SOURCE_CATEGORY_SOCIAL +dopplr,SOURCE_CATEGORY_SOCIAL +dopplr.com,SOURCE_CATEGORY_SOCIAL +douban,SOURCE_CATEGORY_SOCIAL +douban.com,SOURCE_CATEGORY_SOCIAL +draft.blogger.com,SOURCE_CATEGORY_SOCIAL +draugiem.lv,SOURCE_CATEGORY_SOCIAL +drugs-forum,SOURCE_CATEGORY_SOCIAL +drugs-forum.com,SOURCE_CATEGORY_SOCIAL +duckduckgo,SOURCE_CATEGORY_SEARCH +dzone,SOURCE_CATEGORY_SOCIAL +dzone.com,SOURCE_CATEGORY_SOCIAL +ebay,SOURCE_CATEGORY_SHOPPING +ebay.co.uk,SOURCE_CATEGORY_SHOPPING +ebay.com,SOURCE_CATEGORY_SHOPPING +ebay.com.au,SOURCE_CATEGORY_SHOPPING +ebay.de,SOURCE_CATEGORY_SHOPPING +ecosia.org,SOURCE_CATEGORY_SEARCH +edublogs.org,SOURCE_CATEGORY_SOCIAL +elftown,SOURCE_CATEGORY_SOCIAL +elftown.com,SOURCE_CATEGORY_SOCIAL +email.seznam.cz,SOURCE_CATEGORY_SEARCH +eniro,SOURCE_CATEGORY_SEARCH +epicurious.com,SOURCE_CATEGORY_SOCIAL +es.search.yahoo.com,SOURCE_CATEGORY_SEARCH +espanol.search.yahoo.com,SOURCE_CATEGORY_SEARCH +etsy,SOURCE_CATEGORY_SHOPPING +etsy.com,SOURCE_CATEGORY_SHOPPING +everforo.com,SOURCE_CATEGORY_SOCIAL +exalead.com,SOURCE_CATEGORY_SEARCH +exblog.jp,SOURCE_CATEGORY_SOCIAL +excite.com,SOURCE_CATEGORY_SEARCH +extole,SOURCE_CATEGORY_SOCIAL +extole.com,SOURCE_CATEGORY_SOCIAL +facebook,SOURCE_CATEGORY_SOCIAL +facebook.com,SOURCE_CATEGORY_SOCIAL +faceparty,SOURCE_CATEGORY_SOCIAL +faceparty.com,SOURCE_CATEGORY_SOCIAL +fandom.com,SOURCE_CATEGORY_SOCIAL +fanpop,SOURCE_CATEGORY_SOCIAL +fanpop.com,SOURCE_CATEGORY_SOCIAL +fark,SOURCE_CATEGORY_SOCIAL +fark.com,SOURCE_CATEGORY_SOCIAL +fast.wistia.net,SOURCE_CATEGORY_VIDEO +fb,SOURCE_CATEGORY_SOCIAL +fb.me,SOURCE_CATEGORY_SOCIAL 
+fc2,SOURCE_CATEGORY_SOCIAL +fc2.com,SOURCE_CATEGORY_SOCIAL +feedspot,SOURCE_CATEGORY_SOCIAL +feministing,SOURCE_CATEGORY_SOCIAL +feministing.com,SOURCE_CATEGORY_SOCIAL +fi.search.yahoo.com,SOURCE_CATEGORY_SEARCH +filmaffinity,SOURCE_CATEGORY_SOCIAL +filmaffinity.com,SOURCE_CATEGORY_SOCIAL +firmy.cz,SOURCE_CATEGORY_SEARCH +flickr,SOURCE_CATEGORY_SOCIAL +flickr.com,SOURCE_CATEGORY_SOCIAL +flipboard,SOURCE_CATEGORY_SOCIAL +flipboard.com,SOURCE_CATEGORY_SOCIAL +folkdirect,SOURCE_CATEGORY_SOCIAL +folkdirect.com,SOURCE_CATEGORY_SOCIAL +foodservice,SOURCE_CATEGORY_SOCIAL +foodservice.com,SOURCE_CATEGORY_SOCIAL +forums.androidcentral.com,SOURCE_CATEGORY_SOCIAL +forums.crackberry.com,SOURCE_CATEGORY_SOCIAL +forums.imore.com,SOURCE_CATEGORY_SOCIAL +forums.nexopia.com,SOURCE_CATEGORY_SOCIAL +forums.webosnation.com,SOURCE_CATEGORY_SOCIAL +forums.wpcentral.com,SOURCE_CATEGORY_SOCIAL +fotki,SOURCE_CATEGORY_SOCIAL +fotki.com,SOURCE_CATEGORY_SOCIAL +fotolog,SOURCE_CATEGORY_SOCIAL +fotolog.com,SOURCE_CATEGORY_SOCIAL +foursquare,SOURCE_CATEGORY_SOCIAL +foursquare.com,SOURCE_CATEGORY_SOCIAL +fr.search.yahoo.com,SOURCE_CATEGORY_SEARCH +free.facebook.com,SOURCE_CATEGORY_SOCIAL +friendfeed,SOURCE_CATEGORY_SOCIAL +friendfeed.com,SOURCE_CATEGORY_SOCIAL +fruehstueckstreff.org,SOURCE_CATEGORY_SOCIAL +fubar,SOURCE_CATEGORY_SOCIAL +fubar.com,SOURCE_CATEGORY_SOCIAL +gaiaonline,SOURCE_CATEGORY_SOCIAL +gaiaonline.com,SOURCE_CATEGORY_SOCIAL +gamerdna,SOURCE_CATEGORY_SOCIAL +gamerdna.com,SOURCE_CATEGORY_SOCIAL +gather.com,SOURCE_CATEGORY_SOCIAL +geni.com,SOURCE_CATEGORY_SOCIAL +getpocket.com,SOURCE_CATEGORY_SOCIAL +glassboard,SOURCE_CATEGORY_SOCIAL +glassboard.com,SOURCE_CATEGORY_SOCIAL +glassdoor,SOURCE_CATEGORY_SOCIAL +glassdoor.com,SOURCE_CATEGORY_SOCIAL +globo,SOURCE_CATEGORY_SEARCH +go.mail.ru,SOURCE_CATEGORY_SEARCH +godtube,SOURCE_CATEGORY_SOCIAL +godtube.com,SOURCE_CATEGORY_SOCIAL +goldenline.pl,SOURCE_CATEGORY_SOCIAL +goldstar,SOURCE_CATEGORY_SOCIAL +goldstar.com,SOURCE_CATEGORY_SOCIAL 
+goo.gl,SOURCE_CATEGORY_SOCIAL +gooblog,SOURCE_CATEGORY_SOCIAL +goodreads,SOURCE_CATEGORY_SOCIAL +goodreads.com,SOURCE_CATEGORY_SOCIAL +google,SOURCE_CATEGORY_SEARCH +google+,SOURCE_CATEGORY_SOCIAL +google-play,SOURCE_CATEGORY_SEARCH +googlegroups.com,SOURCE_CATEGORY_SOCIAL +googleplus,SOURCE_CATEGORY_SOCIAL +govloop,SOURCE_CATEGORY_SOCIAL +govloop.com,SOURCE_CATEGORY_SOCIAL +gowalla,SOURCE_CATEGORY_SOCIAL +gowalla.com,SOURCE_CATEGORY_SOCIAL +gree.jp,SOURCE_CATEGORY_SOCIAL +groups.google.com,SOURCE_CATEGORY_SOCIAL +gulli.com,SOURCE_CATEGORY_SOCIAL +gutefrage.net,SOURCE_CATEGORY_SOCIAL +habbo,SOURCE_CATEGORY_SOCIAL +habbo.com,SOURCE_CATEGORY_SOCIAL +help.hulu.com,SOURCE_CATEGORY_VIDEO +help.netflix.com,SOURCE_CATEGORY_VIDEO +hi5,SOURCE_CATEGORY_SOCIAL +hi5.com,SOURCE_CATEGORY_SOCIAL +hk.search.yahoo.com,SOURCE_CATEGORY_SEARCH +hootsuite,SOURCE_CATEGORY_SOCIAL +hootsuite.com,SOURCE_CATEGORY_SOCIAL +houzz,SOURCE_CATEGORY_SOCIAL +houzz.com,SOURCE_CATEGORY_SOCIAL +hoverspot,SOURCE_CATEGORY_SOCIAL +hoverspot.com,SOURCE_CATEGORY_SOCIAL +hr.com,SOURCE_CATEGORY_SOCIAL +hu.pinterest.com,SOURCE_CATEGORY_SOCIAL +hubculture,SOURCE_CATEGORY_SOCIAL +hubculture.com,SOURCE_CATEGORY_SOCIAL +hubpages.com,SOURCE_CATEGORY_SOCIAL +hulu,SOURCE_CATEGORY_VIDEO +hulu.com,SOURCE_CATEGORY_VIDEO +hyves.net,SOURCE_CATEGORY_SOCIAL +hyves.nl,SOURCE_CATEGORY_SOCIAL +ibibo,SOURCE_CATEGORY_SOCIAL +ibibo.com,SOURCE_CATEGORY_SOCIAL +id.pinterest.com,SOURCE_CATEGORY_SOCIAL +id.search.yahoo.com,SOURCE_CATEGORY_SEARCH +id.twitch.tv,SOURCE_CATEGORY_VIDEO +identi.ca,SOURCE_CATEGORY_SOCIAL +ig,SOURCE_CATEGORY_SOCIAL +imageshack.com,SOURCE_CATEGORY_SOCIAL +imageshack.us,SOURCE_CATEGORY_SOCIAL +imvu,SOURCE_CATEGORY_SOCIAL +imvu.com,SOURCE_CATEGORY_SOCIAL +in.pinterest.com,SOURCE_CATEGORY_SOCIAL +in.search.yahoo.com,SOURCE_CATEGORY_SEARCH +incredimail,SOURCE_CATEGORY_SEARCH +insanejournal,SOURCE_CATEGORY_SOCIAL +insanejournal.com,SOURCE_CATEGORY_SOCIAL +instagram,SOURCE_CATEGORY_SOCIAL 
+instagram.com,SOURCE_CATEGORY_SOCIAL +instapaper,SOURCE_CATEGORY_SOCIAL +instapaper.com,SOURCE_CATEGORY_SOCIAL +internations.org,SOURCE_CATEGORY_SOCIAL +interpals.net,SOURCE_CATEGORY_SOCIAL +intherooms,SOURCE_CATEGORY_SOCIAL +intherooms.com,SOURCE_CATEGORY_SOCIAL +iq.com,SOURCE_CATEGORY_VIDEO +iqiyi,SOURCE_CATEGORY_VIDEO +iqiyi.com,SOURCE_CATEGORY_VIDEO +irc-galleria.net,SOURCE_CATEGORY_SOCIAL +is.gd,SOURCE_CATEGORY_SOCIAL +it.search.yahoo.com,SOURCE_CATEGORY_SEARCH +italki,SOURCE_CATEGORY_SOCIAL +italki.com,SOURCE_CATEGORY_SOCIAL +jammerdirect,SOURCE_CATEGORY_SOCIAL +jammerdirect.com,SOURCE_CATEGORY_SOCIAL +jappy.com,SOURCE_CATEGORY_SOCIAL +jappy.de,SOURCE_CATEGORY_SOCIAL +jobs.netflix.com,SOURCE_CATEGORY_VIDEO +justin.tv,SOURCE_CATEGORY_VIDEO +kaboodle.com,SOURCE_CATEGORY_SOCIAL +kakao,SOURCE_CATEGORY_SOCIAL +kakao.com,SOURCE_CATEGORY_SOCIAL +kakaocorp.com,SOURCE_CATEGORY_SOCIAL +kaneva,SOURCE_CATEGORY_SOCIAL +kaneva.com,SOURCE_CATEGORY_SOCIAL +kin.naver.com,SOURCE_CATEGORY_SOCIAL +kvasir,SOURCE_CATEGORY_SEARCH +l.facebook.com,SOURCE_CATEGORY_SOCIAL +l.instagram.com,SOURCE_CATEGORY_SOCIAL +l.messenger.com,SOURCE_CATEGORY_SOCIAL +last.fm,SOURCE_CATEGORY_SOCIAL +lens.google.com,SOURCE_CATEGORY_SEARCH +librarything,SOURCE_CATEGORY_SOCIAL +librarything.com,SOURCE_CATEGORY_SOCIAL +lifestream.aol.com,SOURCE_CATEGORY_SOCIAL +line,SOURCE_CATEGORY_SOCIAL +line.me,SOURCE_CATEGORY_SOCIAL +linkedin,SOURCE_CATEGORY_SOCIAL +linkedin.com,SOURCE_CATEGORY_SOCIAL +listal,SOURCE_CATEGORY_SOCIAL +listal.com,SOURCE_CATEGORY_SOCIAL +listography,SOURCE_CATEGORY_SOCIAL +listography.com,SOURCE_CATEGORY_SOCIAL +lite.qwant.com,SOURCE_CATEGORY_SEARCH +livedoor.com,SOURCE_CATEGORY_SOCIAL +livedoorblog,SOURCE_CATEGORY_SOCIAL +livejournal,SOURCE_CATEGORY_SOCIAL +livejournal.com,SOURCE_CATEGORY_SOCIAL +lm.facebook.com,SOURCE_CATEGORY_SOCIAL +lnkd.in,SOURCE_CATEGORY_SOCIAL +lycos,SOURCE_CATEGORY_SEARCH +m.alibaba.com,SOURCE_CATEGORY_SHOPPING +m.baidu.com,SOURCE_CATEGORY_SEARCH 
+m.blog.naver.com,SOURCE_CATEGORY_SOCIAL +m.cafe.naver.com,SOURCE_CATEGORY_SOCIAL +m.facebook.com,SOURCE_CATEGORY_SOCIAL +m.kin.naver.com,SOURCE_CATEGORY_SOCIAL +m.naver.com,SOURCE_CATEGORY_SEARCH +m.search.naver.com,SOURCE_CATEGORY_SEARCH +m.shopping.naver.com,SOURCE_CATEGORY_SHOPPING +m.sogou.com,SOURCE_CATEGORY_SEARCH +m.twitch.tv,SOURCE_CATEGORY_VIDEO +m.vk.com,SOURCE_CATEGORY_SOCIAL +m.yelp.com,SOURCE_CATEGORY_SOCIAL +m.youtube.com,SOURCE_CATEGORY_VIDEO +mail.rambler.ru,SOURCE_CATEGORY_SEARCH +mail.yandex.ru,SOURCE_CATEGORY_SEARCH +malaysia.search.yahoo.com,SOURCE_CATEGORY_SEARCH +mbga.jp,SOURCE_CATEGORY_SOCIAL +medium.com,SOURCE_CATEGORY_SOCIAL +meetin.org,SOURCE_CATEGORY_SOCIAL +meetup,SOURCE_CATEGORY_SOCIAL +meetup.com,SOURCE_CATEGORY_SOCIAL +meinvz.net,SOURCE_CATEGORY_SOCIAL +meneame.net,SOURCE_CATEGORY_SOCIAL +menuism.com,SOURCE_CATEGORY_SOCIAL +mercadolibre,SOURCE_CATEGORY_SHOPPING +mercadolibre.com,SOURCE_CATEGORY_SHOPPING +mercadolibre.com.ar,SOURCE_CATEGORY_SHOPPING +mercadolibre.com.mx,SOURCE_CATEGORY_SHOPPING +message.alibaba.com,SOURCE_CATEGORY_SHOPPING +messages.google.com,SOURCE_CATEGORY_SOCIAL +messages.yahoo.co.jp,SOURCE_CATEGORY_SOCIAL +messenger,SOURCE_CATEGORY_SOCIAL +messenger.com,SOURCE_CATEGORY_SOCIAL +mix.com,SOURCE_CATEGORY_SOCIAL +mixi.jp,SOURCE_CATEGORY_SOCIAL +mobile.facebook.com,SOURCE_CATEGORY_SOCIAL +mocospace,SOURCE_CATEGORY_SOCIAL +mocospace.com,SOURCE_CATEGORY_SOCIAL +mouthshut,SOURCE_CATEGORY_SOCIAL +mouthshut.com,SOURCE_CATEGORY_SOCIAL +movabletype,SOURCE_CATEGORY_SOCIAL +movabletype.com,SOURCE_CATEGORY_SOCIAL +msearch.shopping.naver.com,SOURCE_CATEGORY_SHOPPING +msn,SOURCE_CATEGORY_SEARCH +msn.com,SOURCE_CATEGORY_SEARCH +mubi,SOURCE_CATEGORY_SOCIAL +mubi.com,SOURCE_CATEGORY_SOCIAL +music.youtube.com,SOURCE_CATEGORY_VIDEO +mx.search.yahoo.com,SOURCE_CATEGORY_SEARCH +my.opera.com,SOURCE_CATEGORY_SOCIAL +myanimelist.net,SOURCE_CATEGORY_SOCIAL +myheritage,SOURCE_CATEGORY_SOCIAL +myheritage.com,SOURCE_CATEGORY_SOCIAL 
+mylife,SOURCE_CATEGORY_SOCIAL +mylife.com,SOURCE_CATEGORY_SOCIAL +mymodernmet,SOURCE_CATEGORY_SOCIAL +mymodernmet.com,SOURCE_CATEGORY_SOCIAL +myspace,SOURCE_CATEGORY_SOCIAL +myspace.com,SOURCE_CATEGORY_SOCIAL +najdi,SOURCE_CATEGORY_SEARCH +naver,SOURCE_CATEGORY_SEARCH +naver.com,SOURCE_CATEGORY_SEARCH +netflix,SOURCE_CATEGORY_VIDEO +netflix.com,SOURCE_CATEGORY_VIDEO +netvibes,SOURCE_CATEGORY_SOCIAL +netvibes.com,SOURCE_CATEGORY_SOCIAL +news.google.com,SOURCE_CATEGORY_SEARCH +news.ycombinator.com,SOURCE_CATEGORY_SOCIAL +newsshowcase,SOURCE_CATEGORY_SOCIAL +nexopia,SOURCE_CATEGORY_SOCIAL +ngopost.org,SOURCE_CATEGORY_SOCIAL +niconico,SOURCE_CATEGORY_SOCIAL +nicovideo.jp,SOURCE_CATEGORY_SOCIAL +nightlifelink,SOURCE_CATEGORY_SOCIAL +nightlifelink.com,SOURCE_CATEGORY_SOCIAL +ning,SOURCE_CATEGORY_SOCIAL +ning.com,SOURCE_CATEGORY_SOCIAL +nl.pinterest.com,SOURCE_CATEGORY_SOCIAL +nl.search.yahoo.com,SOURCE_CATEGORY_SEARCH +nl.shopping.net,SOURCE_CATEGORY_SHOPPING +no.search.yahoo.com,SOURCE_CATEGORY_SEARCH +no.shopping.net,SOURCE_CATEGORY_SHOPPING +ntp.msn.com,SOURCE_CATEGORY_SEARCH +nz.search.yahoo.com,SOURCE_CATEGORY_SEARCH +odnoklassniki.ru,SOURCE_CATEGORY_SOCIAL +odnoklassniki.ua,SOURCE_CATEGORY_SOCIAL +offer.alibaba.com,SOURCE_CATEGORY_SHOPPING +okwave.jp,SOURCE_CATEGORY_SOCIAL +old.reddit.com,SOURCE_CATEGORY_SOCIAL +one.walmart.com,SOURCE_CATEGORY_SHOPPING +onet,SOURCE_CATEGORY_SEARCH +onet.pl,SOURCE_CATEGORY_SEARCH +oneworldgroup.org,SOURCE_CATEGORY_SOCIAL +onstartups,SOURCE_CATEGORY_SOCIAL +onstartups.com,SOURCE_CATEGORY_SOCIAL +opendiary,SOURCE_CATEGORY_SOCIAL +opendiary.com,SOURCE_CATEGORY_SOCIAL +order.shopping.yahoo.co.jp,SOURCE_CATEGORY_SHOPPING +oshiete.goo.ne.jp,SOURCE_CATEGORY_SOCIAL +out.reddit.com,SOURCE_CATEGORY_SOCIAL +over-blog.com,SOURCE_CATEGORY_SOCIAL +overblog.com,SOURCE_CATEGORY_SOCIAL +paper.li,SOURCE_CATEGORY_SOCIAL +partners.shopify.com,SOURCE_CATEGORY_SHOPPING +partyflock.nl,SOURCE_CATEGORY_SOCIAL +pe.search.yahoo.com,SOURCE_CATEGORY_SEARCH 
+ph.search.yahoo.com,SOURCE_CATEGORY_SEARCH +photobucket,SOURCE_CATEGORY_SOCIAL +photobucket.com,SOURCE_CATEGORY_SOCIAL +pinboard,SOURCE_CATEGORY_SOCIAL +pinboard.in,SOURCE_CATEGORY_SOCIAL +pingsta,SOURCE_CATEGORY_SOCIAL +pingsta.com,SOURCE_CATEGORY_SOCIAL +pinterest,SOURCE_CATEGORY_SOCIAL +pinterest.at,SOURCE_CATEGORY_SOCIAL +pinterest.ca,SOURCE_CATEGORY_SOCIAL +pinterest.ch,SOURCE_CATEGORY_SOCIAL +pinterest.cl,SOURCE_CATEGORY_SOCIAL +pinterest.co.kr,SOURCE_CATEGORY_SOCIAL +pinterest.co.uk,SOURCE_CATEGORY_SOCIAL +pinterest.com,SOURCE_CATEGORY_SOCIAL +pinterest.com.au,SOURCE_CATEGORY_SOCIAL +pinterest.com.mx,SOURCE_CATEGORY_SOCIAL +pinterest.de,SOURCE_CATEGORY_SOCIAL +pinterest.es,SOURCE_CATEGORY_SOCIAL +pinterest.fr,SOURCE_CATEGORY_SOCIAL +pinterest.it,SOURCE_CATEGORY_SOCIAL +pinterest.jp,SOURCE_CATEGORY_SOCIAL +pinterest.nz,SOURCE_CATEGORY_SOCIAL +pinterest.ph,SOURCE_CATEGORY_SOCIAL +pinterest.pt,SOURCE_CATEGORY_SOCIAL +pinterest.ru,SOURCE_CATEGORY_SOCIAL +pinterest.se,SOURCE_CATEGORY_SOCIAL +pixiv.net,SOURCE_CATEGORY_SOCIAL +pl.pinterest.com,SOURCE_CATEGORY_SOCIAL +pl.search.yahoo.com,SOURCE_CATEGORY_SEARCH +play.google.com,SOURCE_CATEGORY_SEARCH +playahead.se,SOURCE_CATEGORY_SOCIAL +player.twitch.tv,SOURCE_CATEGORY_VIDEO +player.vimeo.com,SOURCE_CATEGORY_VIDEO +plurk,SOURCE_CATEGORY_SOCIAL +plurk.com,SOURCE_CATEGORY_SOCIAL +plus.google.com,SOURCE_CATEGORY_SOCIAL +plus.url.google.com,SOURCE_CATEGORY_SOCIAL +pocket.co,SOURCE_CATEGORY_SOCIAL +posterous,SOURCE_CATEGORY_SOCIAL +posterous.com,SOURCE_CATEGORY_SOCIAL +pro.homeadvisor.com,SOURCE_CATEGORY_SOCIAL +pulse.yahoo.com,SOURCE_CATEGORY_SOCIAL +qapacity,SOURCE_CATEGORY_SOCIAL +qapacity.com,SOURCE_CATEGORY_SOCIAL +quechup,SOURCE_CATEGORY_SOCIAL +quechup.com,SOURCE_CATEGORY_SOCIAL +quora,SOURCE_CATEGORY_SOCIAL +quora.com,SOURCE_CATEGORY_SOCIAL +qwant,SOURCE_CATEGORY_SEARCH +qwant.com,SOURCE_CATEGORY_SEARCH +qzone.qq.com,SOURCE_CATEGORY_SOCIAL +rakuten,SOURCE_CATEGORY_SEARCH +rakuten.co.jp,SOURCE_CATEGORY_SEARCH 
+rambler,SOURCE_CATEGORY_SEARCH +rambler.ru,SOURCE_CATEGORY_SEARCH +ravelry,SOURCE_CATEGORY_SOCIAL +ravelry.com,SOURCE_CATEGORY_SOCIAL +reddit,SOURCE_CATEGORY_SOCIAL +reddit.com,SOURCE_CATEGORY_SOCIAL +redux,SOURCE_CATEGORY_SOCIAL +redux.com,SOURCE_CATEGORY_SOCIAL +renren,SOURCE_CATEGORY_SOCIAL +renren.com,SOURCE_CATEGORY_SOCIAL +researchgate.net,SOURCE_CATEGORY_SOCIAL +reunion,SOURCE_CATEGORY_SOCIAL +reunion.com,SOURCE_CATEGORY_SOCIAL +reverbnation,SOURCE_CATEGORY_SOCIAL +reverbnation.com,SOURCE_CATEGORY_SOCIAL +rtl.de,SOURCE_CATEGORY_SOCIAL +ryze,SOURCE_CATEGORY_SOCIAL +ryze.com,SOURCE_CATEGORY_SOCIAL +s3.amazonaws.com,SOURCE_CATEGORY_SHOPPING +salespider,SOURCE_CATEGORY_SOCIAL +salespider.com,SOURCE_CATEGORY_SOCIAL +scoop.it,SOURCE_CATEGORY_SOCIAL +screenrant,SOURCE_CATEGORY_SOCIAL +screenrant.com,SOURCE_CATEGORY_SOCIAL +scribd,SOURCE_CATEGORY_SOCIAL +scribd.com,SOURCE_CATEGORY_SOCIAL +scvngr,SOURCE_CATEGORY_SOCIAL +scvngr.com,SOURCE_CATEGORY_SOCIAL +se.search.yahoo.com,SOURCE_CATEGORY_SEARCH +se.shopping.net,SOURCE_CATEGORY_SHOPPING +search-results,SOURCE_CATEGORY_SEARCH +search.aol.co.uk,SOURCE_CATEGORY_SEARCH +search.aol.com,SOURCE_CATEGORY_SEARCH +search.google.com,SOURCE_CATEGORY_SEARCH +search.smt.docomo.ne.jp,SOURCE_CATEGORY_SEARCH +search.ukr.net,SOURCE_CATEGORY_SEARCH +secondlife,SOURCE_CATEGORY_SOCIAL +secondlife.com,SOURCE_CATEGORY_SOCIAL +secureurl.ukr.net,SOURCE_CATEGORY_SEARCH +serverfault,SOURCE_CATEGORY_SOCIAL +serverfault.com,SOURCE_CATEGORY_SOCIAL +seznam,SOURCE_CATEGORY_SEARCH +seznam.cz,SOURCE_CATEGORY_SEARCH +sg.search.yahoo.com,SOURCE_CATEGORY_SEARCH +shareit,SOURCE_CATEGORY_SOCIAL +sharethis,SOURCE_CATEGORY_SOCIAL +sharethis.com,SOURCE_CATEGORY_SOCIAL +shop.app,SOURCE_CATEGORY_SHOPPING +shopify,SOURCE_CATEGORY_SHOPPING +shopify.com,SOURCE_CATEGORY_SHOPPING +shopping.naver.com,SOURCE_CATEGORY_SHOPPING +shopping.yahoo.co.jp,SOURCE_CATEGORY_SHOPPING +shopping.yahoo.com,SOURCE_CATEGORY_SHOPPING +shopzilla,SOURCE_CATEGORY_SHOPPING 
+shopzilla.com,SOURCE_CATEGORY_SHOPPING +shvoong.com,SOURCE_CATEGORY_SOCIAL +simplycodes.com,SOURCE_CATEGORY_SHOPPING +sites.google.com,SOURCE_CATEGORY_SOCIAL +skype,SOURCE_CATEGORY_SOCIAL +skyrock,SOURCE_CATEGORY_SOCIAL +skyrock.com,SOURCE_CATEGORY_SOCIAL +slashdot.org,SOURCE_CATEGORY_SOCIAL +slideshare.net,SOURCE_CATEGORY_SOCIAL +smartnews.com,SOURCE_CATEGORY_SOCIAL +snapchat,SOURCE_CATEGORY_SOCIAL +snapchat.com,SOURCE_CATEGORY_SOCIAL +so.com,SOURCE_CATEGORY_SEARCH +social,SOURCE_CATEGORY_SOCIAL +sociallife.com.br,SOURCE_CATEGORY_SOCIAL +socialvibe,SOURCE_CATEGORY_SOCIAL +socialvibe.com,SOURCE_CATEGORY_SOCIAL +sogou,SOURCE_CATEGORY_SEARCH +sogou.com,SOURCE_CATEGORY_SEARCH +sp-web.search.auone.jp,SOURCE_CATEGORY_SEARCH +spaces.live.com,SOURCE_CATEGORY_SOCIAL +spoke,SOURCE_CATEGORY_SOCIAL +spoke.com,SOURCE_CATEGORY_SOCIAL +spruz,SOURCE_CATEGORY_SOCIAL +spruz.com,SOURCE_CATEGORY_SOCIAL +ssense.com,SOURCE_CATEGORY_SOCIAL +stackapps,SOURCE_CATEGORY_SOCIAL +stackapps.com,SOURCE_CATEGORY_SOCIAL +stackexchange,SOURCE_CATEGORY_SOCIAL +stackexchange.com,SOURCE_CATEGORY_SOCIAL +stackoverflow,SOURCE_CATEGORY_SOCIAL +stackoverflow.com,SOURCE_CATEGORY_SOCIAL +stardoll.com,SOURCE_CATEGORY_SOCIAL +startsiden,SOURCE_CATEGORY_SEARCH +startsiden.no,SOURCE_CATEGORY_SEARCH +stickam,SOURCE_CATEGORY_SOCIAL +stickam.com,SOURCE_CATEGORY_SOCIAL +store.shopping.yahoo.co.jp,SOURCE_CATEGORY_SHOPPING +stripe,SOURCE_CATEGORY_SHOPPING +stripe.com,SOURCE_CATEGORY_SHOPPING +studivz.net,SOURCE_CATEGORY_SOCIAL +suche.aol.de,SOURCE_CATEGORY_SEARCH +suomi24.fi,SOURCE_CATEGORY_SOCIAL +superuser,SOURCE_CATEGORY_SOCIAL +superuser.com,SOURCE_CATEGORY_SOCIAL +sweeva,SOURCE_CATEGORY_SOCIAL +sweeva.com,SOURCE_CATEGORY_SOCIAL +t.co,SOURCE_CATEGORY_SOCIAL +t.me,SOURCE_CATEGORY_SOCIAL +tagged,SOURCE_CATEGORY_SOCIAL +tagged.com,SOURCE_CATEGORY_SOCIAL +taggedmail,SOURCE_CATEGORY_SOCIAL +taggedmail.com,SOURCE_CATEGORY_SOCIAL +talkbiznow,SOURCE_CATEGORY_SOCIAL +talkbiznow.com,SOURCE_CATEGORY_SOCIAL 
+taringa.net,SOURCE_CATEGORY_SOCIAL +techmeme,SOURCE_CATEGORY_SOCIAL +techmeme.com,SOURCE_CATEGORY_SOCIAL +ted,SOURCE_CATEGORY_VIDEO +ted.com,SOURCE_CATEGORY_VIDEO +tencent,SOURCE_CATEGORY_SOCIAL +tencent.com,SOURCE_CATEGORY_SOCIAL +terra,SOURCE_CATEGORY_SEARCH +th.search.yahoo.com,SOURCE_CATEGORY_SEARCH +tiktok,SOURCE_CATEGORY_SOCIAL +tiktok.com,SOURCE_CATEGORY_SOCIAL +tinyurl,SOURCE_CATEGORY_SOCIAL +tinyurl.com,SOURCE_CATEGORY_SOCIAL +toolbox,SOURCE_CATEGORY_SOCIAL +toolbox.com,SOURCE_CATEGORY_SOCIAL +touch.facebook.com,SOURCE_CATEGORY_SOCIAL +tr.pinterest.com,SOURCE_CATEGORY_SOCIAL +tr.search.yahoo.com,SOURCE_CATEGORY_SEARCH +travellerspoint,SOURCE_CATEGORY_SOCIAL +travellerspoint.com,SOURCE_CATEGORY_SOCIAL +tripadvisor,SOURCE_CATEGORY_SOCIAL +tripadvisor.com,SOURCE_CATEGORY_SOCIAL +trombi,SOURCE_CATEGORY_SOCIAL +trombi.com,SOURCE_CATEGORY_SOCIAL +trustpilot,SOURCE_CATEGORY_SOCIAL +tudou,SOURCE_CATEGORY_SOCIAL +tudou.com,SOURCE_CATEGORY_SOCIAL +tuenti,SOURCE_CATEGORY_SOCIAL +tuenti.com,SOURCE_CATEGORY_SOCIAL +tumblr,SOURCE_CATEGORY_SOCIAL +tumblr.com,SOURCE_CATEGORY_SOCIAL +tut.by,SOURCE_CATEGORY_SEARCH +tw.search.yahoo.com,SOURCE_CATEGORY_SEARCH +tweetdeck,SOURCE_CATEGORY_SOCIAL +tweetdeck.com,SOURCE_CATEGORY_SOCIAL +twitch,SOURCE_CATEGORY_VIDEO +twitch.tv,SOURCE_CATEGORY_VIDEO +twitter,SOURCE_CATEGORY_SOCIAL +twitter.com,SOURCE_CATEGORY_SOCIAL +twoo.com,SOURCE_CATEGORY_SOCIAL +typepad,SOURCE_CATEGORY_SOCIAL +typepad.com,SOURCE_CATEGORY_SOCIAL +uk.search.yahoo.com,SOURCE_CATEGORY_SEARCH +uk.shopping.net,SOURCE_CATEGORY_SHOPPING +ukr,SOURCE_CATEGORY_SEARCH +unblog.fr,SOURCE_CATEGORY_SOCIAL +urbanspoon.com,SOURCE_CATEGORY_SOCIAL +us.search.yahoo.com,SOURCE_CATEGORY_SEARCH +ushareit.com,SOURCE_CATEGORY_SOCIAL +ushi.cn,SOURCE_CATEGORY_SOCIAL +utreon,SOURCE_CATEGORY_VIDEO +utreon.com,SOURCE_CATEGORY_VIDEO +vampirefreaks,SOURCE_CATEGORY_SOCIAL +vampirefreaks.com,SOURCE_CATEGORY_SOCIAL +vampirerave,SOURCE_CATEGORY_SOCIAL +vampirerave.com,SOURCE_CATEGORY_SOCIAL 
+veoh,SOURCE_CATEGORY_VIDEO +veoh.com,SOURCE_CATEGORY_VIDEO +vg.no,SOURCE_CATEGORY_SOCIAL +viadeo.journaldunet.com,SOURCE_CATEGORY_VIDEO +video.ibm.com,SOURCE_CATEGORY_SOCIAL +vimeo,SOURCE_CATEGORY_VIDEO +vimeo.com,SOURCE_CATEGORY_VIDEO +virgilio,SOURCE_CATEGORY_SEARCH +vk.com,SOURCE_CATEGORY_SOCIAL +vkontakte.ru,SOURCE_CATEGORY_SOCIAL +vn.search.yahoo.com,SOURCE_CATEGORY_SEARCH +wakoopa,SOURCE_CATEGORY_SOCIAL +wakoopa.com,SOURCE_CATEGORY_SOCIAL +walmart,SOURCE_CATEGORY_SHOPPING +walmart.com,SOURCE_CATEGORY_SHOPPING +wap.sogou.com,SOURCE_CATEGORY_SEARCH +wattpad,SOURCE_CATEGORY_SOCIAL +wattpad.com,SOURCE_CATEGORY_SOCIAL +web.facebook.com,SOURCE_CATEGORY_SOCIAL +web.skype.com,SOURCE_CATEGORY_SOCIAL +webmaster.yandex.ru,SOURCE_CATEGORY_SEARCH +websearch.rakuten.co.jp,SOURCE_CATEGORY_SEARCH +webshots,SOURCE_CATEGORY_SOCIAL +webshots.com,SOURCE_CATEGORY_SOCIAL +wechat,SOURCE_CATEGORY_SOCIAL +wechat.com,SOURCE_CATEGORY_SOCIAL +weebly,SOURCE_CATEGORY_SOCIAL +weebly.com,SOURCE_CATEGORY_SOCIAL +weibo,SOURCE_CATEGORY_SOCIAL +weibo.com,SOURCE_CATEGORY_SOCIAL +wer-weiss-was.de,SOURCE_CATEGORY_SOCIAL +weread,SOURCE_CATEGORY_SOCIAL +weread.com,SOURCE_CATEGORY_SOCIAL +whatsapp,SOURCE_CATEGORY_SOCIAL +whatsapp.com,SOURCE_CATEGORY_SOCIAL +wiki.answers.com,SOURCE_CATEGORY_SOCIAL +wikihow.com,SOURCE_CATEGORY_SOCIAL +wikitravel.org,SOURCE_CATEGORY_SOCIAL +wistia,SOURCE_CATEGORY_VIDEO +wistia.com,SOURCE_CATEGORY_VIDEO +woot.com,SOURCE_CATEGORY_SOCIAL +wordpress,SOURCE_CATEGORY_SOCIAL +wordpress.com,SOURCE_CATEGORY_SOCIAL +wordpress.org,SOURCE_CATEGORY_SOCIAL +xanga,SOURCE_CATEGORY_SOCIAL +xanga.com,SOURCE_CATEGORY_SOCIAL +xing,SOURCE_CATEGORY_SOCIAL +xing.com,SOURCE_CATEGORY_SOCIAL +yahoo,SOURCE_CATEGORY_SEARCH +yahoo-mbga.jp,SOURCE_CATEGORY_SOCIAL +yahoo.co.jp,SOURCE_CATEGORY_SEARCH +yahoo.com,SOURCE_CATEGORY_SEARCH +yammer,SOURCE_CATEGORY_SOCIAL +yammer.com,SOURCE_CATEGORY_SOCIAL +yandex,SOURCE_CATEGORY_SEARCH +yandex.by,SOURCE_CATEGORY_SEARCH +yandex.com,SOURCE_CATEGORY_SEARCH 
+yandex.com.tr,SOURCE_CATEGORY_SEARCH +yandex.fr,SOURCE_CATEGORY_SEARCH +yandex.kz,SOURCE_CATEGORY_SEARCH +yandex.ru,SOURCE_CATEGORY_SEARCH +yandex.ua,SOURCE_CATEGORY_SEARCH +yandex.uz,SOURCE_CATEGORY_SEARCH +yelp,SOURCE_CATEGORY_SOCIAL +yelp.co.uk,SOURCE_CATEGORY_SOCIAL +yelp.com,SOURCE_CATEGORY_SOCIAL +youku,SOURCE_CATEGORY_VIDEO +youku.com,SOURCE_CATEGORY_VIDEO +youroom.in,SOURCE_CATEGORY_SOCIAL +youtube,SOURCE_CATEGORY_VIDEO +youtube.com,SOURCE_CATEGORY_VIDEO +za.pinterest.com,SOURCE_CATEGORY_SOCIAL +zalo,SOURCE_CATEGORY_SOCIAL +zen.yandex.ru,SOURCE_CATEGORY_SEARCH +zoo.gr,SOURCE_CATEGORY_SOCIAL +zooppa,SOURCE_CATEGORY_SOCIAL +zooppa.com,SOURCE_CATEGORY_SOCIAL From 282eeee09122a607ab5f37bd73c233d0fd1f1059 Mon Sep 17 00:00:00 2001 From: David Booke Date: Wed, 23 Oct 2024 15:35:01 -0500 Subject: [PATCH 17/47] Add unit test to stg_ga4__user_id_mapping to test the latest mapping logic --- models/staging/stg_ga4__user_id_mapping.yml | 24 +++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/models/staging/stg_ga4__user_id_mapping.yml b/models/staging/stg_ga4__user_id_mapping.yml index bdca0579..da2d572c 100644 --- a/models/staging/stg_ga4__user_id_mapping.yml +++ b/models/staging/stg_ga4__user_id_mapping.yml @@ -10,3 +10,27 @@ models: - not_null - unique +unit_tests: + - name: test_user_id_mapping + description: Test whether the latest client_key to user_id mapping logic is correct + model: stg_ga4__user_id_mapping + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + client_key,user_id,event_timestamp + a1,,100 + a1,A,101 + b1,B,102 + c1,C,103 + c2,C,104 + c2,,105 + d1,,100 + expect: + format: csv + rows: | + last_seen_user_id,client_key,last_seen_user_id_timestamp + A,a1,101 + B,b1,102 + C,c1,103 + C,c2,104 From c3211976d8e42ad557e87edc0f49f1d4c23d13e4 Mon Sep 17 00:00:00 2001 From: David Booke Date: Wed, 23 Oct 2024 15:47:39 -0500 Subject: [PATCH 18/47] Add descriptions for unit tests that were missing them --- 
models/staging/stg_ga4__event_to_query_string_params.yml | 1 + models/staging/stg_ga4__sessions_traffic_sources.yml | 1 + .../stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/models/staging/stg_ga4__event_to_query_string_params.yml b/models/staging/stg_ga4__event_to_query_string_params.yml index bf6ede10..66679a10 100644 --- a/models/staging/stg_ga4__event_to_query_string_params.yml +++ b/models/staging/stg_ga4__event_to_query_string_params.yml @@ -5,6 +5,7 @@ models: description: This model pivots the query string parameters contained within the event's page_location field to become rows. Each row is a single parameter/value combination contained in a single event's query string. unit_tests: - name: test_stg_ga4__event_to_query_string_params + description: Test whether event query strings are flattened for each query string parameter model: stg_ga4__event_to_query_string_params given: - input: ref('stg_ga4__events') diff --git a/models/staging/stg_ga4__sessions_traffic_sources.yml b/models/staging/stg_ga4__sessions_traffic_sources.yml index 5db85fd0..fd5954ec 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources.yml +++ b/models/staging/stg_ga4__sessions_traffic_sources.yml @@ -18,6 +18,7 @@ models: unit_tests: - name: test_default_channel_grouping + description: Test whether the default_channel_grouping macro is assigning channels correctly model: stg_ga4__sessions_traffic_sources given: - input: ref('stg_ga4__events') diff --git a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml index ff63d64e..f359b9cc 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml +++ b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml @@ -24,6 +24,7 @@ models: - not_null unit_tests: - name: test_stg_ga4__sessions_traffic_sources_last_non_direct_daily +
description: Test pulling the last non direct session parameters per client_key model: stg_ga4__sessions_traffic_sources_last_non_direct_daily given: - input : ref('stg_ga4__sessions_traffic_sources_daily') From 8a1796e1031bbefd3805320d31c09103dc93dab2 Mon Sep 17 00:00:00 2001 From: David Booke Date: Wed, 23 Oct 2024 15:50:13 -0500 Subject: [PATCH 19/47] Remove python unit tests that have been migrated to dbt unit tests --- .../test_macro_default_channel_grouping.py | 302 ------------------ .../test_macro_exclude_query_parameters.py | 52 --- ...est_macro_extract_query_parameter_value.py | 54 ---- unit_tests/test_stg_Ga4__user_id_mapping.py | 43 --- ...s_traffic_sources_last_non_direct_daily.py | 40 --- 5 files changed, 491 deletions(-) delete mode 100644 unit_tests/test_macro_default_channel_grouping.py delete mode 100644 unit_tests/test_macro_exclude_query_parameters.py delete mode 100644 unit_tests/test_macro_extract_query_parameter_value.py delete mode 100644 unit_tests/test_stg_Ga4__user_id_mapping.py delete mode 100644 unit_tests/test_stg_ga4__sessions_traffic_sources_last_non_direct_daily.py diff --git a/unit_tests/test_macro_default_channel_grouping.py b/unit_tests/test_macro_default_channel_grouping.py deleted file mode 100644 index ac300c95..00000000 --- a/unit_tests/test_macro_default_channel_grouping.py +++ /dev/null @@ -1,302 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -traffic_data_with_expected_channels = [ - # Direct: Source exactly matches "(direct)" AND Medium is one of ("(not set)", "(none)") - { - "source": "(direct)", - "medium": "(none)", - "campaign": "", - "expected_channel": "Direct" - }, - { - "source": "(direct)", - "medium": "(not set)", - "campaign": "", - "expected_channel": "Direct" - }, - # Cross-network: Campaign Name contains "cross-network" - { - "source": "some-source", - "medium": "some-medium", - "campaign": "some-cross-network-campaign", - "expected_channel": "Cross-network" - }, 
- { - "source": "some-source", - "medium": "some-medium", - "campaign": "cross-network", - "expected_channel": "Cross-network" - }, - # Paid Shopping: - # (Source matches a list of shopping sites - # OR - # Campaign Name matches regex ^(.*(([^a-df-z]|^)shop|shopping).*)$) - # AND - # Medium matches regex ^(.*cp.*|ppc|retargeting|paid.*)$ - { - "source": "alibaba", - "medium": "", - "campaign": "", - "expected_channel": "Paid Shopping" - }, - { - "source": "some-source", - "medium": "retargeting", - "campaign": "shopping", - "expected_channel": "Paid Shopping" - }, - # Paid Search: - # Source matches a list of search sites - # AND - # Medium matches regex ^(.*cp.*|ppc|retargeting|paid.*)$ - { - "source": "google", - "medium": "ppc", - "campaign": "", - "expected_channel": "Paid Search" - }, - # Paid Social: - # Source matches a regex list of social sites - # AND - # Medium matches regex ^(.*cp.*|ppc|retargeting|paid.*)$ - { - "source": "facebook", - "medium": "retargeting", - "campaign": "", - "expected_channel": "Paid Social" - }, - # Paid Video: - # Source matches a list of video sites - # AND - # Medium matches regex ^(.*cp.*|ppc|retargeting|paid.*)$ - { - "source": "youtube.com", - "medium": "paid-something", - "campaign": "", - "expected_channel": "Paid Video" - }, - # Display: - # Medium is one of (“display”, “banner”, “expandable”, “interstitial”, “cpm”) - { - "source": "youtube.com", - "medium": "display", - "campaign": "", - "expected_channel": "Display" - }, - # Paid Other: - # Medium matches regex ^(.*cp.*|ppc|retargeting|paid.*)$ - { - "source": "some-source", - "medium": "cpc", - "campaign": "", - "expected_channel": "Paid Other" - }, - # Organic Shopping: - # Source matches a list of shopping sites - # OR - # Campaign name matches regex ^(.*(([^a-df-z]|^)shop|shopping).*)$ - { - "source": "Google Shopping", - "medium": "", - "campaign": "", - "expected_channel": "Organic Shopping" - }, - { - "source": "some-source", - "medium": "", - "campaign": 
"some-shopping-campaign", - "expected_channel": "Organic Shopping" - }, - # Organic Social: - # Source matches a regex list of social sites - # OR - # Medium is one of (“social”, “social-network”, “social-media”, “sm”, “social network”, “social media”) - { - "source": "facebook", - "medium": "", - "campaign": "", - "expected_channel": "Organic Social" - }, - { - "source": "some-source", - "medium": "social", - "campaign": "", - "expected_channel": "Organic Social" - }, - # Organic Video: - # Source matches a list of video sites - # OR - # Medium matches regex ^(.*video.*)$ - { - "source": "youtube.com", - "medium": "", - "campaign": "", - "expected_channel": "Organic Video" - }, - { - "source": "some-source", - "medium": "video", - "campaign": "", - "expected_channel": "Organic Video" - }, - # Organic Search: - # Source matches a list of search sites - # OR - # Medium exactly matches organic - { - "source": "bing", - "medium": "", - "campaign": "", - "expected_channel": "Organic Search" - }, - { - "source": "some-source", - "medium": "organic", - "campaign": "", - "expected_channel": "Organic Search" - }, - # Referral: - # Medium is one of ("referral", "app", or "link") - { - "source": "some-source", - "medium": "referral", - "campaign": "", - "expected_channel": "Referral" - }, - # Email: - # Source = email|e-mail|e_mail|e mail - # OR - # Medium = email|e-mail|e_mail|e mail - { - "source": "email", - "medium": "", - "campaign": "", - "expected_channel": "Email" - }, - { - "source": "", - "medium": "e mail", - "campaign": "", - "expected_channel": "Email" - }, - # Affiliates: - # Medium = affiliate - { - "source": "some-source", - "medium": "affiliate", - "campaign": "", - "expected_channel": "Affiliates" - }, - # Audio: - # Medium exactly matches audio - { - "source": "some-source", - "medium": "audio", - "campaign": "", - "expected_channel": "Audio" - }, - # SMS: - # Source exactly matches sms - # OR - # Medium exactly matches sms - { - "source": "sms", - 
"medium": "", - "campaign": "", - "expected_channel": "SMS" - }, - { - "source": "", - "medium": "sms", - "campaign": "", - "expected_channel": "SMS" - }, - # Mobile Push Notifications: - # Medium ends with "push" - # OR - # Medium contains "mobile" or "notification" - # OR - # Source exactly matches "firebase" - { - "source": "some-source", - "medium": "something-push", - "campaign": "", - "expected_channel": "Mobile Push Notifications" - }, - { - "source": "some-source", - "medium": "mobile-notification", - "campaign": "", - "expected_channel": "Mobile Push Notifications" - }, - { - "source": "firebase", - "medium": "", - "campaign": "", - "expected_channel": "Mobile Push Notifications" - }, - # Unassigned is the value Analytics uses when there are no other channel rules that match the event data. - { - "source": "some-source", - "medium": "some-medium", - "campaign": "some-campaign", - "expected_channel": "Unassigned" - }, -] - -# Generate the input CSV content and the expected CSV content -csv_header = "source,medium,campaign" -expected_header = "default_channel_grouping" - -traffic_input_lines = [csv_header] + [ - f"{row['source']},{row['medium']},{row['campaign']}" for row in traffic_data_with_expected_channels -] - -expected_csv_lines = [expected_header] + [ - row['expected_channel'] for row in traffic_data_with_expected_channels -] - -# Join the lines into a single string for input and expected CSV -traffic_input = "\n".join(traffic_input_lines) -expected_csv = "\n".join(expected_csv_lines) - - -actual = """ -with input as ( - select * from {{ref('traffic_input')}} - left join {{ref('source_category_mapping')}} using (source) -) -select -{{default_channel_grouping('source', 'medium', 'source_category','campaign')}} as default_channel_grouping -from input -""" - -class TestDefaultChannelGrouping(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "source_category_mapping.csv": 
read_file('../seeds/ga4_source_categories.csv'), - "traffic_input.csv": traffic_input, - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "macro_to_test.sql": read_file('../macros/default_channel_grouping.sql'), - } - - def test_mock_run_and_check(self, project): - #breakpoint() - run_dbt(["build"]) - check_relations_equal(project.adapter, ["actual", "expected"]) \ No newline at end of file diff --git a/unit_tests/test_macro_exclude_query_parameters.py b/unit_tests/test_macro_exclude_query_parameters.py deleted file mode 100644 index fb30f7ce..00000000 --- a/unit_tests/test_macro_exclude_query_parameters.py +++ /dev/null @@ -1,52 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -urls_to_test_csv = """url -www.website.com/?param_to_exclude=1234 -www.website.com/?param_to_exclude= -www.website.com/?foo=bar&param_to_exclude=1234 -www.website.com/?foo=bar&param_to_exclude=1234&another=parameter -www.website.com/?foo=bar&param_to_exclude=1234&another=parameter&exclude=nope -""".lstrip() - -expected_csv = """url -www.website.com/ -www.website.com/ -www.website.com/?foo=bar -www.website.com/?foo=bar&another=parameter -www.website.com/?foo=bar&another=parameter&exclude=nope -""".lstrip() - -actual = """ -select -{{remove_query_parameters('url', ['param_to_exclude'])}} as url -from {{ref('urls_to_test')}} -""" - -class TestUsersFirstLastEvents(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "urls_to_test.csv": urls_to_test_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def
models(self): - return { - "actual.sql": actual, - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "macro_to_test.sql": read_file('../macros/url_parsing.sql'), - } - - def test_mock_run_and_check(self, project): - run_dbt(["build"]) - check_relations_equal(project.adapter, ["actual", "expected"]) diff --git a/unit_tests/test_macro_extract_query_parameter_value.py b/unit_tests/test_macro_extract_query_parameter_value.py deleted file mode 100644 index 473b5367..00000000 --- a/unit_tests/test_macro_extract_query_parameter_value.py +++ /dev/null @@ -1,54 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -urls_to_test_csv = """url -www.website.com/?param1=A -www.website.com/?param1=A&param2=B -www.website.com/?param1=A&param2=B&param3=C -www.website.com/ -www.website.com/? -""".lstrip() - -expected_csv = """param1,param2,param3 -A,, -A,B, -A,B,C -,, -,, -""".lstrip() - -actual = """ - select - {{ extract_query_parameter_value( 'url' , 'param1' ) }} as param1, - {{ extract_query_parameter_value( 'url' , 'param2' ) }} as param2, - {{ extract_query_parameter_value( 'url' , 'param3' ) }} as param3 - from {{ref('urls_to_test')}} -""" - -class TestUsersFirstLastEvents(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "urls_to_test.csv": urls_to_test_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "macro_to_test.sql": read_file('../macros/url_parsing.sql'), - } - - def test_mock_run_and_check(self, project): - run_dbt(["build"]) - check_relations_equal(project.adapter, ["actual", "expected"]) diff --git
a/unit_tests/test_stg_Ga4__user_id_mapping.py b/unit_tests/test_stg_Ga4__user_id_mapping.py deleted file mode 100644 index bcb291f1..00000000 --- a/unit_tests/test_stg_Ga4__user_id_mapping.py +++ /dev/null @@ -1,43 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -mock_stg_ga4__events_csv = """client_key,user_id,event_timestamp -a1,,100 -a1,A,101 -b1,B,102 -c1,C,103 -c2,C,104 -c2,,105 -d1,,100 -""".lstrip() - -expected_csv = """last_seen_user_id,client_key,last_seen_user_id_timestamp -A,a1,101 -B,b1,102 -C,c1,103 -C,c2,104 -""".lstrip() - -actual = read_file('../models/staging/stg_ga4__user_id_mapping.sql') - -class TestUserIdMapping(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "stg_ga4__events.csv": mock_stg_ga4__events_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - def test_mock_run_and_check(self, project): - run_dbt(["build"]) - #breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) diff --git a/unit_tests/test_stg_ga4__sessions_traffic_sources_last_non_direct_daily.py b/unit_tests/test_stg_ga4__sessions_traffic_sources_last_non_direct_daily.py deleted file mode 100644 index d32ef800..00000000 --- a/unit_tests/test_stg_ga4__sessions_traffic_sources_last_non_direct_daily.py +++ /dev/null @@ -1,40 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -mock_stg_ga4__sessions_traffic_sources_daily_csv = 
"""client_key,session_partition_key,session_partition_date,session_partition_timestamp,session_source,session_medium,session_source_category,session_campaign,session_content,session_term,session_default_channel_grouping,non_direct_session_partition_key -A,A,20230505,1683321359,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,A -A,B,20230506,1683407759,(direct),,,,,,, -A,C,20230507,1683494159,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,C -A,D,20230508,1683580559,(direct),,,,,,, -""".lstrip() - -expected_csv = """client_key,session_partition_key,session_partition_date,session_source,session_medium,session_source_category,session_campaign,session_content,session_term,session_default_channel_grouping,session_partition_key_last_non_direct,last_non_direct_source,last_non_direct_medium,last_non_direct_source_category,last_non_direct_campaign,last_non_direct_content,last_non_direct_term,last_non_direct_default_channel_grouping -A,A,20230505,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,A,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a -A,B,20230506,(direct),,,,,,,A,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a -A,C,20230507,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,C,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a -A,D,20230508,(direct),,,,,,,C,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a -""".lstrip() - -actual = read_file('../models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql') - -class TestSessionsTrafficSourcesLastNonDirectDaily(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - 
"stg_ga4__sessions_traffic_sources_daily.csv": mock_stg_ga4__sessions_traffic_sources_daily_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - # Hack-y solution to ensure the model is not partitioned. Loading mock data (date columns) from a seed file + partitioning don't work well together. - "actual.sql": actual.replace("materialized = 'incremental',","materialized = 'view',"), - } - - def test_mock_run_and_check(self, project): - run_dbt(["build"]) - check_relations_equal(project.adapter, ["actual", "expected"]) From c0aba5fa270ce71eb3f04e2a1211c17ee7932833 Mon Sep 17 00:00:00 2001 From: David Booke Date: Thu, 24 Oct 2024 11:08:17 -0500 Subject: [PATCH 20/47] Add unit test to stg_ga4__events for testing transformations in stg_ga4__events --- models/staging/stg_ga4__events.yml | 218 +++++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) diff --git a/models/staging/stg_ga4__events.yml b/models/staging/stg_ga4__events.yml index 82a0bbc0..aa9070bf 100644 --- a/models/staging/stg_ga4__events.yml +++ b/models/staging/stg_ga4__events.yml @@ -133,3 +133,221 @@ unit_tests: - {page_path: /with/this-path} - {page_path: /} - {page_path: /page} + - name: test_base_to_stg_ga4__events + description: "Check whether a given row from base_ga4__events produces the expected row in stg_ga4__events" + model: stg_ga4__events + given: + - input: ref('base_ga4__events') + format: sql + rows: | + with mock_event_params as ( + + select 'ga_session_number' as key, struct(cast(null as string) as string_value, 1 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'engaged_session_event' as key, struct(cast(null as string) as string_value, 1 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'ga_session_id' as key, struct(cast(null 
as string) as string_value, 1617691775 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'source' as key, struct('bing' as string_value, null as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'page_title' as key, struct('Velir | Behavior-Driven Testing in Drupal 8' as string_value, null as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'page_location' as key, struct('https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as string_value, null as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'session_engaged' as key, struct(cast(null as string) as string_value, 1 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'engagement_time_msec' as key, struct(cast(null as string) as string_value, 30000 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + + ) + + , mock_row as ( + + select + + date('2021-04-06') as event_date_dt + , 1617691790431476 as event_timestamp + , 'first_visit' as event_name + , array_agg((select as struct mock_event_params.*)) as event_params + , null as event_previous_timestamp + , null as event_value_in_usd + , 948327668 as event_bundle_sequence_id + , null as event_server_timestamp_offset + , cast(null as string) as user_id + , '1166526666.1617691776' as user_pseudo_id -- Added + , cast(null as string) as privacy_info_analytics_storage -- Added + , cast(null as string) as privacy_info_ads_storage -- Added + , cast(null as string) as privacy_info_uses_transient_token -- Added + , null as user_properties + , 1617691790431476 as user_first_touch_timestamp + , 0.0 as user_ltv_revenue -- Added + , 'USD' as user_ltv_currency 
-- Added + , 'desktop' as device_category -- Added + , cast(null as string) as device_mobile_brand_name -- Added + , cast(null as string) as device_mobile_model_name -- Added + , cast(null as string) as device_mobile_marketing_name -- Added + , cast(null as string) as device_mobile_os_hardware_model -- Added + , 'Windows' as device_operating_system -- Added + , 'Windows 10' as device_operating_system_version -- Added + , cast(null as string) as device_vendor_id -- Added + , cast(null as string) as device_advertising_id -- Added + , 'en-us' as device_language -- Added + , 'No' as device_is_limited_ad_tracking -- Added + , null as device_time_zone_offset_seconds -- Added + , cast(null as string) as device_browser -- Added + , cast(null as string) as device_browser_version -- Added + , 'Chrome' as device_web_info_browser -- Added + , '89.0.4389.114' as device_web_info_browser_version -- Added + , 'www.velir.com' as device_web_info_hostname -- Added + , 'Asia' as geo_continent -- Added + , 'Vietnam' as geo_country -- Added + , 'Ho Chi Minh City' as geo_region -- Added + , 'Ho Chi Minh City' as geo_city -- Added + , 'Southeast Asia' as geo_sub_continent -- Added + , '(not set)' as geo_metro -- Added + , cast(null as string) as app_info_id -- Added + , cast(null as string) as app_info_version -- Added + , cast(null as string) as app_info_install_store -- Added + , cast(null as string) as app_info_firebase_app_id -- Added + , cast(null as string) as app_info_install_source -- Added + , '(direct)' as user_campaign -- Added + , '(none)' as user_medium -- Added + , '(direct)' as user_source -- Added + , '1966637064' as stream_id + , 'WEB' as platform + , null as ecommerce + , null as items + , 12345 as property_id + , 1617691775 as session_id + , 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as page_location + , 1 as session_number + , 1 as session_engaged + , 30000 as engagement_time_msec -- Added + , 'Velir | Behavior-Driven Testing 
in Drupal 8' as page_title + , cast(null as string) as page_referrer + , 'bing' as event_source -- Added + , cast(null as string) as event_medium -- Added + , cast(null as string) as event_campaign -- Added + , cast(null as string) as event_content -- Added + , cast(null as string) as event_term -- Added + , 0 as is_page_view + , 0 as is_purchase + + from mock_event_params + + ) + select * from mock_row + expect: + format: sql + rows: | + with mock_event_params as ( + + select 'ga_session_number' as key, struct(cast(null as string) as string_value, 1 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'engaged_session_event' as key, struct(cast(null as string) as string_value, 1 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'ga_session_id' as key, struct(cast(null as string) as string_value, 1617691775 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'source' as key, struct('bing' as string_value, null as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'page_title' as key, struct('Velir | Behavior-Driven Testing in Drupal 8' as string_value, null as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'page_location' as key, struct('https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as string_value, null as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'session_engaged' as key, struct(cast(null as string) as string_value, 1 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + union all + select 'engagement_time_msec' as key, struct(cast(null as string) as 
string_value, 30000 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value + + ) + + , expected_row as ( + + select + + date('2021-04-06') as event_date_dt + , 1617691790431476 as event_timestamp + , 'first_visit' as event_name + , array_agg((select as struct mock_event_params.*)) as event_params + , null as event_previous_timestamp + , null as event_value_in_usd + , 948327668 as event_bundle_sequence_id + , null as event_server_timestamp_offset + , cast(null as string) as user_id + , '1166526666.1617691776' as user_pseudo_id -- Added + , cast(null as string) as privacy_info_analytics_storage -- Added + , cast(null as string) as privacy_info_ads_storage -- Added + , cast(null as string) as privacy_info_uses_transient_token -- Added + , null as user_properties + , 1617691790431476 as user_first_touch_timestamp + , 0.0 as user_ltv_revenue -- Added + , 'USD' as user_ltv_currency -- Added + , 'desktop' as device_category -- Added + , cast(null as string) as device_mobile_brand_name -- Added + , cast(null as string) as device_mobile_model_name -- Added + , cast(null as string) as device_mobile_marketing_name -- Added + , cast(null as string) as device_mobile_os_hardware_model -- Added + , 'Windows' as device_operating_system -- Added + , 'Windows 10' as device_operating_system_version -- Added + , cast(null as string) as device_vendor_id -- Added + , cast(null as string) as device_advertising_id -- Added + , 'en-us' as device_language -- Added + , 'No' as device_is_limited_ad_tracking -- Added + , null as device_time_zone_offset_seconds -- Added + , cast(null as string) as device_browser -- Added + , cast(null as string) as device_browser_version -- Added + , 'Chrome' as device_web_info_browser -- Added + , '89.0.4389.114' as device_web_info_browser_version -- Added + , 'www.velir.com' as device_web_info_hostname -- Added + , 'Asia' as geo_continent -- Added + , 'Vietnam' as geo_country -- Added + , 'Ho Chi Minh City' as 
geo_region -- Added + , 'Ho Chi Minh City' as geo_city -- Added + , 'Southeast Asia' as geo_sub_continent -- Added + , '(not set)' as geo_metro -- Added + , cast(null as string) as app_info_id -- Added + , cast(null as string) as app_info_version -- Added + , cast(null as string) as app_info_install_store -- Added + , cast(null as string) as app_info_firebase_app_id -- Added + , cast(null as string) as app_info_install_source -- Added + , '(direct)' as user_campaign -- Added + , '(none)' as user_medium -- Added + , '(direct)' as user_source -- Added + , '1966637064' as stream_id + , 'WEB' as platform + , null as ecommerce + , null as items + , 12345 as property_id + , 1617691775 as session_id + , 1 as session_number + , 1 as session_engaged + , 30000 as engagement_time_msec -- Added + , 'Velir | Behavior-Driven Testing in Drupal 8' as page_title + , cast(null as string) as event_content -- Added + , cast(null as string) as event_term -- Added + , 0 as is_page_view + , 0 as is_purchase + , 'KXZY+6kA4bpHda1EzHaVvw==' as client_key + , '90mFUqw4xBzU+Xx9/4ycCQ==' as session_key + , '90mFUqw4xBzU+Xx9/4ycCQ==2021-04-06' as session_partition_key + , '7HoiVpTakjsH3DJ1t4HGNw==' as event_key + , 'bing' as event_source -- Added + , cast(null as string) as event_medium -- Added + , cast(null as string) as event_campaign -- Added + , 'test' as query_param_utm_term + , 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as original_page_location + , cast(null as string) as original_page_referrer + , '/blog/2016/08/25/behavior-driven-testing-drupal-8' as page_path + , 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8' as page_location + , cast(null as string) as page_referrer + , 'velir.com' as page_hostname + , cast(null as string) as page_query_string + , '2021-04-06https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8' as page_key + , 'il76Ej2ywBpSKKyYSAkOlQ==' as page_engagement_key + + from 
mock_event_params + + ) + select * from expected_row + overrides: + vars: + query_parameter_exclusions: ["utm_term"] + query_parameter_extraction: ["utm_term"] From 922ba079199b1a338477deaf212e3da840c10b12 Mon Sep 17 00:00:00 2001 From: David Booke Date: Thu, 24 Oct 2024 11:10:56 -0500 Subject: [PATCH 21/47] Remove todo and example stg_ga4__events unit test files --- unit_tests/test_stg_ga4__events.example | 45 ------------------------- unit_tests/test_stg_ga4__events.todo | 43 ----------------------- 2 files changed, 88 deletions(-) delete mode 100644 unit_tests/test_stg_ga4__events.example delete mode 100644 unit_tests/test_stg_ga4__events.todo diff --git a/unit_tests/test_stg_ga4__events.example b/unit_tests/test_stg_ga4__events.example deleted file mode 100644 index d0733e26..00000000 --- a/unit_tests/test_stg_ga4__events.example +++ /dev/null @@ -1,45 +0,0 @@ -# This test doesn't quite work because the key columns are of type BYTE, but the JSON uploads the data as STRING. -# Keeping this file for now as an example of using JSON for both the input and expected output - -import pytest -from base_unit_test import BaseUnitTestModel -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -SOURCE_JSON = """ -{ "event_date_dt": "2021-04-06", "event_timestamp": "1617691790431476", "event_name": "first_visit", "event_params": [{ "key": "ga_session_number", "value": { "string_value": null, "int_value": "1", "float_value": null, "double_value": null } }, { "key": "engaged_session_event", "value": { "string_value": null, "int_value": "1", "float_value": null, "double_value": null } }, { "key": "ga_session_id", "value": { "string_value": null, "int_value": "1617691775", "float_value": null, "double_value": null } }, { "key": "page_title", "value": { "string_value": "Velir | Behavior-Driven Testing in Drupal 8", "int_value": null, "float_value": null, "double_value": null } }, { "key": "page_location", "value": { "string_value": 
"https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8", "int_value": null, "float_value": null, "double_value": null } }, { "key": "session_engaged", "value": { "string_value": null, "int_value": "1", "float_value": null, "double_value": null } }], "event_previous_timestamp": null, "event_value_in_usd": null, "event_bundle_sequence_id": "948327668", "event_server_timestamp_offset": null, "user_id": null, "client_id": "1166526666.1617691776", "privacy_info": null, "user_properties": [], "user_first_touch_timestamp": "1617691790431476", "user_ltv": { "revenue": "0.0", "currency": "USD" }, "device": { "category": "desktop", "mobile_brand_name": null, "mobile_model_name": null, "mobile_marketing_name": null, "mobile_os_hardware_model": null, "operating_system": "Windows", "operating_system_version": "Windows 10", "vendor_id": null, "advertising_id": null, "language": "en-us", "is_limited_ad_tracking": "No", "time_zone_offset_seconds": null, "browser": null, "browser_version": null, "web_info": { "browser": "Chrome", "browser_version": "89.0.4389.114", "hostname": "www.velir.com" } }, "geo": { "continent": "Asia", "country": "Vietnam", "region": "Ho Chi Minh City", "city": "Ho Chi Minh City", "sub_continent": "Southeast Asia", "metro": "(not set)" }, "app_info": null, "traffic_source": { "name": "(direct)", "medium": "(none)", "source": "(direct)" }, "stream_id": "1966637064", "platform": "WEB", "ecommerce": null, "items": [], "ga_session_id": "1617691775", "page_location": "https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8", "ga_session_number": "1", "session_engaged": "1", "page_title": "Velir | Behavior-Driven Testing in Drupal 8", "page_referrer": null, "is_page_view": "0", "is_purchase": "0"} -""".lstrip() -EXPECTED_JSON = """ -{ "event_date_dt": "2021-04-06", "event_timestamp": "1617691790431476", "event_name": "first_visit", "event_params": [{ "key": "ga_session_number", "value": { "string_value": null, "int_value": "1", 
"float_value": null, "double_value": null } }, { "key": "engaged_session_event", "value": { "string_value": null, "int_value": "1", "float_value": null, "double_value": null } }, { "key": "ga_session_id", "value": { "string_value": null, "int_value": "1617691775", "float_value": null, "double_value": null } }, { "key": "page_title", "value": { "string_value": "Velir | Behavior-Driven Testing in Drupal 8", "int_value": null, "float_value": null, "double_value": null } }, { "key": "page_location", "value": { "string_value": "https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8", "int_value": null, "float_value": null, "double_value": null } }, { "key": "session_engaged", "value": { "string_value": null, "int_value": "1", "float_value": null, "double_value": null } }], "event_previous_timestamp": null, "event_value_in_usd": null, "event_bundle_sequence_id": "948327668", "event_server_timestamp_offset": null, "user_id": null, "client_id": "1166526666.1617691776", "privacy_info": null, "user_properties": [], "user_first_touch_timestamp": "1617691790431476", "user_ltv": { "revenue": "0.0", "currency": "USD" }, "device": { "category": "desktop", "mobile_brand_name": null, "mobile_model_name": null, "mobile_marketing_name": null, "mobile_os_hardware_model": null, "operating_system": "Windows", "operating_system_version": "Windows 10", "vendor_id": null, "advertising_id": null, "language": "en-us", "is_limited_ad_tracking": "No", "time_zone_offset_seconds": null, "browser": null, "browser_version": null, "web_info": { "browser": "Chrome", "browser_version": "89.0.4389.114", "hostname": "www.velir.com" } }, "geo": { "continent": "Asia", "country": "Vietnam", "region": "Ho Chi Minh City", "city": "Ho Chi Minh City", "sub_continent": "Southeast Asia", "metro": "(not set)" }, "app_info": null, "traffic_source": { "name": "(direct)", "medium": "(none)", "source": "(direct)" }, "stream_id": "1966637064", "platform": "WEB", "ecommerce": null, "items": [], 
"ga_session_id": "1617691775", "page_location": "https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8", "ga_session_number": "1", "session_engaged": "1", "page_title": "Velir | Behavior-Driven Testing in Drupal 8", "page_referrer": null, "is_page_view": "0", "is_purchase": "0", "session_key": "TAp7hHaymXXA/Way5byPBw\u003d\u003d", "session_event_number": "1", "event_key": "DGb378zSx/aIZs76gM4aTQ\u003d\u003d", "page_hostname": "velir.com", "page_query_string": null} -""".lstrip() - -models__config_yml = """ -version: 2 -sources: - - name: fixture - schema: "{{ target.schema }}" - tables: - - name: SOURCE_JSON - - name: EXPECTED_OUTPUT -""" - -actual = read_file('../models/staging/stg_ga4__events.sql').replace( - "ref('base_ga4__events')", - "source('fixture', 'SOURCE_JSON')" -) - -class TestStgGa4Events(BaseUnitTestModel): - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "config.yml": models__config_yml, - "actual.sql": actual, - "expected.sql": "select * from {{ source('fixture', 'EXPECTED_OUTPUT') }}" - } - - def test_mock_run_and_check(self, project): - self.upload_json_fixture(project, "source.json", SOURCE_JSON, "SOURCE_JSON" ) - self.upload_json_fixture(project, "expected.json", EXPECTED_JSON, "EXPECTED_OUTPUT" ) - run_dbt(["run"]) - breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) diff --git a/unit_tests/test_stg_ga4__events.todo b/unit_tests/test_stg_ga4__events.todo deleted file mode 100644 index a1d76e9c..00000000 --- a/unit_tests/test_stg_ga4__events.todo +++ /dev/null @@ -1,43 +0,0 @@ -# Test test currently fails because the event_key depends on the event_params nested field. Cannot mock that using CSV. 
- -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -mock_base_ga4__events_csv = """user_id,event_name,event_timestamp,client_key,ga_session_id,stream_id,page_location,page_referrer,source,medium,campaign -user_id_1,pageview,12345,client_key_1,ga_session_id_1,stream_id_1,http://www.website.com/?foo=bar,http://www.cnn.com/,google,organic,(organic) -""".lstrip() - -expected_csv = """user_id,event_name,event_timestamp,client_key,ga_session_id,stream_id,source,user_key,session_key,event_key,medium,campaign,original_page_location,original_page_referrer,page_location,page_referrer,page_hostname,page_query_string -user_id_1,pageview,12345,client_key_1,ga_session_id_1,stream_id_1,google,c/nWU/GWhlWiLU0S6R/rwg==,9fDgaCrbd4ieAj1QpcWDjw==,70B/o+ww2nOTa32ASF/ulw==,organic,(organic),http://www.website.com/?foo=bar,http://www.cnn.com/,http://www.website.com/?foo=bar,http://www.cnn.com/,website.com,foo=bar -""" - -actual = read_file('../models/staging/stg_ga4__events.sql') - -class TestStgEvents(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "base_ga4__events.csv": mock_base_ga4__events_csv, - "expected.csv": expected_csv - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "macros.sql": read_file('../macros/url_parsing.sql'), - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual - } - - def test_mock_run_and_check(self, project): - run_dbt(["build"]) - breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) From 3a4f677f478551d6deef8d601b7845c5bade5e33 Mon Sep 17 00:00:00 2001 From: David Booke Date: Thu, 24 Oct 2024 11:18:25 -0500 Subject: [PATCH 22/47] Add sessions_traffic_sources_last_non_direct_daily python unit test back --- 
...s_traffic_sources_last_non_direct_daily.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 unit_tests/test_stg_ga4__sessions_traffic_sources_last_non_direct_daily.py diff --git a/unit_tests/test_stg_ga4__sessions_traffic_sources_last_non_direct_daily.py b/unit_tests/test_stg_ga4__sessions_traffic_sources_last_non_direct_daily.py new file mode 100644 index 00000000..d32ef800 --- /dev/null +++ b/unit_tests/test_stg_ga4__sessions_traffic_sources_last_non_direct_daily.py @@ -0,0 +1,40 @@ +import pytest +from dbt.tests.util import read_file,check_relations_equal,run_dbt + +# Define mocks via CSV (seeds) or SQL (models) +mock_stg_ga4__sessions_traffic_sources_daily_csv = """client_key,session_partition_key,session_partition_date,session_partition_timestamp,session_source,session_medium,session_source_category,session_campaign,session_content,session_term,session_default_channel_grouping,non_direct_session_partition_key +A,A,20230505,1683321359,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,A +A,B,20230506,1683407759,(direct),,,,,,, +A,C,20230507,1683494159,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,C +A,D,20230508,1683580559,(direct),,,,,,, +""".lstrip() + +expected_csv = """client_key,session_partition_key,session_partition_date,session_source,session_medium,session_source_category,session_campaign,session_content,session_term,session_default_channel_grouping,session_partition_key_last_non_direct,last_non_direct_source,last_non_direct_medium,last_non_direct_source_category,last_non_direct_campaign,last_non_direct_content,last_non_direct_term,last_non_direct_default_channel_grouping +A,A,20230505,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,A,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a 
+A,B,20230506,(direct),,,,,,,A,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a +A,C,20230507,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,C,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a +A,D,20230508,(direct),,,,,,,C,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a +""".lstrip() + +actual = read_file('../models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql') + +class TestSessionsTrafficSourcesLastNonDirectDaily(): + # everything that goes in the "seeds" directory (= CSV format) + @pytest.fixture(scope="class") + def seeds(self): + return { + "stg_ga4__sessions_traffic_sources_daily.csv": mock_stg_ga4__sessions_traffic_sources_daily_csv, + "expected.csv": expected_csv, + } + + # everything that goes in the "models" directory (= SQL) + @pytest.fixture(scope="class") + def models(self): + return { + # Hack-y solution to ensure the model is not partitioned. Loading mock data (date columns) from a seed file + partitioning don't work well together. 
+ "actual.sql": actual.replace("materialized = 'incremental',","materialized = 'view',"), + } + + def test_mock_run_and_check(self, project): + run_dbt(["build"]) + check_relations_equal(project.adapter, ["actual", "expected"]) From c8701303e0e3edabe101d93fd419e69f639175df Mon Sep 17 00:00:00 2001 From: David Booke Date: Thu, 24 Oct 2024 11:27:03 -0500 Subject: [PATCH 23/47] Comment out unit tests for disabled models --- dbt_project.yml | 11 --- .../stg_ga4__derived_session_properties.yml | 74 +++++++++---------- .../stg_ga4__derived_user_properties.yml | 64 ++++++++-------- 3 files changed, 68 insertions(+), 81 deletions(-) diff --git a/dbt_project.yml b/dbt_project.yml index 35a97219..7e2200ec 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -17,17 +17,6 @@ vars: frequency: "daily" conversion_events: ['add_to_cart', 'large_button_clicked'] static_incremental_days: 3 - derived_session_properties: - - event_parameter: "ga_session_id" - session_property_name: "ga_session_id" - value_type: "int_value" - - event_parameter: "page_title" - session_property_name: "page_title" - value_type: "string_value" - derived_user_properties: - - event_parameter: "page_title" - user_property_name: "page_title" - value_type: "string_value" target-path: "target" # directory which will store compiled SQL files clean-targets: # directories to be removed by `dbt clean` diff --git a/models/staging/stg_ga4__derived_session_properties.yml b/models/staging/stg_ga4__derived_session_properties.yml index 68d3d295..1c32ed5b 100644 --- a/models/staging/stg_ga4__derived_session_properties.yml +++ b/models/staging/stg_ga4__derived_session_properties.yml @@ -8,41 +8,39 @@ models: columns: - name: session_key tests: - - unique -unit_tests: - - name: test_derived_session_properties - description: Test whether a derived property is successfully retrieved from multiple event payloads - model: stg_ga4__derived_session_properties - given: - - input: ref('stg_ga4__events') - format: sql - rows: | - 
select - 'AAA' as session_key - , 1617691790431476 as event_timestamp - , 'first_visit' as event_name - , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params - , ARRAY[STRUCT('my_property' as key, STRUCT('value1' as string_value) as value)] as user_properties - union all - select - 'AAA' as session_key - , 1617691790431477 as event_timestamp - , 'first_visit' as event_name - , ARRAY[STRUCT('my_param' as key, STRUCT(2 as int_value) as value)] as event_params - , ARRAY[] as user_properties - union all - select - 'BBB' as session_key - , 1617691790431477 as event_timestamp - , 'first_visit' as event_name - , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params - , ARRAY[STRUCT('my_property' as key, STRUCT('value2' as string_value) as value)] as user_properties - expect: - format: dict - rows: - - {session_key: AAA, my_derived_property: 2, my_derived_property2: value1} - - {session_key: BBB, my_derived_property: 1, my_derived_property2: value2} - overrides: - vars: {derived_session_properties: [{event_parameter: 'my_param',session_property_name: 'my_derived_property',value_type: 'int_value'},{user_property: 'my_property',session_property_name: 'my_derived_property2',value_type: 'string_value'}]} - - \ No newline at end of file + - unique +# unit_tests: +# - name: test_derived_session_properties +# description: Test whether a derived property is successfully retrieved from multiple event payloads +# model: stg_ga4__derived_session_properties +# given: +# - input: ref('stg_ga4__events') +# format: sql +# rows: | +# select +# 'AAA' as session_key +# , 1617691790431476 as event_timestamp +# , 'first_visit' as event_name +# , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params +# , ARRAY[STRUCT('my_property' as key, STRUCT('value1' as string_value) as value)] as user_properties +# union all +# select +# 'AAA' as session_key +# , 1617691790431477 as event_timestamp +# , 'first_visit' as 
event_name +# , ARRAY[STRUCT('my_param' as key, STRUCT(2 as int_value) as value)] as event_params +# , ARRAY[] as user_properties +# union all +# select +# 'BBB' as session_key +# , 1617691790431477 as event_timestamp +# , 'first_visit' as event_name +# , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params +# , ARRAY[STRUCT('my_property' as key, STRUCT('value2' as string_value) as value)] as user_properties +# expect: +# format: dict +# rows: +# - {session_key: AAA, my_derived_property: 2, my_derived_property2: value1} +# - {session_key: BBB, my_derived_property: 1, my_derived_property2: value2} +# overrides: +# vars: {derived_session_properties: [{event_parameter: 'my_param',session_property_name: 'my_derived_property',value_type: 'int_value'},{user_property: 'my_property',session_property_name: 'my_derived_property2',value_type: 'string_value'}]} diff --git a/models/staging/stg_ga4__derived_user_properties.yml b/models/staging/stg_ga4__derived_user_properties.yml index 47d4ef0b..8664549c 100644 --- a/models/staging/stg_ga4__derived_user_properties.yml +++ b/models/staging/stg_ga4__derived_user_properties.yml @@ -8,35 +8,35 @@ models: description: Hashed combination of user_pseudo_id and stream_id tests: - unique -unit_tests: - - name: test_derived_user_properties - description: Test whether a derived user property is successfully retrieved from multiple event payloads - model: stg_ga4__derived_user_properties - given: - - input: ref('stg_ga4__events') - format: sql - rows: | - select - 'AAA' as client_key - , 1617691790431476 as event_timestamp - , 'first_visit' as event_name - , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params - union all - select - 'AAA' as client_key - , 1617691790431477 as event_timestamp - , 'first_visit' as event_name - , ARRAY[STRUCT('my_param' as key, STRUCT(2 as int_value) as value)] as event_params - union all - select - 'BBB' as client_key - , 1617691790431477 as 
event_timestamp - , 'first_visit' as event_name - , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params - expect: - format: dict - rows: - - {client_key: AAA, my_derived_property: 2} - - {client_key: BBB, my_derived_property: 1} - overrides: - vars: {derived_user_properties: [{event_parameter: 'my_param',user_property_name: 'my_derived_property',value_type: 'int_value'}]} \ No newline at end of file +# unit_tests: +# - name: test_derived_user_properties +# description: Test whether a derived user property is successfully retrieved from multiple event payloads +# model: stg_ga4__derived_user_properties +# given: +# - input: ref('stg_ga4__events') +# format: sql +# rows: | +# select +# 'AAA' as client_key +# , 1617691790431476 as event_timestamp +# , 'first_visit' as event_name +# , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params +# union all +# select +# 'AAA' as client_key +# , 1617691790431477 as event_timestamp +# , 'first_visit' as event_name +# , ARRAY[STRUCT('my_param' as key, STRUCT(2 as int_value) as value)] as event_params +# union all +# select +# 'BBB' as client_key +# , 1617691790431477 as event_timestamp +# , 'first_visit' as event_name +# , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params +# expect: +# format: dict +# rows: +# - {client_key: AAA, my_derived_property: 2} +# - {client_key: BBB, my_derived_property: 1} +# overrides: +# vars: {derived_user_properties: [{event_parameter: 'my_param',user_property_name: 'my_derived_property',value_type: 'int_value'}]} From 73863711c9fcf6968c11abbca544dcc6b7520913 Mon Sep 17 00:00:00 2001 From: David Booke Date: Thu, 24 Oct 2024 11:27:47 -0500 Subject: [PATCH 24/47] Remove edits from dbt_project.yml --- dbt_project.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/dbt_project.yml b/dbt_project.yml index 7e2200ec..a2f8bf71 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -8,16 +8,6 @@ seed-paths: 
["seeds"] macro-paths: ["macros"] snapshot-paths: ["snapshots"] -profile: 'bq_ga4' - -vars: - start_date: "20230306" # Defines the earliest GA4 _TABLE_SUFFIX to load into base events model. - source_project: "analytics" - property_ids: [id] - frequency: "daily" - conversion_events: ['add_to_cart', 'large_button_clicked'] - static_incremental_days: 3 - target-path: "target" # directory which will store compiled SQL files clean-targets: # directories to be removed by `dbt clean` - "target" From 76f2c7f65754d7c04670b3e6760e5972b52913c0 Mon Sep 17 00:00:00 2001 From: David Booke Date: Thu, 24 Oct 2024 12:23:11 -0500 Subject: [PATCH 25/47] Comment out unit test for sessions_traffic_sources_last_non_direct_daily until unit test var bug fix --- ..._traffic_sources_last_non_direct_daily.yml | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml index f359b9cc..26981cd1 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml +++ b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml @@ -22,28 +22,28 @@ models: description: The the most recent non-direct channel grouping within a 30-day lookback window. 
tests: - not_null -unit_tests: - - name: test_stg_ga4__sessions_traffic_sources_last_non_direct_daily - description: Test pulling the last non direct session parameters per client_key - model: stg_ga4__sessions_traffic_sources_last_non_direct_daily - given: - - input : ref('stg_ga4__sessions_traffic_sources_daily') - format: csv - rows: | - client_key,session_partition_key,session_partition_date,session_partition_timestamp,session_source,session_medium,session_source_category,session_campaign,session_content,session_term,session_default_channel_grouping,non_direct_session_partition_key - A,A,20230505,1683321359,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,A - A,B,20230506,1683407759,(direct),,,,,,, - A,C,20230507,1683494159,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,C - A,D,20230508,1683580559,(direct),,,,,,, - expect: - format: csv - rows: - client_key,session_partition_key,session_partition_date,session_source,session_medium,session_source_category,session_campaign,session_content,session_term,session_default_channel_grouping,session_partition_key_last_non_direct,last_non_direct_source,last_non_direct_medium,last_non_direct_source_category,last_non_direct_campaign,last_non_direct_content,last_non_direct_term,last_non_direct_default_channel_grouping - A,A,20230505,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,A,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a - A,B,20230506,(direct),,,,,,,A,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a - A,C,20230507,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,C,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a - 
A,D,20230508,(direct),,,,,,,C,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a - overrides: - macros: - is_incremental: false - vars: {session_attribution_lookback_window_days: 30} +# unit_tests: +# - name: test_stg_ga4__sessions_traffic_sources_last_non_direct_daily +# description: Test pulling the last non direct session parameters per client_key +# model: stg_ga4__sessions_traffic_sources_last_non_direct_daily +# given: +# - input : ref('stg_ga4__sessions_traffic_sources_daily') +# format: csv +# rows: | +# client_key,session_partition_key,session_partition_date,session_partition_timestamp,session_source,session_medium,session_source_category,session_campaign,session_content,session_term,session_default_channel_grouping,non_direct_session_partition_key +# A,A,20230505,1683321359,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,A +# A,B,20230506,1683407759,(direct),,,,,,, +# A,C,20230507,1683494159,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,C +# A,D,20230508,1683580559,(direct),,,,,,, +# expect: +# format: csv +# rows: +# client_key,session_partition_key,session_partition_date,session_source,session_medium,session_source_category,session_campaign,session_content,session_term,session_default_channel_grouping,session_partition_key_last_non_direct,last_non_direct_source,last_non_direct_medium,last_non_direct_source_category,last_non_direct_campaign,last_non_direct_content,last_non_direct_term,last_non_direct_default_channel_grouping +# A,A,20230505,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,A,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a +# A,B,20230506,(direct),,,,,,,A,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a +# 
A,C,20230507,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,C,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a +# A,D,20230508,(direct),,,,,,,C,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a +# overrides: +# macros: +# is_incremental: false +# vars: {session_attribution_lookback_window_days: 30} From 697bafd6e521d23f54245ca8d297f88c83340b97 Mon Sep 17 00:00:00 2001 From: David Booke Date: Thu, 24 Oct 2024 12:42:39 -0500 Subject: [PATCH 26/47] Update unit test section in README --- README.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 16ecbbda..2a57eec0 100644 --- a/README.md +++ b/README.md @@ -304,7 +304,26 @@ gcloud auth application-default login --scopes=https://www.googleapis.com/auth/b ``` # Unit Testing -This package uses `pytest` as a method of unit testing individual models. More details can be found in the [unit_tests/README.md](unit_tests) folder. +The dbt-ga4 package treats each model and macro as a 'unit' of code. If we fix the input to each unit, we can test that we received the expected output. + +This package currently uses a combination of dbt unit tests and `pytest` as a method of unit testing individual models. The remaining `pytest` unit test will be refactored to a dbt unit test when possible - progress on the bug preventing that work can be tracked [here](https://github.com/dbt-labs/dbt-core/issues/10353). + +### dbt unit tests + +dbt's documentation on unit tests can be found [here](https://docs.getdbt.com/docs/build/unit-tests). Unit tests are performed the same way other types of dbt tests are executed. + +Execute a specific test: +``` +dbt test -s <test_name> +``` +Execute all tests configured for a model: +``` +dbt test -s <model_name> +``` + +### pytest + +More details on using `pytest` for unit testing can be found in the [unit_tests/README.md](unit_tests) folder.
# Overriding Default Channel Groupings From 616da999d4cf438c4bfe432afb31c695bf80cbbc Mon Sep 17 00:00:00 2001 From: David Booke Date: Thu, 24 Oct 2024 14:53:41 -0500 Subject: [PATCH 27/47] Simplify event_params construction in test_base_to_stg_ga4__events input and expect --- models/staging/stg_ga4__events.yml | 381 ++++++++++++++++++----------- 1 file changed, 236 insertions(+), 145 deletions(-) diff --git a/models/staging/stg_ga4__events.yml b/models/staging/stg_ga4__events.yml index aa9070bf..147e2506 100644 --- a/models/staging/stg_ga4__events.yml +++ b/models/staging/stg_ga4__events.yml @@ -133,6 +133,7 @@ unit_tests: - {page_path: /with/this-path} - {page_path: /} - {page_path: /page} + - name: test_base_to_stg_ga4__events description: "Check whether a given row from base_ga4__events produces the expected row in stg_ga4__events" model: stg_ga4__events @@ -140,133 +141,84 @@ unit_tests: - input: ref('base_ga4__events') format: sql rows: | - with mock_event_params as ( - - select 'ga_session_number' as key, struct(cast(null as string) as string_value, 1 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 'engaged_session_event' as key, struct(cast(null as string) as string_value, 1 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 'ga_session_id' as key, struct(cast(null as string) as string_value, 1617691775 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 'source' as key, struct('bing' as string_value, null as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 'page_title' as key, struct('Velir | Behavior-Driven Testing in Drupal 8' as string_value, null as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 
'page_location' as key, struct('https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as string_value, null as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 'session_engaged' as key, struct(cast(null as string) as string_value, 1 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 'engagement_time_msec' as key, struct(cast(null as string) as string_value, 30000 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - - ) - - , mock_row as ( - - select - - date('2021-04-06') as event_date_dt - , 1617691790431476 as event_timestamp - , 'first_visit' as event_name - , array_agg((select as struct mock_event_params.*)) as event_params - , null as event_previous_timestamp - , null as event_value_in_usd - , 948327668 as event_bundle_sequence_id - , null as event_server_timestamp_offset - , cast(null as string) as user_id - , '1166526666.1617691776' as user_pseudo_id -- Added - , cast(null as string) as privacy_info_analytics_storage -- Added - , cast(null as string) as privacy_info_ads_storage -- Added - , cast(null as string) as privacy_info_uses_transient_token -- Added - , null as user_properties - , 1617691790431476 as user_first_touch_timestamp - , 0.0 as user_ltv_revenue -- Added - , 'USD' as user_ltv_currency -- Added - , 'desktop' as device_category -- Added - , cast(null as string) as device_mobile_brand_name -- Added - , cast(null as string) as device_mobile_model_name -- Added - , cast(null as string) as device_mobile_marketing_name -- Added - , cast(null as string) as device_mobile_os_hardware_model -- Added - , 'Windows' as device_operating_system -- Added - , 'Windows 10' as device_operating_system_version -- Added - , cast(null as string) as device_vendor_id -- Added - , cast(null as string) as device_advertising_id -- Added - , 'en-us' as 
device_language -- Added - , 'No' as device_is_limited_ad_tracking -- Added - , null as device_time_zone_offset_seconds -- Added - , cast(null as string) as device_browser -- Added - , cast(null as string) as device_browser_version -- Added - , 'Chrome' as device_web_info_browser -- Added - , '89.0.4389.114' as device_web_info_browser_version -- Added - , 'www.velir.com' as device_web_info_hostname -- Added - , 'Asia' as geo_continent -- Added - , 'Vietnam' as geo_country -- Added - , 'Ho Chi Minh City' as geo_region -- Added - , 'Ho Chi Minh City' as geo_city -- Added - , 'Southeast Asia' as geo_sub_continent -- Added - , '(not set)' as geo_metro -- Added - , cast(null as string) as app_info_id -- Added - , cast(null as string) as app_info_version -- Added - , cast(null as string) as app_info_install_store -- Added - , cast(null as string) as app_info_firebase_app_id -- Added - , cast(null as string) as app_info_install_source -- Added - , '(direct)' as user_campaign -- Added - , '(none)' as user_medium -- Added - , '(direct)' as user_source -- Added - , '1966637064' as stream_id - , 'WEB' as platform - , null as ecommerce - , null as items - , 12345 as property_id - , 1617691775 as session_id - , 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as page_location - , 1 as session_number - , 1 as session_engaged - , 30000 as engagement_time_msec -- Added - , 'Velir | Behavior-Driven Testing in Drupal 8' as page_title - , cast(null as string) as page_referrer - , 'bing' as event_source -- Added - , cast(null as string) as event_medium -- Added - , cast(null as string) as event_campaign -- Added - , cast(null as string) as event_content -- Added - , cast(null as string) as event_term -- Added - , 0 as is_page_view - , 0 as is_purchase - - from mock_event_params - - ) - select * from mock_row - expect: - format: sql - rows: | - with mock_event_params as ( - - select 'ga_session_number' as key, struct(cast(null as string) as 
string_value, 1 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 'engaged_session_event' as key, struct(cast(null as string) as string_value, 1 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 'ga_session_id' as key, struct(cast(null as string) as string_value, 1617691775 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 'source' as key, struct('bing' as string_value, null as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 'page_title' as key, struct('Velir | Behavior-Driven Testing in Drupal 8' as string_value, null as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 'page_location' as key, struct('https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as string_value, null as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 'session_engaged' as key, struct(cast(null as string) as string_value, 1 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - union all - select 'engagement_time_msec' as key, struct(cast(null as string) as string_value, 30000 as int_value, cast(null as FLOAT64) as float_value, cast(null as FLOAT64) as double_value) as value - - ) - - , expected_row as ( - select - date('2021-04-06') as event_date_dt , 1617691790431476 as event_timestamp , 'first_visit' as event_name - , array_agg((select as struct mock_event_params.*)) as event_params + , array[ + struct( + 'ga_session_number' as key + , struct( + cast(null as string) as string_value + , 1 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) as value + 
) + , struct( + 'engaged_session_event' as key + , struct( + cast(null as string) as string_value + , 1 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'ga_session_id' as key + , struct( + cast(null as string) as string_value + , 1617691775 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'source' as key + , struct( + 'bing' as string_value + , null as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'page_title' as key + , struct( + 'Velir | Behavior-Driven Testing in Drupal 8' as string_value + , null as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'page_location' as key + , struct( + 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as string_value + , null as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'session_engaged' as key + , struct( + cast(null as string) as string_value + , 1 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'engagement_time_msec' as key + , struct( + cast(null as string) as string_value + , 30000 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + ] as event_params , null as event_previous_timestamp , null as event_value_in_usd , 948327668 as event_bundle_sequence_id @@ -317,36 +269,175 @@ unit_tests: , null as items , 12345 as property_id , 1617691775 as session_id + , 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as page_location , 1 as session_number , 1 as session_engaged , 30000 as engagement_time_msec -- Added , 'Velir | Behavior-Driven Testing in Drupal 8' as page_title + , cast(null as string) 
as page_referrer + , 'bing' as event_source -- Added + , cast(null as string) as event_medium -- Added + , cast(null as string) as event_campaign -- Added , cast(null as string) as event_content -- Added , cast(null as string) as event_term -- Added , 0 as is_page_view , 0 as is_purchase - , 'KXZY+6kA4bpHda1EzHaVvw==' as client_key - , '90mFUqw4xBzU+Xx9/4ycCQ==' as session_key - , '90mFUqw4xBzU+Xx9/4ycCQ==2021-04-06' as session_partition_key - , '7HoiVpTakjsH3DJ1t4HGNw==' as event_key - , 'bing' as event_source -- Added - , cast(null as string) as event_medium -- Added - , cast(null as string) as event_campaign -- Added - , 'test' as query_param_utm_term - , 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as original_page_location - , cast(null as string) as original_page_referrer - , '/blog/2016/08/25/behavior-driven-testing-drupal-8' as page_path - , 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8' as page_location - , cast(null as string) as page_referrer - , 'velir.com' as page_hostname - , cast(null as string) as page_query_string - , '2021-04-06https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8' as page_key - , 'il76Ej2ywBpSKKyYSAkOlQ==' as page_engagement_key - - from mock_event_params - - ) - select * from expected_row + expect: + format: sql + rows: | + select + date('2021-04-06') as event_date_dt + , 1617691790431476 as event_timestamp + , 'first_visit' as event_name + , array[ + struct( + 'ga_session_number' as key + , struct( + cast(null as string) as string_value + , 1 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) as value + ) + , struct( + 'engaged_session_event' as key + , struct( + cast(null as string) as string_value + , 1 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'ga_session_id' as key + , struct( + cast(null as string) as string_value + 
, 1617691775 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'source' as key + , struct( + 'bing' as string_value + , null as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'page_title' as key + , struct( + 'Velir | Behavior-Driven Testing in Drupal 8' as string_value + , null as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'page_location' as key + , struct( + 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as string_value + , null as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'session_engaged' as key + , struct( + cast(null as string) as string_value + , 1 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'engagement_time_msec' as key + , struct( + cast(null as string) as string_value + , 30000 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + ] as event_params + , null as event_previous_timestamp + , null as event_value_in_usd + , 948327668 as event_bundle_sequence_id + , null as event_server_timestamp_offset + , cast(null as string) as user_id + , '1166526666.1617691776' as user_pseudo_id -- Added + , cast(null as string) as privacy_info_analytics_storage -- Added + , cast(null as string) as privacy_info_ads_storage -- Added + , cast(null as string) as privacy_info_uses_transient_token -- Added + , null as user_properties + , 1617691790431476 as user_first_touch_timestamp + , 0.0 as user_ltv_revenue -- Added + , 'USD' as user_ltv_currency -- Added + , 'desktop' as device_category -- Added + , cast(null as string) as device_mobile_brand_name -- Added + , cast(null as string) as device_mobile_model_name -- Added + , cast(null as 
string) as device_mobile_marketing_name -- Added + , cast(null as string) as device_mobile_os_hardware_model -- Added + , 'Windows' as device_operating_system -- Added + , 'Windows 10' as device_operating_system_version -- Added + , cast(null as string) as device_vendor_id -- Added + , cast(null as string) as device_advertising_id -- Added + , 'en-us' as device_language -- Added + , 'No' as device_is_limited_ad_tracking -- Added + , null as device_time_zone_offset_seconds -- Added + , cast(null as string) as device_browser -- Added + , cast(null as string) as device_browser_version -- Added + , 'Chrome' as device_web_info_browser -- Added + , '89.0.4389.114' as device_web_info_browser_version -- Added + , 'www.velir.com' as device_web_info_hostname -- Added + , 'Asia' as geo_continent -- Added + , 'Vietnam' as geo_country -- Added + , 'Ho Chi Minh City' as geo_region -- Added + , 'Ho Chi Minh City' as geo_city -- Added + , 'Southeast Asia' as geo_sub_continent -- Added + , '(not set)' as geo_metro -- Added + , cast(null as string) as app_info_id -- Added + , cast(null as string) as app_info_version -- Added + , cast(null as string) as app_info_install_store -- Added + , cast(null as string) as app_info_firebase_app_id -- Added + , cast(null as string) as app_info_install_source -- Added + , '(direct)' as user_campaign -- Added + , '(none)' as user_medium -- Added + , '(direct)' as user_source -- Added + , '1966637064' as stream_id + , 'WEB' as platform + , null as ecommerce + , null as items + , 12345 as property_id + , 1617691775 as session_id + , 1 as session_number + , 1 as session_engaged + , 30000 as engagement_time_msec -- Added + , 'Velir | Behavior-Driven Testing in Drupal 8' as page_title + , cast(null as string) as event_content -- Added + , cast(null as string) as event_term -- Added + , 0 as is_page_view + , 0 as is_purchase + , 'KXZY+6kA4bpHda1EzHaVvw==' as client_key + , '90mFUqw4xBzU+Xx9/4ycCQ==' as session_key + , 
'90mFUqw4xBzU+Xx9/4ycCQ==2021-04-06' as session_partition_key + , '7HoiVpTakjsH3DJ1t4HGNw==' as event_key + , 'bing' as event_source -- Added + , cast(null as string) as event_medium -- Added + , cast(null as string) as event_campaign -- Added + , 'test' as query_param_utm_term + , 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as original_page_location + , cast(null as string) as original_page_referrer + , '/blog/2016/08/25/behavior-driven-testing-drupal-8' as page_path + , 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8' as page_location + , cast(null as string) as page_referrer + , 'velir.com' as page_hostname + , cast(null as string) as page_query_string + , '2021-04-06https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8' as page_key + , 'il76Ej2ywBpSKKyYSAkOlQ==' as page_engagement_key overrides: vars: query_parameter_exclusions: ["utm_term"] From 653e1aec1982d629fd9db53ecadb3669a9085234 Mon Sep 17 00:00:00 2001 From: David Booke Date: Thu, 24 Oct 2024 16:20:32 -0500 Subject: [PATCH 28/47] Update yml files to use consistent new line convention --- models/staging/stg_ga4__event_to_query_string_params.yml | 2 +- models/staging/stg_ga4__events.yml | 7 ------- models/staging/stg_ga4__page_conversions.yml | 2 +- models/staging/stg_ga4__session_conversions_daily.yml | 2 +- models/staging/stg_ga4__sessions_traffic_sources.yml | 1 - models/staging/stg_ga4__user_id_mapping.yml | 1 - 6 files changed, 3 insertions(+), 12 deletions(-) diff --git a/models/staging/stg_ga4__event_to_query_string_params.yml b/models/staging/stg_ga4__event_to_query_string_params.yml index 66679a10..c97dbaf5 100644 --- a/models/staging/stg_ga4__event_to_query_string_params.yml +++ b/models/staging/stg_ga4__event_to_query_string_params.yml @@ -22,4 +22,4 @@ unit_tests: aaa,param1,value1 aaa,param2,value2 bbb,param1, - ccc,param1, \ No newline at end of file + ccc,param1, diff --git 
a/models/staging/stg_ga4__events.yml b/models/staging/stg_ga4__events.yml index 147e2506..bdbab034 100644 --- a/models/staging/stg_ga4__events.yml +++ b/models/staging/stg_ga4__events.yml @@ -21,7 +21,6 @@ models: the data shows that this is not always the case. When a user_engagement event does not fire, the engagement_time_msec parameter is assigned to the next page_view. This engagement time should be credited to the previous page, so for page_view events this field uses the session_key and page_referrer as the key while all other events use the session_key and page_location. - unit_tests: - name: query_parameter_removal description: "Check that query parameters get removed from the model." @@ -42,7 +41,6 @@ unit_tests: - {page_location: https://asite.com/anotherpage, page_referrer: https://asite.com/previous_page?utm_source=source_value} - {page_location: https://anothersite.com/page?not_excluded_param=val&param=also_not_excluded, page_referrer: https://anothersite.com/previous_page#utm_source=source_value#hash=hash_value} - {page_location: https://twoparams.com/page, page_referrer: https://twoparams.com/previous_page#hash=hash_value} - - name: query_parameter_remove_all description: "Check that the '*all*' flag removes all query parameters from the model." model: stg_ga4__events @@ -58,7 +56,6 @@ unit_tests: rows: - {page_location: https://asite.com/page, page_referrer: https://asite.com/previous_page} - {page_location: https://asite.com/anotherpage, page_referrer: https://asite.com/previous_page} - - name: query_parameter_extraction description: "Check that query parameters get extracted from the model and added as new fields."
model: stg_ga4__events @@ -82,7 +79,6 @@ unit_tests: - {query_param_param1: val, query_param_param2: null, query_param_param3: null} - {query_param_param1: another_val, query_param_param2: second_val, query_param_param3: value3} - {query_param_param1: null, query_param_param2: null, query_param_param3: null} - - name: hostname_extraction_from_url description: "Check that the hostname is extracted from the URL" model: stg_ga4__events @@ -101,7 +97,6 @@ unit_tests: - {page_hostname: cool-site.com} - {page_hostname: example.site.app} - {page_hostname: madeup.org} - - name: query_string_extraction_from_url description: "Check that the query string is extracted from the URL" model: stg_ga4__events @@ -116,7 +111,6 @@ unit_tests: - {page_query_string: query_string=something} - {page_query_string: null} - {page_query_string: utm_term=test-term&utm_source=test-source} - - name: page_path_extraction description: "Check that the page path is extracted from the URL" model: stg_ga4__events @@ -133,7 +127,6 @@ unit_tests: - {page_path: /with/this-path} - {page_path: /} - {page_path: /page} - - name: test_base_to_stg_ga4__events description: "Check whether a given row from base_ga4__events produces the expected row in stg_ga4__events" model: stg_ga4__events diff --git a/models/staging/stg_ga4__page_conversions.yml b/models/staging/stg_ga4__page_conversions.yml index b285bc2d..2dfd6d55 100644 --- a/models/staging/stg_ga4__page_conversions.yml +++ b/models/staging/stg_ga4__page_conversions.yml @@ -41,4 +41,4 @@ unit_tests: A,2 B,1 overrides: - vars: {conversion_events: ['page-view']} \ No newline at end of file + vars: {conversion_events: ['page-view']} diff --git a/models/staging/stg_ga4__session_conversions_daily.yml b/models/staging/stg_ga4__session_conversions_daily.yml index 464b3249..edc6f08b 100644 --- a/models/staging/stg_ga4__session_conversions_daily.yml +++ b/models/staging/stg_ga4__session_conversions_daily.yml @@ -62,4 +62,4 @@ unit_tests: overrides: macros: 
is_incremental: false - vars: {conversion_events: ['my-conversion']} \ No newline at end of file + vars: {conversion_events: ['my-conversion']} diff --git a/models/staging/stg_ga4__sessions_traffic_sources.yml b/models/staging/stg_ga4__sessions_traffic_sources.yml index fd5954ec..33f372e8 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources.yml +++ b/models/staging/stg_ga4__sessions_traffic_sources.yml @@ -15,7 +15,6 @@ models: description: First non-null source value of the session tests: - not_null - unit_tests: - name: test_default_channel_grouping description: Test whether the defaul_channel_grouping macro is assigning channels correctly diff --git a/models/staging/stg_ga4__user_id_mapping.yml b/models/staging/stg_ga4__user_id_mapping.yml index da2d572c..311376e7 100644 --- a/models/staging/stg_ga4__user_id_mapping.yml +++ b/models/staging/stg_ga4__user_id_mapping.yml @@ -9,7 +9,6 @@ models: tests: - not_null - unique - unit_tests: - name: test_user_id_mapping description: Test whether the latest client_key to user_id mapping logic is correct From 50ff2e801309a0642e3b0054088ef5c13dbd7e3f Mon Sep 17 00:00:00 2001 From: Adam Ribaudo Date: Thu, 24 Oct 2024 17:55:01 -0400 Subject: [PATCH 29/47] update PR template --- .github/pull_request_template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index d33751fe..d1fccd01 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -7,4 +7,4 @@ Describe your changes, and why you're making them. 
- [ ] I have verified that these changes work locally - [ ] I have updated the README.md (if applicable) - [ ] I have added tests & descriptions to my models (and macros if applicable) -- [ ] I have run `dbt test` and `python -m pytest .` to validate existing tests +- [ ] I have run `dbt test` to validate existing tests From 68f9f871e9593241305fae429e398e087e190557 Mon Sep 17 00:00:00 2001 From: David Booke Date: Fri, 25 Oct 2024 11:18:21 -0500 Subject: [PATCH 30/47] Update default channel grouping test to use seed instead of fixture and delete fixture csv --- .../stg_ga4__sessions_traffic_sources.yml | 2 - tests/fixtures/ga4_source_categories.csv | 820 ------------------ 2 files changed, 822 deletions(-) delete mode 100644 tests/fixtures/ga4_source_categories.csv diff --git a/models/staging/stg_ga4__sessions_traffic_sources.yml b/models/staging/stg_ga4__sessions_traffic_sources.yml index 33f372e8..589f82e6 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources.yml +++ b/models/staging/stg_ga4__sessions_traffic_sources.yml @@ -55,8 +55,6 @@ unit_tests: AC,172000000000000,event,firebase,, AD,172000000000000,event,some-source,some-medium,some-campaign - input: ref('ga4_source_categories') - format: csv - fixture: ga4_source_categories expect: format: csv rows: | diff --git a/tests/fixtures/ga4_source_categories.csv b/tests/fixtures/ga4_source_categories.csv deleted file mode 100644 index bb4a7fee..00000000 --- a/tests/fixtures/ga4_source_categories.csv +++ /dev/null @@ -1,820 +0,0 @@ -source,source_category -360.cn,SOURCE_CATEGORY_SEARCH -43things,SOURCE_CATEGORY_SOCIAL -43things.com,SOURCE_CATEGORY_SOCIAL -51.com,SOURCE_CATEGORY_SOCIAL -5ch.net,SOURCE_CATEGORY_SOCIAL -Google Shopping,SOURCE_CATEGORY_SHOPPING -Hatena,SOURCE_CATEGORY_SOCIAL -IGShopping,SOURCE_CATEGORY_SHOPPING -ImageShack,SOURCE_CATEGORY_SOCIAL -aax-us-east.amazon-adsystem.com,SOURCE_CATEGORY_SHOPPING -aax.amazon-adsystem.com,SOURCE_CATEGORY_SHOPPING -academia.edu,SOURCE_CATEGORY_SOCIAL 
-activerain,SOURCE_CATEGORY_SOCIAL -activerain.com,SOURCE_CATEGORY_SOCIAL -activeworlds,SOURCE_CATEGORY_SOCIAL -activeworlds.com,SOURCE_CATEGORY_SOCIAL -addthis,SOURCE_CATEGORY_SOCIAL -addthis.com,SOURCE_CATEGORY_SOCIAL -airg.ca,SOURCE_CATEGORY_SOCIAL -alibaba,SOURCE_CATEGORY_SHOPPING -alibaba.com,SOURCE_CATEGORY_SHOPPING -alice,SOURCE_CATEGORY_SEARCH -allnurses.com,SOURCE_CATEGORY_SOCIAL -allrecipes.com,SOURCE_CATEGORY_SOCIAL -alumniclass,SOURCE_CATEGORY_SOCIAL -alumniclass.com,SOURCE_CATEGORY_SOCIAL -amazon,SOURCE_CATEGORY_SHOPPING -amazon.co.uk,SOURCE_CATEGORY_SHOPPING -amazon.com,SOURCE_CATEGORY_SHOPPING -ameba.jp,SOURCE_CATEGORY_SOCIAL -ameblo.jp,SOURCE_CATEGORY_SOCIAL -americantowns,SOURCE_CATEGORY_SOCIAL -americantowns.com,SOURCE_CATEGORY_SOCIAL -amp.reddit.com,SOURCE_CATEGORY_SOCIAL -ancestry.com,SOURCE_CATEGORY_SOCIAL -anobii,SOURCE_CATEGORY_SOCIAL -anobii.com,SOURCE_CATEGORY_SOCIAL -answerbag,SOURCE_CATEGORY_SOCIAL -answerbag.com,SOURCE_CATEGORY_SOCIAL -answers.yahoo.com,SOURCE_CATEGORY_SOCIAL -aol,SOURCE_CATEGORY_SEARCH -aolanswers,SOURCE_CATEGORY_SOCIAL -aolanswers.com,SOURCE_CATEGORY_SOCIAL -apps.facebook.com,SOURCE_CATEGORY_SOCIAL -apps.shopify.com,SOURCE_CATEGORY_SHOPPING -ar.pinterest.com,SOURCE_CATEGORY_SOCIAL -ar.search.yahoo.com,SOURCE_CATEGORY_SEARCH -artstation.com,SOURCE_CATEGORY_SOCIAL -ask,SOURCE_CATEGORY_SEARCH -askubuntu,SOURCE_CATEGORY_SOCIAL -askubuntu.com,SOURCE_CATEGORY_SOCIAL -asmallworld.com,SOURCE_CATEGORY_SOCIAL -at.search.yahoo.com,SOURCE_CATEGORY_SEARCH -athlinks,SOURCE_CATEGORY_SOCIAL -athlinks.com,SOURCE_CATEGORY_SOCIAL -au.search.yahoo.com,SOURCE_CATEGORY_SEARCH -auone,SOURCE_CATEGORY_SEARCH -avg,SOURCE_CATEGORY_SEARCH -away.vk.com,SOURCE_CATEGORY_SOCIAL -awe.sm,SOURCE_CATEGORY_SOCIAL -b.hatena.ne.jp,SOURCE_CATEGORY_SOCIAL -baby-gaga,SOURCE_CATEGORY_SOCIAL -baby-gaga.com,SOURCE_CATEGORY_SOCIAL -babyblog.ru,SOURCE_CATEGORY_SOCIAL -babylon,SOURCE_CATEGORY_SEARCH -badoo,SOURCE_CATEGORY_SOCIAL -badoo.com,SOURCE_CATEGORY_SOCIAL 
-baidu,SOURCE_CATEGORY_SEARCH -bebo,SOURCE_CATEGORY_SOCIAL -bebo.com,SOURCE_CATEGORY_SOCIAL -beforeitsnews,SOURCE_CATEGORY_SOCIAL -beforeitsnews.com,SOURCE_CATEGORY_SOCIAL -bharatstudent,SOURCE_CATEGORY_SOCIAL -bharatstudent.com,SOURCE_CATEGORY_SOCIAL -biglobe,SOURCE_CATEGORY_SEARCH -biglobe.co.jp,SOURCE_CATEGORY_SEARCH -biglobe.ne.jp,SOURCE_CATEGORY_SEARCH -biip.no,SOURCE_CATEGORY_SOCIAL -bing,SOURCE_CATEGORY_SEARCH -biswap.org,SOURCE_CATEGORY_SOCIAL -bit.ly,SOURCE_CATEGORY_SOCIAL -blackcareernetwork.com,SOURCE_CATEGORY_SOCIAL -blackplanet,SOURCE_CATEGORY_SOCIAL -blackplanet.com,SOURCE_CATEGORY_SOCIAL -blip.fm,SOURCE_CATEGORY_SOCIAL -blog.com,SOURCE_CATEGORY_SOCIAL -blog.feedspot.com,SOURCE_CATEGORY_SOCIAL -blog.goo.ne.jp,SOURCE_CATEGORY_SOCIAL -blog.naver.com,SOURCE_CATEGORY_SOCIAL -blog.twitch.tv,SOURCE_CATEGORY_VIDEO -blog.yahoo.co.jp,SOURCE_CATEGORY_SOCIAL -blogg.no,SOURCE_CATEGORY_SOCIAL -bloggang.com,SOURCE_CATEGORY_SOCIAL -blogger,SOURCE_CATEGORY_SOCIAL -blogger.com,SOURCE_CATEGORY_SOCIAL -blogher,SOURCE_CATEGORY_SOCIAL -blogher.com,SOURCE_CATEGORY_SOCIAL -bloglines,SOURCE_CATEGORY_SOCIAL -bloglines.com,SOURCE_CATEGORY_SOCIAL -blogs.com,SOURCE_CATEGORY_SOCIAL -blogsome,SOURCE_CATEGORY_SOCIAL -blogsome.com,SOURCE_CATEGORY_SOCIAL -blogspot,SOURCE_CATEGORY_SOCIAL -blogspot.com,SOURCE_CATEGORY_SOCIAL -blogster,SOURCE_CATEGORY_SOCIAL -blogster.com,SOURCE_CATEGORY_SOCIAL -blurtit,SOURCE_CATEGORY_SOCIAL -blurtit.com,SOURCE_CATEGORY_SOCIAL -bookmarks.yahoo.co.jp,SOURCE_CATEGORY_SOCIAL -bookmarks.yahoo.com,SOURCE_CATEGORY_SOCIAL -br.pinterest.com,SOURCE_CATEGORY_SOCIAL -br.search.yahoo.com,SOURCE_CATEGORY_SEARCH -brightkite,SOURCE_CATEGORY_SOCIAL -brightkite.com,SOURCE_CATEGORY_SOCIAL -brizzly,SOURCE_CATEGORY_SOCIAL -brizzly.com,SOURCE_CATEGORY_SOCIAL -business.facebook.com,SOURCE_CATEGORY_SOCIAL -buzzfeed,SOURCE_CATEGORY_SOCIAL -buzzfeed.com,SOURCE_CATEGORY_SOCIAL -buzznet,SOURCE_CATEGORY_SOCIAL -buzznet.com,SOURCE_CATEGORY_SOCIAL 
-ca.search.yahoo.com,SOURCE_CATEGORY_SEARCH -cafe.naver.com,SOURCE_CATEGORY_SOCIAL -cafemom,SOURCE_CATEGORY_SOCIAL -cafemom.com,SOURCE_CATEGORY_SOCIAL -camospace,SOURCE_CATEGORY_SOCIAL -camospace.com,SOURCE_CATEGORY_SOCIAL -canalblog.com,SOURCE_CATEGORY_SOCIAL -care.com,SOURCE_CATEGORY_SOCIAL -care2,SOURCE_CATEGORY_SOCIAL -care2.com,SOURCE_CATEGORY_SOCIAL -caringbridge.org,SOURCE_CATEGORY_SOCIAL -catster,SOURCE_CATEGORY_SOCIAL -catster.com,SOURCE_CATEGORY_SOCIAL -cbnt.io,SOURCE_CATEGORY_SOCIAL -cellufun,SOURCE_CATEGORY_SOCIAL -cellufun.com,SOURCE_CATEGORY_SOCIAL -centerblog.net,SOURCE_CATEGORY_SOCIAL -centrum.cz,SOURCE_CATEGORY_SEARCH -ch.search.yahoo.com,SOURCE_CATEGORY_SEARCH -chat.zalo.me,SOURCE_CATEGORY_SOCIAL -checkout.shopify.com,SOURCE_CATEGORY_SHOPPING -checkout.stripe.com,SOURCE_CATEGORY_SHOPPING -chegg.com,SOURCE_CATEGORY_SOCIAL -chicagonow,SOURCE_CATEGORY_SOCIAL -chicagonow.com,SOURCE_CATEGORY_SOCIAL -chiebukuro.yahoo.co.jp,SOURCE_CATEGORY_SOCIAL -cl.search.yahoo.com,SOURCE_CATEGORY_SEARCH -classmates,SOURCE_CATEGORY_SOCIAL -classmates.com,SOURCE_CATEGORY_SOCIAL -classquest,SOURCE_CATEGORY_SOCIAL -classquest.com,SOURCE_CATEGORY_SOCIAL -cn.bing.com,SOURCE_CATEGORY_SEARCH -cnn,SOURCE_CATEGORY_SEARCH -co.pinterest.com,SOURCE_CATEGORY_SOCIAL -co.search.yahoo.com,SOURCE_CATEGORY_SEARCH -cocolog-nifty,SOURCE_CATEGORY_SOCIAL -cocolog-nifty.com,SOURCE_CATEGORY_SOCIAL -comcast,SOURCE_CATEGORY_SEARCH -conduit,SOURCE_CATEGORY_SEARCH -copainsdavant.linternaute.com,SOURCE_CATEGORY_SOCIAL -couchsurfing.org,SOURCE_CATEGORY_SOCIAL -cozycot,SOURCE_CATEGORY_SOCIAL -cozycot.com,SOURCE_CATEGORY_SOCIAL -cr.shopping.naver.com,SOURCE_CATEGORY_SHOPPING -cr2.shopping.naver.com,SOURCE_CATEGORY_SHOPPING -crackle,SOURCE_CATEGORY_VIDEO -crackle.com,SOURCE_CATEGORY_VIDEO -cross.tv,SOURCE_CATEGORY_SOCIAL -crunchyroll,SOURCE_CATEGORY_SOCIAL -crunchyroll.com,SOURCE_CATEGORY_SOCIAL -curiositystream,SOURCE_CATEGORY_VIDEO -curiositystream.com,SOURCE_CATEGORY_VIDEO 
-cyworld,SOURCE_CATEGORY_SOCIAL -cyworld.com,SOURCE_CATEGORY_SOCIAL -cz.pinterest.com,SOURCE_CATEGORY_SOCIAL -d.hatena.ne.jp,SOURCE_CATEGORY_SOCIAL -d.tube,SOURCE_CATEGORY_VIDEO -dailymotion,SOURCE_CATEGORY_VIDEO -dailymotion.com,SOURCE_CATEGORY_VIDEO -dailystrength.org,SOURCE_CATEGORY_SOCIAL -dashboard.twitch.tv,SOURCE_CATEGORY_VIDEO -daum,SOURCE_CATEGORY_SEARCH -daum.net,SOURCE_CATEGORY_SEARCH -de.search.yahoo.com,SOURCE_CATEGORY_SEARCH -deluxe.com,SOURCE_CATEGORY_SOCIAL -deviantart,SOURCE_CATEGORY_SOCIAL -deviantart.com,SOURCE_CATEGORY_SOCIAL -dianping,SOURCE_CATEGORY_SOCIAL -dianping.com,SOURCE_CATEGORY_SOCIAL -digg,SOURCE_CATEGORY_SOCIAL -digg.com,SOURCE_CATEGORY_SOCIAL -diigo,SOURCE_CATEGORY_SOCIAL -diigo.com,SOURCE_CATEGORY_SOCIAL -discover.hubpages.com,SOURCE_CATEGORY_SOCIAL -disneyplus,SOURCE_CATEGORY_VIDEO -disneyplus.com,SOURCE_CATEGORY_VIDEO -disqus,SOURCE_CATEGORY_SOCIAL -disqus.com,SOURCE_CATEGORY_SOCIAL -dk.search.yahoo.com,SOURCE_CATEGORY_SEARCH -dogpile,SOURCE_CATEGORY_SEARCH -dogpile.com,SOURCE_CATEGORY_SEARCH -dogster,SOURCE_CATEGORY_SOCIAL -dogster.com,SOURCE_CATEGORY_SOCIAL -dol2day,SOURCE_CATEGORY_SOCIAL -dol2day.com,SOURCE_CATEGORY_SOCIAL -doostang,SOURCE_CATEGORY_SOCIAL -doostang.com,SOURCE_CATEGORY_SOCIAL -dopplr,SOURCE_CATEGORY_SOCIAL -dopplr.com,SOURCE_CATEGORY_SOCIAL -douban,SOURCE_CATEGORY_SOCIAL -douban.com,SOURCE_CATEGORY_SOCIAL -draft.blogger.com,SOURCE_CATEGORY_SOCIAL -draugiem.lv,SOURCE_CATEGORY_SOCIAL -drugs-forum,SOURCE_CATEGORY_SOCIAL -drugs-forum.com,SOURCE_CATEGORY_SOCIAL -duckduckgo,SOURCE_CATEGORY_SEARCH -dzone,SOURCE_CATEGORY_SOCIAL -dzone.com,SOURCE_CATEGORY_SOCIAL -ebay,SOURCE_CATEGORY_SHOPPING -ebay.co.uk,SOURCE_CATEGORY_SHOPPING -ebay.com,SOURCE_CATEGORY_SHOPPING -ebay.com.au,SOURCE_CATEGORY_SHOPPING -ebay.de,SOURCE_CATEGORY_SHOPPING -ecosia.org,SOURCE_CATEGORY_SEARCH -edublogs.org,SOURCE_CATEGORY_SOCIAL -elftown,SOURCE_CATEGORY_SOCIAL -elftown.com,SOURCE_CATEGORY_SOCIAL -email.seznam.cz,SOURCE_CATEGORY_SEARCH 
-eniro,SOURCE_CATEGORY_SEARCH -epicurious.com,SOURCE_CATEGORY_SOCIAL -es.search.yahoo.com,SOURCE_CATEGORY_SEARCH -espanol.search.yahoo.com,SOURCE_CATEGORY_SEARCH -etsy,SOURCE_CATEGORY_SHOPPING -etsy.com,SOURCE_CATEGORY_SHOPPING -everforo.com,SOURCE_CATEGORY_SOCIAL -exalead.com,SOURCE_CATEGORY_SEARCH -exblog.jp,SOURCE_CATEGORY_SOCIAL -excite.com,SOURCE_CATEGORY_SEARCH -extole,SOURCE_CATEGORY_SOCIAL -extole.com,SOURCE_CATEGORY_SOCIAL -facebook,SOURCE_CATEGORY_SOCIAL -facebook.com,SOURCE_CATEGORY_SOCIAL -faceparty,SOURCE_CATEGORY_SOCIAL -faceparty.com,SOURCE_CATEGORY_SOCIAL -fandom.com,SOURCE_CATEGORY_SOCIAL -fanpop,SOURCE_CATEGORY_SOCIAL -fanpop.com,SOURCE_CATEGORY_SOCIAL -fark,SOURCE_CATEGORY_SOCIAL -fark.com,SOURCE_CATEGORY_SOCIAL -fast.wistia.net,SOURCE_CATEGORY_VIDEO -fb,SOURCE_CATEGORY_SOCIAL -fb.me,SOURCE_CATEGORY_SOCIAL -fc2,SOURCE_CATEGORY_SOCIAL -fc2.com,SOURCE_CATEGORY_SOCIAL -feedspot,SOURCE_CATEGORY_SOCIAL -feministing,SOURCE_CATEGORY_SOCIAL -feministing.com,SOURCE_CATEGORY_SOCIAL -fi.search.yahoo.com,SOURCE_CATEGORY_SEARCH -filmaffinity,SOURCE_CATEGORY_SOCIAL -filmaffinity.com,SOURCE_CATEGORY_SOCIAL -firmy.cz,SOURCE_CATEGORY_SEARCH -flickr,SOURCE_CATEGORY_SOCIAL -flickr.com,SOURCE_CATEGORY_SOCIAL -flipboard,SOURCE_CATEGORY_SOCIAL -flipboard.com,SOURCE_CATEGORY_SOCIAL -folkdirect,SOURCE_CATEGORY_SOCIAL -folkdirect.com,SOURCE_CATEGORY_SOCIAL -foodservice,SOURCE_CATEGORY_SOCIAL -foodservice.com,SOURCE_CATEGORY_SOCIAL -forums.androidcentral.com,SOURCE_CATEGORY_SOCIAL -forums.crackberry.com,SOURCE_CATEGORY_SOCIAL -forums.imore.com,SOURCE_CATEGORY_SOCIAL -forums.nexopia.com,SOURCE_CATEGORY_SOCIAL -forums.webosnation.com,SOURCE_CATEGORY_SOCIAL -forums.wpcentral.com,SOURCE_CATEGORY_SOCIAL -fotki,SOURCE_CATEGORY_SOCIAL -fotki.com,SOURCE_CATEGORY_SOCIAL -fotolog,SOURCE_CATEGORY_SOCIAL -fotolog.com,SOURCE_CATEGORY_SOCIAL -foursquare,SOURCE_CATEGORY_SOCIAL -foursquare.com,SOURCE_CATEGORY_SOCIAL -fr.search.yahoo.com,SOURCE_CATEGORY_SEARCH 
-free.facebook.com,SOURCE_CATEGORY_SOCIAL -friendfeed,SOURCE_CATEGORY_SOCIAL -friendfeed.com,SOURCE_CATEGORY_SOCIAL -fruehstueckstreff.org,SOURCE_CATEGORY_SOCIAL -fubar,SOURCE_CATEGORY_SOCIAL -fubar.com,SOURCE_CATEGORY_SOCIAL -gaiaonline,SOURCE_CATEGORY_SOCIAL -gaiaonline.com,SOURCE_CATEGORY_SOCIAL -gamerdna,SOURCE_CATEGORY_SOCIAL -gamerdna.com,SOURCE_CATEGORY_SOCIAL -gather.com,SOURCE_CATEGORY_SOCIAL -geni.com,SOURCE_CATEGORY_SOCIAL -getpocket.com,SOURCE_CATEGORY_SOCIAL -glassboard,SOURCE_CATEGORY_SOCIAL -glassboard.com,SOURCE_CATEGORY_SOCIAL -glassdoor,SOURCE_CATEGORY_SOCIAL -glassdoor.com,SOURCE_CATEGORY_SOCIAL -globo,SOURCE_CATEGORY_SEARCH -go.mail.ru,SOURCE_CATEGORY_SEARCH -godtube,SOURCE_CATEGORY_SOCIAL -godtube.com,SOURCE_CATEGORY_SOCIAL -goldenline.pl,SOURCE_CATEGORY_SOCIAL -goldstar,SOURCE_CATEGORY_SOCIAL -goldstar.com,SOURCE_CATEGORY_SOCIAL -goo.gl,SOURCE_CATEGORY_SOCIAL -gooblog,SOURCE_CATEGORY_SOCIAL -goodreads,SOURCE_CATEGORY_SOCIAL -goodreads.com,SOURCE_CATEGORY_SOCIAL -google,SOURCE_CATEGORY_SEARCH -google+,SOURCE_CATEGORY_SOCIAL -google-play,SOURCE_CATEGORY_SEARCH -googlegroups.com,SOURCE_CATEGORY_SOCIAL -googleplus,SOURCE_CATEGORY_SOCIAL -govloop,SOURCE_CATEGORY_SOCIAL -govloop.com,SOURCE_CATEGORY_SOCIAL -gowalla,SOURCE_CATEGORY_SOCIAL -gowalla.com,SOURCE_CATEGORY_SOCIAL -gree.jp,SOURCE_CATEGORY_SOCIAL -groups.google.com,SOURCE_CATEGORY_SOCIAL -gulli.com,SOURCE_CATEGORY_SOCIAL -gutefrage.net,SOURCE_CATEGORY_SOCIAL -habbo,SOURCE_CATEGORY_SOCIAL -habbo.com,SOURCE_CATEGORY_SOCIAL -help.hulu.com,SOURCE_CATEGORY_VIDEO -help.netflix.com,SOURCE_CATEGORY_VIDEO -hi5,SOURCE_CATEGORY_SOCIAL -hi5.com,SOURCE_CATEGORY_SOCIAL -hk.search.yahoo.com,SOURCE_CATEGORY_SEARCH -hootsuite,SOURCE_CATEGORY_SOCIAL -hootsuite.com,SOURCE_CATEGORY_SOCIAL -houzz,SOURCE_CATEGORY_SOCIAL -houzz.com,SOURCE_CATEGORY_SOCIAL -hoverspot,SOURCE_CATEGORY_SOCIAL -hoverspot.com,SOURCE_CATEGORY_SOCIAL -hr.com,SOURCE_CATEGORY_SOCIAL -hu.pinterest.com,SOURCE_CATEGORY_SOCIAL 
-hubculture,SOURCE_CATEGORY_SOCIAL -hubculture.com,SOURCE_CATEGORY_SOCIAL -hubpages.com,SOURCE_CATEGORY_SOCIAL -hulu,SOURCE_CATEGORY_VIDEO -hulu.com,SOURCE_CATEGORY_VIDEO -hyves.net,SOURCE_CATEGORY_SOCIAL -hyves.nl,SOURCE_CATEGORY_SOCIAL -ibibo,SOURCE_CATEGORY_SOCIAL -ibibo.com,SOURCE_CATEGORY_SOCIAL -id.pinterest.com,SOURCE_CATEGORY_SOCIAL -id.search.yahoo.com,SOURCE_CATEGORY_SEARCH -id.twitch.tv,SOURCE_CATEGORY_VIDEO -identi.ca,SOURCE_CATEGORY_SOCIAL -ig,SOURCE_CATEGORY_SOCIAL -imageshack.com,SOURCE_CATEGORY_SOCIAL -imageshack.us,SOURCE_CATEGORY_SOCIAL -imvu,SOURCE_CATEGORY_SOCIAL -imvu.com,SOURCE_CATEGORY_SOCIAL -in.pinterest.com,SOURCE_CATEGORY_SOCIAL -in.search.yahoo.com,SOURCE_CATEGORY_SEARCH -incredimail,SOURCE_CATEGORY_SEARCH -insanejournal,SOURCE_CATEGORY_SOCIAL -insanejournal.com,SOURCE_CATEGORY_SOCIAL -instagram,SOURCE_CATEGORY_SOCIAL -instagram.com,SOURCE_CATEGORY_SOCIAL -instapaper,SOURCE_CATEGORY_SOCIAL -instapaper.com,SOURCE_CATEGORY_SOCIAL -internations.org,SOURCE_CATEGORY_SOCIAL -interpals.net,SOURCE_CATEGORY_SOCIAL -intherooms,SOURCE_CATEGORY_SOCIAL -intherooms.com,SOURCE_CATEGORY_SOCIAL -iq.com,SOURCE_CATEGORY_VIDEO -iqiyi,SOURCE_CATEGORY_VIDEO -iqiyi.com,SOURCE_CATEGORY_VIDEO -irc-galleria.net,SOURCE_CATEGORY_SOCIAL -is.gd,SOURCE_CATEGORY_SOCIAL -it.search.yahoo.com,SOURCE_CATEGORY_SEARCH -italki,SOURCE_CATEGORY_SOCIAL -italki.com,SOURCE_CATEGORY_SOCIAL -jammerdirect,SOURCE_CATEGORY_SOCIAL -jammerdirect.com,SOURCE_CATEGORY_SOCIAL -jappy.com,SOURCE_CATEGORY_SOCIAL -jappy.de,SOURCE_CATEGORY_SOCIAL -jobs.netflix.com,SOURCE_CATEGORY_VIDEO -justin.tv,SOURCE_CATEGORY_VIDEO -kaboodle.com,SOURCE_CATEGORY_SOCIAL -kakao,SOURCE_CATEGORY_SOCIAL -kakao.com,SOURCE_CATEGORY_SOCIAL -kakaocorp.com,SOURCE_CATEGORY_SOCIAL -kaneva,SOURCE_CATEGORY_SOCIAL -kaneva.com,SOURCE_CATEGORY_SOCIAL -kin.naver.com,SOURCE_CATEGORY_SOCIAL -kvasir,SOURCE_CATEGORY_SEARCH -l.facebook.com,SOURCE_CATEGORY_SOCIAL -l.instagram.com,SOURCE_CATEGORY_SOCIAL 
-l.messenger.com,SOURCE_CATEGORY_SOCIAL -last.fm,SOURCE_CATEGORY_SOCIAL -lens.google.com,SOURCE_CATEGORY_SEARCH -librarything,SOURCE_CATEGORY_SOCIAL -librarything.com,SOURCE_CATEGORY_SOCIAL -lifestream.aol.com,SOURCE_CATEGORY_SOCIAL -line,SOURCE_CATEGORY_SOCIAL -line.me,SOURCE_CATEGORY_SOCIAL -linkedin,SOURCE_CATEGORY_SOCIAL -linkedin.com,SOURCE_CATEGORY_SOCIAL -listal,SOURCE_CATEGORY_SOCIAL -listal.com,SOURCE_CATEGORY_SOCIAL -listography,SOURCE_CATEGORY_SOCIAL -listography.com,SOURCE_CATEGORY_SOCIAL -lite.qwant.com,SOURCE_CATEGORY_SEARCH -livedoor.com,SOURCE_CATEGORY_SOCIAL -livedoorblog,SOURCE_CATEGORY_SOCIAL -livejournal,SOURCE_CATEGORY_SOCIAL -livejournal.com,SOURCE_CATEGORY_SOCIAL -lm.facebook.com,SOURCE_CATEGORY_SOCIAL -lnkd.in,SOURCE_CATEGORY_SOCIAL -lycos,SOURCE_CATEGORY_SEARCH -m.alibaba.com,SOURCE_CATEGORY_SHOPPING -m.baidu.com,SOURCE_CATEGORY_SEARCH -m.blog.naver.com,SOURCE_CATEGORY_SOCIAL -m.cafe.naver.com,SOURCE_CATEGORY_SOCIAL -m.facebook.com,SOURCE_CATEGORY_SOCIAL -m.kin.naver.com,SOURCE_CATEGORY_SOCIAL -m.naver.com,SOURCE_CATEGORY_SEARCH -m.search.naver.com,SOURCE_CATEGORY_SEARCH -m.shopping.naver.com,SOURCE_CATEGORY_SHOPPING -m.sogou.com,SOURCE_CATEGORY_SEARCH -m.twitch.tv,SOURCE_CATEGORY_VIDEO -m.vk.com,SOURCE_CATEGORY_SOCIAL -m.yelp.com,SOURCE_CATEGORY_SOCIAL -m.youtube.com,SOURCE_CATEGORY_VIDEO -mail.rambler.ru,SOURCE_CATEGORY_SEARCH -mail.yandex.ru,SOURCE_CATEGORY_SEARCH -malaysia.search.yahoo.com,SOURCE_CATEGORY_SEARCH -mbga.jp,SOURCE_CATEGORY_SOCIAL -medium.com,SOURCE_CATEGORY_SOCIAL -meetin.org,SOURCE_CATEGORY_SOCIAL -meetup,SOURCE_CATEGORY_SOCIAL -meetup.com,SOURCE_CATEGORY_SOCIAL -meinvz.net,SOURCE_CATEGORY_SOCIAL -meneame.net,SOURCE_CATEGORY_SOCIAL -menuism.com,SOURCE_CATEGORY_SOCIAL -mercadolibre,SOURCE_CATEGORY_SHOPPING -mercadolibre.com,SOURCE_CATEGORY_SHOPPING -mercadolibre.com.ar,SOURCE_CATEGORY_SHOPPING -mercadolibre.com.mx,SOURCE_CATEGORY_SHOPPING -message.alibaba.com,SOURCE_CATEGORY_SHOPPING 
-messages.google.com,SOURCE_CATEGORY_SOCIAL -messages.yahoo.co.jp,SOURCE_CATEGORY_SOCIAL -messenger,SOURCE_CATEGORY_SOCIAL -messenger.com,SOURCE_CATEGORY_SOCIAL -mix.com,SOURCE_CATEGORY_SOCIAL -mixi.jp,SOURCE_CATEGORY_SOCIAL -mobile.facebook.com,SOURCE_CATEGORY_SOCIAL -mocospace,SOURCE_CATEGORY_SOCIAL -mocospace.com,SOURCE_CATEGORY_SOCIAL -mouthshut,SOURCE_CATEGORY_SOCIAL -mouthshut.com,SOURCE_CATEGORY_SOCIAL -movabletype,SOURCE_CATEGORY_SOCIAL -movabletype.com,SOURCE_CATEGORY_SOCIAL -msearch.shopping.naver.com,SOURCE_CATEGORY_SHOPPING -msn,SOURCE_CATEGORY_SEARCH -msn.com,SOURCE_CATEGORY_SEARCH -mubi,SOURCE_CATEGORY_SOCIAL -mubi.com,SOURCE_CATEGORY_SOCIAL -music.youtube.com,SOURCE_CATEGORY_VIDEO -mx.search.yahoo.com,SOURCE_CATEGORY_SEARCH -my.opera.com,SOURCE_CATEGORY_SOCIAL -myanimelist.net,SOURCE_CATEGORY_SOCIAL -myheritage,SOURCE_CATEGORY_SOCIAL -myheritage.com,SOURCE_CATEGORY_SOCIAL -mylife,SOURCE_CATEGORY_SOCIAL -mylife.com,SOURCE_CATEGORY_SOCIAL -mymodernmet,SOURCE_CATEGORY_SOCIAL -mymodernmet.com,SOURCE_CATEGORY_SOCIAL -myspace,SOURCE_CATEGORY_SOCIAL -myspace.com,SOURCE_CATEGORY_SOCIAL -najdi,SOURCE_CATEGORY_SEARCH -naver,SOURCE_CATEGORY_SEARCH -naver.com,SOURCE_CATEGORY_SEARCH -netflix,SOURCE_CATEGORY_VIDEO -netflix.com,SOURCE_CATEGORY_VIDEO -netvibes,SOURCE_CATEGORY_SOCIAL -netvibes.com,SOURCE_CATEGORY_SOCIAL -news.google.com,SOURCE_CATEGORY_SEARCH -news.ycombinator.com,SOURCE_CATEGORY_SOCIAL -newsshowcase,SOURCE_CATEGORY_SOCIAL -nexopia,SOURCE_CATEGORY_SOCIAL -ngopost.org,SOURCE_CATEGORY_SOCIAL -niconico,SOURCE_CATEGORY_SOCIAL -nicovideo.jp,SOURCE_CATEGORY_SOCIAL -nightlifelink,SOURCE_CATEGORY_SOCIAL -nightlifelink.com,SOURCE_CATEGORY_SOCIAL -ning,SOURCE_CATEGORY_SOCIAL -ning.com,SOURCE_CATEGORY_SOCIAL -nl.pinterest.com,SOURCE_CATEGORY_SOCIAL -nl.search.yahoo.com,SOURCE_CATEGORY_SEARCH -nl.shopping.net,SOURCE_CATEGORY_SHOPPING -no.search.yahoo.com,SOURCE_CATEGORY_SEARCH -no.shopping.net,SOURCE_CATEGORY_SHOPPING -ntp.msn.com,SOURCE_CATEGORY_SEARCH 
-nz.search.yahoo.com,SOURCE_CATEGORY_SEARCH -odnoklassniki.ru,SOURCE_CATEGORY_SOCIAL -odnoklassniki.ua,SOURCE_CATEGORY_SOCIAL -offer.alibaba.com,SOURCE_CATEGORY_SHOPPING -okwave.jp,SOURCE_CATEGORY_SOCIAL -old.reddit.com,SOURCE_CATEGORY_SOCIAL -one.walmart.com,SOURCE_CATEGORY_SHOPPING -onet,SOURCE_CATEGORY_SEARCH -onet.pl,SOURCE_CATEGORY_SEARCH -oneworldgroup.org,SOURCE_CATEGORY_SOCIAL -onstartups,SOURCE_CATEGORY_SOCIAL -onstartups.com,SOURCE_CATEGORY_SOCIAL -opendiary,SOURCE_CATEGORY_SOCIAL -opendiary.com,SOURCE_CATEGORY_SOCIAL -order.shopping.yahoo.co.jp,SOURCE_CATEGORY_SHOPPING -oshiete.goo.ne.jp,SOURCE_CATEGORY_SOCIAL -out.reddit.com,SOURCE_CATEGORY_SOCIAL -over-blog.com,SOURCE_CATEGORY_SOCIAL -overblog.com,SOURCE_CATEGORY_SOCIAL -paper.li,SOURCE_CATEGORY_SOCIAL -partners.shopify.com,SOURCE_CATEGORY_SHOPPING -partyflock.nl,SOURCE_CATEGORY_SOCIAL -pe.search.yahoo.com,SOURCE_CATEGORY_SEARCH -ph.search.yahoo.com,SOURCE_CATEGORY_SEARCH -photobucket,SOURCE_CATEGORY_SOCIAL -photobucket.com,SOURCE_CATEGORY_SOCIAL -pinboard,SOURCE_CATEGORY_SOCIAL -pinboard.in,SOURCE_CATEGORY_SOCIAL -pingsta,SOURCE_CATEGORY_SOCIAL -pingsta.com,SOURCE_CATEGORY_SOCIAL -pinterest,SOURCE_CATEGORY_SOCIAL -pinterest.at,SOURCE_CATEGORY_SOCIAL -pinterest.ca,SOURCE_CATEGORY_SOCIAL -pinterest.ch,SOURCE_CATEGORY_SOCIAL -pinterest.cl,SOURCE_CATEGORY_SOCIAL -pinterest.co.kr,SOURCE_CATEGORY_SOCIAL -pinterest.co.uk,SOURCE_CATEGORY_SOCIAL -pinterest.com,SOURCE_CATEGORY_SOCIAL -pinterest.com.au,SOURCE_CATEGORY_SOCIAL -pinterest.com.mx,SOURCE_CATEGORY_SOCIAL -pinterest.de,SOURCE_CATEGORY_SOCIAL -pinterest.es,SOURCE_CATEGORY_SOCIAL -pinterest.fr,SOURCE_CATEGORY_SOCIAL -pinterest.it,SOURCE_CATEGORY_SOCIAL -pinterest.jp,SOURCE_CATEGORY_SOCIAL -pinterest.nz,SOURCE_CATEGORY_SOCIAL -pinterest.ph,SOURCE_CATEGORY_SOCIAL -pinterest.pt,SOURCE_CATEGORY_SOCIAL -pinterest.ru,SOURCE_CATEGORY_SOCIAL -pinterest.se,SOURCE_CATEGORY_SOCIAL -pixiv.net,SOURCE_CATEGORY_SOCIAL -pl.pinterest.com,SOURCE_CATEGORY_SOCIAL 
-pl.search.yahoo.com,SOURCE_CATEGORY_SEARCH -play.google.com,SOURCE_CATEGORY_SEARCH -playahead.se,SOURCE_CATEGORY_SOCIAL -player.twitch.tv,SOURCE_CATEGORY_VIDEO -player.vimeo.com,SOURCE_CATEGORY_VIDEO -plurk,SOURCE_CATEGORY_SOCIAL -plurk.com,SOURCE_CATEGORY_SOCIAL -plus.google.com,SOURCE_CATEGORY_SOCIAL -plus.url.google.com,SOURCE_CATEGORY_SOCIAL -pocket.co,SOURCE_CATEGORY_SOCIAL -posterous,SOURCE_CATEGORY_SOCIAL -posterous.com,SOURCE_CATEGORY_SOCIAL -pro.homeadvisor.com,SOURCE_CATEGORY_SOCIAL -pulse.yahoo.com,SOURCE_CATEGORY_SOCIAL -qapacity,SOURCE_CATEGORY_SOCIAL -qapacity.com,SOURCE_CATEGORY_SOCIAL -quechup,SOURCE_CATEGORY_SOCIAL -quechup.com,SOURCE_CATEGORY_SOCIAL -quora,SOURCE_CATEGORY_SOCIAL -quora.com,SOURCE_CATEGORY_SOCIAL -qwant,SOURCE_CATEGORY_SEARCH -qwant.com,SOURCE_CATEGORY_SEARCH -qzone.qq.com,SOURCE_CATEGORY_SOCIAL -rakuten,SOURCE_CATEGORY_SEARCH -rakuten.co.jp,SOURCE_CATEGORY_SEARCH -rambler,SOURCE_CATEGORY_SEARCH -rambler.ru,SOURCE_CATEGORY_SEARCH -ravelry,SOURCE_CATEGORY_SOCIAL -ravelry.com,SOURCE_CATEGORY_SOCIAL -reddit,SOURCE_CATEGORY_SOCIAL -reddit.com,SOURCE_CATEGORY_SOCIAL -redux,SOURCE_CATEGORY_SOCIAL -redux.com,SOURCE_CATEGORY_SOCIAL -renren,SOURCE_CATEGORY_SOCIAL -renren.com,SOURCE_CATEGORY_SOCIAL -researchgate.net,SOURCE_CATEGORY_SOCIAL -reunion,SOURCE_CATEGORY_SOCIAL -reunion.com,SOURCE_CATEGORY_SOCIAL -reverbnation,SOURCE_CATEGORY_SOCIAL -reverbnation.com,SOURCE_CATEGORY_SOCIAL -rtl.de,SOURCE_CATEGORY_SOCIAL -ryze,SOURCE_CATEGORY_SOCIAL -ryze.com,SOURCE_CATEGORY_SOCIAL -s3.amazonaws.com,SOURCE_CATEGORY_SHOPPING -salespider,SOURCE_CATEGORY_SOCIAL -salespider.com,SOURCE_CATEGORY_SOCIAL -scoop.it,SOURCE_CATEGORY_SOCIAL -screenrant,SOURCE_CATEGORY_SOCIAL -screenrant.com,SOURCE_CATEGORY_SOCIAL -scribd,SOURCE_CATEGORY_SOCIAL -scribd.com,SOURCE_CATEGORY_SOCIAL -scvngr,SOURCE_CATEGORY_SOCIAL -scvngr.com,SOURCE_CATEGORY_SOCIAL -se.search.yahoo.com,SOURCE_CATEGORY_SEARCH -se.shopping.net,SOURCE_CATEGORY_SHOPPING 
-search-results,SOURCE_CATEGORY_SEARCH -search.aol.co.uk,SOURCE_CATEGORY_SEARCH -search.aol.com,SOURCE_CATEGORY_SEARCH -search.google.com,SOURCE_CATEGORY_SEARCH -search.smt.docomo.ne.jp,SOURCE_CATEGORY_SEARCH -search.ukr.net,SOURCE_CATEGORY_SEARCH -secondlife,SOURCE_CATEGORY_SOCIAL -secondlife.com,SOURCE_CATEGORY_SOCIAL -secureurl.ukr.net,SOURCE_CATEGORY_SEARCH -serverfault,SOURCE_CATEGORY_SOCIAL -serverfault.com,SOURCE_CATEGORY_SOCIAL -seznam,SOURCE_CATEGORY_SEARCH -seznam.cz,SOURCE_CATEGORY_SEARCH -sg.search.yahoo.com,SOURCE_CATEGORY_SEARCH -shareit,SOURCE_CATEGORY_SOCIAL -sharethis,SOURCE_CATEGORY_SOCIAL -sharethis.com,SOURCE_CATEGORY_SOCIAL -shop.app,SOURCE_CATEGORY_SHOPPING -shopify,SOURCE_CATEGORY_SHOPPING -shopify.com,SOURCE_CATEGORY_SHOPPING -shopping.naver.com,SOURCE_CATEGORY_SHOPPING -shopping.yahoo.co.jp,SOURCE_CATEGORY_SHOPPING -shopping.yahoo.com,SOURCE_CATEGORY_SHOPPING -shopzilla,SOURCE_CATEGORY_SHOPPING -shopzilla.com,SOURCE_CATEGORY_SHOPPING -shvoong.com,SOURCE_CATEGORY_SOCIAL -simplycodes.com,SOURCE_CATEGORY_SHOPPING -sites.google.com,SOURCE_CATEGORY_SOCIAL -skype,SOURCE_CATEGORY_SOCIAL -skyrock,SOURCE_CATEGORY_SOCIAL -skyrock.com,SOURCE_CATEGORY_SOCIAL -slashdot.org,SOURCE_CATEGORY_SOCIAL -slideshare.net,SOURCE_CATEGORY_SOCIAL -smartnews.com,SOURCE_CATEGORY_SOCIAL -snapchat,SOURCE_CATEGORY_SOCIAL -snapchat.com,SOURCE_CATEGORY_SOCIAL -so.com,SOURCE_CATEGORY_SEARCH -social,SOURCE_CATEGORY_SOCIAL -sociallife.com.br,SOURCE_CATEGORY_SOCIAL -socialvibe,SOURCE_CATEGORY_SOCIAL -socialvibe.com,SOURCE_CATEGORY_SOCIAL -sogou,SOURCE_CATEGORY_SEARCH -sogou.com,SOURCE_CATEGORY_SEARCH -sp-web.search.auone.jp,SOURCE_CATEGORY_SEARCH -spaces.live.com,SOURCE_CATEGORY_SOCIAL -spoke,SOURCE_CATEGORY_SOCIAL -spoke.com,SOURCE_CATEGORY_SOCIAL -spruz,SOURCE_CATEGORY_SOCIAL -spruz.com,SOURCE_CATEGORY_SOCIAL -ssense.com,SOURCE_CATEGORY_SOCIAL -stackapps,SOURCE_CATEGORY_SOCIAL -stackapps.com,SOURCE_CATEGORY_SOCIAL -stackexchange,SOURCE_CATEGORY_SOCIAL 
-stackexchange.com,SOURCE_CATEGORY_SOCIAL -stackoverflow,SOURCE_CATEGORY_SOCIAL -stackoverflow.com,SOURCE_CATEGORY_SOCIAL -stardoll.com,SOURCE_CATEGORY_SOCIAL -startsiden,SOURCE_CATEGORY_SEARCH -startsiden.no,SOURCE_CATEGORY_SEARCH -stickam,SOURCE_CATEGORY_SOCIAL -stickam.com,SOURCE_CATEGORY_SOCIAL -store.shopping.yahoo.co.jp,SOURCE_CATEGORY_SHOPPING -stripe,SOURCE_CATEGORY_SHOPPING -stripe.com,SOURCE_CATEGORY_SHOPPING -studivz.net,SOURCE_CATEGORY_SOCIAL -suche.aol.de,SOURCE_CATEGORY_SEARCH -suomi24.fi,SOURCE_CATEGORY_SOCIAL -superuser,SOURCE_CATEGORY_SOCIAL -superuser.com,SOURCE_CATEGORY_SOCIAL -sweeva,SOURCE_CATEGORY_SOCIAL -sweeva.com,SOURCE_CATEGORY_SOCIAL -t.co,SOURCE_CATEGORY_SOCIAL -t.me,SOURCE_CATEGORY_SOCIAL -tagged,SOURCE_CATEGORY_SOCIAL -tagged.com,SOURCE_CATEGORY_SOCIAL -taggedmail,SOURCE_CATEGORY_SOCIAL -taggedmail.com,SOURCE_CATEGORY_SOCIAL -talkbiznow,SOURCE_CATEGORY_SOCIAL -talkbiznow.com,SOURCE_CATEGORY_SOCIAL -taringa.net,SOURCE_CATEGORY_SOCIAL -techmeme,SOURCE_CATEGORY_SOCIAL -techmeme.com,SOURCE_CATEGORY_SOCIAL -ted,SOURCE_CATEGORY_VIDEO -ted.com,SOURCE_CATEGORY_VIDEO -tencent,SOURCE_CATEGORY_SOCIAL -tencent.com,SOURCE_CATEGORY_SOCIAL -terra,SOURCE_CATEGORY_SEARCH -th.search.yahoo.com,SOURCE_CATEGORY_SEARCH -tiktok,SOURCE_CATEGORY_SOCIAL -tiktok.com,SOURCE_CATEGORY_SOCIAL -tinyurl,SOURCE_CATEGORY_SOCIAL -tinyurl.com,SOURCE_CATEGORY_SOCIAL -toolbox,SOURCE_CATEGORY_SOCIAL -toolbox.com,SOURCE_CATEGORY_SOCIAL -touch.facebook.com,SOURCE_CATEGORY_SOCIAL -tr.pinterest.com,SOURCE_CATEGORY_SOCIAL -tr.search.yahoo.com,SOURCE_CATEGORY_SEARCH -travellerspoint,SOURCE_CATEGORY_SOCIAL -travellerspoint.com,SOURCE_CATEGORY_SOCIAL -tripadvisor,SOURCE_CATEGORY_SOCIAL -tripadvisor.com,SOURCE_CATEGORY_SOCIAL -trombi,SOURCE_CATEGORY_SOCIAL -trombi.com,SOURCE_CATEGORY_SOCIAL -trustpilot,SOURCE_CATEGORY_SOCIAL -tudou,SOURCE_CATEGORY_SOCIAL -tudou.com,SOURCE_CATEGORY_SOCIAL -tuenti,SOURCE_CATEGORY_SOCIAL -tuenti.com,SOURCE_CATEGORY_SOCIAL -tumblr,SOURCE_CATEGORY_SOCIAL 
-tumblr.com,SOURCE_CATEGORY_SOCIAL -tut.by,SOURCE_CATEGORY_SEARCH -tw.search.yahoo.com,SOURCE_CATEGORY_SEARCH -tweetdeck,SOURCE_CATEGORY_SOCIAL -tweetdeck.com,SOURCE_CATEGORY_SOCIAL -twitch,SOURCE_CATEGORY_VIDEO -twitch.tv,SOURCE_CATEGORY_VIDEO -twitter,SOURCE_CATEGORY_SOCIAL -twitter.com,SOURCE_CATEGORY_SOCIAL -twoo.com,SOURCE_CATEGORY_SOCIAL -typepad,SOURCE_CATEGORY_SOCIAL -typepad.com,SOURCE_CATEGORY_SOCIAL -uk.search.yahoo.com,SOURCE_CATEGORY_SEARCH -uk.shopping.net,SOURCE_CATEGORY_SHOPPING -ukr,SOURCE_CATEGORY_SEARCH -unblog.fr,SOURCE_CATEGORY_SOCIAL -urbanspoon.com,SOURCE_CATEGORY_SOCIAL -us.search.yahoo.com,SOURCE_CATEGORY_SEARCH -ushareit.com,SOURCE_CATEGORY_SOCIAL -ushi.cn,SOURCE_CATEGORY_SOCIAL -utreon,SOURCE_CATEGORY_VIDEO -utreon.com,SOURCE_CATEGORY_VIDEO -vampirefreaks,SOURCE_CATEGORY_SOCIAL -vampirefreaks.com,SOURCE_CATEGORY_SOCIAL -vampirerave,SOURCE_CATEGORY_SOCIAL -vampirerave.com,SOURCE_CATEGORY_SOCIAL -veoh,SOURCE_CATEGORY_VIDEO -veoh.com,SOURCE_CATEGORY_VIDEO -vg.no,SOURCE_CATEGORY_SOCIAL -viadeo.journaldunet.com,SOURCE_CATEGORY_VIDEO -video.ibm.com,SOURCE_CATEGORY_SOCIAL -vimeo,SOURCE_CATEGORY_VIDEO -vimeo.com,SOURCE_CATEGORY_VIDEO -virgilio,SOURCE_CATEGORY_SEARCH -vk.com,SOURCE_CATEGORY_SOCIAL -vkontakte.ru,SOURCE_CATEGORY_SOCIAL -vn.search.yahoo.com,SOURCE_CATEGORY_SEARCH -wakoopa,SOURCE_CATEGORY_SOCIAL -wakoopa.com,SOURCE_CATEGORY_SOCIAL -walmart,SOURCE_CATEGORY_SHOPPING -walmart.com,SOURCE_CATEGORY_SHOPPING -wap.sogou.com,SOURCE_CATEGORY_SEARCH -wattpad,SOURCE_CATEGORY_SOCIAL -wattpad.com,SOURCE_CATEGORY_SOCIAL -web.facebook.com,SOURCE_CATEGORY_SOCIAL -web.skype.com,SOURCE_CATEGORY_SOCIAL -webmaster.yandex.ru,SOURCE_CATEGORY_SEARCH -websearch.rakuten.co.jp,SOURCE_CATEGORY_SEARCH -webshots,SOURCE_CATEGORY_SOCIAL -webshots.com,SOURCE_CATEGORY_SOCIAL -wechat,SOURCE_CATEGORY_SOCIAL -wechat.com,SOURCE_CATEGORY_SOCIAL -weebly,SOURCE_CATEGORY_SOCIAL -weebly.com,SOURCE_CATEGORY_SOCIAL -weibo,SOURCE_CATEGORY_SOCIAL -weibo.com,SOURCE_CATEGORY_SOCIAL 
-wer-weiss-was.de,SOURCE_CATEGORY_SOCIAL -weread,SOURCE_CATEGORY_SOCIAL -weread.com,SOURCE_CATEGORY_SOCIAL -whatsapp,SOURCE_CATEGORY_SOCIAL -whatsapp.com,SOURCE_CATEGORY_SOCIAL -wiki.answers.com,SOURCE_CATEGORY_SOCIAL -wikihow.com,SOURCE_CATEGORY_SOCIAL -wikitravel.org,SOURCE_CATEGORY_SOCIAL -wistia,SOURCE_CATEGORY_VIDEO -wistia.com,SOURCE_CATEGORY_VIDEO -woot.com,SOURCE_CATEGORY_SOCIAL -wordpress,SOURCE_CATEGORY_SOCIAL -wordpress.com,SOURCE_CATEGORY_SOCIAL -wordpress.org,SOURCE_CATEGORY_SOCIAL -xanga,SOURCE_CATEGORY_SOCIAL -xanga.com,SOURCE_CATEGORY_SOCIAL -xing,SOURCE_CATEGORY_SOCIAL -xing.com,SOURCE_CATEGORY_SOCIAL -yahoo,SOURCE_CATEGORY_SEARCH -yahoo-mbga.jp,SOURCE_CATEGORY_SOCIAL -yahoo.co.jp,SOURCE_CATEGORY_SEARCH -yahoo.com,SOURCE_CATEGORY_SEARCH -yammer,SOURCE_CATEGORY_SOCIAL -yammer.com,SOURCE_CATEGORY_SOCIAL -yandex,SOURCE_CATEGORY_SEARCH -yandex.by,SOURCE_CATEGORY_SEARCH -yandex.com,SOURCE_CATEGORY_SEARCH -yandex.com.tr,SOURCE_CATEGORY_SEARCH -yandex.fr,SOURCE_CATEGORY_SEARCH -yandex.kz,SOURCE_CATEGORY_SEARCH -yandex.ru,SOURCE_CATEGORY_SEARCH -yandex.ua,SOURCE_CATEGORY_SEARCH -yandex.uz,SOURCE_CATEGORY_SEARCH -yelp,SOURCE_CATEGORY_SOCIAL -yelp.co.uk,SOURCE_CATEGORY_SOCIAL -yelp.com,SOURCE_CATEGORY_SOCIAL -youku,SOURCE_CATEGORY_VIDEO -youku.com,SOURCE_CATEGORY_VIDEO -youroom.in,SOURCE_CATEGORY_SOCIAL -youtube,SOURCE_CATEGORY_VIDEO -youtube.com,SOURCE_CATEGORY_VIDEO -za.pinterest.com,SOURCE_CATEGORY_SOCIAL -zalo,SOURCE_CATEGORY_SOCIAL -zen.yandex.ru,SOURCE_CATEGORY_SEARCH -zoo.gr,SOURCE_CATEGORY_SOCIAL -zooppa,SOURCE_CATEGORY_SOCIAL -zooppa.com,SOURCE_CATEGORY_SOCIAL From a3d9c1e78edfae6476540e137d569744cfc66f50 Mon Sep 17 00:00:00 2001 From: David Booke Date: Mon, 28 Oct 2024 09:54:55 -0500 Subject: [PATCH 31/47] Comment out unit tests for disabled models --- models/staging/stg_ga4__page_conversions.yml | 78 ++++++------- .../stg_ga4__session_conversions_daily.yml | 106 +++++++++--------- 2 files changed, 92 insertions(+), 92 deletions(-) diff --git 
a/models/staging/stg_ga4__page_conversions.yml b/models/staging/stg_ga4__page_conversions.yml index 2dfd6d55..88233bc1 100644 --- a/models/staging/stg_ga4__page_conversions.yml +++ b/models/staging/stg_ga4__page_conversions.yml @@ -3,42 +3,42 @@ version: 2 models: - name: stg_ga4__page_conversions description: Model that calculates the number of conversions per page. Conversions are defined as variables in the project configurations. -unit_tests: - - name: test_page_conversion_count - description: Test whether the page-level count of conversions is correct - model: stg_ga4__page_conversions - given: - - input: ref('stg_ga4__events') - format: csv - rows: | - event_name,page_key - page_view,A - page_view,A - page_view,B - expect: - format: csv - rows: | - page_key,page_view_count - A,2 - B,1 - overrides: - vars: {conversion_events: ['page_view']} - - name: test_page_conversion_count_non_event_name - description: Test whether the page-level count of conversions is correct - model: stg_ga4__page_conversions - given: - - input: ref('stg_ga4__events') - format: csv - rows: | - event_name,page_key - page-view,A - page-view,A - page-view,B - expect: - format: csv - rows: | - page_key,page_view_count - A,2 - B,1 - overrides: - vars: {conversion_events: ['page-view']} +#unit_tests: +# - name: test_page_conversion_count +# description: Test whether the page-level count of conversions is correct +# model: stg_ga4__page_conversions +# given: +# - input: ref('stg_ga4__events') +# format: csv +# rows: | +# event_name,page_key +# page_view,A +# page_view,A +# page_view,B +# expect: +# format: csv +# rows: | +# page_key,page_view_count +# A,2 +# B,1 +# overrides: +# vars: {conversion_events: ['page_view']} +# - name: test_page_conversion_count_non_event_name +# description: Test whether the page-level count of conversions is correct +# model: stg_ga4__page_conversions +# given: +# - input: ref('stg_ga4__events') +# format: csv +# rows: | +# event_name,page_key +# page-view,A +# 
page-view,A +# page-view,B +# expect: +# format: csv +# rows: | +# page_key,page_view_count +# A,2 +# B,1 +# overrides: +# vars: {conversion_events: ['page-view']} diff --git a/models/staging/stg_ga4__session_conversions_daily.yml b/models/staging/stg_ga4__session_conversions_daily.yml index edc6f08b..8c39425d 100644 --- a/models/staging/stg_ga4__session_conversions_daily.yml +++ b/models/staging/stg_ga4__session_conversions_daily.yml @@ -10,56 +10,56 @@ models: - name: session_partition_key tests: - unique -unit_tests: - - name: test_session_conversion_count - description: Test whether the session-level count of conversions is correct - model: stg_ga4__session_conversions_daily - given: - - input: ref('stg_ga4__events') - format: csv - rows: | - session_key,session_partition_key,event_name,event_date_dt - A,A2022-01-01,page_view,2022-01-01 - A,A2022-01-01,my_conversion,2022-01-01 - A,A2022-01-01,my_conversion,2022-01-01 - B,B2022-01-01,my_conversion,2022-01-01 - C,C2022-01-01,some_other_event,2022-01-01 - A,A2022-01-02,my_conversion,2022-01-02 - expect: - format: csv - rows: | - session_key,session_partition_key,session_partition_date,my_conversion_count - A,A2022-01-01,2022-01-01,2 - B,B2022-01-01,2022-01-01,1 - C,C2022-01-01,2022-01-01,0 - A,A2022-01-02,2022-01-02,1 - overrides: - macros: - is_incremental: false - vars: {conversion_events: ['my_conversion']} - - name: test_stg_ga4__session_conversions_daily_non_standard_event_name - description: Test whether the session-level count of conversions is correct - model: stg_ga4__session_conversions_daily - given: - - input: ref('stg_ga4__events') - format: csv - rows: | - session_key,session_partition_key,event_name,event_date_dt - A,A2022-01-01,page_view,2022-01-01 - A,A2022-01-01,my-conversion,2022-01-01 - A,A2022-01-01,my-conversion,2022-01-01 - B,B2022-01-01,my-conversion,2022-01-01 - C,C2022-01-01,some_other_event,2022-01-01 - A,A2022-01-02,my-conversion,2022-01-02 - expect: - format: csv - rows: | - 
session_key,session_partition_key,session_partition_date,my_conversion_count - A,A2022-01-01,2022-01-01,2 - B,B2022-01-01,2022-01-01,1 - C,C2022-01-01,2022-01-01,0 - A,A2022-01-02,2022-01-02,1 - overrides: - macros: - is_incremental: false - vars: {conversion_events: ['my-conversion']} +#unit_tests: +# - name: test_session_conversion_count +# description: Test whether the session-level count of conversions is correct +# model: stg_ga4__session_conversions_daily +# given: +# - input: ref('stg_ga4__events') +# format: csv +# rows: | +# session_key,session_partition_key,event_name,event_date_dt +# A,A2022-01-01,page_view,2022-01-01 +# A,A2022-01-01,my_conversion,2022-01-01 +# A,A2022-01-01,my_conversion,2022-01-01 +# B,B2022-01-01,my_conversion,2022-01-01 +# C,C2022-01-01,some_other_event,2022-01-01 +# A,A2022-01-02,my_conversion,2022-01-02 +# expect: +# format: csv +# rows: | +# session_key,session_partition_key,session_partition_date,my_conversion_count +# A,A2022-01-01,2022-01-01,2 +# B,B2022-01-01,2022-01-01,1 +# C,C2022-01-01,2022-01-01,0 +# A,A2022-01-02,2022-01-02,1 +# overrides: +# macros: +# is_incremental: false +# vars: {conversion_events: ['my_conversion']} +# - name: test_stg_ga4__session_conversions_daily_non_standard_event_name +# description: Test whether the session-level count of conversions is correct +# model: stg_ga4__session_conversions_daily +# given: +# - input: ref('stg_ga4__events') +# format: csv +# rows: | +# session_key,session_partition_key,event_name,event_date_dt +# A,A2022-01-01,page_view,2022-01-01 +# A,A2022-01-01,my-conversion,2022-01-01 +# A,A2022-01-01,my-conversion,2022-01-01 +# B,B2022-01-01,my-conversion,2022-01-01 +# C,C2022-01-01,some_other_event,2022-01-01 +# A,A2022-01-02,my-conversion,2022-01-02 +# expect: +# format: csv +# rows: | +# session_key,session_partition_key,session_partition_date,my_conversion_count +# A,A2022-01-01,2022-01-01,2 +# B,B2022-01-01,2022-01-01,1 +# C,C2022-01-01,2022-01-01,0 +# 
A,A2022-01-02,2022-01-02,1 +# overrides: +# macros: +# is_incremental: false +# vars: {conversion_events: ['my-conversion']} From 1dd415e00ebb8c14154434b4d5d0d9ea0cf90974 Mon Sep 17 00:00:00 2001 From: David Booke Date: Tue, 29 Oct 2024 11:15:57 -0500 Subject: [PATCH 32/47] Un-comment unit tests --- .../stg_ga4__derived_session_properties.yml | 70 ++++++------ .../stg_ga4__derived_user_properties.yml | 64 +++++------ models/staging/stg_ga4__page_conversions.yml | 78 ++++++------- .../stg_ga4__session_conversions_daily.yml | 106 +++++++++--------- 4 files changed, 159 insertions(+), 159 deletions(-) diff --git a/models/staging/stg_ga4__derived_session_properties.yml b/models/staging/stg_ga4__derived_session_properties.yml index 1c32ed5b..4e955733 100644 --- a/models/staging/stg_ga4__derived_session_properties.yml +++ b/models/staging/stg_ga4__derived_session_properties.yml @@ -9,38 +9,38 @@ models: - name: session_key tests: - unique -# unit_tests: -# - name: test_derived_session_properties -# description: Test whether a derived property is successfully retrieved from multiple event payloads -# model: stg_ga4__derived_session_properties -# given: -# - input: ref('stg_ga4__events') -# format: sql -# rows: | -# select -# 'AAA' as session_key -# , 1617691790431476 as event_timestamp -# , 'first_visit' as event_name -# , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params -# , ARRAY[STRUCT('my_property' as key, STRUCT('value1' as string_value) as value)] as user_properties -# union all -# select -# 'AAA' as session_key -# , 1617691790431477 as event_timestamp -# , 'first_visit' as event_name -# , ARRAY[STRUCT('my_param' as key, STRUCT(2 as int_value) as value)] as event_params -# , ARRAY[] as user_properties -# union all -# select -# 'BBB' as session_key -# , 1617691790431477 as event_timestamp -# , 'first_visit' as event_name -# , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params -# , 
ARRAY[STRUCT('my_property' as key, STRUCT('value2' as string_value) as value)] as user_properties -# expect: -# format: dict -# rows: -# - {session_key: AAA, my_derived_property: 2, my_derived_property2: value1} -# - {session_key: BBB, my_derived_property: 1, my_derived_property2: value2} -# overrides: -# vars: {derived_session_properties: [{event_parameter: 'my_param',session_property_name: 'my_derived_property',value_type: 'int_value'},{user_property: 'my_property',session_property_name: 'my_derived_property2',value_type: 'string_value'}]} +unit_tests: + - name: test_derived_session_properties + description: Test whether a derived property is successfully retrieved from multiple event payloads + model: stg_ga4__derived_session_properties + given: + - input: ref('stg_ga4__events') + format: sql + rows: | + select + 'AAA' as session_key + , 1617691790431476 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params + , ARRAY[STRUCT('my_property' as key, STRUCT('value1' as string_value) as value)] as user_properties + union all + select + 'AAA' as session_key + , 1617691790431477 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(2 as int_value) as value)] as event_params + , ARRAY[] as user_properties + union all + select + 'BBB' as session_key + , 1617691790431477 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params + , ARRAY[STRUCT('my_property' as key, STRUCT('value2' as string_value) as value)] as user_properties + expect: + format: dict + rows: + - {session_key: AAA, my_derived_property: 2, my_derived_property2: value1} + - {session_key: BBB, my_derived_property: 1, my_derived_property2: value2} + overrides: + vars: {derived_session_properties: [{event_parameter: 'my_param',session_property_name: 'my_derived_property',value_type: 'int_value'},{user_property: 
'my_property',session_property_name: 'my_derived_property2',value_type: 'string_value'}]} diff --git a/models/staging/stg_ga4__derived_user_properties.yml b/models/staging/stg_ga4__derived_user_properties.yml index 8664549c..c347e91c 100644 --- a/models/staging/stg_ga4__derived_user_properties.yml +++ b/models/staging/stg_ga4__derived_user_properties.yml @@ -8,35 +8,35 @@ models: description: Hashed combination of user_pseudo_id and stream_id tests: - unique -# unit_tests: -# - name: test_derived_user_properties -# description: Test whether a derived user property is successfully retrieved from multiple event payloads -# model: stg_ga4__derived_user_properties -# given: -# - input: ref('stg_ga4__events') -# format: sql -# rows: | -# select -# 'AAA' as client_key -# , 1617691790431476 as event_timestamp -# , 'first_visit' as event_name -# , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params -# union all -# select -# 'AAA' as client_key -# , 1617691790431477 as event_timestamp -# , 'first_visit' as event_name -# , ARRAY[STRUCT('my_param' as key, STRUCT(2 as int_value) as value)] as event_params -# union all -# select -# 'BBB' as client_key -# , 1617691790431477 as event_timestamp -# , 'first_visit' as event_name -# , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params -# expect: -# format: dict -# rows: -# - {client_key: AAA, my_derived_property: 2} -# - {client_key: BBB, my_derived_property: 1} -# overrides: -# vars: {derived_user_properties: [{event_parameter: 'my_param',user_property_name: 'my_derived_property',value_type: 'int_value'}]} +unit_tests: + - name: test_derived_user_properties + description: Test whether a derived user property is successfully retrieved from multiple event payloads + model: stg_ga4__derived_user_properties + given: + - input: ref('stg_ga4__events') + format: sql + rows: | + select + 'AAA' as client_key + , 1617691790431476 as event_timestamp + , 'first_visit' as event_name + 
, ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params + union all + select + 'AAA' as client_key + , 1617691790431477 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(2 as int_value) as value)] as event_params + union all + select + 'BBB' as client_key + , 1617691790431477 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params + expect: + format: dict + rows: + - {client_key: AAA, my_derived_property: 2} + - {client_key: BBB, my_derived_property: 1} + overrides: + vars: {derived_user_properties: [{event_parameter: 'my_param',user_property_name: 'my_derived_property',value_type: 'int_value'}]} diff --git a/models/staging/stg_ga4__page_conversions.yml b/models/staging/stg_ga4__page_conversions.yml index 88233bc1..2dfd6d55 100644 --- a/models/staging/stg_ga4__page_conversions.yml +++ b/models/staging/stg_ga4__page_conversions.yml @@ -3,42 +3,42 @@ version: 2 models: - name: stg_ga4__page_conversions description: Model that calculates the number of conversions per page. Conversions are defined as variables in the project configurations. 
-#unit_tests: -# - name: test_page_conversion_count -# description: Test whether the page-level count of conversions is correct -# model: stg_ga4__page_conversions -# given: -# - input: ref('stg_ga4__events') -# format: csv -# rows: | -# event_name,page_key -# page_view,A -# page_view,A -# page_view,B -# expect: -# format: csv -# rows: | -# page_key,page_view_count -# A,2 -# B,1 -# overrides: -# vars: {conversion_events: ['page_view']} -# - name: test_page_conversion_count_non_event_name -# description: Test whether the page-level count of conversions is correct -# model: stg_ga4__page_conversions -# given: -# - input: ref('stg_ga4__events') -# format: csv -# rows: | -# event_name,page_key -# page-view,A -# page-view,A -# page-view,B -# expect: -# format: csv -# rows: | -# page_key,page_view_count -# A,2 -# B,1 -# overrides: -# vars: {conversion_events: ['page-view']} +unit_tests: + - name: test_page_conversion_count + description: Test whether the page-level count of conversions is correct + model: stg_ga4__page_conversions + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + event_name,page_key + page_view,A + page_view,A + page_view,B + expect: + format: csv + rows: | + page_key,page_view_count + A,2 + B,1 + overrides: + vars: {conversion_events: ['page_view']} + - name: test_page_conversion_count_non_event_name + description: Test whether the page-level count of conversions is correct + model: stg_ga4__page_conversions + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + event_name,page_key + page-view,A + page-view,A + page-view,B + expect: + format: csv + rows: | + page_key,page_view_count + A,2 + B,1 + overrides: + vars: {conversion_events: ['page-view']} diff --git a/models/staging/stg_ga4__session_conversions_daily.yml b/models/staging/stg_ga4__session_conversions_daily.yml index 8c39425d..edc6f08b 100644 --- a/models/staging/stg_ga4__session_conversions_daily.yml +++ b/models/staging/stg_ga4__session_conversions_daily.yml 
@@ -10,56 +10,56 @@ models: - name: session_partition_key tests: - unique -#unit_tests: -# - name: test_session_conversion_count -# description: Test whether the session-level count of conversions is correct -# model: stg_ga4__session_conversions_daily -# given: -# - input: ref('stg_ga4__events') -# format: csv -# rows: | -# session_key,session_partition_key,event_name,event_date_dt -# A,A2022-01-01,page_view,2022-01-01 -# A,A2022-01-01,my_conversion,2022-01-01 -# A,A2022-01-01,my_conversion,2022-01-01 -# B,B2022-01-01,my_conversion,2022-01-01 -# C,C2022-01-01,some_other_event,2022-01-01 -# A,A2022-01-02,my_conversion,2022-01-02 -# expect: -# format: csv -# rows: | -# session_key,session_partition_key,session_partition_date,my_conversion_count -# A,A2022-01-01,2022-01-01,2 -# B,B2022-01-01,2022-01-01,1 -# C,C2022-01-01,2022-01-01,0 -# A,A2022-01-02,2022-01-02,1 -# overrides: -# macros: -# is_incremental: false -# vars: {conversion_events: ['my_conversion']} -# - name: test_stg_ga4__session_conversions_daily_non_standard_event_name -# description: Test whether the session-level count of conversions is correct -# model: stg_ga4__session_conversions_daily -# given: -# - input: ref('stg_ga4__events') -# format: csv -# rows: | -# session_key,session_partition_key,event_name,event_date_dt -# A,A2022-01-01,page_view,2022-01-01 -# A,A2022-01-01,my-conversion,2022-01-01 -# A,A2022-01-01,my-conversion,2022-01-01 -# B,B2022-01-01,my-conversion,2022-01-01 -# C,C2022-01-01,some_other_event,2022-01-01 -# A,A2022-01-02,my-conversion,2022-01-02 -# expect: -# format: csv -# rows: | -# session_key,session_partition_key,session_partition_date,my_conversion_count -# A,A2022-01-01,2022-01-01,2 -# B,B2022-01-01,2022-01-01,1 -# C,C2022-01-01,2022-01-01,0 -# A,A2022-01-02,2022-01-02,1 -# overrides: -# macros: -# is_incremental: false -# vars: {conversion_events: ['my-conversion']} +unit_tests: + - name: test_session_conversion_count + description: Test whether the session-level count of 
conversions is correct + model: stg_ga4__session_conversions_daily + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + session_key,session_partition_key,event_name,event_date_dt + A,A2022-01-01,page_view,2022-01-01 + A,A2022-01-01,my_conversion,2022-01-01 + A,A2022-01-01,my_conversion,2022-01-01 + B,B2022-01-01,my_conversion,2022-01-01 + C,C2022-01-01,some_other_event,2022-01-01 + A,A2022-01-02,my_conversion,2022-01-02 + expect: + format: csv + rows: | + session_key,session_partition_key,session_partition_date,my_conversion_count + A,A2022-01-01,2022-01-01,2 + B,B2022-01-01,2022-01-01,1 + C,C2022-01-01,2022-01-01,0 + A,A2022-01-02,2022-01-02,1 + overrides: + macros: + is_incremental: false + vars: {conversion_events: ['my_conversion']} + - name: test_stg_ga4__session_conversions_daily_non_standard_event_name + description: Test whether the session-level count of conversions is correct + model: stg_ga4__session_conversions_daily + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + session_key,session_partition_key,event_name,event_date_dt + A,A2022-01-01,page_view,2022-01-01 + A,A2022-01-01,my-conversion,2022-01-01 + A,A2022-01-01,my-conversion,2022-01-01 + B,B2022-01-01,my-conversion,2022-01-01 + C,C2022-01-01,some_other_event,2022-01-01 + A,A2022-01-02,my-conversion,2022-01-02 + expect: + format: csv + rows: | + session_key,session_partition_key,session_partition_date,my_conversion_count + A,A2022-01-01,2022-01-01,2 + B,B2022-01-01,2022-01-01,1 + C,C2022-01-01,2022-01-01,0 + A,A2022-01-02,2022-01-02,1 + overrides: + macros: + is_incremental: false + vars: {conversion_events: ['my-conversion']} From 4ef2503e2f05d0d5a29a3113a38f409e30857bf2 Mon Sep 17 00:00:00 2001 From: David Booke Date: Tue, 29 Oct 2024 11:18:08 -0500 Subject: [PATCH 33/47] Add profiles.yml for Github Actions to execute dbt commands and add .user.yml to gitignore --- .gitignore | 1 + profiles.yml | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 
profiles.yml diff --git a/.gitignore b/.gitignore index 3bf3158b..6565ca5f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ target/ dbt_packages/ logs/ package-lock.yml +.user.yml google-cloud-sdk/ unit_tests/.env diff --git a/profiles.yml b/profiles.yml new file mode 100644 index 00000000..3649cad7 --- /dev/null +++ b/profiles.yml @@ -0,0 +1,10 @@ +default: + target: bigquery + outputs: + bigquery: + type: bigquery + method: service-account + keyfile: "{{ env_var('BIGQUERY_KEYFILE') }}" + project: "{{ env_var('BIGQUERY_PROJECT') }}" + dataset: "{{ env_var('BIGQUERY_DATASET') }}" + timeout_seconds: 300 From 83bd23bc457e45839cd1585e708fd8ee455a6659 Mon Sep 17 00:00:00 2001 From: David Booke Date: Tue, 29 Oct 2024 11:18:53 -0500 Subject: [PATCH 34/47] Add profile and variables to dbt_project.yml so Github Action can run unit tests --- dbt_project.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/dbt_project.yml b/dbt_project.yml index a2f8bf71..d8201549 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -8,6 +8,24 @@ seed-paths: ["seeds"] macro-paths: ["macros"] snapshot-paths: ["snapshots"] +profile: 'default' + +vars: + source_project: "{{ env_var('BIGQUERY_PROJECT') }}" + property_ids: ["{{ env_var('BIGQUERY_PROPERTY_ID') }}"] + start_date: "20230306" + static_incremental_days: 3 + derived_session_properties: + - event_parameter: "page_location" + session_property_name: "most_recent_page_location" + value_type: "string_value" + derived_user_properties: + - event_parameter: "page_title" + user_property_name: "most_recent_page_title" + value_type: "string_value" + conversion_events: ['large_button_clicked', 'add_to_cart'] + session_attribution_lookback_window_days: 30 + target-path: "target" # directory which will store compiled SQL files clean-targets: # directories to be removed by `dbt clean` - "target" From 6f4335e80586becfc1407f1f213fa17a43823766 Mon Sep 17 00:00:00 2001 From: David Booke Date: Tue, 29 Oct 2024 11:21:52 -0500 
Subject: [PATCH 35/47] Add dbt unit tests job to github CI workflow --- .github/workflows/run_unit_tests_on_pr.yml | 33 ++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/.github/workflows/run_unit_tests_on_pr.yml b/.github/workflows/run_unit_tests_on_pr.yml index 6a2b6dd4..173f45c2 100644 --- a/.github/workflows/run_unit_tests_on_pr.yml +++ b/.github/workflows/run_unit_tests_on_pr.yml @@ -3,6 +3,9 @@ name: Run Unit Tests on Pull Request on: [pull_request_target,workflow_dispatch] env: BIGQUERY_PROJECT: ${{ secrets.BIGQUERY_PROJECT }} + BIGQUERY_PROPERTY_ID: ${{ secrets.BIGQUERY_PROPERTY_ID }} + BIGQUERY_DATASET: ${{ secrets.BIGQUERY_DATASET }} + BIGQUERY_KEYFILE: ./unit_tests/dbt-service-account.json jobs: pytest_run_all: @@ -35,3 +38,33 @@ jobs: - name: Run tests run: python -m pytest . + + run_dbt_unit_tests: + name: Run dbt Unit Tests + runs-on: ubuntu-latest + steps: + - name: Check out + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - uses: actions/setup-python@v1 + with: + python-version: "3.11.x" + + - name: Authenticate using service account + run: 'echo "$KEYFILE" > ./unit_tests/dbt-service-account.json' + shell: bash + env: + KEYFILE: ${{ secrets.GCP_BIGQUERY_USER_KEYFILE }} + + - name: + + - name: Install dbt + run: | + pip install dbt-core + pip install dbt-bigquery + dbt deps + + - name: Run dbt unit tests + run: dbt test -s test_type:unit From 947868d0657687ca7fdb2324dd6d2d281fd31a0b Mon Sep 17 00:00:00 2001 From: David Booke Date: Tue, 29 Oct 2024 11:25:50 -0500 Subject: [PATCH 36/47] Remove empty step --- .github/workflows/run_unit_tests_on_pr.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/run_unit_tests_on_pr.yml b/.github/workflows/run_unit_tests_on_pr.yml index 173f45c2..74487761 100644 --- a/.github/workflows/run_unit_tests_on_pr.yml +++ b/.github/workflows/run_unit_tests_on_pr.yml @@ -58,8 +58,6 @@ jobs: env: KEYFILE: ${{ secrets.GCP_BIGQUERY_USER_KEYFILE }} - 
- name: - - name: Install dbt run: | pip install dbt-core From 8c879f7909558516f8799bf814d1ad35b197039d Mon Sep 17 00:00:00 2001 From: David Booke Date: Tue, 29 Oct 2024 11:40:18 -0500 Subject: [PATCH 37/47] Add repo to checkout step so PR code is checked out to test adding new dbt unit test job --- .github/workflows/run_unit_tests_on_pr.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/run_unit_tests_on_pr.yml b/.github/workflows/run_unit_tests_on_pr.yml index 74487761..f3947def 100644 --- a/.github/workflows/run_unit_tests_on_pr.yml +++ b/.github/workflows/run_unit_tests_on_pr.yml @@ -19,6 +19,7 @@ jobs: uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} + repository: ${{ github.event.pull_request.head.repo.full_name }} - uses: actions/setup-python@v1 with: @@ -47,6 +48,7 @@ jobs: uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} + repository: ${{ github.event.pull_request.head.repo.full_name }} - uses: actions/setup-python@v1 with: From 95b3a603586733cda560c8182efe7d69b861c9e6 Mon Sep 17 00:00:00 2001 From: David Booke Date: Tue, 29 Oct 2024 16:09:46 -0500 Subject: [PATCH 38/47] Change workflow on behavior for testing changes --- .github/workflows/run_unit_tests_on_pr.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run_unit_tests_on_pr.yml b/.github/workflows/run_unit_tests_on_pr.yml index f3947def..11579394 100644 --- a/.github/workflows/run_unit_tests_on_pr.yml +++ b/.github/workflows/run_unit_tests_on_pr.yml @@ -1,6 +1,10 @@ name: Run Unit Tests on Pull Request -on: [pull_request_target,workflow_dispatch] +# on: [pull_request_target,workflow_dispatch] +on: + push: + branches: + - 'feature/dbt-unit-tests' env: BIGQUERY_PROJECT: ${{ secrets.BIGQUERY_PROJECT }} BIGQUERY_PROPERTY_ID: ${{ secrets.BIGQUERY_PROPERTY_ID }} @@ -19,7 +23,6 @@ jobs: uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} - repository: 
${{ github.event.pull_request.head.repo.full_name }} - uses: actions/setup-python@v1 with: @@ -48,7 +51,6 @@ jobs: uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} - repository: ${{ github.event.pull_request.head.repo.full_name }} - uses: actions/setup-python@v1 with: From 555671e55b0ef44203d1cd8b211ef79fbdb2c480 Mon Sep 17 00:00:00 2001 From: David Booke Date: Thu, 31 Oct 2024 09:06:29 -0500 Subject: [PATCH 39/47] Add comments related to unit tests and new Github Actions job to markdown files --- .github/pull_request_template.md | 2 +- README.md | 19 ++++++++++++++++++- TODO.md | 1 + 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index d1fccd01..d33751fe 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -7,4 +7,4 @@ Describe your changes, and why you're making them. - [ ] I have verified that these changes work locally - [ ] I have updated the README.md (if applicable) - [ ] I have added tests & descriptions to my models (and macros if applicable) -- [ ] I have run `dbt test` to validate existing tests +- [ ] I have run `dbt test` and `python -m pytest .` to validate existing tests diff --git a/README.md b/README.md index 2a57eec0..5e292fe8 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,8 @@ packages: ``` ## Required Variables -This package assumes that you have an existing DBT project with a BigQuery profile and a BigQuery GCP instance available with GA4 event data loaded. Source data is defined using the `project` and `dataset` variables below. The `static_incremental_days` variable defines how many days' worth of data to reprocess during incremental runs. +This package assumes that you have an existing DBT project with a BigQuery profile and a BigQuery GCP instance available with GA4 event data loaded. Source data is defined using the `project` and `property_ids` variables below. 
The `static_incremental_days` variable defines how many days' worth of data to reprocess during incremental runs. +The `start_date` variable defines the earliest date for which data is included and loaded into the models in this package. ``` vars: @@ -214,6 +215,9 @@ vars: value_type: "string_value" ``` +The `derived_user_properties` set in `dbt_project.yml` should either be updated to reflect the derived user properties for your project +or they should be removed if you don't wish to set any derived user properties. + ### Derived Session Properties Derived session properties are similar to derived user properties, but on a per-session basis, for properties that change slowly over time. This provides additional flexibility in allowing users to turn any event parameter into a session property. @@ -247,6 +251,9 @@ vars: value_type: "int_value" ``` +The `derived_session_properties` set in `dbt_project.yml` should either be updated to reflect the derived session properties for your project +or they should be removed if you don't wish to set any derived session properties. + ### GA4 Recommended Events See the README file at /dbt_packages/models/staging/recommended_events for instructions on enabling [Google's recommended events](https://support.google.com/analytics/answer/9267735?hl=en). @@ -261,6 +268,9 @@ vars: conversion_events: ['purchase','download'] ``` +The `conversion_events` set in `dbt_project.yml` should either be updated to reflect the conversion events for your project +or they should be removed if you don't wish to set any conversion events. + ### Session Attribution Lookback Window The `stg_ga4__sessions_traffic_sources_last_non_direct_daily` model provides last non-direct session attribution within a configurable lookback window. The default is 30 days, but this can be overridden with the `session_attribution_lookback_window_days` variable. @@ -302,6 +312,9 @@ The easiest option is using OAuth with your Google Account. 
Summarized instructi ``` gcloud auth application-default login --scopes=https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/iam.test ``` + +The `profiles.yml` file included in this package should be removed. The `profile: 'default'` line in `dbt_project.yml` in this package should also be removed. + # Unit Testing The dbt-ga4 package treats each model and macro as a 'unit' of code. If we fix the input to each unit, we can test that we received the expected output. @@ -320,6 +333,10 @@ Execute all tests configured for a model: ``` dbt test -s ``` +Execute all dbt unit tests: +``` +dbt test -s test_type:unit +``` ### pytest diff --git a/TODO.md b/TODO.md index 84ca8488..ee262afc 100644 --- a/TODO.md +++ b/TODO.md @@ -22,6 +22,7 @@ - Configuration and dynamic templates to create custom event tables and dimensions - Configuration to create custom dimensions (session, user, event_*) from event parameters - Use Fivetran's `union_data` method (or something similar) to handle multiple, unioned GA4 exports. https://github.com/fivetran/dbt_xero_source/blob/main/models/tmp/stg_xero__account_tmp.sql +- Un-comment unit test in `stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml` once [this bug](https://github.com/dbt-labs/dbt-core/issues/10353) is resolved. Once that is complete, the `unit_tests` folder pertaining to the `pytest` unit tests should be removed along with the `pytest_run_all` job in `run_unit_tests_on_pr.yml`. 
## Misc From f198262e80e973f120982d0b83c98dbf5f0b31d5 Mon Sep 17 00:00:00 2001 From: David Booke Date: Thu, 31 Oct 2024 09:09:00 -0500 Subject: [PATCH 40/47] Make updates for dbt unit test Github Action and allow for use of environment variables instead of project variables --- .github/workflows/run_unit_tests_on_pr.yml | 11 ++++++----- dbt_project.yml | 5 ++--- macros/base_select.sql | 3 ++- models/staging/src_ga4.yml | 6 ++++-- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/.github/workflows/run_unit_tests_on_pr.yml b/.github/workflows/run_unit_tests_on_pr.yml index 11579394..61e5211f 100644 --- a/.github/workflows/run_unit_tests_on_pr.yml +++ b/.github/workflows/run_unit_tests_on_pr.yml @@ -1,10 +1,6 @@ name: Run Unit Tests on Pull Request -# on: [pull_request_target,workflow_dispatch] -on: - push: - branches: - - 'feature/dbt-unit-tests' +on: [pull_request_target,workflow_dispatch] env: BIGQUERY_PROJECT: ${{ secrets.BIGQUERY_PROJECT }} BIGQUERY_PROPERTY_ID: ${{ secrets.BIGQUERY_PROPERTY_ID }} @@ -68,5 +64,10 @@ jobs: pip install dbt-bigquery dbt deps + - name: Materialize necessary dbt resources + run: | + dbt seed -f + dbt run -s +test_type:unit -f --empty + - name: Run dbt unit tests run: dbt test -s test_type:unit diff --git a/dbt_project.yml b/dbt_project.yml index d8201549..10f76dca 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -10,9 +10,9 @@ snapshot-paths: ["snapshots"] profile: 'default' +# These variables are used for unit tests during CI for the package +# These variables should either be removed or updated to reflect the needs of your GA data and dbt project vars: - source_project: "{{ env_var('BIGQUERY_PROJECT') }}" - property_ids: ["{{ env_var('BIGQUERY_PROPERTY_ID') }}"] start_date: "20230306" static_incremental_days: 3 derived_session_properties: @@ -24,7 +24,6 @@ vars: user_property_name: "most_recent_page_title" value_type: "string_value" conversion_events: ['large_button_clicked', 'add_to_cart'] - 
session_attribution_lookback_window_days: 30 target-path: "target" # directory which will store compiled SQL files clean-targets: # directories to be removed by `dbt clean` diff --git a/macros/base_select.sql b/macros/base_select.sql index 8919388f..7603d446 100644 --- a/macros/base_select.sql +++ b/macros/base_select.sql @@ -36,7 +36,8 @@ , ecommerce.transaction_id , items , {%- if var('combined_dataset', false) != false %} cast(left(regexp_replace(_table_suffix, r'^(intraday_)?\d{8}', ''), 100) as int64) - {%- else %} {{ var('property_ids')[0] }} + {%- elif var('property_ids', false) != false %} {{ var('property_ids')[0] }} + {%- else %} {{ env_var('BIGQUERY_PROPERTY_ID') }} {%- endif %} as property_id {% endmacro %} diff --git a/models/staging/src_ga4.yml b/models/staging/src_ga4.yml index 29104767..8f29701d 100644 --- a/models/staging/src_ga4.yml +++ b/models/staging/src_ga4.yml @@ -4,11 +4,13 @@ sources: - name: ga4 database: | # Source from target.project if multi-property, otherwise source from source_project {%- if var('combined_dataset', false) != false -%} {{target.project}} - {%- else -%} {{var('source_project')}} + {%- elif var('source_project', false) != false -%} {{var('source_project')}} + {%- else -%} {{env_var('BIGQUERY_PROJECT')}} {%- endif -%} schema: | # Source from combined property dataset if set, otherwise source from original GA4 property {%- if var('combined_dataset', false) != false -%} {{var('combined_dataset')}} - {%- else -%} analytics_{{var('property_ids')[0]}} + {%- elif var('property_ids', false) != false -%} analytics_{{var('property_ids')[0]}} + {%- else -%} analytics_{{env_var('BIGQUERY_PROPERTY_ID')}} {%- endif -%} tables: - name: events From 621e429d25d3338950d6378973d6ca68cc1c6a29 Mon Sep 17 00:00:00 2001 From: David Booke Date: Thu, 31 Oct 2024 09:09:34 -0500 Subject: [PATCH 41/47] Add conditional logic to allow for use of --empty flag --- models/staging/base/base_ga4__events.sql | 8 +++++--- 1 file changed, 5 insertions(+), 3 
deletions(-) diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 533dbc0f..b045a4ab 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -21,9 +21,11 @@ with source as ( select {{ ga4.base_select_source() }} from {{ source('ga4', 'events') }} - where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{var('start_date')}} - {% if is_incremental() %} - and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }}) + {% if not flags.EMPTY %} + where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{var('start_date')}} + {% if is_incremental() %} + and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }}) + {% endif %} {% endif %} ), renamed as ( From c82a36fa352137be90709d53ef27e283338d928a Mon Sep 17 00:00:00 2001 From: David Booke Date: Thu, 31 Oct 2024 09:40:34 -0500 Subject: [PATCH 42/47] Fix spacing for comments added to README.md --- README.md | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 5e292fe8..02f0aebd 100644 --- a/README.md +++ b/README.md @@ -72,8 +72,7 @@ packages: ``` ## Required Variables -This package assumes that you have an existing DBT project with a BigQuery profile and a BigQuery GCP instance available with GA4 event data loaded. Source data is defined using the `project` and `property_ids` variables below. The `static_incremental_days` variable defines how many days' worth of data to reprocess during incremental runs. -The `start_date` variable defines the earliest date for which data is included and loaded into the models in this package. +This package assumes that you have an existing DBT project with a BigQuery profile and a BigQuery GCP instance available with GA4 event data loaded. 
Source data is defined using the `project` and `property_ids` variables below. The `static_incremental_days` variable defines how many days' worth of data to reprocess during incremental runs. The `start_date` variable defines the earliest date for which data is included and loaded into the models in this package. ``` vars: @@ -215,8 +214,7 @@ vars: value_type: "string_value" ``` -The `derived_user_properties` set in `dbt_project.yml` should either be updated to reflect the derived user properties for your project -or they should be removed if you don't wish to set any derived user properties. +The `derived_user_properties` set in `dbt_project.yml` should either be updated to reflect the derived user properties for your project or they should be removed if you don't wish to set any derived user properties. ### Derived Session Properties @@ -251,8 +249,7 @@ vars: value_type: "int_value" ``` -The `derived_session_properties` set in `dbt_project.yml` should either be updated to reflect the derived session properties for your project -or they should be removed if you don't wish to set any derived session properties. +The `derived_session_properties` set in `dbt_project.yml` should either be updated to reflect the derived session properties for your project or they should be removed if you don't wish to set any derived session properties. ### GA4 Recommended Events @@ -268,8 +265,7 @@ vars: conversion_events: ['purchase','download'] ``` -The `conversion_events` set in `dbt_project.yml` should either be updated to reflect the conversion events for your project -or they should be removed if you don't wish to set any conversion events. +The `conversion_events` set in `dbt_project.yml` should either be updated to reflect the conversion events for your project or they should be removed if you don't wish to set any conversion events. 
### Session Attribution Lookback Window From 0ae02cc27e7dac4bd7f372de7f7895b31574ded5 Mon Sep 17 00:00:00 2001 From: David Booke Date: Tue, 5 Nov 2024 08:40:13 -0600 Subject: [PATCH 43/47] Enable models dependent on project variables if environment variables exist --- models/staging/stg_ga4__derived_session_properties.sql | 2 +- models/staging/stg_ga4__derived_session_properties_daily.sql | 2 +- models/staging/stg_ga4__derived_user_properties.sql | 2 +- models/staging/stg_ga4__page_conversions.sql | 2 +- models/staging/stg_ga4__session_conversions_daily.sql | 2 +- models/staging/stg_ga4__user_properties.sql | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/models/staging/stg_ga4__derived_session_properties.sql b/models/staging/stg_ga4__derived_session_properties.sql index 65fbcfd6..0b4816f7 100644 --- a/models/staging/stg_ga4__derived_session_properties.sql +++ b/models/staging/stg_ga4__derived_session_properties.sql @@ -1,5 +1,5 @@ {{ config( - enabled = true if var('derived_session_properties', false) else false, + enabled = true if var('derived_session_properties', false) or env_var('GA4_DERIVED_SESSION_PROPERTIES', false) else false, materialized = "table" ) }} diff --git a/models/staging/stg_ga4__derived_session_properties_daily.sql b/models/staging/stg_ga4__derived_session_properties_daily.sql index f997d40b..b3042d37 100644 --- a/models/staging/stg_ga4__derived_session_properties_daily.sql +++ b/models/staging/stg_ga4__derived_session_properties_daily.sql @@ -4,7 +4,7 @@ {% endfor %} {{ config( - enabled = true if var('derived_session_properties', false) else false, + enabled = true if var('derived_session_properties', false) or env_var('GA4_DERIVED_SESSION_PROPERTIES', false) else false, materialized = 'incremental', incremental_strategy = 'insert_overwrite', tags = ["incremental"], diff --git a/models/staging/stg_ga4__derived_user_properties.sql b/models/staging/stg_ga4__derived_user_properties.sql index ec1fd6b6..b3bd666b 100644 --- 
a/models/staging/stg_ga4__derived_user_properties.sql +++ b/models/staging/stg_ga4__derived_user_properties.sql @@ -1,5 +1,5 @@ {{ config( - enabled = true if var('derived_user_properties', false) else false, + enabled = true if var('derived_user_properties', false) or env_var('GA4_DERIVED_USER_PROPERTIES', false) else false, materialized = "table" ) }} diff --git a/models/staging/stg_ga4__page_conversions.sql b/models/staging/stg_ga4__page_conversions.sql index ea5a7f57..538f94fd 100644 --- a/models/staging/stg_ga4__page_conversions.sql +++ b/models/staging/stg_ga4__page_conversions.sql @@ -1,5 +1,5 @@ {{ config( - enabled= var('conversion_events', false) != false + enabled= var('conversion_events', false) != false or env_var('GA4_CONVERSION_EVENTS', false) != false ) }} select diff --git a/models/staging/stg_ga4__session_conversions_daily.sql b/models/staging/stg_ga4__session_conversions_daily.sql index 983657c5..d49b0f9f 100644 --- a/models/staging/stg_ga4__session_conversions_daily.sql +++ b/models/staging/stg_ga4__session_conversions_daily.sql @@ -8,7 +8,7 @@ {{ config( - enabled= var('conversion_events', false) != false, + enabled= var('conversion_events', false) != false or env_var('GA4_CONVERSION_EVENTS', false) != false, materialized = 'incremental', incremental_strategy = 'insert_overwrite', tags = ["incremental"], diff --git a/models/staging/stg_ga4__user_properties.sql b/models/staging/stg_ga4__user_properties.sql index c9deaba5..ff531c8e 100644 --- a/models/staging/stg_ga4__user_properties.sql +++ b/models/staging/stg_ga4__user_properties.sql @@ -1,5 +1,5 @@ {{ config( - enabled = true if var('user_properties', false) else false, + enabled = true if var('user_properties', false) or env_var('GA4_DERIVED_USER_PROPERTIES', false) else false, materialized = "table" ) }} From 99a50c85b60c8f168b2b18c1608e797809a23624 Mon Sep 17 00:00:00 2001 From: David Booke Date: Tue, 5 Nov 2024 08:48:29 -0600 Subject: [PATCH 44/47] Set start_date to environment variable 
if it exists --- models/staging/base/base_ga4__events.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index b045a4ab..698068a0 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -3,6 +3,8 @@ {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} +{%- set start_date = env_var('GA4_START_DATE') if env_var('GA4_START_DATE', false) else var('start_date') -%} + {{ config( pre_hook="{{ ga4.combine_property_data() }}" if var('combined_dataset', false) else "", @@ -22,7 +24,7 @@ with source as ( {{ ga4.base_select_source() }} from {{ source('ga4', 'events') }} {% if not flags.EMPTY %} - where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{var('start_date')}} + where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{ start_date }} {% if is_incremental() %} and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }}) {% endif %} From cd35ef940e542afaa16154c1f97b767d9bafc2f7 Mon Sep 17 00:00:00 2001 From: David Booke Date: Tue, 5 Nov 2024 10:09:56 -0600 Subject: [PATCH 45/47] Remove variables from dbt_project.yml and have models look for incremental days environment variable before using project variable --- dbt_project.yml | 15 --------------- models/marts/core/dim_ga4__sessions_daily.sql | 2 +- models/marts/core/fct_ga4__pages.sql | 2 +- models/marts/core/fct_ga4__sessions_daily.sql | 2 +- models/staging/base/base_ga4__events.sql | 6 ++---- .../stg_ga4__event_purchase_deduplicated.sql | 2 +- .../stg_ga4__derived_session_properties_daily.sql | 2 +- .../stg_ga4__session_conversions_daily.sql | 2 +- .../stg_ga4__sessions_traffic_sources_daily.sql | 2 +- ...ions_traffic_sources_last_non_direct_daily.sql | 2 +- 10 files changed, 10 
insertions(+), 27 deletions(-) diff --git a/dbt_project.yml b/dbt_project.yml index 10f76dca..cdc786ff 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -10,21 +10,6 @@ snapshot-paths: ["snapshots"] profile: 'default' -# These variables are used for unit tests during CI for the package -# These variables should either be removed or updated to reflect the needs of your GA data and dbt project -vars: - start_date: "20230306" - static_incremental_days: 3 - derived_session_properties: - - event_parameter: "page_location" - session_property_name: "most_recent_page_location" - value_type: "string_value" - derived_user_properties: - - event_parameter: "page_title" - user_property_name: "most_recent_page_title" - value_type: "string_value" - conversion_events: ['large_button_clicked', 'add_to_cart'] - target-path: "target" # directory which will store compiled SQL files clean-targets: # directories to be removed by `dbt clean` - "target" diff --git a/models/marts/core/dim_ga4__sessions_daily.sql b/models/marts/core/dim_ga4__sessions_daily.sql index 8273dd74..854af34d 100644 --- a/models/marts/core/dim_ga4__sessions_daily.sql +++ b/models/marts/core/dim_ga4__sessions_daily.sql @@ -1,5 +1,5 @@ {% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {{ diff --git a/models/marts/core/fct_ga4__pages.sql b/models/marts/core/fct_ga4__pages.sql index ca37ac0b..85aad420 100644 --- a/models/marts/core/fct_ga4__pages.sql +++ b/models/marts/core/fct_ga4__pages.sql @@ -1,5 +1,5 @@ {% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if 
env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {{ diff --git a/models/marts/core/fct_ga4__sessions_daily.sql b/models/marts/core/fct_ga4__sessions_daily.sql index 08c35798..7b09a975 100644 --- a/models/marts/core/fct_ga4__sessions_daily.sql +++ b/models/marts/core/fct_ga4__sessions_daily.sql @@ -1,5 +1,5 @@ {% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {{ diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 698068a0..bce14406 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -1,10 +1,8 @@ {% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} -{%- set start_date = env_var('GA4_START_DATE') if env_var('GA4_START_DATE', false) else var('start_date') -%} - {{ config( pre_hook="{{ ga4.combine_property_data() }}" if var('combined_dataset', false) else "", @@ -24,7 +22,7 @@ with source as ( {{ ga4.base_select_source() }} from {{ source('ga4', 'events') }} {% if not flags.EMPTY %} - where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{ start_date }} + where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{ 
env_var('GA4_START_DATE') if env_var('GA4_START_DATE', false) else var('start_date') }} {% if is_incremental() %} and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }}) {% endif %} diff --git a/models/staging/recommended_events/stg_ga4__event_purchase_deduplicated.sql b/models/staging/recommended_events/stg_ga4__event_purchase_deduplicated.sql index ce5aeae1..66488bee 100644 --- a/models/staging/recommended_events/stg_ga4__event_purchase_deduplicated.sql +++ b/models/staging/recommended_events/stg_ga4__event_purchase_deduplicated.sql @@ -1,6 +1,6 @@ {% if not flags.FULL_REFRESH %} {% set partitions_to_query = ['current_date'] %} - {% for i in range(var('static_incremental_days', 1)) %} + {% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_query = partitions_to_query.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {% endif %} diff --git a/models/staging/stg_ga4__derived_session_properties_daily.sql b/models/staging/stg_ga4__derived_session_properties_daily.sql index b3042d37..487c29e2 100644 --- a/models/staging/stg_ga4__derived_session_properties_daily.sql +++ b/models/staging/stg_ga4__derived_session_properties_daily.sql @@ -1,5 +1,5 @@ {% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {{ diff --git a/models/staging/stg_ga4__session_conversions_daily.sql b/models/staging/stg_ga4__session_conversions_daily.sql index d49b0f9f..5ac65a36 100644 --- a/models/staging/stg_ga4__session_conversions_daily.sql +++ 
b/models/staging/stg_ga4__session_conversions_daily.sql @@ -1,7 +1,7 @@ {% set partitions_to_replace = ['current_date'] %} {% if is_incremental() %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {% endif %} diff --git a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql index 97c9763f..eaa7c252 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql @@ -1,5 +1,5 @@ {% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {{ diff --git a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql index 188ad069..38298312 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql @@ -1,7 +1,7 @@ {% set partitions_to_replace = ['current_date'] %} {% if is_incremental() %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {% endif %} From 
ac415db6765b502df686f0638304d2969336b925 Mon Sep 17 00:00:00 2001 From: David Booke Date: Tue, 5 Nov 2024 10:11:54 -0600 Subject: [PATCH 46/47] Add more environment variables to CI workflow --- .github/workflows/run_unit_tests_on_pr.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/run_unit_tests_on_pr.yml b/.github/workflows/run_unit_tests_on_pr.yml index 61e5211f..9790836e 100644 --- a/.github/workflows/run_unit_tests_on_pr.yml +++ b/.github/workflows/run_unit_tests_on_pr.yml @@ -6,6 +6,11 @@ env: BIGQUERY_PROPERTY_ID: ${{ secrets.BIGQUERY_PROPERTY_ID }} BIGQUERY_DATASET: ${{ secrets.BIGQUERY_DATASET }} BIGQUERY_KEYFILE: ./unit_tests/dbt-service-account.json + GA4_CONVERSION_EVENTS: ${{ vars.GA4_CONVERSION_EVENTS }} + GA4_DERIVED_SESSION_PROPERTIES: ${{ vars.GA4_DERIVED_SESSION_PROPERTIES }} + GA4_DERIVED_USER_PROPERTIES: ${{ vars.GA4_DERIVED_USER_PROPERTIES }} + GA4_INCREMENTAL_DAYS: ${{ vars.GA4_INCREMENTAL_DAYS }} + GA4_START_DATE: ${{ vars.GA4_START_DATE }} jobs: pytest_run_all: From 7e44907d411f1c83fcea6cf7b720d7cd539f223f Mon Sep 17 00:00:00 2001 From: David Booke Date: Tue, 5 Nov 2024 10:16:21 -0600 Subject: [PATCH 47/47] Update README after removing project variables in dbt_project.yml --- README.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/README.md b/README.md index 02f0aebd..1c1c3b61 100644 --- a/README.md +++ b/README.md @@ -214,8 +214,6 @@ vars: value_type: "string_value" ``` -The `derived_user_properties` set in `dbt_project.yml` should either be updated to reflect the derived user properties for your project or they should be removed if you don't wish to set any derived user properties. - ### Derived Session Properties Derived session properties are similar to derived user properties, but on a per-session basis, for properties that change slowly over time. This provides additional flexibility in allowing users to turn any event parameter into a session property. 
@@ -249,8 +247,6 @@ vars: value_type: "int_value" ``` -The `derived_session_properties` set in `dbt_project.yml` should either be updated to reflect the derived session properties for your project or they should be removed if you don't wish to set any derived session properties. - ### GA4 Recommended Events See the README file at /dbt_packages/models/staging/recommended_events for instructions on enabling [Google's recommended events](https://support.google.com/analytics/answer/9267735?hl=en). @@ -265,8 +261,6 @@ vars: conversion_events: ['purchase','download'] ``` -The `conversion_events` set in `dbt_project.yml` should either be updated to reflect the conversion events for your project or they should be removed if you don't wish to set any conversion events. - ### Session Attribution Lookback Window The `stg_ga4__sessions_traffic_sources_last_non_direct_daily` model provides last non-direct session attribution within a configurable lookback window. The default is 30 days, but this can be overridden with the `session_attribution_lookback_window_days` variable. @@ -309,8 +303,6 @@ The easiest option is using OAuth with your Google Account. Summarized instructi gcloud auth application-default login --scopes=https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/iam.test ``` -The `profiles.yml` file included in this package should be removed. The `profile: 'default'` line in `dbt_project.yml` in this package should also be removed. - # Unit Testing The dbt-ga4 package treats each model and macro as a 'unit' of code. If we fix the input to each unit, we can test that we received the expected output.