diff --git a/samples/snowplow-web-analytics/readme.md b/samples/snowplow-web-analytics/readme.md
new file mode 100644
index 0000000..6ac289b
--- /dev/null
+++ b/samples/snowplow-web-analytics/readme.md
@@ -0,0 +1,3 @@
+# Snowplow Digital Analytics with Snowflake
+
+We provide a notebook for use in [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks), to be followed along with our [tutorial](https://quickstarts.snowflake.com/guide/snowplow-web-analytics).
\ No newline at end of file
diff --git a/samples/snowplow-web-analytics/snowplow_derived_data_exploration.ipynb b/samples/snowplow-web-analytics/snowplow_derived_data_exploration.ipynb
new file mode 100644
index 0000000..442e32e
--- /dev/null
+++ b/samples/snowplow-web-analytics/snowplow_derived_data_exploration.ipynb
@@ -0,0 +1,166 @@
+{
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Streamlit Notebook",
+   "name": "streamlit"
+  }
+ },
+ "nbformat_minor": 5,
+ "nbformat": 4,
+ "cells": [
+  {
+   "cell_type": "code",
+   "id": "3775908f-ca36-4846-8f38-5adca39217f2",
+   "metadata": {
+    "language": "python",
+    "name": "cell1"
+   },
+   "source": "# Import python packages\nimport streamlit as st\nimport pandas as pd\n\n# We can also use Snowpark for our analyses!\nfrom snowflake.snowpark.context import get_active_session\nsession = get_active_session()\n",
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "id": "8d50cbf4-0c8d-4950-86cb-114990437ac9",
+   "metadata": {
+    "language": "sql",
+    "name": "cell2"
+   },
+   "source": "SHOW TABLES IN SCHEMA snowplow.derived;",
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "id": "c695373e-ac74-4b62-a1f1-08206cbd5c81",
+   "metadata": {
+    "language": "sql",
+    "name": "cell3"
+   },
+   "source": "SELECT * FROM snowplow.derived.snowplow_unified_users LIMIT 5;",
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "id": "f48e1d0d-1578-4727-a46b-dcb78daa91a0",
+   "metadata": {
+    "language": "sql",
+    "name": "cell4"
+   },
+   "outputs": [],
+   "source": "SELECT\n DATE_TRUNC('day', START_TSTAMP) AS first_session_date,\n COUNT(*) AS new_users\nFROM\n snowplow.derived.snowplow_unified_users\nGROUP BY\n first_session_date\nORDER BY\n first_session_date;",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "d903087d-c987-4a37-8687-335fbe08371d",
+   "metadata": {
+    "language": "sql",
+    "name": "cell5"
+   },
+   "outputs": [],
+   "source": "SELECT\n USER_ID,\n SESSIONS,\n ENGAGED_TIME_IN_S\nFROM snowplow.derived.snowplow_unified_users\nORDER BY\n ENGAGED_TIME_IN_S DESC\nLIMIT 10;",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "0bce5d61-1b09-4582-86cd-7e1fb968acbd",
+   "metadata": {
+    "language": "sql",
+    "name": "cell6"
+   },
+   "outputs": [],
+   "source": "SELECT\n AVG(engaged_time_in_s) AS average_session_duration\nFROM\n snowplow.derived.snowplow_unified_sessions;",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "3dc3730a-8590-4322-a1f4-11f6e81a48ee",
+   "metadata": {
+    "language": "sql",
+    "name": "cell7"
+   },
+   "outputs": [],
+   "source": "SELECT\n DATE_TRUNC('day', START_TSTAMP) AS session_date,\n COUNT(*) AS session_count\nFROM\n snowplow.derived.snowplow_unified_sessions\nWHERE\n START_TSTAMP >= DATEADD('day', -7, CURRENT_DATE)\nGROUP BY\n session_date\nORDER BY\n session_date;",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "a468690c-84c5-4258-bbaa-b31e39b218f0",
+   "metadata": {
+    "language": "sql",
+    "name": "cell8"
+   },
+   "outputs": [],
+   "source": "SELECT\n user_id,\n COUNT(*) AS session_count\nFROM snowplow.derived.snowplow_unified_sessions\nGROUP BY\n user_id\nORDER BY\n session_count DESC\nLIMIT 10;",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "a2b2ee06-26d0-4f71-be36-7e436e56b61e",
+   "metadata": {
+    "language": "sql",
+    "name": "cell9"
+   },
+   "outputs": [],
+   "source": "SELECT\n page_urlpath,\n COUNT(*) AS view_count\nFROM snowplow.derived.snowplow_unified_views\nGROUP BY\n page_urlpath\nORDER BY\n view_count DESC\nLIMIT 10;",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "72dc36d7-5216-4f32-b089-22cd5c415bd6",
+   "metadata": {
+    "language": "sql",
+    "name": "cell10"
+   },
+   "outputs": [],
+   "source": "SELECT\n page_urlpath,\n AVG(ENGAGED_TIME_IN_S) AS average_time_spent\nFROM snowplow.derived.snowplow_unified_views\nGROUP BY page_urlpath\nORDER BY average_time_spent DESC\nLIMIT 10;",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "40e49120-c3d2-4427-80d2-8fa5e8d781d6",
+   "metadata": {
+    "language": "sql",
+    "name": "cell11"
+   },
+   "outputs": [],
+   "source": "SELECT\n DEVICE_CATEGORY,\n COUNT(*) AS view_count\nFROM snowplow.derived.snowplow_unified_views\nGROUP BY DEVICE_CATEGORY\nORDER BY view_count DESC;",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "25021f3b-30a5-475e-9ab3-538b12cbb281",
+   "metadata": {
+    "language": "sql",
+    "name": "cell12"
+   },
+   "outputs": [],
+   "source": "SELECT\n FIRST_GEO_COUNTRY,\n COUNT(*) AS session_count\nFROM snowplow.derived.snowplow_unified_sessions\nGROUP BY FIRST_GEO_COUNTRY\nORDER BY session_count DESC;",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "1f01e289-823c-4750-8fc5-3d624c6e38c2",
+   "metadata": {
+    "language": "sql",
+    "name": "cell13"
+   },
+   "outputs": [],
+   "source": "SELECT\n FIRST_GEO_REGION_NAME,\n COUNT(*) AS new_user_count\nFROM snowplow.derived.snowplow_unified_users\nGROUP BY FIRST_GEO_REGION_NAME\nORDER BY new_user_count DESC;",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "42d1371a-5218-4e7a-822e-e9a5a2bc31ac",
+   "metadata": {
+    "language": "sql",
+    "name": "cell14"
+   },
+   "outputs": [],
+   "source": "SELECT\n user_id,\n ENGAGED_TIME_IN_S\nFROM snowplow.derived.snowplow_unified_users\nWHERE\n ENGAGED_TIME_IN_S > (\n SELECT AVG(ENGAGED_TIME_IN_S) FROM snowplow.derived.snowplow_unified_users\n )\nORDER BY ENGAGED_TIME_IN_S DESC;",
+   "execution_count": null
+  }
+ ]
+}
\ No newline at end of file