Snowflake-Labs · wluna01 · Dec 12, 2024 · Dec 13, 2024 · Dec 18, 2024 · Dec 19, 2024
diff --git a/samples/notebooks/anaconda_webinar/README.md b/samples/notebooks/anaconda_webinar/README.md
@@ -0,0 +1,2 @@
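+# Anaconda Webinar Notebook
+Snowflake Notebook demo covering growth accounting, forecasting with Prophet, and customer segmentation with Cortex `classify_text`.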
diff --git a/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb b/samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
@@ -0,0 +1,326 @@
+{
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Streamlit Notebook",
+   "name": "streamlit"
+  }
+ },
+ "nbformat_minor": 5,
+ "nbformat": 4,
+ "cells": [
+  {
+   "cell_type": "code",
+   "id": "3775908f-ca36-4846-8f38-5adca39217f2",
+   "metadata": {
+    "language": "python",
+    "name": "cell1",
+    "collapsed": false,
+    "resultHeight": 0
+   },
+   "source": "from snowflake.snowpark.context import get_active_session\nsession = get_active_session()\n\nimport logging\nlogging.getLogger(\"cmdstanpy\").setLevel(logging.WARNING)\nimport warnings\nwarnings.filterwarnings('ignore', category=FutureWarning)",
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8ae58f97-bb31-4290-b2dd-2416f3c2ce15",
+   "metadata": {
+    "name": "cell9",
+    "collapsed": false,
+    "resultHeight": 74
+   },
+   "source": "# Growth Accounting\n"
+  },
+  {
+   "cell_type": "code",
+   "id": "435baefb-25ff-42a1-b4f8-236a98b4afac",
+   "metadata": {
+    "language": "sql",
+    "name": "cell3",
+    "collapsed": false,
+    "resultHeight": 510
+   },
+   "outputs": [],
+   "source": "select\n    o_custkey as id,\n    date_trunc(year, o_orderdate) as order_year,\n    sum(o_totalprice) as total\nfrom SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\ngroup by all\norder by id, order_year",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "61f451db-8ff2-4d83-b9be-6c1a77365446",
+   "metadata": {
+    "language": "python",
+    "name": "cell12",
+    "collapsed": false,
+    "resultHeight": 0
+   },
+   "outputs": [],
+   "source": "import pandas as pd",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "20f1dd62-d796-4190-b34a-89a16fea1819",
+   "metadata": {
+    "language": "python",
+    "name": "cell10",
+    "collapsed": false,
+    "resultHeight": 0
+   },
+   "outputs": [],
+   "source": "df = cell3.to_pandas()\n\n#pivot data to add row for each id:year with no revenue\nresult = df.pivot_table(\n    index='ID',\n    columns='ORDER_YEAR', \n    values='TOTAL',\n    fill_value=0\n).reset_index().melt(\n    id_vars='ID',\n    var_name='ORDER_YEAR',\n    value_name='TOTAL'\n)\n\n# save the dataframe as table for SQL querying \ndf = session.create_dataframe(result)\ndf.write.mode(\"overwrite\").save_as_table(\"df\", table_type=\"temporary\")",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "52ae5a36-e143-4ebb-b884-e17750b0c77f",
+   "metadata": {
+    "language": "sql",
+    "name": "cell7",
+    "collapsed": false,
+    "resultHeight": 426
+   },
+   "outputs": [],
+   "source": "select * from df\norder by id, order_year\nlimit 10",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "11971c03-53a7-4429-870a-4b51bbef7aca",
+   "metadata": {
+    "language": "sql",
+    "name": "cell6",
+    "collapsed": false,
+    "resultHeight": 159
+   },
+   "outputs": [],
+   "source": "with windowed as (\n    \n    select\n        *,\n        sum(total) over(partition by id order by order_year asc) as lifetime_spend,\n        coalesce(lag(total) over(partition by id order by order_year asc), 0) as previous_year_total,\n    from df\n\n)\n\nselect *,\n  case\n    when total = previous_year_total and total > 0 then 'retained'\n    when total > 0 and previous_year_total = 0 and lifetime_spend = total then 'new'\n    when total = 0 and previous_year_total > 0 then 'churned'\n    when total > previous_year_total and previous_year_total > 0 then 'expanded'\n    when total < previous_year_total and previous_year_total > 0 then 'contracted'\n    when total > 0 and previous_year_total = 0 and lifetime_spend > total then 'resurrected'\n  else 'irrelevant' end as category,\n  case category\n    when 'retained' then 0\n    when 'new' then total\n    when 'churned' then (-1 * previous_year_total)\n    when 'expanded' then total - previous_year_total\n    when 'contracted' then (-1 * (previous_year_total - total))\n    when 'resurrected' then total\n  else 0 end as net_change\nfrom windowed\norder by id, order_year",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "13f099e5-4265-438d-ab46-b3315bfc1f1d",
+   "metadata": {
+    "language": "sql",
+    "name": "cell4",
+    "collapsed": false,
+    "resultHeight": 438
+   },
+   "outputs": [],
+   "source": "select\n    date_part(year, order_year) as order_year,\n    category,\n    round(sum(total)) as total,\n    round(sum(net_change)) as net_change\nfrom {{ cell6 }}\ngroup by all",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "735da8fc-91c0-4604-8041-1437208a1f01",
+   "metadata": {
+    "language": "python",
+    "name": "cell2",
+    "collapsed": false,
+    "resultHeight": 772
+   },
+   "outputs": [],
+   "source": "import streamlit as st\n# Option to define dictionary to color code each category, may need to use matplotlib\n# Option to use altair for better control of ticks on Y axis\nst.bar_chart(cell4, x='ORDER_YEAR', y='NET_CHANGE', color='CATEGORY', height=750)",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "06f083eb-ae70-42ad-af0d-261138126bed",
+   "metadata": {
+    "language": "python",
+    "name": "cell5",
+    "collapsed": false,
+    "resultHeight": 96
+   },
+   "outputs": [],
+   "source": "df = cell6.to_pandas()\nbutton_csv = df.to_csv().encode(\"utf-8\")\nst.download_button(label=\"Download\", data=button_csv, file_name=\"growth_accounting.csv\", mime=\"text/csv\")",
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "id": "db63ea18-13d4-43a4-a29c-a734db89e796",
+   "metadata": {
+    "name": "cell8",
+    "collapsed": false,
+    "resultHeight": 74
+   },
+   "source": "# Forecasting\n"
+  },
+  {
+   "cell_type": "code",
+   "id": "2a9b9481-4d24-4f6c-9b53-4f50add6458e",
+   "metadata": {
+    "language": "sql",
+    "name": "cell14",
+    "collapsed": false,
+    "resultHeight": 438
+   },
+   "outputs": [],
+   "source": "select\n    date_trunc(day, o_orderdate) as order_date,\n    sum(o_totalprice) as total\nfrom SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\ngroup by 1\norder by order_date asc",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "9d5d7b4a-43cc-4c62-844e-a1954c312cbf",
+   "metadata": {
+    "language": "python",
+    "name": "cell15",
+    "collapsed": false,
+    "resultHeight": 0
+   },
+   "outputs": [],
+   "source": "from prophet import Prophet\nfrom prophet.plot import plot_plotly, plot_components_plotly",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "87ca009b-4da8-46c2-a86c-9cad46fac89f",
+   "metadata": {
+    "language": "python",
+    "name": "cell17",
+    "collapsed": false,
+    "resultHeight": 150
+   },
+   "outputs": [],
+   "source": "df = cell14.to_pandas()\ndf = df.rename(columns={'ORDER_DATE': 'ds', 'TOTAL': 'y'})\nprint(df.head())",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "4efeff4d-da4b-4c1d-b3d5-a892bb2a2bc5",
+   "metadata": {
+    "language": "python",
+    "name": "cell19",
+    "collapsed": false,
+    "resultHeight": 372
+   },
+   "outputs": [],
+   "source": "st.line_chart(df, x='ds', y='y')",
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cbffd526-a4b0-405b-9718-6c5c2f8f6144",
+   "metadata": {
+    "name": "cell21",
+    "collapsed": false,
+    "resultHeight": 120
+   },
+   "source": "Waiting on a role permission to write UDFs so that the Prophet library can run properly. Until then, the code cell below will print:\n\n```\n<class 'Exception'> Failed with error [Errno 1] Operation not permitted: '/usr/lib/python_udf/d212b0f949a4a60cf75395f561f7016ea978bad39b2e60eee12ece87d118e861/lib/python3.9/site-packages/prophet/stan_model/prophet_model.bin'\n```"
+  },
+  {
+   "cell_type": "code",
+   "id": "9d2c4877-5815-4f49-a53d-816b38de4eb6",
+   "metadata": {
+    "language": "python",
+    "name": "cell26",
+    "collapsed": false,
+    "resultHeight": 0
+   },
+   "outputs": [],
+   "source": "m = Prophet()\ntry:\n    m.fit(df)\nexcept Exception as err:\n    print(Exception, err)",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "ce582f14-9490-4a54-8fe0-bbfc8b56f61f",
+   "metadata": {
+    "language": "python",
+    "name": "cell23",
+    "collapsed": false,
+    "resultHeight": 885
+   },
+   "outputs": [],
+   "source": "future = m.make_future_dataframe(periods=365)\nforecast = m.predict(future)\nfig1 = m.plot(forecast)\n#fig2 = m.plot_components(forecast)",
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5dc1abf7-b9ea-4fe4-88ae-109342f6dc05",
+   "metadata": {
+    "name": "cell25",
+    "collapsed": false,
+    "resultHeight": 74
+   },
+   "source": "# Customer Segmentation"
+  },
+  {
+   "cell_type": "code",
+   "id": "939a7d50-2679-46ee-a43b-b7d03b627d61",
+   "metadata": {
+    "language": "sql",
+    "name": "cell16",
+    "collapsed": false,
+    "resultHeight": 426
+   },
+   "outputs": [],
+   "source": "select *\nfrom ADHOC_ANALYSIS.USER_UPLOADS.SP500_COMPANY_LIST\nlimit 10",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "9bd53742-511c-4cf9-9e28-02bdbcaca463",
+   "metadata": {
+    "language": "python",
+    "name": "cell13",
+    "collapsed": false,
+    "resultHeight": 0
+   },
+   "outputs": [],
+   "source": "import requests\n\ndef get_wiki_extract(title, timeout=10):\n    \"\"\"Return the plain-text intro extract of an English Wikipedia page.\n\n    Args:\n        title: Page title, sent as the API's `titles` parameter.\n        timeout: Seconds to wait for the HTTP response before requests raises.\n\n    Returns:\n        The intro extract, \"No extract available\" when the response has no\n        extract, or \"Error: <status code>\" for a non-200 response.\n    \"\"\"\n    # Base URL for Wikipedia's API\n    url = \"https://en.wikipedia.org/w/api.php\"\n\n    # Parameters for the API request\n    params = {\n        \"action\": \"query\",\n        \"format\": \"json\",\n        \"titles\": title,\n        \"prop\": \"extracts\",\n        \"exintro\": True,  # Only get the intro section\n        \"explaintext\": True,  # Get plain text instead of HTML\n    }\n\n    # Without a timeout, one stalled request would hang the whole notebook run\n    response = requests.get(url, params=params, timeout=timeout)\n\n    if response.status_code != 200:\n        return f\"Error: {response.status_code}\"\n\n    # Pages are keyed by page id; take the first (and only) entry\n    pages = response.json().get(\"query\", {}).get(\"pages\", {})\n    page = next(iter(pages.values()), {})\n    return page.get(\"extract\", \"No extract available\")",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "0557102d-3584-469a-9fdc-be53fd0a249b",
+   "metadata": {
+    "language": "python",
+    "name": "cell22",
+    "collapsed": false,
+    "resultHeight": 60
+   },
+   "outputs": [],
+   "source": "df = cell16.to_pandas()\ncompany_names = df['NAME'].tolist()\ncsv_list = []\n\nprint(\"extracting descriptions\")\n\nfor name in company_names:\n    try:\n        extract = get_wiki_extract(name.replace(\" \", \"_\"))\n        #print(f'extracted description of {name} from Wikipedia')\n    except Exception as e:\n        #print(f\"Error getting Wikipedia extract for {name}: {str(e)}\")\n        extract = \"None available\"\n        \n    csv_list.append((name, extract))\n\nprint(\"finished extracting descriptions\")",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "e979ca68-494a-46d4-a92d-d106d52980fb",
+   "metadata": {
+    "language": "python",
+    "name": "cell18",
+    "collapsed": false,
+    "resultHeight": 0
+   },
+   "outputs": [],
+   "source": "# save the dataframe as table for SQL querying \ndf = pd.DataFrame(csv_list, columns=['name', 'description'])\ndf = session.create_dataframe(df)\ndf.write.mode(\"overwrite\").save_as_table(\"prospects\", table_type=\"temporary\")",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "3f5d40d9-ca69-4137-affa-905caef97c29",
+   "metadata": {
+    "language": "sql",
+    "name": "cell20",
+    "resultHeight": 426,
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": "select \"name\", \"description\" from prospects limit 10",
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "51396730-f96a-476b-bb12-d7cac8c02576",
+   "metadata": {
+    "language": "sql",
+    "name": "cell24",
+    "codeCollapsed": false,
+    "collapsed": false,
+    "resultHeight": 391
+   },
+   "outputs": [],
+   "source": "-- Label each prospect description with webinar-interest likelihood, industry, and region via Cortex classify_text\nselect \n    \"name\",\n    \"description\",\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['extremely likely', 'somewhat likely', 'unlikely'],\n        {\n            'task_description': 'Return the likelihood that this company would be interested in attending a webinar showcasing the GTM utility of Snowflake Notebooks and Anaconda Python Packages.'\n        }\n    ):label::STRING as persona_likelihood,\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['healthcare', 'finance', 'retail', 'technology', 'communication', 'other'],\n        {\n            'task_description': 'Return the most likely industry of the company based on this description.'\n        }\n    ):label::STRING as industry,\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['California', 'South', 'Northeast', 'Midatlantic', 'Midwest', 'Pacific Northwest', 'Outside the US'],\n        {\n            'task_description': 'Return the most likely region the company is headquartered in based on this description.'\n        }\n    ):label::STRING as region\nfrom prospects\nwhere \"description\" is not null\n    and \"description\" != ''\n    -- skip the placeholder strings the Wikipedia lookup cells store on failure\n    and \"description\" not in ('No extract available', 'None available')\n    and \"description\" not like 'Error: %'\nlimit 10\n-- other classification ideas: main product",
+   "execution_count": null
+  }
+ ]
+}