Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added initial folder structure for demo #148

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions samples/notebooks/anaconda_webinar/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Title
Placeholder title
326 changes: 326 additions & 0 deletions samples/notebooks/anaconda_webinar/anaconda_webinar_notebook.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,326 @@
{
"metadata": {
"kernelspec": {
"display_name": "Streamlit Notebook",
"name": "streamlit"
}
},
"nbformat_minor": 5,
"nbformat": 4,
"cells": [
{
"cell_type": "code",
"id": "3775908f-ca36-4846-8f38-5adca39217f2",
"metadata": {
"language": "python",
"name": "cell1",
"collapsed": false,
"resultHeight": 0
},
"source": "import logging\nimport warnings\n\nfrom snowflake.snowpark.context import get_active_session\n\n# Only surface cmdstanpy log records at WARNING level or above\nlogging.getLogger(\"cmdstanpy\").setLevel(logging.WARNING)\n# Suppress FutureWarning messages\nwarnings.filterwarnings('ignore', category=FutureWarning)\n\n# Handle to the notebook's active Snowpark session, used by later cells\nsession = get_active_session()",
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"id": "8ae58f97-bb31-4290-b2dd-2416f3c2ce15",
"metadata": {
"name": "cell9",
"collapsed": false,
"resultHeight": 74
},
"source": "# Growth Accounting\n"
},
{
"cell_type": "code",
"id": "435baefb-25ff-42a1-b4f8-236a98b4afac",
"metadata": {
"language": "sql",
"name": "cell3",
"collapsed": false,
"resultHeight": 510
},
"outputs": [],
"source": "-- Summed order value per customer key and order year\nSELECT\n    o_custkey AS id,\n    DATE_TRUNC(year, o_orderdate) AS order_year,\n    SUM(o_totalprice) AS total\nFROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\nGROUP BY ALL\nORDER BY id, order_year",
"execution_count": null
},
{
"cell_type": "code",
"id": "61f451db-8ff2-4d83-b9be-6c1a77365446",
"metadata": {
"language": "python",
"name": "cell12",
"collapsed": false,
"resultHeight": 0
},
"outputs": [],
"source": "import pandas as pd",
"execution_count": null
},
{
"cell_type": "code",
"id": "20f1dd62-d796-4190-b34a-89a16fea1819",
"metadata": {
"language": "python",
"name": "cell10",
"collapsed": false,
"resultHeight": 0
},
"outputs": [],
"source": "df = cell3.to_pandas()\n\n# Pivot to a dense ID x ORDER_YEAR grid so every id gets a row for every year\n# (TOTAL = 0 where there was no revenue), then melt back to long format.\n# aggfunc is spelled out because pivot_table defaults to 'mean', which would\n# average rather than add revenue if an (ID, ORDER_YEAR) pair ever repeated.\nresult = (\n    df.pivot_table(\n        index='ID',\n        columns='ORDER_YEAR',\n        values='TOTAL',\n        aggfunc='sum',\n        fill_value=0,\n    )\n    .reset_index()\n    .melt(id_vars='ID', var_name='ORDER_YEAR', value_name='TOTAL')\n)\n\n# Save the dataframe as a temporary table named \"df\" for SQL querying\ndf = session.create_dataframe(result)\ndf.write.mode(\"overwrite\").save_as_table(\"df\", table_type=\"temporary\")",
"execution_count": null
},
{
"cell_type": "code",
"id": "52ae5a36-e143-4ebb-b884-e17750b0c77f",
"metadata": {
"language": "sql",
"name": "cell7",
"collapsed": false,
"resultHeight": 426
},
"outputs": [],
"source": "select * from df\norder by id, order_year\nlimit 10",
"execution_count": null
},
{
"cell_type": "code",
"id": "11971c03-53a7-4429-870a-4b51bbef7aca",
"metadata": {
"language": "sql",
"name": "cell6",
"collapsed": false,
"resultHeight": 159
},
"outputs": [],
"source": "WITH windowed AS (\n    -- per id: running sum of total by year, and the previous row's total (0 when there is none)\n    SELECT\n        *,\n        SUM(total) OVER (PARTITION BY id ORDER BY order_year ASC) AS lifetime_spend,\n        COALESCE(LAG(total) OVER (PARTITION BY id ORDER BY order_year ASC), 0) AS previous_year_total\n    FROM df\n)\n\nSELECT\n    *,\n    -- label each id/year row by how total moved relative to the previous year\n    CASE\n        WHEN total = previous_year_total AND total > 0 THEN 'retained'\n        WHEN total > 0 AND previous_year_total = 0 AND lifetime_spend = total THEN 'new'\n        WHEN total = 0 AND previous_year_total > 0 THEN 'churned'\n        WHEN total > previous_year_total AND previous_year_total > 0 THEN 'expanded'\n        WHEN total < previous_year_total AND previous_year_total > 0 THEN 'contracted'\n        WHEN total > 0 AND previous_year_total = 0 AND lifetime_spend > total THEN 'resurrected'\n        ELSE 'irrelevant'\n    END AS category,\n    -- signed change in total attributed to that label\n    CASE category\n        WHEN 'retained' THEN 0\n        WHEN 'new' THEN total\n        WHEN 'churned' THEN (-1 * previous_year_total)\n        WHEN 'expanded' THEN total - previous_year_total\n        WHEN 'contracted' THEN (-1 * (previous_year_total - total))\n        WHEN 'resurrected' THEN total\n        ELSE 0\n    END AS net_change\nFROM windowed\nORDER BY id, order_year",
"execution_count": null
},
{
"cell_type": "code",
"id": "13f099e5-4265-438d-ab46-b3315bfc1f1d",
"metadata": {
"language": "sql",
"name": "cell4",
"collapsed": false,
"resultHeight": 438
},
"outputs": [],
"source": "-- Roll the cell6 rows up to one row per year and category\nSELECT\n    DATE_PART(year, order_year) AS order_year,\n    category,\n    ROUND(SUM(total)) AS total,\n    ROUND(SUM(net_change)) AS net_change\nFROM {{ cell6 }}\nGROUP BY ALL",
"execution_count": null
},
{
"cell_type": "code",
"id": "735da8fc-91c0-4604-8041-1437208a1f01",
"metadata": {
"language": "python",
"name": "cell2",
"collapsed": false,
"resultHeight": 772
},
"outputs": [],
"source": "import streamlit as st\n\n# Bar chart of NET_CHANGE per ORDER_YEAR, coloured by CATEGORY.\n# Ideas for later:\n#   - define a dictionary to colour-code each category (may need matplotlib)\n#   - use altair for better control of the ticks on the Y axis\nst.bar_chart(\n    cell4,\n    x='ORDER_YEAR',\n    y='NET_CHANGE',\n    color='CATEGORY',\n    height=750,\n)",
"execution_count": null
},
{
"cell_type": "code",
"id": "06f083eb-ae70-42ad-af0d-261138126bed",
"metadata": {
"language": "python",
"name": "cell5",
"collapsed": false,
"resultHeight": 96
},
"outputs": [],
"source": "# Offer the full growth-accounting result (cell6) as a CSV download\ndf = cell6.to_pandas()\n# index=False keeps the pandas row index out of the file; otherwise it is\n# written as an extra, unnamed first column\nbutton_csv = df.to_csv(index=False).encode(\"utf-8\")\nst.download_button(\n    label=\"Download\",\n    data=button_csv,\n    file_name=\"growth_accounting.csv\",\n    mime=\"text/csv\",\n)",
"execution_count": null
},
{
"cell_type": "markdown",
"id": "db63ea18-13d4-43a4-a29c-a734db89e796",
"metadata": {
"name": "cell8",
"collapsed": false,
"resultHeight": 74
},
"source": "# Forecasting\n"
},
{
"cell_type": "code",
"id": "2a9b9481-4d24-4f6c-9b53-4f50add6458e",
"metadata": {
"language": "sql",
"name": "cell14",
"collapsed": false,
"resultHeight": 438
},
"outputs": [],
"source": "-- Summed order value per order date, oldest first\nSELECT\n    DATE_TRUNC(day, o_orderdate) AS order_date,\n    SUM(o_totalprice) AS total\nFROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS\nGROUP BY 1\nORDER BY order_date ASC",
"execution_count": null
},
{
"cell_type": "code",
"id": "9d5d7b4a-43cc-4c62-844e-a1954c312cbf",
"metadata": {
"language": "python",
"name": "cell15",
"collapsed": false,
"resultHeight": 0
},
"outputs": [],
"source": "from prophet import Prophet\nfrom prophet.plot import plot_plotly, plot_components_plotly",
"execution_count": null
},
{
"cell_type": "code",
"id": "87ca009b-4da8-46c2-a86c-9cad46fac89f",
"metadata": {
"language": "python",
"name": "cell17",
"collapsed": false,
"resultHeight": 150
},
"outputs": [],
"source": "# Prophet expects the date column to be called 'ds' and the value column 'y'\nprophet_columns = {'ORDER_DATE': 'ds', 'TOTAL': 'y'}\ndf = cell14.to_pandas().rename(columns=prophet_columns)\nprint(df.head())",
"execution_count": null
},
{
"cell_type": "code",
"id": "4efeff4d-da4b-4c1d-b3d5-a892bb2a2bc5",
"metadata": {
"language": "python",
"name": "cell19",
"collapsed": false,
"resultHeight": 372
},
"outputs": [],
"source": "st.line_chart(df, x='ds', y='y')",
"execution_count": null
},
{
"cell_type": "markdown",
"id": "cbffd526-a4b0-405b-9718-6c5c2f8f6144",
"metadata": {
"name": "cell21",
"collapsed": false,
"resultHeight": 120
},
"source": "The Prophet library cannot run properly until this role is granted permission to write UDFs. Until then, the code cell below prints an error like the following:\n```<class 'Exception'> Failed with error [Errno 1] Operation not permitted: '/usr/lib/python_udf/d212b0f949a4a60cf75395f561f7016ea978bad39b2e60eee12ece87d118e861/lib/python3.9/site-packages/prophet/stan_model/prophet_model.bin'```"
},
{
"cell_type": "code",
"id": "9d2c4877-5815-4f49-a53d-816b38de4eb6",
"metadata": {
"language": "python",
"name": "cell26",
"collapsed": false,
"resultHeight": 0
},
"outputs": [],
"source": "m = Prophet()\ntry:\n    m.fit(df)\nexcept Exception as err:\n    # Report the type of the error that was actually raised. Printing the\n    # `Exception` class itself always shows <class 'Exception'>, whatever failed.\n    print(type(err), err)",
"execution_count": null
},
{
"cell_type": "code",
"id": "ce582f14-9490-4a54-8fe0-bbfc8b56f61f",
"metadata": {
"language": "python",
"name": "cell23",
"collapsed": false,
"resultHeight": 885
},
"outputs": [],
"source": "future = m.make_future_dataframe(periods=365)\nforecast = m.predict(future)\nfig1 = m.plot(forecast)\n#fig2 = m.plot_components(forecast)",
"execution_count": null
},
{
"cell_type": "markdown",
"id": "5dc1abf7-b9ea-4fe4-88ae-109342f6dc05",
"metadata": {
"name": "cell25",
"collapsed": false,
"resultHeight": 74
},
"source": "# Customer Segmentation"
},
{
"cell_type": "code",
"id": "939a7d50-2679-46ee-a43b-b7d03b627d61",
"metadata": {
"language": "sql",
"name": "cell16",
"collapsed": false,
"resultHeight": 426
},
"outputs": [],
"source": "select *\nfrom ADHOC_ANALYSIS.USER_UPLOADS.SP500_COMPANY_LIST\nlimit 10",
"execution_count": null
},
{
"cell_type": "code",
"id": "9bd53742-511c-4cf9-9e28-02bdbcaca463",
"metadata": {
"language": "python",
"name": "cell13",
"collapsed": false,
"resultHeight": 0
},
"outputs": [],
"source": "import requests\n\n\ndef get_wiki_extract(title, timeout=10):\n    \"\"\"Return the plain-text intro section of a Wikipedia article.\n\n    Args:\n        title: Article title to look up.\n        timeout: Seconds to wait on the API before requests raises a timeout error.\n\n    Returns:\n        The intro extract; \"No extract available\" when the response holds no\n        extract for the title; or \"Error: <status code>\" for a non-200 response.\n\n    Raises:\n        requests.RequestException: on connection failures or timeouts.\n    \"\"\"\n    # Base URL for Wikipedia's API\n    url = \"https://en.wikipedia.org/w/api.php\"\n\n    # Parameters for the API request\n    params = {\n        \"action\": \"query\",\n        \"format\": \"json\",\n        \"titles\": title,\n        \"prop\": \"extracts\",\n        \"exintro\": True,  # Only get the intro section\n        \"explaintext\": True,  # Get plain text instead of HTML\n    }\n\n    # Bound the wait: without a timeout a stalled connection can hang the cell indefinitely\n    response = requests.get(url, params=params, timeout=timeout)\n\n    if response.status_code != 200:\n        return f\"Error: {response.status_code}\"\n\n    # The extract sits under query -> pages -> <page id>; tolerate missing levels\n    pages = response.json().get(\"query\", {}).get(\"pages\", {})\n    if not pages:\n        return \"No extract available\"\n\n    # Only one title is requested, so take the first page in the mapping\n    page = next(iter(pages.values()))\n    return page.get(\"extract\", \"No extract available\")",
"execution_count": null
},
{
"cell_type": "code",
"id": "0557102d-3584-469a-9fdc-be53fd0a249b",
"metadata": {
"language": "python",
"name": "cell22",
"collapsed": false,
"resultHeight": 60
},
"outputs": [],
"source": "df = cell16.to_pandas()\ncompany_names = df['NAME'].tolist()\ncsv_list = []\n\nprint(\"extracting descriptions\")\n\n# Collect one (name, extract) pair per company. A lookup that raises is\n# recorded as \"None available\" instead of stopping the loop.\nfor name in company_names:\n    try:\n        # spaces are swapped for underscores before the title is looked up\n        extract = get_wiki_extract(name.replace(\" \", \"_\"))\n    except Exception:\n        extract = \"None available\"\n    csv_list.append((name, extract))\n\nprint(\"finished extracting descriptions\")",
"execution_count": null
},
{
"cell_type": "code",
"id": "e979ca68-494a-46d4-a92d-d106d52980fb",
"metadata": {
"language": "python",
"name": "cell18",
"collapsed": false,
"resultHeight": 0
},
"outputs": [],
"source": "# Store the (name, description) pairs as a temporary \"prospects\" table\n# so that SQL cells can query them\nprospect_columns = ['name', 'description']\ndf = session.create_dataframe(pd.DataFrame(csv_list, columns=prospect_columns))\ndf.write.mode(\"overwrite\").save_as_table(\"prospects\", table_type=\"temporary\")",
"execution_count": null
},
{
"cell_type": "code",
"id": "3f5d40d9-ca69-4137-affa-905caef97c29",
"metadata": {
"language": "sql",
"name": "cell20",
"resultHeight": 426,
"collapsed": false
},
"outputs": [],
"source": "select \"name\", \"description\" from prospects limit 10",
"execution_count": null
},
{
"cell_type": "code",
"id": "51396730-f96a-476b-bb12-d7cac8c02576",
"metadata": {
"language": "sql",
"name": "cell24",
"codeCollapsed": false,
"collapsed": false,
"resultHeight": 391
},
"outputs": [],
"source": "-- Classify each prospect description three ways: webinar interest, industry, HQ region\nselect\n    \"name\",\n    \"description\",\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['extremely likely', 'somewhat likely', 'unlikely'],\n        {\n            'task_description': 'Return the likelihood that this company would be interested in attending a webinar showcasing the GTM utility of Snowflake Notebooks and Anaconda Python Packages.'\n        }\n    ):label::STRING as persona_likelihood,\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['healthcare', 'finance', 'retail', 'technology', 'communication', 'other'],\n        {\n            'task_description': 'Return the most likely industry of the company based on this description.'\n        }\n    ):label::STRING as industry,\n    snowflake.cortex.classify_text(\n        \"description\",\n        ['California', 'South', 'Northeast', 'Midatlantic', 'Midwest', 'Pacific Northwest', 'Outside the US'],\n        {\n            'task_description': 'Return the most likely region the company is headquartered in based on this description.'\n        }\n    ):label::STRING as region\nfrom prospects\nwhere \"description\" is not null and \"description\" != ''\nlimit 10\n-- other class. ideas: industry, main product, region",
"execution_count": null
}
]
}