Skip to content

Commit

Permalink
Draft: Scenario 1 Sonja
Browse files Browse the repository at this point in the history
  • Loading branch information
mictebbe committed Aug 10, 2021
1 parent 266a578 commit d511978
Show file tree
Hide file tree
Showing 7 changed files with 54,351 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,6 @@ dmypy.json

# Pyre type checker
.pyre/
scenarios/sonja/twitter_sentiment_data.csv
.gitignore
.gitignore
Binary file added scenarios/.DS_Store
Binary file not shown.
Binary file added scenarios/sonja/.DS_Store
Binary file not shown.
197 changes: 197 additions & 0 deletions scenarios/sonja/Generate_random _dates.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b068c96a-8c40-4354-a148-c9f4dfdc6d32",
"metadata": {},
"source": [
"Generate Random Dates\n",
"\n",
"The dataset of tweets we are using in the example is downloaded from https://www.kaggle.com/edqian/twitter-climate-change-sentiment-dataset \n",
"The tweets do not contain the date on which they were posted.\n",
"To showcase how the development of interest in topics over time can be analyzed with the reflexive ML toolbox, we generate a random date for each tweet and save the dataset back to a CSV file."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "e54d8c12-530f-49f4-9d08-b6ab2d94c3ed",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"import random\n",
"\n",
"\n",
"def getRandomDate(start_date=datetime.date(2019, 1, 1),\n",
"                  end_date=datetime.date(2021, 1, 1)):\n",
"    \"\"\"Return a pseudo-random date in the half-open range [start_date, end_date).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    start_date : datetime.date\n",
"        Earliest date that can be returned (inclusive).\n",
"    end_date : datetime.date\n",
"        Upper bound of the range (exclusive); must be later than start_date.\n",
"\n",
"    Returns\n",
"    -------\n",
"    datetime.date\n",
"        start_date plus a whole number of days drawn with random.randrange.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If end_date is not at least one day later than start_date.\n",
"\n",
"    Notes\n",
"    -----\n",
"    Draws from the global `random` module state, so results differ between\n",
"    runs unless random.seed(...) is called beforehand.\n",
"    \"\"\"\n",
"    span_days = (end_date - start_date).days\n",
"    if span_days <= 0:\n",
"        raise ValueError(\"end_date must be later than start_date\")\n",
"    # randrange excludes its upper bound, so end_date itself is never drawn.\n",
"    offset_days = random.randrange(span_days)\n",
"    return start_date + datetime.timedelta(days=offset_days)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7882d22a-150a-41fc-aebd-ccc02378f792",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Load the tweet dataset; the path is relative to the kernel's working directory.\n",
"data = pd.read_csv(\n",
"    \"twitter_sentiment_data.csv\",\n",
"    encoding=\"utf-8\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "11179db5-def1-4821-9909-5c330d96cd63",
"metadata": {},
"outputs": [],
"source": [
"# Draw one independent random date per tweet. Building the column directly\n",
"# avoids creating a placeholder column of empty strings just to map over it.\n",
"data['publishedAt'] = [getRandomDate() for _ in range(len(data))]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "aa1f0dc0-07d5-4539-868f-d1bd1c17c725",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sentiment</th>\n",
" <th>message</th>\n",
" <th>tweetid</th>\n",
" <th>publishedAt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-1</td>\n",
" <td>@tiniebeany climate change is an interesting h...</td>\n",
" <td>792927353886371840</td>\n",
" <td>2019-10-20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>RT @NatGeoChannel: Watch #BeforeTheFlood right...</td>\n",
" <td>793124211518832641</td>\n",
" <td>2019-12-27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>Fabulous! Leonardo #DiCaprio's film on #climat...</td>\n",
" <td>793124402388832256</td>\n",
" <td>2019-08-13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>RT @Mick_Fanning: Just watched this amazing do...</td>\n",
" <td>793124635873275904</td>\n",
" <td>2019-08-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>RT @cnalive: Pranita Biswasi, a Lutheran from ...</td>\n",
" <td>793125156185137153</td>\n",
" <td>2020-05-12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sentiment message \\\n",
"0 -1 @tiniebeany climate change is an interesting h... \n",
"1 1 RT @NatGeoChannel: Watch #BeforeTheFlood right... \n",
"2 1 Fabulous! Leonardo #DiCaprio's film on #climat... \n",
"3 1 RT @Mick_Fanning: Just watched this amazing do... \n",
"4 2 RT @cnalive: Pranita Biswasi, a Lutheran from ... \n",
"\n",
" tweetid publishedAt \n",
"0 792927353886371840 2019-10-20 \n",
"1 793124211518832641 2019-12-27 \n",
"2 793124402388832256 2019-08-13 \n",
"3 793124635873275904 2019-08-09 \n",
"4 793125156185137153 2020-05-12 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ca09b9d9-19c8-41d7-9fc3-96fe4a4658ca",
"metadata": {},
"outputs": [],
"source": [
"# Persist the augmented dataset. The DataFrame index is written as an unnamed\n",
"# first column (the pandas default, spelled out here).\n",
"# NOTE(review): pass index=False if downstream readers do not expect that column.\n",
"data.to_csv('twitter_sentiment_data_dates.csv', index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "655a121e-689d-4b67-95f7-ce91f07e5f47",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit d511978

Please sign in to comment.