Skip to content

Commit

Permalink
consolidated taxi data
Browse files Browse the repository at this point in the history
  • Loading branch information
paladique committed Sep 27, 2021
1 parent f9dbcf7 commit b03d1ed
Show file tree
Hide file tree
Showing 3 changed files with 260 additions and 27 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -352,5 +352,4 @@ MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
4-Data-Science-Lifecycle/14-Introduction/README.md
.vscode/settings.json
Data/Taxi/*
.vscode/settings.json
84 changes: 59 additions & 25 deletions 4-Data-Science-Lifecycle/14-Introduction/notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -31,44 +31,78 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 7,
"source": [
"import pandas as pd\r\n",
"import glob\r\n",
"\r\n",
"path = '../../data/Taxi/yellow_tripdata_2019-{}.csv'\r\n",
"july_taxi = pd.read_csv(path.format('07'))\r\n",
"january_taxi = pd.read_csv(path.format('01'))\r\n",
"\r\n",
"df = pd.concat([january_taxi, july_taxi])\r\n",
"path = '../../data/taxi.csv'\r\n",
"# july_taxi = pd.read_csv(path.format('07'))\r\n",
"# january_taxi = pd.read_csv(path.format('01'))\r\n",
"\r\n",
"# df = pd.concat([july_taxi.sample(100), january_taxi.sample(100)])\r\n",
"df = pd.read_csv(path)\r\n",
"# df.describe()\r\n",
"print(df.describe())"
"print(df)\r\n"
],
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"c:\\Users\\jasmineg\\projects\\Data-Science-For-Beginners\\venv\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3441: DtypeWarning: Columns (6) have mixed types.Specify dtype option on import or set low_memory=False.\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
" VendorID passenger_count ... total_amount congestion_surcharge\n",
"count 1.394425e+07 1.394425e+07 ... 1.397821e+07 9.122233e+06\n",
"mean 1.641553e+00 1.569314e+00 ... 1.745644e+01 1.563450e+00\n",
"std 5.172343e-01 1.219889e+00 ... 1.945658e+02 1.215658e+00\n",
"min 1.000000e+00 0.000000e+00 ... -4.508000e+02 -2.500000e+00\n",
"25% 1.000000e+00 1.000000e+00 ... 9.360000e+00 0.000000e+00\n",
"50% 2.000000e+00 1.000000e+00 ... 1.295000e+01 2.500000e+00\n",
"75% 2.000000e+00 2.000000e+00 ... 1.880000e+01 2.500000e+00\n",
"max 4.000000e+00 9.000000e+00 ... 6.232617e+05 2.750000e+00\n",
" VendorID tpep_pickup_datetime tpep_dropoff_datetime passenger_count \\\n",
"0 2.0 2019-07-15 16:27:53 2019-07-15 16:44:21 3.0 \n",
"1 2.0 2019-07-17 20:26:35 2019-07-17 20:40:09 6.0 \n",
"2 2.0 2019-07-06 16:01:08 2019-07-06 16:10:25 1.0 \n",
"3 1.0 2019-07-18 22:32:23 2019-07-18 22:35:08 1.0 \n",
"4 2.0 2019-07-19 14:54:29 2019-07-19 15:19:08 1.0 \n",
".. ... ... ... ... \n",
"195 2.0 2019-01-18 08:42:15 2019-01-18 08:56:57 1.0 \n",
"196 1.0 2019-01-19 04:34:45 2019-01-19 04:43:44 1.0 \n",
"197 2.0 2019-01-05 10:37:39 2019-01-05 10:42:03 1.0 \n",
"198 2.0 2019-01-23 10:36:29 2019-01-23 10:44:34 2.0 \n",
"199 2.0 2019-01-30 06:55:58 2019-01-30 07:07:02 5.0 \n",
"\n",
" trip_distance RatecodeID store_and_fwd_flag PULocationID DOLocationID \\\n",
"0 2.02 1.0 N 186 233 \n",
"1 1.59 1.0 N 141 161 \n",
"2 1.69 1.0 N 246 249 \n",
"3 0.90 1.0 N 229 141 \n",
"4 4.79 1.0 N 237 107 \n",
".. ... ... ... ... ... \n",
"195 1.18 1.0 N 43 237 \n",
"196 2.30 1.0 N 148 234 \n",
"197 0.83 1.0 N 237 263 \n",
"198 1.12 1.0 N 144 113 \n",
"199 2.41 1.0 N 209 107 \n",
"\n",
" payment_type fare_amount extra mta_tax tip_amount tolls_amount \\\n",
"0 1.0 12.0 1.0 0.5 4.08 0.0 \n",
"1 2.0 10.0 0.5 0.5 0.00 0.0 \n",
"2 2.0 8.5 0.0 0.5 0.00 0.0 \n",
"3 1.0 4.5 3.0 0.5 1.65 0.0 \n",
"4 1.0 19.5 0.0 0.5 5.70 0.0 \n",
".. ... ... ... ... ... ... \n",
"195 1.0 10.0 0.0 0.5 2.16 0.0 \n",
"196 1.0 9.5 0.5 0.5 2.15 0.0 \n",
"197 1.0 5.0 0.0 0.5 1.16 0.0 \n",
"198 2.0 7.0 0.0 0.5 0.00 0.0 \n",
"199 1.0 10.5 0.0 0.5 1.00 0.0 \n",
"\n",
" improvement_surcharge total_amount congestion_surcharge \n",
"0 0.3 20.38 2.5 \n",
"1 0.3 13.80 2.5 \n",
"2 0.3 11.80 2.5 \n",
"3 0.3 9.95 2.5 \n",
"4 0.3 28.50 2.5 \n",
".. ... ... ... \n",
"195 0.3 12.96 0.0 \n",
"196 0.3 12.95 0.0 \n",
"197 0.3 6.96 0.0 \n",
"198 0.3 7.80 0.0 \n",
"199 0.3 12.30 0.0 \n",
"\n",
"[8 rows x 15 columns]\n"
"[200 rows x 18 columns]\n"
]
}
],
Expand Down
Loading

0 comments on commit b03d1ed

Please sign in to comment.