-
Notifications
You must be signed in to change notification settings - Fork 0
/
task_four.py
165 lines (126 loc) · 7.26 KB
/
task_four.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# to be run every week: takes in data from task three and further aggregates it to get a cohesive weekly report.
import sqlite3
import sys
import sqlalchemy
import pandas as pd
import numpy as np
import datetime
PATH = 'my_file'
engine = sqlalchemy.create_engine('sqlite:///' + PATH)
daily_data = pd.read_sql_table("DailyDatabase", engine)
daily_data['Days With Problems'] = None
all_temps = pd.read_sql_table("DailyTempDatabase", engine)
all_carbon = pd.read_sql_table("DailyCarbonDatabase", engine)
all_temps_copy = all_temps.set_index(['Room #', 'Temperature'])
all_carbon_copy = all_carbon.set_index(['Room #', 'CO2'])
def convert_back(z):
if z == "N/A":
return np.NaN
elif z is not None:
return datetime.datetime.strptime(z, "%Y-%m-%d %H:%M:%S").timestamp()
else:
return None
for x in range(0, len(daily_data['First Time Too Cold'])):
daily_data['First Time Too Cold'].loc[x] = convert_back(daily_data['First Time Too Cold'].loc[x])
daily_data['Last Time Too Cold'].loc[x] = convert_back(daily_data['Last Time Too Cold'].loc[x])
daily_data['First Time Too Warm'].loc[x] = convert_back(daily_data['First Time Too Warm'].loc[x])
daily_data['Last Time Too Warm'].loc[x] = convert_back(daily_data['Last Time Too Warm'].loc[x])
print(daily_data['Last Time Too Cold'])
def none_to_nan(x):
if x is None:
return np.NaN
return x
all_temps['Temperature'] = all_temps['Temperature'].apply(none_to_nan)
all_carbon['CO2'] = all_carbon['CO2'].apply(none_to_nan)
all_temps['Median Temperature'] = all_temps['Temperature']
all_temps['Mean Temperature'] = all_temps['Temperature']
temp_analysis = all_temps.groupby("Room #").agg({"Mean Temperature": np.nanmean,
"Median Temperature": np.nanmedian})
all_carbon['Median CO2'] = all_carbon['CO2']
all_carbon['Mean CO2'] = all_carbon['CO2']
co2_analysis = all_carbon.groupby("Room #").agg({"Mean CO2": np.mean,
"Median CO2": np.median})
# for some reason, sql was automatically converting all the interval values to bytes... but this reverses it
def convert_to_int(x):
if x is not None:
return int.from_bytes(x, sys.byteorder)
return None
daily_data['Intervals Too Warm'] = daily_data['Intervals Too Warm'].apply(convert_to_int)
daily_data['Intervals Too Cold'] = daily_data['Intervals Too Cold'].apply(convert_to_int)
daily_data['Intervals Too Much CO2'] = daily_data['Intervals Too Much CO2'].apply(convert_to_int)
daily_data['Intervals Too Little CO2'] = daily_data['Intervals Too Little CO2'].apply(convert_to_int)
print(daily_data['First Time Too Cold'])
print(daily_data['Last Time Too Cold'])
daily_data = daily_data.groupby("Room #").agg({"Days With Problems": np.size,
"Intervals Too Warm": np.sum,
"Intervals Too Cold": np.sum,
"Intervals Too Much CO2": np.sum,
"Intervals Too Little CO2": np.sum,
"Highest Temperature": np.max,
"Lowest Temperature": np.min,
'Highest CO2': np.max,
'Lowest CO2': np.min,
"First Time Too Warm": np.min,
"Last Time Too Warm": np.max,
"First Time Too Cold": np.min,
"Last Time Too Cold": np.max})
daily_data['Time of Highest Temperature'] = None
daily_data['Time of Lowest Temperature'] = None
daily_data['Time of Highest CO2'] = None
daily_data['Time of Lowest CO2'] = None
# For each room, goes back into the copies to find the times of the most extreme values
for room in daily_data.index:
if not np.isnan(daily_data['Highest Temperature'][room]):
# match highest temp to time at which it occurred
index_tuple = (room, int(daily_data['Highest Temperature'][room]))
if type(all_temps_copy.loc[index_tuple]) == pd.Series:
temp_df =(pd.DataFrame(all_temps_copy.loc[index_tuple]).T.sort_values('Timestamp')).T
daily_data['Time of Highest Temperature'][room] = temp_df.loc['Timestamp'][0]
else:
daily_data['Time of Highest Temperature'][room] = all_temps_copy.loc[index_tuple].sort_values('Timestamp').reset_index().iloc[0]['Timestamp']
if not np.isnan(daily_data['Lowest Temperature'][room]):
# match lowest temp to time at which it occurred
index_tuple = (room, int(daily_data['Lowest Temperature'][room]))
if type(all_temps_copy.loc[index_tuple]) == pd.Series:
temp_df =(pd.DataFrame(all_temps_copy.loc[index_tuple]).T.sort_values('Timestamp')).T
daily_data['Time of Lowest Temperature'][room] = temp_df.loc['Timestamp'][0]
else:
daily_data['Time of Lowest Temperature'][room] = all_temps_copy.loc[index_tuple].sort_values('Timestamp').reset_index().iloc[0]['Timestamp']
if not np.isnan(daily_data['Highest CO2'][room]):
# match highest co2 to time at which it occurred
index_tuple = (room, int(daily_data['Highest CO2'][room]))
if type(all_carbon_copy.loc[index_tuple]) == pd.Series:
temp_df =(pd.DataFrame(all_carbon_copy.loc[index_tuple]).T.sort_values('Timestamp')).T
daily_data['Time of Highest CO2'][room] = temp_df.loc['Timestamp'][0]
else:
daily_data['Time of Highest CO2'][room] = all_carbon_copy.loc[index_tuple].sort_values('Timestamp').reset_index().iloc[0]['Timestamp']
if not np.isnan(daily_data['Lowest CO2'][room]):
# match lowest co2 to time at which it occurred
index_tuple = (room, int(daily_data['Lowest CO2'][room]))
if type(all_carbon_copy.loc[index_tuple]) == pd.Series:
temp_df =(pd.DataFrame(all_carbon_copy.loc[index_tuple]).T.sort_values('Timestamp')).T
daily_data['Time of Lowest CO2'][room] = temp_df.loc['Timestamp'][0]
else:
daily_data['Time of Lowest CO2'][room] = all_carbon_copy.loc[index_tuple].sort_values('Timestamp').reset_index().iloc[0]['Timestamp']
def make_time_readable(x):
if (x is not None) and (not np.isnan(x)):
return datetime.datetime.fromtimestamp(x)
return None
daily_data["First Time Too Warm"] = daily_data["First Time Too Warm"].apply(make_time_readable)
daily_data["Last Time Too Warm"] = daily_data["Last Time Too Warm"].apply(make_time_readable)
daily_data["First Time Too Cold"] = daily_data["First Time Too Cold"].apply(make_time_readable)
daily_data["Last Time Too Cold"] = daily_data["Last Time Too Cold"].apply(make_time_readable)
daily_data = pd.merge(daily_data, temp_analysis, how='outer', on=['Room #'])
daily_data = pd.merge(daily_data, co2_analysis, how='outer', on=['Room #'])
daily_data.to_excel("output.xlsx")
# daily_data.to_csv('tester.csv')
PATH = 'my_file'
conn = sqlite3.connect(PATH)
cursor = conn.cursor()
drop = "DROP TABLE DailyDatabase"
drop2 = "DROP TABLE DailyTempDatabase"
drop3 = "DROP TABLE DailyCarbonDatabase"
cursor.execute(drop)
cursor.execute(drop2)
cursor.execute(drop3)
conn.close()