This repository has been archived by the owner on Dec 30, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
processData.py
98 lines (76 loc) · 2.85 KB
/
processData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import argparse
from readData import readData
import pandas as pd
import numpy as np
def processData(dataDir="data/"):
    """Combine the per-nation source CSVs into raw and rolling-averaged tables.

    For each nation, every source file is read with ``readData``, wrapped in
    a named ``pandas.Series`` and joined column-wise into one DataFrame.
    Derived columns are added by ``calculateFeatures`` and the table is
    written to ``<dataDir><nation>.csv``. Each source column is then replaced
    by its 7-row rolling mean, the derived columns are recomputed, and the
    result is written to ``<dataDir><nation>.avg.csv``.

    Args:
        dataDir: Prefix prepended verbatim to every input and output file
            name; a directory therefore has to include its trailing
            separator (e.g. ``"data/"``).
    """
    # (file name suffix, column name, value for readData's ``skip`` argument).
    # ``None`` means readData is called without ``skip``.
    # NOTE(review): ``skip`` is presumably a number of leading entries to
    # drop from the file -- confirm against readData.
    sources = [
        (".testing.reported.csv", "reportedTests", None),
        (".cases.reported.csv", "reportedCases", None),
        (".cases.csv", "specimenCases", 5),
        (".deaths.reported.csv", "reportedDeaths", None),
        (".deaths.csv", "specimenDeaths", 5),
        (".deaths.onCertificate.csv", "certificateDeaths", 11),
        (".hospitalisations.csv", "hospitalisations", None),
        (".inHospital.csv", "inHospital", None),
    ]
    names = [name for _, name, _ in sources]
    nationList = ["UK", "Scotland", "England", "Northern Ireland", "Wales"]

    for nation in nationList:
        nationSeries = []
        for suffix, name, skip in sources:
            fileName = dataDir + nation + suffix
            # The skip value is chosen from the suffix, not from the full
            # path: comparing the already-prefixed path against a bare suffix
            # can never match, which would silently drop the skip.
            if skip is None:
                data = readData(fileName, type="dict")
            else:
                data = readData(fileName, type="dict", skip=skip)
            nationSeries.append(pd.Series(data, name=name))

        nationData = pd.concat(nationSeries, axis=1)
        calculateFeatures(nationData)
        nationData.to_csv(dataDir + nation + ".csv")

        for column in names:
            # Zero-fill gaps only between the first and last valid entry so
            # that leading/trailing missing values stay missing. If the
            # column has no valid entry both bounds are None and the slice
            # spans every row.
            first = nationData[column].first_valid_index()
            last = nationData[column].last_valid_index()
            # Assign through DataFrame.loc in one step; an in-place fillna on
            # a chained selection may act on a temporary copy and be lost.
            nationData.loc[first:last, column] = nationData.loc[
                first:last, column
            ].fillna(0)
            nationData[column] = nationData[column].rolling(7).mean()

        # Recompute the derived columns from the smoothed source columns.
        calculateFeatures(nationData)
        nationData.to_csv(dataDir + nation + ".avg.csv")
def calculateFeatures(nationData):
    """Add derived percentage columns to ``nationData`` in place.

    Columns written (existing columns of the same name are overwritten):

    * ``posTests``: ``reportedCases`` as a percentage of ``reportedTests``.
    * ``mortCases``: sum of ``reportedCases`` over a rolling 28-row window.
    * ``mortality``: ``specimenDeaths`` as a percentage of ``mortCases``.
    * ``hospitalisationRate``: ``hospitalisations`` as a percentage of
      ``mortCases``.

    Every percentage is capped at 100. Missing inputs give a missing result;
    a positive value divided by zero is capped to 100.

    Args:
        nationData: DataFrame holding the columns ``reportedCases``,
            ``reportedTests``, ``specimenDeaths`` and ``hospitalisations``.

    Returns:
        None. ``nationData`` is modified in place.
    """

    def _cappedPercentage(numerator, denominator):
        # Column-wise arithmetic instead of a per-row Python lambda: same
        # values, but it also works when the frame has no rows.
        ratio = nationData[numerator] / nationData[denominator] * 100
        return ratio.clip(upper=100)

    nationData["posTests"] = _cappedPercentage("reportedCases", "reportedTests")
    # mortCases must exist before the two rates below, which divide by it.
    nationData["mortCases"] = nationData["reportedCases"].rolling(28).sum()
    nationData["mortality"] = _cappedPercentage("specimenDeaths", "mortCases")
    nationData["hospitalisationRate"] = _cappedPercentage(
        "hospitalisations", "mortCases"
    )
def defineArgParser():
    """Build and return the command-line argument parser for this script.

    The parser uses the module docstring as its description and accepts a
    single option, ``-D`` / ``--dataDir``, a string defaulting to ``data/``.
    """
    cliParser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    dataDirOptions = {
        "help": "Directory where the csv files will be stored [default: data/]",
        "default": "data/",
        "type": str,
    }
    cliParser.add_argument("-D", "--dataDir", **dataDirOptions)
    return cliParser
if __name__ == "__main__":
    # Script entry point: parse the command line, then process the data
    # found under the requested directory prefix.
    processData(defineArgParser().parse_args().dataDir)