-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_player_stats.py
307 lines (261 loc) · 13.6 KB
/
parse_player_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
import json
import os
import pandas as pd
# Output files: one per role for the parsed hero statistics, plus one for the
# SR / rank summary.
TANK_DATA = "parsed_stats/tank_data.txt"
DAMAGE_DATA = "parsed_stats/damage_data.txt"
SUPPORT_DATA = "parsed_stats/support_data.txt"
SR_RANK_INFO = "parsed_stats/sr_rank_info.txt"

# Folder (below the current working directory) containing all player info
# scraped in get_player_json.py.
PLAYER_INFO_FOLDER = f"{os.getcwd()}/raw_stats/"

ROLES = ["tank", "damage", "support"]
RANKS = [
    "Bronze",
    "Silver",
    "Gold",
    "Platinum",
    "Diamond",
    "Masters",
    "Grandmaster",
]

# Heroes belonging to each role.
TANK_LIST = [
    "dVa",
    "orisa",
    "reinhardt",
    "roadhog",
    "sigma",
    "winston",
    "wreckingBall",
    "zarya",
]
DAMAGE_LIST = [
    "ashe",
    "bastion",
    "doomfist",
    "echo",
    "genji",
    "hanzo",
    "junkrat",
    "mccree",
    "mei",
    "pharah",
    "reaper",
    "soldier76",
    "sombra",
    "symmetra",
    "torbjorn",
    "tracer",
    "widowmaker",
]
SUPPORT_LIST = [
    "ana",
    "baptiste",
    "brigitte",
    "lucio",
    "mercy",
    "moira",
    "zenyatta",
]
# Role name -> list of heroes in that role.
ROLES_LISTS = {
    "tank": TANK_LIST,
    "damage": DAMAGE_LIST,
    "support": SUPPORT_LIST,
}

# Names of the statistics tracked for each role, grouped by stat category.
TANK_AVERAGE = [
    "barrierDamageDoneAvgPer10Min",
    "criticalHitsAvgPer10Min",
    "deathsAvgPer10Min",
    "eliminationsAvgPer10Min",
    "finalBlowsAvgPer10Min",
    "heroDamageDoneAvgPer10Min",
    "objectiveKillsAvgPer10Min",
    "objectiveTimeAvgPer10Min",
    "soloKillsAvgPer10Min",
    "timeSpentOnFireAvgPer10Min",
]
TANK_COMBAT = ["weaponAccuracy"]

DAMAGE_AVERAGE = [
    "barrierDamageDoneAvgPer10Min",
    "criticalHitsAvgPer10Min",
    "deathsAvgPer10Min",
    "eliminationsAvgPer10Min",
    "finalBlowsAvgPer10Min",
    "heroDamageDoneAvgPer10Min",
    "objectiveKillsAvgPer10Min",
    "objectiveTimeAvgPer10Min",
    "soloKillsAvgPer10Min",
    "timeSpentOnFireAvgPer10Min",
]
DAMAGE_COMBAT = ["criticalHitsAccuracy", "weaponAccuracy"]

SUPPORT_ASSIST = ["defensiveAssistsAvgPer10Min", "offensiveAssistsAvgPer10Min"]
SUPPORT_AVERAGE = [
    "barrierDamageDoneAvgPer10Min",
    "criticalHitsAvgPer10Min",
    "deathsAvgPer10Min",
    "eliminationsAvgPer10Min",
    "finalBlowsAvgPer10Min",
    "healingDoneAvgPer10Min",
    "heroDamageDoneAvgPer10Min",
    "objectiveKillsAvgPer10Min",
    "objectiveTimeAvgPer10Min",
    "soloKillsAvgPer10Min",
    "timeSpentOnFireAvgPer10Min",
]
SUPPORT_COMBAT = ["weaponAccuracy"]

GAME = ["timePlayed", "winPercentage"]
# Hero-specific statistics: for every role, a mapping from hero name to the
# names of the statistics that exist only for that hero.
TANK_HERO_SPECIFIC = {
    "dVa": [
        "damageBlockedAvgPer10Min",
        "mechsCalledAvgPer10Min",
        "selfDestructKillsAvgPer10Min",
    ],
    "orisa": [
        "damageBlockedAvgPer10Min",
        "superchargerAssistsAvgPer10Min",
    ],
    "reinhardt": [
        "chargeKillsAvgPer10Min",
        "damageBlockedAvgPer10Min",
        "earthshatterKillsAvgPer10Min",
        "fireStrikeKillsAvgPer10Min",
    ],
    "roadhog": [
        "enemiesHookedAvgPer10Min",
        "hookAccuracy",
        "selfHealingAvgPer10Min",
        "wholeHogKillsAvgPer10Min",
    ],
    "sigma": [
        "accretionKillsAvgPer10Min",
        "damageAbsorbedAvgPer10Min",
        "damageBlockedAvgPer10Min",
        "graviticFluxKillsAvgPer10Min",
    ],
    "winston": [
        "damageBlockedAvgPer10Min",
        "jumpPackKillsAvgPer10Min",
        "playersKnockedBackAvgPer10Min",
        "primalRageKillsAvgPer10Min",
        "primalRageMeleeAccuracy",
        "teslaCannonAccuracy",
    ],
    "wreckingBall": [
        "grapplingClawKillsAvgPer10Min",
        "minefieldKillsAvgPer10Min",
        "piledriverKillsAvgPer10Min",
        "playersKnockedBackAvgPer10Min",
    ],
    "zarya": [
        "averageEnergy",
        "damageBlockedAvgPer10Min",
        "gravitonSurgeKillsAvgPer10Min",
        "highEnergyKillsAvgPer10Min",
        "primaryFireAccuracy",
    ],
}

DAMAGE_HERO_SPECIFIC = {
    "ashe": [
        "bobKillsAvgPer10Min",
        "dynamiteKillsAvgPer10Min",
        "scopedAccuracy",
        "scopedCriticalHitsAccuracy",
    ],
    "bastion": [
        "reconKillsAvgPer10Min",
        "selfHealingAvgPer10Min",
        "sentryKillsAvgPer10Min",
    ],
    "doomfist": [
        "abilityDamageDoneAvgPer10Min",
        "meteorStrikeKillsAvgPer10Min",
        "shieldsCreatedAvgPer10Min",
    ],
    "echo": [
        "duplicateKillsAvgPer10Min",
        "focusingBeamKillsAvgPer10Min",
        "stickyBombsKillsAvgPer10Min",
    ],
    "genji": [
        "damageReflectedAvgPer10Min",
        "dragonbladesKillsAvgPer10Min",
    ],
    "hanzo": [
        "dragonstrikeKillsAvgPer10Min",
        "stormArrowKillsAvgPer10Min",
    ],
    "junkrat": [
        "concussionMineKillsAvgPer10Min",
        "enemiesTrappedAvgPer10Min",
        "ripTireKillsAvgPer10Min",
    ],
    "mccree": [
        "deadeyeKillsAvgPer10Min",
        "fanTheHammerKillsAvgPer10Min",
    ],
    "mei": [
        "blizzardKillsAvgPer10Min",
        "damageBlockedAvgPer10Min",
        "enemiesFrozenAvgPer10Min",
        "selfHealingAvgPer10Min",
    ],
    "pharah": [
        "barrageKillsAvgPer10Min",
        "directHitsAccuracy",
        "rocketDirectHitsAvgPer10Min",
    ],
    "reaper": [
        "deathsBlossomKillsAvgPer10Min",
        "selfHealingAvgPer10Min",
    ],
    "soldier76": [
        "bioticFieldHealingDone",
        "helixRocketKillsAvgPer10Min",
        "tacticalVisorKillsAvgPer10Min",
    ],
    "sombra": [
        "enemiesEmpdAvgPer10Min",
        "enemiesHackedAvgPer10Min",
    ],
    "symmetra": [
        "damageBlockedAvgPer10Min",
        "playersTeleportedAvgPer10Min",
        "primaryFireAccuracy",
        "secondaryFireAccuracy",
        "sentryTurretsKillsAvgPer10Min",
    ],
    "torbjorn": [
        "moltenCoreKillsAvgPer10Min",
        "turretsDamageAvgPer10Min",
        "turretsKillsAvgPer10Min",
    ],
    "tracer": [
        "healthRecoveredAvgPer10Min",
        "pulseBombsKillsAvgPer10Min",
    ],
    "widowmaker": [
        "scopedAccuracy",
        "scopedCriticalHitsAccuracy",
    ],
}

SUPPORT_HERO_SPECIFIC = {
    "ana": [
        "enemiesSleptAvgPer10Min",
        "nanoBoostAssistsAvgPer10Min",
        "scopedAccuracy",
        "selfHealingAvgPer10Min",
        "unscopedAccuracy",
    ],
    "baptiste": [
        "amplificationMatrixAssistsAvgPer10Min",
        "healingAccuracy",
        "immortalityFieldDeathsPreventedAvgPer10Min",
        "selfHealingAvgPer10Min",
    ],
    "brigitte": [
        "armorProvidedAvgPer10Min",
        "damageBlockedAvgPer10Min",
        "inspireUptimePercentage",
    ],
    "lucio": [
        "soundBarriersProvidedAvgPer10Min",
        "selfHealingAvgPer10Min",
    ],
    "mercy": [
        "damageAmplifiedAvgPer10Min",
        "playersResurrectedAvgPer10Min",
    ],
    "moira": [
        "coalescenceHealingAvgPer10Min",
        "coalescenceKillsAvgPer10Min",
        "secondaryFireAccuracy",
        "selfHealingAvgPer10Min",
    ],
    "zenyatta": [
        "transcendenceHealingBest",
    ],
}
# Statistics to keep for each hero, separated by stat category. Every hero of
# a role shares that role's category lists and adds its own "heroSpecific"
# list.
TANK_STATS = {
    hero: {
        "average": TANK_AVERAGE,
        "combat": TANK_COMBAT,
        "heroSpecific": TANK_HERO_SPECIFIC[hero],
    }
    for hero in TANK_LIST
}
DAMAGE_STATS = {
    hero: {
        "average": DAMAGE_AVERAGE,
        "combat": DAMAGE_COMBAT,
        "heroSpecific": DAMAGE_HERO_SPECIFIC[hero],
    }
    for hero in DAMAGE_LIST
}
SUPPORT_STATS = {
    hero: {
        "assists": SUPPORT_ASSIST,
        "average": SUPPORT_AVERAGE,
        "combat": SUPPORT_COMBAT,
        "heroSpecific": SUPPORT_HERO_SPECIFIC[hero],
    }
    for hero in SUPPORT_LIST
}
# Role name -> per-hero statistics table for that role.
ROLES_STATS = dict(zip(ROLES, (TANK_STATS, DAMAGE_STATS, SUPPORT_STATS)))

# Run-time accumulators, keyed by role: every SR seen so far, and the rank
# data derived from those SRs (0 until it has been computed).
ALL_SRS = {"tank": [], "damage": [], "support": []}
ALL_RANKS = {"tank": 0, "damage": 0, "support": 0}

# Only keep statistics for heroes with time played above this many seconds.
TIME_THRESHOLD = 3600
# Print the current progress after every INTERVAL number of players.
INTERVAL = 1000
DEBUG = 0
def srToRank(sr):
    """ Map an SR to the name of the corresponding rank
    args:
        sr: a number, the player's SR
    return:
        a string, the rank name, or "Invalid" if sr < 0 or sr > 5000
    """
    # Reject out-of-range values first. Written as a negated chained
    # comparison so that negative SRs (which previously fell through to
    # "Silver") and NaN both come out as "Invalid".
    if not 0 <= sr <= 5000:
        return "Invalid"
    # (exclusive upper bound, rank name), in ascending order; anything at or
    # above the last bound is Grandmaster.
    rankBounds = (
        (1500, "Bronze"),
        (2000, "Silver"),
        (2500, "Gold"),
        (3000, "Platinum"),
        (3500, "Diamond"),
        (4000, "Masters"),
    )
    for upperBound, rankName in rankBounds:
        if sr < upperBound:
            return rankName
    return "Grandmaster"
def formatDuration(timeStr):
    """ Convert a duration string to a number of seconds
    args:
        timeStr: a string of format hh:mm:ss or mm:ss or ss; an empty (or
            whitespace-only) string counts as a zero duration
    return:
        sec: an integer, number of seconds corresponding to timeStr
    raises:
        ValueError: if a field is not an integer, or if there are more than
            three colon-separated fields
    """
    # str.split never returns an empty list, so the empty input has to be
    # detected on the string itself.
    if not timeStr.strip():
        return 0
    fields = timeStr.split(":")
    if len(fields) > 3:
        raise ValueError(f"unsupported duration format: {timeStr!r}")
    # Fold left to right in base 60, so hours, minutes and seconds are all
    # accounted for regardless of how many fields are present.
    sec = 0
    for field in fields:
        sec = sec * 60 + int(field)
    return sec
def storeStats(role, sr, playerStats, outputFile):
    """ Write a player's stats for all sufficiently played heroes of a role
    One JSON object is written as a single line to outputFile, of the form
    {"rating": sr, hero: {"timePlayed": seconds, name of the statistic: value}},
    but only when at least one hero of the role qualifies.
    args:
        role: a string, tank, damage, or support
        sr: an integer in [0, 5000] indicating player's SR
        playerStats: a dictionary of player's competitive statistics, keyed by hero name
        outputFile: an opened file corresponding to the input role to store player stats
    return:
        numHeroes: number of heroes under the given role with valid stats for the given player
    """
    heroList = ROLES_LISTS[role]    # all hero names under the input role
    heroStats = ROLES_STATS[role]   # statistics names to keep for each of those heroes
    allStats = {"rating": sr}
    for curHero, curStats in playerStats.items():
        if not curHero or not curStats:
            continue
        # only check heroes for the current role
        if curHero not in heroList:
            continue
        # only check heroes with time played above TIME_THRESHOLD; a missing
        # or malformed "game"/"timePlayed" entry disqualifies the hero
        try:
            timePlayed = formatDuration(curStats["game"]["timePlayed"])
        except (KeyError, TypeError, ValueError, AttributeError):
            continue
        if timePlayed < TIME_THRESHOLD:
            continue
        statsToKeep = heroStats[curHero]
        statsToSave = {"timePlayed": timePlayed}
        # only keep the stats listed for this hero, category by category
        for statCategory, statDict in curStats.items():
            if not statDict or statCategory not in statsToKeep:
                continue
            wantedNames = statsToKeep[statCategory]
            for statName, stat in statDict.items():
                if statName in wantedNames:
                    statsToSave[statName] = stat
        allStats[curHero] = statsToSave
    # "rating" is always present, so it does not count as a hero
    numHeroes = len(allStats) - 1
    if numHeroes:
        outputFile.write(json.dumps(allStats) + "\n")
    return numHeroes
def separateDataByRole(allPlayers, tankOutput, damageOutput, supportOutput):
    """ For each player, save the following to the output file of the corresponding role:
    1) rating: an integer, player's SR for that role
    2) the player's competitive stats for all heroes played under that role,
       as written by storeStats: {hero: {name of the statistic: number}}.
       Keys in the outer dictionary are names of the heroes played by the player under a given
       role (see TANK_LIST, DAMAGE_LIST, and SUPPORT_LIST). Keys in the inner dictionary are names
       of statistics for a given hero (see TANK_STATS, DAMAGE_STATS, and SUPPORT_STATS).
    Every valid SR is also appended to the global ALL_SRS list of its role.
    Lines that are not valid JSON, and players without a "compStats" entry, are skipped and
    not counted.
    args:
        allPlayers: a list of player info, one JSON string per player
        tankOutput: file name under which to store info for tanks
        damageOutput: file name under which to store info for damages
        supportOutput: file name under which to store info for supports
    return:
        playerCount: total number of players processed
    """
    playerCount = 0
    # output files are opened in append mode so that successive calls (one
    # per input file) accumulate into the same three files
    with open(tankOutput, "a+") as tankFile, \
            open(damageOutput, "a+") as damageFile, \
            open(supportOutput, "a+") as supportFile:
        roleFiles = dict(zip(ROLES, [tankFile, damageFile, supportFile]))
        for rawPlayer in allPlayers:
            try:
                player = json.loads(rawPlayer)
            except (ValueError, TypeError):
                # json.JSONDecodeError is a ValueError; TypeError covers non-string input
                print("[separateDataByRole] Failed to decode JSON")
                continue
            if not isinstance(player, dict) or "compStats" not in player:
                continue
            compStats = player["compStats"] or {}
            # extract player's SR(s); a missing or null "ratings" entry means
            # the player has no SR for any role
            for srInfo in player.get("ratings") or []:
                curLevel = srInfo["level"]
                curRole = srInfo["role"]
                if curLevel < 0 or curLevel > 5000:
                    print("[separateDataByRole] Invalid player SR")
                    continue
                ALL_SRS[curRole].append(curLevel)
                storeStats(curRole, curLevel, compStats, roleFiles[curRole])
            playerCount += 1
            if playerCount % INTERVAL == 0:
                print(f"[separateDataByRole] Current number of players added: {playerCount}/{len(allPlayers)}")
    for role, srs in ALL_SRS.items():
        print(f"[separateDataByRole] Number of {role}s so far: {len(srs)}")
    return playerCount
if __name__ == "__main__":
    # Parse every raw player file and append its stats to the per-role outputs.
    for file in os.listdir(PLAYER_INFO_FOLDER):
        if not file.endswith(".txt"):
            continue
        filePath = PLAYER_INFO_FOLDER + file
        with open(filePath, "r") as inFile:
            print(f"[checkAllFiles] Successfully opened {file}")
            allPlayers = inFile.readlines()
        print(f"Total number of players in the input file: {len(allPlayers)}")
        playerCount = separateDataByRole(allPlayers, TANK_DATA, DAMAGE_DATA, SUPPORT_DATA)

    # map SRs to ranks for each role
    # An explicit categorical dtype is used instead of reorder_categories,
    # which raises ValueError unless the ranks present in the data are exactly
    # those in RANKS (e.g. it fails when no Grandmaster player was scraped).
    rankDtype = pd.CategoricalDtype(categories=RANKS)
    for role, srs in ALL_SRS.items():
        ALL_RANKS[role] = pd.Series(srs).map(srToRank).astype(rankDtype)

    # show rank distribution for each role
    for role, rank in ALL_RANKS.items():
        # observed=False keeps ranks with no players in the table (as 0.0)
        contingency = rank.groupby(rank, observed=False).count() / len(rank) * 100
        print(f"Rank distribution (percentage) of {role} from a total of {len(ALL_RANKS[role])} players:")
        print(round(contingency, 1))

    # save SR and rank data
    # NOTE(review): str() of a long pandas Series appears to be abbreviated by
    # pandas' display options, so the rank part of this file may be truncated
    # for large data sets - confirm whether any consumer needs the full data.
    with open(SR_RANK_INFO, "w") as output:
        output.write(str(ALL_SRS) + "\n" + str(ALL_RANKS))