-
Notifications
You must be signed in to change notification settings - Fork 14
/
exploregames.py
233 lines (176 loc) · 8.38 KB
/
exploregames.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
from typing import List, Tuple
import streamlit as st
import altair as alt
import pandas as pd
import numpy as np
# Prefix of ten spaces that prepare_layout and show_min_max_stats put in front
# of the bullet lines they write to the page.
SPACES = ' ' * 10
def load_page(df: pd.DataFrame,
              player_list: List[str]) -> None:
    """ Build the "Explore games" page for one selected game.

    The parts are rendered in this order:
    * The introduction text plus the game (and, when needed, version) selectors.
    * The distribution of scores for the selected game.
    * The number of matches per player for the selected game.
    * The top and bottom players for the selected game.
    * In the sidebar, how often the selected game was played over time.

    Parameters:
    -----------

    df : pandas.core.frame.DataFrame
        The data to be used for the analyses of played board game matches.

    player_list : list of str
        List of players that participated in the board games
    """
    # Everything below works on the rows of the game picked in the selectors
    game_df, game_name = prepare_layout(df)

    plot_distribution(game_df)
    plot_frequent_players(game_df, player_list)
    show_min_max_stats(game_df, game_name)
    sidebar_activity_plot(game_df)
def prepare_layout(df: pd.DataFrame) -> Tuple[pd.DataFrame, str]:
    """ Prepare layout and widgets

    Writes the page title and the explanatory text, then lets the user pick
    a game. When the picked game has more than one distinct value in the
    `Version` column, a second select box asks for the version and the data
    is narrowed down to that version as well.

    Parameters:
    -----------

    df : pandas.core.frame.DataFrame
        The data to be used for the analyses of played board game matches.
        The `Game` and `Version` columns are read here.

    Returns:
    --------

    selected_game_df : pandas.core.frame.DataFrame
        Data filtered by the selected game (and by version, if one was asked for)

    selected_game : str
        The selected game
    """
    st.title("π² Explore games")
    st.write("In this section you can explore the games that were played in the last year. "
             "Note that some games have different versions which are needed to select in order to continue. "
             "For example, Qwixx also has a 'Big Points' version which typically leads to much higher points. ")
    # This line has no placeholder, so it is written as-is
    st.markdown("There are several things you see on this page:")
    st.markdown("{}πΉ On the **left** you can see how often the selected game was played "
                "in the last year. ".format(SPACES))
    st.markdown("{}πΉ You can see the **distribution** of scores for the selected game. ".format(SPACES))
    st.markdown("{}πΉ The **frequency** of matches for each player. ".format(SPACES))
    st.markdown("{}πΉ The **top** and **bottom** players for the selected game.".format(SPACES))

    # Prepare ordered selection of games
    games = sorted(df.Game.unique())

    # Select game and possibly a version of it
    selected_game = st.selectbox("Select a game to explore.", games)
    selected_game_df = df.loc[df.Game == selected_game, :]

    versions = sorted(selected_game_df.Version.unique())
    if len(versions) > 1:
        # The second widget picks a version, so its label says so instead of
        # repeating the label of the game selector
        version = st.selectbox("Select a version to explore.", versions)
        selected_game_df = selected_game_df.loc[selected_game_df.Version == version, :]

    return selected_game_df, selected_game
def plot_distribution(selected_game_df: pd.DataFrame) -> None:
    """ Draw a bar chart with the distribution of scores for a single board game

    Nothing is drawn unless the `has_score` column sums to more than zero.
    Scores are collected from every column whose name contains 'score'
    (except 'has_score'); zero entries are dropped before counting.

    Parameters:
    -----------

    selected_game_df : pandas.core.frame.DataFrame
        Data filtered by the selected game
    """
    if not sum(selected_game_df.has_score.values) > 0:
        return

    st.header("**β** Distribution of Scores **β**")
    st.write("Here, you can see the distribution of all scores that were achieved in the game. ")

    # Per-player score columns; the 'has_score' flag column is not a score
    score_columns = [name for name in selected_game_df.columns
                     if 'score' in name and 'has_score' not in name]
    raw_scores = np.array(selected_game_df.loc[:, score_columns])

    # Indexing with nonzero() flattens the matrix and leaves out the zeros
    nonzero_scores = raw_scores[raw_scores.nonzero()]
    scores_df = pd.DataFrame(nonzero_scores, columns=['Scores'])

    histogram = alt.Chart(scores_df).mark_bar().encode(
        alt.X("Scores:Q"),
        y='count()',
    )
    st.altair_chart(histogram)
def show_min_max_stats(selected_game_df: pd.DataFrame,
                       selected_game: str) -> None:
    """ Show statistics for the worst and best players

    Scores are read from every column whose name contains 'score' (except
    'has_score'); the player name is the part of the column name before the
    first underscore. Zero entries are left out of the averages and of the
    lowest score. Nothing is written when no player has a usable average.

    Parameters:
    -----------

    selected_game_df : pandas.core.frame.DataFrame
        Data filtered by the selected game

    selected_game : str
        The selected game
    """
    score_columns = [column for column in selected_game_df.columns
                     if 'score' in column and 'has_score' not in column]
    score_selection = selected_game_df.loc[:, score_columns]
    score_matrix = np.array(score_selection)
    players = [column.split("_")[0] for column in score_columns]

    # Calculate average scores per player. A player without any non-zero
    # score gets NaN directly: np.mean on an empty selection would give the
    # same NaN but also emit a RuntimeWarning.
    averages = []
    for column in score_columns:
        vals = score_selection[column].to_numpy()
        nonzero_vals = vals[vals.nonzero()]
        averages.append(np.mean(nonzero_vals) if nonzero_vals.size else np.nan)

    if not averages or np.all(np.isnan(averages)):
        return

    # Extract player with lowest average score
    low_avg_player_idx = np.nanargmin(averages)
    low_avg_player_val = averages[low_avg_player_idx]
    low_avg_player = players[low_avg_player_idx]

    # Extract player with highest average score
    high_avg_player_idx = np.nanargmax(averages)
    high_avg_player_val = averages[high_avg_player_idx]
    high_avg_player = players[high_avg_player_idx]

    # Get max score. The NaN-aware variants match the handling of the averages
    # above; a NaN entry would otherwise be picked as the maximum.
    max_row, max_col = np.unravel_index(np.nanargmax(score_matrix), score_matrix.shape)
    max_player = players[max_col]
    max_score = score_selection.iloc[max_row, max_col]

    # Get min score: smallest non-zero entry, first position where it occurs.
    # A non-NaN average guarantees at least one non-zero, non-NaN entry.
    lowest_nonzero = np.nanmin(score_matrix[np.nonzero(score_matrix)])
    min_rows, min_cols = np.where(score_matrix == lowest_nonzero)
    min_player = players[min_cols[0]]
    min_score = score_matrix[min_rows[0]][min_cols[0]]

    # Top players
    st.header("**β** Top players **β**")
    st.write("Here are the best players for the game **{}**:".format(selected_game))
    st.write("{}πΉ Highest score by **{}** with {} points".format(SPACES, max_player, max_score))
    st.write("{}πΈ Highest average score by **{}** with {} points".format(SPACES, high_avg_player,
                                                                         high_avg_player_val))
    st.write(" ")

    # Bottom players
    st.header("**β** Bottom players **β**")
    st.write("Here are the worst players for the game **{}**:".format(selected_game))
    st.write("{}πΉ Lowest (non-zero) score by **{}** with {} points".format(SPACES, min_player, min_score))
    st.write("{}πΈ Lowest average score by **{}** with {} points".format(SPACES, low_avg_player, low_avg_player_val))
def plot_frequent_players(selected_game_df: pd.DataFrame,
                          player_list: List[str]) -> None:
    """ Show frequency of played games

    For every player the rows where the `<player>_played` column equals 1
    are counted and shown as a horizontal bar with the count next to it.

    Parameters:
    -----------

    selected_game_df : pandas.core.frame.DataFrame
        Data filtered by the selected game

    player_list : list of str
        List of players that participated in the board games
    """
    st.header("**β** Frequency of Matches **β**")
    st.write("For each player, their total number of matches is displayed below.")

    # Calculate Frequencies
    # Building the frame column by column keeps the counts as integers;
    # packing names and counts into one numpy array would turn the counts
    # into strings.
    frequency = pd.DataFrame({
        'Player': list(player_list),
        'Frequency': [int((selected_game_df[player + "_played"] == 1).sum())
                      for player in player_list],
    })

    # Visualize Results
    bars = alt.Chart(frequency,
                     height=200).mark_bar(color='#4db6ac').encode(
        x='Frequency:Q',
        y="Player:O"
    )

    # Count label drawn just past the end of each bar
    text = bars.mark_text(
        align='left',
        baseline='middle',
        dx=3
    ).encode(
        text='Frequency:Q'
    )

    st.altair_chart(bars + text)
def sidebar_activity_plot(selected_game_df: pd.DataFrame) -> None:
    """ Draw, in the sidebar, how often the selected game was played over time

    Rows are ordered by `Date` and grouped into consecutive 3-day bins; the
    number of non-null `Players` entries per bin is plotted as an area chart.

    Parameters:
    -----------

    selected_game_df : pandas.core.frame.DataFrame
        Data filtered by the selected game
    """
    by_date = selected_game_df.sort_values("Date").set_index("Date")
    per_three_days = by_date.resample("3D").count().reset_index()

    area = alt.Chart(per_three_days).mark_area(color='goldenrod', opacity=1)
    area = area.encode(
        x='Date',
        y=alt.Y('Players', title='Number of Games'),
    ).properties(background='transparent')

    st.sidebar.altair_chart(area)