forked from MaartenGr/boardgame
-
Notifications
You must be signed in to change notification settings - Fork 0
/
generalstats.py
217 lines (168 loc) · 7.81 KB
/
generalstats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import numpy as np
import pandas as pd
import altair as alt
import streamlit as st
# Visual indent placed in front of the bullet lines rendered on this page.
# HTML non-breaking-space entities are used instead of plain spaces because
# leading ASCII whitespace is not kept as indentation by Markdown rendering
# (it is stripped or, at four or more columns, starts an indented code block).
SPACES = '&nbsp;' * 10
def load_page(df: pd.DataFrame) -> None:
    """ Render the complete Data Exploration page

    The page is built top to bottom in a fixed order:
    * The introductory text.
    * The activity chart in the sidebar.
    * The total number of times each board game has been played.
    * The longest breaks between board games.
    * The longest chain of games played in days.
    * The day on which most games have been played.

    Parameters:
    -----------

    df : pandas.core.frame.DataFrame
        The data to be used for the analyses of played board game matches.
    """
    # The introduction does not need the data; every other section does.
    prepare_layout()

    section_renderers = (
        sidebar_activity_plot,
        plot_play_count_graph,
        longest_break_between_games,
        most_subsequent_days_played,
        most_games_on_one_day,
    )
    for render_section in section_renderers:
        render_section(df)
def sidebar_activity_plot(df: pd.DataFrame) -> None:
    """ Draw an area chart of the game activity in the sidebar

    The rows are ordered by "Date", bucketed into 3-day bins and counted per
    bin; the per-bin count of the "Players" column is plotted against the date.

    Parameters:
    -----------

    df : pandas.core.frame.DataFrame
        The data to be used for the analyses of played board game matches.
    """
    by_date = df.sort_values("Date").set_index("Date")
    games_per_bin = by_date.resample("3D").count().reset_index()

    area = alt.Chart(games_per_bin).mark_area(color='goldenrod', opacity=1)
    area = area.encode(
        x='Date',
        y=alt.Y('Players', title='Number of Games'),
    )
    area = area.properties(background='transparent')

    st.sidebar.altair_chart(area)
def prepare_layout() -> None:
    """ Prepare the text of the page at the top

    Writes the page title, a short introduction and one indented bullet per
    section of the page.
    """
    st.title("\N{GAME DIE} Data Exploration")
    # These two strings contain no placeholder, so they are written as-is.
    st.write("This page contains basic exploratory data analyses for the purpose of getting a general "
             "feeling of what the data contains. ")
    st.markdown("There are several things you see on this page:")

    bullets = (
        "On the **left** you can see how often games were played in the last year of matches. ",
        "You can see the **total amount** certain board games have been played. ",
        "The longest **break** between board games. ",
        "The **longest chain** of games played in days. ",
        "The **day** most games have been played. ",
    )
    # The emoji is spelled as a named escape so the source stays ASCII-only
    # and cannot be mangled by a wrong file encoding.
    for bullet in bullets:
        st.markdown("{}\N{SMALL BLUE DIAMOND} {}".format(SPACES, bullet))

    st.write(" ")
def plot_play_count_graph(df: pd.DataFrame) -> None:
    """ Shows how often games were played

    Renders a horizontal bar chart with one bar per game (the non-null count of
    the "Players" column per "Game"), ordered either by that count or by game
    name depending on the select box, followed by the average number of games
    per day on which at least one game was played.

    Parameters:
    -----------

    df : pandas.core.frame.DataFrame
        The data to be used for the analyses of played board game matches.
    """
    st.header("**\N{BLACK CHESS PAWN}** Board Game Frequency **\N{BLACK CHESS PAWN}**")
    st.write("Below you can see the total number of times a game has been played. I should note that these games "
             "can also be played with different number of people.")

    grouped_by_game = df.groupby("Game").count().reset_index()
    order_by = st.selectbox("Order by:", ["Amount", "Name"])

    # Only the y-encoding depends on the chosen ordering; the chart itself is built once.
    if order_by == "Amount":
        y_encoding = alt.Y('Game:O',
                           sort=alt.EncodingSortField(
                               field="Players",  # The field to use for the sort
                               order="descending"  # The order to sort in
                           ))
    else:
        y_encoding = alt.Y('Game:O')

    bars = alt.Chart(
        grouped_by_game,
        height=100 + (20 * len(grouped_by_game)),  # grow the chart with the number of games
    ).mark_bar(color='#4db6ac').encode(
        x=alt.X('Players:Q', axis=alt.Axis(title='Total times played')),
        y=y_encoding,
    )

    text = bars.mark_text(
        align='left',
        baseline='middle',
        dx=3  # Nudges text to right so it doesn't appear on top of the bar
    ).encode(
        text='Players:Q'
    )

    st.write(bars + text)

    # Count rows per day instead of the non-null values of one particular column,
    # so the average does not depend on that column being fully populated.
    games_per_day = df.groupby('Date').size()
    if not games_per_day.empty:
        average_nr_games_per_day = round(float(games_per_day.mean()), 2)
        st.write("On average {} games per day were played on days "
                 "that there were board game matches".format(average_nr_games_per_day))
def longest_break_between_games(df: pd.DataFrame) -> None:
    """ Extract the longest nr of days between games

    Lists the (at most) five largest gaps, in whole days, between two
    successive dates on which games were played.

    Parameters:
    -----------

    df : pandas.core.frame.DataFrame
        The data to be used for the analyses of played board game matches.
        NOTE(review): the "Date" column is assumed to hold datetime values - confirm against the loader.
    """
    # Group keys come back unique and in ascending order.
    play_dates = df.groupby("Date").size().index

    st.header("**\N{BLACK CHESS PAWN}** Longest Break between Games **\N{BLACK CHESS PAWN}**")
    st.write("The longest breaks between games were:")

    # A break needs two distinct dates; with fewer there is nothing to list.
    if len(play_dates) > 1:
        starts, ends = play_dates[:-1], play_dates[1:]
        breaks = pd.DataFrame({'Start_date': starts,
                               'End_date': ends,
                               'Count': (ends - starts).days})

        for gap in breaks.nlargest(5, 'Count').itertuples(index=False):
            st.markdown("{}\N{SMALL BLUE DIAMOND} **{}** days between **{}** and **{}**".format(
                SPACES,
                gap.Count,
                gap.Start_date.strftime("%Y-%m-%d"),
                gap.End_date.strftime("%Y-%m-%d"),
            ))

    st.write(" ")
def most_subsequent_days_played(df: pd.DataFrame) -> None:
    """ The largest number of subsequent days that games were played.

    Finds the longest run of consecutive calendar days that all have at least
    one game and writes its length together with its first and last day. When
    several runs share the maximum length, the earliest one is reported.

    Parameters:
    -----------

    df : pandas.core.frame.DataFrame
        The data to be used for the analyses of played board game matches.
    """
    # Work on unique, sorted calendar days so the result does not depend on the
    # row order of the data or on a time-of-day component, if there is one.
    play_days = pd.DatetimeIndex(df["Date"]).normalize().dropna().unique().sort_values()
    most_subsequent_days, first_day, last_day = _longest_consecutive_run(play_days)

    st.header("**\N{BLACK CHESS PAWN}** Longest Chain of Games Played **\N{BLACK CHESS PAWN}**")
    st.write("The longest number of subsequent days we played games was:")
    st.write("{}\N{SMALL ORANGE DIAMOND} **{}** days".format(SPACES, most_subsequent_days))
    if most_subsequent_days:
        st.write("{}\N{SMALL BLUE DIAMOND} between **{}** and **{}**".format(
            SPACES, first_day.strftime("%Y-%m-%d"), last_day.strftime("%Y-%m-%d")))
    st.markdown("<br>", unsafe_allow_html=True)


def _longest_consecutive_run(days):
    """ Return (length, first day, last day) of the longest run of consecutive days

    `days` must be unique timestamps in ascending order. The result is
    (0, None, None) when `days` is empty.
    """
    one_day = pd.Timedelta(days=1)
    best_length, best_first, best_last = 0, None, None
    run_length, run_first, previous = 0, None, None

    for day in days:
        if previous is not None and day - previous == one_day:
            run_length += 1
        else:
            # A gap (or the very first day) starts a new run.
            run_length, run_first = 1, day
        # Evaluating the run on every day also covers a run that lasts until
        # the final date; strict '>' keeps the earliest of equally long runs.
        if run_length > best_length:
            best_length, best_first, best_last = run_length, run_first, day
        previous = day

    return best_length, best_first, best_last
def most_games_on_one_day(df: pd.DataFrame) -> None:
    """ Extract when the most games have been played on one day and how many

    Also lists the players that took part in at least one game on that day,
    based on the columns whose name contains "_played".

    Parameters:
    -----------

    df : pandas.core.frame.DataFrame
        The data to be used for the analyses of played board game matches.
    """
    # Extract on which day the most games have been played
    games_per_day = df.groupby("Date").size()
    if games_per_day.empty:
        # Without any dated game there is no busiest day to report.
        st.header("**\N{BLACK CHESS PAWN}** Most Games Played in One Day **\N{BLACK CHESS PAWN}**")
        st.write("No games have been played yet.")
        return

    busiest_day = games_per_day.idxmax()  # first day in date order if several share the maximum
    nr_games = games_per_day.max()
    date = str(busiest_day).split(" ")[0]

    # Extract players in these games; rows are matched on the group key itself
    # rather than on its formatted text.
    played_columns = [column for column in df.columns if "_played" in column]
    played = df.loc[df["Date"] == busiest_day, played_columns]
    took_part = played.any(axis=0).to_numpy()
    # Keep everything in front of "_played" so names containing "_" stay intact.
    players = ["**" + column.partition("_played")[0] + "**"
               for column, present in zip(played.columns, took_part) if present]

    # Write results to streamlit
    st.header("**\N{BLACK CHESS PAWN}** Most Games Played in One Day **\N{BLACK CHESS PAWN}**")
    st.write("The most games on a single day were played on:")
    st.write("{}\N{SMALL ORANGE DIAMOND} **{}** with **{}** games.".format(SPACES, date, nr_games))

    if players:
        st.write("Players that took part in at least one of the games: ")
        # Only a list of two or more names gets the closing "and".
        if len(players) > 1:
            players[-1] = 'and ' + players[-1]
        st.write("{}\N{SMALL BLUE DIAMOND} {}".format(SPACES, ", ".join(players)))