-
Notifications
You must be signed in to change notification settings - Fork 0
/
tournament.py
196 lines (168 loc) · 6.17 KB
/
tournament.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import attrs
from typing import Callable, Any
from abc import ABC, abstractmethod
from enum import Enum
import numpy as np
import random
import math
class Player(ABC):
    """Interface for an agent that chooses moves in a game."""

    @abstractmethod
    def act(self, options: list[Enum], state: dict):
        """Choose a move for the current turn.

        Args:
            options: The moves this player may choose from.
            state: Additional information passed in by the game (the
                rock-paper-scissors game in this module passes ``{}``).

        Returns:
            The chosen move. The rock-paper-scissors game in this module
            reads ``.value`` from it, so it is presumably an ``Enum`` member.
        """
# Abstract function
class Game(ABC):
    """Interface for a two-player game that tournaments can run."""

    @abstractmethod
    def play(self, player1: Player, player2: Player):
        """Play a single match between ``player1`` and ``player2``.

        Returns:
            ``1`` if ``player1`` won, ``-1`` if ``player2`` won and ``0`` if
            the match was a draw.
        """

    @abstractmethod
    def reset(self):
        """Prepare the game for another match.

        The tournaments in this module call this after playing a match.
        """
@attrs.define
class Tournament(ABC):
    """Base class for tournament formats that score a set of players.

    Deriving from ``ABC`` makes ``@abstractmethod`` effective: the base class
    itself cannot be instantiated, only subclasses that implement
    ``play_round``.
    """

    # Game used for every match; subclasses call ``game.play(p1, p2)`` and
    # ``game.reset()``.
    game: Game
    # Builds one player from one configuration array.
    player_factory: Callable[[np.ndarray], Player]

    @abstractmethod
    def play_round(self, player_configs: list[np.ndarray]) -> np.ndarray:
        """Play one tournament round and score every configuration.

        Args:
            player_configs: One configuration per player; each is passed to
                ``player_factory`` to build that player.

        Returns:
            An array with one score per entry of ``player_configs``, in the
            same order.
        """
class RoundRobinTournament(Tournament):
    """Tournament in which every pair of players meets exactly once."""

    def play_round(self, player_configs: list[np.ndarray]) -> np.ndarray:
        """Play every pairing once and count wins per player.

        Args:
            player_configs: One configuration per player, handed to
                ``player_factory``.

        Returns:
            Array of win counts aligned with ``player_configs``.
        """
        roster = [self.player_factory(cfg) for cfg in player_configs]
        scores = np.zeros(len(player_configs))
        n_players = len(roster)
        for first in range(n_players):
            # Only pair with later players so each match-up happens once,
            # with the earlier player always passed as player 1.
            for second in range(first + 1, n_players):
                outcome = self.game.play(roster[first], roster[second])
                # TODO: Make result an enum
                # A draw (0) passes both tests, so both players are credited.
                if outcome >= 0:
                    scores[first] += 1
                if outcome <= 0:
                    scores[second] += 1
                self.game.reset()
        return scores
@attrs.define
class NTrialsTournament(Tournament):
    """Tournament made of ``n_trials`` passes of randomly shuffled pairings."""

    # Number of shuffle-and-pair passes played per round.
    n_trials: int = 10

    # TODO : Enforce no repeat matches
    # TODO : Use ELO score for fitness
    def play_round(self, player_configs: list[np.ndarray]) -> np.ndarray:
        """Play ``n_trials`` passes of random pairings and score each player.

        A win is worth 1 point and a draw 0.5 points to each side.

        Args:
            player_configs: One configuration per player, handed to
                ``player_factory``.

        Returns:
            Array of scores aligned with ``player_configs``. All zeros when
            there are fewer than two players, since no match can be played.
        """
        players = [self.player_factory(config) for config in player_configs]
        wins = np.zeros(len(player_configs))
        n_players = len(players)
        if n_players < 2:
            # Without this guard a lone player would be paired with itself.
            return wins

        order = list(range(n_players))
        for _ in range(self.n_trials):
            random.shuffle(order)
            for slot in range(0, n_players, 2):
                p1_idx = order[slot]
                # With an odd number of players the last slot wraps around to
                # the first player in the shuffled order, who then plays one
                # extra match in this pass.
                p2_idx = order[(slot + 1) % n_players]
                # Look players up through the shuffled order so the players
                # who actually meet are the ones whose scores are updated.
                result = self.game.play(players[p1_idx], players[p2_idx])
                if result > 0:
                    wins[p1_idx] += 1
                elif result < 0:
                    wins[p2_idx] += 1
                else:
                    wins[p1_idx] += 0.5
                    wins[p2_idx] += 0.5
                self.game.reset()
        return wins
@attrs.define
class SingleEliminationTournament(Tournament):
    """Knock-out tournament: each match's loser is eliminated."""

    def _check_power_of_two(self, n_players):
        """Return True when ``n_players`` is a positive power of two.

        Uses an integer bit test, so the answer is exact for any size and
        zero players is simply rejected instead of raising from a logarithm.
        """
        return n_players > 0 and (n_players & (n_players - 1)) == 0

    def play_round(self, player_configs: list[np.ndarray]) -> np.ndarray:
        """Run the bracket down to a single survivor and count wins.

        Pairings are reshuffled before every stage of the bracket.

        Args:
            player_configs: One configuration per player, handed to
                ``player_factory``. The count must be a power of two.

        Returns:
            Array of win counts aligned with ``player_configs``.

        Raises:
            AssertionError: If the number of players is not a power of two.
        """
        assert self._check_power_of_two(len(player_configs)), "Number of players must be a power of 2"
        players = [self.player_factory(config) for config in player_configs]
        wins = np.zeros(len(player_configs))
        indices = list(range(len(players)))
        while len(indices) > 1:
            random.shuffle(indices)
            new_indices = []
            for slot in range(0, len(indices), 2):
                p1_idx = indices[slot]
                p2_idx = indices[slot + 1]
                result = self.game.play(players[p1_idx], players[p2_idx])
                # On a draw (0) both players are credited with a win and
                # player 1 advances.
                if result >= 0:
                    wins[p1_idx] += 1
                    winner_idx = p1_idx
                else:
                    winner_idx = p2_idx
                if result <= 0:
                    wins[p2_idx] += 1
                new_indices.append(winner_idx)
                self.game.reset()
            indices = new_indices
        return wins
# Test classes
class RockPaperScissorsOptions(Enum):
    """The three moves available in rock-paper-scissors."""

    ROCK = 0
    PAPER = 1
    SCISSORS = 2
class RandomRPSPlayer(Player):
    """Rock-paper-scissors player that picks its move at random."""

    def __init__(self, config):
        """Store ``config`` rescaled so that its entries sum to one.

        Args:
            config: Array of weights, one per entry of the ``options`` list
                later passed to ``act``.
        """
        weight_total = config.sum()
        self.probs = config / weight_total

    def act(self, options, metadata):
        """Sample one entry of ``options`` using the stored probabilities.

        ``metadata`` is accepted but not used.
        """
        sampled = np.random.choice(options, p=self.probs)
        return RockPaperScissorsOptions(sampled)
class RPS(Game):
    """Single-throw rock-paper-scissors game."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Give both players the full list of moves again."""
        self._p1_options, self._p2_options = (
            list(RockPaperScissorsOptions),
            list(RockPaperScissorsOptions),
        )

    @staticmethod
    def _beats(move, other):
        """Return True when ``move``'s value is one step after ``other``'s, modulo 3."""
        return move.value == ((other.value + 1) % 3)

    def play(self, player1: Player, player2: Player):
        """Ask each player for a move, print both moves, and score the match.

        Returns:
            ``1`` if ``player1``'s move wins, ``-1`` if ``player2``'s move
            wins, otherwise ``0``.
        """
        first_move = player1.act(self._p1_options, {})
        second_move = player2.act(self._p2_options, {})
        print(first_move)
        print(second_move)
        if self._beats(first_move, second_move):
            return 1
        if self._beats(second_move, first_move):
            return -1
        return 0
if __name__ == "__main__":
    # Manual smoke test: single games first, then two tournament formats.
    rps = RPS()
    rock_player = RandomRPSPlayer(np.array([1, 0, 0]))
    paper_player = RandomRPSPlayer(np.array([0, 1, 0]))
    scissors_player = RandomRPSPlayer(np.array([0, 0, 1]))

    # Each player always throws the same move, so the results are fixed.
    single_games = [
        ("Game 1: Rock vs. Paper (expect -1)", rock_player, paper_player),
        ("Game 2: Paper vs. Rock (expect 1)", paper_player, rock_player),
        ("Game 3: Rock vs. Rock (expect 0)", rock_player, rock_player),
        ("Game 4: Rock vs. Scissors (expect 1)", rock_player, scissors_player),
    ]
    for banner, first, second in single_games:
        print(banner)
        print(rps.play(first, second))

    # Now test the tournament
    bracket = SingleEliminationTournament(rps, RandomRPSPlayer)
    # One paper player against three rock players.
    field = [np.array([0, 1, 0])] + [np.array([1, 0, 0]) for _ in range(3)]
    print(bracket.play_round(field))

    # Original note: p1 loses twice, p2 wins twice, p3 + p4 win,lose,+tie
    # NOTE(review): pairings are shuffled inside play_round, so the outcome
    # can differ between runs.
    mixed_field = [
        np.array([0, 1, 0]),
        np.array([1, 0, 0]),
        np.array([0, 0, 1]),
        np.array([0, 0, 1]),
    ]
    print(bracket.play_round(mixed_field))

    trials = NTrialsTournament(rps, RandomRPSPlayer, n_trials=10)
    # 3 paper, 9 rock and 3 scissors players, each with its own array.
    population = (
        [np.array([0, 1, 0]) for _ in range(3)]
        + [np.array([1, 0, 0]) for _ in range(9)]
        + [np.array([0, 0, 1]) for _ in range(3)]
    )
    trial_scores = trials.play_round(population)
    print("N trials")
    print(trial_scores)