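"""Train a DQN agent to play Tetris.

Each episode, the agent evaluates all reachable board states, plays the
placement it values highest, stores the transition in replay memory, and
periodically trains on sampled minibatches. Per-episode scores are written
to data.txt.
"""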
from dqn_agent import DQNAgent
from tetris import Tetris
from datetime import datetime
from statistics import mean
from logs import CustomTensorBoard
from tqdm import tqdm


# Run DQN with Tetris
def dqn():
    env = Tetris()

    # Training hyperparameters
    episodes = 2000              # total training episodes
    max_steps = None             # max steps per episode (None = unlimited)
    epsilon_stop_episode = 1500  # episode at which epsilon reaches its minimum
    mem_size = 20000             # replay memory capacity
    discount = 0.95              # discount factor (gamma)
    batch_size = 512             # minibatch size sampled from replay memory
    epochs = 1                   # training epochs per replay pass
    render_every = 1             # render every N episodes; adjust for visualization
    log_every = 50               # aggregate score stats every N episodes
    replay_start_size = 2000     # experiences required in memory before training
    train_every = 1              # train every N episodes
    n_neurons = [32, 32]         # hidden layer sizes
    render_delay = None          # delay between rendered frames
    activations = ['relu', 'relu', 'linear']  # two hidden layers + output
    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)
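    # Note: epsilon-greedy exploration is handled inside DQNAgent, which
    # presumably anneals epsilon toward its minimum by epsilon_stop_episode.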

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    # log = CustomTensorBoard(log_dir=log_dir)
    scores = []
    max_score_run = 0

    file = open("data.txt", "w")  # per-episode scores, one "episode score" pair per line
    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False
        # Game: the agent scores afterstates (the board after each possible
        # placement) rather than (state, action) pairs
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # Recover the (x, rotation) action that leads to the chosen state
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], render=render,
                                    render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1
        game_score = env.get_game_score()
        scores.append(game_score)
        max_score_run = max(game_score, max_score_run)

        print("\n@@@@@@@ Episode: " + str(episode) + " @@@@@@@\n")
        print("Max Score: " + str(max_score_run) + "\n")
        file.write(str(episode) + " " + str(game_score) + "\n")
        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)
        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            # log.log(episode, avg_score=avg_score, min_score=min_score,
            #         max_score=max_score)

    file.close()


if __name__ == "__main__":
    dqn()