Skip to content

Commit

Permalink
Merge pull request #22 from Galtvam/gcloud-workers
Browse files Browse the repository at this point in the history
Google Cloud Workers
  • Loading branch information
Galtvam authored Dec 2, 2020
2 parents c75c054 + 67014b5 commit d0c3f86
Show file tree
Hide file tree
Showing 16 changed files with 1,434 additions and 372 deletions.
7 changes: 7 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Image used to run the project's Python entry points on top of TensorFlow.
# NOTE(review): the untagged image resolves to `latest`, which is not pinned.
# The provisioning script installs NVIDIA drivers and the NVIDIA container
# toolkit on the host, so a GPU-enabled tag may be intended here - confirm.
FROM tensorflow/tensorflow

# Copy only the dependency manifest so this layer is cached until it changes.
COPY requirements.txt requirements.txt

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt

# Arguments given to `docker run` are passed straight to the interpreter.
ENTRYPOINT [ "python" ]
2 changes: 1 addition & 1 deletion MCTS/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def simulate(self, state):
self._Qsa[hash_][action] = (self._N(hash_, action) * self._Q(hash_, action) + value) / (self._N(hash_, action) + 1)
self._Nsa[hash_][action] += 1
self._Ns[hash_] += 1
return value
return -value

def N(self, state, action=None):
"""Get number of visits during MCTS simulations
Expand Down
21 changes: 9 additions & 12 deletions Net/NNet.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""
import numpy as np
import os
import tensorflow as tf

from enum import Enum, auto

Expand All @@ -20,7 +21,7 @@ class NeuralNets(Enum):
"""
class NNetWrapper:
def __init__(self, board_size=(8,8), batch_size=32, epochs=10,
num_channels_1=128, num_channels_2=256, lr=0.001, dropout=0.3, network=NeuralNets.ONN):
num_channels_1=512, num_channels_2=256, lr=0.001, dropout=0.3, network=NeuralNets.ONN):
'''
Inputs:
board_size -> a Tuple with the size of the board (n,n)
Expand Down Expand Up @@ -60,8 +61,11 @@ def train(self, examples, verbose=None):
target_pis = np.asarray(target_pis)
target_vs = np.asarray(target_vs)

log_dir = "logs/fit/" + f"onn-{self.board_size_x}"
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

return self.nnet.model.fit(x=input_boards, y=[target_pis, target_vs], batch_size=self.batch_size,
epochs=self.epochs, verbose=verbose)
epochs=self.epochs, verbose=verbose, callbacks=[tensorboard_callback])

def predict(self, board):
'''
Expand All @@ -84,22 +88,15 @@ def predict(self, board):

# save weights
def save_checkpoint(self, filepath):
if self.network_type == NeuralNets.ONN:
filepath += f'-onn-{self.board_size_x}.h5'
elif self.network_type == NeuralNets.BNN:
filepath += f'-bnn-{self.board_size_x}.h5'
self.nnet.model.save_weights(filepath)
self.nnet.model.save_weights(filepath, save_format='h5')

# load saved weights
def load_checkpoint(self, filepath):
if self.network_type == NeuralNets.ONN:
filepath += f'-onn-{self.board_size_x}.h5'
elif self.network_type == NeuralNets.BNN:
filepath += f'-bnn-{self.board_size_x}.h5'
assert filepath.endswith('.h5'), 'Expecting a file with .h5 as extension'
self.nnet.model.load_weights(filepath)

def copy(self):
copy_wrapper = NNetWrapper((self.board_size_x, self.board_size_y), network=self.network_type)
copy_wrapper.nnet.model.set_weights(self.nnet.model.get_weights())
return copy_wrapper


37 changes: 14 additions & 23 deletions Net/OthelloNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,31 +35,22 @@ def __init__(self, board_size, num_channels_1, num_channels_2, lr=0.001, dropout
self.action_size = self.board_x * self.board_y
self.learning_rate = lr
self.dropout = dropout
self.num_channels_1 = num_channels_1
self.num_channels_2 = num_channels_2
self.num_channels = num_channels_1
#self.num_channels_2 = num_channels_2


self.input_boards = Input(shape=(self.board_x, self.board_y, 2)) #shape (batch_size, board_x, board_y, 2)

conv1 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels_1, 3, padding='same', use_bias=False)(self.input_boards))) #shape (batch_size, board_x, board_y, num_channels_1)
conv2 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels_1, 3, padding='same', use_bias=False)(conv1))) #shape (batch_size, board_x, board_y, num_channels_1)
special1 = MaxPooling2D((2, 2), strides=(2, 2), padding='valid')(conv2) #shape (batch_size, board_x/2, board_y/2, num_channels_1)

conv3 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels_2, 3, padding='same', use_bias=False)(special1))) #shape (batch_size, board_x/2, board_y/2, num_channels_2)
conv4 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels_2, 3, padding='same', use_bias=False)(conv3))) #shape (batch_size, board_x/2, board_y/2, num_channels_2)

flatten = Flatten()(conv4) #shape (batch_size, board_x/2 x board_y/2 x num_channels_2)

#value side
v_dense1 = Dropout(self.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(512, use_bias=False)(flatten)))) # shape (batch_size x 512)
v_dense2 = Dropout(self.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(256, use_bias=False)(v_dense1)))) # shpe (batch_size x 256)
self.v = Dense(1, activation='tanh', name='v')(v_dense2) # shape (batch_size x 1)

#pi side
pi_dense1 = Dropout(self.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(512, use_bias=False)(flatten)))) # shape (batch_size x 512)
pi_dense2 = Dropout(self.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(256, use_bias=False)(pi_dense1)))) # shape (batch_size x 256)
self.pi = Dense(self.action_size, activation='softmax', name='pi')(pi_dense2) # shape (batch_size x action_size)
self.pi = Reshape((self.board_x, self.board_y))(self.pi)
h_conv1 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels, 3, padding='same')(self.input_boards)))
h_conv2 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels, 3, padding='same')(h_conv1)))
h_conv3 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels, 3, padding='valid')(h_conv2)))
h_conv4 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels, 3, padding='valid')(h_conv3)))
h_conv4_flat = Flatten()(h_conv4)
s_fc1 = Dropout(self.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(1024)(h_conv4_flat))))
s_fc2 = Dropout(self.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(512)(s_fc1))))
pi = Dense(self.action_size, activation='softmax', name='pi')(s_fc2)
self.pi = Reshape((self.board_x, self.board_y), name='pi-reshaped')(pi)
self.v = Dense(1, activation='tanh', name='v')(s_fc2)

self.model = Model(inputs=self.input_boards, outputs=[self.pi, self.v])
self.model.compile(loss=['categorical_crossentropy','mean_squared_error'], optimizer=Adam(self.learning_rate))
self.model.compile(loss=['categorical_crossentropy','mean_squared_error'],
optimizer=Adam(self.learning_rate, clipvalue=0.5))
13 changes: 0 additions & 13 deletions Othello/random_agent.py

This file was deleted.

10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
# OthelloZero
[![license](https://img.shields.io/badge/license-GPL%20v3.0-brightgreen.svg?style=flat-square)](https://github.com/Galtvam/OthelloZero/blob/main/LICENSE)
![LastCommit](https://img.shields.io/github/last-commit/Galtvam/OthelloZero?style=flat-square)
![CommitSinceLastversion](https://img.shields.io/github/commits-since/Galtvam/OthelloZero/0.0.1/master?label=Commits%20Since%20Latest%20Version&style=flat-square)
# OthelloZero

## How to create SSH private key:
```ssh-keygen -t rsa -f ~/.ssh/othello-zero -C othello-zero```

Add the generated public key (`~/.ssh/othello-zero.pub`) to the Compute Engine metadata.
84 changes: 84 additions & 0 deletions agents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import random
import logging
import numpy as np

from Net.NNet import NeuralNets
from Othello import OthelloGame, OthelloPlayer, BoardView

from othelo_mcts import OthelloMCTS


class OthelloAgent:
    """Base class for players that act on an Othello game object.

    Concrete agents override :meth:`play` to pick and perform a move.
    """

    def __init__(self, game):
        # The game instance this agent will act on.
        self.game = game

    def play(self):
        """Perform one action on the game; subclasses must implement this."""
        raise NotImplementedError()


class RandomOthelloAgent(OthelloAgent):
    """Agent that plays a uniformly random move among the valid ones."""

    def play(self):
        """Pick one of the game's valid actions at random and play it."""
        game = self.game
        # Materialise the valid actions so random.choice can index into them.
        candidates = tuple(game.get_valid_actions())
        chosen = random.choice(candidates)
        game.play(*chosen)


class GreedyOthelloAgent(OthelloAgent):
    """Agent that plays the move yielding the largest immediate point gain."""

    def play(self):
        """Evaluate every valid action on a board copy and play the best one.

        Each candidate move is applied to a copy of the current board with
        ``OthelloGame.flip_board_squares``; the move whose resulting board
        gives the current player the most points is then played on the game.
        On ties the first such move returned by ``get_valid_actions`` wins.

        Raises:
            ValueError: if the game reports no valid actions (raised by
                ``max`` on an empty sequence).
        """
        game = self.game
        player = game.current_player
        possible_moves = tuple(game.get_valid_actions())
        points_before = game.get_players_points()[player]

        def gain(move):
            # NOTE(review): the enum class itself (not one of its members) is
            # passed as the view, exactly as before - confirm which BoardView
            # member flip_board_squares/get_board_players_points expect.
            state = np.copy(game.board(BoardView))
            OthelloGame.flip_board_squares(state, player, *move)
            # Score the player who is actually moving, not always BLACK.
            return OthelloGame.get_board_players_points(state)[player] - points_before

        # Rank the moves with a key function so they never need to be hashable.
        greedy_move = max(possible_moves, key=gain)
        game.play(*greedy_move)


class NeuralNetworkOthelloAgent(OthelloAgent):
    """Agent that chooses moves from MCTS guided by a neural network."""

    def __init__(self, game, neural_network, num_simulations, degree_exploration, temperature=0):
        """Create the agent and its search tree.

        Args:
            game: game object the agent plays on; its ``board_size`` is
                handed to the MCTS.
            neural_network: network wrapper given to the MCTS; its
                ``network_type`` selects the board representation in ``play``.
            num_simulations: number of MCTS simulations run before each move.
            degree_exploration: exploration parameter forwarded to the MCTS.
            temperature: temperature forwarded to
                ``get_policy_action_probabilities`` (default 0).
        """
        # Honour the caller's temperature; it used to be overwritten with 0.
        self.temperature = temperature
        self.neural_network = neural_network
        self.num_simulations = num_simulations
        self.mcts = OthelloMCTS(game.board_size, neural_network, degree_exploration)
        super().__init__(game)

    def play(self):
        """Run the simulations, then play the most probable valid action."""
        state = self.game.board(BoardView.TWO_CHANNELS)
        for _ in range(self.num_simulations):
            self.mcts.simulate(state, self.game.current_player)

        # The policy is queried on the inverted board when WHITE is to move.
        if self.game.current_player == OthelloPlayer.WHITE:
            state = OthelloGame.invert_board(state)

        if self.neural_network.network_type is NeuralNets.ONN:
            action_probabilities = self.mcts.get_policy_action_probabilities(state, self.temperature)
        else:
            # Other network types are queried with the game's default board view.
            action_probabilities = self.mcts.get_policy_action_probabilities(
                self.game.board(), self.temperature)

        valid_actions = self.game.get_valid_actions()
        # Positions are converted to tuples so they can index the probability array.
        best_action = max(valid_actions, key=lambda position: action_probabilities[tuple(position)])
        self.game.play(*best_action)


def duel_between_agents(game, agent_1, agent_2):
    """Let two agents play ``game`` to the end and report the outcome.

    ``agent_1`` moves for BLACK and ``agent_2`` for WHITE. Returns a tuple
    ``(winning_agent, points)`` built from ``game.get_winning_player()``.
    """
    agent_by_player = {OthelloPlayer.BLACK: agent_1, OthelloPlayer.WHITE: agent_2}

    logging.info('Duel - Started')
    while True:
        if game.has_finished():
            break
        logging.info(f'Duel - Round: {game.round}')
        # Whoever owns the turn makes exactly one move.
        agent_by_player[game.current_player].play()

    outcome = game.get_winning_player()
    winner, points = outcome
    return agent_by_player[winner], points
56 changes: 56 additions & 0 deletions gcloud-startup-script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#! /bin/bash
# Startup script for a Google Compute Engine worker: creates the login user,
# installs NVIDIA drivers, Docker and NVIDIA container support, then pulls the
# project image. A marker file in the user's home makes later boots skip it.

LOGIN_USER=othello-zero
STARTUP_SUCCESS_FILE=/home/$LOGIN_USER/.ran-startup-script

if test ! -f "$STARTUP_SUCCESS_FILE"; then
    echo "$STARTUP_SUCCESS_FILE does not exist. running startup..."

    # add user (skipped when an earlier, incomplete run already created it)
    id -u "$LOGIN_USER" >/dev/null 2>&1 || sudo useradd -m "$LOGIN_USER"

    # no more 'sudo docker' after this (effective on the user's next login)
    # -f: succeed even if the group already exists
    sudo groupadd -f docker
    sudo usermod -aG docker "$LOGIN_USER"
    # 'newgrp docker' is deliberately not run here: it replaces the current
    # shell with a new one, which is only meaningful in an interactive session.

    # make sure docker-credential-gcloud is in PATH
    # https://stackoverflow.com/questions/54494386/gcloud-auth-configure-docker-on-gcp-vm-instance-with-ubuntu-not-setup-properly
    # -f: replace the link if it is already there
    sudo ln -sf /snap/google-cloud-sdk/current/bin/docker-credential-gcloud /usr/local/bin

    # make gcloud docker's credential helper
    sudo -u "$LOGIN_USER" bash -c 'gcloud auth configure-docker --quiet'

    # host machine requires nvidia drivers. tensorflow image should contain the rest required
    # NOTE(review): the CUDA repository signing key below may have been rotated
    # by NVIDIA since this was written - confirm it is still accepted.
    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
    sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
    sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
    sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
    sudo apt-get update && sudo apt-get install -y cuda-drivers

    # install docker (both apt-get calls need root, not only the first)
    sudo apt-get update && sudo apt-get install -y \
        apt-transport-https \
        ca-certificates \
        curl \
        gnupg-agent \
        software-properties-common

    curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
    sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
    sudo apt-get update && sudo apt-get install -y docker-ce docker-ce-cli containerd.io

    # install nvidia docker support
    distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
    curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
    curl -s -L "https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list" | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
    sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit
    sudo systemctl restart docker

    # create the marker checked on next reboot only once the image is
    # available, so a failed provisioning is retried instead of being skipped
    if docker pull igorxp5/othello-zero; then
        touch "$STARTUP_SUCCESS_FILE"
    else
        echo "docker pull failed. startup script will run again on next boot." >&2
    fi
else
    echo "$STARTUP_SUCCESS_FILE exists. not running startup script!"
fi
Loading

0 comments on commit d0c3f86

Please sign in to comment.