Merge pull request #22 from Galtvam/gcloud-workers

Google Cloud Workers
Galtvam · Dec 2, 2020 · d0c3f86 · d0c3f86
2 parents c75c054 + 67014b5
commit d0c3f86
Show file tree

Hide file tree

Showing 16 changed files with 1,434 additions and 372 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,7 @@
+# Base image: tensorflow/tensorflow (no tag given, so Docker resolves the default tag).
+FROM tensorflow/tensorflow
+
+# Only the dependency manifest is copied; project sources are not part of the image.
+COPY requirements.txt requirements.txt
+
+# --no-cache-dir keeps pip's download cache out of the resulting image layer.
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Exec-form entrypoint: arguments passed to `docker run` are handed to the Python interpreter.
+ENTRYPOINT [ "python" ]
diff --git a/MCTS/__init__.py b/MCTS/__init__.py
@@ -68,7 +68,7 @@ def simulate(self, state):
             self._Qsa[hash_][action] = (self._N(hash_, action) * self._Q(hash_, action) + value) / (self._N(hash_, action) + 1)
             self._Nsa[hash_][action] += 1
             self._Ns[hash_] += 1
-            return value
+            return -value
 
     def N(self, state, action=None):
         """Get number of visits during MCTS simulations

diff --git a/Net/NNet.py b/Net/NNet.py
@@ -4,6 +4,7 @@
 """
 import numpy as np
 import os
+import tensorflow as tf
 
 from enum import Enum, auto
 
@@ -20,7 +21,7 @@ class NeuralNets(Enum):
 """
 class NNetWrapper:
     def __init__(self, board_size=(8,8), batch_size=32, epochs=10,
-                 num_channels_1=128, num_channels_2=256, lr=0.001, dropout=0.3, network=NeuralNets.ONN):
+                 num_channels_1=512, num_channels_2=256, lr=0.001, dropout=0.3, network=NeuralNets.ONN):
         '''
         Inputs:
           board_size -> a Tuple with the size of the board (n,n)
@@ -60,8 +61,11 @@ def train(self, examples, verbose=None):
         target_pis = np.asarray(target_pis)
         target_vs = np.asarray(target_vs)
 
+        log_dir = "logs/fit/" + f"onn-{self.board_size_x}"
+        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
+
         return self.nnet.model.fit(x=input_boards, y=[target_pis, target_vs], batch_size=self.batch_size, 
-                                   epochs=self.epochs, verbose=verbose)
+                                   epochs=self.epochs, verbose=verbose, callbacks=[tensorboard_callback])
 
     def predict(self, board):
         '''
@@ -84,22 +88,15 @@ def predict(self, board):
 
     # save weights
     def save_checkpoint(self, filepath):
-        if self.network_type == NeuralNets.ONN:
-          filepath += f'-onn-{self.board_size_x}.h5'
-        elif self.network_type == NeuralNets.BNN:
-          filepath += f'-bnn-{self.board_size_x}.h5'
-        self.nnet.model.save_weights(filepath)
+        # The path is used exactly as given (no network/board-size suffix is appended any more);
+        # the HDF5 format is requested explicitly through save_format.
+        self.nnet.model.save_weights(filepath, save_format='h5')
 
     # load saved weights
     def load_checkpoint(self, filepath):
-        if self.network_type == NeuralNets.ONN:
-            filepath += f'-onn-{self.board_size_x}.h5'
-        elif self.network_type == NeuralNets.BNN:
-            filepath += f'-bnn-{self.board_size_x}.h5'
+        # Load model weights from `filepath`, which must end in '.h5'.
+        # Raises ValueError for any other extension. An explicit raise is used
+        # because an assert would be stripped when Python runs with -O.
+        if not filepath.endswith('.h5'):
+            raise ValueError('Expecting a file with .h5 as extension')
         self.nnet.model.load_weights(filepath)
 
     def copy(self):
+        """Return a new NNetWrapper holding a copy of this model's weights.
+
+        Only the board size and network type are forwarded to the new wrapper;
+        every other constructor argument takes its default value.
+        NOTE(review): if this wrapper was built with non-default layer sizes,
+        set_weights may receive mismatching shapes -- confirm against callers.
+        """
         copy_wrapper = NNetWrapper((self.board_size_x, self.board_size_y), network=self.network_type)
         copy_wrapper.nnet.model.set_weights(self.nnet.model.get_weights())
         return copy_wrapper
-
+
diff --git a/Net/OthelloNN.py b/Net/OthelloNN.py
@@ -35,31 +35,22 @@ def __init__(self, board_size, num_channels_1, num_channels_2, lr=0.001, dropout
     self.action_size = self.board_x * self.board_y
     self.learning_rate = lr
     self.dropout = dropout
-    self.num_channels_1 = num_channels_1
-    self.num_channels_2 = num_channels_2
+    self.num_channels = num_channels_1
+    #self.num_channels_2 = num_channels_2
 
 
     self.input_boards = Input(shape=(self.board_x, self.board_y, 2)) #shape (batch_size, board_x, board_y, 2)
-
-    conv1 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels_1, 3, padding='same', use_bias=False)(self.input_boards))) #shape (batch_size, board_x, board_y, num_channels_1)
-    conv2 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels_1, 3, padding='same', use_bias=False)(conv1))) #shape (batch_size, board_x, board_y, num_channels_1)
-    special1 = MaxPooling2D((2, 2), strides=(2, 2), padding='valid')(conv2) #shape (batch_size, board_x/2, board_y/2, num_channels_1)
-
-    conv3 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels_2, 3, padding='same', use_bias=False)(special1))) #shape (batch_size, board_x/2, board_y/2, num_channels_2)
-    conv4 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels_2, 3, padding='same', use_bias=False)(conv3))) #shape (batch_size, board_x/2, board_y/2, num_channels_2)
-
-    flatten = Flatten()(conv4)  #shape (batch_size, board_x/2 x board_y/2 x num_channels_2)
-
-    #value side
-    v_dense1 = Dropout(self.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(512, use_bias=False)(flatten)))) # shape (batch_size x 512)
-    v_dense2 = Dropout(self.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(256, use_bias=False)(v_dense1)))) # shpe (batch_size x 256)
-    self.v = Dense(1, activation='tanh', name='v')(v_dense2) # shape (batch_size x 1)
-
-    #pi side
-    pi_dense1 = Dropout(self.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(512, use_bias=False)(flatten)))) # shape (batch_size x 512)
-    pi_dense2 = Dropout(self.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(256, use_bias=False)(pi_dense1)))) # shape (batch_size x 256)
-    self.pi = Dense(self.action_size, activation='softmax', name='pi')(pi_dense2) # shape (batch_size x action_size)
-    self.pi = Reshape((self.board_x, self.board_y))(self.pi)
+    h_conv1 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels, 3, padding='same')(self.input_boards)))         
+    h_conv2 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels, 3, padding='same')(h_conv1)))         
+    h_conv3 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels, 3, padding='valid')(h_conv2)))        
+    h_conv4 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(self.num_channels, 3, padding='valid')(h_conv3)))       
+    h_conv4_flat = Flatten()(h_conv4)       
+    s_fc1 = Dropout(self.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(1024)(h_conv4_flat)))) 
+    s_fc2 = Dropout(self.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(512)(s_fc1))))          
+    pi = Dense(self.action_size, activation='softmax', name='pi')(s_fc2)
+    self.pi = Reshape((self.board_x, self.board_y), name='pi-reshaped')(pi)
+    self.v = Dense(1, activation='tanh', name='v')(s_fc2)
 
     self.model = Model(inputs=self.input_boards, outputs=[self.pi, self.v])
-    self.model.compile(loss=['categorical_crossentropy','mean_squared_error'], optimizer=Adam(self.learning_rate))
+    self.model.compile(loss=['categorical_crossentropy','mean_squared_error'], 
+                       optimizer=Adam(self.learning_rate, clipvalue=0.5))
diff --git a/Othello/random_agent.py b/Othello/random_agent.py
diff --git a/README.md b/README.md
@@ -1 +1,9 @@
-# OthelloZero
+[![license](https://img.shields.io/badge/license-GPL%20v3.0-brightgreen.svg?style=flat-square)](https://github.com/Galtvam/OthelloZero/blob/main/LICENSE)
+![LastCommit](https://img.shields.io/github/last-commit/Galtvam/OthelloZero?style=flat-square)
+![CommitSinceLastversion](https://img.shields.io/github/commits-since/Galtvam/OthelloZero/0.0.1/master?label=Commits%20Since%20Latest%20Version&style=flat-square)
+# OthelloZero
+
+## How to create an SSH key pair:
+```ssh-keygen -t rsa -f ~/.ssh/othello-zero -C othello-zero```
+
+Add the generated public key (`~/.ssh/othello-zero.pub`) to the Compute Engine metadata.
diff --git a/agents.py b/agents.py
@@ -0,0 +1,84 @@
+import random
+import logging
+import numpy as np
+
+from Net.NNet import NeuralNets
+from Othello import OthelloGame, OthelloPlayer, BoardView
+
+from othelo_mcts import OthelloMCTS 
+
+
+class OthelloAgent:
+    """Base class for agents that act on a game object.
+
+    Subclasses override play() to choose and perform a move.
+    """
+
+    def __init__(self, game):
+        # Game object the agent will act on when play() is called.
+        self.game = game
+
+    def play(self):
+        """Perform one action on the game; must be overridden by subclasses."""
+        raise NotImplementedError()
+
+
+class RandomOthelloAgent(OthelloAgent):
+    """Agent that plays a randomly chosen valid move."""
+
+    def play(self):
+        """Pick one of the game's valid actions with random.choice and play it."""
+        candidates = tuple(self.game.get_valid_actions())
+        self.game.play(*random.choice(candidates))
+
+
+class GreedyOthelloAgent(OthelloAgent):
+    """Agent that plays the valid move with the largest immediate point gain.
+
+    The gain is measured for the player whose turn it is: each candidate move
+    is applied to a copy of the board and that player's points are compared
+    with their points before the move.
+    """
+
+    def play(self):
+        """Score every valid move one step ahead and play the best one.
+
+        Ties go to the first move yielded by get_valid_actions(). Raises
+        ValueError (from max) when there is no valid move.
+        """
+        game = self.game
+        player = game.current_player
+        possible_moves = tuple(game.get_valid_actions())
+        points_before = game.get_players_points()[player]
+
+        def gain(move):
+            # Score on a copy of the board array.
+            # NOTE(review): the BoardView enum class itself is passed here, as
+            # in the original code; confirm which view flip_board_squares expects.
+            state = np.copy(game.board(BoardView))
+            OthelloGame.flip_board_squares(state, player, *move)
+            return OthelloGame.get_board_players_points(state)[player] - points_before
+
+        # max() over the moves themselves avoids using a move as a dict key,
+        # so moves do not need to be hashable.
+        greedy_move = max(possible_moves, key=gain)
+        game.play(*greedy_move)
+
+
+class NeuralNetworkOthelloAgent(OthelloAgent):
+    """Agent that picks moves from the policy produced by OthelloMCTS simulations."""
+
+    def __init__(self, game, neural_network, num_simulations, degree_exploration, temperature=0):
+        """Store the search settings and build the MCTS for this game's board size.
+
+        game               -> game object the agent plays on
+        neural_network     -> network wrapper handed to OthelloMCTS
+        num_simulations    -> number of MCTS simulations run before each move
+        degree_exploration -> exploration parameter handed to OthelloMCTS
+        temperature        -> value passed to get_policy_action_probabilities
+        """
+        super().__init__(game)
+        # Honour the caller's temperature (it was previously always overwritten with 0).
+        self.temperature = temperature
+        self.neural_network = neural_network
+        self.num_simulations = num_simulations
+        self.mcts = OthelloMCTS(game.board_size, neural_network, degree_exploration)
+
+    def play(self):
+        """Run the simulations, then play the valid action with the highest policy probability."""
+        state = self.game.board(BoardView.TWO_CHANNELS)
+        for _ in range(self.num_simulations):
+            self.mcts.simulate(state, self.game.current_player)
+
+        # The policy lookup uses the inverted board when WHITE is to move
+        # (presumably a canonical perspective for the search -- TODO confirm).
+        if self.game.current_player == OthelloPlayer.WHITE:
+            state = OthelloGame.invert_board(state)
+
+        if self.neural_network.network_type is NeuralNets.ONN:
+            action_probabilities = self.mcts.get_policy_action_probabilities(state, self.temperature)
+        else:
+            # Other network types are queried with the default board view.
+            action_probabilities = self.mcts.get_policy_action_probabilities(
+                self.game.board(), self.temperature)
+
+        valid_actions = self.game.get_valid_actions()
+        best_action = max(valid_actions, key=lambda position: action_probabilities[tuple(position)])
+        self.game.play(*best_action)
+
+
+def duel_between_agents(game, agent_1, agent_2):
+    """Let two agents play `game` to the end and report the winner.
+
+    agent_1 moves for OthelloPlayer.BLACK and agent_2 for OthelloPlayer.WHITE.
+    Returns a tuple (winning agent, points), where both values come from
+    game.get_winning_player().
+    """
+    agent_by_player = {OthelloPlayer.BLACK: agent_1, OthelloPlayer.WHITE: agent_2}
+
+    logging.info('Duel - Started')
+    while True:
+        if game.has_finished():
+            break
+        logging.info(f'Duel - Round: {game.round}')
+        # Whoever owns the current turn makes the next move.
+        agent_by_player[game.current_player].play()
+
+    winning_player, points = game.get_winning_player()
+    return agent_by_player[winning_player], points
diff --git a/gcloud-startup-script.sh b/gcloud-startup-script.sh
@@ -0,0 +1,56 @@
+#! /bin/bash
+#
+# One-time VM provisioning: creates the login user, installs NVIDIA drivers,
+# Docker and the NVIDIA container toolkit, then pulls the project image.
+# A marker file makes later boots skip all of this.
+
+# Abort on the first failing command so the success marker at the end is only
+# written when every step completed; an aborted run is retried on next boot.
+set -e
+
+LOGIN_USER=othello-zero
+STARTUP_SUCCESS_FILE=/home/$LOGIN_USER/.ran-startup-script
+
+if test ! -f "$STARTUP_SUCCESS_FILE"; then
+	echo "$STARTUP_SUCCESS_FILE does not exist. running startup..."
+
+	# add user (skipped when a previous, aborted run already created it)
+	id -u "$LOGIN_USER" >/dev/null 2>&1 || sudo useradd -m "$LOGIN_USER"
+
+	# no more 'sudo docker' after this
+	# groupadd -f succeeds when the group already exists.
+	# 'newgrp docker' was dropped: it starts a new shell instead of changing
+	# this script's group, and the membership applies on the user's next login.
+	sudo groupadd -f docker
+	sudo usermod -aG docker "$LOGIN_USER"
+
+	# make sure docker-credential-gcloud is in PATH
+	# https://stackoverflow.com/questions/54494386/gcloud-auth-configure-docker-on-gcp-vm-instance-with-ubuntu-not-setup-properly
+	# -f replaces a link left behind by an earlier run
+	sudo ln -sf /snap/google-cloud-sdk/current/bin/docker-credential-gcloud /usr/local/bin
+
+	# make gcloud docker's credential helper
+	sudo -u "$LOGIN_USER" bash -c 'gcloud auth configure-docker --quiet'
+
+	# host machine requires nvidia drivers. tensorflow image should contain the rest required
+	wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
+	sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
+	sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
+	sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
+	sudo apt-get update && sudo apt-get install -y cuda-drivers
+
+	# install docker
+	sudo apt-get update && sudo apt-get install -y \
+	    apt-transport-https \
+	    ca-certificates \
+	    curl \
+	    gnupg-agent \
+	    software-properties-common
+
+	curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
+	sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
+	sudo apt-get update && sudo apt-get install -y docker-ce docker-ce-cli containerd.io
+
+	# install nvidia docker support
+	distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
+	curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
+	curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
+	sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit
+	sudo systemctl restart docker
+
+	docker pull igorxp5/othello-zero
+
+	# create file which will be checked on next reboot
+	touch "$STARTUP_SUCCESS_FILE"
+else
+	echo "$STARTUP_SUCCESS_FILE exists. not running startup script!"
+fi