Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add chess implementation #240

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Arena.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,9 @@ def playGames(self, num, verbose=False):

for _ in tqdm(range(num), desc="Arena.playGames (2)"):
gameResult = self.playGame(verbose=verbose)
if gameResult == -1:
if gameResult == 1:
oneWon += 1
elif gameResult == 1:
elif gameResult == -1:
Comment on lines -93 to +95
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to precisely invert the logic of which party won; was this done to fix a bug?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the original code is correct, this was not a bug.

Note a few lines up the order of the players was inverted, and also in Arena.py it normalizes for player ID via curPlayer * self.game.getGameEnded(board, curPlayer) here.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may be because the getGameEnded(self, board, player) method of ChessGame returns 1 if player 1 won instead of the given player won. Suggest multiplying the return value in ChessGame.getGameEnded by the function parameter player, instead of modifying the code here.

twoWon += 1
else:
draws += 1
Expand Down
6 changes: 3 additions & 3 deletions Coach.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def executeEpisode(self):
pi = self.mcts.getActionProb(canonicalBoard, temp=temp)
sym = self.game.getSymmetries(canonicalBoard, pi)
for b, p in sym:
trainExamples.append([b, self.curPlayer, p, None])
trainExamples.append([self.game.toArray(b), self.curPlayer, p, None])

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this would break Coach.py for all other games

Note the same comment holds for the change in MCTS.py


action = np.random.choice(len(pi), p=pi)
board, self.curPlayer = self.game.getNextState(board, self.curPlayer, action)
Expand Down Expand Up @@ -88,15 +88,15 @@ def learn(self):
self.mcts = MCTS(self.game, self.nnet, self.args) # reset search tree
iterationTrainExamples += self.executeEpisode()

# save the iteration examples to the history
# save the iteration examples to the history
self.trainExamplesHistory.append(iterationTrainExamples)

if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
log.warning(
f"Removing the oldest entry in trainExamples. len(trainExamplesHistory) = {len(self.trainExamplesHistory)}")
self.trainExamplesHistory.pop(0)
# backup history to a file
# NB! the examples were collected using the model from the previous iteration, so (i-1)
# NB! the examples were collected using the model from the previous iteration, so (i-1)
self.saveTrainExamples(i - 1)

# shuffle examples before training
Expand Down
9 changes: 8 additions & 1 deletion Game.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def getGameEnded(self, board, player):
Returns:
r: 0 if game has not ended. 1 if player won, -1 if player lost,
small non-zero value for draw.

"""
pass

Expand Down Expand Up @@ -111,3 +111,10 @@ def stringRepresentation(self, board):
Required by MCTS for hashing.
"""
pass

def toArray(self, board):
    """Convert a board into the representation fed to the neural network.

    Returns:
        a board representation suitable as the input to your neural
        network. The base implementation is the identity — games whose
        board objects are not already arrays should override this.
    """
    return board
4 changes: 2 additions & 2 deletions MCTS.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def search(self, canonicalBoard):

if s not in self.Ps:
# leaf node
self.Ps[s], v = self.nnet.predict(canonicalBoard)
self.Ps[s], v = self.nnet.predict(self.game.toArray(canonicalBoard))
valids = self.game.getValidMoves(canonicalBoard, 1)
self.Ps[s] = self.Ps[s] * valids # masking invalid moves
sum_Ps_s = np.sum(self.Ps[s])
Expand All @@ -92,7 +92,7 @@ def search(self, canonicalBoard):
# if all valid moves were masked make all valid moves equally probable

# NB! All valid moves may be masked if either your NNet architecture is insufficient or you've got overfitting or something else.
# If you have got dozens or hundreds of these messages you should pay attention to your NNet and/or training process.
# If you have got dozens or hundreds of these messages you should pay attention to your NNet and/or training process.
log.error("All valid moves were masked, doing a workaround.")
self.Ps[s] = self.Ps[s] + valids
self.Ps[s] /= np.sum(self.Ps[s])
Expand Down
112 changes: 112 additions & 0 deletions _chess/ChessGame.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
from __future__ import print_function
import sys
sys.path.append('..')
from Game import Game

import numpy as np
import chess

def to_np(board):
    """Flatten a chess board into a numpy vector of length 8*8*6.

    Each square gets 6 slots (one per piece type); a slot holds 1 for a
    truthy-colored piece (white in python-chess), -1 otherwise, 0 if empty.
    """
    encoded = [0] * (8 * 8 * 6)
    for square, piece in board.piece_map().items():
        value = 1 if piece.color else -1
        encoded[square * 6 + piece.piece_type - 1] = value
    return np.array(encoded)

def from_move(move):
    """Encode a move as a flat action index: from_square * 64 + to_square."""
    origin, target = move.from_square, move.to_square
    return origin * 64 + target

def to_move(action):
    """Decode a flat action index into a chess.Move (inverse of from_move).

    Uses integer floor division instead of the original int(action / 64),
    which needlessly round-tripped through float arithmetic.
    """
    from_sq, to_sq = divmod(action, 64)
    return chess.Move(from_sq, to_sq)

def who(turn):
    """Map a board.turn flag to a player id: truthy -> 1, falsy -> -1."""
    return -1 if not turn else 1

def mirror_move(move):
    """Return the move with both endpoints rank-mirrored (board flipped)."""
    src = chess.square_mirror(move.from_square)
    dst = chess.square_mirror(move.to_square)
    return chess.Move(src, dst)

# Game-termination codes. NOTE(review): these appear to mirror the
# python-chess Termination enum values and are not referenced elsewhere in
# this file — confirm they are used by callers before removing.
CHECKMATE = 1
STALEMATE = 2
INSUFFICIENT_MATERIAL = 3
SEVENTYFIVE_MOVES = 4
FIVEFOLD_REPETITION = 5
FIFTY_MOVES = 6
THREEFOLD_REPETITION = 7

class ChessGame(Game):
    """Chess wrapper around python-chess for the alpha-zero-general framework.

    Boards are chess.Board instances; actions are flat indices encoded as
    from_square * 64 + to_square (see from_move / to_move).
    """

    def __init__(self, n=8):
        # `n` is kept for signature parity with the other games; a chess
        # board is always 8x8, so there is nothing to store.
        pass

    def getInitBoard(self):
        """Return the standard chess starting position."""
        return chess.Board()

    def getBoardSize(self):
        # (8 files, 8 ranks, 6 piece types) — the logical shape of the
        # encoding produced by toArray.
        return (8, 8, 6)

    def toArray(self, board):
        """Encode the board as a flat numpy vector for the neural network."""
        return to_np(board)

    def getActionSize(self):
        # One action per (from_square, to_square) pair; pawn promotions are
        # folded into the move in getNextState (queen assumed).
        return 64 * 64

    def getNextState(self, board, player, action):
        """Apply `action` for `player`; return (next board, next player).

        The action is assumed to come from the canonical (white-to-move)
        board, so it is mirrored back when it is actually black's turn.
        """
        assert who(board.turn) == player
        move = to_move(action)
        if not board.turn:
            # The move was chosen on the canonical (mirrored) board.
            move = mirror_move(move)
        if move not in board.legal_moves:
            # Could be a pawn promotion, which has an extra letter in UCI
            # format; assume promotion to queen.
            move = chess.Move.from_uci(move.uci() + 'q')
        if move not in board.legal_moves:
            assert False, "%s not in %s" % (str(move), str(list(board.legal_moves)))
        board = board.copy()  # do not mutate the caller's board
        board.push(move)
        return (board, who(board.turn))

    def getValidMoves(self, board, player):
        """Return a binary vector of length getActionSize(); 1 = legal move."""
        assert who(board.turn) == player
        acts = [0] * self.getActionSize()
        for move in board.legal_moves:
            acts[from_move(move)] = 1
        return np.array(acts)

    def getGameEnded(self, board, player):
        """Return 0 if not ended, 1 if `player` won, -1 if `player` lost,
        and a small non-zero value for a draw.

        BUG FIX: the winner is multiplied by `player` so the result is
        relative to the given player, as the Game base class requires; the
        previous version always reported from player 1's (white's)
        perspective, forcing callers to compensate.
        """
        outcome = board.outcome()
        if outcome is None:
            return 0
        if outcome.winner is None:
            # Draw: small non-zero value so MCTS treats the node as terminal.
            return 1e-4
        return player * who(outcome.winner)

    def getCanonicalForm(self, board, player):
        """Return the position from the side-to-move's point of view.

        White's board is already canonical; for black the board is
        mirrored (python-chess flips colors and the turn flag).
        """
        assert who(board.turn) == player
        return board if board.turn else board.mirror()

    def getSymmetries(self, board, pi):
        # Chess has no exploitable board symmetry (castling rights and pawn
        # direction break mirror/rotation), so only the identity is returned.
        return [(board, pi)]

    def stringRepresentation(self, board):
        # FEN uniquely identifies a position; required by MCTS for hashing.
        return board.fen()

    @staticmethod
    def display(board):
        """Print an ASCII rendering of the board."""
        print(board)
57 changes: 57 additions & 0 deletions _chess/ChessPlayers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import chess
import random
import numpy as np
from _chess.ChessGame import who, from_move, mirror_move
from stockfish import Stockfish

class RandomPlayer():
    """Baseline player that picks a uniformly random legal action."""

    def __init__(self, game):
        self.game = game

    def play(self, board):
        """Return the index of a random legal move on `board`."""
        legal = self.game.getValidMoves(board, who(board.turn))
        candidates = np.flatnonzero(legal == 1)
        return random.choice(candidates)

def move_from_uci(board, uci):
    """Parse a UCI string into a move legal on `board`.

    Returns the chess.Move on success; prints a short diagnostic and
    returns None when the string is malformed or the move is illegal.
    """
    try:
        parsed = chess.Move.from_uci(uci)
    except ValueError:
        print('expected an UCI move')
        return None
    if parsed in board.legal_moves:
        return parsed
    print('expected a valid move')
    return None

class HumanChessPlayer():
    """Interactive player that reads UCI moves from stdin.

    The move is entered and validated against the mirrored board (when
    board.turn is truthy) and mirrored back before being encoded as an
    action index.
    """

    def __init__(self, game):
        # No per-game state is needed.
        pass

    def play(self, board):
        """Prompt until a legal UCI move is entered; return its action index."""
        mboard = board
        if board.turn:
            mboard = board.mirror()
        # Retry with a loop rather than recursion (the original recursed on
        # bad input, which could exhaust the stack on a long run of typos).
        while True:
            print('Valid Moves', end=':')
            for move in mboard.legal_moves:
                print(move.uci(), end=',')
            print()
            human_move = input()
            move = move_from_uci(mboard, human_move.strip())
            if move is not None:
                break
            print('try again, e.g., %s' % random.choice(list(mboard.legal_moves)).uci())
        if board.turn:
            move = mirror_move(move)
        return from_move(move)

class StockFishPlayer():
    """Player backed by the Stockfish engine via the `stockfish` package.

    Args:
        game: unused; accepted for interface parity with the other players.
        elo: playing-strength rating passed to the engine.
    """

    def __init__(self, game, elo=1000):
        self.stockfish = Stockfish(parameters={"Threads": 2, "Minimum Thinking Time": 30})
        self.stockfish.set_elo_rating(elo)

    def play(self, board):
        """Return the action index of Stockfish's chosen move for `board`."""
        self.stockfish.set_fen_position(board.fen())
        uci_move = self.stockfish.get_best_move()
        if uci_move is None:
            # get_best_move presumably yields None when no move exists
            # (terminal position) — fail with a clear error instead of an
            # AttributeError on None.strip().
            raise ValueError("Stockfish returned no move for position %s" % board.fen())
        move = move_from_uci(board, uci_move.strip())
        return from_move(move)
Empty file added _chess/__init__.py
Empty file.
55 changes: 55 additions & 0 deletions _chess/pytorch/ChessNNet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import sys
sys.path.append('..')
from utils import *

import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

class ChessNNet(nn.Module):
    """3-D convolutional policy/value network for the chess game.

    Input: a batch of boards shaped per game.getBoardSize() — (8, 8, 6)
    here. Output: (log-softmax policy over game.getActionSize() actions,
    tanh value in [-1, 1]).
    """

    def __init__(self, game, args):
        # Board geometry and action count come from the game object;
        # args supplies num_channels and dropout.
        self.board_x, self.board_y, self.board_z = game.getBoardSize()
        self.action_size = game.getActionSize()
        self.args = args

        super(ChessNNet, self).__init__()

        nc = args.num_channels
        # Two padded 3x3x3 convolutions keep the spatial size; the next two
        # are unpadded, shrinking each dimension by 2 apiece — hence the
        # (dim - 4) factors in the flattened size below.
        self.conv1 = nn.Conv3d(1, nc, 3, stride=1, padding=1)
        self.conv2 = nn.Conv3d(nc, nc, 3, stride=1, padding=1)
        self.conv3 = nn.Conv3d(nc, nc, 3, stride=1)
        self.conv4 = nn.Conv3d(nc, nc, 3, stride=1)

        self.bn1 = nn.BatchNorm3d(nc)
        self.bn2 = nn.BatchNorm3d(nc)
        self.bn3 = nn.BatchNorm3d(nc)
        self.bn4 = nn.BatchNorm3d(nc)

        flat_size = nc * (self.board_x - 4) * (self.board_y - 4) * (self.board_z - 4)
        self.fc1 = nn.Linear(flat_size, 1024)
        self.fc_bn1 = nn.BatchNorm1d(1024)

        self.fc2 = nn.Linear(1024, 512)
        self.fc_bn2 = nn.BatchNorm1d(512)

        # Two heads: policy logits and a scalar value.
        self.fc3 = nn.Linear(512, self.action_size)
        self.fc4 = nn.Linear(512, 1)

    def forward(self, s):
        """Run the network; returns (log-policy, value) for the batch."""
        x = s.view(-1, 1, self.board_x, self.board_y, self.board_z)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))

        flat_size = (
            self.args.num_channels
            * (self.board_x - 4)
            * (self.board_y - 4)
            * (self.board_z - 4)
        )
        x = x.view(-1, flat_size)

        x = F.relu(self.fc_bn1(self.fc1(x)))
        x = F.dropout(x, p=self.args.dropout, training=self.training)
        x = F.relu(self.fc_bn2(self.fc2(x)))
        x = F.dropout(x, p=self.args.dropout, training=self.training)

        pi = self.fc3(x)
        v = self.fc4(x)
        return F.log_softmax(pi, dim=1), torch.tanh(v)
Loading