diff --git a/.gitignore b/.gitignore index 894a44c..486f201 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,5 @@ venv.bak/ # mypy .mypy_cache/ + +.idea \ No newline at end of file diff --git a/gym_2048/env.py b/gym_2048/env.py index c05360a..9e0c3e3 100644 --- a/gym_2048/env.py +++ b/gym_2048/env.py @@ -1,45 +1,40 @@ -import numpy as np import gym +import numpy as np import gym.spaces as spaces from gym.utils import seeding +import matplotlib.pyplot as plt class Base2048Env(gym.Env): - metadata = { - 'render.modes': ['human'], - } + # NOTE: Don't modify these numbers as they define the number of + # anti-clockwise rotations before applying the left action on a grid - ## - # NOTE: Don't modify these numbers as - # they define the number of - # anti-clockwise rotations before - # applying the left action on a grid - # - LEFT = 0 - UP = 1 - RIGHT = 2 - DOWN = 3 + LEFT, UP, RIGHT, DOWN = range(4) ACTION_STRING = { - LEFT: 'left', - UP: 'up', - RIGHT: 'right', - DOWN: 'down', + LEFT: 'left', + UP: 'up', + RIGHT: 'right', + DOWN: 'down', } - def __init__(self, width=4, height=4): - self.width = width - self.height = height + def __init__(self, width=4, height=4, max_invalid_moves=16): + + if width < 1 or height < 1: + raise ValueError('expecting width and height to be positive') - self.observation_space = spaces.Box(low=2, - high=2**32, - shape=(self.width, self.height), - dtype=np.int64) + if max_invalid_moves < 1: + raise ValueError('expecting number of invalid moves to be positive') + + self.shape = (width, height) + self.max_power = 1 + width * height + self.max_invalid_moves = max_invalid_moves + + self.observation_space = spaces.Box(low=0, high=self.max_power, shape=self.shape, dtype=np.int64) self.action_space = spaces.Discrete(4) # Internal Variables - self.board = None - self.np_random = None + self.board, self.np_random, self.score, self.n_invalid_moves = [None] * 4 self.seed() self.reset() @@ -54,16 +49,25 @@ def step(self, action: int): # Align board action with left action rotated_obs = np.rot90(self.board, k=action) reward, updated_obs = self._slide_left_and_merge(rotated_obs) - self.board = np.rot90(updated_obs, k=4 - action) - # Place one random tile on empty location - self._place_random_tiles(self.board, count=1) + # check if the move was valid + if (rotated_obs == updated_obs).all(): + self.n_invalid_moves += 1 + else: + self.n_invalid_moves = 0 + self.score += reward + self.board = np.rot90(updated_obs, k=4 - action) + self._place_random_tiles(self.board, count=1) done = self.is_done() - return self.board, reward, done, {} def is_done(self): + + # check if invalid moves are more than max_invalid_moves + if self.n_invalid_moves >= self.max_invalid_moves: + return True + copy_board = self.board.copy() if not copy_board.all(): @@ -77,80 +81,107 @@ def is_done(self): return True - - def reset(self): + def reset(self, **kwargs): """Place 2 tiles on empty board.""" - self.board = np.zeros((self.width, self.height), dtype=np.int64) + self.score, self.n_invalid_moves = 0, 0 + self.board = np.zeros(self.shape, dtype=np.int64) self._place_random_tiles(self.board, count=2) return self.board - def render(self, mode='human'): - if mode == 'human': - for row in self.board.tolist(): - print(' \t'.join(map(str, row))) + def get_board(self): + # raise board to power of 2, make zeros nan + board = np.array(2 ** self.board, dtype=np.float64) + board[self.board == 0] = np.nan + return board def _sample_tiles(self, count=1): """Sample tile 2 or 4.""" - - choices = [2, 4] - probs = [0.9, 0.1] - - tiles = self.np_random.choice(choices, - size=count, - p=probs) - return tiles.tolist() + return self.np_random.choice([1, 2], size=count, p=[0.9, 0.1]).tolist() def _sample_tile_locations(self, board, count=1): """Sample grid locations with no tile.""" - zero_locs = np.argwhere(board == 0) - zero_indices = self.np_random.choice( - len(zero_locs), size=count) - - zero_pos = zero_locs[zero_indices] - zero_pos = list(zip(*zero_pos)) - return zero_pos + zero_indices = self.np_random.choice(len(zero_locs), size=count) + return zero_locs[zero_indices].tolist() def _place_random_tiles(self, board, count=1): if not board.all(): tiles = self._sample_tiles(count) tile_locs = self._sample_tile_locations(board, count) - board[tile_locs] = tiles + for (x, y), tile in zip(tile_locs, tiles): + board[x, y] = tile def _slide_left_and_merge(self, board): """Slide tiles on a grid to the left and merge.""" - result = [] - - score = 0 + score, result = 0, [] for row in board: row = np.extract(row > 0, row) score_, result_row = self._try_merge(row) score += score_ - row = np.pad(np.array(result_row), (0, self.width - len(result_row)), - 'constant', constant_values=(0,)) + row = np.pad(np.array(result_row), (0, self.shape[0] - len(result_row)), 'constant', constant_values=(0,)) result.append(row) return score, np.array(result, dtype=np.int64) @staticmethod def _try_merge(row): - score = 0 - result_row = [] + score, result_row = 0, [] i = 1 while i < len(row): - if row[i] == row[i - 1]: - score += row[i] + row[i - 1] - result_row.append(row[i] + row[i - 1]) + a, b = row[i], row[i - 1] + if a == b: + score += 2 ** (a + 1) + result_row.append(a + 1) i += 2 else: - result_row.append(row[i - 1]) + result_row.append(b) i += 1 if i == len(row): result_row.append(row[i - 1]) return score, result_row + + def render(self, show: bool = False): + + # import matplotlib.colors as mcolors + # cmap, norm = mcolors.from_levels_and_colors( + # 2 ** np.arange(1, self.max_power + 1), [ + # 'red', 'green', 'blue' ... + # ]) + + board = self.get_board() + + fig, ax = plt.subplots(figsize=(4, 4)) + ax.imshow(board, cmap='viridis') + + plt.title(f'Score: {self.score}') + + plt.tick_params( + bottom=False, + labelbottom=False, + left=False, + labelleft=False, + ) + + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + ax.spines['bottom'].set_visible(False) + ax.spines['left'].set_visible(False) + + width, height = board.shape + for i in range(width): + for j in range(height): + if not np.isnan(board[i, j]): + ax.text(j, i, int(board[i, j]), ha="center", va="center", color="w", fontsize=12, fontweight='bold') + + plt.tight_layout() + + if show: + plt.show() + + return fig, ax diff --git a/requirements.txt b/requirements.txt index 6a46c24..d147339 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ gym~=0.10.0 numpy~=1.14.0 +matplotlib~=2.2.2 \ No newline at end of file