Added more German comments. Updated README.
chucnorrisful committed Mar 8, 2019
1 parent 3fcb431 commit 90f15a8
Showing 8 changed files with 205 additions and 115 deletions.
52 changes: 44 additions & 8 deletions README.md
@@ -8,18 +8,54 @@ and modding it to use the [Rainbow-DQN](https://arxiv.org/abs/1710.02298) alg

- [x] Naive DQN with basic keras-rl dqn agent
- [x] Fully-conv network with 2 outputs (described in [this deepmind paper](https://deepmind.com/documents/110/sc2le.pdf))
- [x] Double DQN
- [x] Dueling DQN
- [x] Prioritized experience replay
- [x] Multi-step learning
- [x] Noisy nets
- [ ] Distributional RL
- [ ] Final [rainbow agent](https://arxiv.org/pdf/1710.02298.pdf)
- [x] Double DQN (described [here](https://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/viewPaper/12389))
- [x] Dueling DQN (described [here](https://arxiv.org/abs/1511.06581); see the sketch after this list)
- [x] Prioritized experience replay (described [here](https://arxiv.org/abs/1511.05952))
- [x] Multi-step learning (described [here](https://arxiv.org/pdf/1710.02298.pdf))
- [x] Noisy nets (described [here](https://arxiv.org/abs/1706.10295))
- [ ] Distributional RL - runs, but does not learn (described [here](https://dl.acm.org/citation.cfm?id=3305428))
- [x] Final [rainbow agent](https://arxiv.org/pdf/1710.02298.pdf) without Distributional RL
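
As a quick reference for the dueling entry above, here is a minimal Keras sketch of a dueling head that combines a state-value stream and an advantage stream via Q = V + A - mean(A). It is an illustration only, not the network used in this repository; the input size, hidden size, and action count are made-up placeholders.

```python
# Illustrative dueling head (Wang et al., 2016); not this repository's network.
# The feature size (256), hidden size (64), and action count (4) are placeholders.
from keras.layers import Input, Dense, Lambda
from keras.models import Model
import keras.backend as K

trunk_out = Input(shape=(256,))                                  # features from a shared trunk (assumed)
value = Dense(1)(Dense(64, activation='relu')(trunk_out))        # state value V(s)
advantage = Dense(4)(Dense(64, activation='relu')(trunk_out))    # advantages A(s, a)

# Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)
q_values = Lambda(lambda t: t[0] + t[1] - K.mean(t[1], axis=1, keepdims=True))([value, advantage])
dueling_head = Model(inputs=trunk_out, outputs=q_values)
```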

---
### Installation:

Make sure you have Python 3.6.

Follow the instructions on the [pysc2](https://github.com/deepmind/pysc2) repository
for installing it, as well as StarCraft2 and the required mini_games maps.

Follow the instructions on the [keras-rl](https://github.com/keras-rl/keras-rl) repository for installation.

Follow the instructions on the [baselines](https://github.com/openai/baselines) repository for installation.

You will also need the following python packages installed:
- tensorflow 1.12 (newer versions currently do not work with CUDA support for me)
- keras 2.2.4
- numpy
- matplotlib

If you want to use a CUDA-capable GPU, install tensorflow-gpu and keras-gpu as well. Make sure you have a
compatible driver, the CUDA toolkit (9.0 works for me), and the cuDNN library (7.1.2 works for me) installed.
This provides a 5x to 20x speedup and is therefore recommended for training.
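
A quick way to check that TensorFlow actually sees the GPU before starting a long training run (a minimal sketch, assuming tensorflow-gpu 1.12 is installed):

```python
# Prints the TensorFlow version and whether a CUDA-capable GPU is visible.
# Assumes TensorFlow 1.12; newer versions expose this via tf.config instead.
import tensorflow as tf

print(tf.VERSION)                  # e.g. "1.12.0"
print(tf.test.is_gpu_available())  # True only if driver, CUDA toolkit, and cuDNN are set up correctly
```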

Running on Linux is also recommended for training, because it is required for running the game headless,
which gives up to a 2x speedup.

Download the project files:
```bash
git clone https://github.com/chucnorrisful/dqn.git
```

The entry point is exec.py - just set some hyperparameters and run it!

The plot.py file provides some visualisation, but you have to manually enter the
path to a log file (created during execution).
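
The exact log format depends on how exec.py is configured; as a hypothetical example, if a run writes a keras-rl FileLogger-style JSON file containing an episode_reward list, a minimal plot could look like this (the file path and key name are assumptions, not necessarily the project's actual format):

```python
# Hypothetical plotting snippet; the real log path and structure may differ.
import json
import matplotlib.pyplot as plt

with open("dqn_log.json") as f:       # placeholder path to a log file created by a run
    log = json.load(f)

plt.plot(log["episode_reward"])       # assumed key; adjust to the actual log contents
plt.xlabel("episode")
plt.ylabel("episode reward")
plt.show()
```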


---
### Challenges and Benchmarks (Deepmind SC2 minigames)

- [x] MoveToBeacon [mean: 26, max: 32]
- [x] MoveToBeacon [mean: 25.64, max: 34]
- [x] CollectMineralShards [mean: 89, max: 120]
- [ ] FindAndDefeatZerglings
- [ ] DefeatRoaches
30 changes: 25 additions & 5 deletions agent2.py
@@ -1,10 +1,8 @@
# -*- coding: utf-8 -*-
import warnings
from copy import deepcopy

import numpy as np
from keras.callbacks import History

from keras.callbacks import History
from rl.callbacks import (
CallbackList,
TestLogger,
@@ -14,8 +12,22 @@
)


# This is a modified version of the keras-rl Agent class, which implements the main loop of the learning
# algorithm, most notably in the fit() function. Only details were modified; the structure of the algorithm is
# unchanged. Only Agent3 is of interest here; Agent2 is merely used as a base class for older versions and
# has not been deleted yet purely for historical reasons.

class Agent3(object):
"""Abstract base class for all implemented agents.
"""Modifizierte Version des Keras-rl core/Agent
Änderungen:
- backward() hat als zusätzliches Argument observation_1;
- Außerdem Änderung des Ende-der-Episode Codes in fit(), welcher nun den RingBuffer der State-Action-Paare leert.
Anmerkung: Da der Code von Keras-rl stammt, habe ich ihn nicht weiter mit Kommentaren versehen abseits
meiner Änderungen.
Abstract base class for all implemented agents.
Each agent interacts with the environment (as defined by the `Env` class) by first observing the
state of the environment. Based on this observation the agent changes the environment by performing
@@ -54,6 +66,7 @@ def fit(self, env, nb_steps, action_repetition=1, callbacks=None, verbose=1,
visualize=False, nb_max_start_steps=0, start_step_policy=None, log_interval=10000,
nb_max_episode_steps=None):
"""Trains the agent on the given environment.
Modified, see the note at the top of the class.
# Arguments
env: (`Env` instance): Environment that the agent interacts with. See [Env](#env) for details.
@@ -191,6 +204,7 @@ def fit(self, env, nb_steps, action_repetition=1, callbacks=None, verbose=1,
if nb_max_episode_steps and episode_step >= nb_max_episode_steps - 1:
# Force a terminal state.
done = True
# backward now takes the additional argument observation_1
metrics = self.backward(reward, terminal=done, observation_1=observation)
episode_reward += reward

@@ -227,6 +241,7 @@ def fit(self, env, nb_steps, action_repetition=1, callbacks=None, verbose=1,

episode += 1
observation = None
# Clear the ring buffer!
for _ in range(self.recent.maxlen):
self.recent.append(None)
episode_step = None
@@ -241,6 +256,7 @@ def fit(self, env, nb_steps, action_repetition=1, callbacks=None, verbose=1,

return history

# Changes to support the modified version of backward(); otherwise unchanged.
def test(self, env, nb_episodes=1, action_repetition=1, callbacks=None, visualize=True,
nb_max_episode_steps=None, nb_max_start_steps=0, start_step_policy=None, verbose=1):
"""Callback that is called before training begins.
@@ -417,6 +433,7 @@ def forward(self, observation):
"""
raise NotImplementedError()

# additional argument observation_1 added!
def backward(self, reward, terminal, observation_1):
"""Updates the agent after having executed the action returned by `forward`.
If the policy is implemented by a neural network, this corresponds to a weight update using back-prop.
@@ -500,7 +517,10 @@ def _on_test_end(self):


class Agent2(object):
"""Abstract base class for all implemented agents.
"""Modifizierte Version des Keras-rl core/Agent
Änderung: backward() hat nun das zusätzliche Argument observation_1.
Abstract base class for all implemented agents.
Each agent interacts with the environment (as defined by the `Env` class) by first observing the
state of the environment. Based on this observation the agent changes the environment by performing
136 changes: 75 additions & 61 deletions env.py
@@ -1,5 +1,4 @@
from rl.core import Env

from pysc2.env import sc2_env
from pysc2.lib import features
from pysc2.lib import actions
@@ -8,7 +7,16 @@
FUNCTIONS = actions.FUNCTIONS


class Sc2Env1Output(Env):
# Environment wrapper for StarCraft2 (pysc2 library).
# Expects as its action the output format of the FullyConv network architecture: a tuple of two arrays:
# - a linear one containing Q-values for each distinct action
# - a two-dimensional one containing Q-values for each coordinate on the screen
# The method action_to_sc2 converts this output into actions usable by pysc2.
# The kind of observation is also defined here; currently two feature layers are passed:
# - feature_screen.player_relative (integer classes 0-3 for (nothing, player, enemy, neutral))
# - feature_screen.selected (1 for the selected unit, 0 for the rest)
# The class implements the keras-rl core/Env interface.
class Sc2Env2Outputs(Env):
last_obs = None

def __init__(self, screen=16, visualize=False, env_name="MoveToBeacon", training=False):
@@ -36,22 +44,23 @@ def __init__(self, screen=16, visualize=False, env_name="MoveToBeacon", training
)

def action_to_sc2(self, act):

real_action = FUNCTIONS.no_op()

# hacked to only move_screen
if 0 < act <= self._SCREEN * self._SCREEN:
if act.action == 1:
if 331 in self.last_obs.observation.available_actions:
arg = act - 1
x = int(arg / self._SCREEN)
y = arg % self._SCREEN
real_action = FUNCTIONS.Move_screen("now", (y, x))

elif self._SCREEN * self._SCREEN < act < self._SCREEN * self._SCREEN * 2:
# if FUNCTIONS.select_point.id in self.last_obs.observation.available_actions:
arg = act - 1 - self._SCREEN * self._SCREEN
x = int(arg / self._SCREEN)
y = arg % self._SCREEN
real_action = FUNCTIONS.select_point("toggle", (y, x))
real_action = FUNCTIONS.Move_screen("now", (act.coords[1], act.coords[0]))

elif act.action == 2:

real_action = FUNCTIONS.select_point("toggle", (act.coords[1], act.coords[0]))

elif act.action == 0:
pass
else:
print(act.action, "wtf")
assert False

return real_action

@@ -62,21 +71,27 @@ def step(self, action):

observation = self.env.step(actions=(real_action,))
self.last_obs = observation[0]
small_observation = [observation[0].observation.feature_screen.player_relative, observation[0].observation.feature_screen.selected]

# small_observation = observation[0].observation.feature_screen.unit_density
small_observation = [observation[0].observation.feature_screen.player_relative,
observation[0].observation.feature_screen.selected]

return small_observation, observation[0].reward, observation[0].last(), {}

def reset(self):
observation = self.env.reset()

if self._TRAINING and np.random.random_integers(1, 1) == 1:
if self._TRAINING and np.random.random_integers(0, 1) == 4:
ys, xs = np.where(observation[0].observation.feature_screen.player_relative == 1)
observation = self.env.step(actions=(FUNCTIONS.select_point("toggle", (xs[0], ys[0])),))

# observation = self.env.step(actions=(FUNCTIONS.select_army()))
observation = self.env.step(actions=(FUNCTIONS.select_army(0),))

self.last_obs = observation[0]
small_observation = np.array([observation[0].observation.feature_screen.player_relative, observation[0].observation.feature_screen.selected])

# small_observation = observation[0].observation.feature_screen.unit_density
small_observation = [observation[0].observation.feature_screen.player_relative,
observation[0].observation.feature_screen.selected]

return small_observation

@@ -117,8 +132,14 @@ def set_visualize(self, visualize: bool):
def set_minimap(self, minimap: int):
self._MINIMAP = minimap

@property
def screen(self):
return self._SCREEN


class Sc2Env2Outputs(Env):
# Same as Sc2Env2Outputs, but with a different output:
# returns ALL screen feature layers (was useful as an experiment, but is currently not used).
class Sc2Env2OutputsFull(Env):
last_obs = None

def __init__(self, screen=16, visualize=False, env_name="MoveToBeacon", training=False):
@@ -167,33 +188,33 @@ def action_to_sc2(self, act):
return real_action

def step(self, action):
# print(action, " ACTION")

real_action = self.action_to_sc2(action)

observation = self.env.step(actions=(real_action,))
self.last_obs = observation[0]

# small_observation = observation[0].observation.feature_screen.unit_density
small_observation = [observation[0].observation.feature_screen.player_relative,
observation[0].observation.feature_screen.selected]
small_observation = observation[0].observation.feature_screen
# small_observation = [observation[0].observation.feature_screen.player_relative,
# observation[0].observation.feature_screen.selected]

return small_observation, observation[0].reward, observation[0].last(), {}

def reset(self):
observation = self.env.reset()
self.env.reset()

if self._TRAINING and np.random.random_integers(0, 1) == 4:
ys, xs = np.where(observation[0].observation.feature_screen.player_relative == 1)
observation = self.env.step(actions=(FUNCTIONS.select_point("toggle", (xs[0], ys[0])),))
# if self._TRAINING and np.random.random_integers(0, 1) == 4:
# ys, xs = np.where(observation[0].observation.feature_screen.player_relative == 1)
# observation = self.env.step(actions=(FUNCTIONS.select_point("toggle", (xs[0], ys[0])),))

observation = self.env.step(actions=(FUNCTIONS.select_army(0),))

self.last_obs = observation[0]

# small_observation = observation[0].observation.feature_screen.unit_density
small_observation = [observation[0].observation.feature_screen.player_relative,
observation[0].observation.feature_screen.selected]
small_observation = observation[0].observation.feature_screen

# small_observation = [observation[0].observation.feature_screen.player_relative,
# observation[0].observation.feature_screen.selected]

return small_observation

@@ -239,7 +260,11 @@ def screen(self):
return self._SCREEN


class Sc2Env2OutputsFull(Env):
# Environment wrapper for StarCraft2 (pysc2 library).
# This version expects as its action a single vector of Q-values for the corresponding actions.
# With the FullyConv architecture, which provides 2 outputs of different dimensions, this is no longer usable.
# Not yet deleted, for historical reasons.
class Sc2Env1Output(Env):
last_obs = None

def __init__(self, screen=16, visualize=False, env_name="MoveToBeacon", training=False):
@@ -267,23 +292,22 @@ def __init__(self, screen=16, visualize=False, env_name="MoveToBeacon", training
)

def action_to_sc2(self, act):

real_action = FUNCTIONS.no_op()

if act.action == 1:
# hacked to only move_screen
if 0 < act <= self._SCREEN * self._SCREEN:
if 331 in self.last_obs.observation.available_actions:
arg = act - 1
x = int(arg / self._SCREEN)
y = arg % self._SCREEN
real_action = FUNCTIONS.Move_screen("now", (y, x))

real_action = FUNCTIONS.Move_screen("now", (act.coords[1], act.coords[0]))

elif act.action == 2:

real_action = FUNCTIONS.select_point("toggle", (act.coords[1], act.coords[0]))

elif act.action == 0:
pass
else:
print(act.action, "wtf")
assert False
elif self._SCREEN * self._SCREEN < act < self._SCREEN * self._SCREEN * 2:
# if FUNCTIONS.select_point.id in self.last_obs.observation.available_actions:
arg = act - 1 - self._SCREEN * self._SCREEN
x = int(arg / self._SCREEN)
y = arg % self._SCREEN
real_action = FUNCTIONS.select_point("toggle", (y, x))

return real_action

@@ -294,28 +318,21 @@ def step(self, action):

observation = self.env.step(actions=(real_action,))
self.last_obs = observation[0]

small_observation = observation[0].observation.feature_screen
# small_observation = [observation[0].observation.feature_screen.player_relative,
# observation[0].observation.feature_screen.selected]
small_observation = [observation[0].observation.feature_screen.player_relative, observation[0].observation.feature_screen.selected]

return small_observation, observation[0].reward, observation[0].last(), {}

def reset(self):
self.env.reset()
observation = self.env.reset()

# if self._TRAINING and np.random.random_integers(0, 1) == 4:
# ys, xs = np.where(observation[0].observation.feature_screen.player_relative == 1)
# observation = self.env.step(actions=(FUNCTIONS.select_point("toggle", (xs[0], ys[0])),))
if self._TRAINING and np.random.random_integers(1, 1) == 1:
ys, xs = np.where(observation[0].observation.feature_screen.player_relative == 1)
observation = self.env.step(actions=(FUNCTIONS.select_point("toggle", (xs[0], ys[0])),))

observation = self.env.step(actions=(FUNCTIONS.select_army(0),))
# observation = self.env.step(actions=(FUNCTIONS.select_army()))

self.last_obs = observation[0]

small_observation = observation[0].observation.feature_screen

# small_observation = [observation[0].observation.feature_screen.player_relative,
# observation[0].observation.feature_screen.selected]
small_observation = np.array([observation[0].observation.feature_screen.player_relative, observation[0].observation.feature_screen.selected])

return small_observation

@@ -356,6 +373,3 @@ def set_visualize(self, visualize: bool):
def set_minimap(self, minimap: int):
self._MINIMAP = minimap

@property
def screen(self):
return self._SCREEN