diff --git a/setup.py b/setup.py index db8f51bc98..3bae8f3040 100644 --- a/setup.py +++ b/setup.py @@ -33,4 +33,4 @@ author='OpenAI', url='https://github.com/openai/stable_baselines', author_email='gym@openai.com', - version='0.1.7') + version='0.2.0') diff --git a/stable_baselines/acer/acer_simple.py b/stable_baselines/acer/acer_simple.py index 4b3193de98..9c8c67a835 100644 --- a/stable_baselines/acer/acer_simple.py +++ b/stable_baselines/acer/acer_simple.py @@ -155,8 +155,6 @@ def setup_model(self): else: raise ValueError("Error: ACER does not work with {} actions space.".format(self.action_space)) - self.n_batch = self.n_envs * self.n_steps - self.graph = tf.Graph() with self.graph.as_default(): self.sess = tf_util.make_session(num_cpu=self.num_procs, graph=self.graph) diff --git a/tests/test_identity.py b/tests/test_identity.py index 18ee31560a..9698f18006 100644 --- a/tests/test_identity.py +++ b/tests/test_identity.py @@ -19,7 +19,7 @@ n_steps=1, replay_ratio=1).learn(total_timesteps=10000, seed=0), lambda e: ACKTR(policy=MlpPolicy, env=e, learning_rate=5e-4, n_steps=1).learn(total_timesteps=20000, seed=0), lambda e: DeepQ(policy=deepq_models.mlp([16]), batch_size=16, gamma=0.1, - exploration_fraction=0.001, env=e).learn(total_timesteps=20000, seed=0), + exploration_fraction=0.001, env=e).learn(total_timesteps=30000, seed=0), lambda e: PPO1(policy=MlpPolicy, env=e, lam=0.7, optim_batchsize=16, optim_stepsize=1e-3).learn(total_timesteps=10000, seed=0), lambda e: PPO2(policy=MlpPolicy, env=e, learning_rate=1.5e-3,