Commit b6b063d

Merge branch 'stable' of https://github.com/hill-a/stable-baselines into stable

hill-a committed Aug 20, 2018
2 parents 16dc1b0 + bbfb154
Showing 2 changed files with 10 additions and 8 deletions.
.travis.yml (2 changes: 1 addition & 1 deletion)

@@ -14,4 +14,4 @@ install:
 
 script:
 - flake8 --select=F stable_baselines/common
-- docker run --ipc=host --env CODACY_PROJECT_TOKEN=$CODACY_PROJECT_TOKEN baselines-test sh -c 'pytest --cov-config .coveragerc --cov-report term --cov-report xml --cov=. && python-codacy-coverage -r coverage.xml --token=$CODACY_PROJECT_TOKEN'
+- docker run --env CODACY_PROJECT_TOKEN=$CODACY_PROJECT_TOKEN baselines-test sh -c 'pytest --cov-config .coveragerc --cov-report term --cov-report xml --cov=. && python-codacy-coverage -r coverage.xml --token=$CODACY_PROJECT_TOKEN'
tests/test_identity.py (16 changes: 9 additions & 7 deletions)

@@ -13,16 +13,18 @@
 from stable_baselines.deepq import models as deepq_models
 
 learn_func_list = [
-    lambda e: A2C(policy=MlpPolicy, env=e).learn(total_timesteps=50000, seed=0),
+    lambda e: A2C(policy=MlpPolicy, learning_rate=1e-3, n_steps=1,
+                  gamma=0.7, env=e).learn(total_timesteps=10000, seed=0),
     lambda e: ACER(policy=MlpPolicy, env=e,
                    n_steps=1, replay_ratio=1).learn(total_timesteps=10000, seed=0),
     lambda e: ACKTR(policy=MlpPolicy, env=e, learning_rate=5e-4, n_steps=1).learn(total_timesteps=20000, seed=0),
-    lambda e: DeepQ(policy=deepq_models.mlp([32]), env=e).learn(total_timesteps=50000, seed=0),
-    lambda e: PPO1(policy=MlpPolicy, env=e, timesteps_per_actorbatch=32, schedule='constant', lam=0.9,
-                   optim_batchsize=16, optim_stepsize=1e-3).learn(total_timesteps=50000, seed=0),
-    lambda e: PPO2(policy=MlpPolicy, env=e, learning_rate=1e-3).learn(total_timesteps=50000, seed=0),
-    lambda e: TRPO(policy=MlpPolicy, env=e, max_kl=0.05, lam=0.9,
-                   timesteps_per_batch=128, vf_stepsize=1e-1).learn(total_timesteps=50000, seed=0),
+    lambda e: DeepQ(policy=deepq_models.mlp([16]), batch_size=16, gamma=0.1,
+                    exploration_fraction=0.001, env=e).learn(total_timesteps=20000, seed=0),
+    lambda e: PPO1(policy=MlpPolicy, env=e, lam=0.7,
+                   optim_batchsize=16, optim_stepsize=1e-3).learn(total_timesteps=10000, seed=0),
+    lambda e: PPO2(policy=MlpPolicy, env=e, learning_rate=1.5e-3,
+                   lam=0.8).learn(total_timesteps=20000, seed=0),
+    lambda e: TRPO(policy=MlpPolicy, env=e, max_kl=0.05, lam=0.7).learn(total_timesteps=10000, seed=0),
 ]
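
For context, a minimal sketch of how one entry of learn_func_list might be exercised against the identity task this test targets. The IdentityEnv and DummyVecEnv import paths are assumptions about the surrounding test module (they sit outside the lines shown in this hunk), not part of the commit itself:

# A hedged sketch, not part of this diff: run the new A2C entry from
# learn_func_list above on an identity environment.
from stable_baselines import A2C
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.identity_env import IdentityEnv  # assumed import path
from stable_baselines.common.vec_env import DummyVecEnv       # assumed import path

# wrap a 10-action identity environment in a single-process vectorized env
env = DummyVecEnv([lambda: IdentityEnv(10)])

# same hyperparameters as the new A2C entry added above
model = A2C(policy=MlpPolicy, learning_rate=1e-3, n_steps=1,
            gamma=0.7, env=env).learn(total_timesteps=10000, seed=0)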


