From f98ad5d3ce9d452c95ef8a5c6c7815c70ab9c4b8 Mon Sep 17 00:00:00 2001
From: Antonin RAFFIN
Date: Mon, 20 Aug 2018 14:03:20 +0200
Subject: [PATCH] Fix test identity hyperparams

---
 .travis.yml            |  2 +-
 tests/test_identity.py | 16 +++++++++-------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 62d0655344..4d5abfbdaf 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,4 +14,4 @@ install:

 script:
   - flake8 --select=F baselines/common
-  - docker run --ipc=host --env CODACY_PROJECT_TOKEN=$CODACY_PROJECT_TOKEN baselines-test sh -c 'pytest --cov-config .coveragerc --cov-report term --cov-report xml --cov=. && python-codacy-coverage -r coverage.xml --token=$CODACY_PROJECT_TOKEN'
+  - docker run --env CODACY_PROJECT_TOKEN=$CODACY_PROJECT_TOKEN baselines-test sh -c 'pytest --cov-config .coveragerc --cov-report term --cov-report xml --cov=. && python-codacy-coverage -r coverage.xml --token=$CODACY_PROJECT_TOKEN'

diff --git a/tests/test_identity.py b/tests/test_identity.py
index 24cf18c711..878d4a8f42 100644
--- a/tests/test_identity.py
+++ b/tests/test_identity.py
@@ -13,16 +13,18 @@ from baselines.deepq import models as deepq_models

 learn_func_list = [
-    lambda e: A2C(policy=MlpPolicy, env=e).learn(total_timesteps=50000, seed=0),
+    lambda e: A2C(policy=MlpPolicy, learning_rate=1e-3, n_steps=1,
+                  gamma=0.7, env=e).learn(total_timesteps=10000, seed=0),
     lambda e: ACER(policy=MlpPolicy, env=e, n_steps=1, replay_ratio=1).learn(total_timesteps=10000, seed=0),
     lambda e: ACKTR(policy=MlpPolicy, env=e, learning_rate=5e-4, n_steps=1).learn(total_timesteps=20000, seed=0),
-    lambda e: DeepQ(policy=deepq_models.mlp([32]), env=e).learn(total_timesteps=50000, seed=0),
-    lambda e: PPO1(policy=MlpPolicy, env=e, timesteps_per_actorbatch=32, schedule='constant', lam=0.9,
-               optim_batchsize=16, optim_stepsize=1e-3).learn(total_timesteps=50000, seed=0),
-    lambda e: PPO2(policy=MlpPolicy, env=e, learning_rate=1e-3).learn(total_timesteps=50000, seed=0),
-    lambda e: TRPO(policy=MlpPolicy, env=e, max_kl=0.05, lam=0.9,
-                   timesteps_per_batch=128, vf_stepsize=1e-1).learn(total_timesteps=50000, seed=0),
+    lambda e: DeepQ(policy=deepq_models.mlp([16]), batch_size=16, gamma=0.1,
+                    exploration_fraction=0.001, env=e).learn(total_timesteps=20000, seed=0),
+    lambda e: PPO1(policy=MlpPolicy, env=e, lam=0.7,
+                   optim_batchsize=16, optim_stepsize=1e-3).learn(total_timesteps=10000, seed=0),
+    lambda e: PPO2(policy=MlpPolicy, env=e, learning_rate=1.5e-3,
+                   lam=0.8).learn(total_timesteps=20000, seed=0),
+    lambda e: TRPO(policy=MlpPolicy, env=e, max_kl=0.05, lam=0.7).learn(total_timesteps=10000, seed=0),
 ]