From bddd1ab29d9b96f94087576dc81db39e710f446e Mon Sep 17 00:00:00 2001 From: Antonin RAFFIN Date: Sat, 4 May 2019 10:52:36 +0200 Subject: [PATCH] Release 2.5.1 (#304) * Fix typo in kl-div * Update tb legacy instructions * Bump version * Capitalize Leibler * Typo in GAIL model --- docs/guide/tensorboard.rst | 8 ++++++++ docs/misc/changelog.rst | 8 +++++--- setup.py | 2 +- stable_baselines/__init__.py | 2 +- stable_baselines/acktr/acktr_cont.py | 2 +- stable_baselines/acktr/acktr_disc.py | 2 +- stable_baselines/acktr/kfac.py | 2 +- stable_baselines/acktr/utils.py | 4 ++-- stable_baselines/common/distributions.py | 2 +- stable_baselines/gail/model.py | 2 +- stable_baselines/ppo1/pposgd_simple.py | 2 +- stable_baselines/ppo2/ppo2.py | 2 +- stable_baselines/trpo_mpi/trpo_mpi.py | 4 ++-- 13 files changed, 26 insertions(+), 16 deletions(-) diff --git a/docs/guide/tensorboard.rst b/docs/guide/tensorboard.rst index d6194d1f03..8a13fc6999 100644 --- a/docs/guide/tensorboard.rst +++ b/docs/guide/tensorboard.rst @@ -89,6 +89,14 @@ For that, you need to define several environment variables: export OPENAI_LOG_FORMAT='stdout,log,csv,tensorboard' export OPENAI_LOGDIR=path/to/tensorboard/data +and to configure the logger using: + +.. code-block:: python + + from stable_baselines.logger import configure + + configure() + Then start tensorboard with: diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index e3fcbb22bc..2063a9d6d5 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -5,9 +5,11 @@ Changelog For download links, please look at `Github release page <https://github.com/hill-a/stable-baselines/releases>`_.
-Pre-Release 2.5.1a0 (WIP) +Release 2.5.1 (2019-05-04) -------------------------- +**Bug fixes + improvements in the VecEnv** + - doc update (fix example of result plotter + improve doc) - fixed logger issues when stdout lacks ``read`` function - fixed a bug in ``common.dataset.Dataset`` where shuffling was not disabled properly (it affects only PPO1 with recurrent policies) @@ -20,8 +22,8 @@ Pre-Release 2.5.1a0 (WIP) ``set_attr`` now returns ``None`` rather than a list of ``None``. (@kantneel) - ``GAIL``: ``gail.dataset.ExpertDataset` supports loading from memory rather than file, and ``gail.dataset.record_expert`` supports returning in-memory rather than saving to file. -- added support in ``VecEnvWrapper`` for accessing attributes of arbitrarily deeply nested - instances of ``VecEnvWrapper`` and ``VecEnv``. This is allowed as long as the attribute belongs +- added support in ``VecEnvWrapper`` for accessing attributes of arbitrarily deeply nested + instances of ``VecEnvWrapper`` and ``VecEnv``. This is allowed as long as the attribute belongs to exactly one of the nested instances i.e. it must be unambiguous. 
(@kantneel) - fixed bug where result plotter would crash on very short runs (@Pastafarianist) - added option to not trim output of result plotter by number of timesteps (@Pastafarianist) diff --git a/setup.py b/setup.py index db32adc1c6..3e2bd445c4 100644 --- a/setup.py +++ b/setup.py @@ -143,7 +143,7 @@ license="MIT", long_description=long_description, long_description_content_type='text/markdown', - version="2.5.1a0", + version="2.5.1", ) # python setup.py sdist diff --git a/stable_baselines/__init__.py b/stable_baselines/__init__.py index 5963f79b85..3d6df5b289 100644 --- a/stable_baselines/__init__.py +++ b/stable_baselines/__init__.py @@ -9,4 +9,4 @@ from stable_baselines.trpo_mpi import TRPO from stable_baselines.sac import SAC -__version__ = "2.5.1a0" +__version__ = "2.5.1" diff --git a/stable_baselines/acktr/acktr_cont.py b/stable_baselines/acktr/acktr_cont.py index eb920d9b8c..0acc519fb8 100644 --- a/stable_baselines/acktr/acktr_cont.py +++ b/stable_baselines/acktr/acktr_cont.py @@ -72,7 +72,7 @@ def learn(env, policy, value_fn, gamma, lam, timesteps_per_batch, num_timesteps, :param num_timesteps: (int) the total number of timesteps to run :param animate: (bool) if render env :param callback: (function) called every step, used for logging and saving - :param desired_kl: (float) the Kullback leibler weight for the loss + :param desired_kl: (float) the Kullback-Leibler weight for the loss """ obfilter = ZFilter(env.observation_space.shape) diff --git a/stable_baselines/acktr/acktr_disc.py b/stable_baselines/acktr/acktr_disc.py index d64635f408..7ae2ff9a11 100644 --- a/stable_baselines/acktr/acktr_disc.py +++ b/stable_baselines/acktr/acktr_disc.py @@ -33,7 +33,7 @@ class ACKTR(ActorCriticRLModel): :param vf_fisher_coef: (float) The weight for the fisher loss on the value function :param learning_rate: (float) The initial learning rate for the RMS prop optimizer :param max_grad_norm: (float) The clipping value for the maximum gradient - :param kfac_clip: 
(float) gradient clipping for Kullback leiber + :param kfac_clip: (float) gradient clipping for Kullback-Leibler :param lr_schedule: (str) The type of scheduler for the learning rate update ('linear', 'constant', 'double_linear_con', 'middle_drop' or 'double_middle_drop') :param verbose: (int) the verbosity level: 0 none, 1 training information, 2 tensorflow debug diff --git a/stable_baselines/acktr/kfac.py b/stable_baselines/acktr/kfac.py index 5470182d93..9cc1da1fa1 100644 --- a/stable_baselines/acktr/kfac.py +++ b/stable_baselines/acktr/kfac.py @@ -22,7 +22,7 @@ def __init__(self, learning_rate=0.01, momentum=0.9, clip_kl=0.01, kfac_update=2 :param learning_rate: (float) The learning rate :param momentum: (float) The momentum value for the TensorFlow momentum optimizer - :param clip_kl: (float) gradient clipping for Kullback leiber + :param clip_kl: (float) gradient clipping for Kullback-Leibler :param kfac_update: (int) update kfac after kfac_update steps :param stats_accum_iter: (int) how may steps to accumulate stats :param full_stats_init: (bool) whether or not to fully initalize stats diff --git a/stable_baselines/acktr/utils.py b/stable_baselines/acktr/utils.py index 5b67b2c804..69e07e12ef 100644 --- a/stable_baselines/acktr/utils.py +++ b/stable_baselines/acktr/utils.py @@ -33,12 +33,12 @@ def dense(input_tensor, size, name, weight_init=None, bias_init=0, weight_loss_d def kl_div(action_dist1, action_dist2, action_size): """ - Kullback leiber divergence + Kullback-Leibler divergence :param action_dist1: ([TensorFlow Tensor]) action distribution 1 :param action_dist2: ([TensorFlow Tensor]) action distribution 2 :param action_size: (int) the shape of an action - :return: (float) Kullback leiber divergence + :return: (float) Kullback-Leibler divergence """ mean1, std1 = action_dist1[:, :action_size], action_dist1[:, action_size:] mean2, std2 = action_dist2[:, :action_size], action_dist2[:, action_size:] diff --git a/stable_baselines/common/distributions.py 
b/stable_baselines/common/distributions.py index 4d5ae9ef13..62186d6707 100644 --- a/stable_baselines/common/distributions.py +++ b/stable_baselines/common/distributions.py @@ -39,7 +39,7 @@ def neglogp(self, x): def kl(self, other): """ - Calculates the Kullback-Leiber divergence from the given probabilty distribution + Calculates the Kullback-Leibler divergence from the given probabilty distribution :param other: ([float]) the distibution to compare with :return: (float) the KL divergence of the two distributions diff --git a/stable_baselines/gail/model.py b/stable_baselines/gail/model.py index a81491a6c8..8e6989aa80 100644 --- a/stable_baselines/gail/model.py +++ b/stable_baselines/gail/model.py @@ -15,7 +15,7 @@ class GAIL(TRPO): :param expert_dataset: (ExpertDataset) the dataset manager :param gamma: (float) the discount value :param timesteps_per_batch: (int) the number of timesteps to run per batch (horizon) - :param max_kl: (float) the kullback leiber loss threashold + :param max_kl: (float) the Kullback-Leibler loss threshold :param cg_iters: (int) the number of iterations for the conjugate gradient calculation :param lam: (float) GAE factor :param entcoeff: (float) the weight for the entropy loss diff --git a/stable_baselines/ppo1/pposgd_simple.py b/stable_baselines/ppo1/pposgd_simple.py index 3df78d3ca8..b60e9b7f19 100644 --- a/stable_baselines/ppo1/pposgd_simple.py +++ b/stable_baselines/ppo1/pposgd_simple.py @@ -145,7 +145,7 @@ def setup_model(self): tf.summary.scalar('entropy_loss', pol_entpen) tf.summary.scalar('policy_gradient_loss', pol_surr) tf.summary.scalar('value_function_loss', vf_loss) - tf.summary.scalar('approximate_kullback-leiber', meankl) + tf.summary.scalar('approximate_kullback-leibler', meankl) tf.summary.scalar('clip_factor', clip_param) tf.summary.scalar('loss', total_loss) diff --git a/stable_baselines/ppo2/ppo2.py b/stable_baselines/ppo2/ppo2.py index ab6cf2eae1..eb009cee78 100644 --- a/stable_baselines/ppo2/ppo2.py +++ 
b/stable_baselines/ppo2/ppo2.py @@ -161,7 +161,7 @@ def setup_model(self): tf.summary.scalar('entropy_loss', self.entropy) tf.summary.scalar('policy_gradient_loss', self.pg_loss) tf.summary.scalar('value_function_loss', self.vf_loss) - tf.summary.scalar('approximate_kullback-leiber', self.approxkl) + tf.summary.scalar('approximate_kullback-leibler', self.approxkl) tf.summary.scalar('clip_factor', self.clipfrac) tf.summary.scalar('loss', loss) diff --git a/stable_baselines/trpo_mpi/trpo_mpi.py b/stable_baselines/trpo_mpi/trpo_mpi.py index d05be71a1e..218a40607a 100644 --- a/stable_baselines/trpo_mpi/trpo_mpi.py +++ b/stable_baselines/trpo_mpi/trpo_mpi.py @@ -26,7 +26,7 @@ class TRPO(ActorCriticRLModel): :param env: (Gym environment or str) The environment to learn from (if registered in Gym, can be str) :param gamma: (float) the discount value :param timesteps_per_batch: (int) the number of timesteps to run per batch (horizon) - :param max_kl: (float) the kullback leiber loss threshold + :param max_kl: (float) the Kullback-Leibler loss threshold :param cg_iters: (int) the number of iterations for the conjugate gradient calculation :param lam: (float) GAE factor :param entcoeff: (float) the weight for the entropy loss @@ -183,7 +183,7 @@ def setup_model(self): tf.summary.scalar('entropy_loss', meanent) tf.summary.scalar('policy_gradient_loss', optimgain) tf.summary.scalar('value_function_loss', surrgain) - tf.summary.scalar('approximate_kullback-leiber', meankl) + tf.summary.scalar('approximate_kullback-leibler', meankl) tf.summary.scalar('loss', optimgain + meankl + entbonus + surrgain + meanent) self.assign_old_eq_new = \