diff --git a/docs/guide/algos.rst b/docs/guide/algos.rst
index d3c29b15c5..cea0669fa1 100644
--- a/docs/guide/algos.rst
+++ b/docs/guide/algos.rst
@@ -48,3 +48,8 @@ Actions ``gym.spaces``:
 - ``MultiBinary``: A list of possible actions, where each timestep any of the actions can be used in any combination.
 
 .. _MPI: https://mpi4py.readthedocs.io/en/stable/
+
+.. note::
+
+    Some logging values (like `ep_rewmean`, `eplenmean`) are only available when using a Monitor wrapper.
+    See `Issue #339 <https://github.com/hill-a/stable-baselines/issues/339>`_ for more info.
diff --git a/docs/guide/tensorboard.rst b/docs/guide/tensorboard.rst
index 8a13fc6999..2795c590c3 100644
--- a/docs/guide/tensorboard.rst
+++ b/docs/guide/tensorboard.rst
@@ -76,6 +76,42 @@ It will display information such as the model graph, the episode reward, the mod
   :width: 400
   :alt: graph
 
+
+Logging More Values
+-------------------
+
+Using a callback, you can easily log more values with TensorBoard.
+Here is a simple example of how to log both an additional tensor and an arbitrary scalar value:
+
+.. code-block:: python
+
+    import tensorflow as tf
+    import numpy as np
+
+    from stable_baselines import SAC
+
+    model = SAC("MlpPolicy", "Pendulum-v0", tensorboard_log="/tmp/sac/", verbose=1)
+    # Define a new attribute to avoid using a global variable
+    model.is_tb_set = False
+
+
+    def callback(locals_, globals_):
+        self_ = locals_['self']
+        # Log additional tensor
+        if not self_.is_tb_set:
+            with self_.graph.as_default():
+                tf.summary.scalar('value_target', tf.reduce_mean(self_.value_target))
+                self_.summary = tf.summary.merge_all()
+            self_.is_tb_set = True
+        # Log scalar value (here a random variable)
+        value = np.random.random()
+        summary = tf.Summary(value=[tf.Summary.Value(tag='random_value', simple_value=value)])
+        locals_['writer'].add_summary(summary, self_.num_timesteps)
+        return True
+
+
+    model.learn(50000, callback=callback)
+
 Legacy Integration
 -------------------
 
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index 1453f6418f..da5fa68242 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -6,10 +6,10 @@ Changelog
 For download links, please look at `Github release page <https://github.com/hill-a/stable-baselines/releases>`_.
 
-Pre-Release 2.7.0a0 (WIP)
+Release 2.7.0 (2019-07-31)
 --------------------------
 
-**Twin Delayed DDPG (TD3)**
+**Twin Delayed DDPG (TD3) and GAE bug fix (TRPO, PPO1, GAIL)**
 
 Breaking Changes:
 ^^^^^^^^^^^^^^^^^
 
@@ -17,18 +17,17 @@ Breaking Changes:
 New Features:
 ^^^^^^^^^^^^^
 - added Twin Delayed DDPG (TD3) algorithm, with HER support
-
-- Add support for continuous action spaces to `action_probability`, computing the PDF of a Gaussian
-  policy in addition to the existing support for categorical stochastic policies.
-- Add flag to `action_probability` to return log-probabilities.
-- Added support for python lists and numpy arrays in ``logger.writekvs``. (@dwiel)
-- The info dicts returned by VecEnvs now include a ``terminal_observation`` key providing access to the last observation in a trajectory. (@qxcv)
+- added support for continuous action spaces to `action_probability`, computing the PDF of a Gaussian
+  policy in addition to the existing support for categorical stochastic policies.
+- added flag to `action_probability` to return log-probabilities.
+- added support for python lists and numpy arrays in ``logger.writekvs``. (@dwiel)
+- the info dicts returned by VecEnvs now include a ``terminal_observation`` key providing access to the last observation in a trajectory. (@qxcv)
 
 Bug Fixes:
 ^^^^^^^^^^
 - fixed a bug in ``traj_segment_generator`` where the ``episode_starts`` was wrongly recorded,
   resulting in wrong calculation of Generalized Advantage Estimation (GAE), this affects TRPO, PPO1 and GAIL (thanks to @miguelrass for spotting the bug)
-- add missing property `n_batch` in `BasePolicy`.
+- added missing property `n_batch` in `BasePolicy`.
 
 Deprecations:
 ^^^^^^^^^^^^^
@@ -38,12 +37,13 @@ Others:
 - renamed some keys in ``traj_segment_generator`` to be more meaningful
 - retrieve unnormalized reward when using Monitor wrapper with TRPO, PPO1 and GAIL
   to display them in the logs (mean episode reward)
-- Clean up DDPG code (renamed variables)
+- clean up DDPG code (renamed variables)
 
 Documentation:
 ^^^^^^^^^^^^^^
 - doc fix for the hyperparameter tuning command in the rl zoo
+- added an example of how to log additional variables with tensorboard and a callback
diff --git a/setup.py b/setup.py
index 336c4f2f8f..56387e8f3d 100644
--- a/setup.py
+++ b/setup.py
@@ -118,7 +118,7 @@
     ] + tf_dependency,
     extras_require={
         'tests': [
-            'pytest==3.5.1',
+            'pytest',
             'pytest-cov',
             'pytest-env',
             'pytest-xdist',
@@ -138,7 +138,7 @@
     license="MIT",
     long_description=long_description,
     long_description_content_type='text/markdown',
-    version="2.7.0a0",
+    version="2.7.0",
 )
 
 # python setup.py sdist
diff --git a/stable_baselines/__init__.py b/stable_baselines/__init__.py
index 93d1b0e193..35f62999ae 100644
--- a/stable_baselines/__init__.py
+++ b/stable_baselines/__init__.py
@@ -11,4 +11,4 @@
 from stable_baselines.trpo_mpi import TRPO
 from stable_baselines.sac import SAC
 
-__version__ = "2.6.1a0"
+__version__ = "2.7.0"
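
As a quick illustration of the note added to ``docs/guide/algos.rst`` above: ``ep_rewmean`` and ``eplenmean`` are computed from the per-episode statistics recorded by the Monitor wrapper, so the environment has to be wrapped before training. The sketch below is only illustrative and not part of this diff; the algorithm (PPO2), the environment id and ``filename=None`` are arbitrary choices.

.. code-block:: python

    import gym

    from stable_baselines import PPO2
    from stable_baselines.bench import Monitor

    # Monitor records episode rewards and lengths, which is what feeds the
    # `ep_rewmean` / `eplenmean` entries in the training logs.
    # filename=None keeps the statistics in memory (no monitor.csv is written).
    env = Monitor(gym.make("CartPole-v1"), filename=None)

    model = PPO2("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=10000)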
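The two ``action_probability`` entries in the changelog above (Gaussian PDF for continuous action spaces, plus a flag for log-probabilities) can be exercised roughly as follows. This is a sketch under assumptions: the ``actions`` and ``logp`` keyword names are taken from the release notes' description, and the algorithm/environment are arbitrary.

.. code-block:: python

    import numpy as np

    from stable_baselines import PPO2

    # Pendulum-v0 has a continuous (Box) action space, so the policy is Gaussian
    model = PPO2("MlpPolicy", "Pendulum-v0", verbose=0)

    obs = model.env.reset()
    actions = np.array([[0.0]])  # one action per (vectorized) observation

    # PDF of the Gaussian policy evaluated at `actions`
    prob = model.action_probability(obs, actions=actions)
    # Same query, returned as a log-probability via the new flag
    log_prob = model.action_probability(obs, actions=actions, logp=True)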