From 284be5ba3aa98ecf88a0a64354b67c0413644492 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 25 Aug 2023 19:34:17 +0200 Subject: [PATCH 1/7] replace gym with gymnasium --- deformable_gym/envs/base_env.py | 68 ++++++++++++------- deformable_gym/envs/floating_mia_grasp_env.py | 32 +++------ .../envs/floating_shadow_grasp_env.py | 30 +++----- deformable_gym/envs/grasp_env.py | 2 +- deformable_gym/envs/ur10_shadow_grasp_env.py | 9 ++- deformable_gym/envs/ur5_mia_grasp_env.py | 11 +-- deformable_gym/robots/bullet_robot.py | 2 +- 7 files changed, 71 insertions(+), 83 deletions(-) diff --git a/deformable_gym/envs/base_env.py b/deformable_gym/envs/base_env.py index 3de44d0..ab2fd53 100644 --- a/deformable_gym/envs/base_env.py +++ b/deformable_gym/envs/base_env.py @@ -1,18 +1,19 @@ import abc from typing import Any +import gymnasium as gym import numpy as np import numpy.typing as npt import pybullet as pb import pytransform3d.rotations as pr -from gym.core import Env -from gym import spaces + +from gymnasium import spaces from deformable_gym.robots.bullet_robot import BulletRobot from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.helpers.pybullet_helper import MultibodyPose -class BaseBulletEnv(Env, abc.ABC): +class BaseBulletEnv(gym.Env, abc.ABC): """Configures PyBullet for the gym environment. :param gui: Show PyBullet GUI. @@ -79,14 +80,12 @@ def _hard_reset(self): self._load_objects() self.robot = self._create_robot() - def reset(self) -> np.ndarray: + def reset(self, seed=None, options=None) -> npt.ArrayLike: """Reset the environment to its initial state and returns it. :return: Initial state. """ - - if self.verbose: - print("Performing reset (base)") + super().reset(seed=seed) assert isinstance(self.robot, BulletRobot) @@ -94,10 +93,13 @@ def reset(self) -> np.ndarray: self.step_counter = 0 self.simulation.timing.reset() + observation = self._get_observation() + info = self._get_info() + if self.verbose: - print(f"Resetting env: {self.observe_state()}") + print(f"Resetting env: {observation}") - return self.observe_state() + return observation, info def render(self, mode: str = "human"): """Render environment. @@ -113,15 +115,22 @@ def render(self, mode: str = "human"): else: raise NotImplementedError(f"Render mode {mode} not supported") - def observe_state(self) -> np.ndarray: + def _get_observation(self) -> npt.ArrayLike: """Returns the current environment state. :return: The observation """ return self.robot.get_joint_positions() - def is_done(self, state: np.ndarray, action: np.ndarray, next_state: np.ndarray) -> bool: - """Checks whether the current episode is over or not. + def _get_info(self): + """Returns the current environment state. + + :return: The observation + """ + return {} + + def _is_terminated(self, state: npt.ArrayLike, action: npt.ArrayLike, next_state: npt.ArrayLike) -> bool: + """Checks whether the current episode is terminated. :param state: State :param action: Action taken in this state @@ -130,6 +139,16 @@ def is_done(self, state: np.ndarray, action: np.ndarray, next_state: np.ndarray) """ return self.step_counter >= self.horizon + def _is_truncated(self, state: npt.ArrayLike, action: npt.ArrayLike, next_state: npt.ArrayLike) -> bool: + """Checks whether the current episode is truncated. + + :param state: State + :param action: Action taken in this state + :param next_state: State after action was taken + :return: Is the current episode done? 
+ """ + return False + def step(self, action: npt.ArrayLike): """Take a step in the environment. @@ -145,7 +164,7 @@ def step(self, action: npt.ArrayLike): for gym environments. Info dict is currently unused. """ # observe current state - state = self.observe_state() + observation = self._get_observation() # execute action self.robot.perform_command(action) @@ -153,31 +172,35 @@ def step(self, action: npt.ArrayLike): # simulate until next time step self.simulation.step_to_trigger("time_step") - # observe new state - next_state = self.observe_state() + next_observation = self._get_observation() # update time step self.step_counter += 1 # check if episode is over - done = self.is_done(state, action, next_state) + terminated = self._is_terminated(observation, action, next_observation) + truncated = self._is_truncated(observation, action, next_observation) # calculate the reward - reward = self.calculate_reward(state, action, next_state, done) + reward = self.calculate_reward(observation, action, next_observation, terminated) if self.verbose: - print(f"Finished environment step: {next_state=}, {reward=}, {done=}") + print(f"Finished environment step: {next_observation=}, {reward=}, {terminated=}, {truncated=}") - return next_state, reward, done, {} + return next_observation, reward, terminated, truncated, {} @abc.abstractmethod - def calculate_reward(self, state: npt.ArrayLike, action: npt.ArrayLike, next_state: npt.ArrayLike, done: bool): + def calculate_reward(self, + state: npt.ArrayLike, + action: npt.ArrayLike, + next_state: npt.ArrayLike, + terminated: bool): """Calculate reward. :param state: State of the environment. :param action: Action that has been executed in the state. :param next_state: State after executing the action. - :param done: Is the episode done? + :param terminated: Is the episode terminated? :return: Measured reward. 
""" @@ -209,8 +232,7 @@ def _check_forces(self, robot, high_force_threshold, verbose): contact_points = robot.get_contact_points(self.object_to_grasp.get_id()) accumulated_forces = 0.0 for contact_point in contact_points: - _, _, _, _, _, _, _, _, dist, force, _, _, _, _ = contact_point - accumulated_forces += force + accumulated_forces += contact_point[9] high_forces = accumulated_forces > high_force_threshold contact_forces = accumulated_forces > 0 diff --git a/deformable_gym/envs/floating_mia_grasp_env.py b/deformable_gym/envs/floating_mia_grasp_env.py index 687735e..d81103c 100644 --- a/deformable_gym/envs/floating_mia_grasp_env.py +++ b/deformable_gym/envs/floating_mia_grasp_env.py @@ -1,11 +1,9 @@ -import random -import time from typing import Union import numpy as np import numpy.typing as npt import pybullet as pb -from gym import spaces +from gymnasium import spaces from deformable_gym.robots import mia_hand from deformable_gym.envs.base_env import BaseBulletEnv, GraspDeformableMixin from deformable_gym.helpers import pybullet_helper as pbh @@ -108,8 +106,8 @@ def __init__( np.ones(6)*5]) if self._observable_object_pos: - lower_observations = np.append(lower_observations, np.ones(3)) - upper_observations = np.append(upper_observations, -np.ones(3)) + lower_observations = np.append(lower_observations, -np.ones(3)) + upper_observations = np.append(upper_observations, np.ones(3)) if self._observable_time_step: lower_observations = np.append(lower_observations, 0) @@ -132,7 +130,6 @@ def __init__( def _create_robot(self): orn_limit = None - # orn_limit = [[0, 0, 0], [0, 0, 0]] if self.velocity_commands: robot = mia_hand.MiaHandVelocity( @@ -157,8 +154,7 @@ def _create_robot(self): def _load_objects(self): super()._load_objects() - self.object_to_grasp, self.object_position, self.object_orientation = \ - ObjectFactory().create(self.object_name) + self.object_to_grasp, self.object_position, self.object_orientation = ObjectFactory().create(self.object_name) def set_difficulty_mode(self, mode: str): """ @@ -192,16 +188,11 @@ def reset(self, hard_reset=False, pos=None): return super().reset() - def is_done(self, state, action, next_state): - + def _is_truncated(self, state, action, next_state): # check if insole is exploded - if self._deformable_is_exploded(): - print("Exploded insole") - return True - - return super().is_done(state, action, next_state) + return self._deformable_is_exploded() - def observe_state(self): + def _get_observation(self): joint_pos = self.robot.get_joint_positions(self.robot.actuated_real_joints) ee_pose = self.robot.get_ee_pose() sensor_readings = self.robot.get_sensor_readings() @@ -217,17 +208,12 @@ def observe_state(self): return state - def calculate_reward(self, state, action, next_state, done): + def calculate_reward(self, state, action, next_state, terminated): """ Calculates the reward by counting how many insole vertices are in the target position. 
""" - if done: - if not self.compute_reward: - return 0.0 - if not (round(action[-1]) == 1 or self.step_counter >= self.horizon): - return -100 - + if terminated: self.robot.deactivate_motors() # remove insole anchors and simulate steps self.object_to_grasp.remove_anchors() diff --git a/deformable_gym/envs/floating_shadow_grasp_env.py b/deformable_gym/envs/floating_shadow_grasp_env.py index a54f30d..2518651 100644 --- a/deformable_gym/envs/floating_shadow_grasp_env.py +++ b/deformable_gym/envs/floating_shadow_grasp_env.py @@ -1,7 +1,7 @@ import random import numpy as np -from gym import spaces +from gymnasium import spaces from deformable_gym.robots import shadow_hand from deformable_gym.envs.base_env import BaseBulletEnv, GraspDeformableMixin from deformable_gym.helpers import pybullet_helper as pbh @@ -61,8 +61,8 @@ def __init__(self, object_name="insole", np.array([5, 5, 5])], axis=0) if self._observable_object_pos: - lower_observations = np.append(lower_observations, np.ones(3)) - upper_observations = np.append(upper_observations, -np.ones(3)) + lower_observations = np.append(lower_observations, -np.ones(3)) + upper_observations = np.append(upper_observations, np.ones(3)) if self._observable_time_step: lower_observations = np.append(lower_observations, 0) @@ -105,28 +105,16 @@ def _load_objects(self): self.object_to_grasp, self.object_position, self.object_orientation = \ ObjectFactory().create(self.object_name) - def reset(self, hard_reset=False): - if self.verbose: - print("Performing reset (april)") - - pos = None - - self.object_to_grasp.reset(pos=pos) - + def reset(self, seed=None, hard_reset=False): + self.object_to_grasp.reset() self.robot.activate_motors() - return super().reset() - - def is_done(self, state, action, next_state): - - # check if insole is exploded - if self._deformable_is_exploded(): - print("Exploded insole") - return True + return super().reset(seed) - return super().is_done(state, action, next_state) + def _is_truncated(self, state, action, next_state): + return self._deformable_is_exploded() - def observe_state(self): + def _get_observation(self): finger_pos = self.robot.get_joint_positions()[6:] return np.concatenate([finger_pos], axis=0) diff --git a/deformable_gym/envs/grasp_env.py b/deformable_gym/envs/grasp_env.py index d226477..b2ef0cd 100644 --- a/deformable_gym/envs/grasp_env.py +++ b/deformable_gym/envs/grasp_env.py @@ -36,7 +36,7 @@ def reset(self, hard_reset=False, pos=None): return super().reset() - def observe_state(self): + def _get_observation(self): joint_pos = self.robot.get_joint_positions(self.robot.actuated_real_joints) sensor_readings = self.robot.get_sensor_readings() diff --git a/deformable_gym/envs/ur10_shadow_grasp_env.py b/deformable_gym/envs/ur10_shadow_grasp_env.py index 2f8789e..efa3adf 100644 --- a/deformable_gym/envs/ur10_shadow_grasp_env.py +++ b/deformable_gym/envs/ur10_shadow_grasp_env.py @@ -1,12 +1,11 @@ -import random - +import pytransform3d.transformations as pt import numpy as np -from gym import spaces + from deformable_gym.robots import ur10_shadow from deformable_gym.envs.base_env import BaseBulletEnv, GraspDeformableMixin from deformable_gym.helpers import pybullet_helper as pbh from deformable_gym.objects.bullet_object import ObjectFactory -import pytransform3d.transformations as pt +from gymnasium import spaces class UR10ShadowGraspEnv(GraspDeformableMixin, BaseBulletEnv): @@ -116,7 +115,7 @@ def is_done(self, state, action, next_state): return super().is_done(state, action, next_state) - def observe_state(self): 
+ def _get_observation(self): finger_pos = self.robot.get_joint_positions()[6:] ee_pose = self.robot.get_ee_pose() diff --git a/deformable_gym/envs/ur5_mia_grasp_env.py b/deformable_gym/envs/ur5_mia_grasp_env.py index 1fd8b6b..c75b052 100644 --- a/deformable_gym/envs/ur5_mia_grasp_env.py +++ b/deformable_gym/envs/ur5_mia_grasp_env.py @@ -1,7 +1,5 @@ -import random - import numpy as np -from gym import spaces +from gymnasium import spaces from deformable_gym.robots import ur5_mia from deformable_gym.envs.base_env import BaseBulletEnv, GraspDeformableMixin from deformable_gym.helpers import pybullet_helper as pbh @@ -145,7 +143,6 @@ def __init__( def _create_robot(self): task_space_limit = None orn_limit = None - # orn_limit = [[0, 0, 0], [0, 0, 0]] if self.velocity_commands: robot = ur5_mia.UR5MiaVelocity( @@ -180,14 +177,10 @@ def reset(self, hard_reset=False): def is_done(self, state, action, next_state): - # check if insole is exploded - if self._deformable_is_exploded(): - print("Exploded insole") - return True return super().is_done(state, action, next_state) - def observe_state(self): + def _get_observation(self): joint_pos = self.robot.get_joint_positions(self.robot.actuated_real_joints) ee_pose = self.robot.get_ee_pose() sensor_readings = self.robot.get_sensor_readings() diff --git a/deformable_gym/robots/bullet_robot.py b/deformable_gym/robots/bullet_robot.py index 5708752..0bb0ce7 100644 --- a/deformable_gym/robots/bullet_robot.py +++ b/deformable_gym/robots/bullet_robot.py @@ -7,7 +7,7 @@ from deformable_gym.robots.bullet_utils import draw_limits from deformable_gym.robots.inverse_kinematics import PyBulletSolver from deformable_gym.objects.bullet_object import Pose -from gym.spaces import Box +from gymnasium.spaces import Box class BulletRobot(abc.ABC): From d00fe403a6e9018f03aaa6820d6a87c5ba294563 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 25 Aug 2023 19:35:50 +0200 Subject: [PATCH 2/7] adapt tests to gymnasium --- tests/envs/test_floating_mia_grasp_env.py | 12 ++++++------ tests/envs/test_floating_shadow_grasp_env.py | 14 +++++++------- tests/envs/test_ur10_shadow_grasp_env.py | 10 +++++----- tests/envs/test_ur5_mia_grasp_env.py | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/envs/test_floating_mia_grasp_env.py b/tests/envs/test_floating_mia_grasp_env.py index 389684b..e07d003 100644 --- a/tests/envs/test_floating_mia_grasp_env.py +++ b/tests/envs/test_floating_mia_grasp_env.py @@ -32,7 +32,7 @@ def test_obs_space_dims(env): def test_initial_obs(env): - obs = env.reset() + obs, info = env.reset() if env._observable_object_pos: obs_space_dims_expected = 19 else: @@ -48,14 +48,14 @@ def test_eps_done(env): for t in range(9): action = env.action_space.sample() - obs, reward, done, info = env.step(action) + obs, reward, terminated, truncated, info = env.step(action) assert len(obs) == obs_space_dims_expected assert isinstance(reward, float) - assert isinstance(done, bool) - assert not done + assert isinstance(terminated, bool) + assert not terminated action = env.action_space.sample() - obs, reward, done, info = env.step(action) + obs, reward, terminated, truncated, info = env.step(action) - assert done + assert terminated diff --git a/tests/envs/test_floating_shadow_grasp_env.py b/tests/envs/test_floating_shadow_grasp_env.py index 3f377b3..88f96c2 100644 --- a/tests/envs/test_floating_shadow_grasp_env.py +++ b/tests/envs/test_floating_shadow_grasp_env.py @@ -27,23 +27,23 @@ def test_obs_space_dims(env): @pytest.mark.skip("TODO") 
def test_initial_obs(env): - obs = env.reset() + obs, info = env.reset() assert len(obs) == 18 -def test_eps_done(env): +def test_ep_termination(env): env.reset() for t in range(9): action = env.action_space.sample() - obs, reward, done, info = env.step(action) + obs, reward, terminated, truncated, info = env.step(action) assert len(obs) == 18 assert isinstance(reward, float) - assert isinstance(done, bool) - assert not done + assert isinstance(terminated, bool) + assert not terminated action = env.action_space.sample() - obs, reward, done, info = env.step(action) + obs, reward, terminated, truncated, info = env.step(action) - assert done + assert terminated diff --git a/tests/envs/test_ur10_shadow_grasp_env.py b/tests/envs/test_ur10_shadow_grasp_env.py index 73c4512..79249bd 100644 --- a/tests/envs/test_ur10_shadow_grasp_env.py +++ b/tests/envs/test_ur10_shadow_grasp_env.py @@ -35,15 +35,15 @@ def test_eps_done(env): env.reset() for t in range(9): action = env.action_space.sample() - obs, reward, done, info = env.step(action) + obs, reward, truncated, terminated, info = env.step(action) assert len(obs) == 18 assert isinstance(reward, float) - assert isinstance(done, bool) - assert not done + assert isinstance(terminated, bool) + assert not terminated action = env.action_space.sample() - obs, reward, done, info = env.step(action) + obs, reward, terminated, truncated, info = env.step(action) - assert done + assert terminated diff --git a/tests/envs/test_ur5_mia_grasp_env.py b/tests/envs/test_ur5_mia_grasp_env.py index 1e4ce67..ee7579f 100644 --- a/tests/envs/test_ur5_mia_grasp_env.py +++ b/tests/envs/test_ur5_mia_grasp_env.py @@ -26,7 +26,7 @@ def test_obs_space_dims(env): @pytest.mark.skip("TODO") def test_initial_obs(env): - obs = env.reset() + obs, info = env.reset() assert len(obs) == 18 @@ -35,15 +35,15 @@ def test_eps_done(env): env.reset() for t in range(9): action = env.action_space.sample() - obs, reward, done, info = env.step(action) + obs, reward, terminated, truncated, info = env.step(action) assert len(obs) == 16 assert isinstance(reward, float) - assert isinstance(done, bool) - assert not done + assert isinstance(terminated, bool) + assert not terminated action = env.action_space.sample() - obs, reward, done, info = env.step(action) + obs, reward, terminated, truncated, info = env.step(action) - assert done + assert terminated From 16365191e13fa658702a181028d83f693f7e3077 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 25 Aug 2023 19:36:41 +0200 Subject: [PATCH 3/7] adapt to gymnasium --- examples/floating_mia_example.py | 5 +++-- examples/floating_shadow_example.py | 6 ++++-- examples/ur10_shadow_grasp_example.py | 5 +++-- examples/ur5_mia_grasp_example.py | 6 ++++-- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/examples/floating_mia_example.py b/examples/floating_mia_example.py index a327e21..35050d2 100644 --- a/examples/floating_mia_example.py +++ b/examples/floating_mia_example.py @@ -25,11 +25,12 @@ action = env.action_space.sample() - state, reward, done, _ = env.step(action) + state, reward, terminated, truncated, _ = env.step(action) episode_return += reward - if done: + if terminated or truncated: print(f"Episode finished with return {episode_return}!") num_episodes += 1 + episode_return = 0 env.reset() diff --git a/examples/floating_shadow_example.py b/examples/floating_shadow_example.py index cbd97e2..8f905b3 100644 --- a/examples/floating_shadow_example.py +++ b/examples/floating_shadow_example.py @@ -25,10 +25,12 @@ action = 
env.action_space.sample() - state, reward, done, _ = env.step(action) + state, reward, terminated, truncated, _ = env.step(action) episode_return += reward - if done: + if terminated or truncated: print(f"Episode finished with return {episode_return}!") num_episodes += 1 + episode_return = 0 + env.reset() diff --git a/examples/ur10_shadow_grasp_example.py b/examples/ur10_shadow_grasp_example.py index 0610b7c..39edb8a 100644 --- a/examples/ur10_shadow_grasp_example.py +++ b/examples/ur10_shadow_grasp_example.py @@ -23,11 +23,12 @@ action = env.action_space.sample() - state, reward, done, _ = env.step(action) + state, reward, terminated, truncated, _ = env.step(action) episode_return += reward - if done: + if terminated or truncated: print(f"Episode finished with return {episode_return}!") num_episodes += 1 + episode_return = 0 env.reset() diff --git a/examples/ur5_mia_grasp_example.py b/examples/ur5_mia_grasp_example.py index 416eb88..36d1182 100644 --- a/examples/ur5_mia_grasp_example.py +++ b/examples/ur5_mia_grasp_example.py @@ -23,10 +23,12 @@ action = env.action_space.sample() - state, reward, done, _ = env.step(action) + state, reward, terminated, truncated, _ = env.step(action) episode_return += reward - if done: + if terminated or truncated: print(f"Episode finished with return {episode_return}!") num_episodes += 1 + episode_return = 0 + env.reset() From 52b3e4d9177f5f89b7c4bfe2c20875f52f5af8b4 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 25 Aug 2023 19:37:14 +0200 Subject: [PATCH 4/7] replace gym with gymnasium --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index e0ec997..16f7f24 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,8 +14,7 @@ entrypoints==0.4 executing==1.2.0 fastjsonschema==2.16.2 fonttools==4.38.0 -gym==0.26.2 -gym-notices==0.0.8 +gymnasium==0.29.1 ipykernel==6.17.1 ipython==8.6.0 ipywidgets==8.0.2 From f6566fb61baa74fcd612d2dcf1450129a8468453 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 25 Aug 2023 19:38:02 +0200 Subject: [PATCH 5/7] update required packages and bump version --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index ec4239c..7609224 100644 --- a/setup.py +++ b/setup.py @@ -6,16 +6,16 @@ long_description = f.read() setup(name='deformable_gym', - version="0.1.1", + version="0.2.0", maintainer='Melvin Laux', - maintainer_email='melvin.laux@dfki.de', - description='APRIL gym environments for grasping deformable objects', + maintainer_email='melvin.laux@uni-bremen.de', + description='Gym environments for grasping deformable objects', long_description=long_description, long_description_content_type="text/markdown", license='BSD-3-Clause', packages=["deformable_gym"], install_requires=["pybullet", - "gym", + "gymnasium", "numpy", "pytransform3d" ]) From 474e14bce1fd0e2d74ca8fb6721b289c4dc09cc9 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Tue, 29 Aug 2023 16:30:05 +0200 Subject: [PATCH 6/7] updated exmample in readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e4029f7..28163e7 100644 --- a/README.md +++ b/README.md @@ -75,10 +75,10 @@ while num_episodes <= 10: action = env.action_space.sample() - state, reward, done, _ = env.step(action) + state, reward, terminated, truncated, _ = env.step(action) episode_return += reward - if done: + if terminated or truncated: print(f"Episode finished with return {episode_return}!") 
num_episodes += 1 From 8cdbf2f654dc0cfd71a85fd9fb7298fdbc049c4e Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Tue, 29 Aug 2023 19:23:41 +0200 Subject: [PATCH 7/7] register floating hand envs --- deformable_gym/__init__.py | 15 +++++++++++ deformable_gym/envs/base_env.py | 11 +++++--- deformable_gym/envs/floating_mia_grasp_env.py | 26 +++++++------------ .../envs/floating_shadow_grasp_env.py | 19 +++++++------- deformable_gym/envs/ur5_mia_grasp_env.py | 9 ++----- examples/floating_mia_example.py | 15 +++++------ examples/floating_shadow_example.py | 14 +++++----- 7 files changed, 56 insertions(+), 53 deletions(-) diff --git a/deformable_gym/__init__.py b/deformable_gym/__init__.py index e69de29..219c544 100644 --- a/deformable_gym/__init__.py +++ b/deformable_gym/__init__.py @@ -0,0 +1,15 @@ +from gymnasium.envs.registration import register + +register( + id="FloatingMiaGraspInsole-v0", + entry_point="deformable_gym.envs.floating_mia_grasp_env:FloatingMiaGraspEnv", + kwargs={"object_name": "insole_on_conveyor_belt/back", + "observable_object_pos": True} +) + +register( + id="FloatingShadowGraspInsole-v0", + entry_point="deformable_gym.envs.floating_shadow_grasp_env:FloatingShadowGraspEnv", + kwargs={"object_name": "insole_on_conveyor_belt/back", + "observable_object_pos": True} +) diff --git a/deformable_gym/envs/base_env.py b/deformable_gym/envs/base_env.py index ab2fd53..8ddce99 100644 --- a/deformable_gym/envs/base_env.py +++ b/deformable_gym/envs/base_env.py @@ -129,12 +129,12 @@ def _get_info(self): """ return {} - def _is_terminated(self, state: npt.ArrayLike, action: npt.ArrayLike, next_state: npt.ArrayLike) -> bool: + def _is_terminated(self, observation: npt.ArrayLike, action: npt.ArrayLike, next_observation: npt.ArrayLike) -> bool: """Checks whether the current episode is terminated. - :param state: State - :param action: Action taken in this state - :param next_state: State after action was taken + :param observation: observation before action was taken + :param action: Action taken in this step + :param next_observation: Observation after action was taken :return: Is the current episode done? 
""" return self.step_counter >= self.horizon @@ -189,6 +189,9 @@ def step(self, action: npt.ArrayLike): return next_observation, reward, terminated, truncated, {} + def close(self): + self.simulation.disconnect() + @abc.abstractmethod def calculate_reward(self, state: npt.ArrayLike, diff --git a/deformable_gym/envs/floating_mia_grasp_env.py b/deformable_gym/envs/floating_mia_grasp_env.py index d81103c..8843e79 100644 --- a/deformable_gym/envs/floating_mia_grasp_env.py +++ b/deformable_gym/envs/floating_mia_grasp_env.py @@ -1,7 +1,4 @@ -from typing import Union - import numpy as np -import numpy.typing as npt import pybullet as pb from gymnasium import spaces from deformable_gym.robots import mia_hand @@ -66,8 +63,6 @@ def __init__( object_name: str = "insole", compute_reward: bool = True, object_scale: float = 1.0, - task_space_limit: Union[npt.ArrayLike, None] = ((-0.02, -.05, .95), - (0.02, 0.05, 1.05)), observable_object_pos: bool = False, observable_time_step: bool = False, difficulty_mode: str = "hard", @@ -79,7 +74,6 @@ def __init__( self.randomised = False self.compute_reward = compute_reward self.object_scale = object_scale - self.task_space_limit = task_space_limit self._observable_object_pos = observable_object_pos self._observable_time_step = observable_time_step @@ -97,17 +91,17 @@ def __init__( np.array([-2, -2, 0]), -np.ones(4), limits[0][self.actuated_finger_ids], - -np.ones(6)*5]) + -np.ones(6)*10]) upper_observations = np.concatenate([ np.array([2, 2, 2]), np.ones(4), limits[1][self.actuated_finger_ids], - np.ones(6)*5]) + np.ones(6)*10]) if self._observable_object_pos: - lower_observations = np.append(lower_observations, -np.ones(3)) - upper_observations = np.append(upper_observations, np.ones(3)) + lower_observations = np.append(lower_observations, -np.ones(3)*2) + upper_observations = np.append(upper_observations, np.ones(3)*2) if self._observable_time_step: lower_observations = np.append(lower_observations, 0) @@ -115,7 +109,9 @@ def __init__( self.observation_space = spaces.Box( low=lower_observations, - high=upper_observations) + high=upper_observations, + dtype=np.float64 + ) # build the action space lower = [-np.ones(3) * .0005, # max negative base pos offset @@ -126,7 +122,7 @@ def __init__( np.ones(4) * .000005, # max positive base orn offset limits[1][self.actuated_finger_ids]] # positive joint limits - self.action_space = spaces.Box(low=np.concatenate(lower), high=np.concatenate(upper)) + self.action_space = spaces.Box(low=np.concatenate(lower), high=np.concatenate(upper), dtype=np.float64) def _create_robot(self): orn_limit = None @@ -135,7 +131,6 @@ def _create_robot(self): robot = mia_hand.MiaHandVelocity( world_pos=self.hand_world_pose[:3], world_orn=pb.getEulerFromQuaternion(self.hand_world_pose[3:]), - task_space_limit=self.task_space_limit, verbose=self.verbose, orn_limit=orn_limit, base_commands=True) @@ -143,7 +138,6 @@ def _create_robot(self): robot = mia_hand.MiaHandPosition( world_pos=self.hand_world_pose[:3], world_orn=pb.getEulerFromQuaternion(self.hand_world_pose[3:]), - task_space_limit=self.task_space_limit, verbose=self.verbose, orn_limit=orn_limit, base_commands=True) @@ -176,7 +170,7 @@ def set_difficulty_mode(self, mode: str): else: raise ValueError(f"Received unknown difficulty mode {mode}!") - def reset(self, hard_reset=False, pos=None): + def reset(self, seed=None, options=None, hard_reset=False, pos=None): if pos is None: self.robot.reset_base(self.hand_world_pose) @@ -186,7 +180,7 @@ def reset(self, hard_reset=False, pos=None): 
self.object_to_grasp.reset() self.robot.activate_motors() - return super().reset() + return super().reset(seed, options) def _is_truncated(self, state, action, next_state): # check if insole is exploded diff --git a/deformable_gym/envs/floating_shadow_grasp_env.py b/deformable_gym/envs/floating_shadow_grasp_env.py index 2518651..104e1d6 100644 --- a/deformable_gym/envs/floating_shadow_grasp_env.py +++ b/deformable_gym/envs/floating_shadow_grasp_env.py @@ -54,22 +54,22 @@ def __init__(self, object_name="insole", lower_observations = np.concatenate([ np.array([-2, -2, 0]), -np.ones(4), limits[0][6:], - np.array([-5, -5, -5])], axis=0) + np.array([-10, -10, -10])], axis=0) upper_observations = np.concatenate([ np.array([2, 2, 2]), np.ones(4), limits[1][6:], - np.array([5, 5, 5])], axis=0) + np.array([10, 10, 10])], axis=0) if self._observable_object_pos: - lower_observations = np.append(lower_observations, -np.ones(3)) - upper_observations = np.append(upper_observations, np.ones(3)) + lower_observations = np.append(lower_observations, -np.ones(3)*2) + upper_observations = np.append(upper_observations, np.ones(3)*2) if self._observable_time_step: lower_observations = np.append(lower_observations, 0) upper_observations = np.append(upper_observations, self.horizon) self.observation_space = spaces.Box( - low=lower_observations, high=upper_observations) + low=lower_observations, high=upper_observations, dtype=np.float64) lower_actions = np.concatenate([ -np.ones(7)*0.01, limits[0], [0]], axis=0) @@ -77,7 +77,7 @@ def __init__(self, object_name="insole", upper_actions = np.concatenate([ np.ones(7)*0.01, limits[1], [1]], axis=0) - self.action_space = spaces.Box(low=lower_actions, high=upper_actions) + self.action_space = spaces.Box(low=lower_actions, high=upper_actions, dtype=np.float64) def _create_robot(self): task_space_limit = None @@ -102,14 +102,13 @@ def _create_robot(self): def _load_objects(self): super()._load_objects() - self.object_to_grasp, self.object_position, self.object_orientation = \ - ObjectFactory().create(self.object_name) + self.object_to_grasp, self.object_position, self.object_orientation = ObjectFactory().create(self.object_name) - def reset(self, seed=None, hard_reset=False): + def reset(self, seed=None, options=None, hard_reset=False): self.object_to_grasp.reset() self.robot.activate_motors() - return super().reset(seed) + return super().reset(seed, options) def _is_truncated(self, state, action, next_state): return self._deformable_is_exploded() diff --git a/deformable_gym/envs/ur5_mia_grasp_env.py b/deformable_gym/envs/ur5_mia_grasp_env.py index c75b052..e4b1520 100644 --- a/deformable_gym/envs/ur5_mia_grasp_env.py +++ b/deformable_gym/envs/ur5_mia_grasp_env.py @@ -166,19 +166,14 @@ def _load_objects(self): self.object_to_grasp, self.object_position, self.object_orientation = \ ObjectFactory().create(self.object_name, object2world=self.object2world, scale=self.object_scale) - def reset(self, hard_reset=False): + def reset(self, seed=None, options=None, hard_reset=False): pos = None self.object_to_grasp.reset(pos=pos) self.robot.activate_motors() - return super().reset() - - def is_done(self, state, action, next_state): - - - return super().is_done(state, action, next_state) + return super().reset(seed, options) def _get_observation(self): joint_pos = self.robot.get_joint_positions(self.robot.actuated_real_joints) diff --git a/examples/floating_mia_example.py b/examples/floating_mia_example.py index 35050d2..0743dd4 100644 --- a/examples/floating_mia_example.py +++ 
b/examples/floating_mia_example.py @@ -1,4 +1,4 @@ -from deformable_gym.envs.floating_mia_grasp_env import FloatingMiaGraspEnv +import gymnasium """ ========= @@ -10,14 +10,9 @@ """ -env = FloatingMiaGraspEnv( - gui=True, - horizon=100, - object_name="insole_on_conveyor_belt/back", - observable_time_step=False, - observable_object_pos=True) +env = gymnasium.make("FloatingMiaGraspInsole-v0") -env.reset() +obs, info = env.reset() episode_return = 0 num_episodes = 0 @@ -25,7 +20,7 @@ action = env.action_space.sample() - state, reward, terminated, truncated, _ = env.step(action) + obs, reward, terminated, truncated, _ = env.step(action) episode_return += reward if terminated or truncated: @@ -34,3 +29,5 @@ episode_return = 0 env.reset() + +env.close() diff --git a/examples/floating_shadow_example.py b/examples/floating_shadow_example.py index 8f905b3..5f62358 100644 --- a/examples/floating_shadow_example.py +++ b/examples/floating_shadow_example.py @@ -1,3 +1,5 @@ +import gymnasium + from deformable_gym.envs.floating_shadow_grasp_env import FloatingShadowGraspEnv """ @@ -10,14 +12,10 @@ """ -env = FloatingShadowGraspEnv( - gui=True, - horizon=100, - object_name="insole", -) +env = gymnasium.make("FloatingShadowGraspInsole-v0") -env.reset() +obs, info = env.reset() episode_return = 0 num_episodes = 0 @@ -25,7 +23,7 @@ action = env.action_space.sample() - state, reward, terminated, truncated, _ = env.step(action) + obs, reward, terminated, truncated, _ = env.step(action) episode_return += reward if terminated or truncated: @@ -34,3 +32,5 @@ episode_return = 0 env.reset() + +env.close()
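
Usage note: with this series applied, every environment follows the Gymnasium API — reset() returns an (observation, info) pair and step() returns (observation, reward, terminated, truncated, info) — and the floating-hand environments are registered under Gymnasium ids in deformable_gym/__init__.py (patch 7). A minimal rollout sketch against the registered FloatingMiaGraspInsole-v0 id is shown below; the random-action policy and the seed value are illustrative only, not part of the patches.

import gymnasium
import deformable_gym  # importing the package runs the register() calls added in patch 7

env = gymnasium.make("FloatingMiaGraspInsole-v0")
obs, info = env.reset(seed=0)

terminated = truncated = False
while not (terminated or truncated):
    action = env.action_space.sample()  # illustrative random policy
    obs, reward, terminated, truncated, info = env.step(action)

env.close()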