From c0ce66c16d9c10e94254ce0c91c5a63ad6413e9b Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Thu, 27 Jul 2023 18:30:47 +0200 Subject: [PATCH 01/15] remove early episode termination --- deformable_gym/envs/base_env.py | 19 ++++--------------- deformable_gym/envs/floating_mia_grasp_env.py | 5 ----- .../envs/floating_shadow_grasp_env.py | 2 +- deformable_gym/envs/mia_grasp_env.py | 3 +-- deformable_gym/envs/shadow_grasp_env.py | 3 +-- deformable_gym/envs/ur10_shadow_grasp_env.py | 2 +- deformable_gym/envs/ur5_mia_grasp_env.py | 2 +- examples/floating_shadow_example.py | 1 - tests/envs/test_floating_mia_grasp_env.py | 1 - tests/envs/test_floating_shadow_grasp_env.py | 2 -- 10 files changed, 9 insertions(+), 31 deletions(-) diff --git a/deformable_gym/envs/base_env.py b/deformable_gym/envs/base_env.py index 6221a20..216ba1f 100644 --- a/deformable_gym/envs/base_env.py +++ b/deformable_gym/envs/base_env.py @@ -40,13 +40,11 @@ def __init__( horizon: int = 100, soft: bool = False, load_plane: bool = True, verbose: bool = False, time_delta: float = 0.0001, verbose_dt: float = 10.00, - early_episode_termination: bool = True, pybullet_options: str = ""): self.gui = gui self.verbose = verbose self.__load_plane = load_plane self.horizon = horizon - self.early_episode_termination = early_episode_termination mode = pb.GUI if gui else pb.DIRECT @@ -69,9 +67,7 @@ def _load_objects(self): If a plane should be loaded, it will have the position (0, 0, 0). """ if self.__load_plane: - PLANE_POSITION = (0, 0, 0) - self.plane = pb.loadURDF( - "plane.urdf", PLANE_POSITION, useFixedBase=1) + self.plane = pb.loadURDF("plane.urdf", (0, 0, 0), useFixedBase=1) def _hard_reset(self): """Hard reset. @@ -136,10 +132,6 @@ def is_done(self, state: np.ndarray, action: np.ndarray, :param next_state: State after action was taken :return: Is the current episode done? """ - # check for termination action - if self.early_episode_termination and round(action[-1]) == 1: - return True - return self.step_counter >= self.horizon def step(self, action: npt.ArrayLike): @@ -160,10 +152,7 @@ def step(self, action: npt.ArrayLike): state = self.observe_state() # execute action - if self.early_episode_termination: - self.robot.perform_command(action[:-1]) - else: - self.robot.perform_command(action) + self.robot.perform_command(action) # simulate until next time step self.simulation.step_to_trigger("time_step") @@ -187,8 +176,8 @@ def step(self, action: npt.ArrayLike): @abc.abstractmethod def calculate_reward( - self, state: np.ndarray, action: np.ndarray, - next_state: np.ndarray, done: bool): + self, state: npt.ArrayLike, action: npt.ArrayLike, + next_state: npt.ArrayLike, done: bool): """Calculate reward. :param state: State of the environment. diff --git a/deformable_gym/envs/floating_mia_grasp_env.py b/deformable_gym/envs/floating_mia_grasp_env.py index 54164a8..644d9c0 100644 --- a/deformable_gym/envs/floating_mia_grasp_env.py +++ b/deformable_gym/envs/floating_mia_grasp_env.py @@ -120,7 +120,6 @@ def __init__( high=upper_observations) # build the action space - lower = [-np.ones(3) * .0005, # max negative base pos offset -np.ones(4) * .000005, # max negative base orn offset limits[0][self.actuated_finger_ids]] # negative joint limits @@ -129,10 +128,6 @@ def __init__( np.ones(4) * .000005, # max positive base orn offset limits[1][self.actuated_finger_ids]] # positive joint limits - if self.early_episode_termination: - lower.append([0.0]) - upper.append([1.0]) - self.action_space = spaces.Box(low=np.concatenate(lower), high=np.concatenate(upper)) def _create_robot(self): diff --git a/deformable_gym/envs/floating_shadow_grasp_env.py b/deformable_gym/envs/floating_shadow_grasp_env.py index 9613555..97fe199 100644 --- a/deformable_gym/envs/floating_shadow_grasp_env.py +++ b/deformable_gym/envs/floating_shadow_grasp_env.py @@ -38,7 +38,7 @@ class FloatingShadowGraspEnv(GraspDeformableMixin, BaseBulletEnv): [-0.8, 0.0, 1.6]) def __init__(self, object_name="insole", - horizon=200, train=True, + horizon=100, train=True, compute_reward=True, object_scale=1.0, **kwargs): self.insole = None self.train = train diff --git a/deformable_gym/envs/mia_grasp_env.py b/deformable_gym/envs/mia_grasp_env.py index 3f66953..364f517 100644 --- a/deformable_gym/envs/mia_grasp_env.py +++ b/deformable_gym/envs/mia_grasp_env.py @@ -57,7 +57,6 @@ class MiaGraspEnv(FloatingHandMixin, GraspDeformableMixin, BaseBulletEnv): is in collision with the object. """ - HORIZON = 500 ACCUMULATED_FORCES_THRESHOLD = 200.0 # greater forces end the episode DROP_TEST_TIME = 0.5 # time to test grasp stability in the end @@ -81,7 +80,7 @@ def __init__( self.abort_on_first_step_with_collision = abort_on_first_step_with_collision super().__init__( - gui=gui, real_time=real_time, horizon=self.HORIZON, soft=True, + gui=gui, real_time=real_time, horizon=100, soft=True, load_plane=True, verbose=verbose, time_delta=time_delta, verbose_dt=verbose_dt) diff --git a/deformable_gym/envs/shadow_grasp_env.py b/deformable_gym/envs/shadow_grasp_env.py index 8f61bd8..410be04 100644 --- a/deformable_gym/envs/shadow_grasp_env.py +++ b/deformable_gym/envs/shadow_grasp_env.py @@ -31,7 +31,6 @@ class ShadowGraspEnv(FloatingHandMixin, GraspDeformableMixin, BaseBulletEnv): timing information (default: 0.1). """ - HORIZON = 500 ACCUMULATED_FORCES_THRESHOLD = 200.0 # greater forces end the episode DROP_TEST_TIME = 0.5 # time to test grasp stability in the end @@ -54,7 +53,7 @@ def __init__( self.velocity_control = velocity_control super().__init__( - gui=gui, real_time=real_time, horizon=self.HORIZON, soft=True, + gui=gui, real_time=real_time, horizon=100, soft=True, load_plane=True, verbose=verbose, time_delta=time_delta, verbose_dt=verbose_dt) diff --git a/deformable_gym/envs/ur10_shadow_grasp_env.py b/deformable_gym/envs/ur10_shadow_grasp_env.py index e888d3b..7a6e72b 100644 --- a/deformable_gym/envs/ur10_shadow_grasp_env.py +++ b/deformable_gym/envs/ur10_shadow_grasp_env.py @@ -42,7 +42,7 @@ class UR10ShadowGraspEnv(GraspDeformableMixin, BaseBulletEnv): p=np.array([-0.7, 0.1, 1.8])) def __init__(self, gui=True, real_time=False, object_name="insole", - verbose=False, horizon=200, train=True, + verbose=False, horizon=100, train=True, compute_reward=True, object_scale=1.0, verbose_dt=10.0): self.insole = None self.train = train diff --git a/deformable_gym/envs/ur5_mia_grasp_env.py b/deformable_gym/envs/ur5_mia_grasp_env.py index 104c9f7..7c565a8 100644 --- a/deformable_gym/envs/ur5_mia_grasp_env.py +++ b/deformable_gym/envs/ur5_mia_grasp_env.py @@ -97,7 +97,7 @@ class UR5MiaGraspEnv(GraspDeformableMixin, BaseBulletEnv): def __init__( self, gui: bool = True, real_time: bool = False, object_name: str = "insole", verbose: bool = False, - horizon: int = 200, train: bool = True, + horizon: int = 100, train: bool = True, thumb_adducted: bool = True, compute_reward: bool = True, object_scale: float = 1.0, verbose_dt: float = 10.0, pybullet_options: str = ""): diff --git a/examples/floating_shadow_example.py b/examples/floating_shadow_example.py index c8dcdce..1570a1e 100644 --- a/examples/floating_shadow_example.py +++ b/examples/floating_shadow_example.py @@ -14,7 +14,6 @@ gui=True, horizon=100, object_name="insole", - early_episode_termination=False ) diff --git a/tests/envs/test_floating_mia_grasp_env.py b/tests/envs/test_floating_mia_grasp_env.py index 97d8e43..389684b 100644 --- a/tests/envs/test_floating_mia_grasp_env.py +++ b/tests/envs/test_floating_mia_grasp_env.py @@ -9,7 +9,6 @@ def env(): verbose=True, horizon=10, object_name="insole_on_conveyor_belt/back", - early_episode_termination=False, observable_object_pos=True, difficulty_mode="hard") diff --git a/tests/envs/test_floating_shadow_grasp_env.py b/tests/envs/test_floating_shadow_grasp_env.py index 5464b3c..5202a50 100644 --- a/tests/envs/test_floating_shadow_grasp_env.py +++ b/tests/envs/test_floating_shadow_grasp_env.py @@ -9,7 +9,6 @@ def env(): verbose=True, horizon=10, object_name="insole_on_conveyor_belt/back", - early_episode_termination=False, # observable_object_pos=True, # difficulty_mode="hard" ) @@ -32,7 +31,6 @@ def test_initial_obs(env): assert len(obs) == 18 -@pytest.mark.skip("TODO") def test_eps_done(env): env.reset() for t in range(9): From 41de3a72ccfbee86a272cb6f4d5e367ace5bb7e3 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Thu, 27 Jul 2023 18:52:14 +0200 Subject: [PATCH 02/15] rename pybullet_tools to bullet_simulation --- deformable_gym/envs/base_env.py | 2 +- ...pybullet_tools.py => bullet_simulation.py} | 0 deformable_gym/envs/floating_mia_grasp_env.py | 16 ++++++------- .../envs/floating_shadow_grasp_env.py | 23 +++++++++++-------- tests/conftest.py | 2 +- tests/envs/test_floating_shadow_grasp_env.py | 4 ++-- tests/objects/test_box.py | 2 +- tests/objects/test_insole.py | 2 +- tests/objects/test_insole_on_conveyor.py | 2 +- tests/objects/test_pillow.py | 2 +- tests/objects/test_sphere.py | 2 +- tests/objects/test_urdf_object.py | 2 +- tests/robots/test_shadow_hand_velocity.py | 2 +- 13 files changed, 32 insertions(+), 29 deletions(-) rename deformable_gym/envs/{pybullet_tools.py => bullet_simulation.py} (100%) diff --git a/deformable_gym/envs/base_env.py b/deformable_gym/envs/base_env.py index 216ba1f..37d5bca 100644 --- a/deformable_gym/envs/base_env.py +++ b/deformable_gym/envs/base_env.py @@ -8,7 +8,7 @@ from gym.core import Env from gym import spaces from deformable_gym.robots.bullet_robot import BulletRobot -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.helpers.pybullet_helper import MultibodyPose diff --git a/deformable_gym/envs/pybullet_tools.py b/deformable_gym/envs/bullet_simulation.py similarity index 100% rename from deformable_gym/envs/pybullet_tools.py rename to deformable_gym/envs/bullet_simulation.py diff --git a/deformable_gym/envs/floating_mia_grasp_env.py b/deformable_gym/envs/floating_mia_grasp_env.py index 644d9c0..170ddbf 100644 --- a/deformable_gym/envs/floating_mia_grasp_env.py +++ b/deformable_gym/envs/floating_mia_grasp_env.py @@ -139,14 +139,16 @@ def _create_robot(self): world_pos=self.hand_world_pose[:3], world_orn=pb.getEulerFromQuaternion(self.hand_world_pose[3:]), task_space_limit=self.task_space_limit, - verbose=self.verbose, orn_limit=orn_limit, + verbose=self.verbose, + orn_limit=orn_limit, base_commands=True) else: robot = mia_hand.MiaHandPosition( world_pos=self.hand_world_pose[:3], world_orn=pb.getEulerFromQuaternion(self.hand_world_pose[3:]), task_space_limit=self.task_space_limit, - verbose=self.verbose, orn_limit=orn_limit, + verbose=self.verbose, + orn_limit=orn_limit, base_commands=True) self.simulation.add_robot(robot) @@ -180,10 +182,6 @@ def set_difficulty_mode(self, mode: str): def reset(self, hard_reset=False, pos=None): - if self.verbose: - print("Performing reset (april)") - print("Pose:", self.hand_world_pose) - if pos is None: self.robot.reset_base(self.hand_world_pose) else: @@ -235,12 +233,12 @@ def calculate_reward(self, state, action, next_state, done): self.object_to_grasp.remove_anchors() for _ in range(50): if self._deformable_is_exploded(): - return -1 + return -1.0 self.simulation.step_to_trigger("time_step") height = self.object_to_grasp.get_pose()[2] if height < 0.9: - return -1 + return -1.0 else: - return 1 + return 1.0 else: return 0.0 diff --git a/deformable_gym/envs/floating_shadow_grasp_env.py b/deformable_gym/envs/floating_shadow_grasp_env.py index 97fe199..459f7b2 100644 --- a/deformable_gym/envs/floating_shadow_grasp_env.py +++ b/deformable_gym/envs/floating_shadow_grasp_env.py @@ -29,17 +29,12 @@ class FloatingShadowGraspEnv(GraspDeformableMixin, BaseBulletEnv): object should be sampled from the training or the test set. """ - train_positions = ([-0.7, 0.1, 1.6], - [-0.7, 0.2, 1.6], - [-0.7, 0.0, 1.6]) - - test_positions = ([-0.8, 0.1, 1.6], - [-0.8, 0.2, 1.6], - [-0.8, 0.0, 1.6]) - def __init__(self, object_name="insole", horizon=100, train=True, - compute_reward=True, object_scale=1.0, **kwargs): + compute_reward=True, object_scale=1.0, + observable_object_pos: bool = False, + observable_time_step: bool = False, + **kwargs): self.insole = None self.train = train self.velocity_commands = False @@ -47,6 +42,8 @@ def __init__(self, object_name="insole", self.randomised = False self.compute_reward = compute_reward self.object_scale = object_scale + self._observable_object_pos = observable_object_pos + self._observable_time_step = observable_time_step super().__init__(horizon=horizon, soft=True, load_plane=True, **kwargs) @@ -63,6 +60,14 @@ def __init__(self, object_name="insole", np.array([2, 2, 2]), np.ones(4), limits[1][6:], np.array([5, 5, 5])], axis=0) + if self._observable_object_pos: + lower_observations = np.append(lower_observations, np.ones(3)) + upper_observations = np.append(upper_observations, -np.ones(3)) + + if self._observable_time_step: + lower_observations = np.append(lower_observations, 0) + upper_observations = np.append(upper_observations, self.horizon) + self.observation_space = spaces.Box( low=lower_observations, high=upper_observations) diff --git a/tests/conftest.py b/tests/conftest.py index 3e37e9c..d4e2a4b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ import pytest import pybullet as pb -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation @pytest.fixture diff --git a/tests/envs/test_floating_shadow_grasp_env.py b/tests/envs/test_floating_shadow_grasp_env.py index 5202a50..f3de32c 100644 --- a/tests/envs/test_floating_shadow_grasp_env.py +++ b/tests/envs/test_floating_shadow_grasp_env.py @@ -8,8 +8,8 @@ def env(): gui=False, verbose=True, horizon=10, - object_name="insole_on_conveyor_belt/back", - # observable_object_pos=True, + object_name="insole", + observable_object_pos=True, # difficulty_mode="hard" ) diff --git a/tests/objects/test_box.py b/tests/objects/test_box.py index 830c39d..641d146 100644 --- a/tests/objects/test_box.py +++ b/tests/objects/test_box.py @@ -1,7 +1,7 @@ import numpy as np import pybullet as pb import pytest -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.objects.bullet_object import ObjectFactory, UrdfObject from numpy.testing import assert_array_almost_equal diff --git a/tests/objects/test_insole.py b/tests/objects/test_insole.py index f9696cb..db731ce 100644 --- a/tests/objects/test_insole.py +++ b/tests/objects/test_insole.py @@ -1,7 +1,7 @@ import numpy as np import pybullet as pb import pytest -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.objects.bullet_object import ObjectFactory, UrdfObject from numpy.testing import assert_array_almost_equal diff --git a/tests/objects/test_insole_on_conveyor.py b/tests/objects/test_insole_on_conveyor.py index 6cb8b5d..b0e7a36 100644 --- a/tests/objects/test_insole_on_conveyor.py +++ b/tests/objects/test_insole_on_conveyor.py @@ -1,7 +1,7 @@ import numpy as np import pybullet as pb import pytest -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.objects.bullet_object import ObjectFactory, UrdfObject from numpy.testing import assert_array_almost_equal diff --git a/tests/objects/test_pillow.py b/tests/objects/test_pillow.py index 8746c68..f37e8ab 100644 --- a/tests/objects/test_pillow.py +++ b/tests/objects/test_pillow.py @@ -1,7 +1,7 @@ import numpy as np import pybullet as pb import pytest -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.objects.bullet_object import ObjectFactory, UrdfObject from numpy.testing import assert_array_almost_equal diff --git a/tests/objects/test_sphere.py b/tests/objects/test_sphere.py index bb90240..512c0e6 100644 --- a/tests/objects/test_sphere.py +++ b/tests/objects/test_sphere.py @@ -1,7 +1,7 @@ import numpy as np import pybullet as pb import pytest -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.objects.bullet_object import ObjectFactory, UrdfObject from numpy.testing import assert_array_almost_equal diff --git a/tests/objects/test_urdf_object.py b/tests/objects/test_urdf_object.py index e94d482..6cbe752 100644 --- a/tests/objects/test_urdf_object.py +++ b/tests/objects/test_urdf_object.py @@ -1,7 +1,7 @@ import numpy as np import pybullet as pb import pytest -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.objects.bullet_object import ObjectFactory, UrdfObject from numpy.testing import assert_array_almost_equal diff --git a/tests/robots/test_shadow_hand_velocity.py b/tests/robots/test_shadow_hand_velocity.py index 606e68f..9aa7f59 100644 --- a/tests/robots/test_shadow_hand_velocity.py +++ b/tests/robots/test_shadow_hand_velocity.py @@ -1,6 +1,6 @@ import numpy as np import pybullet as pb -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.robots.shadow_hand import ShadowHandVelocity from numpy.testing import assert_almost_equal From 0c9ae294f2c838a2d7daca5874b8c3ba91bdadf1 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 28 Jul 2023 12:32:21 +0200 Subject: [PATCH 03/15] added test files for non-floating grasp envs --- deformable_gym/envs/ur5_mia_grasp_env.py | 6 --- examples/mia_grasp_example.py | 31 +++++++++++++ tests/envs/test_floating_shadow_grasp_env.py | 3 +- tests/envs/test_ur10_shadow_grasp_env.py | 49 ++++++++++++++++++++ tests/envs/test_ur5_mia_grasp_env.py | 49 ++++++++++++++++++++ 5 files changed, 130 insertions(+), 8 deletions(-) create mode 100644 examples/mia_grasp_example.py create mode 100644 tests/envs/test_ur10_shadow_grasp_env.py create mode 100644 tests/envs/test_ur5_mia_grasp_env.py diff --git a/deformable_gym/envs/ur5_mia_grasp_env.py b/deformable_gym/envs/ur5_mia_grasp_env.py index 7c565a8..d43e68d 100644 --- a/deformable_gym/envs/ur5_mia_grasp_env.py +++ b/deformable_gym/envs/ur5_mia_grasp_env.py @@ -171,8 +171,6 @@ def _load_objects(self): scale=self.object_scale) def reset(self, hard_reset=False): - if self.verbose: - print("Performing reset (april)") if self.randomised: if self.train: @@ -189,10 +187,6 @@ def reset(self, hard_reset=False): def is_done(self, state, action, next_state): - # check for termination action - # if round(action[-1]) == 1: - # return True - # check if insole is exploded if self._deformable_is_exploded(): print("Exploded insole") diff --git a/examples/mia_grasp_example.py b/examples/mia_grasp_example.py new file mode 100644 index 0000000..551fddb --- /dev/null +++ b/examples/mia_grasp_example.py @@ -0,0 +1,31 @@ +from deformable_gym.envs.mia_grasp_env import MiaGraspEnv + +""" +========= +Floating Mia Example +========= + +This is an example of how to use the FloatingMiaGraspEnv. A random policy is then +used to generate ten episodes. + +""" + +env = MiaGraspEnv( + gui=True, + object_name="insole_on_conveyor_belt/back" +) + +env.reset() +episode_return = 0 +num_episodes = 0 + +while num_episodes <= 10: + + action = env.action_space.sample() + + state, reward, done, _ = env.step(action) + episode_return += reward + + if done: + print(f"Episode finished with return {episode_return}!") + num_episodes += 1 diff --git a/tests/envs/test_floating_shadow_grasp_env.py b/tests/envs/test_floating_shadow_grasp_env.py index f3de32c..6be0370 100644 --- a/tests/envs/test_floating_shadow_grasp_env.py +++ b/tests/envs/test_floating_shadow_grasp_env.py @@ -10,8 +10,7 @@ def env(): horizon=10, object_name="insole", observable_object_pos=True, - # difficulty_mode="hard" - ) + ) @pytest.mark.skip("TODO") diff --git a/tests/envs/test_ur10_shadow_grasp_env.py b/tests/envs/test_ur10_shadow_grasp_env.py new file mode 100644 index 0000000..73c4512 --- /dev/null +++ b/tests/envs/test_ur10_shadow_grasp_env.py @@ -0,0 +1,49 @@ +import pytest +from deformable_gym.envs.ur10_shadow_grasp_env import UR10ShadowGraspEnv + + +@pytest.fixture +def env(): + return UR10ShadowGraspEnv( + gui=False, + verbose=True, + horizon=10, + object_name="insole", + #observable_object_pos=True, + ) + + +@pytest.mark.skip("TODO") +def test_action_space_dims(env): + action_space = env.action_space + assert action_space.shape[0] == 10 + + +@pytest.mark.skip("TODO") +def test_obs_space_dims(env): + obs_space = env.observation_space + assert obs_space.shape[0] == 28 + + +@pytest.mark.skip("TODO") +def test_initial_obs(env): + obs = env.reset() + assert len(obs) == 18 + +@pytest.mark.skip("TODO") +def test_eps_done(env): + env.reset() + for t in range(9): + action = env.action_space.sample() + obs, reward, done, info = env.step(action) + + assert len(obs) == 18 + assert isinstance(reward, float) + assert isinstance(done, bool) + assert not done + + action = env.action_space.sample() + obs, reward, done, info = env.step(action) + + assert done + diff --git a/tests/envs/test_ur5_mia_grasp_env.py b/tests/envs/test_ur5_mia_grasp_env.py new file mode 100644 index 0000000..756d85e --- /dev/null +++ b/tests/envs/test_ur5_mia_grasp_env.py @@ -0,0 +1,49 @@ +import pytest +from deformable_gym.envs.ur5_mia_grasp_env import UR5MiaGraspEnv + + +@pytest.fixture +def env(): + return UR5MiaGraspEnv( + gui=False, + verbose=True, + horizon=10, + object_name="insole", + #observable_object_pos=True, + ) + + +@pytest.mark.skip("TODO") +def test_action_space_dims(env): + action_space = env.action_space + assert action_space.shape[0] == 10 + +@pytest.mark.skip("TODO") +def test_obs_space_dims(env): + obs_space = env.observation_space + assert obs_space.shape[0] == 28 + + +@pytest.mark.skip("TODO") +def test_initial_obs(env): + obs = env.reset() + assert len(obs) == 18 + + +@pytest.mark.skip("TODO") +def test_eps_done(env): + env.reset() + for t in range(9): + action = env.action_space.sample() + obs, reward, done, info = env.step(action) + + assert len(obs) == 18 + assert isinstance(reward, float) + assert isinstance(done, bool) + assert not done + + action = env.action_space.sample() + obs, reward, done, info = env.step(action) + + assert done + From 49d49c4b1a58802f6711ea90384bff26d44acde1 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 28 Jul 2023 12:50:21 +0200 Subject: [PATCH 04/15] create example scripts for ur5_mia and ur10_shadow envs --- ...xample.py => ur10_shadow_grasp_example.py} | 6 ++-- examples/ur5_mia_grasp_example.py | 31 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) rename examples/{mia_grasp_example.py => ur10_shadow_grasp_example.py} (79%) create mode 100644 examples/ur5_mia_grasp_example.py diff --git a/examples/mia_grasp_example.py b/examples/ur10_shadow_grasp_example.py similarity index 79% rename from examples/mia_grasp_example.py rename to examples/ur10_shadow_grasp_example.py index 551fddb..71cf096 100644 --- a/examples/mia_grasp_example.py +++ b/examples/ur10_shadow_grasp_example.py @@ -1,4 +1,4 @@ -from deformable_gym.envs.mia_grasp_env import MiaGraspEnv +from deformable_gym.envs.ur10_shadow_grasp_env import UR10ShadowGraspEnv """ ========= @@ -10,9 +10,9 @@ """ -env = MiaGraspEnv( +env = UR10ShadowGraspEnv( gui=True, - object_name="insole_on_conveyor_belt/back" + object_name="insole" ) env.reset() diff --git a/examples/ur5_mia_grasp_example.py b/examples/ur5_mia_grasp_example.py new file mode 100644 index 0000000..23eb7ee --- /dev/null +++ b/examples/ur5_mia_grasp_example.py @@ -0,0 +1,31 @@ +from deformable_gym.envs.ur5_mia_grasp_env import UR5MiaGraspEnv + +""" +========= +Floating Mia Example +========= + +This is an example of how to use the FloatingMiaGraspEnv. A random policy is then +used to generate ten episodes. + +""" + +env = UR5MiaGraspEnv( + gui=True, + object_name="insole2" +) + +env.reset() +episode_return = 0 +num_episodes = 0 + +while num_episodes <= 10: + + action = env.action_space.sample() + + state, reward, done, _ = env.step(action) + episode_return += reward + + if done: + print(f"Episode finished with return {episode_return}!") + num_episodes += 1 From 24910f407c70766b8db94b5919193350662ff90d Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 28 Jul 2023 12:50:55 +0200 Subject: [PATCH 05/15] fixed incorrect action spaces --- deformable_gym/envs/ur10_shadow_grasp_env.py | 30 ++++---------------- deformable_gym/envs/ur5_mia_grasp_env.py | 24 ++++++++-------- tests/envs/test_floating_shadow_grasp_env.py | 1 + tests/envs/test_ur5_mia_grasp_env.py | 2 +- tests/robots/test_ur5_mia_position.py | 17 ++++++++++- 5 files changed, 36 insertions(+), 38 deletions(-) diff --git a/deformable_gym/envs/ur10_shadow_grasp_env.py b/deformable_gym/envs/ur10_shadow_grasp_env.py index 7a6e72b..a96c7c9 100644 --- a/deformable_gym/envs/ur10_shadow_grasp_env.py +++ b/deformable_gym/envs/ur10_shadow_grasp_env.py @@ -72,12 +72,10 @@ def __init__(self, gui=True, real_time=False, object_name="insole", low=lower_observations, high=upper_observations) lower_actions = np.concatenate([ - np.array([-2, -2, 0]), -np.ones(4), limits[0][6:], - [0]], axis=0) + np.array([-2, -2, 0]), -np.ones(4), limits[0][6:]], axis=0) upper_actions = np.concatenate([ - np.array([2, 2, 2]), np.ones(4), limits[1][6:], - [1]], axis=0) + np.array([2, 2, 2]), np.ones(4), limits[1][6:]], axis=0) self.action_space = spaces.Box(low=lower_actions, high=upper_actions) @@ -97,19 +95,7 @@ def _create_robot(self): end_effector_link="rh_forearm", verbose=self.verbose, orn_limit=orn_limit) - # robot.set_ee_pose([-0.3, 0.2, 1.5], - # [-0.69284743, 0.69317556, 0.14108795, -0.13987236]) - - robot.set_initial_joint_positions(dict(zip(robot.motors, - robot.get_joint_positions()))) - - """ - robot.set_initial_joint_positions(np.array( - [2.44388798, -2.01664781, 1.72892952, -0.3965438, 1.18004689, - 0.18013334, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.] - )) - """ + robot.set_initial_joint_positions(dict(zip(robot.motors, robot.get_joint_positions()))) self.simulation.add_robot(robot) @@ -123,9 +109,6 @@ def _load_objects(self): scale=self.object_scale) def reset(self, hard_reset=False): - if self.verbose: - print("Performing reset (april)") - if self.randomised: if self.train: pos = random.choice(self.train_positions) @@ -135,7 +118,6 @@ def reset(self, hard_reset=False): pos = None self.object_to_grasp.reset(pos=pos) - self.robot.activate_motors() return super().reset() @@ -174,9 +156,9 @@ def calculate_reward(self, state, action, next_state, done): return -100 self.simulation.step_to_trigger("time_step") height = self.object_to_grasp.get_pose()[2] - if height < 0.5: - return -50 + if height > 0.9: + return 1.0 else: - return 0.0 + return -1.0 else: return 0.0 diff --git a/deformable_gym/envs/ur5_mia_grasp_env.py b/deformable_gym/envs/ur5_mia_grasp_env.py index d43e68d..33ebe03 100644 --- a/deformable_gym/envs/ur5_mia_grasp_env.py +++ b/deformable_gym/envs/ur5_mia_grasp_env.py @@ -132,12 +132,14 @@ def __init__( actuated_finger_ids = np.array([6, 7, 11], dtype=int) lower_actions = np.concatenate([ - np.array([-2, -2, 0]), -np.ones(4), limits[0][actuated_finger_ids], - [0]], axis=0) + np.array([-2, -2, 0]), + -np.ones(4), + limits[0][actuated_finger_ids]], axis=0) upper_actions = np.concatenate([ - np.array([2, 2, 2]), np.ones(4), limits[1][actuated_finger_ids], - [1]], axis=0) + np.array([2, 2, 2]), + np.ones(4), + limits[1][actuated_finger_ids]], axis=0) self.action_space = spaces.Box(low=lower_actions, high=upper_actions) @@ -166,9 +168,7 @@ def _create_robot(self): def _load_objects(self): super()._load_objects() self.object_to_grasp, self.object_position, self.object_orientation = \ - ObjectFactory().create( - self.object_name, object2world=self.object2world, - scale=self.object_scale) + ObjectFactory().create(self.object_name, object2world=self.object2world, scale=self.object_scale) def reset(self, hard_reset=False): @@ -195,8 +195,7 @@ def is_done(self, state, action, next_state): return super().is_done(state, action, next_state) def observe_state(self): - joint_pos = self.robot.get_joint_positions( - self.robot.actuated_real_joints) + joint_pos = self.robot.get_joint_positions(self.robot.actuated_real_joints) ee_pose = self.robot.get_ee_pose() sensor_readings = self.robot.get_sensor_readings() @@ -222,9 +221,10 @@ def calculate_reward(self, state, action, next_state, done): return -100 self.simulation.step_to_trigger("time_step") height = self.object_to_grasp.get_pose()[2] - if height < 0.5: - return -50 + + if height > 0.9: + return 1.0 else: - return 0.0 + return -1.0 else: return 0.0 diff --git a/tests/envs/test_floating_shadow_grasp_env.py b/tests/envs/test_floating_shadow_grasp_env.py index 6be0370..3f377b3 100644 --- a/tests/envs/test_floating_shadow_grasp_env.py +++ b/tests/envs/test_floating_shadow_grasp_env.py @@ -18,6 +18,7 @@ def test_action_space_dims(env): action_space = env.action_space assert action_space.shape[0] == 10 + @pytest.mark.skip("TODO") def test_obs_space_dims(env): obs_space = env.observation_space diff --git a/tests/envs/test_ur5_mia_grasp_env.py b/tests/envs/test_ur5_mia_grasp_env.py index 756d85e..1e4ce67 100644 --- a/tests/envs/test_ur5_mia_grasp_env.py +++ b/tests/envs/test_ur5_mia_grasp_env.py @@ -37,7 +37,7 @@ def test_eps_done(env): action = env.action_space.sample() obs, reward, done, info = env.step(action) - assert len(obs) == 18 + assert len(obs) == 16 assert isinstance(reward, float) assert isinstance(done, bool) assert not done diff --git a/tests/robots/test_ur5_mia_position.py b/tests/robots/test_ur5_mia_position.py index 4394dc3..b4c9121 100644 --- a/tests/robots/test_ur5_mia_position.py +++ b/tests/robots/test_ur5_mia_position.py @@ -17,9 +17,24 @@ def robot(): @pytest.mark.skip("TODO") -def test_ur5_mia_position_creation(simulation, robot): +def test_ur5_mia_position_initial_position(simulation, robot): actual_pose = np.concatenate(robot.multibody_pose.get_pose()) expected_pose = np.array([0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]) assert_almost_equal(actual_pose, expected_pose) + +def test_mia_hand_position_creation(simulation, robot, mia_motors, mia_sensors): + + found_motors = robot.motors.keys() + + # check motor creation + for motor in mia_motors: + assert motor in found_motors + + found_sensors = robot.sensors.keys() + + # check sensor creation + for sensor in mia_sensors: + assert sensor in found_sensors + From 61b9c0281346fdb1d78ce21402927f785f030db3 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 28 Jul 2023 13:12:56 +0200 Subject: [PATCH 06/15] added motor creation tests for arm+hand robots --- tests/conftest.py | 28 +++++++++++++++++++---- tests/robots/test_ur10_shadow_position.py | 5 ++++ tests/robots/test_ur10_shadow_velocity.py | 4 ++++ tests/robots/test_ur5_mia_position.py | 14 +++--------- tests/robots/test_ur5_mia_velocity.py | 7 ++++++ 5 files changed, 43 insertions(+), 15 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index d4e2a4b..63c0819 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,7 +22,27 @@ def mia_sensors(): @pytest.fixture def shadow_motors(): - return ["rh_WRJ2", "rh_WRJ1", "rh_THJ5", "rh_THJ4", "rh_THJ3", "rh_THJ2", - "rh_THJ1", "rh_LFJ5", "rh_LFJ4", "rh_LFJ3", "rh_LFJ2", "rh_RFJ1", - "rh_RFJ4", "rh_RFJ3", "rh_RFJ2", "rh_RFJ1", "rh_MFJ4", "rh_MFJ3", - "rh_MFJ2", "rh_MFJ1", "rh_FFJ4", "rh_FFJ3", "rh_FFJ2", "rh_FFJ1"] + return ["rh_WRJ2", "rh_WRJ1", "rh_THJ5", "rh_THJ4", "rh_THJ3", "rh_THJ2", "rh_THJ1", "rh_LFJ5", "rh_LFJ4", + "rh_LFJ3", "rh_LFJ2", "rh_LFJ2", "rh_RFJ1", "rh_RFJ4", "rh_RFJ3", "rh_RFJ2", "rh_RFJ1", "rh_MFJ4", + "rh_MFJ3", "rh_MFJ2", "rh_MFJ1", "rh_FFJ4", "rh_FFJ3", "rh_FFJ2", "rh_FFJ1"] + + +@pytest.fixture +def ur5_mia_motors(): + return ["ur5_shoulder_pan_joint", "ur5_shoulder_lift_joint", "ur5_elbow_joint", "ur5_wrist_1_joint", + "ur5_wrist_2_joint", "ur5_wrist_3_joint", "j_thumb_fle", "j_thumb_opp", "j_index_fle", "j_mrl_fle", + "j_ring_fle", "j_little_fle"] + + +@pytest.fixture +def ur5_mia_sensors(): + return ["j_index_sensor", "j_middle_sensor", "j_thumb_sensor"] + + +@pytest.fixture +def ur10_shadow_motors(): + return ["ur10_shoulder_pan_joint", "ur10_shoulder_lift_joint", "ur10_elbow_joint", "ur10_wrist_1_joint", + "ur10_wrist_2_joint", "ur10_wrist_3_joint", "rh_WRJ2", "rh_WRJ1", "rh_THJ5", "rh_THJ4", "rh_THJ3", + "rh_THJ2", "rh_THJ1", "rh_LFJ5", "rh_LFJ4", "rh_LFJ3", "rh_LFJ2", "rh_LFJ1", "rh_RFJ1", "rh_RFJ4", + "rh_RFJ3", "rh_RFJ2", "rh_RFJ1", "rh_MFJ4", "rh_MFJ3", "rh_MFJ2", "rh_MFJ1", "rh_FFJ4", "rh_FFJ3", + "rh_FFJ2", "rh_FFJ1"] diff --git a/tests/robots/test_ur10_shadow_position.py b/tests/robots/test_ur10_shadow_position.py index 99a1927..b5926a1 100644 --- a/tests/robots/test_ur10_shadow_position.py +++ b/tests/robots/test_ur10_shadow_position.py @@ -25,3 +25,8 @@ def test_ur10_shadow_position_creation(simulation, robot): actual_pose = np.concatenate(robot.multibody_pose.get_pose()) expected_pose = np.array([0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]) assert_almost_equal(actual_pose, expected_pose) + + +def test_ur10_shadow_position_motor_creation(simulation, robot, ur10_shadow_motors): + # check motor creation + assert set(robot.motors.keys()) == set(ur10_shadow_motors) diff --git a/tests/robots/test_ur10_shadow_velocity.py b/tests/robots/test_ur10_shadow_velocity.py index 0427683..8fc1527 100644 --- a/tests/robots/test_ur10_shadow_velocity.py +++ b/tests/robots/test_ur10_shadow_velocity.py @@ -25,3 +25,7 @@ def test_ur10_shadow_velocity_creation(simulation, robot): expected_pose = np.array([0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]) assert_almost_equal(actual_pose, expected_pose) + +def test_ur10_shadow_velocity_motor_creation(simulation, robot, ur10_shadow_motors): + # check motor creation + assert set(robot.motors.keys()) == set(ur10_shadow_motors) diff --git a/tests/robots/test_ur5_mia_position.py b/tests/robots/test_ur5_mia_position.py index b4c9121..f802091 100644 --- a/tests/robots/test_ur5_mia_position.py +++ b/tests/robots/test_ur5_mia_position.py @@ -24,17 +24,9 @@ def test_ur5_mia_position_initial_position(simulation, robot): assert_almost_equal(actual_pose, expected_pose) -def test_mia_hand_position_creation(simulation, robot, mia_motors, mia_sensors): - - found_motors = robot.motors.keys() +def test_ur5_mia_position_creation(simulation, robot, ur5_mia_motors, ur5_mia_sensors): # check motor creation - for motor in mia_motors: - assert motor in found_motors - - found_sensors = robot.sensors.keys() - + assert set(robot.motors.keys()) == set(ur5_mia_motors) # check sensor creation - for sensor in mia_sensors: - assert sensor in found_sensors - + assert set(robot.sensors.keys()) == set(ur5_mia_sensors) diff --git a/tests/robots/test_ur5_mia_velocity.py b/tests/robots/test_ur5_mia_velocity.py index 7fc97e7..fe41c5f 100644 --- a/tests/robots/test_ur5_mia_velocity.py +++ b/tests/robots/test_ur5_mia_velocity.py @@ -26,3 +26,10 @@ def test_ur5_mia_velocity_creation(simulation, robot): robot.perform_command(np.array([0.3, -0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])) + +def test_ur5_mia_velocity_motor_creation(simulation, robot, ur5_mia_motors, ur5_mia_sensors): + # check motor creation + assert set(robot.motors.keys()) == set(ur5_mia_motors) + # check sensor creation + assert set(robot.sensors.keys()) == set(ur5_mia_sensors) + From 5c74178de52fd83bf4492b884ff565d422ded022 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 28 Jul 2023 13:14:58 +0200 Subject: [PATCH 07/15] fix motor creation tests --- tests/robots/test_mia_hand_position.py | 12 ++---------- tests/robots/test_mia_hand_velocity.py | 11 ++--------- tests/robots/test_shadow_hand_position.py | 7 +------ tests/robots/test_shadow_hand_velocity.py | 8 ++------ 4 files changed, 7 insertions(+), 31 deletions(-) diff --git a/tests/robots/test_mia_hand_position.py b/tests/robots/test_mia_hand_position.py index cc5efac..53322dc 100644 --- a/tests/robots/test_mia_hand_position.py +++ b/tests/robots/test_mia_hand_position.py @@ -25,18 +25,10 @@ def test_mia_hand_position_initial_pose(simulation, robot): def test_mia_hand_position_creation(simulation, robot, mia_motors, mia_sensors): - - found_motors = robot.motors.keys() - # check motor creation - for motor in mia_motors: - assert motor in found_motors - - found_sensors = robot.sensors.keys() - + assert set(robot.motors.keys()) == set(mia_motors) # check sensor creation - for sensor in mia_sensors: - assert sensor in found_sensors + assert set(robot.sensors.keys()) == set(mia_sensors) @pytest.mark.skip("TODO") diff --git a/tests/robots/test_mia_hand_velocity.py b/tests/robots/test_mia_hand_velocity.py index c7e1de4..f5c8a82 100644 --- a/tests/robots/test_mia_hand_velocity.py +++ b/tests/robots/test_mia_hand_velocity.py @@ -28,17 +28,10 @@ def test_mia_hand_velocity_initial_pose(simulation, robot): def test_mia_hand_velocity_motor_creation(simulation, robot, mia_motors, mia_sensors): - found_motors = robot.motors.keys() - # check motor creation - for motor in mia_motors: - assert motor in found_motors - - found_sensors = robot.sensors.keys() - + assert set(robot.motors.keys()) == set(mia_motors) # check sensor creation - for sensor in mia_sensors: - assert sensor in found_sensors + assert set(robot.sensors.keys()) == set(mia_sensors) @pytest.mark.skip("TODO") diff --git a/tests/robots/test_shadow_hand_position.py b/tests/robots/test_shadow_hand_position.py index 65f930c..0fd2fb2 100644 --- a/tests/robots/test_shadow_hand_position.py +++ b/tests/robots/test_shadow_hand_position.py @@ -33,13 +33,8 @@ def test_shadow_hand_position_initial_pose(simulation, robot, shadow_motors): def test_shadow_hand_position_motor_creation(simulation, robot, shadow_motors): - simulation.add_robot(robot) - - found_motors = robot.motors.keys() - # check motor creation - for motor in shadow_motors: - assert motor in found_motors + assert set(robot.motors.keys()) == set(shadow_motors) @pytest.mark.skip("TODO") diff --git a/tests/robots/test_shadow_hand_velocity.py b/tests/robots/test_shadow_hand_velocity.py index 9aa7f59..3ab5e8e 100644 --- a/tests/robots/test_shadow_hand_velocity.py +++ b/tests/robots/test_shadow_hand_velocity.py @@ -28,13 +28,9 @@ def test_shadow_hand_velocity_initial_pose(simulation, robot): def test_shadow_hand_velocity_motor_creation(simulation, robot, shadow_motors): - simulation.add_robot(robot) - - found_motors = robot.motors.keys() - # check motor creation - for motor in shadow_motors: - assert motor in found_motors + assert set(robot.motors.keys()) == set(shadow_motors) + @pytest.mark.skip("TODO") From 06e9eb6e5d2760513b3d9d7fde2381878940db67 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 28 Jul 2023 13:42:44 +0200 Subject: [PATCH 08/15] fix typos --- tests/conftest.py | 2 +- tests/robots/test_shadow_hand_velocity.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 63c0819..a08ca31 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,7 +23,7 @@ def mia_sensors(): @pytest.fixture def shadow_motors(): return ["rh_WRJ2", "rh_WRJ1", "rh_THJ5", "rh_THJ4", "rh_THJ3", "rh_THJ2", "rh_THJ1", "rh_LFJ5", "rh_LFJ4", - "rh_LFJ3", "rh_LFJ2", "rh_LFJ2", "rh_RFJ1", "rh_RFJ4", "rh_RFJ3", "rh_RFJ2", "rh_RFJ1", "rh_MFJ4", + "rh_LFJ3", "rh_LFJ2", "rh_LFJ1", "rh_RFJ1", "rh_RFJ4", "rh_RFJ3", "rh_RFJ2", "rh_RFJ1", "rh_MFJ4", "rh_MFJ3", "rh_MFJ2", "rh_MFJ1", "rh_FFJ4", "rh_FFJ3", "rh_FFJ2", "rh_FFJ1"] diff --git a/tests/robots/test_shadow_hand_velocity.py b/tests/robots/test_shadow_hand_velocity.py index 3ab5e8e..3f259d9 100644 --- a/tests/robots/test_shadow_hand_velocity.py +++ b/tests/robots/test_shadow_hand_velocity.py @@ -32,7 +32,6 @@ def test_shadow_hand_velocity_motor_creation(simulation, robot, shadow_motors): assert set(robot.motors.keys()) == set(shadow_motors) - @pytest.mark.skip("TODO") def test_shadow_hand_velocity_base_movement(simulation, robot): From 16fb8a95f97f5bab2c8eb6b13de80ada4e03a7e5 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 28 Jul 2023 13:45:45 +0200 Subject: [PATCH 09/15] remove option to not load plane in envs --- deformable_gym/envs/base_env.py | 11 +++-------- deformable_gym/envs/floating_mia_grasp_env.py | 2 +- deformable_gym/envs/floating_shadow_grasp_env.py | 2 +- deformable_gym/envs/mia_grasp_env.py | 2 +- deformable_gym/envs/shadow_grasp_env.py | 2 +- deformable_gym/envs/ur10_shadow_grasp_env.py | 2 +- deformable_gym/envs/ur5_mia_grasp_env.py | 2 +- 7 files changed, 9 insertions(+), 14 deletions(-) diff --git a/deformable_gym/envs/base_env.py b/deformable_gym/envs/base_env.py index 37d5bca..e184294 100644 --- a/deformable_gym/envs/base_env.py +++ b/deformable_gym/envs/base_env.py @@ -38,12 +38,11 @@ class BaseBulletEnv(Env, abc.ABC): def __init__( self, gui: bool = True, real_time: bool = False, horizon: int = 100, soft: bool = False, - load_plane: bool = True, verbose: bool = False, + verbose: bool = False, time_delta: float = 0.0001, verbose_dt: float = 10.00, pybullet_options: str = ""): self.gui = gui self.verbose = verbose - self.__load_plane = load_plane self.horizon = horizon mode = pb.GUI if gui else pb.DIRECT @@ -62,12 +61,8 @@ def _create_robot(self): """Load (or reload) robot to PyBullet simulation.""" def _load_objects(self): - """Load objects to PyBullet simulation. - - If a plane should be loaded, it will have the position (0, 0, 0). - """ - if self.__load_plane: - self.plane = pb.loadURDF("plane.urdf", (0, 0, 0), useFixedBase=1) + """Load objects to PyBullet simulation.""" + self.plane = pb.loadURDF("plane.urdf", (0, 0, 0), useFixedBase=1) def _hard_reset(self): """Hard reset. diff --git a/deformable_gym/envs/floating_mia_grasp_env.py b/deformable_gym/envs/floating_mia_grasp_env.py index 170ddbf..340c28e 100644 --- a/deformable_gym/envs/floating_mia_grasp_env.py +++ b/deformable_gym/envs/floating_mia_grasp_env.py @@ -85,7 +85,7 @@ def __init__( self._observable_object_pos = observable_object_pos self._observable_time_step = observable_time_step - super().__init__(soft=True, load_plane=True, **kwargs) + super().__init__(soft=True, **kwargs) self.hand_world_pose = self._STANDARD_INITIAL_POSE self.robot = self._create_robot() diff --git a/deformable_gym/envs/floating_shadow_grasp_env.py b/deformable_gym/envs/floating_shadow_grasp_env.py index 459f7b2..bd69636 100644 --- a/deformable_gym/envs/floating_shadow_grasp_env.py +++ b/deformable_gym/envs/floating_shadow_grasp_env.py @@ -45,7 +45,7 @@ def __init__(self, object_name="insole", self._observable_object_pos = observable_object_pos self._observable_time_step = observable_time_step - super().__init__(horizon=horizon, soft=True, load_plane=True, **kwargs) + super().__init__(horizon=horizon, soft=True, **kwargs) self.hand_world_pose = (0, -0.5, 1, -np.pi/2, np.pi, 0) self.robot = self._create_robot() diff --git a/deformable_gym/envs/mia_grasp_env.py b/deformable_gym/envs/mia_grasp_env.py index 364f517..08724e9 100644 --- a/deformable_gym/envs/mia_grasp_env.py +++ b/deformable_gym/envs/mia_grasp_env.py @@ -81,7 +81,7 @@ def __init__( super().__init__( gui=gui, real_time=real_time, horizon=100, soft=True, - load_plane=True, verbose=verbose, time_delta=time_delta, + verbose=verbose, time_delta=time_delta, verbose_dt=verbose_dt) self.hand_world_pose = (0, 0, 1, -np.pi / 8, np.pi, 0) diff --git a/deformable_gym/envs/shadow_grasp_env.py b/deformable_gym/envs/shadow_grasp_env.py index 410be04..d90ed8f 100644 --- a/deformable_gym/envs/shadow_grasp_env.py +++ b/deformable_gym/envs/shadow_grasp_env.py @@ -54,7 +54,7 @@ def __init__( super().__init__( gui=gui, real_time=real_time, horizon=100, soft=True, - load_plane=True, verbose=verbose, time_delta=time_delta, + verbose=verbose, time_delta=time_delta, verbose_dt=verbose_dt) self.hand_world_pose = (0, -0.2, 1, -np.pi / 2, -np.pi, 0) diff --git a/deformable_gym/envs/ur10_shadow_grasp_env.py b/deformable_gym/envs/ur10_shadow_grasp_env.py index a96c7c9..aab5be5 100644 --- a/deformable_gym/envs/ur10_shadow_grasp_env.py +++ b/deformable_gym/envs/ur10_shadow_grasp_env.py @@ -53,7 +53,7 @@ def __init__(self, gui=True, real_time=False, object_name="insole", self.object_scale = object_scale super().__init__(gui=gui, real_time=real_time, horizon=horizon, - soft=True, load_plane=True, verbose=verbose, + soft=True, verbose=verbose, verbose_dt=verbose_dt) self.robot = self._create_robot() diff --git a/deformable_gym/envs/ur5_mia_grasp_env.py b/deformable_gym/envs/ur5_mia_grasp_env.py index 33ebe03..66eb067 100644 --- a/deformable_gym/envs/ur5_mia_grasp_env.py +++ b/deformable_gym/envs/ur5_mia_grasp_env.py @@ -110,7 +110,7 @@ def __init__( self.object_scale = object_scale super().__init__(gui=gui, real_time=real_time, horizon=horizon, - soft=True, load_plane=True, verbose=verbose, + soft=True, verbose=verbose, verbose_dt=verbose_dt, pybullet_options=pybullet_options) From 110767c452e4e7879b7c5d0762b28438d742403d Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 28 Jul 2023 15:42:19 +0200 Subject: [PATCH 10/15] removed env with metrics --- .../envs/mia_grasp_env_with_metrics.py | 251 ------------------ 1 file changed, 251 deletions(-) delete mode 100644 deformable_gym/envs/mia_grasp_env_with_metrics.py diff --git a/deformable_gym/envs/mia_grasp_env_with_metrics.py b/deformable_gym/envs/mia_grasp_env_with_metrics.py deleted file mode 100644 index f6b7d07..0000000 --- a/deformable_gym/envs/mia_grasp_env_with_metrics.py +++ /dev/null @@ -1,251 +0,0 @@ -from typing import Union -import pybullet as pb -import numpy as np -import numpy.typing as npt -import pytransform3d.rotations as pr -import pytransform3d.transformations as pt -from .mia_grasp_env import MiaGraspEnv -from .floating_mia_grasp_env import FloatingMiaGraspEnv -from grasp_metrics.utils.pybullet_interface import ( - collect_contact_information, compute_inertia) -from grasp_metrics.geometry import fit_plane_from_point_set, point_to_line_segment -from grasp_metrics.quasi_static_point_based import ferrari_canny -from grasp_metrics.geometry_based import ( - distance_com_centroid, distance_grasp_point_centroid, grasp_polygon_shape, - area_of_grasp_polygon, orthogonality, distance_grasp_point_finger_tips) -from grasp_metrics.hands import MiaHand - - -class MiaGraspEnvWithMetrics(MiaGraspEnv): - """Grasping environment using only the MIA hand that uses grasp metrics. - - The goal is to grasp an object using joint position offsets as actions. - - **State space:** - Position of three joints and values of six force sensors at the fingertips. - Joint positions are ordered as follows: - - (0) j_index_fle - - (2) j_mrl_fle - - (5) j_thumb_fle - Force measurements are ordered as follows: - - (0) tangential force at middle finger, - - (1) normal force at index finger, - - (2) tangential force at index finger, - - (3) tangential force at thumb, - - (4) normal force at thumb, - - (5) normal force at middle finger. - - **Action space:** - Joint position offset for all four controllable degrees of freedom. - Commands are ordered as follows: - - (0) j_index_fle - - (1) j_mrl_fle - - (3) j_thumb_fle - - :param gui: Show PyBullet GUI (default: True). - :param real_time: Run simulation in real time (default: False). - :param object_name: Name of the object to be loaded: 'insole', - 'pillow_small', or 'box' (default: 'insole'). - :param initial_joint_positions: Initial positions of six finger joints - (default: zeros). - :param grasp_point: Grasp point. - :param thumb_adducted: The joint j_thumb_opp can only take two - positions (adduction, abduction) and we cannot easily switch between - them. That's why we have to configure it in advance and cannot control - it during execution of a policy. When the thumb is adducted, it is - closer to the palm. When it is abducted, it is further away from the - palm. - :param verbose: Print debug output (default: False). - :param time_delta: Simulation time step. Note that the hand will still be - controlled at 20 Hz (default: 0.0001). - :param verbose_dt: Time delta after which the timing module should print - timing information (default: 0.1). - """ - def __init__( - self, - gui: bool = True, - real_time: bool = False, - object_name: str = "insole", - initial_joint_positions: npt.ArrayLike = (0, 0, 0, 0, 0, 0), - grasp_point: Union[npt.ArrayLike, None] = None, - thumb_adducted: bool = False, - verbose: bool = False, - time_delta: float = 0.0001, - verbose_dt: float = 0.1, - velocity_control: bool = False, - abort_on_first_step_with_collision: bool = False): - super(MiaGraspEnvWithMetrics, self).__init__( - gui, real_time, object_name, initial_joint_positions, - thumb_adducted, verbose, time_delta, verbose_dt, velocity_control, - abort_on_first_step_with_collision) - self.grasp_point = grasp_point - - def calculate_reward(self, state, action, next_state, done): - """Calculates the reward given a (state,action,state) tuple. - - :param state: Current state of the Mia hand. - :param action: Action performed by the Mia hand. - :param next_state: Successor state of the Mia hand. - :param done: Is the episode finished? - """ - if done: - return _final_reward_with_metrics(self, self.DROP_TEST_TIME) - else: - return 0.0 - - -class FloatingMiaGraspEnvWithMetrics(FloatingMiaGraspEnv): - """Grasp an insole with a floating Mia hand. - - **State space:** - - End-effector pose: (x, y, z, qx, qy, qz, qw) - - Finger joint angles: thumb_fle, index_fle, mrl_fle - - Force sensors: 6 values - - **Action space:** - - End-effector offset: (x, y, z, qx, qy, qz, qw) - - Finger joint angle: thumb_fle, index_fle, mrl_fle - - :param grasp_point: Grasp point. - :param object_name: Name of the object to be loaded: 'insole', - 'pillow_small', or 'box' (default: 'insole'). - :param horizon: Number of steps in simulation. - :param train: Bool flag indicating if the starting position of the grasped - :param compute_reward: Compute reward. - :param object_scale: Scaling factor for object. - :param task_space_limit: Limits for task space, shape: (2, 3). - :param gui: Show PyBullet GUI (default: True). - :param real_time: Run simulation in real time (default: False). - :param initial_joint_positions: Initial positions of six finger joints - (default: zeros). - :param verbose: Print debug output (default: False). - """ - def __init__( - self, - grasp_point: Union[npt.ArrayLike, None] = None, - **kwargs): - super(FloatingMiaGraspEnvWithMetrics, self).__init__(**kwargs) - self.grasp_point = grasp_point - - def set_world_pose(self, world_pose): - """Set pose of the hand. - - :param world_pose: world pose of hand given as position and - quaternion: (x, y, z, qw, qx, qy, qz) - """ - self.hand_world_pose = np.hstack(( - world_pose[:3], pr.quaternion_xyzw_from_wxyz(world_pose[3:]))) - - def get_world_pose(self): - """Get pose of the hand. - - :return: hand pose in world coordinates given as position and - quaternion: (x, y, z, qw, qx, qy, qz) - """ - return np.hstack(( - self.hand_world_pose[:3], - pr.quaternion_wxyz_from_xyzw(self.hand_world_pose[3:]))) - - def calculate_reward(self, state, action, next_state, done): - """Calculates the reward given a (state,action,state) tuple. - - :param state: Current state of the Mia hand. - :param action: Action performed by the Mia hand. - :param next_state: Successor state of the Mia hand. - :param done: Is the episode finished? - """ - if done: - return _final_reward_with_metrics(self, drop_test_time=1.0) - else: - return 0.0 - - -def _final_reward_with_metrics(env, drop_test_time, verbose=1): - if verbose: - print("Episode done, calculating final reward with metrics...") - - Q_O, dist, dist_fingers = _approach_info(env) - - if hasattr(env, "invalid_initial_pose") and env.invalid_initial_pose: - if verbose: - print(f"Stage 1: {dist=:.3f}, {dist_fingers=:.3f}, {Q_O=:.2f}") - return -15.0 - 5 * dist - 5 * dist_fingers + Q_O - - # before removing anchors - contacts, _, _, _ = collect_contact_information( - env.object_to_grasp.get_id(), env.robot.get_id(), 0, - default_torque=1.0) - n_contacts_before = len(contacts) - initial_height = env.object_to_grasp.get_pose()[2] - if n_contacts_before == 0: - if verbose: - print(f"Stage 2: {dist=:.3f}, {dist_fingers=:.3f}, {Q_O=:.2f}") - return -10.0 - 5 * dist - 5 * dist_fingers + Q_O - - env.object_to_grasp.remove_anchors() - env.simulation.simulate_time(drop_test_time) - - # after removing anchors - contacts, vertices_in_object, _, object2world = \ - collect_contact_information( - env.object_to_grasp.get_id(), env.robot.get_id(), 0, - default_torque=1.0) - vertices_in_world = pt.transform( - object2world, pt.vectors_to_points(vertices_in_object))[:, :3] - - if len(contacts) == 0: - if verbose: - print(f"Stage 3: {dist=:.3f}, {dist_fingers=:.3f}, {Q_O=:.2f}, {n_contacts_before=}") - return -5.0 - 5 * dist - 5 * dist_fingers + Q_O + 0.2 * n_contacts_before - - Q_FC, Q_SGP, Q_AGP, Q_DCC = _metrics(contacts, env, object2world, vertices_in_world) - height = env.object_to_grasp.get_pose()[2] - success = height > initial_height - 0.3 - success_reward = 5.0 if success else 0.0 - if verbose: - print(f"Stage 4: {success=}, {height=:.2f}/{initial_height=:.2f}, " - f"{dist=:.3f}, {Q_FC=:.5f}, {Q_DCC=:.3f}, {Q_AGP=:.3f}, " - f"{Q_SGP=:.3f}, {Q_O=:.2f}, {dist_fingers=:.3f}") - return success_reward - 5 * dist - 10 * dist_fingers + 200 * Q_FC + 0 * Q_DCC + 0 * Q_AGP + 0 * Q_SGP + 5 * Q_O - - -def _approach_info(env): - hand = MiaHand() - hand_pose = pt.transform_from_pq(env.get_world_pose()) - hand_pos, approach_direction = hand.perpendicular_to_palm_surface(hand_pose) - I = compute_inertia(env.object_to_grasp.get_id()) - Q_O = orthogonality(I, approach_direction) - approach_end = hand_pos + hand.get_maximum_finger_length() * approach_direction - if env.verbose: - pb.addUserDebugLine(hand_pos, approach_end, [1, 1, 0]) - if env.grasp_point is not None: - dist = point_to_line_segment( - env.grasp_point[:3], hand_pos, approach_end)[0] - dist_fingers = distance_grasp_point_finger_tips( - hand_pose, hand, env.grasp_point[:3]) - else: - dist = 0.0 - dist_fingers = 0.0 - return Q_O, dist, dist_fingers - - -def _metrics(contacts, env, object2world, vertices_in_world): - Q_FC = ferrari_canny(contacts) - if env.grasp_point is None: - Q_DCC = distance_com_centroid( - contacts.contact_points_in_world(object2world), - vertices_in_world) - else: - Q_DCC = distance_grasp_point_centroid( - contacts.contact_points_in_world(object2world), - vertices_in_world, env.grasp_point) - if len(contacts) >= 3: - normal, d, contact_points_in_plane = fit_plane_from_point_set( - contacts.contact_points()) - Q_AGP = area_of_grasp_polygon( - contact_points_in_plane, vertices_in_world, normal=normal) - Q_SGP = grasp_polygon_shape(contact_points_in_plane, normal=normal) - else: - Q_AGP = 0.0 - Q_SGP = 0.0 - return Q_FC, Q_SGP, Q_AGP, Q_DCC From 7835ded33f9aab9604d0e9b9ae318ac09ff01d4e Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 28 Jul 2023 15:53:16 +0200 Subject: [PATCH 11/15] some refactoring --- deformable_gym/envs/base_env.py | 38 ++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/deformable_gym/envs/base_env.py b/deformable_gym/envs/base_env.py index e184294..ead44ad 100644 --- a/deformable_gym/envs/base_env.py +++ b/deformable_gym/envs/base_env.py @@ -19,7 +19,6 @@ class BaseBulletEnv(Env, abc.ABC): :param real_time: Run PyBullet in real time. :param horizon: Maximum number of steps in an episode. :param soft: Activate soft body simulation. - :param load_plane: Load plane from URDF. :param verbose: Print debug information. :param time_delta: Time between PyBullet simulation steps. :param verbose_dt: Time after which simulation info should be printed. @@ -36,11 +35,16 @@ class BaseBulletEnv(Env, abc.ABC): action_space: spaces.Box def __init__( - self, gui: bool = True, real_time: bool = False, - horizon: int = 100, soft: bool = False, + self, + gui: bool = True, + real_time: bool = False, + horizon: int = 100, + soft: bool = False, verbose: bool = False, - time_delta: float = 0.0001, verbose_dt: float = 10.00, + time_delta: float = 0.0001, + verbose_dt: float = 10.00, pybullet_options: str = ""): + self.gui = gui self.verbose = verbose self.horizon = horizon @@ -65,10 +69,8 @@ def _load_objects(self): self.plane = pb.loadURDF("plane.urdf", (0, 0, 0), useFixedBase=1) def _hard_reset(self): - """Hard reset. - - Fully reset the PyBullet simulation and reload all objects. May be - necessary, for example, when soft-bodies in the environment explode. + """Hard reset the PyBullet simulation and reload all objects. May be necessary, e.g., if soft-bodies in the + environment explode. """ if self.verbose: print("Performing hard reset!") @@ -109,7 +111,7 @@ def render(self, mode: str = "human"): if mode == "human": assert self.gui else: - raise NotImplementedError("Render mode '%s' not supported" % mode) + raise NotImplementedError(f"Render mode {mode} not supported") def observe_state(self) -> np.ndarray: """Returns the current environment state. @@ -118,8 +120,7 @@ def observe_state(self) -> np.ndarray: """ return self.robot.get_joint_positions() - def is_done(self, state: np.ndarray, action: np.ndarray, - next_state: np.ndarray) -> bool: + def is_done(self, state: np.ndarray, action: np.ndarray, next_state: np.ndarray) -> bool: """Checks whether the current episode is over or not. :param state: State @@ -165,14 +166,12 @@ def step(self, action: npt.ArrayLike): reward = self.calculate_reward(state, action, next_state, done) if self.verbose: - print(f"Finished environment step: {next_state=}, {reward}, {done=}") + print(f"Finished environment step: {next_state=}, {reward=}, {done=}") return next_state, reward, done, {} @abc.abstractmethod - def calculate_reward( - self, state: npt.ArrayLike, action: npt.ArrayLike, - next_state: npt.ArrayLike, done: bool): + def calculate_reward(self, state: npt.ArrayLike, action: npt.ArrayLike, next_state: npt.ArrayLike, done: bool): """Calculate reward. :param state: State of the environment. @@ -213,18 +212,19 @@ def _deformable_is_exploded(self) -> bool: return True def _check_forces(self, robot, high_force_threshold, verbose): - contact_points = robot.get_contact_points( - self.object_to_grasp.get_id()) + contact_points = robot.get_contact_points(self.object_to_grasp.get_id()) accumulated_forces = 0.0 for contact_point in contact_points: _, _, _, _, _, _, _, _, dist, force, _, _, _, _ = contact_point accumulated_forces += force high_forces = accumulated_forces > high_force_threshold contact_forces = accumulated_forces > 0 + if high_forces: - print("Accumulated force too high: %g" % accumulated_forces) + print(f"Accumulated force too high: {accumulated_forces}") elif verbose: - print("Accumulated force: %g" % accumulated_forces) + print(f"{accumulated_forces=}") + return high_forces, contact_forces def get_object_pose(self): From 544043714f409b1ef12ed370f4c8d6d0a38e72c7 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 28 Jul 2023 17:04:23 +0200 Subject: [PATCH 12/15] WIP: create common grasp env --- deformable_gym/envs/grasp_env.py | 94 +++++++++++++++++++++++++++ deformable_gym/robots/bullet_robot.py | 17 +++-- deformable_gym/robots/shadow_hand.py | 4 -- deformable_gym/robots/ur10_shadow.py | 4 -- 4 files changed, 106 insertions(+), 13 deletions(-) create mode 100644 deformable_gym/envs/grasp_env.py diff --git a/deformable_gym/envs/grasp_env.py b/deformable_gym/envs/grasp_env.py new file mode 100644 index 0000000..ca1ae0d --- /dev/null +++ b/deformable_gym/envs/grasp_env.py @@ -0,0 +1,94 @@ +import numpy as np + +from abc import ABC +from base_env import BaseBulletEnv +from ..objects.bullet_object import ObjectFactory +from gym.spaces import Box + + +class GraspEnv(BaseBulletEnv, ABC): + + def __init__(self, + object_name, + observable_object_pos: bool = False, + observable_time_step: bool = False): + + self.object_name = object_name + self._observable_object_pos = observable_object_pos + self._observable_time_step = observable_time_step + + super().__init__(soft=True) + + self.robot = self._create_robot() + + limits = self.robot.get_joint_limits(self.robot.motors.values()) + + if self._observable_object_pos: + lower_observations = np.append(lower_observations, np.ones(3)) + upper_observations = np.append(upper_observations, -np.ones(3)) + + if self._observable_time_step: + lower_observations = np.append(lower_observations, 0) + upper_observations = np.append(upper_observations, self.horizon) + + self.observation_space = Box(low=lower_observations, high=upper_observations) + + # build the action space + lower = [-np.ones(3) * .0005, # max negative base pos offset + -np.ones(4) * .000005, # max negative base orn offset + limits[0][self.actuated_finger_ids]] # negative joint limits + + upper = [np.ones(3) * .0005, # max positive base pos offset + np.ones(4) * .000005, # max positive base orn offset + limits[1][self.actuated_finger_ids]] # positive joint limits + + self.action_space = self.robot.action_space + + def _load_objects(self): + super()._load_objects() + self.object_to_grasp, self.object_position, self.object_orientation = ObjectFactory().create(self.object_name) + + def reset(self, hard_reset=False, pos=None): + + self.robot.reset(pos) + self.object_to_grasp.reset() + self.robot.activate_motors() + + return super().reset() + + def observe_state(self): + joint_pos = self.robot.get_joint_positions(self.robot.actuated_real_joints) + ee_pose = self.robot.get_ee_pose() + sensor_readings = self.robot.get_sensor_readings() + + state = np.concatenate([ee_pose, joint_pos, sensor_readings]) + + if self._observable_object_pos: + obj_pos = self.object_to_grasp.get_pose()[:3] + state = np.append(state, obj_pos) + + if self._observable_time_step: + state = np.append(state, self.step_counter) + + return state + + def calculate_reward(self, state, action, next_state, done): + """ + Calculates the reward by counting how many insole vertices are in the + target position. + """ + if done: + + # self.robot.deactivate_motors() + # remove insole anchors and simulate steps + self.object_to_grasp.remove_anchors() + + for _ in range(50): + self.simulation.step_to_trigger("time_step") + height = self.object_to_grasp.get_pose()[2] + if height < 0.9: + return -1.0 + else: + return 1.0 + else: + return 0.0 diff --git a/deformable_gym/robots/bullet_robot.py b/deformable_gym/robots/bullet_robot.py index 8c89725..5708752 100644 --- a/deformable_gym/robots/bullet_robot.py +++ b/deformable_gym/robots/bullet_robot.py @@ -7,6 +7,7 @@ from deformable_gym.robots.bullet_utils import draw_limits from deformable_gym.robots.inverse_kinematics import PyBulletSolver from deformable_gym.objects.bullet_object import Pose +from gym.spaces import Box class BulletRobot(abc.ABC): @@ -108,6 +109,11 @@ def __init__( self.multibody_pose = pbh.MultibodyPose( self.get_id(), self.init_pos, self.init_rot) + # create observation and action spaces + # TODO: fix to include base actions and sensor observations + self.action_space = Box(np.full(len(self.motors), -1.0), np.full(len(self.motors), 1.0)) + self.observation_space = Box(np.full(len(self.motors), -1.0), np.full(len(self.motors), 1.0)) + def initialise(self) -> int: """Initialize robot in simulation. @@ -251,10 +257,11 @@ def compute_ik( target_pos = current_pos + pos_command if self.task_space_limit is not None: - print(f"Original {target_pos=}") - target_pos = np.clip( - target_pos, self.task_space_limit[0], self.task_space_limit[1]) - print(f"Clipped {target_pos=}") + if self.verbose: + print(f"Original {target_pos=}") + target_pos = np.clip(target_pos, self.task_space_limit[0], self.task_space_limit[1]) + if self.verbose: + print(f"Clipped {target_pos=}") # calculate target rotation if orientation is provided target_orn = None @@ -393,7 +400,7 @@ def _get_link_id(self, link_name: str) -> int: """ return self._link_name_to_link_id[link_name] - def _get_joint_limits(self, joints: List[str]) -> Tuple[np.ndarray, np.ndarray]: + def get_joint_limits(self, joints: List[str]) -> Tuple[np.ndarray, np.ndarray]: """Get array of lower limits and array of upper limits of joints. :param joints: Names of joints for which we request limits. diff --git a/deformable_gym/robots/shadow_hand.py b/deformable_gym/robots/shadow_hand.py index 4a9e6d1..666ae4f 100644 --- a/deformable_gym/robots/shadow_hand.py +++ b/deformable_gym/robots/shadow_hand.py @@ -55,10 +55,6 @@ def __init__( if self.debug_visualization: self._init_debug_visualizations() - def get_joint_limits(self): - """Get joint limits.""" - return self._get_joint_limits(self.motors) - def perform_command(self, command: npt.ArrayLike): """Translates hand commands and updates current command. diff --git a/deformable_gym/robots/ur10_shadow.py b/deformable_gym/robots/ur10_shadow.py index 079b334..d3f939a 100644 --- a/deformable_gym/robots/ur10_shadow.py +++ b/deformable_gym/robots/ur10_shadow.py @@ -57,10 +57,6 @@ def arm_command_wrapper(): end_effector_name=end_effector_link, fallback=self.inverse_kinematics_solver)) - def get_joint_limits(self): - """Get joint limits.""" - return self._get_joint_limits(self.motors) - def perform_command(self, command): """ Performs the provided arm and hand commands. Assumes task-space arm From d3c5c82b5c06c240c663b76a898419a18b7880c4 Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 11 Aug 2023 13:44:12 +0200 Subject: [PATCH 13/15] cleanup and refactoring --- deformable_gym/envs/__init__.py | 6 - deformable_gym/envs/april_insole_dmp_env.py | 98 --------- deformable_gym/envs/base_env.py | 11 +- deformable_gym/envs/floating_mia_grasp_env.py | 2 +- .../envs/floating_shadow_grasp_env.py | 10 +- deformable_gym/envs/grasp_env.py | 27 +-- deformable_gym/envs/mia_grasp_env.py | 201 ------------------ deformable_gym/envs/shadow_grasp_env.py | 172 --------------- deformable_gym/envs/ur10_shadow_grasp_env.py | 23 +- deformable_gym/envs/ur5_mia_grasp_env.py | 11 +- deformable_gym/helpers/state_converter.py | 101 --------- deformable_gym/objects/__init__.py | 3 +- deformable_gym/objects/rigid_bag.py | 117 ---------- examples/floating_mia_example.py | 2 + examples/floating_shadow_example.py | 1 + examples/ur10_shadow_grasp_example.py | 2 + examples/ur5_mia_grasp_example.py | 1 + 17 files changed, 21 insertions(+), 767 deletions(-) delete mode 100644 deformable_gym/envs/april_insole_dmp_env.py delete mode 100644 deformable_gym/envs/mia_grasp_env.py delete mode 100644 deformable_gym/envs/shadow_grasp_env.py delete mode 100644 deformable_gym/helpers/state_converter.py delete mode 100644 deformable_gym/objects/rigid_bag.py diff --git a/deformable_gym/envs/__init__.py b/deformable_gym/envs/__init__.py index cf59695..98b9745 100644 --- a/deformable_gym/envs/__init__.py +++ b/deformable_gym/envs/__init__.py @@ -2,9 +2,6 @@ from .base_env import BaseBulletEnv from .floating_mia_grasp_env import FloatingMiaGraspEnv from .floating_shadow_grasp_env import FloatingShadowGraspEnv -from .mia_grasp_env import MiaGraspEnv -#from .mia_grasp_env_with_metrics import MiaGraspEnvWithMetrics -from .shadow_grasp_env import ShadowGraspEnv from .ur5_mia_grasp_env import UR5MiaGraspEnv from .ur10_shadow_grasp_env import UR10ShadowGraspEnv @@ -13,9 +10,6 @@ "BaseBulletEnv", "FloatingMiaGraspEnv", "FloatingShadowGraspEnv", - "MiaGraspEnv", - #"MiaGraspEnvWithMetrics", - "ShadowGraspEnv", "UR5MiaGraspEnv", "UR10ShadowGraspEnv" ] diff --git a/deformable_gym/envs/april_insole_dmp_env.py b/deformable_gym/envs/april_insole_dmp_env.py deleted file mode 100644 index 65086e1..0000000 --- a/deformable_gym/envs/april_insole_dmp_env.py +++ /dev/null @@ -1,98 +0,0 @@ -import numpy as np - -from deformable_gym.envs.ur5_mia_grasp_env import UR5MiaGraspEnv, INITIAL_JOINT_ANGLES -from deformable_gym.robots.mia_hand import MiaHandMixin -from deformable_gym.helpers.state_converter import VectorConverter, TransformedDMP, ActionCoupling - - -class AprilInsoleDmpEnv(UR5MiaGraspEnv): - """Control UR5 and Mia hand with a DMP. - - **State space:** - The state space has 16 dimensions. - The pose of the end-effector (7 values), finger joint angles (3 values), - and sensor readings (6 values). - End-effector poses are defined by position (x, y, z) and scalar-last - quaternion (qx, qy, qz, qw). - Joint positions are ordered as follows: - - (0) j_index_fle - - (2) j_mrl_fle - - (5) j_thumb_fle - Force measurements are ordered as follows: - - (0) tangential force at middle finger, - - (1) normal force at index finger, - - (2) tangential force at index finger, - - (3) tangential force at thumb, - - (4) normal force at thumb, - - (5) normal force at middle finger. - - **Action space:** - The action space has 11 dimensions. These are coupling term values for - the DMP: 7 values for the end-effector pose (7 values) and 3 values for - the joint angles of the hand. - End-effector commands are defined by position (x, y, z) and scalar-last - quaternion (qx, qy, qz, qw). - Joint commands are ordered as follows: - - (0) j_index_fle - - (1) j_mrl_fle - - (3) j_thumb_fle - """ - def __init__( - self, dmp, gui=True, real_time=False, - verbose=False, object2world=None, horizon=200): - self.object2world = object2world - super(AprilInsoleDmpEnv, self).__init__( - gui=gui, real_time=real_time, verbose=verbose, horizon=horizon) - self.dmp = dmp - self.transformed_dmp = TransformedDMP(dmp, object2world) - self.dmp_state2env_action = VectorConverter( - ["x", "y", "z", "qw", "qx", "qy", "qz", "j_index_fle", "j_mrl_fle", - "j_thumb_fle"], - ["x", "y", "z", "qx", "qy", "qz", "qw", "j_index_fle", "j_mrl_fle", - "j_thumb_fle"]) - self.env_state2dmp_state = VectorConverter( - ["x", "y", "z", "qx", "qy", "qz", "qw", "j_index_fle", "j_mrl_fle", - "j_middle_fle", "j_ring_fle", "j_thumb_opp", "j_thumb_fle", - "sensor1", "sensor2", "sensor3"], - ["x", "y", "z", "qw", "qx", "qy", "qz", "j_index_fle", "j_mrl_fle", - "j_thumb_fle"]) - - def reset(self, hard_reset=False): - start_y = self.dmp_state2env_action(self.transformed_dmp.start_y) - assert len(start_y) == 10 - arm_joint_target = self.robot.compute_ik( - np.array(INITIAL_JOINT_ANGLES), start_y[:3], start_y[3:7], False) - hand_joint_target = MiaHandMixin.convert_action_to_pybullet(start_y[7:]) - joint_angles = np.hstack((arm_joint_target[:6], hand_joint_target)) - self.robot.set_initial_joint_positions(joint_angles) - return super().reset(hard_reset) - - def step(self, action): - # observe current state - state = self.observe_state() - - # translate action - y = self.env_state2dmp_state(state) - next_y = self.transformed_dmp.step( - y, coupling_term=ActionCoupling(action)) - action = self.dmp_state2env_action(next_y) - - # execute action - self.robot.perform_command(action) - - # simulate until next time step - self.simulation.step_to_trigger("time_step") - - # observe new state - next_state = self.observe_state() - - # update time step - self.step_counter += 1 - - # check if episode is over - done = self.is_done(state, action, next_state) - - # calculate the reward - reward = self.calculate_reward(state, action, next_state, done) - - return next_state, reward, done, {} diff --git a/deformable_gym/envs/base_env.py b/deformable_gym/envs/base_env.py index ead44ad..3de44d0 100644 --- a/deformable_gym/envs/base_env.py +++ b/deformable_gym/envs/base_env.py @@ -203,13 +203,7 @@ def _deformable_is_exploded(self) -> bool: :return: Whether the deformable is exploded or not. """ - pose = self.object_to_grasp.get_pose() - - if np.isfinite(pose).all(): - return np.any(np.abs(pose[:3]) > 3) - else: - print("NAN in insole pose -> exploded?!") - return True + return not np.isfinite(self.object_to_grasp.get_pose()).all() def _check_forces(self, robot, high_force_threshold, verbose): contact_points = robot.get_contact_points(self.object_to_grasp.get_id()) @@ -248,8 +242,7 @@ def _init_hand_pose(self, robot: BulletRobot): """ desired_robot2world_pos = self.hand_world_pose[:3] desired_robot2world_orn = pb.getQuaternionFromEuler(self.hand_world_pose[3:]) - self.multibody_pose = MultibodyPose( - robot.get_id(), desired_robot2world_pos, desired_robot2world_orn) + self.multibody_pose = MultibodyPose(robot.get_id(), desired_robot2world_pos, desired_robot2world_orn) def set_world_pose(self, world_pose): """Set pose of the hand. diff --git a/deformable_gym/envs/floating_mia_grasp_env.py b/deformable_gym/envs/floating_mia_grasp_env.py index 340c28e..687735e 100644 --- a/deformable_gym/envs/floating_mia_grasp_env.py +++ b/deformable_gym/envs/floating_mia_grasp_env.py @@ -228,7 +228,7 @@ def calculate_reward(self, state, action, next_state, done): if not (round(action[-1]) == 1 or self.step_counter >= self.horizon): return -100 - # self.robot.deactivate_motors() + self.robot.deactivate_motors() # remove insole anchors and simulate steps self.object_to_grasp.remove_anchors() for _ in range(50): diff --git a/deformable_gym/envs/floating_shadow_grasp_env.py b/deformable_gym/envs/floating_shadow_grasp_env.py index bd69636..a54f30d 100644 --- a/deformable_gym/envs/floating_shadow_grasp_env.py +++ b/deformable_gym/envs/floating_shadow_grasp_env.py @@ -109,13 +109,7 @@ def reset(self, hard_reset=False): if self.verbose: print("Performing reset (april)") - if self.randomised: - if self.train: - pos = random.choice(self.train_positions) - else: - pos = random.choice(self.test_positions) - else: - pos = None + pos = None self.object_to_grasp.reset(pos=pos) @@ -147,7 +141,7 @@ def calculate_reward(self, state, action, next_state, done): if not (round(action[-1]) == 1 or self.step_counter >= self.horizon): return -100 - # self.robot.deactivate_motors() + self.robot.deactivate_motors() # remove insole anchors and simulate steps self.object_to_grasp.remove_anchors() for _ in range(50): diff --git a/deformable_gym/envs/grasp_env.py b/deformable_gym/envs/grasp_env.py index ca1ae0d..d226477 100644 --- a/deformable_gym/envs/grasp_env.py +++ b/deformable_gym/envs/grasp_env.py @@ -3,7 +3,6 @@ from abc import ABC from base_env import BaseBulletEnv from ..objects.bullet_object import ObjectFactory -from gym.spaces import Box class GraspEnv(BaseBulletEnv, ABC): @@ -21,28 +20,9 @@ def __init__(self, self.robot = self._create_robot() - limits = self.robot.get_joint_limits(self.robot.motors.values()) - - if self._observable_object_pos: - lower_observations = np.append(lower_observations, np.ones(3)) - upper_observations = np.append(upper_observations, -np.ones(3)) - - if self._observable_time_step: - lower_observations = np.append(lower_observations, 0) - upper_observations = np.append(upper_observations, self.horizon) - - self.observation_space = Box(low=lower_observations, high=upper_observations) - - # build the action space - lower = [-np.ones(3) * .0005, # max negative base pos offset - -np.ones(4) * .000005, # max negative base orn offset - limits[0][self.actuated_finger_ids]] # negative joint limits - - upper = [np.ones(3) * .0005, # max positive base pos offset - np.ones(4) * .000005, # max positive base orn offset - limits[1][self.actuated_finger_ids]] # positive joint limits - + # TODO: adapt if non-robot observable objects in environment self.action_space = self.robot.action_space + self.observation_space = self.robot.observation_space def _load_objects(self): super()._load_objects() @@ -58,10 +38,9 @@ def reset(self, hard_reset=False, pos=None): def observe_state(self): joint_pos = self.robot.get_joint_positions(self.robot.actuated_real_joints) - ee_pose = self.robot.get_ee_pose() sensor_readings = self.robot.get_sensor_readings() - state = np.concatenate([ee_pose, joint_pos, sensor_readings]) + state = np.concatenate([joint_pos, sensor_readings]) if self._observable_object_pos: obj_pos = self.object_to_grasp.get_pose()[:3] diff --git a/deformable_gym/envs/mia_grasp_env.py b/deformable_gym/envs/mia_grasp_env.py deleted file mode 100644 index 08724e9..0000000 --- a/deformable_gym/envs/mia_grasp_env.py +++ /dev/null @@ -1,201 +0,0 @@ -import numpy as np -import numpy.typing as npt -from gym import spaces -from deformable_gym.robots import mia_hand -from deformable_gym.envs.base_env import BaseBulletEnv, GraspDeformableMixin, FloatingHandMixin -from deformable_gym.objects.bullet_object import ObjectFactory - - -class MiaGraspEnv(FloatingHandMixin, GraspDeformableMixin, BaseBulletEnv): - """Basic grasping environment using only the MIA hand. - - The goal is to grasp an object using joint position offsets as actions. - - **State space:** - The state space has 9 dimensions. - Position of three joints and values of six force sensors at the fingertips. - Joint positions are ordered as follows: - - (0) j_index_fle - - (2) j_mrl_fle - - (5) j_thumb_fle - Force measurements are ordered as follows: - - (0) tangential force at middle finger, - - (1) normal force at index finger, - - (2) tangential force at index finger, - - (3) tangential force at thumb, - - (4) normal force at thumb, - - (5) normal force at middle finger. - - **Action space:** - The action space has 4 dimensions. - Joint commands are ordered as follows. - - (0) j_index_fle - - (1) j_mrl_fle - - (3) j_thumb_fle - At the end there is a flag to indicate that the episode is finished (1) - or not (0). - - :param gui: Show PyBullet GUI (default: True). - :param real_time: Run simulation in real time (default: False). - :param object_name: Name of the object to be loaded: 'insole', - 'pillow_small', or 'box' (default: 'insole'). - :param initial_joint_positions: Initial positions of six finger joints - (default: zeros). - :param thumb_adducted: The joint j_thumb_opp can only take two - positions (adduction, abduction) and we cannot easily switch between - them. That's why we have to configure it in advance and cannot control - it during execution of a policy. When the thumb is adducted, it is - closer to the palm. When it is abducted, it is further away from the - palm. - :param verbose: Print debug output (default: False). - :param time_delta: Simulation time step. Note that the hand will still be - controlled at 20 Hz (default: 0.0001). - :param verbose_dt: Time delta after which the timing module should print - timing information (default: 0.1). - :param velocity_control: Use velocity control? Default: position control. - :param abort_on_first_step_with_collision: Abort on first step if the hand - is in collision with the object. - """ - - ACCUMULATED_FORCES_THRESHOLD = 200.0 # greater forces end the episode - DROP_TEST_TIME = 0.5 # time to test grasp stability in the end - - def __init__( - self, - gui: bool = True, - real_time: bool = False, - object_name: str = "insole", - initial_joint_positions: npt.ArrayLike = (0, 0, 0, 0, 0, 0), - thumb_adducted: bool = True, - verbose: bool = False, - time_delta: float = 0.0001, - verbose_dt: float = 0.1, - velocity_control: bool = False, - abort_on_first_step_with_collision: bool = False): - self.initial_joint_positions = np.array(initial_joint_positions) - self.thumb_adducted = thumb_adducted - self.object_name = object_name - self.verbose = verbose - self.velocity_control = velocity_control - self.abort_on_first_step_with_collision = abort_on_first_step_with_collision - - super().__init__( - gui=gui, real_time=real_time, horizon=100, soft=True, - verbose=verbose, time_delta=time_delta, - verbose_dt=verbose_dt) - - self.hand_world_pose = (0, 0, 1, -np.pi / 8, np.pi, 0) - self.robot = self._create_robot() - - joint_lower, joint_upper = self.robot.get_joint_limits() - sensor_lower, sensor_upper = self.robot.get_sensor_limits() - lower = np.concatenate([joint_lower, sensor_lower], axis=0) - upper = np.concatenate([joint_upper, sensor_upper], axis=0) - - self.observation_space = spaces.Box(low=lower, high=upper) - self.action_space = spaces.Box(low=-np.ones(4), high=np.ones(4)) - - self.first_step = True - self.invalid_initial_pose = False - - def _create_robot(self) -> mia_hand.MiaHand: - if self.velocity_control: - robot = mia_hand.MiaHandVelocity( - verbose=self.verbose, world_pos=self.hand_world_pose[:3], - world_orn=self.hand_world_pose[3:], - debug_visualization=bool(self.verbose)) - else: - robot = mia_hand.MiaHandPosition( - verbose=self.verbose, world_pos=self.hand_world_pose[:3], - world_orn=self.hand_world_pose[3:], - debug_visualization=bool(self.verbose)) - - self.simulation.add_robot(robot) - - self._init_hand_pose(robot) - - robot.set_initial_joint_positions(self.initial_joint_positions) - robot.set_thumb_opp(self.thumb_adducted) - - return robot - - def _load_objects(self): - """Load all objects in PyBullet.""" - super()._load_objects() - self.object_to_grasp, self.object_position, self.object_orientation = \ - ObjectFactory().create(self.object_name) - - def reset(self): - """Reset episode. - - :return: state - State vector. - """ - if self.verbose: - print("Performing reset (april)") - - if self.invalid_initial_pose or self._deformable_is_exploded(): - self._hard_reset() - state = super().reset() - else: - self.object_to_grasp.reset() - state = super().reset() - - self.simulation.camera.reset(self.object_to_grasp.get_pose()[:3], 0, - 50, 0.35) - - self.first_step = True - self.invalid_initial_pose = False - - return state - - def observe_state(self): - """Observe state. - - :return: state - Current state of the Mia hand. - """ - joint_pos = self.robot.get_joint_positions( - self.robot.actuated_real_joints) - sensor_readings = self.robot.get_sensor_readings() - return np.concatenate( - [joint_pos, sensor_readings], axis=0).astype("float64") - - def is_done(self, state, action, next_state): - """Checks whether the current episode is over or not. - - :param state: Current state of the Mia hand. - :param action: Action performed by the Mia hand. - :param next_state: Successor state of the Mia hand. - :return: done - Is the episode finished? - """ - high_forces, contact_forces = self._check_forces( - self.robot, self.ACCUMULATED_FORCES_THRESHOLD, self.verbose) - - self.invalid_initial_pose = ( - self.abort_on_first_step_with_collision and self.first_step - and contact_forces) - done = (high_forces or self.invalid_initial_pose or - super(MiaGraspEnv, self).is_done(state, action, next_state)) - - self.first_step = False - - return done - - def calculate_reward(self, state, action, next_state, done): - """Calculates the reward given a (state,action,state) tuple. - - :param state: Current state of the Mia hand. - :param action: Action performed by the Mia hand. - :param next_state: Successor state of the Mia hand. - :param done: Is the episode finished? - """ - if done: - if self.verbose: - print("Episode done, calculating final reward...") - if self.invalid_initial_pose: - return -1000.0 - # Remove insole anchors and simulate steps. - self.object_to_grasp.remove_anchors() - self.simulation.simulate_time(self.DROP_TEST_TIME) - return self.object_to_grasp.get_pose()[2] - else: - return 0.0 diff --git a/deformable_gym/envs/shadow_grasp_env.py b/deformable_gym/envs/shadow_grasp_env.py deleted file mode 100644 index d90ed8f..0000000 --- a/deformable_gym/envs/shadow_grasp_env.py +++ /dev/null @@ -1,172 +0,0 @@ -import numpy as np -from gym import spaces -from deformable_gym.robots import shadow_hand -from deformable_gym.envs.base_env import BaseBulletEnv, GraspDeformableMixin, FloatingHandMixin -from deformable_gym.objects.bullet_object import ObjectFactory - - -class ShadowGraspEnv(FloatingHandMixin, GraspDeformableMixin, BaseBulletEnv): - """Basic grasping environment using only the Shadow dexterous hand. - - The goal is to grasp an object using joint position offsets as actions. - - **State space:** - Positions of 24 joints: - WRJ: 2,1; rh_FFJ: 4,3,2,1; rh_MFJ: 4,3,2,1; rh_RFJ: 4,3,2,1; - rh_LFJ: 5,4,3,2,1; rh_THJ: 5,4,3,2,1. - - **Action space:** TODO only 20 degrees are controlled actively - 24 joint commands and a flag to indicate termination of an episode. - - :param gui: Show PyBullet GUI (default: True). - :param real_time: Run simulation in real time (default: False). - :param object_name: Name of the object to be loaded: 'insole', - 'pillow_small', or 'box' (default: 'insole'). - :param initial_joint_positions: Initial positions of six finger joints - (default: zeros). - :param verbose: Print debug output (default: False). - :param time_delta: Simulation time step. Note that the hand will still be - controlled at 20 Hz (default: 0.0001). - :param verbose_dt: Time delta after which the timing module should print - timing information (default: 0.1). - """ - - ACCUMULATED_FORCES_THRESHOLD = 200.0 # greater forces end the episode - DROP_TEST_TIME = 0.5 # time to test grasp stability in the end - - def __init__( - self, gui=True, real_time=False, object_name="insole", - initial_joint_positions=(-0.5, -0.0, # WRJ2, WRJ1 - 0.3, 0, 0, 0, # rh_FFJ: 4,3,2,1 - 0.3, 0, 0, 0, # rh_MFJ: 4,3,2,1 - 0.3, 0, 0, 0, # rh_RFJ: 4,3,2,1 - 0.3, 0, 0, 0, 0, # rh_LFJ: 5,4,3,2,1 - 0.3, 0, 0, 0, 0, # rh_THJ: 5,4,3,2,1 - ), - verbose=False, time_delta=0.0001, - verbose_dt=0.1, velocity_control=False): - - self.initial_joint_positions = np.array(initial_joint_positions) - self.object_name = object_name - self.time_delta = time_delta - self.verbose = verbose - self.velocity_control = velocity_control - - super().__init__( - gui=gui, real_time=real_time, horizon=100, soft=True, - verbose=verbose, time_delta=time_delta, - verbose_dt=verbose_dt) - - self.hand_world_pose = (0, -0.2, 1, -np.pi / 2, -np.pi, 0) - self.transforms_initialized = False - self.robot = self._create_robot() - - self.step_counter = 0 - - joint_lower, joint_upper = self.robot.get_joint_limits() - lower = np.concatenate([joint_lower], axis=0) - upper = np.concatenate([joint_upper], axis=0) - - self.observation_space = spaces.Box(low=lower, high=upper) - self.action_space = spaces.Box( - low=-np.ones(25), high=np.ones(25)) - - self.first_step = True - self.invalid_initial_pose = False - - def _create_robot(self) -> shadow_hand.ShadowHand: - if self.velocity_control: - robot = shadow_hand.ShadowHandVelocity( - verbose=self.verbose, world_pos=self.hand_world_pose[:3], - world_orn=self.hand_world_pose[3:]) - else: - robot = shadow_hand.ShadowHandPosition( - verbose=self.verbose, world_pos=self.hand_world_pose[:3], - world_orn=self.hand_world_pose[3:]) - - self._init_hand_pose(robot) - - robot.set_initial_joint_positions(self.initial_joint_positions) - - self.simulation.add_robot(robot) - - return robot - - def _load_objects(self): - """Load all objects in PyBullet.""" - super()._load_objects() - self.object_to_grasp, self.object_position, self.object_orientation = \ - ObjectFactory().create(self.object_name) - - def reset(self): - """Reset episode. - - :return: state - State vector. - """ - if self.verbose: - print("Performing reset (april)") - - if self.invalid_initial_pose or self._deformable_is_exploded(): - self._hard_reset() - state = self.observe_state() - else: - self.object_to_grasp.reset() - state = super().reset() - - self.simulation.camera.reset(self.object_to_grasp.get_pose()[:3], 15, - -105, 0.35) - - self.first_step = True - self.invalid_initial_pose = False - - return state - - def observe_state(self): - """Observe state. - - :return: state - Current state of the Mia hand. - """ - joint_pos = self.robot.get_joint_positions() - #sensor_readings = self.robot.get_sensor_readings() - return np.concatenate( - [joint_pos], axis=0).astype("float64") - - def is_done(self, state, action, next_state): - """Checks whether the current episode is over or not. - - :param state: Current state of the Mia hand. - :param action: Action performed by the Mia hand. - :param next_state: Successor state of the Mia hand. - :return: done - Is the episode finished? - """ - high_forces, contact_forces = self._check_forces( - self.robot, self.ACCUMULATED_FORCES_THRESHOLD, self.verbose) - - self.invalid_initial_pose = self.first_step and contact_forces - done = (high_forces or self.invalid_initial_pose or - super(ShadowGraspEnv, self).is_done(state, action, next_state)) - - self.first_step = False - - return done - - def calculate_reward(self, state, action, next_state, done): - """Calculates the reward given a (state,action,state) tuple. - - :param state: Current state of the Mia hand. - :param action: Action performed by the Mia hand. - :param next_state: Successor state of the Mia hand. - :param done: Is the episode finished? - """ - if done: - if self.verbose: - print("Episode done, calculating final reward...") - if self.invalid_initial_pose: - return -1000.0 - # Remove insole anchors and simulate steps. - self.object_to_grasp.remove_anchors() - for _ in range(int(self.DROP_TEST_TIME / self.time_delta)): - self.simulation.timing.step() - return self.object_to_grasp.get_pose()[2] - else: - return 0.0 diff --git a/deformable_gym/envs/ur10_shadow_grasp_env.py b/deformable_gym/envs/ur10_shadow_grasp_env.py index aab5be5..2f8789e 100644 --- a/deformable_gym/envs/ur10_shadow_grasp_env.py +++ b/deformable_gym/envs/ur10_shadow_grasp_env.py @@ -30,14 +30,6 @@ class UR10ShadowGraspEnv(GraspDeformableMixin, BaseBulletEnv): object should be sampled from the training or the test set. """ - train_positions = ([-0.7, 0.1, 1.6], - [-0.7, 0.2, 1.6], - [-0.7, 0.0, 1.6]) - - test_positions = ([-0.8, 0.1, 1.6], - [-0.8, 0.2, 1.6], - [-0.8, 0.0, 1.6]) - object2world = pt.transform_from(R=np.eye(3), p=np.array([-0.7, 0.1, 1.8])) @@ -104,20 +96,13 @@ def _create_robot(self): def _load_objects(self): super()._load_objects() self.object_to_grasp, self.object_position, self.object_orientation = \ - ObjectFactory().create( - self.object_name, object2world=self.object2world, - scale=self.object_scale) + ObjectFactory().create(self.object_name, object2world=self.object2world, scale=self.object_scale) def reset(self, hard_reset=False): - if self.randomised: - if self.train: - pos = random.choice(self.train_positions) - else: - pos = random.choice(self.test_positions) - else: - pos = None - self.object_to_grasp.reset(pos=pos) + pos = None + + self.object_to_grasp.reset(pos) self.robot.activate_motors() return super().reset() diff --git a/deformable_gym/envs/ur5_mia_grasp_env.py b/deformable_gym/envs/ur5_mia_grasp_env.py index 66eb067..1fd8b6b 100644 --- a/deformable_gym/envs/ur5_mia_grasp_env.py +++ b/deformable_gym/envs/ur5_mia_grasp_env.py @@ -3,8 +3,7 @@ import numpy as np from gym import spaces from deformable_gym.robots import ur5_mia -from deformable_gym.envs.base_env import BaseBulletEnv -from deformable_gym.envs.mia_grasp_env import GraspDeformableMixin +from deformable_gym.envs.base_env import BaseBulletEnv, GraspDeformableMixin from deformable_gym.helpers import pybullet_helper as pbh from deformable_gym.objects.bullet_object import ObjectFactory import pytransform3d.transformations as pt @@ -172,13 +171,7 @@ def _load_objects(self): def reset(self, hard_reset=False): - if self.randomised: - if self.train: - pos = random.choice(self.train_positions) - else: - pos = random.choice(self.test_positions) - else: - pos = None + pos = None self.object_to_grasp.reset(pos=pos) self.robot.activate_motors() diff --git a/deformable_gym/helpers/state_converter.py b/deformable_gym/helpers/state_converter.py deleted file mode 100644 index 32d66b2..0000000 --- a/deformable_gym/helpers/state_converter.py +++ /dev/null @@ -1,101 +0,0 @@ -import numpy as np -from pytransform3d import transformations as pt - - -class VectorConverter: - """Convert between different vector representations. - - Example - ------- - - .. code-block:: python - - vc = VectorConverter(["x", "y", "z"], ["z", "z", ("a", 0.0), "x", "y"]) - result = vc(np.array([1, 2, 3])) - - Parameters - ---------- - input_fields : list of str - Names of fields of the input vector. - - output_fields : list of str, tuple, or list - Names of fields of the output vector. You can add a constant field - with the name 'a' by including ('a', value) or ['a', value] in the - list of output fields. - """ - def __init__(self, input_fields, output_fields): - additional_keys = [] - additional_values = [] - output_keys = [] - for of in output_fields: - if isinstance(of, tuple) or isinstance(of, list): - key, value = of - additional_keys.append(key) - additional_values.append(value) - output_keys.append(key) - else: - assert isinstance(of, str) - output_keys.append(of) - self.all_keys = input_fields + additional_keys - self.additional_values = np.array(additional_values, dtype=float) - self.indices = np.array( - [self.all_keys.index(key) for key in output_keys], dtype=int) - - def __call__(self, vector): - """Convert input fields to output fields. - - Parameters - ---------- - vector : array-like, shape (len(input_fields),) - Input vector. - - Returns - ------- - result : array, shape (len(output_fields),) - Output vector. - """ - modified_state = np.hstack((vector, self.additional_values)) - return modified_state[self.indices] - - -class ActionCoupling: - def __init__(self, action): - self.action = action - - def coupling(self, y, yd): - return np.zeros_like(self.action), self.action - - -class TransformedDMP: - def __init__(self, dmp, dmp_base2world): - self.dmp = dmp - self.dmp_base2world = dmp_base2world - self.world2dmp_base = pt.invert_transform(dmp_base2world) - self.last_yd = None - - def step(self, last_y, coupling_term=None): - if self.last_yd is None: - self.last_yd = np.zeros_like(last_y) - last_y = np.copy(last_y) - - last_y[:7] = self._world2base_link(last_y[:7]) - y, self.last_yd[:] = self.dmp.step( - last_y, self.last_yd, coupling_term=coupling_term) - y[:7] = self._base_link2world(y[:7]) - return y - - def _world2base_link(self, pose): - ee2world = pt.transform_from_pq(pose) - ee2dmp_base = pt.concat(ee2world, self.world2dmp_base) - return pt.pq_from_transform(ee2dmp_base, self.last_yd) - - def _base_link2world(self, pose): - ee2dmp_base = pt.transform_from_pq(pose) - ee2world = pt.concat(ee2dmp_base, self.dmp_base2world) - return pt.pq_from_transform(ee2world) - - @property - def start_y(self): - start_y = np.copy(self.dmp.start_y) - start_y[:7] = self._base_link2world(start_y[:7]) - return start_y diff --git a/deformable_gym/objects/__init__.py b/deformable_gym/objects/__init__.py index 35d6252..3d2cfe6 100644 --- a/deformable_gym/objects/__init__.py +++ b/deformable_gym/objects/__init__.py @@ -4,7 +4,6 @@ MocapObjectMixin, RigidPrimitiveObject, Insole, InsoleOnConveyorBelt, PillowSmall, PositionEulerAngleMixin) from .conveyor import Conveyor -from .rigid_bag import RigidBag __all__ = [ @@ -12,5 +11,5 @@ "UrdfObject", "SphereObject", "CylinderObject", "ObjectFactory", "CapsuleObject", "SoftObject", "MocapObjectMixin", "RigidPrimitiveObject", "Insole", "InsoleOnConveyorBelt", "PillowSmall", "PositionEulerAngleMixin", - "Conveyor", "RigidBag" + "Conveyor" ] diff --git a/deformable_gym/objects/rigid_bag.py b/deformable_gym/objects/rigid_bag.py deleted file mode 100644 index a63d1fd..0000000 --- a/deformable_gym/objects/rigid_bag.py +++ /dev/null @@ -1,117 +0,0 @@ -import numpy as np -import pybullet as pb -from pytransform3d.rotations import quaternion_wxyz_from_xyzw, matrix_from_quaternion -from pytransform3d.transformations import transform_from, transform, vectors_to_points, invert_transform -from deformable_gym.robots.bullet_utils import draw_box - - -class RigidBag: - def __init__(self, world_pos=(0, 0, 0), world_rot=(0, 0, 0), bag_height=0.022, - transparent=True, fixed=False): - base_height = 0.003 - top_height = 0.02 - total_length = 0.34 - total_width = 0.14 - compartment_length = 0.3 - compartment_width = 0.1 - - total_half_length = total_length / 2 - total_half_width = total_width / 2 - compartment_half_length = compartment_length / 2 - back_half_length = total_half_length - compartment_half_length - compartment_half_width = compartment_width / 2 - side_width = (total_half_width - compartment_half_width) / 2 - side1_y = -(side_width + compartment_half_width) - side2_y = side_width + compartment_half_width - back_x = -(total_half_length - back_half_length) - - base = pb.createCollisionShape( - pb.GEOM_BOX, halfExtents=(total_half_length, total_half_width, base_height / 2)) - side = pb.createCollisionShape( - pb.GEOM_BOX, halfExtents=(total_half_length, side_width, bag_height / 2)) - back = pb.createCollisionShape( - pb.GEOM_BOX, halfExtents=(back_half_length, compartment_half_width, bag_height / 2)) - top = pb.createCollisionShape( - pb.GEOM_BOX, halfExtents=(total_half_length, total_half_width, top_height / 2)) - linkCollisionShapeIndices = [side, side, back, top] - linkMasses = [0.1] * len(linkCollisionShapeIndices) - linkVisualShapeIndices = [-1] * len(linkCollisionShapeIndices) - linkPositions = [[0, side1_y, base_height / 2 + bag_height / 2], - [0, side2_y, base_height / 2 + bag_height / 2], - [back_x, 0, base_height / 2 + bag_height / 2], - [0, 0, base_height / 2 + bag_height + top_height / 2]] - linkOrientations = [[0, 0, 0, 1]] * len(linkCollisionShapeIndices) - linkInertialFramePositions = [[0, 0, 0]] * len(linkCollisionShapeIndices) - linkInertialFrameOrientations = [[0, 0, 0, 1]] * len(linkCollisionShapeIndices) - linkParentIndices = [0] * len(linkCollisionShapeIndices) - linkJointTypes = [pb.JOINT_FIXED] * len(linkCollisionShapeIndices) - linkJointAxis = [(0, 0, 1)] * len(linkCollisionShapeIndices) - - self.bag = pb.createMultiBody( - baseMass=1000, - baseCollisionShapeIndex=base, - baseVisualShapeIndex=-1, - basePosition=world_pos, - baseOrientation=pb.getQuaternionFromEuler(world_rot), - linkMasses=linkMasses, - linkCollisionShapeIndices=linkCollisionShapeIndices, - linkVisualShapeIndices=linkVisualShapeIndices, - linkPositions=linkPositions, - linkOrientations=linkOrientations, - linkInertialFramePositions=linkInertialFramePositions, - linkInertialFrameOrientations=linkInertialFrameOrientations, - linkParentIndices=linkParentIndices, - linkJointTypes=linkJointTypes, - linkJointAxis=linkJointAxis, - flags=pb.URDF_MERGE_FIXED_LINKS - ) - pb.changeDynamics( - self.bag, -1, lateralFriction=1, spinningFriction=1, rollingFriction=1, - restitution=0, contactStiffness=1000000, contactDamping=0.1) - bag_color = (1, 1, 1, 0.5) - if transparent: - for i in range(-1, len(linkCollisionShapeIndices)): - pb.changeVisualShape(self.bag, i, rgbaColor=bag_color) - - if fixed: - pb.createConstraint( - self.bag, -1, -1, -1, pb.JOINT_FIXED, [0, 0, 1], [0, 0, 0], - world_pos) - - z_lo = base_height / 2 - z_hi = z_lo + bag_height - x_lo = back_half_length + back_x - x_hi = total_half_length - y_lo = side1_y + side_width - y_hi = -y_lo - self.limits = [[x_lo, x_hi], [y_lo, y_hi], [z_lo, z_hi]] - self.corners_in_bag = np.array([ - [x_lo, y_lo, z_hi], - [x_lo, y_hi, z_hi], - [x_hi, y_lo, z_hi], - [x_hi, y_hi, z_hi], - [x_lo, y_lo, z_lo], - [x_lo, y_hi, z_lo], - [x_hi, y_lo, z_lo], - [x_hi, y_hi, z_lo], - ]) - - def compartment_corners(self): - pos, orn = pb.getBasePositionAndOrientation(self.bag) - bag2world = transform_from( - p=pos, R=matrix_from_quaternion(quaternion_wxyz_from_xyzw(orn))) - corners_in_world = transform( - bag2world, vectors_to_points(self.corners_in_bag))[:, :3] - return corners_in_world - - def draw_compartment_corners(self, line_ids=None): - corners_in_world = self.compartment_corners() - return draw_box(corners_in_world, line_ids) - - def transform_world2bag(self, points_in_world): - pos, orn = pb.getBasePositionAndOrientation(self.bag) - bag2world = transform_from( - p=pos, R=matrix_from_quaternion(quaternion_wxyz_from_xyzw(orn))) - world2bag = invert_transform(bag2world) - points_in_bag = transform(world2bag, vectors_to_points(points_in_world))[:, :3] - return points_in_bag diff --git a/examples/floating_mia_example.py b/examples/floating_mia_example.py index 22cd518..a327e21 100644 --- a/examples/floating_mia_example.py +++ b/examples/floating_mia_example.py @@ -31,3 +31,5 @@ if done: print(f"Episode finished with return {episode_return}!") num_episodes += 1 + + env.reset() diff --git a/examples/floating_shadow_example.py b/examples/floating_shadow_example.py index 1570a1e..cbd97e2 100644 --- a/examples/floating_shadow_example.py +++ b/examples/floating_shadow_example.py @@ -31,3 +31,4 @@ if done: print(f"Episode finished with return {episode_return}!") num_episodes += 1 + env.reset() diff --git a/examples/ur10_shadow_grasp_example.py b/examples/ur10_shadow_grasp_example.py index 71cf096..0610b7c 100644 --- a/examples/ur10_shadow_grasp_example.py +++ b/examples/ur10_shadow_grasp_example.py @@ -29,3 +29,5 @@ if done: print(f"Episode finished with return {episode_return}!") num_episodes += 1 + + env.reset() diff --git a/examples/ur5_mia_grasp_example.py b/examples/ur5_mia_grasp_example.py index 23eb7ee..416eb88 100644 --- a/examples/ur5_mia_grasp_example.py +++ b/examples/ur5_mia_grasp_example.py @@ -29,3 +29,4 @@ if done: print(f"Episode finished with return {episode_return}!") num_episodes += 1 + env.reset() From f2970ec1cb6fbc838e1ca875dd58304066b0561e Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 11 Aug 2023 14:04:19 +0200 Subject: [PATCH 14/15] added badge --- .github/workflows/test.yaml | 2 +- README.md | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f418bf9..0f93b2c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Testing +name: Tests on: push: diff --git a/README.md b/README.md index d180cef..24ad810 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,12 @@ # DeformableGym +[![Tests](https://github.com/dfki-ric/deformable_gym/actions/workflows/test.yaml/badge.svg)](https://github.com/dfki-ric/deformable_gym/actions/workflows/test.yaml) + This repository contains a collection of RL gym environments built with PyBullet. In these environments, the agent needs to learn to grasp deformable object such as shoe insoles or pillows.

- +

From fc1f41b356d3c3b715a25a072e2944af1f3a436a Mon Sep 17 00:00:00 2001 From: Melvin Laux Date: Fri, 11 Aug 2023 14:04:39 +0200 Subject: [PATCH 15/15] added nissing reset in example --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 24ad810..e4029f7 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,8 @@ while num_episodes <= 10: if done: print(f"Episode finished with return {episode_return}!") num_episodes += 1 + + env.reset() ```