diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index f418bf9..0f93b2c 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,7 +1,7 @@
 # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
-name: Testing
+name: Tests

 on:
   push:
diff --git a/README.md b/README.md
index d180cef..e4029f7 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,12 @@
 # DeformableGym

+[![Tests](https://github.com/dfki-ric/deformable_gym/actions/workflows/test.yaml/badge.svg)](https://github.com/dfki-ric/deformable_gym/actions/workflows/test.yaml)
+
 This repository contains a collection of RL gym environments built with PyBullet. In these environments, the agent needs to learn to grasp deformable objects such as shoe insoles or pillows.

- +

@@ -79,6 +81,8 @@ while num_episodes <= 10: if done: print(f"Episode finished with return {episode_return}!") num_episodes += 1 + + env.reset() ``` diff --git a/deformable_gym/envs/__init__.py b/deformable_gym/envs/__init__.py index cf59695..98b9745 100644 --- a/deformable_gym/envs/__init__.py +++ b/deformable_gym/envs/__init__.py @@ -2,9 +2,6 @@ from .base_env import BaseBulletEnv from .floating_mia_grasp_env import FloatingMiaGraspEnv from .floating_shadow_grasp_env import FloatingShadowGraspEnv -from .mia_grasp_env import MiaGraspEnv -#from .mia_grasp_env_with_metrics import MiaGraspEnvWithMetrics -from .shadow_grasp_env import ShadowGraspEnv from .ur5_mia_grasp_env import UR5MiaGraspEnv from .ur10_shadow_grasp_env import UR10ShadowGraspEnv @@ -13,9 +10,6 @@ "BaseBulletEnv", "FloatingMiaGraspEnv", "FloatingShadowGraspEnv", - "MiaGraspEnv", - #"MiaGraspEnvWithMetrics", - "ShadowGraspEnv", "UR5MiaGraspEnv", "UR10ShadowGraspEnv" ] diff --git a/deformable_gym/envs/april_insole_dmp_env.py b/deformable_gym/envs/april_insole_dmp_env.py deleted file mode 100644 index 65086e1..0000000 --- a/deformable_gym/envs/april_insole_dmp_env.py +++ /dev/null @@ -1,98 +0,0 @@ -import numpy as np - -from deformable_gym.envs.ur5_mia_grasp_env import UR5MiaGraspEnv, INITIAL_JOINT_ANGLES -from deformable_gym.robots.mia_hand import MiaHandMixin -from deformable_gym.helpers.state_converter import VectorConverter, TransformedDMP, ActionCoupling - - -class AprilInsoleDmpEnv(UR5MiaGraspEnv): - """Control UR5 and Mia hand with a DMP. - - **State space:** - The state space has 16 dimensions. - The pose of the end-effector (7 values), finger joint angles (3 values), - and sensor readings (6 values). - End-effector poses are defined by position (x, y, z) and scalar-last - quaternion (qx, qy, qz, qw). - Joint positions are ordered as follows: - - (0) j_index_fle - - (2) j_mrl_fle - - (5) j_thumb_fle - Force measurements are ordered as follows: - - (0) tangential force at middle finger, - - (1) normal force at index finger, - - (2) tangential force at index finger, - - (3) tangential force at thumb, - - (4) normal force at thumb, - - (5) normal force at middle finger. - - **Action space:** - The action space has 11 dimensions. These are coupling term values for - the DMP: 7 values for the end-effector pose (7 values) and 3 values for - the joint angles of the hand. - End-effector commands are defined by position (x, y, z) and scalar-last - quaternion (qx, qy, qz, qw). 
- Joint commands are ordered as follows: - - (0) j_index_fle - - (1) j_mrl_fle - - (3) j_thumb_fle - """ - def __init__( - self, dmp, gui=True, real_time=False, - verbose=False, object2world=None, horizon=200): - self.object2world = object2world - super(AprilInsoleDmpEnv, self).__init__( - gui=gui, real_time=real_time, verbose=verbose, horizon=horizon) - self.dmp = dmp - self.transformed_dmp = TransformedDMP(dmp, object2world) - self.dmp_state2env_action = VectorConverter( - ["x", "y", "z", "qw", "qx", "qy", "qz", "j_index_fle", "j_mrl_fle", - "j_thumb_fle"], - ["x", "y", "z", "qx", "qy", "qz", "qw", "j_index_fle", "j_mrl_fle", - "j_thumb_fle"]) - self.env_state2dmp_state = VectorConverter( - ["x", "y", "z", "qx", "qy", "qz", "qw", "j_index_fle", "j_mrl_fle", - "j_middle_fle", "j_ring_fle", "j_thumb_opp", "j_thumb_fle", - "sensor1", "sensor2", "sensor3"], - ["x", "y", "z", "qw", "qx", "qy", "qz", "j_index_fle", "j_mrl_fle", - "j_thumb_fle"]) - - def reset(self, hard_reset=False): - start_y = self.dmp_state2env_action(self.transformed_dmp.start_y) - assert len(start_y) == 10 - arm_joint_target = self.robot.compute_ik( - np.array(INITIAL_JOINT_ANGLES), start_y[:3], start_y[3:7], False) - hand_joint_target = MiaHandMixin.convert_action_to_pybullet(start_y[7:]) - joint_angles = np.hstack((arm_joint_target[:6], hand_joint_target)) - self.robot.set_initial_joint_positions(joint_angles) - return super().reset(hard_reset) - - def step(self, action): - # observe current state - state = self.observe_state() - - # translate action - y = self.env_state2dmp_state(state) - next_y = self.transformed_dmp.step( - y, coupling_term=ActionCoupling(action)) - action = self.dmp_state2env_action(next_y) - - # execute action - self.robot.perform_command(action) - - # simulate until next time step - self.simulation.step_to_trigger("time_step") - - # observe new state - next_state = self.observe_state() - - # update time step - self.step_counter += 1 - - # check if episode is over - done = self.is_done(state, action, next_state) - - # calculate the reward - reward = self.calculate_reward(state, action, next_state, done) - - return next_state, reward, done, {} diff --git a/deformable_gym/envs/base_env.py b/deformable_gym/envs/base_env.py index 6221a20..3de44d0 100644 --- a/deformable_gym/envs/base_env.py +++ b/deformable_gym/envs/base_env.py @@ -8,7 +8,7 @@ from gym.core import Env from gym import spaces from deformable_gym.robots.bullet_robot import BulletRobot -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.helpers.pybullet_helper import MultibodyPose @@ -19,7 +19,6 @@ class BaseBulletEnv(Env, abc.ABC): :param real_time: Run PyBullet in real time. :param horizon: Maximum number of steps in an episode. :param soft: Activate soft body simulation. - :param load_plane: Load plane from URDF. :param verbose: Print debug information. :param time_delta: Time between PyBullet simulation steps. :param verbose_dt: Time after which simulation info should be printed. 
@@ -36,17 +35,19 @@ class BaseBulletEnv(Env, abc.ABC): action_space: spaces.Box def __init__( - self, gui: bool = True, real_time: bool = False, - horizon: int = 100, soft: bool = False, - load_plane: bool = True, verbose: bool = False, - time_delta: float = 0.0001, verbose_dt: float = 10.00, - early_episode_termination: bool = True, + self, + gui: bool = True, + real_time: bool = False, + horizon: int = 100, + soft: bool = False, + verbose: bool = False, + time_delta: float = 0.0001, + verbose_dt: float = 10.00, pybullet_options: str = ""): + self.gui = gui self.verbose = verbose - self.__load_plane = load_plane self.horizon = horizon - self.early_episode_termination = early_episode_termination mode = pb.GUI if gui else pb.DIRECT @@ -64,20 +65,12 @@ def _create_robot(self): """Load (or reload) robot to PyBullet simulation.""" def _load_objects(self): - """Load objects to PyBullet simulation. - - If a plane should be loaded, it will have the position (0, 0, 0). - """ - if self.__load_plane: - PLANE_POSITION = (0, 0, 0) - self.plane = pb.loadURDF( - "plane.urdf", PLANE_POSITION, useFixedBase=1) + """Load objects to PyBullet simulation.""" + self.plane = pb.loadURDF("plane.urdf", (0, 0, 0), useFixedBase=1) def _hard_reset(self): - """Hard reset. - - Fully reset the PyBullet simulation and reload all objects. May be - necessary, for example, when soft-bodies in the environment explode. + """Hard reset the PyBullet simulation and reload all objects. May be necessary, e.g., if soft-bodies in the + environment explode. """ if self.verbose: print("Performing hard reset!") @@ -118,7 +111,7 @@ def render(self, mode: str = "human"): if mode == "human": assert self.gui else: - raise NotImplementedError("Render mode '%s' not supported" % mode) + raise NotImplementedError(f"Render mode {mode} not supported") def observe_state(self) -> np.ndarray: """Returns the current environment state. @@ -127,8 +120,7 @@ def observe_state(self) -> np.ndarray: """ return self.robot.get_joint_positions() - def is_done(self, state: np.ndarray, action: np.ndarray, - next_state: np.ndarray) -> bool: + def is_done(self, state: np.ndarray, action: np.ndarray, next_state: np.ndarray) -> bool: """Checks whether the current episode is over or not. :param state: State @@ -136,10 +128,6 @@ def is_done(self, state: np.ndarray, action: np.ndarray, :param next_state: State after action was taken :return: Is the current episode done? 
""" - # check for termination action - if self.early_episode_termination and round(action[-1]) == 1: - return True - return self.step_counter >= self.horizon def step(self, action: npt.ArrayLike): @@ -160,10 +148,7 @@ def step(self, action: npt.ArrayLike): state = self.observe_state() # execute action - if self.early_episode_termination: - self.robot.perform_command(action[:-1]) - else: - self.robot.perform_command(action) + self.robot.perform_command(action) # simulate until next time step self.simulation.step_to_trigger("time_step") @@ -181,14 +166,12 @@ def step(self, action: npt.ArrayLike): reward = self.calculate_reward(state, action, next_state, done) if self.verbose: - print(f"Finished environment step: {next_state=}, {reward}, {done=}") + print(f"Finished environment step: {next_state=}, {reward=}, {done=}") return next_state, reward, done, {} @abc.abstractmethod - def calculate_reward( - self, state: np.ndarray, action: np.ndarray, - next_state: np.ndarray, done: bool): + def calculate_reward(self, state: npt.ArrayLike, action: npt.ArrayLike, next_state: npt.ArrayLike, done: bool): """Calculate reward. :param state: State of the environment. @@ -220,27 +203,22 @@ def _deformable_is_exploded(self) -> bool: :return: Whether the deformable is exploded or not. """ - pose = self.object_to_grasp.get_pose() - - if np.isfinite(pose).all(): - return np.any(np.abs(pose[:3]) > 3) - else: - print("NAN in insole pose -> exploded?!") - return True + return not np.isfinite(self.object_to_grasp.get_pose()).all() def _check_forces(self, robot, high_force_threshold, verbose): - contact_points = robot.get_contact_points( - self.object_to_grasp.get_id()) + contact_points = robot.get_contact_points(self.object_to_grasp.get_id()) accumulated_forces = 0.0 for contact_point in contact_points: _, _, _, _, _, _, _, _, dist, force, _, _, _, _ = contact_point accumulated_forces += force high_forces = accumulated_forces > high_force_threshold contact_forces = accumulated_forces > 0 + if high_forces: - print("Accumulated force too high: %g" % accumulated_forces) + print(f"Accumulated force too high: {accumulated_forces}") elif verbose: - print("Accumulated force: %g" % accumulated_forces) + print(f"{accumulated_forces=}") + return high_forces, contact_forces def get_object_pose(self): @@ -264,8 +242,7 @@ def _init_hand_pose(self, robot: BulletRobot): """ desired_robot2world_pos = self.hand_world_pose[:3] desired_robot2world_orn = pb.getQuaternionFromEuler(self.hand_world_pose[3:]) - self.multibody_pose = MultibodyPose( - robot.get_id(), desired_robot2world_pos, desired_robot2world_orn) + self.multibody_pose = MultibodyPose(robot.get_id(), desired_robot2world_pos, desired_robot2world_orn) def set_world_pose(self, world_pose): """Set pose of the hand. 
diff --git a/deformable_gym/envs/pybullet_tools.py b/deformable_gym/envs/bullet_simulation.py similarity index 100% rename from deformable_gym/envs/pybullet_tools.py rename to deformable_gym/envs/bullet_simulation.py diff --git a/deformable_gym/envs/floating_mia_grasp_env.py b/deformable_gym/envs/floating_mia_grasp_env.py index 54164a8..687735e 100644 --- a/deformable_gym/envs/floating_mia_grasp_env.py +++ b/deformable_gym/envs/floating_mia_grasp_env.py @@ -85,7 +85,7 @@ def __init__( self._observable_object_pos = observable_object_pos self._observable_time_step = observable_time_step - super().__init__(soft=True, load_plane=True, **kwargs) + super().__init__(soft=True, **kwargs) self.hand_world_pose = self._STANDARD_INITIAL_POSE self.robot = self._create_robot() @@ -120,7 +120,6 @@ def __init__( high=upper_observations) # build the action space - lower = [-np.ones(3) * .0005, # max negative base pos offset -np.ones(4) * .000005, # max negative base orn offset limits[0][self.actuated_finger_ids]] # negative joint limits @@ -129,10 +128,6 @@ def __init__( np.ones(4) * .000005, # max positive base orn offset limits[1][self.actuated_finger_ids]] # positive joint limits - if self.early_episode_termination: - lower.append([0.0]) - upper.append([1.0]) - self.action_space = spaces.Box(low=np.concatenate(lower), high=np.concatenate(upper)) def _create_robot(self): @@ -144,14 +139,16 @@ def _create_robot(self): world_pos=self.hand_world_pose[:3], world_orn=pb.getEulerFromQuaternion(self.hand_world_pose[3:]), task_space_limit=self.task_space_limit, - verbose=self.verbose, orn_limit=orn_limit, + verbose=self.verbose, + orn_limit=orn_limit, base_commands=True) else: robot = mia_hand.MiaHandPosition( world_pos=self.hand_world_pose[:3], world_orn=pb.getEulerFromQuaternion(self.hand_world_pose[3:]), task_space_limit=self.task_space_limit, - verbose=self.verbose, orn_limit=orn_limit, + verbose=self.verbose, + orn_limit=orn_limit, base_commands=True) self.simulation.add_robot(robot) @@ -185,10 +182,6 @@ def set_difficulty_mode(self, mode: str): def reset(self, hard_reset=False, pos=None): - if self.verbose: - print("Performing reset (april)") - print("Pose:", self.hand_world_pose) - if pos is None: self.robot.reset_base(self.hand_world_pose) else: @@ -235,17 +228,17 @@ def calculate_reward(self, state, action, next_state, done): if not (round(action[-1]) == 1 or self.step_counter >= self.horizon): return -100 - # self.robot.deactivate_motors() + self.robot.deactivate_motors() # remove insole anchors and simulate steps self.object_to_grasp.remove_anchors() for _ in range(50): if self._deformable_is_exploded(): - return -1 + return -1.0 self.simulation.step_to_trigger("time_step") height = self.object_to_grasp.get_pose()[2] if height < 0.9: - return -1 + return -1.0 else: - return 1 + return 1.0 else: return 0.0 diff --git a/deformable_gym/envs/floating_shadow_grasp_env.py b/deformable_gym/envs/floating_shadow_grasp_env.py index 9613555..a54f30d 100644 --- a/deformable_gym/envs/floating_shadow_grasp_env.py +++ b/deformable_gym/envs/floating_shadow_grasp_env.py @@ -29,17 +29,12 @@ class FloatingShadowGraspEnv(GraspDeformableMixin, BaseBulletEnv): object should be sampled from the training or the test set. 
""" - train_positions = ([-0.7, 0.1, 1.6], - [-0.7, 0.2, 1.6], - [-0.7, 0.0, 1.6]) - - test_positions = ([-0.8, 0.1, 1.6], - [-0.8, 0.2, 1.6], - [-0.8, 0.0, 1.6]) - def __init__(self, object_name="insole", - horizon=200, train=True, - compute_reward=True, object_scale=1.0, **kwargs): + horizon=100, train=True, + compute_reward=True, object_scale=1.0, + observable_object_pos: bool = False, + observable_time_step: bool = False, + **kwargs): self.insole = None self.train = train self.velocity_commands = False @@ -47,8 +42,10 @@ def __init__(self, object_name="insole", self.randomised = False self.compute_reward = compute_reward self.object_scale = object_scale + self._observable_object_pos = observable_object_pos + self._observable_time_step = observable_time_step - super().__init__(horizon=horizon, soft=True, load_plane=True, **kwargs) + super().__init__(horizon=horizon, soft=True, **kwargs) self.hand_world_pose = (0, -0.5, 1, -np.pi/2, np.pi, 0) self.robot = self._create_robot() @@ -63,6 +60,14 @@ def __init__(self, object_name="insole", np.array([2, 2, 2]), np.ones(4), limits[1][6:], np.array([5, 5, 5])], axis=0) + if self._observable_object_pos: + lower_observations = np.append(lower_observations, np.ones(3)) + upper_observations = np.append(upper_observations, -np.ones(3)) + + if self._observable_time_step: + lower_observations = np.append(lower_observations, 0) + upper_observations = np.append(upper_observations, self.horizon) + self.observation_space = spaces.Box( low=lower_observations, high=upper_observations) @@ -104,13 +109,7 @@ def reset(self, hard_reset=False): if self.verbose: print("Performing reset (april)") - if self.randomised: - if self.train: - pos = random.choice(self.train_positions) - else: - pos = random.choice(self.test_positions) - else: - pos = None + pos = None self.object_to_grasp.reset(pos=pos) @@ -142,7 +141,7 @@ def calculate_reward(self, state, action, next_state, done): if not (round(action[-1]) == 1 or self.step_counter >= self.horizon): return -100 - # self.robot.deactivate_motors() + self.robot.deactivate_motors() # remove insole anchors and simulate steps self.object_to_grasp.remove_anchors() for _ in range(50): diff --git a/deformable_gym/envs/grasp_env.py b/deformable_gym/envs/grasp_env.py new file mode 100644 index 0000000..d226477 --- /dev/null +++ b/deformable_gym/envs/grasp_env.py @@ -0,0 +1,73 @@ +import numpy as np + +from abc import ABC +from base_env import BaseBulletEnv +from ..objects.bullet_object import ObjectFactory + + +class GraspEnv(BaseBulletEnv, ABC): + + def __init__(self, + object_name, + observable_object_pos: bool = False, + observable_time_step: bool = False): + + self.object_name = object_name + self._observable_object_pos = observable_object_pos + self._observable_time_step = observable_time_step + + super().__init__(soft=True) + + self.robot = self._create_robot() + + # TODO: adapt if non-robot observable objects in environment + self.action_space = self.robot.action_space + self.observation_space = self.robot.observation_space + + def _load_objects(self): + super()._load_objects() + self.object_to_grasp, self.object_position, self.object_orientation = ObjectFactory().create(self.object_name) + + def reset(self, hard_reset=False, pos=None): + + self.robot.reset(pos) + self.object_to_grasp.reset() + self.robot.activate_motors() + + return super().reset() + + def observe_state(self): + joint_pos = self.robot.get_joint_positions(self.robot.actuated_real_joints) + sensor_readings = self.robot.get_sensor_readings() + + state = 
np.concatenate([joint_pos, sensor_readings]) + + if self._observable_object_pos: + obj_pos = self.object_to_grasp.get_pose()[:3] + state = np.append(state, obj_pos) + + if self._observable_time_step: + state = np.append(state, self.step_counter) + + return state + + def calculate_reward(self, state, action, next_state, done): + """ + Calculates the reward by counting how many insole vertices are in the + target position. + """ + if done: + + # self.robot.deactivate_motors() + # remove insole anchors and simulate steps + self.object_to_grasp.remove_anchors() + + for _ in range(50): + self.simulation.step_to_trigger("time_step") + height = self.object_to_grasp.get_pose()[2] + if height < 0.9: + return -1.0 + else: + return 1.0 + else: + return 0.0 diff --git a/deformable_gym/envs/mia_grasp_env.py b/deformable_gym/envs/mia_grasp_env.py deleted file mode 100644 index 3f66953..0000000 --- a/deformable_gym/envs/mia_grasp_env.py +++ /dev/null @@ -1,202 +0,0 @@ -import numpy as np -import numpy.typing as npt -from gym import spaces -from deformable_gym.robots import mia_hand -from deformable_gym.envs.base_env import BaseBulletEnv, GraspDeformableMixin, FloatingHandMixin -from deformable_gym.objects.bullet_object import ObjectFactory - - -class MiaGraspEnv(FloatingHandMixin, GraspDeformableMixin, BaseBulletEnv): - """Basic grasping environment using only the MIA hand. - - The goal is to grasp an object using joint position offsets as actions. - - **State space:** - The state space has 9 dimensions. - Position of three joints and values of six force sensors at the fingertips. - Joint positions are ordered as follows: - - (0) j_index_fle - - (2) j_mrl_fle - - (5) j_thumb_fle - Force measurements are ordered as follows: - - (0) tangential force at middle finger, - - (1) normal force at index finger, - - (2) tangential force at index finger, - - (3) tangential force at thumb, - - (4) normal force at thumb, - - (5) normal force at middle finger. - - **Action space:** - The action space has 4 dimensions. - Joint commands are ordered as follows. - - (0) j_index_fle - - (1) j_mrl_fle - - (3) j_thumb_fle - At the end there is a flag to indicate that the episode is finished (1) - or not (0). - - :param gui: Show PyBullet GUI (default: True). - :param real_time: Run simulation in real time (default: False). - :param object_name: Name of the object to be loaded: 'insole', - 'pillow_small', or 'box' (default: 'insole'). - :param initial_joint_positions: Initial positions of six finger joints - (default: zeros). - :param thumb_adducted: The joint j_thumb_opp can only take two - positions (adduction, abduction) and we cannot easily switch between - them. That's why we have to configure it in advance and cannot control - it during execution of a policy. When the thumb is adducted, it is - closer to the palm. When it is abducted, it is further away from the - palm. - :param verbose: Print debug output (default: False). - :param time_delta: Simulation time step. Note that the hand will still be - controlled at 20 Hz (default: 0.0001). - :param verbose_dt: Time delta after which the timing module should print - timing information (default: 0.1). - :param velocity_control: Use velocity control? Default: position control. - :param abort_on_first_step_with_collision: Abort on first step if the hand - is in collision with the object. 
- """ - - HORIZON = 500 - ACCUMULATED_FORCES_THRESHOLD = 200.0 # greater forces end the episode - DROP_TEST_TIME = 0.5 # time to test grasp stability in the end - - def __init__( - self, - gui: bool = True, - real_time: bool = False, - object_name: str = "insole", - initial_joint_positions: npt.ArrayLike = (0, 0, 0, 0, 0, 0), - thumb_adducted: bool = True, - verbose: bool = False, - time_delta: float = 0.0001, - verbose_dt: float = 0.1, - velocity_control: bool = False, - abort_on_first_step_with_collision: bool = False): - self.initial_joint_positions = np.array(initial_joint_positions) - self.thumb_adducted = thumb_adducted - self.object_name = object_name - self.verbose = verbose - self.velocity_control = velocity_control - self.abort_on_first_step_with_collision = abort_on_first_step_with_collision - - super().__init__( - gui=gui, real_time=real_time, horizon=self.HORIZON, soft=True, - load_plane=True, verbose=verbose, time_delta=time_delta, - verbose_dt=verbose_dt) - - self.hand_world_pose = (0, 0, 1, -np.pi / 8, np.pi, 0) - self.robot = self._create_robot() - - joint_lower, joint_upper = self.robot.get_joint_limits() - sensor_lower, sensor_upper = self.robot.get_sensor_limits() - lower = np.concatenate([joint_lower, sensor_lower], axis=0) - upper = np.concatenate([joint_upper, sensor_upper], axis=0) - - self.observation_space = spaces.Box(low=lower, high=upper) - self.action_space = spaces.Box(low=-np.ones(4), high=np.ones(4)) - - self.first_step = True - self.invalid_initial_pose = False - - def _create_robot(self) -> mia_hand.MiaHand: - if self.velocity_control: - robot = mia_hand.MiaHandVelocity( - verbose=self.verbose, world_pos=self.hand_world_pose[:3], - world_orn=self.hand_world_pose[3:], - debug_visualization=bool(self.verbose)) - else: - robot = mia_hand.MiaHandPosition( - verbose=self.verbose, world_pos=self.hand_world_pose[:3], - world_orn=self.hand_world_pose[3:], - debug_visualization=bool(self.verbose)) - - self.simulation.add_robot(robot) - - self._init_hand_pose(robot) - - robot.set_initial_joint_positions(self.initial_joint_positions) - robot.set_thumb_opp(self.thumb_adducted) - - return robot - - def _load_objects(self): - """Load all objects in PyBullet.""" - super()._load_objects() - self.object_to_grasp, self.object_position, self.object_orientation = \ - ObjectFactory().create(self.object_name) - - def reset(self): - """Reset episode. - - :return: state - State vector. - """ - if self.verbose: - print("Performing reset (april)") - - if self.invalid_initial_pose or self._deformable_is_exploded(): - self._hard_reset() - state = super().reset() - else: - self.object_to_grasp.reset() - state = super().reset() - - self.simulation.camera.reset(self.object_to_grasp.get_pose()[:3], 0, - 50, 0.35) - - self.first_step = True - self.invalid_initial_pose = False - - return state - - def observe_state(self): - """Observe state. - - :return: state - Current state of the Mia hand. - """ - joint_pos = self.robot.get_joint_positions( - self.robot.actuated_real_joints) - sensor_readings = self.robot.get_sensor_readings() - return np.concatenate( - [joint_pos, sensor_readings], axis=0).astype("float64") - - def is_done(self, state, action, next_state): - """Checks whether the current episode is over or not. - - :param state: Current state of the Mia hand. - :param action: Action performed by the Mia hand. - :param next_state: Successor state of the Mia hand. - :return: done - Is the episode finished? 
- """ - high_forces, contact_forces = self._check_forces( - self.robot, self.ACCUMULATED_FORCES_THRESHOLD, self.verbose) - - self.invalid_initial_pose = ( - self.abort_on_first_step_with_collision and self.first_step - and contact_forces) - done = (high_forces or self.invalid_initial_pose or - super(MiaGraspEnv, self).is_done(state, action, next_state)) - - self.first_step = False - - return done - - def calculate_reward(self, state, action, next_state, done): - """Calculates the reward given a (state,action,state) tuple. - - :param state: Current state of the Mia hand. - :param action: Action performed by the Mia hand. - :param next_state: Successor state of the Mia hand. - :param done: Is the episode finished? - """ - if done: - if self.verbose: - print("Episode done, calculating final reward...") - if self.invalid_initial_pose: - return -1000.0 - # Remove insole anchors and simulate steps. - self.object_to_grasp.remove_anchors() - self.simulation.simulate_time(self.DROP_TEST_TIME) - return self.object_to_grasp.get_pose()[2] - else: - return 0.0 diff --git a/deformable_gym/envs/mia_grasp_env_with_metrics.py b/deformable_gym/envs/mia_grasp_env_with_metrics.py deleted file mode 100644 index f6b7d07..0000000 --- a/deformable_gym/envs/mia_grasp_env_with_metrics.py +++ /dev/null @@ -1,251 +0,0 @@ -from typing import Union -import pybullet as pb -import numpy as np -import numpy.typing as npt -import pytransform3d.rotations as pr -import pytransform3d.transformations as pt -from .mia_grasp_env import MiaGraspEnv -from .floating_mia_grasp_env import FloatingMiaGraspEnv -from grasp_metrics.utils.pybullet_interface import ( - collect_contact_information, compute_inertia) -from grasp_metrics.geometry import fit_plane_from_point_set, point_to_line_segment -from grasp_metrics.quasi_static_point_based import ferrari_canny -from grasp_metrics.geometry_based import ( - distance_com_centroid, distance_grasp_point_centroid, grasp_polygon_shape, - area_of_grasp_polygon, orthogonality, distance_grasp_point_finger_tips) -from grasp_metrics.hands import MiaHand - - -class MiaGraspEnvWithMetrics(MiaGraspEnv): - """Grasping environment using only the MIA hand that uses grasp metrics. - - The goal is to grasp an object using joint position offsets as actions. - - **State space:** - Position of three joints and values of six force sensors at the fingertips. - Joint positions are ordered as follows: - - (0) j_index_fle - - (2) j_mrl_fle - - (5) j_thumb_fle - Force measurements are ordered as follows: - - (0) tangential force at middle finger, - - (1) normal force at index finger, - - (2) tangential force at index finger, - - (3) tangential force at thumb, - - (4) normal force at thumb, - - (5) normal force at middle finger. - - **Action space:** - Joint position offset for all four controllable degrees of freedom. - Commands are ordered as follows: - - (0) j_index_fle - - (1) j_mrl_fle - - (3) j_thumb_fle - - :param gui: Show PyBullet GUI (default: True). - :param real_time: Run simulation in real time (default: False). - :param object_name: Name of the object to be loaded: 'insole', - 'pillow_small', or 'box' (default: 'insole'). - :param initial_joint_positions: Initial positions of six finger joints - (default: zeros). - :param grasp_point: Grasp point. - :param thumb_adducted: The joint j_thumb_opp can only take two - positions (adduction, abduction) and we cannot easily switch between - them. That's why we have to configure it in advance and cannot control - it during execution of a policy. 
When the thumb is adducted, it is - closer to the palm. When it is abducted, it is further away from the - palm. - :param verbose: Print debug output (default: False). - :param time_delta: Simulation time step. Note that the hand will still be - controlled at 20 Hz (default: 0.0001). - :param verbose_dt: Time delta after which the timing module should print - timing information (default: 0.1). - """ - def __init__( - self, - gui: bool = True, - real_time: bool = False, - object_name: str = "insole", - initial_joint_positions: npt.ArrayLike = (0, 0, 0, 0, 0, 0), - grasp_point: Union[npt.ArrayLike, None] = None, - thumb_adducted: bool = False, - verbose: bool = False, - time_delta: float = 0.0001, - verbose_dt: float = 0.1, - velocity_control: bool = False, - abort_on_first_step_with_collision: bool = False): - super(MiaGraspEnvWithMetrics, self).__init__( - gui, real_time, object_name, initial_joint_positions, - thumb_adducted, verbose, time_delta, verbose_dt, velocity_control, - abort_on_first_step_with_collision) - self.grasp_point = grasp_point - - def calculate_reward(self, state, action, next_state, done): - """Calculates the reward given a (state,action,state) tuple. - - :param state: Current state of the Mia hand. - :param action: Action performed by the Mia hand. - :param next_state: Successor state of the Mia hand. - :param done: Is the episode finished? - """ - if done: - return _final_reward_with_metrics(self, self.DROP_TEST_TIME) - else: - return 0.0 - - -class FloatingMiaGraspEnvWithMetrics(FloatingMiaGraspEnv): - """Grasp an insole with a floating Mia hand. - - **State space:** - - End-effector pose: (x, y, z, qx, qy, qz, qw) - - Finger joint angles: thumb_fle, index_fle, mrl_fle - - Force sensors: 6 values - - **Action space:** - - End-effector offset: (x, y, z, qx, qy, qz, qw) - - Finger joint angle: thumb_fle, index_fle, mrl_fle - - :param grasp_point: Grasp point. - :param object_name: Name of the object to be loaded: 'insole', - 'pillow_small', or 'box' (default: 'insole'). - :param horizon: Number of steps in simulation. - :param train: Bool flag indicating if the starting position of the grasped - :param compute_reward: Compute reward. - :param object_scale: Scaling factor for object. - :param task_space_limit: Limits for task space, shape: (2, 3). - :param gui: Show PyBullet GUI (default: True). - :param real_time: Run simulation in real time (default: False). - :param initial_joint_positions: Initial positions of six finger joints - (default: zeros). - :param verbose: Print debug output (default: False). - """ - def __init__( - self, - grasp_point: Union[npt.ArrayLike, None] = None, - **kwargs): - super(FloatingMiaGraspEnvWithMetrics, self).__init__(**kwargs) - self.grasp_point = grasp_point - - def set_world_pose(self, world_pose): - """Set pose of the hand. - - :param world_pose: world pose of hand given as position and - quaternion: (x, y, z, qw, qx, qy, qz) - """ - self.hand_world_pose = np.hstack(( - world_pose[:3], pr.quaternion_xyzw_from_wxyz(world_pose[3:]))) - - def get_world_pose(self): - """Get pose of the hand. - - :return: hand pose in world coordinates given as position and - quaternion: (x, y, z, qw, qx, qy, qz) - """ - return np.hstack(( - self.hand_world_pose[:3], - pr.quaternion_wxyz_from_xyzw(self.hand_world_pose[3:]))) - - def calculate_reward(self, state, action, next_state, done): - """Calculates the reward given a (state,action,state) tuple. - - :param state: Current state of the Mia hand. - :param action: Action performed by the Mia hand. 
- :param next_state: Successor state of the Mia hand. - :param done: Is the episode finished? - """ - if done: - return _final_reward_with_metrics(self, drop_test_time=1.0) - else: - return 0.0 - - -def _final_reward_with_metrics(env, drop_test_time, verbose=1): - if verbose: - print("Episode done, calculating final reward with metrics...") - - Q_O, dist, dist_fingers = _approach_info(env) - - if hasattr(env, "invalid_initial_pose") and env.invalid_initial_pose: - if verbose: - print(f"Stage 1: {dist=:.3f}, {dist_fingers=:.3f}, {Q_O=:.2f}") - return -15.0 - 5 * dist - 5 * dist_fingers + Q_O - - # before removing anchors - contacts, _, _, _ = collect_contact_information( - env.object_to_grasp.get_id(), env.robot.get_id(), 0, - default_torque=1.0) - n_contacts_before = len(contacts) - initial_height = env.object_to_grasp.get_pose()[2] - if n_contacts_before == 0: - if verbose: - print(f"Stage 2: {dist=:.3f}, {dist_fingers=:.3f}, {Q_O=:.2f}") - return -10.0 - 5 * dist - 5 * dist_fingers + Q_O - - env.object_to_grasp.remove_anchors() - env.simulation.simulate_time(drop_test_time) - - # after removing anchors - contacts, vertices_in_object, _, object2world = \ - collect_contact_information( - env.object_to_grasp.get_id(), env.robot.get_id(), 0, - default_torque=1.0) - vertices_in_world = pt.transform( - object2world, pt.vectors_to_points(vertices_in_object))[:, :3] - - if len(contacts) == 0: - if verbose: - print(f"Stage 3: {dist=:.3f}, {dist_fingers=:.3f}, {Q_O=:.2f}, {n_contacts_before=}") - return -5.0 - 5 * dist - 5 * dist_fingers + Q_O + 0.2 * n_contacts_before - - Q_FC, Q_SGP, Q_AGP, Q_DCC = _metrics(contacts, env, object2world, vertices_in_world) - height = env.object_to_grasp.get_pose()[2] - success = height > initial_height - 0.3 - success_reward = 5.0 if success else 0.0 - if verbose: - print(f"Stage 4: {success=}, {height=:.2f}/{initial_height=:.2f}, " - f"{dist=:.3f}, {Q_FC=:.5f}, {Q_DCC=:.3f}, {Q_AGP=:.3f}, " - f"{Q_SGP=:.3f}, {Q_O=:.2f}, {dist_fingers=:.3f}") - return success_reward - 5 * dist - 10 * dist_fingers + 200 * Q_FC + 0 * Q_DCC + 0 * Q_AGP + 0 * Q_SGP + 5 * Q_O - - -def _approach_info(env): - hand = MiaHand() - hand_pose = pt.transform_from_pq(env.get_world_pose()) - hand_pos, approach_direction = hand.perpendicular_to_palm_surface(hand_pose) - I = compute_inertia(env.object_to_grasp.get_id()) - Q_O = orthogonality(I, approach_direction) - approach_end = hand_pos + hand.get_maximum_finger_length() * approach_direction - if env.verbose: - pb.addUserDebugLine(hand_pos, approach_end, [1, 1, 0]) - if env.grasp_point is not None: - dist = point_to_line_segment( - env.grasp_point[:3], hand_pos, approach_end)[0] - dist_fingers = distance_grasp_point_finger_tips( - hand_pose, hand, env.grasp_point[:3]) - else: - dist = 0.0 - dist_fingers = 0.0 - return Q_O, dist, dist_fingers - - -def _metrics(contacts, env, object2world, vertices_in_world): - Q_FC = ferrari_canny(contacts) - if env.grasp_point is None: - Q_DCC = distance_com_centroid( - contacts.contact_points_in_world(object2world), - vertices_in_world) - else: - Q_DCC = distance_grasp_point_centroid( - contacts.contact_points_in_world(object2world), - vertices_in_world, env.grasp_point) - if len(contacts) >= 3: - normal, d, contact_points_in_plane = fit_plane_from_point_set( - contacts.contact_points()) - Q_AGP = area_of_grasp_polygon( - contact_points_in_plane, vertices_in_world, normal=normal) - Q_SGP = grasp_polygon_shape(contact_points_in_plane, normal=normal) - else: - Q_AGP = 0.0 - Q_SGP = 0.0 - return Q_FC, Q_SGP, 
Q_AGP, Q_DCC diff --git a/deformable_gym/envs/shadow_grasp_env.py b/deformable_gym/envs/shadow_grasp_env.py deleted file mode 100644 index 8f61bd8..0000000 --- a/deformable_gym/envs/shadow_grasp_env.py +++ /dev/null @@ -1,173 +0,0 @@ -import numpy as np -from gym import spaces -from deformable_gym.robots import shadow_hand -from deformable_gym.envs.base_env import BaseBulletEnv, GraspDeformableMixin, FloatingHandMixin -from deformable_gym.objects.bullet_object import ObjectFactory - - -class ShadowGraspEnv(FloatingHandMixin, GraspDeformableMixin, BaseBulletEnv): - """Basic grasping environment using only the Shadow dexterous hand. - - The goal is to grasp an object using joint position offsets as actions. - - **State space:** - Positions of 24 joints: - WRJ: 2,1; rh_FFJ: 4,3,2,1; rh_MFJ: 4,3,2,1; rh_RFJ: 4,3,2,1; - rh_LFJ: 5,4,3,2,1; rh_THJ: 5,4,3,2,1. - - **Action space:** TODO only 20 degrees are controlled actively - 24 joint commands and a flag to indicate termination of an episode. - - :param gui: Show PyBullet GUI (default: True). - :param real_time: Run simulation in real time (default: False). - :param object_name: Name of the object to be loaded: 'insole', - 'pillow_small', or 'box' (default: 'insole'). - :param initial_joint_positions: Initial positions of six finger joints - (default: zeros). - :param verbose: Print debug output (default: False). - :param time_delta: Simulation time step. Note that the hand will still be - controlled at 20 Hz (default: 0.0001). - :param verbose_dt: Time delta after which the timing module should print - timing information (default: 0.1). - """ - - HORIZON = 500 - ACCUMULATED_FORCES_THRESHOLD = 200.0 # greater forces end the episode - DROP_TEST_TIME = 0.5 # time to test grasp stability in the end - - def __init__( - self, gui=True, real_time=False, object_name="insole", - initial_joint_positions=(-0.5, -0.0, # WRJ2, WRJ1 - 0.3, 0, 0, 0, # rh_FFJ: 4,3,2,1 - 0.3, 0, 0, 0, # rh_MFJ: 4,3,2,1 - 0.3, 0, 0, 0, # rh_RFJ: 4,3,2,1 - 0.3, 0, 0, 0, 0, # rh_LFJ: 5,4,3,2,1 - 0.3, 0, 0, 0, 0, # rh_THJ: 5,4,3,2,1 - ), - verbose=False, time_delta=0.0001, - verbose_dt=0.1, velocity_control=False): - - self.initial_joint_positions = np.array(initial_joint_positions) - self.object_name = object_name - self.time_delta = time_delta - self.verbose = verbose - self.velocity_control = velocity_control - - super().__init__( - gui=gui, real_time=real_time, horizon=self.HORIZON, soft=True, - load_plane=True, verbose=verbose, time_delta=time_delta, - verbose_dt=verbose_dt) - - self.hand_world_pose = (0, -0.2, 1, -np.pi / 2, -np.pi, 0) - self.transforms_initialized = False - self.robot = self._create_robot() - - self.step_counter = 0 - - joint_lower, joint_upper = self.robot.get_joint_limits() - lower = np.concatenate([joint_lower], axis=0) - upper = np.concatenate([joint_upper], axis=0) - - self.observation_space = spaces.Box(low=lower, high=upper) - self.action_space = spaces.Box( - low=-np.ones(25), high=np.ones(25)) - - self.first_step = True - self.invalid_initial_pose = False - - def _create_robot(self) -> shadow_hand.ShadowHand: - if self.velocity_control: - robot = shadow_hand.ShadowHandVelocity( - verbose=self.verbose, world_pos=self.hand_world_pose[:3], - world_orn=self.hand_world_pose[3:]) - else: - robot = shadow_hand.ShadowHandPosition( - verbose=self.verbose, world_pos=self.hand_world_pose[:3], - world_orn=self.hand_world_pose[3:]) - - self._init_hand_pose(robot) - - robot.set_initial_joint_positions(self.initial_joint_positions) - - 
self.simulation.add_robot(robot) - - return robot - - def _load_objects(self): - """Load all objects in PyBullet.""" - super()._load_objects() - self.object_to_grasp, self.object_position, self.object_orientation = \ - ObjectFactory().create(self.object_name) - - def reset(self): - """Reset episode. - - :return: state - State vector. - """ - if self.verbose: - print("Performing reset (april)") - - if self.invalid_initial_pose or self._deformable_is_exploded(): - self._hard_reset() - state = self.observe_state() - else: - self.object_to_grasp.reset() - state = super().reset() - - self.simulation.camera.reset(self.object_to_grasp.get_pose()[:3], 15, - -105, 0.35) - - self.first_step = True - self.invalid_initial_pose = False - - return state - - def observe_state(self): - """Observe state. - - :return: state - Current state of the Mia hand. - """ - joint_pos = self.robot.get_joint_positions() - #sensor_readings = self.robot.get_sensor_readings() - return np.concatenate( - [joint_pos], axis=0).astype("float64") - - def is_done(self, state, action, next_state): - """Checks whether the current episode is over or not. - - :param state: Current state of the Mia hand. - :param action: Action performed by the Mia hand. - :param next_state: Successor state of the Mia hand. - :return: done - Is the episode finished? - """ - high_forces, contact_forces = self._check_forces( - self.robot, self.ACCUMULATED_FORCES_THRESHOLD, self.verbose) - - self.invalid_initial_pose = self.first_step and contact_forces - done = (high_forces or self.invalid_initial_pose or - super(ShadowGraspEnv, self).is_done(state, action, next_state)) - - self.first_step = False - - return done - - def calculate_reward(self, state, action, next_state, done): - """Calculates the reward given a (state,action,state) tuple. - - :param state: Current state of the Mia hand. - :param action: Action performed by the Mia hand. - :param next_state: Successor state of the Mia hand. - :param done: Is the episode finished? - """ - if done: - if self.verbose: - print("Episode done, calculating final reward...") - if self.invalid_initial_pose: - return -1000.0 - # Remove insole anchors and simulate steps. - self.object_to_grasp.remove_anchors() - for _ in range(int(self.DROP_TEST_TIME / self.time_delta)): - self.simulation.timing.step() - return self.object_to_grasp.get_pose()[2] - else: - return 0.0 diff --git a/deformable_gym/envs/ur10_shadow_grasp_env.py b/deformable_gym/envs/ur10_shadow_grasp_env.py index e888d3b..2f8789e 100644 --- a/deformable_gym/envs/ur10_shadow_grasp_env.py +++ b/deformable_gym/envs/ur10_shadow_grasp_env.py @@ -30,19 +30,11 @@ class UR10ShadowGraspEnv(GraspDeformableMixin, BaseBulletEnv): object should be sampled from the training or the test set. 
""" - train_positions = ([-0.7, 0.1, 1.6], - [-0.7, 0.2, 1.6], - [-0.7, 0.0, 1.6]) - - test_positions = ([-0.8, 0.1, 1.6], - [-0.8, 0.2, 1.6], - [-0.8, 0.0, 1.6]) - object2world = pt.transform_from(R=np.eye(3), p=np.array([-0.7, 0.1, 1.8])) def __init__(self, gui=True, real_time=False, object_name="insole", - verbose=False, horizon=200, train=True, + verbose=False, horizon=100, train=True, compute_reward=True, object_scale=1.0, verbose_dt=10.0): self.insole = None self.train = train @@ -53,7 +45,7 @@ def __init__(self, gui=True, real_time=False, object_name="insole", self.object_scale = object_scale super().__init__(gui=gui, real_time=real_time, horizon=horizon, - soft=True, load_plane=True, verbose=verbose, + soft=True, verbose=verbose, verbose_dt=verbose_dt) self.robot = self._create_robot() @@ -72,12 +64,10 @@ def __init__(self, gui=True, real_time=False, object_name="insole", low=lower_observations, high=upper_observations) lower_actions = np.concatenate([ - np.array([-2, -2, 0]), -np.ones(4), limits[0][6:], - [0]], axis=0) + np.array([-2, -2, 0]), -np.ones(4), limits[0][6:]], axis=0) upper_actions = np.concatenate([ - np.array([2, 2, 2]), np.ones(4), limits[1][6:], - [1]], axis=0) + np.array([2, 2, 2]), np.ones(4), limits[1][6:]], axis=0) self.action_space = spaces.Box(low=lower_actions, high=upper_actions) @@ -97,19 +87,7 @@ def _create_robot(self): end_effector_link="rh_forearm", verbose=self.verbose, orn_limit=orn_limit) - # robot.set_ee_pose([-0.3, 0.2, 1.5], - # [-0.69284743, 0.69317556, 0.14108795, -0.13987236]) - - robot.set_initial_joint_positions(dict(zip(robot.motors, - robot.get_joint_positions()))) - - """ - robot.set_initial_joint_positions(np.array( - [2.44388798, -2.01664781, 1.72892952, -0.3965438, 1.18004689, - 0.18013334, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.] 
- )) - """ + robot.set_initial_joint_positions(dict(zip(robot.motors, robot.get_joint_positions()))) self.simulation.add_robot(robot) @@ -118,24 +96,13 @@ def _create_robot(self): def _load_objects(self): super()._load_objects() self.object_to_grasp, self.object_position, self.object_orientation = \ - ObjectFactory().create( - self.object_name, object2world=self.object2world, - scale=self.object_scale) + ObjectFactory().create(self.object_name, object2world=self.object2world, scale=self.object_scale) def reset(self, hard_reset=False): - if self.verbose: - print("Performing reset (april)") - - if self.randomised: - if self.train: - pos = random.choice(self.train_positions) - else: - pos = random.choice(self.test_positions) - else: - pos = None - self.object_to_grasp.reset(pos=pos) + pos = None + self.object_to_grasp.reset(pos) self.robot.activate_motors() return super().reset() @@ -174,9 +141,9 @@ def calculate_reward(self, state, action, next_state, done): return -100 self.simulation.step_to_trigger("time_step") height = self.object_to_grasp.get_pose()[2] - if height < 0.5: - return -50 + if height > 0.9: + return 1.0 else: - return 0.0 + return -1.0 else: return 0.0 diff --git a/deformable_gym/envs/ur5_mia_grasp_env.py b/deformable_gym/envs/ur5_mia_grasp_env.py index 104c9f7..1fd8b6b 100644 --- a/deformable_gym/envs/ur5_mia_grasp_env.py +++ b/deformable_gym/envs/ur5_mia_grasp_env.py @@ -3,8 +3,7 @@ import numpy as np from gym import spaces from deformable_gym.robots import ur5_mia -from deformable_gym.envs.base_env import BaseBulletEnv -from deformable_gym.envs.mia_grasp_env import GraspDeformableMixin +from deformable_gym.envs.base_env import BaseBulletEnv, GraspDeformableMixin from deformable_gym.helpers import pybullet_helper as pbh from deformable_gym.objects.bullet_object import ObjectFactory import pytransform3d.transformations as pt @@ -97,7 +96,7 @@ class UR5MiaGraspEnv(GraspDeformableMixin, BaseBulletEnv): def __init__( self, gui: bool = True, real_time: bool = False, object_name: str = "insole", verbose: bool = False, - horizon: int = 200, train: bool = True, + horizon: int = 100, train: bool = True, thumb_adducted: bool = True, compute_reward: bool = True, object_scale: float = 1.0, verbose_dt: float = 10.0, pybullet_options: str = ""): @@ -110,7 +109,7 @@ def __init__( self.object_scale = object_scale super().__init__(gui=gui, real_time=real_time, horizon=horizon, - soft=True, load_plane=True, verbose=verbose, + soft=True, verbose=verbose, verbose_dt=verbose_dt, pybullet_options=pybullet_options) @@ -132,12 +131,14 @@ def __init__( actuated_finger_ids = np.array([6, 7, 11], dtype=int) lower_actions = np.concatenate([ - np.array([-2, -2, 0]), -np.ones(4), limits[0][actuated_finger_ids], - [0]], axis=0) + np.array([-2, -2, 0]), + -np.ones(4), + limits[0][actuated_finger_ids]], axis=0) upper_actions = np.concatenate([ - np.array([2, 2, 2]), np.ones(4), limits[1][actuated_finger_ids], - [1]], axis=0) + np.array([2, 2, 2]), + np.ones(4), + limits[1][actuated_finger_ids]], axis=0) self.action_space = spaces.Box(low=lower_actions, high=upper_actions) @@ -166,21 +167,11 @@ def _create_robot(self): def _load_objects(self): super()._load_objects() self.object_to_grasp, self.object_position, self.object_orientation = \ - ObjectFactory().create( - self.object_name, object2world=self.object2world, - scale=self.object_scale) + ObjectFactory().create(self.object_name, object2world=self.object2world, scale=self.object_scale) def reset(self, hard_reset=False): - if self.verbose: - 
print("Performing reset (april)") - if self.randomised: - if self.train: - pos = random.choice(self.train_positions) - else: - pos = random.choice(self.test_positions) - else: - pos = None + pos = None self.object_to_grasp.reset(pos=pos) self.robot.activate_motors() @@ -189,10 +180,6 @@ def reset(self, hard_reset=False): def is_done(self, state, action, next_state): - # check for termination action - # if round(action[-1]) == 1: - # return True - # check if insole is exploded if self._deformable_is_exploded(): print("Exploded insole") @@ -201,8 +188,7 @@ def is_done(self, state, action, next_state): return super().is_done(state, action, next_state) def observe_state(self): - joint_pos = self.robot.get_joint_positions( - self.robot.actuated_real_joints) + joint_pos = self.robot.get_joint_positions(self.robot.actuated_real_joints) ee_pose = self.robot.get_ee_pose() sensor_readings = self.robot.get_sensor_readings() @@ -228,9 +214,10 @@ def calculate_reward(self, state, action, next_state, done): return -100 self.simulation.step_to_trigger("time_step") height = self.object_to_grasp.get_pose()[2] - if height < 0.5: - return -50 + + if height > 0.9: + return 1.0 else: - return 0.0 + return -1.0 else: return 0.0 diff --git a/deformable_gym/helpers/state_converter.py b/deformable_gym/helpers/state_converter.py deleted file mode 100644 index 32d66b2..0000000 --- a/deformable_gym/helpers/state_converter.py +++ /dev/null @@ -1,101 +0,0 @@ -import numpy as np -from pytransform3d import transformations as pt - - -class VectorConverter: - """Convert between different vector representations. - - Example - ------- - - .. code-block:: python - - vc = VectorConverter(["x", "y", "z"], ["z", "z", ("a", 0.0), "x", "y"]) - result = vc(np.array([1, 2, 3])) - - Parameters - ---------- - input_fields : list of str - Names of fields of the input vector. - - output_fields : list of str, tuple, or list - Names of fields of the output vector. You can add a constant field - with the name 'a' by including ('a', value) or ['a', value] in the - list of output fields. - """ - def __init__(self, input_fields, output_fields): - additional_keys = [] - additional_values = [] - output_keys = [] - for of in output_fields: - if isinstance(of, tuple) or isinstance(of, list): - key, value = of - additional_keys.append(key) - additional_values.append(value) - output_keys.append(key) - else: - assert isinstance(of, str) - output_keys.append(of) - self.all_keys = input_fields + additional_keys - self.additional_values = np.array(additional_values, dtype=float) - self.indices = np.array( - [self.all_keys.index(key) for key in output_keys], dtype=int) - - def __call__(self, vector): - """Convert input fields to output fields. - - Parameters - ---------- - vector : array-like, shape (len(input_fields),) - Input vector. - - Returns - ------- - result : array, shape (len(output_fields),) - Output vector. 
- """ - modified_state = np.hstack((vector, self.additional_values)) - return modified_state[self.indices] - - -class ActionCoupling: - def __init__(self, action): - self.action = action - - def coupling(self, y, yd): - return np.zeros_like(self.action), self.action - - -class TransformedDMP: - def __init__(self, dmp, dmp_base2world): - self.dmp = dmp - self.dmp_base2world = dmp_base2world - self.world2dmp_base = pt.invert_transform(dmp_base2world) - self.last_yd = None - - def step(self, last_y, coupling_term=None): - if self.last_yd is None: - self.last_yd = np.zeros_like(last_y) - last_y = np.copy(last_y) - - last_y[:7] = self._world2base_link(last_y[:7]) - y, self.last_yd[:] = self.dmp.step( - last_y, self.last_yd, coupling_term=coupling_term) - y[:7] = self._base_link2world(y[:7]) - return y - - def _world2base_link(self, pose): - ee2world = pt.transform_from_pq(pose) - ee2dmp_base = pt.concat(ee2world, self.world2dmp_base) - return pt.pq_from_transform(ee2dmp_base, self.last_yd) - - def _base_link2world(self, pose): - ee2dmp_base = pt.transform_from_pq(pose) - ee2world = pt.concat(ee2dmp_base, self.dmp_base2world) - return pt.pq_from_transform(ee2world) - - @property - def start_y(self): - start_y = np.copy(self.dmp.start_y) - start_y[:7] = self._base_link2world(start_y[:7]) - return start_y diff --git a/deformable_gym/objects/__init__.py b/deformable_gym/objects/__init__.py index 35d6252..3d2cfe6 100644 --- a/deformable_gym/objects/__init__.py +++ b/deformable_gym/objects/__init__.py @@ -4,7 +4,6 @@ MocapObjectMixin, RigidPrimitiveObject, Insole, InsoleOnConveyorBelt, PillowSmall, PositionEulerAngleMixin) from .conveyor import Conveyor -from .rigid_bag import RigidBag __all__ = [ @@ -12,5 +11,5 @@ "UrdfObject", "SphereObject", "CylinderObject", "ObjectFactory", "CapsuleObject", "SoftObject", "MocapObjectMixin", "RigidPrimitiveObject", "Insole", "InsoleOnConveyorBelt", "PillowSmall", "PositionEulerAngleMixin", - "Conveyor", "RigidBag" + "Conveyor" ] diff --git a/deformable_gym/objects/rigid_bag.py b/deformable_gym/objects/rigid_bag.py deleted file mode 100644 index a63d1fd..0000000 --- a/deformable_gym/objects/rigid_bag.py +++ /dev/null @@ -1,117 +0,0 @@ -import numpy as np -import pybullet as pb -from pytransform3d.rotations import quaternion_wxyz_from_xyzw, matrix_from_quaternion -from pytransform3d.transformations import transform_from, transform, vectors_to_points, invert_transform -from deformable_gym.robots.bullet_utils import draw_box - - -class RigidBag: - def __init__(self, world_pos=(0, 0, 0), world_rot=(0, 0, 0), bag_height=0.022, - transparent=True, fixed=False): - base_height = 0.003 - top_height = 0.02 - total_length = 0.34 - total_width = 0.14 - compartment_length = 0.3 - compartment_width = 0.1 - - total_half_length = total_length / 2 - total_half_width = total_width / 2 - compartment_half_length = compartment_length / 2 - back_half_length = total_half_length - compartment_half_length - compartment_half_width = compartment_width / 2 - side_width = (total_half_width - compartment_half_width) / 2 - side1_y = -(side_width + compartment_half_width) - side2_y = side_width + compartment_half_width - back_x = -(total_half_length - back_half_length) - - base = pb.createCollisionShape( - pb.GEOM_BOX, halfExtents=(total_half_length, total_half_width, base_height / 2)) - side = pb.createCollisionShape( - pb.GEOM_BOX, halfExtents=(total_half_length, side_width, bag_height / 2)) - back = pb.createCollisionShape( - pb.GEOM_BOX, halfExtents=(back_half_length, compartment_half_width, 
bag_height / 2)) - top = pb.createCollisionShape( - pb.GEOM_BOX, halfExtents=(total_half_length, total_half_width, top_height / 2)) - linkCollisionShapeIndices = [side, side, back, top] - linkMasses = [0.1] * len(linkCollisionShapeIndices) - linkVisualShapeIndices = [-1] * len(linkCollisionShapeIndices) - linkPositions = [[0, side1_y, base_height / 2 + bag_height / 2], - [0, side2_y, base_height / 2 + bag_height / 2], - [back_x, 0, base_height / 2 + bag_height / 2], - [0, 0, base_height / 2 + bag_height + top_height / 2]] - linkOrientations = [[0, 0, 0, 1]] * len(linkCollisionShapeIndices) - linkInertialFramePositions = [[0, 0, 0]] * len(linkCollisionShapeIndices) - linkInertialFrameOrientations = [[0, 0, 0, 1]] * len(linkCollisionShapeIndices) - linkParentIndices = [0] * len(linkCollisionShapeIndices) - linkJointTypes = [pb.JOINT_FIXED] * len(linkCollisionShapeIndices) - linkJointAxis = [(0, 0, 1)] * len(linkCollisionShapeIndices) - - self.bag = pb.createMultiBody( - baseMass=1000, - baseCollisionShapeIndex=base, - baseVisualShapeIndex=-1, - basePosition=world_pos, - baseOrientation=pb.getQuaternionFromEuler(world_rot), - linkMasses=linkMasses, - linkCollisionShapeIndices=linkCollisionShapeIndices, - linkVisualShapeIndices=linkVisualShapeIndices, - linkPositions=linkPositions, - linkOrientations=linkOrientations, - linkInertialFramePositions=linkInertialFramePositions, - linkInertialFrameOrientations=linkInertialFrameOrientations, - linkParentIndices=linkParentIndices, - linkJointTypes=linkJointTypes, - linkJointAxis=linkJointAxis, - flags=pb.URDF_MERGE_FIXED_LINKS - ) - pb.changeDynamics( - self.bag, -1, lateralFriction=1, spinningFriction=1, rollingFriction=1, - restitution=0, contactStiffness=1000000, contactDamping=0.1) - bag_color = (1, 1, 1, 0.5) - if transparent: - for i in range(-1, len(linkCollisionShapeIndices)): - pb.changeVisualShape(self.bag, i, rgbaColor=bag_color) - - if fixed: - pb.createConstraint( - self.bag, -1, -1, -1, pb.JOINT_FIXED, [0, 0, 1], [0, 0, 0], - world_pos) - - z_lo = base_height / 2 - z_hi = z_lo + bag_height - x_lo = back_half_length + back_x - x_hi = total_half_length - y_lo = side1_y + side_width - y_hi = -y_lo - self.limits = [[x_lo, x_hi], [y_lo, y_hi], [z_lo, z_hi]] - self.corners_in_bag = np.array([ - [x_lo, y_lo, z_hi], - [x_lo, y_hi, z_hi], - [x_hi, y_lo, z_hi], - [x_hi, y_hi, z_hi], - [x_lo, y_lo, z_lo], - [x_lo, y_hi, z_lo], - [x_hi, y_lo, z_lo], - [x_hi, y_hi, z_lo], - ]) - - def compartment_corners(self): - pos, orn = pb.getBasePositionAndOrientation(self.bag) - bag2world = transform_from( - p=pos, R=matrix_from_quaternion(quaternion_wxyz_from_xyzw(orn))) - corners_in_world = transform( - bag2world, vectors_to_points(self.corners_in_bag))[:, :3] - return corners_in_world - - def draw_compartment_corners(self, line_ids=None): - corners_in_world = self.compartment_corners() - return draw_box(corners_in_world, line_ids) - - def transform_world2bag(self, points_in_world): - pos, orn = pb.getBasePositionAndOrientation(self.bag) - bag2world = transform_from( - p=pos, R=matrix_from_quaternion(quaternion_wxyz_from_xyzw(orn))) - world2bag = invert_transform(bag2world) - points_in_bag = transform(world2bag, vectors_to_points(points_in_world))[:, :3] - return points_in_bag diff --git a/deformable_gym/robots/bullet_robot.py b/deformable_gym/robots/bullet_robot.py index 8c89725..5708752 100644 --- a/deformable_gym/robots/bullet_robot.py +++ b/deformable_gym/robots/bullet_robot.py @@ -7,6 +7,7 @@ from deformable_gym.robots.bullet_utils import draw_limits 
from deformable_gym.robots.inverse_kinematics import PyBulletSolver from deformable_gym.objects.bullet_object import Pose +from gym.spaces import Box class BulletRobot(abc.ABC): @@ -108,6 +109,11 @@ def __init__( self.multibody_pose = pbh.MultibodyPose( self.get_id(), self.init_pos, self.init_rot) + # create observation and action spaces + # TODO: fix to include base actions and sensor observations + self.action_space = Box(np.full(len(self.motors), -1.0), np.full(len(self.motors), 1.0)) + self.observation_space = Box(np.full(len(self.motors), -1.0), np.full(len(self.motors), 1.0)) + def initialise(self) -> int: """Initialize robot in simulation. @@ -251,10 +257,11 @@ def compute_ik( target_pos = current_pos + pos_command if self.task_space_limit is not None: - print(f"Original {target_pos=}") - target_pos = np.clip( - target_pos, self.task_space_limit[0], self.task_space_limit[1]) - print(f"Clipped {target_pos=}") + if self.verbose: + print(f"Original {target_pos=}") + target_pos = np.clip(target_pos, self.task_space_limit[0], self.task_space_limit[1]) + if self.verbose: + print(f"Clipped {target_pos=}") # calculate target rotation if orientation is provided target_orn = None @@ -393,7 +400,7 @@ def _get_link_id(self, link_name: str) -> int: """ return self._link_name_to_link_id[link_name] - def _get_joint_limits(self, joints: List[str]) -> Tuple[np.ndarray, np.ndarray]: + def get_joint_limits(self, joints: List[str]) -> Tuple[np.ndarray, np.ndarray]: """Get array of lower limits and array of upper limits of joints. :param joints: Names of joints for which we request limits. diff --git a/deformable_gym/robots/shadow_hand.py b/deformable_gym/robots/shadow_hand.py index 4a9e6d1..666ae4f 100644 --- a/deformable_gym/robots/shadow_hand.py +++ b/deformable_gym/robots/shadow_hand.py @@ -55,10 +55,6 @@ def __init__( if self.debug_visualization: self._init_debug_visualizations() - def get_joint_limits(self): - """Get joint limits.""" - return self._get_joint_limits(self.motors) - def perform_command(self, command: npt.ArrayLike): """Translates hand commands and updates current command. diff --git a/deformable_gym/robots/ur10_shadow.py b/deformable_gym/robots/ur10_shadow.py index 079b334..d3f939a 100644 --- a/deformable_gym/robots/ur10_shadow.py +++ b/deformable_gym/robots/ur10_shadow.py @@ -57,10 +57,6 @@ def arm_command_wrapper(): end_effector_name=end_effector_link, fallback=self.inverse_kinematics_solver)) - def get_joint_limits(self): - """Get joint limits.""" - return self._get_joint_limits(self.motors) - def perform_command(self, command): """ Performs the provided arm and hand commands. 
Assumes task-space arm
diff --git a/examples/floating_mia_example.py b/examples/floating_mia_example.py
index 22cd518..a327e21 100644
--- a/examples/floating_mia_example.py
+++ b/examples/floating_mia_example.py
@@ -31,3 +31,5 @@
     if done:
         print(f"Episode finished with return {episode_return}!")
         num_episodes += 1
+
+    env.reset()
diff --git a/examples/floating_shadow_example.py b/examples/floating_shadow_example.py
index c8dcdce..cbd97e2 100644
--- a/examples/floating_shadow_example.py
+++ b/examples/floating_shadow_example.py
@@ -14,7 +14,6 @@
     gui=True,
     horizon=100,
     object_name="insole",
-    early_episode_termination=False
 )
 
 
@@ -32,3 +31,4 @@
     if done:
         print(f"Episode finished with return {episode_return}!")
         num_episodes += 1
+    env.reset()
diff --git a/examples/ur10_shadow_grasp_example.py b/examples/ur10_shadow_grasp_example.py
new file mode 100644
index 0000000..0610b7c
--- /dev/null
+++ b/examples/ur10_shadow_grasp_example.py
@@ -0,0 +1,33 @@
+from deformable_gym.envs.ur10_shadow_grasp_env import UR10ShadowGraspEnv
+
+"""
+=========
+UR10 Shadow Grasp Example
+=========
+
+This is an example of how to use the UR10ShadowGraspEnv. A random policy is then
+used to generate ten episodes.
+
+"""
+
+env = UR10ShadowGraspEnv(
+    gui=True,
+    object_name="insole"
+)
+
+env.reset()
+episode_return = 0
+num_episodes = 0
+
+while num_episodes <= 10:
+
+    action = env.action_space.sample()
+
+    state, reward, done, _ = env.step(action)
+    episode_return += reward
+
+    if done:
+        print(f"Episode finished with return {episode_return}!")
+        num_episodes += 1
+
+    env.reset()
diff --git a/examples/ur5_mia_grasp_example.py b/examples/ur5_mia_grasp_example.py
new file mode 100644
index 0000000..416eb88
--- /dev/null
+++ b/examples/ur5_mia_grasp_example.py
@@ -0,0 +1,32 @@
+from deformable_gym.envs.ur5_mia_grasp_env import UR5MiaGraspEnv
+
+"""
+=========
+UR5 Mia Grasp Example
+=========
+
+This is an example of how to use the UR5MiaGraspEnv. A random policy is then
+used to generate ten episodes.
+
+"""
+
+env = UR5MiaGraspEnv(
+    gui=True,
+    object_name="insole2"
+)
+
+env.reset()
+episode_return = 0
+num_episodes = 0
+
+while num_episodes <= 10:
+
+    action = env.action_space.sample()
+
+    state, reward, done, _ = env.step(action)
+    episode_return += reward
+
+    if done:
+        print(f"Episode finished with return {episode_return}!")
+        num_episodes += 1
+    env.reset()
diff --git a/tests/conftest.py b/tests/conftest.py
index 3e37e9c..a08ca31 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,6 @@
 import pytest
 import pybullet as pb
-from deformable_gym.envs.pybullet_tools import BulletSimulation
+from deformable_gym.envs.bullet_simulation import BulletSimulation
 
 
 @pytest.fixture
@@ -22,7 +22,27 @@ def mia_sensors():
 
 @pytest.fixture
 def shadow_motors():
-    return ["rh_WRJ2", "rh_WRJ1", "rh_THJ5", "rh_THJ4", "rh_THJ3", "rh_THJ2",
-            "rh_THJ1", "rh_LFJ5", "rh_LFJ4", "rh_LFJ3", "rh_LFJ2", "rh_RFJ1",
-            "rh_RFJ4", "rh_RFJ3", "rh_RFJ2", "rh_RFJ1", "rh_MFJ4", "rh_MFJ3",
-            "rh_MFJ2", "rh_MFJ1", "rh_FFJ4", "rh_FFJ3", "rh_FFJ2", "rh_FFJ1"]
+    return ["rh_WRJ2", "rh_WRJ1", "rh_THJ5", "rh_THJ4", "rh_THJ3", "rh_THJ2", "rh_THJ1", "rh_LFJ5", "rh_LFJ4",
+            "rh_LFJ3", "rh_LFJ2", "rh_LFJ1", "rh_RFJ4", "rh_RFJ3", "rh_RFJ2", "rh_RFJ1", "rh_MFJ4",
+            "rh_MFJ3", "rh_MFJ2", "rh_MFJ1", "rh_FFJ4", "rh_FFJ3", "rh_FFJ2", "rh_FFJ1"]
+
+
+@pytest.fixture
+def ur5_mia_motors():
+    return ["ur5_shoulder_pan_joint", "ur5_shoulder_lift_joint", "ur5_elbow_joint", "ur5_wrist_1_joint",
+            "ur5_wrist_2_joint", "ur5_wrist_3_joint", "j_thumb_fle", "j_thumb_opp", "j_index_fle", "j_mrl_fle",
+            "j_ring_fle", "j_little_fle"]
+
+
+@pytest.fixture
+def ur5_mia_sensors():
+    return ["j_index_sensor", "j_middle_sensor", "j_thumb_sensor"]
+
+
+@pytest.fixture
+def ur10_shadow_motors():
+    return ["ur10_shoulder_pan_joint", "ur10_shoulder_lift_joint", "ur10_elbow_joint", "ur10_wrist_1_joint",
+            "ur10_wrist_2_joint", "ur10_wrist_3_joint", "rh_WRJ2", "rh_WRJ1", "rh_THJ5", "rh_THJ4", "rh_THJ3",
+            "rh_THJ2", "rh_THJ1", "rh_LFJ5", "rh_LFJ4", "rh_LFJ3", "rh_LFJ2", "rh_LFJ1", "rh_RFJ4",
+            "rh_RFJ3", "rh_RFJ2", "rh_RFJ1", "rh_MFJ4", "rh_MFJ3", "rh_MFJ2", "rh_MFJ1", "rh_FFJ4", "rh_FFJ3",
+            "rh_FFJ2", "rh_FFJ1"]
diff --git a/tests/envs/test_floating_mia_grasp_env.py b/tests/envs/test_floating_mia_grasp_env.py
index 97d8e43..389684b 100644
--- a/tests/envs/test_floating_mia_grasp_env.py
+++ b/tests/envs/test_floating_mia_grasp_env.py
@@ -9,7 +9,6 @@ def env():
         verbose=True,
         horizon=10,
         object_name="insole_on_conveyor_belt/back",
-        early_episode_termination=False,
         observable_object_pos=True,
         difficulty_mode="hard")
 
diff --git a/tests/envs/test_floating_shadow_grasp_env.py b/tests/envs/test_floating_shadow_grasp_env.py
index 5464b3c..3f377b3 100644
--- a/tests/envs/test_floating_shadow_grasp_env.py
+++ b/tests/envs/test_floating_shadow_grasp_env.py
@@ -8,11 +8,9 @@ def env():
         gui=False,
         verbose=True,
         horizon=10,
-        object_name="insole_on_conveyor_belt/back",
-        early_episode_termination=False,
-        # observable_object_pos=True,
-        # difficulty_mode="hard"
-        )
+        object_name="insole",
+        observable_object_pos=True,
+    )
 
 
 @pytest.mark.skip("TODO")
@@ -20,6 +18,7 @@ def test_action_space_dims(env):
     action_space = env.action_space
     assert action_space.shape[0] == 10
 
+
 @pytest.mark.skip("TODO")
 def test_obs_space_dims(env):
     obs_space = env.observation_space
@@ -32,7 +31,6 @@ def test_initial_obs(env):
     assert len(obs) == 18
 
 
-@pytest.mark.skip("TODO")
 def test_eps_done(env):
     env.reset()
     for t in range(9):
diff --git 
a/tests/envs/test_ur10_shadow_grasp_env.py b/tests/envs/test_ur10_shadow_grasp_env.py new file mode 100644 index 0000000..73c4512 --- /dev/null +++ b/tests/envs/test_ur10_shadow_grasp_env.py @@ -0,0 +1,49 @@ +import pytest +from deformable_gym.envs.ur10_shadow_grasp_env import UR10ShadowGraspEnv + + +@pytest.fixture +def env(): + return UR10ShadowGraspEnv( + gui=False, + verbose=True, + horizon=10, + object_name="insole", + #observable_object_pos=True, + ) + + +@pytest.mark.skip("TODO") +def test_action_space_dims(env): + action_space = env.action_space + assert action_space.shape[0] == 10 + + +@pytest.mark.skip("TODO") +def test_obs_space_dims(env): + obs_space = env.observation_space + assert obs_space.shape[0] == 28 + + +@pytest.mark.skip("TODO") +def test_initial_obs(env): + obs = env.reset() + assert len(obs) == 18 + +@pytest.mark.skip("TODO") +def test_eps_done(env): + env.reset() + for t in range(9): + action = env.action_space.sample() + obs, reward, done, info = env.step(action) + + assert len(obs) == 18 + assert isinstance(reward, float) + assert isinstance(done, bool) + assert not done + + action = env.action_space.sample() + obs, reward, done, info = env.step(action) + + assert done + diff --git a/tests/envs/test_ur5_mia_grasp_env.py b/tests/envs/test_ur5_mia_grasp_env.py new file mode 100644 index 0000000..1e4ce67 --- /dev/null +++ b/tests/envs/test_ur5_mia_grasp_env.py @@ -0,0 +1,49 @@ +import pytest +from deformable_gym.envs.ur5_mia_grasp_env import UR5MiaGraspEnv + + +@pytest.fixture +def env(): + return UR5MiaGraspEnv( + gui=False, + verbose=True, + horizon=10, + object_name="insole", + #observable_object_pos=True, + ) + + +@pytest.mark.skip("TODO") +def test_action_space_dims(env): + action_space = env.action_space + assert action_space.shape[0] == 10 + +@pytest.mark.skip("TODO") +def test_obs_space_dims(env): + obs_space = env.observation_space + assert obs_space.shape[0] == 28 + + +@pytest.mark.skip("TODO") +def test_initial_obs(env): + obs = env.reset() + assert len(obs) == 18 + + +@pytest.mark.skip("TODO") +def test_eps_done(env): + env.reset() + for t in range(9): + action = env.action_space.sample() + obs, reward, done, info = env.step(action) + + assert len(obs) == 16 + assert isinstance(reward, float) + assert isinstance(done, bool) + assert not done + + action = env.action_space.sample() + obs, reward, done, info = env.step(action) + + assert done + diff --git a/tests/objects/test_box.py b/tests/objects/test_box.py index 830c39d..641d146 100644 --- a/tests/objects/test_box.py +++ b/tests/objects/test_box.py @@ -1,7 +1,7 @@ import numpy as np import pybullet as pb import pytest -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.objects.bullet_object import ObjectFactory, UrdfObject from numpy.testing import assert_array_almost_equal diff --git a/tests/objects/test_insole.py b/tests/objects/test_insole.py index f9696cb..db731ce 100644 --- a/tests/objects/test_insole.py +++ b/tests/objects/test_insole.py @@ -1,7 +1,7 @@ import numpy as np import pybullet as pb import pytest -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.objects.bullet_object import ObjectFactory, UrdfObject from numpy.testing import assert_array_almost_equal diff --git a/tests/objects/test_insole_on_conveyor.py b/tests/objects/test_insole_on_conveyor.py index 6cb8b5d..b0e7a36 100644 --- 
a/tests/objects/test_insole_on_conveyor.py +++ b/tests/objects/test_insole_on_conveyor.py @@ -1,7 +1,7 @@ import numpy as np import pybullet as pb import pytest -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.objects.bullet_object import ObjectFactory, UrdfObject from numpy.testing import assert_array_almost_equal diff --git a/tests/objects/test_pillow.py b/tests/objects/test_pillow.py index 8746c68..f37e8ab 100644 --- a/tests/objects/test_pillow.py +++ b/tests/objects/test_pillow.py @@ -1,7 +1,7 @@ import numpy as np import pybullet as pb import pytest -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.objects.bullet_object import ObjectFactory, UrdfObject from numpy.testing import assert_array_almost_equal diff --git a/tests/objects/test_sphere.py b/tests/objects/test_sphere.py index bb90240..512c0e6 100644 --- a/tests/objects/test_sphere.py +++ b/tests/objects/test_sphere.py @@ -1,7 +1,7 @@ import numpy as np import pybullet as pb import pytest -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.objects.bullet_object import ObjectFactory, UrdfObject from numpy.testing import assert_array_almost_equal diff --git a/tests/objects/test_urdf_object.py b/tests/objects/test_urdf_object.py index e94d482..6cbe752 100644 --- a/tests/objects/test_urdf_object.py +++ b/tests/objects/test_urdf_object.py @@ -1,7 +1,7 @@ import numpy as np import pybullet as pb import pytest -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.objects.bullet_object import ObjectFactory, UrdfObject from numpy.testing import assert_array_almost_equal diff --git a/tests/robots/test_mia_hand_position.py b/tests/robots/test_mia_hand_position.py index cc5efac..53322dc 100644 --- a/tests/robots/test_mia_hand_position.py +++ b/tests/robots/test_mia_hand_position.py @@ -25,18 +25,10 @@ def test_mia_hand_position_initial_pose(simulation, robot): def test_mia_hand_position_creation(simulation, robot, mia_motors, mia_sensors): - - found_motors = robot.motors.keys() - # check motor creation - for motor in mia_motors: - assert motor in found_motors - - found_sensors = robot.sensors.keys() - + assert set(robot.motors.keys()) == set(mia_motors) # check sensor creation - for sensor in mia_sensors: - assert sensor in found_sensors + assert set(robot.sensors.keys()) == set(mia_sensors) @pytest.mark.skip("TODO") diff --git a/tests/robots/test_mia_hand_velocity.py b/tests/robots/test_mia_hand_velocity.py index c7e1de4..f5c8a82 100644 --- a/tests/robots/test_mia_hand_velocity.py +++ b/tests/robots/test_mia_hand_velocity.py @@ -28,17 +28,10 @@ def test_mia_hand_velocity_initial_pose(simulation, robot): def test_mia_hand_velocity_motor_creation(simulation, robot, mia_motors, mia_sensors): - found_motors = robot.motors.keys() - # check motor creation - for motor in mia_motors: - assert motor in found_motors - - found_sensors = robot.sensors.keys() - + assert set(robot.motors.keys()) == set(mia_motors) # check sensor creation - for sensor in mia_sensors: - assert sensor in found_sensors + assert set(robot.sensors.keys()) == set(mia_sensors) @pytest.mark.skip("TODO") diff --git a/tests/robots/test_shadow_hand_position.py 
b/tests/robots/test_shadow_hand_position.py index 65f930c..0fd2fb2 100644 --- a/tests/robots/test_shadow_hand_position.py +++ b/tests/robots/test_shadow_hand_position.py @@ -33,13 +33,8 @@ def test_shadow_hand_position_initial_pose(simulation, robot, shadow_motors): def test_shadow_hand_position_motor_creation(simulation, robot, shadow_motors): - simulation.add_robot(robot) - - found_motors = robot.motors.keys() - # check motor creation - for motor in shadow_motors: - assert motor in found_motors + assert set(robot.motors.keys()) == set(shadow_motors) @pytest.mark.skip("TODO") diff --git a/tests/robots/test_shadow_hand_velocity.py b/tests/robots/test_shadow_hand_velocity.py index 606e68f..3f259d9 100644 --- a/tests/robots/test_shadow_hand_velocity.py +++ b/tests/robots/test_shadow_hand_velocity.py @@ -1,6 +1,6 @@ import numpy as np import pybullet as pb -from deformable_gym.envs.pybullet_tools import BulletSimulation +from deformable_gym.envs.bullet_simulation import BulletSimulation from deformable_gym.robots.shadow_hand import ShadowHandVelocity from numpy.testing import assert_almost_equal @@ -28,13 +28,8 @@ def test_shadow_hand_velocity_initial_pose(simulation, robot): def test_shadow_hand_velocity_motor_creation(simulation, robot, shadow_motors): - simulation.add_robot(robot) - - found_motors = robot.motors.keys() - # check motor creation - for motor in shadow_motors: - assert motor in found_motors + assert set(robot.motors.keys()) == set(shadow_motors) @pytest.mark.skip("TODO") diff --git a/tests/robots/test_ur10_shadow_position.py b/tests/robots/test_ur10_shadow_position.py index 99a1927..b5926a1 100644 --- a/tests/robots/test_ur10_shadow_position.py +++ b/tests/robots/test_ur10_shadow_position.py @@ -25,3 +25,8 @@ def test_ur10_shadow_position_creation(simulation, robot): actual_pose = np.concatenate(robot.multibody_pose.get_pose()) expected_pose = np.array([0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]) assert_almost_equal(actual_pose, expected_pose) + + +def test_ur10_shadow_position_motor_creation(simulation, robot, ur10_shadow_motors): + # check motor creation + assert set(robot.motors.keys()) == set(ur10_shadow_motors) diff --git a/tests/robots/test_ur10_shadow_velocity.py b/tests/robots/test_ur10_shadow_velocity.py index 0427683..8fc1527 100644 --- a/tests/robots/test_ur10_shadow_velocity.py +++ b/tests/robots/test_ur10_shadow_velocity.py @@ -25,3 +25,7 @@ def test_ur10_shadow_velocity_creation(simulation, robot): expected_pose = np.array([0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]) assert_almost_equal(actual_pose, expected_pose) + +def test_ur10_shadow_velocity_motor_creation(simulation, robot, ur10_shadow_motors): + # check motor creation + assert set(robot.motors.keys()) == set(ur10_shadow_motors) diff --git a/tests/robots/test_ur5_mia_position.py b/tests/robots/test_ur5_mia_position.py index 4394dc3..f802091 100644 --- a/tests/robots/test_ur5_mia_position.py +++ b/tests/robots/test_ur5_mia_position.py @@ -17,9 +17,16 @@ def robot(): @pytest.mark.skip("TODO") -def test_ur5_mia_position_creation(simulation, robot): +def test_ur5_mia_position_initial_position(simulation, robot): actual_pose = np.concatenate(robot.multibody_pose.get_pose()) expected_pose = np.array([0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]) assert_almost_equal(actual_pose, expected_pose) + +def test_ur5_mia_position_creation(simulation, robot, ur5_mia_motors, ur5_mia_sensors): + + # check motor creation + assert set(robot.motors.keys()) == set(ur5_mia_motors) + # check sensor creation + assert set(robot.sensors.keys()) == 
set(ur5_mia_sensors) diff --git a/tests/robots/test_ur5_mia_velocity.py b/tests/robots/test_ur5_mia_velocity.py index 7fc97e7..fe41c5f 100644 --- a/tests/robots/test_ur5_mia_velocity.py +++ b/tests/robots/test_ur5_mia_velocity.py @@ -26,3 +26,10 @@ def test_ur5_mia_velocity_creation(simulation, robot): robot.perform_command(np.array([0.3, -0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])) + +def test_ur5_mia_velocity_motor_creation(simulation, robot, ur5_mia_motors, ur5_mia_sensors): + # check motor creation + assert set(robot.motors.keys()) == set(ur5_mia_motors) + # check sensor creation + assert set(robot.sensors.keys()) == set(ur5_mia_sensors) +