Merge pull request #15 from dfki-ric/feature/gymnasium
Feature/gymnasium
mlaux1 authored Aug 30, 2023
2 parents 8a1b074 + 8cdbf2f commit dc3b7ea
Showing 19 changed files with 161 additions and 165 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -75,10 +75,10 @@ while num_episodes <= 10:

action = env.action_space.sample()

state, reward, done, _ = env.step(action)
state, reward, terminated, truncated, _ = env.step(action)
episode_return += reward

if done:
if terminated or truncated:
print(f"Episode finished with return {episode_return}!")
num_episodes += 1
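For context, the full loop under the new Gymnasium API looks like this (a minimal sketch; the `gym.make` id and the registration-on-import behaviour are assumptions based on the registrations added in `deformable_gym/__init__.py` below):

```python
import gymnasium as gym

import deformable_gym  # noqa: F401 -- assumed to register the environments on import

env = gym.make("FloatingMiaGraspInsole-v0")
observation, info = env.reset(seed=42)
episode_return = 0.0
num_episodes = 0

while num_episodes <= 10:
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    episode_return += reward
    if terminated or truncated:
        print(f"Episode finished with return {episode_return}!")
        num_episodes += 1
        episode_return = 0.0
        observation, info = env.reset()

env.close()
```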

15 changes: 15 additions & 0 deletions deformable_gym/__init__.py
@@ -0,0 +1,15 @@
from gymnasium.envs.registration import register

register(
id="FloatingMiaGraspInsole-v0",
entry_point="deformable_gym.envs.floating_mia_grasp_env:FloatingMiaGraspEnv",
kwargs={"object_name": "insole_on_conveyor_belt/back",
"observable_object_pos": True}
)

register(
id="FloatingShadowGraspInsole-v0",
entry_point="deformable_gym.envs.floating_shadow_grasp_env:FloatingShadowGraspEnv",
kwargs={"object_name": "insole_on_conveyor_belt/back",
"observable_object_pos": True}
)
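The `kwargs` passed to `register` become constructor defaults; `gym.make` can override them per instantiation (a sketch, assuming importing the package runs the `register()` calls above):

```python
import gymnasium as gym

import deformable_gym  # assumption: importing the package runs the register() calls

# Instantiated with the registered defaults (insole object, observable object position).
env = gym.make("FloatingMiaGraspInsole-v0")

# Registered kwargs can be overridden at construction time.
env_hidden = gym.make("FloatingMiaGraspInsole-v0", observable_object_pos=False)
```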
73 changes: 49 additions & 24 deletions deformable_gym/envs/base_env.py
@@ -1,18 +1,19 @@
import abc
from typing import Any

import gymnasium as gym
import numpy as np
import numpy.typing as npt
import pybullet as pb
import pytransform3d.rotations as pr
from gym.core import Env
from gym import spaces

from gymnasium import spaces
from deformable_gym.robots.bullet_robot import BulletRobot
from deformable_gym.envs.bullet_simulation import BulletSimulation
from deformable_gym.helpers.pybullet_helper import MultibodyPose


class BaseBulletEnv(Env, abc.ABC):
class BaseBulletEnv(gym.Env, abc.ABC):
"""Configures PyBullet for the gym environment.
:param gui: Show PyBullet GUI.
@@ -79,25 +80,26 @@ def _hard_reset(self):
self._load_objects()
self.robot = self._create_robot()

def reset(self) -> np.ndarray:
def reset(self, seed=None, options=None) -> npt.ArrayLike:
"""Reset the environment to its initial state and returns it.
:return: Initial state.
"""

if self.verbose:
print("Performing reset (base)")
super().reset(seed=seed)

assert isinstance(self.robot, BulletRobot)

self.robot.reset()
self.step_counter = 0
self.simulation.timing.reset()

observation = self._get_observation()
info = self._get_info()

if self.verbose:
print(f"Resetting env: {self.observe_state()}")
print(f"Resetting env: {observation}")

return self.observe_state()
return observation, info
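The rewritten method follows the Gymnasium reset contract: call `super().reset(seed=seed)` so that `self.np_random` is seeded, then return an `(observation, info)` pair. A self-contained sketch of the same contract (class and values are illustrative, not from this repository):

```python
import gymnasium as gym
import numpy as np
from gymnasium import spaces


class MinimalEnv(gym.Env):
    observation_space = spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float64)
    action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float64)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)  # seeds self.np_random
        observation = self.np_random.uniform(-1.0, 1.0, size=3)
        return observation, {}  # (observation, info) pair
```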

def render(self, mode: str = "human"):
"""Render environment.
@@ -113,22 +115,39 @@ def render(self, mode: str = "human"):
else:
raise NotImplementedError(f"Render mode {mode} not supported")

def observe_state(self) -> np.ndarray:
def _get_observation(self) -> npt.ArrayLike:
"""Returns the current environment state.
:return: The observation
"""
return self.robot.get_joint_positions()

def is_done(self, state: np.ndarray, action: np.ndarray, next_state: np.ndarray) -> bool:
"""Checks whether the current episode is over or not.
def _get_info(self):
"""Returns the current environment state.
:return: The observation
"""
return {}

def _is_terminated(self, observation: npt.ArrayLike, action: npt.ArrayLike, next_observation: npt.ArrayLike) -> bool:
"""Checks whether the current episode is terminated.
:param observation: observation before action was taken
:param action: Action taken in this step
:param next_observation: Observation after action was taken
:return: Is the current episode terminated?
"""
return self.step_counter >= self.horizon

def _is_truncated(self, state: npt.ArrayLike, action: npt.ArrayLike, next_state: npt.ArrayLike) -> bool:
"""Checks whether the current episode is truncated.
:param state: State
:param action: Action taken in this state
:param next_state: State after action was taken
:return: Is the current episode truncated?
"""
return self.step_counter >= self.horizon
return False
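The split matters to learning code: on termination the successor state has no value by definition, while on truncation (a time or safety limit) the agent should still bootstrap. A sketch of the distinction (illustrative, not part of this repository):

```python
def td_target(reward: float, next_value: float, terminated: bool,
              gamma: float = 0.99) -> float:
    # Bootstrap unless the episode actually terminated; a truncated episode
    # ends for bookkeeping reasons only, so the successor state still has value.
    return reward if terminated else reward + gamma * next_value
```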

def step(self, action: npt.ArrayLike):
"""Take a step in the environment.
@@ -145,39 +164,46 @@ def step(self, action: npt.ArrayLike):
for gym environments. Info dict is currently unused.
"""
# observe current state
state = self.observe_state()
observation = self._get_observation()

# execute action
self.robot.perform_command(action)

# simulate until next time step
self.simulation.step_to_trigger("time_step")

# observe new state
next_state = self.observe_state()
next_observation = self._get_observation()

# update time step
self.step_counter += 1

# check if episode is over
done = self.is_done(state, action, next_state)
terminated = self._is_terminated(observation, action, next_observation)
truncated = self._is_truncated(observation, action, next_observation)

# calculate the reward
reward = self.calculate_reward(state, action, next_state, done)
reward = self.calculate_reward(observation, action, next_observation, terminated)

if self.verbose:
print(f"Finished environment step: {next_state=}, {reward=}, {done=}")
print(f"Finished environment step: {next_observation=}, {reward=}, {terminated=}, {truncated=}")

return next_state, reward, done, {}
return next_observation, reward, terminated, truncated, {}

def close(self):
self.simulation.disconnect()
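Callers written against the old four-tuple step API can be adapted with a small shim like the following (a sketch; Gymnasium also ships compatibility utilities for this conversion):

```python
def step_legacy(env, action):
    # Collapse Gymnasium's five-tuple into the legacy four-tuple by merging
    # terminated and truncated into a single done flag.
    observation, reward, terminated, truncated, info = env.step(action)
    return observation, reward, terminated or truncated, info
```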

@abc.abstractmethod
def calculate_reward(self, state: npt.ArrayLike, action: npt.ArrayLike, next_state: npt.ArrayLike, done: bool):
def calculate_reward(self,
state: npt.ArrayLike,
action: npt.ArrayLike,
next_state: npt.ArrayLike,
terminated: bool):
"""Calculate reward.
:param state: State of the environment.
:param action: Action that has been executed in the state.
:param next_state: State after executing the action.
:param done: Is the episode done?
:param terminated: Is the episode terminated?
:return: Measured reward.
"""

@@ -209,8 +235,7 @@ def _check_forces(self, robot, high_force_threshold, verbose):
contact_points = robot.get_contact_points(self.object_to_grasp.get_id())
accumulated_forces = 0.0
for contact_point in contact_points:
_, _, _, _, _, _, _, _, dist, force, _, _, _, _ = contact_point
accumulated_forces += force
accumulated_forces += contact_point[9]
high_forces = accumulated_forces > high_force_threshold
contact_forces = accumulated_forces > 0
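This works because index 9 of each tuple returned by `pybullet.getContactPoints` is the contact normal force (index 8 is the contact distance), exactly the fields the removed unpacking spelled out. A standalone sketch of the same accumulation (function name is illustrative):

```python
import pybullet as pb


def total_normal_force(body_a: int, body_b: int) -> float:
    # Field 9 of each contact tuple is the normal force, field 8 the
    # contact distance (PyBullet Quickstart Guide).
    return sum(cp[9] for cp in pb.getContactPoints(body_a, body_b))
```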

54 changes: 17 additions & 37 deletions deformable_gym/envs/floating_mia_grasp_env.py
@@ -1,11 +1,6 @@
import random
import time
from typing import Union

import numpy as np
import numpy.typing as npt
import pybullet as pb
from gym import spaces
from gymnasium import spaces
from deformable_gym.robots import mia_hand
from deformable_gym.envs.base_env import BaseBulletEnv, GraspDeformableMixin
from deformable_gym.helpers import pybullet_helper as pbh
@@ -68,8 +63,6 @@ def __init__(
object_name: str = "insole",
compute_reward: bool = True,
object_scale: float = 1.0,
task_space_limit: Union[npt.ArrayLike, None] = ((-0.02, -.05, .95),
(0.02, 0.05, 1.05)),
observable_object_pos: bool = False,
observable_time_step: bool = False,
difficulty_mode: str = "hard",
@@ -81,7 +74,6 @@ def __init__(
self.randomised = False
self.compute_reward = compute_reward
self.object_scale = object_scale
self.task_space_limit = task_space_limit
self._observable_object_pos = observable_object_pos
self._observable_time_step = observable_time_step

@@ -99,25 +91,27 @@ def __init__(
np.array([-2, -2, 0]),
-np.ones(4),
limits[0][self.actuated_finger_ids],
-np.ones(6)*5])
-np.ones(6)*10])

upper_observations = np.concatenate([
np.array([2, 2, 2]),
np.ones(4),
limits[1][self.actuated_finger_ids],
np.ones(6)*5])
np.ones(6)*10])

if self._observable_object_pos:
lower_observations = np.append(lower_observations, np.ones(3))
upper_observations = np.append(upper_observations, -np.ones(3))
lower_observations = np.append(lower_observations, -np.ones(3)*2)
upper_observations = np.append(upper_observations, np.ones(3)*2)

if self._observable_time_step:
lower_observations = np.append(lower_observations, 0)
upper_observations = np.append(upper_observations, self.horizon)

self.observation_space = spaces.Box(
low=lower_observations,
high=upper_observations)
high=upper_observations,
dtype=np.float64
)

# build the action space
lower = [-np.ones(3) * .0005, # max negative base pos offset
@@ -128,25 +122,22 @@ def __init__(
np.ones(4) * .000005, # max positive base orn offset
limits[1][self.actuated_finger_ids]] # positive joint limits

self.action_space = spaces.Box(low=np.concatenate(lower), high=np.concatenate(upper))
self.action_space = spaces.Box(low=np.concatenate(lower), high=np.concatenate(upper), dtype=np.float64)
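Declaring `dtype=np.float64` matters under Gymnasium: a `Box` stores an explicit dtype and checks samples and observations against it, so a float64 space matches the float64 arrays NumPy produces by default. A small sketch (values are illustrative):

```python
import numpy as np
from gymnasium import spaces

box = spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float64)
observation = np.zeros(3)  # NumPy defaults to float64

print(box.dtype)                  # float64
print(box.contains(observation))  # True; a float32 space may reject float64 arrays
```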

def _create_robot(self):
orn_limit = None
# orn_limit = [[0, 0, 0], [0, 0, 0]]

if self.velocity_commands:
robot = mia_hand.MiaHandVelocity(
world_pos=self.hand_world_pose[:3],
world_orn=pb.getEulerFromQuaternion(self.hand_world_pose[3:]),
task_space_limit=self.task_space_limit,
verbose=self.verbose,
orn_limit=orn_limit,
base_commands=True)
else:
robot = mia_hand.MiaHandPosition(
world_pos=self.hand_world_pose[:3],
world_orn=pb.getEulerFromQuaternion(self.hand_world_pose[3:]),
task_space_limit=self.task_space_limit,
verbose=self.verbose,
orn_limit=orn_limit,
base_commands=True)
@@ -157,8 +148,7 @@

def _load_objects(self):
super()._load_objects()
self.object_to_grasp, self.object_position, self.object_orientation = \
ObjectFactory().create(self.object_name)
self.object_to_grasp, self.object_position, self.object_orientation = ObjectFactory().create(self.object_name)

def set_difficulty_mode(self, mode: str):
"""
@@ -180,7 +170,7 @@ def set_difficulty_mode(self, mode: str):
else:
raise ValueError(f"Received unknown difficulty mode {mode}!")

def reset(self, hard_reset=False, pos=None):
def reset(self, seed=None, options=None, hard_reset=False, pos=None):

if pos is None:
self.robot.reset_base(self.hand_world_pose)
Expand All @@ -190,18 +180,13 @@ def reset(self, hard_reset=False, pos=None):
self.object_to_grasp.reset()
self.robot.activate_motors()

return super().reset()
return super().reset(seed, options)

def is_done(self, state, action, next_state):
def _is_truncated(self, state, action, next_state):
# check if insole is exploded
if self._deformable_is_exploded():
print("Exploded insole")
return True
return self._deformable_is_exploded()

return super().is_done(state, action, next_state)

def observe_state(self):
def _get_observation(self):
joint_pos = self.robot.get_joint_positions(self.robot.actuated_real_joints)
ee_pose = self.robot.get_ee_pose()
sensor_readings = self.robot.get_sensor_readings()
@@ -217,17 +202,12 @@ def observe_state(self):

return state

def calculate_reward(self, state, action, next_state, done):
def calculate_reward(self, state, action, next_state, terminated):
"""
Calculates the reward by counting how many insole vertices are in the
target position.
"""
if done:
if not self.compute_reward:
return 0.0
if not (round(action[-1]) == 1 or self.step_counter >= self.horizon):
return -100

if terminated:
self.robot.deactivate_motors()
# remove insole anchors and simulate steps
self.object_to_grasp.remove_anchors()