Merge pull request #15 from dfki-ric/feature/gymnasium
Feature/gymnasium
mlaux1 authored Aug 30, 2023
2 parents 8a1b074 + 8cdbf2f commit dc3b7ea
Showing 19 changed files with 161 additions and 165 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -75,10 +75,10 @@ while num_episodes <= 10:

action = env.action_space.sample()

state, reward, done, _ = env.step(action)
state, reward, terminated, truncated, _ = env.step(action)
episode_return += reward

if done:
if terminated or truncated:
print(f"Episode finished with return {episode_return}!")
num_episodes += 1
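For context, the full loop under the new Gymnasium API looks like this (a minimal sketch; the `gym.make` id and the registration-on-import behaviour are assumptions based on the registrations added in `deformable_gym/__init__.py` below):

```python
import gymnasium as gym

import deformable_gym  # noqa: F401 -- assumed to register the environments on import

env = gym.make("FloatingMiaGraspInsole-v0")
observation, info = env.reset(seed=42)
episode_return = 0.0
num_episodes = 0

while num_episodes <= 10:
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    episode_return += reward
    if terminated or truncated:
        print(f"Episode finished with return {episode_return}!")
        num_episodes += 1
        episode_return = 0.0
        observation, info = env.reset()

env.close()
```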

15 changes: 15 additions & 0 deletions deformable_gym/__init__.py
@@ -0,0 +1,15 @@
from gymnasium.envs.registration import register

register(
id="FloatingMiaGraspInsole-v0",
entry_point="deformable_gym.envs.floating_mia_grasp_env:FloatingMiaGraspEnv",
kwargs={"object_name": "insole_on_conveyor_belt/back",
"observable_object_pos": True}
)

register(
id="FloatingShadowGraspInsole-v0",
entry_point="deformable_gym.envs.floating_shadow_grasp_env:FloatingShadowGraspEnv",
kwargs={"object_name": "insole_on_conveyor_belt/back",
"observable_object_pos": True}
)
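The `kwargs` passed to `register` become constructor defaults; `gym.make` can override them per instantiation (a sketch, assuming importing the package runs the `register()` calls above):

```python
import gymnasium as gym

import deformable_gym  # assumption: importing the package runs the register() calls

# Instantiated with the registered defaults (insole object, observable object position).
env = gym.make("FloatingMiaGraspInsole-v0")

# Registered kwargs can be overridden at construction time.
env_hidden = gym.make("FloatingMiaGraspInsole-v0", observable_object_pos=False)
```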
73 changes: 49 additions & 24 deletions deformable_gym/envs/base_env.py
@@ -1,18 +1,19 @@
import abc
from typing import Any

import gymnasium as gym
import numpy as np
import numpy.typing as npt
import pybullet as pb
import pytransform3d.rotations as pr
from gym.core import Env
from gym import spaces

from gymnasium import spaces
from deformable_gym.robots.bullet_robot import BulletRobot
from deformable_gym.envs.bullet_simulation import BulletSimulation
from deformable_gym.helpers.pybullet_helper import MultibodyPose


class BaseBulletEnv(Env, abc.ABC):
class BaseBulletEnv(gym.Env, abc.ABC):
"""Configures PyBullet for the gym environment.
:param gui: Show PyBullet GUI.
@@ -79,25 +80,26 @@ def _hard_reset(self):
self._load_objects()
self.robot = self._create_robot()

def reset(self) -> np.ndarray:
def reset(self, seed=None, options=None) -> npt.ArrayLike:
"""Reset the environment to its initial state and returns it.
:return: Initial state.
"""

if self.verbose:
print("Performing reset (base)")
super().reset(seed=seed)

assert isinstance(self.robot, BulletRobot)

self.robot.reset()
self.step_counter = 0
self.simulation.timing.reset()

observation = self._get_observation()
info = self._get_info()

if self.verbose:
print(f"Resetting env: {self.observe_state()}")
print(f"Resetting env: {observation}")

return self.observe_state()
return observation, info
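The rewritten method follows the Gymnasium reset contract: call `super().reset(seed=seed)` so that `self.np_random` is seeded, then return an `(observation, info)` pair. A self-contained sketch of the same contract (class and values are illustrative, not from this repository):

```python
import gymnasium as gym
import numpy as np
from gymnasium import spaces


class MinimalEnv(gym.Env):
    observation_space = spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float64)
    action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float64)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)  # seeds self.np_random
        observation = self.np_random.uniform(-1.0, 1.0, size=3)
        return observation, {}  # (observation, info) pair
```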

def render(self, mode: str = "human"):
"""Render environment.
@@ -113,22 +115,39 @@ def render(self, mode: str = "human"):
else:
raise NotImplementedError(f"Render mode {mode} not supported")

def observe_state(self) -> np.ndarray:
def _get_observation(self) -> npt.ArrayLike:
"""Returns the current environment state.
:return: The observation
"""
return self.robot.get_joint_positions()

def is_done(self, state: np.ndarray, action: np.ndarray, next_state: np.ndarray) -> bool:
"""Checks whether the current episode is over or not.
def _get_info(self):
"""Returns the current environment state.
:return: The observation
"""
return {}

def _is_terminated(self, observation: npt.ArrayLike, action: npt.ArrayLike, next_observation: npt.ArrayLike) -> bool:
"""Checks whether the current episode is terminated.
:param observation: observation before action was taken
:param action: Action taken in this step
:param next_observation: Observation after action was taken
:return: Is the current episode terminated?
"""
return self.step_counter >= self.horizon

def _is_truncated(self, state: npt.ArrayLike, action: npt.ArrayLike, next_state: npt.ArrayLike) -> bool:
"""Checks whether the current episode is truncated.
:param state: State
:param action: Action taken in this state
:param next_state: State after action was taken
:return: Is the current episode truncated?
"""
return self.step_counter >= self.horizon
return False
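The split matters to learning code: on termination the successor state has no value by definition, while on truncation (a time or safety limit) the agent should still bootstrap. A sketch of the distinction (illustrative, not part of this repository):

```python
def td_target(reward: float, next_value: float, terminated: bool,
              gamma: float = 0.99) -> float:
    # Bootstrap unless the episode actually terminated; a truncated episode
    # ends for bookkeeping reasons only, so the successor state still has value.
    return reward if terminated else reward + gamma * next_value
```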

def step(self, action: npt.ArrayLike):
"""Take a step in the environment.
@@ -145,39 +164,46 @@ def step(self, action: npt.ArrayLike):
for gym environments. Info dict is currently unused.
"""
# observe current state
state = self.observe_state()
observation = self._get_observation()

# execute action
self.robot.perform_command(action)

# simulate until next time step
self.simulation.step_to_trigger("time_step")

# observe new state
next_state = self.observe_state()
next_observation = self._get_observation()

# update time step
self.step_counter += 1

# check if episode is over
done = self.is_done(state, action, next_state)
terminated = self._is_terminated(observation, action, next_observation)
truncated = self._is_truncated(observation, action, next_observation)

# calculate the reward
reward = self.calculate_reward(state, action, next_state, done)
reward = self.calculate_reward(observation, action, next_observation, terminated)

if self.verbose:
print(f"Finished environment step: {next_state=}, {reward=}, {done=}")
print(f"Finished environment step: {next_observation=}, {reward=}, {terminated=}, {truncated=}")

return next_state, reward, done, {}
return next_observation, reward, terminated, truncated, {}

def close(self):
self.simulation.disconnect()
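Callers written against the old four-tuple step API can be adapted with a small shim like the following (a sketch; Gymnasium also ships compatibility utilities for this conversion):

```python
def step_legacy(env, action):
    # Collapse Gymnasium's five-tuple into the legacy four-tuple by merging
    # terminated and truncated into a single done flag.
    observation, reward, terminated, truncated, info = env.step(action)
    return observation, reward, terminated or truncated, info
```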

@abc.abstractmethod
def calculate_reward(self, state: npt.ArrayLike, action: npt.ArrayLike, next_state: npt.ArrayLike, done: bool):
def calculate_reward(self,
state: npt.ArrayLike,
action: npt.ArrayLike,
next_state: npt.ArrayLike,
terminated: bool):
"""Calculate reward.
:param state: State of the environment.
:param action: Action that has been executed in the state.
:param next_state: State after executing the action.
:param done: Is the episode done?
:param terminated: Is the episode terminated?
:return: Measured reward.
"""

@@ -209,8 +235,7 @@ def _check_forces(self, robot, high_force_threshold, verbose):
contact_points = robot.get_contact_points(self.object_to_grasp.get_id())
accumulated_forces = 0.0
for contact_point in contact_points:
_, _, _, _, _, _, _, _, dist, force, _, _, _, _ = contact_point
accumulated_forces += force
accumulated_forces += contact_point[9]
high_forces = accumulated_forces > high_force_threshold
contact_forces = accumulated_forces > 0
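This works because index 9 of each tuple returned by `pybullet.getContactPoints` is the contact normal force (index 8 is the contact distance), exactly the fields the removed unpacking spelled out. A standalone sketch of the same accumulation (function name is illustrative):

```python
import pybullet as pb


def total_normal_force(body_a: int, body_b: int) -> float:
    # Field 9 of each contact tuple is the normal force, field 8 the
    # contact distance (PyBullet Quickstart Guide).
    return sum(cp[9] for cp in pb.getContactPoints(body_a, body_b))
```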

54 changes: 17 additions & 37 deletions deformable_gym/envs/floating_mia_grasp_env.py
@@ -1,11 +1,6 @@
import random
import time
from typing import Union

import numpy as np
import numpy.typing as npt
import pybullet as pb
from gym import spaces
from gymnasium import spaces
from deformable_gym.robots import mia_hand
from deformable_gym.envs.base_env import BaseBulletEnv, GraspDeformableMixin
from deformable_gym.helpers import pybullet_helper as pbh
@@ -68,8 +63,6 @@ def __init__(
object_name: str = "insole",
compute_reward: bool = True,
object_scale: float = 1.0,
task_space_limit: Union[npt.ArrayLike, None] = ((-0.02, -.05, .95),
(0.02, 0.05, 1.05)),
observable_object_pos: bool = False,
observable_time_step: bool = False,
difficulty_mode: str = "hard",
@@ -81,7 +74,6 @@ def __init__(
self.randomised = False
self.compute_reward = compute_reward
self.object_scale = object_scale
self.task_space_limit = task_space_limit
self._observable_object_pos = observable_object_pos
self._observable_time_step = observable_time_step

@@ -99,25 +91,27 @@ def __init__(
np.array([-2, -2, 0]),
-np.ones(4),
limits[0][self.actuated_finger_ids],
-np.ones(6)*5])
-np.ones(6)*10])

upper_observations = np.concatenate([
np.array([2, 2, 2]),
np.ones(4),
limits[1][self.actuated_finger_ids],
np.ones(6)*5])
np.ones(6)*10])

if self._observable_object_pos:
lower_observations = np.append(lower_observations, np.ones(3))
upper_observations = np.append(upper_observations, -np.ones(3))
lower_observations = np.append(lower_observations, -np.ones(3)*2)
upper_observations = np.append(upper_observations, np.ones(3)*2)

if self._observable_time_step:
lower_observations = np.append(lower_observations, 0)
upper_observations = np.append(upper_observations, self.horizon)

self.observation_space = spaces.Box(
low=lower_observations,
high=upper_observations)
high=upper_observations,
dtype=np.float64
)

# build the action space
lower = [-np.ones(3) * .0005, # max negative base pos offset
@@ -128,25 +122,22 @@ def __init__(
np.ones(4) * .000005, # max positive base orn offset
limits[1][self.actuated_finger_ids]] # positive joint limits

self.action_space = spaces.Box(low=np.concatenate(lower), high=np.concatenate(upper))
self.action_space = spaces.Box(low=np.concatenate(lower), high=np.concatenate(upper), dtype=np.float64)
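Declaring `dtype=np.float64` matters under Gymnasium: a `Box` stores an explicit dtype and checks samples and observations against it, so a float64 space matches the float64 arrays NumPy produces by default. A small sketch (values are illustrative):

```python
import numpy as np
from gymnasium import spaces

box = spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float64)
observation = np.zeros(3)  # NumPy defaults to float64

print(box.dtype)                  # float64
print(box.contains(observation))  # True; a float32 space may reject float64 arrays
```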

def _create_robot(self):
orn_limit = None
# orn_limit = [[0, 0, 0], [0, 0, 0]]

if self.velocity_commands:
robot = mia_hand.MiaHandVelocity(
world_pos=self.hand_world_pose[:3],
world_orn=pb.getEulerFromQuaternion(self.hand_world_pose[3:]),
task_space_limit=self.task_space_limit,
verbose=self.verbose,
orn_limit=orn_limit,
base_commands=True)
else:
robot = mia_hand.MiaHandPosition(
world_pos=self.hand_world_pose[:3],
world_orn=pb.getEulerFromQuaternion(self.hand_world_pose[3:]),
task_space_limit=self.task_space_limit,
verbose=self.verbose,
orn_limit=orn_limit,
base_commands=True)
@@ -157,8 +148,7 @@

def _load_objects(self):
super()._load_objects()
self.object_to_grasp, self.object_position, self.object_orientation = \
ObjectFactory().create(self.object_name)
self.object_to_grasp, self.object_position, self.object_orientation = ObjectFactory().create(self.object_name)

def set_difficulty_mode(self, mode: str):
"""
@@ -180,7 +170,7 @@ def set_difficulty_mode(self, mode: str):
else:
raise ValueError(f"Received unknown difficulty mode {mode}!")

def reset(self, hard_reset=False, pos=None):
def reset(self, seed=None, options=None, hard_reset=False, pos=None):

if pos is None:
self.robot.reset_base(self.hand_world_pose)
Expand All @@ -190,18 +180,13 @@ def reset(self, hard_reset=False, pos=None):
self.object_to_grasp.reset()
self.robot.activate_motors()

return super().reset()
return super().reset(seed, options)

def is_done(self, state, action, next_state):
def _is_truncated(self, state, action, next_state):
# check if insole is exploded
if self._deformable_is_exploded():
print("Exploded insole")
return True
return self._deformable_is_exploded()

return super().is_done(state, action, next_state)

def observe_state(self):
def _get_observation(self):
joint_pos = self.robot.get_joint_positions(self.robot.actuated_real_joints)
ee_pose = self.robot.get_ee_pose()
sensor_readings = self.robot.get_sensor_readings()
@@ -217,17 +202,12 @@ def observe_state(self):

return state

def calculate_reward(self, state, action, next_state, done):
def calculate_reward(self, state, action, next_state, terminated):
"""
Calculates the reward by counting how many insole vertices are in the
target position.
"""
if done:
if not self.compute_reward:
return 0.0
if not (round(action[-1]) == 1 or self.step_counter >= self.horizon):
return -100

if terminated:
self.robot.deactivate_motors()
# remove insole anchors and simulate steps
self.object_to_grasp.remove_anchors()