diff --git a/deformable_gym/envs/base_env.py b/deformable_gym/envs/base_env.py index 09068f9..4202c14 100644 --- a/deformable_gym/envs/base_env.py +++ b/deformable_gym/envs/base_env.py @@ -67,11 +67,13 @@ def _create_robot(self): def _load_objects(self): """Load objects to PyBullet simulation.""" - self.plane = pb.loadURDF("plane.urdf", (0, 0, 0), useFixedBase=1) + self.plane = pb.loadURDF("plane.urdf", + (0, 0, 0), + useFixedBase=1) def _hard_reset(self): - """Hard reset the PyBullet simulation and reload all objects. May be necessary, e.g., if soft-bodies in the - environment explode. + """Hard reset the PyBullet simulation and reload all objects. May be + necessary, e.g., if soft-bodies in the environment explode. """ if self.verbose: print("Performing hard reset!") @@ -87,6 +89,9 @@ def reset(self, seed=None, options=None) -> npt.ArrayLike: """ super().reset(seed=seed) + if options is not None and "hard_reset" in options: + self._hard_reset() + assert isinstance(self.robot, BulletRobot) self.robot.reset() @@ -131,7 +136,11 @@ def _get_info(self): """ return {} - def _is_terminated(self, observation: npt.ArrayLike, action: npt.ArrayLike, next_observation: npt.ArrayLike) -> bool: + def _is_terminated(self, + observation: npt.ArrayLike, + action: npt.ArrayLike, + next_observation: npt.ArrayLike + ) -> bool: """Checks whether the current episode is terminated. :param observation: observation before action was taken @@ -141,7 +150,10 @@ def _is_terminated(self, observation: npt.ArrayLike, action: npt.ArrayLike, next """ return self.step_counter >= self.horizon - def _is_truncated(self, state: npt.ArrayLike, action: npt.ArrayLike, next_state: npt.ArrayLike) -> bool: + def _is_truncated(self, + state: npt.ArrayLike, + action: npt.ArrayLike, + next_state: npt.ArrayLike) -> bool: """Checks whether the current episode is truncated. :param state: State diff --git a/deformable_gym/envs/floating_mia_grasp_env.py b/deformable_gym/envs/floating_mia_grasp_env.py index ef2bc42..80021ff 100644 --- a/deformable_gym/envs/floating_mia_grasp_env.py +++ b/deformable_gym/envs/floating_mia_grasp_env.py @@ -34,10 +34,10 @@ class FloatingMiaGraspEnv(GraspDeformableMixin, BaseBulletEnv): object should be sampled from the training or the test set. """ - _STANDARD_INITIAL_POSE = np.r_[0.03, -0.005, 1.0, + STANDARD_INITIAL_POSE = np.r_[0.03, -0.005, 1.0, pb.getQuaternionFromEuler([-np.pi/8, np.pi, 0])] - _HARD_INITIAL_POSE = np.r_[0.03, -0.025, 1.0, + HARD_INITIAL_POSE = np.r_[0.03, -0.025, 1.0, pb.getQuaternionFromEuler([-np.pi/8, np.pi, 0])] _FINGERS_OPEN = {"j_index_fle": 0.0, @@ -58,13 +58,15 @@ class FloatingMiaGraspEnv(GraspDeformableMixin, BaseBulletEnv): "j_ring_fle": 0.71, "j_thumb_fle": 0.3} + _MAX_POS_OFFSET = .0005 + _MAX_ORN_OFFSET = .000005 + def __init__( self, object_name: str = "insole", compute_reward: bool = True, object_scale: float = 1.0, observable_object_pos: bool = False, - observable_time_step: bool = False, difficulty_mode: str = "hard", **kwargs): @@ -75,11 +77,10 @@ def __init__( self.compute_reward = compute_reward self.object_scale = object_scale self._observable_object_pos = observable_object_pos - self._observable_time_step = observable_time_step super().__init__(soft=True, **kwargs) - self.hand_world_pose = self._STANDARD_INITIAL_POSE + self.hand_world_pose = self.STANDARD_INITIAL_POSE self.robot = self._create_robot() limits = pbh.get_limit_array(self.robot.motors.values()) @@ -91,21 +92,19 @@ def __init__( np.array([-2, -2, 0]), -np.ones(4), limits[0][self.actuated_finger_ids], - -np.ones(6)*10]) + -np.full(6, 10)]) upper_observations = np.concatenate([ np.array([2, 2, 2]), np.ones(4), limits[1][self.actuated_finger_ids], - np.ones(6)*10]) + np.full(6, 10.)]) if self._observable_object_pos: - lower_observations = np.append(lower_observations, -np.ones(3)*2) - upper_observations = np.append(upper_observations, np.ones(3)*2) - - if self._observable_time_step: - lower_observations = np.append(lower_observations, 0) - upper_observations = np.append(upper_observations, self.horizon) + lower_observations = np.append(lower_observations, + -np.full(3, 2.)) + upper_observations = np.append(upper_observations, + np.full(3, 2.)) self.observation_space = spaces.Box( low=lower_observations, @@ -114,12 +113,12 @@ def __init__( ) # build the action space - lower = [-np.ones(3) * .0005, # max negative base pos offset - -np.ones(4) * .000005, # max negative base orn offset + lower = [-np.full(3, self._MAX_POS_OFFSET), # max negative base pos offset + -np.full(4, self._MAX_ORN_OFFSET), # max negative base orn offset limits[0][self.actuated_finger_ids]] # negative joint limits - upper = [np.ones(3) * .0005, # max positive base pos offset - np.ones(4) * .000005, # max positive base orn offset + upper = [np.full(3, self._MAX_POS_OFFSET), # max positive base pos offset + np.full(4, self._MAX_ORN_OFFSET), # max positive base orn offset limits[1][self.actuated_finger_ids]] # positive joint limits self.action_space = spaces.Box(low=np.concatenate(lower), @@ -162,22 +161,22 @@ def set_difficulty_mode(self, mode: str): if mode == "hard": self.robot.set_initial_joint_positions(self._FINGERS_OPEN) - self.hand_world_pose = self._HARD_INITIAL_POSE + self.hand_world_pose = self.HARD_INITIAL_POSE elif mode == "medium": self.robot.set_initial_joint_positions(self._FINGERS_HALFWAY_CLOSED) - self.hand_world_pose = self._STANDARD_INITIAL_POSE + self.hand_world_pose = self.STANDARD_INITIAL_POSE elif mode == "easy": self.robot.set_initial_joint_positions(self._FINGERS_CLOSED) - self.hand_world_pose = self._STANDARD_INITIAL_POSE + self.hand_world_pose = self.STANDARD_INITIAL_POSE else: raise ValueError(f"Received unknown difficulty mode {mode}!") - def reset(self, seed=None, options=None, hard_reset=False, pos=None): + def reset(self, seed=None, options=None): - if pos is None: - self.robot.reset_base(self.hand_world_pose) + if options is not None and "initial_pose" in options: + self.robot.reset_base(options["initial_pose"]) else: - self.robot.reset_base(pos) + self.robot.reset_base(self.hand_world_pose) self.object_to_grasp.reset() self.robot.activate_motors() @@ -199,9 +198,6 @@ def _get_observation(self): obj_pos = self.object_to_grasp.get_pose()[:3] state = np.append(state, obj_pos) - if self._observable_time_step: - state = np.append(state, self.step_counter) - return state def calculate_reward(self, state, action, next_state, terminated): diff --git a/deformable_gym/envs/floating_shadow_grasp_env.py b/deformable_gym/envs/floating_shadow_grasp_env.py index 104e1d6..9b8edfd 100644 --- a/deformable_gym/envs/floating_shadow_grasp_env.py +++ b/deformable_gym/envs/floating_shadow_grasp_env.py @@ -33,7 +33,6 @@ def __init__(self, object_name="insole", horizon=100, train=True, compute_reward=True, object_scale=1.0, observable_object_pos: bool = False, - observable_time_step: bool = False, **kwargs): self.insole = None self.train = train @@ -43,7 +42,6 @@ def __init__(self, object_name="insole", self.compute_reward = compute_reward self.object_scale = object_scale self._observable_object_pos = observable_object_pos - self._observable_time_step = observable_time_step super().__init__(horizon=horizon, soft=True, **kwargs) @@ -64,10 +62,6 @@ def __init__(self, object_name="insole", lower_observations = np.append(lower_observations, -np.ones(3)*2) upper_observations = np.append(upper_observations, np.ones(3)*2) - if self._observable_time_step: - lower_observations = np.append(lower_observations, 0) - upper_observations = np.append(upper_observations, self.horizon) - self.observation_space = spaces.Box( low=lower_observations, high=upper_observations, dtype=np.float64) @@ -104,7 +98,7 @@ def _load_objects(self): super()._load_objects() self.object_to_grasp, self.object_position, self.object_orientation = ObjectFactory().create(self.object_name) - def reset(self, seed=None, options=None, hard_reset=False): + def reset(self, seed=None, options=None): self.object_to_grasp.reset() self.robot.activate_motors() diff --git a/deformable_gym/envs/grasp_env.py b/deformable_gym/envs/grasp_env.py index b2ef0cd..23a7a79 100644 --- a/deformable_gym/envs/grasp_env.py +++ b/deformable_gym/envs/grasp_env.py @@ -9,12 +9,10 @@ class GraspEnv(BaseBulletEnv, ABC): def __init__(self, object_name, - observable_object_pos: bool = False, - observable_time_step: bool = False): + observable_object_pos: bool = False): self.object_name = object_name self._observable_object_pos = observable_object_pos - self._observable_time_step = observable_time_step super().__init__(soft=True) @@ -28,13 +26,16 @@ def _load_objects(self): super()._load_objects() self.object_to_grasp, self.object_position, self.object_orientation = ObjectFactory().create(self.object_name) - def reset(self, hard_reset=False, pos=None): + def reset(self, seed=None, options=None): + + if options is not None and "pos" in options: + pos = options["pos"] self.robot.reset(pos) self.object_to_grasp.reset() self.robot.activate_motors() - return super().reset() + return super().reset(seed, options) def _get_observation(self): joint_pos = self.robot.get_joint_positions(self.robot.actuated_real_joints) @@ -46,9 +47,6 @@ def _get_observation(self): obj_pos = self.object_to_grasp.get_pose()[:3] state = np.append(state, obj_pos) - if self._observable_time_step: - state = np.append(state, self.step_counter) - return state def calculate_reward(self, state, action, next_state, done): diff --git a/deformable_gym/envs/ur10_shadow_grasp_env.py b/deformable_gym/envs/ur10_shadow_grasp_env.py index efa3adf..c3949df 100644 --- a/deformable_gym/envs/ur10_shadow_grasp_env.py +++ b/deformable_gym/envs/ur10_shadow_grasp_env.py @@ -97,14 +97,14 @@ def _load_objects(self): self.object_to_grasp, self.object_position, self.object_orientation = \ ObjectFactory().create(self.object_name, object2world=self.object2world, scale=self.object_scale) - def reset(self, hard_reset=False): + def reset(self, seed=None, options=None): pos = None self.object_to_grasp.reset(pos) self.robot.activate_motors() - return super().reset() + return super().reset(seed, options) def is_done(self, state, action, next_state): diff --git a/deformable_gym/envs/ur5_mia_grasp_env.py b/deformable_gym/envs/ur5_mia_grasp_env.py index e4b1520..4983f0e 100644 --- a/deformable_gym/envs/ur5_mia_grasp_env.py +++ b/deformable_gym/envs/ur5_mia_grasp_env.py @@ -92,11 +92,17 @@ class UR5MiaGraspEnv(GraspDeformableMixin, BaseBulletEnv): p=np.array([-0.7, 0.1, 1.8])) def __init__( - self, gui: bool = True, real_time: bool = False, - object_name: str = "insole", verbose: bool = False, - horizon: int = 100, train: bool = True, - thumb_adducted: bool = True, compute_reward: bool = True, - object_scale: float = 1.0, verbose_dt: float = 10.0, + self, + gui: bool = True, + real_time: bool = False, + object_name: str = "insole", + verbose: bool = False, + horizon: int = 100, + train: bool = True, + thumb_adducted: bool = True, + compute_reward: bool = True, + object_scale: float = 1.0, + verbose_dt: float = 10.0, pybullet_options: str = ""): self.train = train self.velocity_commands = False @@ -166,7 +172,7 @@ def _load_objects(self): self.object_to_grasp, self.object_position, self.object_orientation = \ ObjectFactory().create(self.object_name, object2world=self.object2world, scale=self.object_scale) - def reset(self, seed=None, options=None, hard_reset=False): + def reset(self, seed=None, options=None): pos = None diff --git a/deformable_gym/helpers/pybullet_helper.py b/deformable_gym/helpers/pybullet_helper.py index 2bd7d3f..5d7ee61 100644 --- a/deformable_gym/helpers/pybullet_helper.py +++ b/deformable_gym/helpers/pybullet_helper.py @@ -20,8 +20,16 @@ class JointType(Enum): class Joint: - def __init__(self, name, joint_idx, joint_type, pos_idx, vel_idx, low, high, - max_force, max_vel, body_id): + def __init__(self, name: str, + joint_idx: int, + joint_type: JointType, + pos_idx: int, + vel_idx: int, + low: float, + high: float, + max_force: float, + max_vel: float, + body_id: int) -> None: self.name = name self.joint_idx = joint_idx self.joint_type = joint_type @@ -40,29 +48,33 @@ def __init__(self, name, joint_idx, joint_type, pos_idx, vel_idx, low, high, def __repr__(self): return f"Joint({', '.join([f'{k}={v}' for k,v in vars(self).items()])})" - def reset(self): + def reset(self) -> None: """Resets the joint to its initial position.""" self.set_position(self.init_pos) - def set_position(self, position): + def set_position(self, position: float) -> None: """Sets the joint to target position.""" - pb.resetJointState(self.body_id, self.joint_idx, - targetValue=position, targetVelocity=0.0) + pb.resetJointState(self.body_id, + self.joint_idx, + targetValue=position, + targetVelocity=0.0) - def set_limits(self, low, high): + def set_limits(self, low: float, high: float) -> None: """Sets the joint limits.""" - pb.changeDynamics(self.body_id, self.joint_idx, - jointLowerLimit=low, jointUpperLimit=high) + pb.changeDynamics(self.body_id, + self.joint_idx, + jointLowerLimit=low, + jointUpperLimit=high) - def get_position(self): + def get_position(self) -> float: """Gets the current position of the joint.""" return pb.getJointState(self.body_id, self.joint_idx)[0] - def get_velocity(self): + def get_velocity(self) -> float: """Gets the current velocity of the joint.""" return pb.getJointState(self.body_id, self.joint_idx)[1] - def set_target_position(self, position): + def set_target_position(self, position: float) -> None: """Sets the target position of the joint.""" if self.max_vel + 0.1 < self.get_velocity(): @@ -82,7 +94,7 @@ def set_target_position(self, position): if self.verbose: print(f"Warning: Trying to control deactivated motor {self.name}.") - def set_target_velocity(self, velocity): + def set_target_velocity(self, velocity: float) -> None: """Sets the target position of the joint.""" if self.activated: pb.setJointMotorControl2(self.body_id, self.joint_idx, diff --git a/deformable_gym/robots/bullet_robot.py b/deformable_gym/robots/bullet_robot.py index 0bb0ce7..b5fffdf 100644 --- a/deformable_gym/robots/bullet_robot.py +++ b/deformable_gym/robots/bullet_robot.py @@ -111,8 +111,10 @@ def __init__( # create observation and action spaces # TODO: fix to include base actions and sensor observations - self.action_space = Box(np.full(len(self.motors), -1.0), np.full(len(self.motors), 1.0)) - self.observation_space = Box(np.full(len(self.motors), -1.0), np.full(len(self.motors), 1.0)) + self.action_space = Box(np.full(len(self.motors), -1.0), + np.full(len(self.motors), 1.0)) + self.observation_space = Box(np.full(len(self.motors), -1.0), + np.full(len(self.motors), 1.0)) def initialise(self) -> int: """Initialize robot in simulation. @@ -146,7 +148,7 @@ def set_inverse_kinematics_solver(self, inverse_kinematics_solver): """ self.inverse_kinematics_solver = inverse_kinematics_solver - def set_endeffector(self, link_id: int): + def set_endeffector(self, link_id: int) -> None: """Set PyBullet id of end-effector link. Note that this only has an effect if the PyBulletSolver is used as IK @@ -207,7 +209,9 @@ def update_current_command(self, command: Dict[str, float]): for key in command.keys(): self.current_command[key] = command[key] - def send_current_command(self, keys: Union[Iterable[str], None] = None): + def send_current_command(self, + keys: Union[Iterable[str], None] = None + ) -> None: """Sends the currently stored commands to the selected motors. :param keys: The names of the motors to send commands to. @@ -216,14 +220,14 @@ def send_current_command(self, keys: Union[Iterable[str], None] = None): self.actuate_motors(keys) @abc.abstractmethod - def perform_command(self, command: npt.ArrayLike): + def perform_command(self, command: npt.ArrayLike) -> None: """Sends a command to the robot. :param command: Robot command. """ @abc.abstractmethod - def actuate_motors(self, keys: Union[Iterable[str], None] = None): + def actuate_motors(self, keys: Union[Iterable[str], None] = None) -> None: """Takes the provided action and sends the joint controls to the robot. :param keys: List of the motor keys to be actuated, if None all motor @@ -301,8 +305,10 @@ def get_ee_pose(self) -> np.ndarray: current_pos, current_orn = pb.getBasePositionAndOrientation(self._id) return np.concatenate((current_pos, current_orn), axis=0) - def set_ee_pose( - self, target_pos: np.ndarray, target_orn: np.ndarray): + def set_ee_pose(self, + target_pos: np.ndarray, + target_orn: np.ndarray + ) -> None: """Sets the pose of the end-effector instantly. No physics simulation steps required. @@ -317,7 +323,9 @@ def set_ee_pose( zip(self.motors.keys(), joint_pos)} self.set_joint_positions(position_dict) - def reset(self, keys: Union[Iterable[str], None] = None): + def reset(self, + keys: Union[Iterable[str], None] = None + ) -> None: """Resets actuators and sensors. :param keys: Names of joints to reset. Default is all. @@ -332,7 +340,9 @@ def reset(self, keys: Union[Iterable[str], None] = None): self.motors[key].reset() self.current_command[key] = self.motors[key].get_position() - def set_joint_positions(self, position_dict: Dict[str, float]): + def set_joint_positions(self, + position_dict: Dict[str, float] + ) -> None: """Sets the joint positions of the robot. :param position_dict: Maps joint names to angles. @@ -340,17 +350,17 @@ def set_joint_positions(self, position_dict: Dict[str, float]): for key in position_dict.keys(): self.motors[key].set_position(position_dict[key]) - def deactivate_motors(self): + def deactivate_motors(self) -> None: """Deactivates all motors of the robot.""" for motor in self.motors.values(): motor.deactivate() - def activate_motors(self): + def activate_motors(self) -> None: """Activates all motors of the robot.""" for motor in self.motors.values(): motor.activate() - def move_base(self, offset: npt.ArrayLike): + def move_base(self, offset: npt.ArrayLike) -> None: """Moves the robot base. :param offset: Pose offset of the robot given as position and @@ -366,7 +376,7 @@ def move_base(self, offset: npt.ArrayLike): jointChildFrameOrientation=target_orn, maxForce=100000) - def _enforce_limits(self, pose): + def _enforce_limits(self, pose) -> Tuple[Any, Any]: assert self.base_commands, "tried to move base, but base commands " \ "are not enabled" if self.task_space_limit is not None: @@ -378,7 +388,7 @@ def _enforce_limits(self, pose): # TODO enforce orn_limit return target_pos, target_orn - def reset_base(self, pose: npt.ArrayLike): + def reset_base(self, pose: npt.ArrayLike) -> None: """Resets the robot base. :param pose: Pose of the robot given as position and @@ -436,11 +446,15 @@ class HandMixin: def _init_debug_visualizations(self): """Initialize debug visualizations.""" self.robot_pose = Pose( - np.array([0.0, 0.0, 0.0]), np.array([0.0, 0.0, 0.0, 1.0]), - scale=0.1, line_width=1) + np.zeros(3), + np.array([0.0, 0.0, 0.0, 1.0]), + scale=0.1, + line_width=1) self.object_pose = Pose( - np.array([0.0, 0.0, 0.0]), np.array([0.0, 0.0, 0.0, 1.0]), - scale=0.1, line_width=1) + np.zeros(3), + np.array([0.0, 0.0, 0.0, 1.0]), + scale=0.1, + line_width=1) self.contact_normals = [] def get_contact_points(self, object_id: int): diff --git a/setup.py b/setup.py index bc7c25f..1bd6a2f 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ long_description = f.read() setup(name='deformable_gym', - version="0.2.1", + version="0.3.0", maintainer='Melvin Laux', maintainer_email='melvin.laux@uni-bremen.de', description='Gym environments for grasping deformable objects', diff --git a/tests/envs/test_floating_mia_grasp_env.py b/tests/envs/test_floating_mia_grasp_env.py index 2ba4a6d..e6fd63f 100644 --- a/tests/envs/test_floating_mia_grasp_env.py +++ b/tests/envs/test_floating_mia_grasp_env.py @@ -1,6 +1,7 @@ from numpy.testing import assert_allclose import pytest from deformable_gym.envs.floating_mia_grasp_env import FloatingMiaGraspEnv +from gymnasium.wrappers import RescaleAction @pytest.fixture @@ -56,6 +57,36 @@ def test_initial_sensor_info(env: FloatingMiaGraspEnv): assert_allclose(sensor_readings[0], sensor_readings[1]) +def test_episode_reproducibility(): + observations = [] + termination_flags = [] + + env = FloatingMiaGraspEnv(verbose=False, + horizon=10, + gui=False, + observable_object_pos=True, + object_name="insole_on_conveyor_belt/back", + difficulty_mode="hard", + ) + env = RescaleAction(env, 0., 1.) + + env.action_space.seed(SEED) + + for _ in range(2): + observation, _ = env.reset(seed=SEED) + observations.append(observation) + terminated = False + while not terminated: + action = env.action_space.sample() + observation, reward, terminated, truncated, info = env.step(action) + + observations.append(observation) + termination_flags.append(terminated) + + assert_allclose(observations[0], observations[1]) + assert_allclose(termination_flags[0], termination_flags[1]) + + def test_eps_done(env: FloatingMiaGraspEnv): if env._observable_object_pos: obs_space_dims_expected = 19