diff --git a/minigrid/__init__.py b/minigrid/__init__.py index 8841f90c6..155e1413a 100644 --- a/minigrid/__init__.py +++ b/minigrid/__init__.py @@ -1136,12 +1136,12 @@ def register_minigrid_envs(): # BabyAI - Language based levels - Level_MixedTrainLocal and Level_MixedTestLocal # ---------------------------------------- - + register( id="BabyAI-MixedTrainLocal-v0", entry_point="minigrid.envs.babyai:Level_MixedTrainLocal", ) - + register( id="BabyAI-MixedTestLocal-v0", entry_point="minigrid.envs.babyai:Level_MixedTestLocal", diff --git a/minigrid/envs/babyai/__init__.py b/minigrid/envs/babyai/__init__.py index 5dff6df9b..45fe2173a 100644 --- a/minigrid/envs/babyai/__init__.py +++ b/minigrid/envs/babyai/__init__.py @@ -13,6 +13,10 @@ GoToRedBlueBall, GoToSeq, ) +from minigrid.envs.babyai.mixed_seq_levels import ( + Level_MixedTestLocal, + Level_MixedTrainLocal, +) from minigrid.envs.babyai.open import ( Open, OpenDoor, @@ -51,7 +55,3 @@ UnlockPickup, UnlockToUnlock, ) -from minigrid.envs.babyai.mixed_seq_levels import ( - Level_MixedTrainLocal, - Level_MixedTestLocal, -) \ No newline at end of file diff --git a/minigrid/envs/babyai/core/levelgen.py b/minigrid/envs/babyai/core/levelgen.py index 04550ce15..fe744074d 100644 --- a/minigrid/envs/babyai/core/levelgen.py +++ b/minigrid/envs/babyai/core/levelgen.py @@ -96,18 +96,9 @@ def add_locked_room(self, color=None): continue if color is not None: - door, _ = self.add_door( - i, j, - door_idx, - color=color, - locked=True - ) + door, _ = self.add_door(i, j, door_idx, color=color, locked=True) else: - door, _ = self.add_door( - i, j, - door_idx, - locked=True - ) + door, _ = self.add_door(i, j, door_idx, locked=True) # Done adding locked room break diff --git a/minigrid/envs/babyai/mixed_seq_levels.py b/minigrid/envs/babyai/mixed_seq_levels.py index 2037889d4..5870a2a32 100644 --- a/minigrid/envs/babyai/mixed_seq_levels.py +++ b/minigrid/envs/babyai/mixed_seq_levels.py @@ -2,13 +2,20 @@ Copied and adapted from https://github.com/flowersteam/Grounding_LLMs_with_online_RL """ +from __future__ import annotations + +from minigrid.envs.babyai.core.levelgen import ( + GoToInstr, + LevelGen, + PickupInstr, + PutNextInstr, +) from minigrid.envs.babyai.core.verifier import ( AfterInstr, BeforeInstr, ObjDesc, OpenInstr, ) -from minigrid.envs.babyai.core.levelgen import LevelGen, GoToInstr, PickupInstr, PutNextInstr class Level_MixedTrainLocal(LevelGen): @@ -33,20 +40,21 @@ class Level_MixedTrainLocal(LevelGen): """ def __init__( - self, - room_size=8, - num_rows=1, - num_cols=1, - num_dists=8, - instr_kinds=['action', 'seq1'], - locations=False, - unblocking=False, - implicit_unlock=False, - **kwargs, + self, + room_size=8, + num_rows=1, + num_cols=1, + num_dists=8, + instr_kinds=["action", "seq1"], + locations=False, + unblocking=False, + implicit_unlock=False, + **kwargs, ): - - action = self._rand_elem(['goto', 'pickup', 'open', 'putnext', 'pick up seq go to']) - if action == 'open': + action = self._rand_elem( + ["goto", "pickup", "open", "putnext", "pick up seq go to"] + ) + if action == "open": num_cols = 2 num_rows = 1 # We add many distractors to increase the probability @@ -66,23 +74,24 @@ def __init__( # ['goto', 'pickup', 'open', 'putnext', 'pick up seq go to'], def gen_mission(self): - action = self._rand_elem(self.action_kinds) mission_accepted = False all_objects_reachable = False - if action == 'open': - + if action == "open": while not mission_accepted or not all_objects_reachable: - self._regen_grid() - color_door = self._rand_elem(['yellow', 'green', 'blue', 'purple']) # red and grey excluded + color_door = self._rand_elem( + ["yellow", "green", "blue", "purple"] + ) # red and grey excluded self.add_locked_room(color_door) self.connect_all() for j in range(self.num_rows): for i in range(self.num_cols): if self.get_room(i, j) is not self.locked_room: - self.add_distractors(i, j, num_distractors=self.num_dists, all_unique=False) + self.add_distractors( + i, j, num_distractors=self.num_dists, all_unique=False + ) # The agent must be placed after all the object to respect constraints while True: @@ -97,7 +106,7 @@ def gen_mission(self): color_in_instr = self._rand_elem([None, color_door]) - desc = ObjDesc('door', color_in_instr) + desc = ObjDesc("door", color_in_instr) self.instrs = OpenInstr(desc) mission_accepted = not (self.exclude_substrings()) @@ -105,44 +114,50 @@ def gen_mission(self): """if color_in_instr is None and mission_accepted and all_objects_reachable: print(color_door)""" - elif action == 'goto': + elif action == "goto": self.num_cols = 1 self.num_rows = 1 while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 1, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 1, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj = self._rand_elem(objs) self.instrs = GoToInstr(ObjDesc(obj.type, obj.color)) mission_accepted = not (self.exclude_substrings()) - elif action == 'pickup': + elif action == "pickup": self.num_cols = 1 self.num_rows = 1 while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 1, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 1, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj = self._rand_elem(objs) - while str(obj.type) == 'door': + while str(obj.type) == "door": obj = self._rand_elem(objs) self.instrs = PickupInstr(ObjDesc(obj.type, obj.color)) mission_accepted = not (self.exclude_substrings()) - elif action == 'putnext': + elif action == "putnext": self.num_cols = 1 self.num_rows = 1 while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 2, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 2, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj_1 = self._rand_elem(objs) - while str(obj_1.type) == 'door': + while str(obj_1.type) == "door": obj_1 = self._rand_elem(objs) desc1 = ObjDesc(obj_1.type, obj_1.color) obj_2 = self._rand_elem(objs) @@ -157,16 +172,18 @@ def gen_mission(self): mission_accepted = not (self.exclude_substrings()) - elif action == 'pick up seq go to': + elif action == "pick up seq go to": self.num_cols = 1 self.num_rows = 1 while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 2, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 2, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj_a = self._rand_elem(objs) - while str(obj_a.type) == 'door': + while str(obj_a.type) == "door": obj_a = self._rand_elem(objs) instr_a = PickupInstr(ObjDesc(obj_a.type, obj_a.color)) obj_b = self._rand_elem(objs) @@ -179,19 +196,24 @@ def gen_mission(self): obj_b = self._rand_elem(objs) instr_b = GoToInstr(ObjDesc(obj_b.type, obj_b.color)) - type_instr = self._rand_elem(['Before', 'After']) + type_instr = self._rand_elem(["Before", "After"]) - if type_instr == 'Before': + if type_instr == "Before": self.instrs = BeforeInstr(instr_a, instr_b) else: self.instrs = AfterInstr(instr_b, instr_a) mission_accepted = not (self.exclude_substrings()) - def exclude_substrings(self): # True if contains excluded substring - list_exclude_combinaison = ["yellow box", "red key", "red door", "green ball", "grey door"] + list_exclude_combinaison = [ + "yellow box", + "red key", + "red door", + "green ball", + "grey door", + ] for sub_str in list_exclude_combinaison: str = self.instrs.surface(self) @@ -230,29 +252,33 @@ def _regen_grid(self): room = self.room_grid[j][i] x_l, y_l = (room.top[0] + 1, room.top[1] + 1) - x_m, y_m = (room.top[0] + room.size[0] - 1, room.top[1] + room.size[1] - 1) + x_m, y_m = ( + room.top[0] + room.size[0] - 1, + room.top[1] + room.size[1] - 1, + ) # Door positions, order is right, down, left, up if i < self.num_cols - 1: - room.neighbors[0] = self.room_grid[j][i+1] + room.neighbors[0] = self.room_grid[j][i + 1] room.door_pos[0] = (x_m, self._rand_int(y_l, y_m)) if j < self.num_rows - 1: - room.neighbors[1] = self.room_grid[j+1][i] + room.neighbors[1] = self.room_grid[j + 1][i] room.door_pos[1] = (self._rand_int(x_l, x_m), y_m) if i > 0: - room.neighbors[2] = self.room_grid[j][i-1] + room.neighbors[2] = self.room_grid[j][i - 1] room.door_pos[2] = room.neighbors[2].door_pos[0] if j > 0: - room.neighbors[3] = self.room_grid[j-1][i] + room.neighbors[3] = self.room_grid[j - 1][i] room.door_pos[3] = room.neighbors[3].door_pos[1] # The agent starts in the middle, facing right self.agent_pos = ( - (self.num_cols // 2) * (self.room_size-1) + (self.room_size // 2), - (self.num_rows // 2) * (self.room_size-1) + (self.room_size // 2) + (self.num_cols // 2) * (self.room_size - 1) + (self.room_size // 2), + (self.num_rows // 2) * (self.room_size - 1) + (self.room_size // 2), ) self.agent_dir = 0 + class Level_MixedTestLocal(LevelGen): """ Union of all instructions from PutNext, Open, Goto and PickUp. @@ -273,20 +299,21 @@ class Level_MixedTestLocal(LevelGen): """ def __init__( - self, - room_size=8, - num_rows=1, - num_cols=1, - num_dists=8, - instr_kinds=['action', 'seq1'], - locations=False, - unblocking=False, - implicit_unlock=False, - **kwargs, + self, + room_size=8, + num_rows=1, + num_cols=1, + num_dists=8, + instr_kinds=["action", "seq1"], + locations=False, + unblocking=False, + implicit_unlock=False, + **kwargs, ): - - action = self._rand_elem(['goto', 'pickup', 'open', 'putnext', 'pick up seq go to']) - if action == 'open': + action = self._rand_elem( + ["goto", "pickup", "open", "putnext", "pick up seq go to"] + ) + if action == "open": num_cols = 2 num_rows = 1 # We add many distractors to increase the probability @@ -305,23 +332,24 @@ def __init__( ) def gen_mission(self): - action = self._rand_elem(self.action_kinds) mission_accepted = False all_objects_reachable = False - if action == 'open': - + if action == "open": while not mission_accepted or not all_objects_reachable: - self._regen_grid() - color_door = self._rand_elem(['red', 'grey']) # only red and grey doors at test time + color_door = self._rand_elem( + ["red", "grey"] + ) # only red and grey doors at test time self.add_locked_room(color_door) self.connect_all() for j in range(self.num_rows): for i in range(self.num_cols): if self.get_room(i, j) is not self.locked_room: - self.add_distractors(i, j, num_distractors=self.num_dists, all_unique=False) + self.add_distractors( + i, j, num_distractors=self.num_dists, all_unique=False + ) # The agent must be placed after all the object to respect constraints while True: @@ -334,49 +362,55 @@ def gen_mission(self): all_objects_reachable = self.check_objs_reachable(raise_exc=False) - desc = ObjDesc('door', color_door) + desc = ObjDesc("door", color_door) self.instrs = OpenInstr(desc) mission_accepted = not (self.exclude_substrings()) - elif action == 'goto': + elif action == "goto": self.num_cols = 1 self.num_rows = 1 while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 1, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 1, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj = self._rand_elem(objs) self.instrs = GoToInstr(ObjDesc(obj.type, obj.color)) mission_accepted = not (self.exclude_substrings()) - elif action == 'pickup': + elif action == "pickup": self.num_cols = 1 self.num_rows = 1 while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 1, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 1, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj = self._rand_elem(objs) - while str(obj.type) == 'door': + while str(obj.type) == "door": obj = self._rand_elem(objs) self.instrs = PickupInstr(ObjDesc(obj.type, obj.color)) mission_accepted = not (self.exclude_substrings()) - elif action == 'putnext': + elif action == "putnext": self.num_cols = 1 self.num_rows = 1 while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 2, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 2, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj_1 = self._rand_elem(objs) - while str(obj_1.type) == 'door': + while str(obj_1.type) == "door": obj_1 = self._rand_elem(objs) desc1 = ObjDesc(obj_1.type, obj_1.color) obj_2 = self._rand_elem(objs) @@ -391,16 +425,18 @@ def gen_mission(self): mission_accepted = not (self.exclude_substrings()) - elif action == 'pick up seq go to': + elif action == "pick up seq go to": self.num_cols = 1 self.num_rows = 1 while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 2, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 2, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj_a = self._rand_elem(objs) - while str(obj_a.type) == 'door': + while str(obj_a.type) == "door": obj_a = self._rand_elem(objs) instr_a = PickupInstr(ObjDesc(obj_a.type, obj_a.color)) obj_b = self._rand_elem(objs) @@ -413,24 +449,38 @@ def gen_mission(self): obj_b = self._rand_elem(objs) instr_b = GoToInstr(ObjDesc(obj_b.type, obj_b.color)) - type_instr = self._rand_elem(['Before', 'After']) + type_instr = self._rand_elem(["Before", "After"]) - if type_instr == 'Before': + if type_instr == "Before": self.instrs = BeforeInstr(instr_a, instr_b) else: self.instrs = AfterInstr(instr_b, instr_a) mission_accepted = not (self.exclude_substrings()) - def exclude_substrings(self): # True if contains excluded substring - list_exclude_combinaison = ["yellow key", "yellow ball", "yellow door", - "red box", "red ball", - "green box", "green key", "green door", - "grey box", "grey key", "grey ball", - "blue box", "blue key", "blue ball", "blue door", - "purple box", "purple key", "purple ball", "purple door"] + list_exclude_combinaison = [ + "yellow key", + "yellow ball", + "yellow door", + "red box", + "red ball", + "green box", + "green key", + "green door", + "grey box", + "grey key", + "grey ball", + "blue box", + "blue key", + "blue ball", + "blue door", + "purple box", + "purple key", + "purple ball", + "purple door", + ] for sub_str in list_exclude_combinaison: if sub_str in self.instrs.surface(self): @@ -468,51 +518,57 @@ def _regen_grid(self): room = self.room_grid[j][i] x_l, y_l = (room.top[0] + 1, room.top[1] + 1) - x_m, y_m = (room.top[0] + room.size[0] - 1, room.top[1] + room.size[1] - 1) + x_m, y_m = ( + room.top[0] + room.size[0] - 1, + room.top[1] + room.size[1] - 1, + ) # Door positions, order is right, down, left, up if i < self.num_cols - 1: - room.neighbors[0] = self.room_grid[j][i+1] + room.neighbors[0] = self.room_grid[j][i + 1] room.door_pos[0] = (x_m, self._rand_int(y_l, y_m)) if j < self.num_rows - 1: - room.neighbors[1] = self.room_grid[j+1][i] + room.neighbors[1] = self.room_grid[j + 1][i] room.door_pos[1] = (self._rand_int(x_l, x_m), y_m) if i > 0: - room.neighbors[2] = self.room_grid[j][i-1] + room.neighbors[2] = self.room_grid[j][i - 1] room.door_pos[2] = room.neighbors[2].door_pos[0] if j > 0: - room.neighbors[3] = self.room_grid[j-1][i] + room.neighbors[3] = self.room_grid[j - 1][i] room.door_pos[3] = room.neighbors[3].door_pos[1] # The agent starts in the middle, facing right self.agent_pos = ( - (self.num_cols // 2) * (self.room_size-1) + (self.room_size // 2), - (self.num_rows // 2) * (self.room_size-1) + (self.room_size // 2) + (self.num_cols // 2) * (self.room_size - 1) + (self.room_size // 2), + (self.num_rows // 2) * (self.room_size - 1) + (self.room_size // 2), ) self.agent_dir = 0 + class Level_MixedTrainLocalFrench(LevelGen): """ Same as MixedTrainLocal but in French """ + # TODO pas encore fini def __init__( - self, - room_size=8, - num_rows=1, - num_cols=1, - num_dists=8, - language='french', - instr_kinds=['action', 'seq1'], - locations=False, - unblocking=False, - implicit_unlock=False, - **kwargs, + self, + room_size=8, + num_rows=1, + num_cols=1, + num_dists=8, + language="french", + instr_kinds=["action", "seq1"], + locations=False, + unblocking=False, + implicit_unlock=False, + **kwargs, ): - - action = self._rand_elem(['goto', 'pickup', 'open', 'putnext', 'pick up seq go to']) - if action == 'open': + action = self._rand_elem( + ["goto", "pickup", "open", "putnext", "pick up seq go to"] + ) + if action == "open": num_cols = 2 num_rows = 1 # We add many distractors to increase the probability @@ -533,23 +589,24 @@ def __init__( # ['goto', 'pickup', 'open', 'putnext', 'pick up seq go to'], def gen_mission(self): - action = self._rand_elem(self.action_kinds) mission_accepted = False all_objects_reachable = False - if action == 'open': - + if action == "open": while not mission_accepted or not all_objects_reachable: - self._regen_grid() - color_door = self._rand_elem(['jaune', 'verte', 'bleue', 'violette']) # red and grey excluded + color_door = self._rand_elem( + ["jaune", "verte", "bleue", "violette"] + ) # red and grey excluded self.add_locked_room(color_door) self.connect_all() for j in range(self.num_rows): for i in range(self.num_cols): if self.get_room(i, j) is not self.locked_room: - self.add_distractors(i, j, num_distractors=self.num_dists, all_unique=False) + self.add_distractors( + i, j, num_distractors=self.num_dists, all_unique=False + ) # The agent must be placed after all the object to respect constraints while True: @@ -564,7 +621,7 @@ def gen_mission(self): color_in_instr = self._rand_elem([None, color_door]) - desc = ObjDesc('door', color_in_instr) + desc = ObjDesc("door", color_in_instr) self.instrs = OpenInstr(desc) mission_accepted = not (self.exclude_substrings()) @@ -572,44 +629,50 @@ def gen_mission(self): """if color_in_instr is None and mission_accepted and all_objects_reachable: print(color_door)""" - elif action == 'goto': + elif action == "goto": self.num_cols = 1 self.num_rows = 1 while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 1, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 1, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj = self._rand_elem(objs) self.instrs = GoToInstr(ObjDesc(obj.type, obj.color)) mission_accepted = not (self.exclude_substrings()) - elif action == 'pickup': + elif action == "pickup": self.num_cols = 1 self.num_rows = 1 while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 1, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 1, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj = self._rand_elem(objs) - while str(obj.type) == 'door': + while str(obj.type) == "door": obj = self._rand_elem(objs) self.instrs = PickupInstr(ObjDesc(obj.type, obj.color)) mission_accepted = not (self.exclude_substrings()) - elif action == 'putnext': + elif action == "putnext": self.num_cols = 1 self.num_rows = 1 while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 2, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 2, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj_1 = self._rand_elem(objs) - while str(obj_1.type) == 'door': + while str(obj_1.type) == "door": obj_1 = self._rand_elem(objs) desc1 = ObjDesc(obj_1.type, obj_1.color) obj_2 = self._rand_elem(objs) @@ -624,16 +687,18 @@ def gen_mission(self): mission_accepted = not (self.exclude_substrings()) - elif action == 'pick up seq go to': + elif action == "pick up seq go to": self.num_cols = 1 self.num_rows = 1 while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 2, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 2, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj_a = self._rand_elem(objs) - while str(obj_a.type) == 'door': + while str(obj_a.type) == "door": obj_a = self._rand_elem(objs) instr_a = PickupInstr(ObjDesc(obj_a.type, obj_a.color)) obj_b = self._rand_elem(objs) @@ -646,19 +711,24 @@ def gen_mission(self): obj_b = self._rand_elem(objs) instr_b = GoToInstr(ObjDesc(obj_b.type, obj_b.color)) - type_instr = self._rand_elem(['Before', 'After']) + type_instr = self._rand_elem(["Before", "After"]) - if type_instr == 'Before': + if type_instr == "Before": self.instrs = BeforeInstr(instr_a, instr_b) else: self.instrs = AfterInstr(instr_b, instr_a) mission_accepted = not (self.exclude_substrings()) - def exclude_substrings(self): # True if contains excluded substring - list_exclude_combinaison = ["boîte jaune", "clef rouge", "porte rouge", "balle verte", "porte grise"] + list_exclude_combinaison = [ + "boîte jaune", + "clef rouge", + "porte rouge", + "balle verte", + "porte grise", + ] for sub_str in list_exclude_combinaison: str = self.instrs.surface(self) @@ -697,26 +767,29 @@ def _regen_grid(self): room = self.room_grid[j][i] x_l, y_l = (room.top[0] + 1, room.top[1] + 1) - x_m, y_m = (room.top[0] + room.size[0] - 1, room.top[1] + room.size[1] - 1) + x_m, y_m = ( + room.top[0] + room.size[0] - 1, + room.top[1] + room.size[1] - 1, + ) # Door positions, order is right, down, left, up if i < self.num_cols - 1: - room.neighbors[0] = self.room_grid[j][i+1] + room.neighbors[0] = self.room_grid[j][i + 1] room.door_pos[0] = (x_m, self._rand_int(y_l, y_m)) if j < self.num_rows - 1: - room.neighbors[1] = self.room_grid[j+1][i] + room.neighbors[1] = self.room_grid[j + 1][i] room.door_pos[1] = (self._rand_int(x_l, x_m), y_m) if i > 0: - room.neighbors[2] = self.room_grid[j][i-1] + room.neighbors[2] = self.room_grid[j][i - 1] room.door_pos[2] = room.neighbors[2].door_pos[0] if j > 0: - room.neighbors[3] = self.room_grid[j-1][i] + room.neighbors[3] = self.room_grid[j - 1][i] room.door_pos[3] = room.neighbors[3].door_pos[1] # The agent starts in the middle, facing right self.agent_pos = ( - (self.num_cols // 2) * (self.room_size-1) + (self.room_size // 2), - (self.num_rows // 2) * (self.room_size-1) + (self.room_size // 2) + (self.num_cols // 2) * (self.room_size - 1) + (self.room_size // 2), + (self.num_rows // 2) * (self.room_size - 1) + (self.room_size // 2), ) self.agent_dir = 0 @@ -735,19 +808,18 @@ class Level_PickUpSeqGoToLocal(LevelGen): """ def __init__( - self, - room_size=8, - num_rows=1, - num_cols=1, - num_dists=8, - instr_kinds=['seq1'], - locations=False, - unblocking=False, - implicit_unlock=False, - **kwargs, + self, + room_size=8, + num_rows=1, + num_cols=1, + num_dists=8, + instr_kinds=["seq1"], + locations=False, + unblocking=False, + implicit_unlock=False, + **kwargs, ): - - action = 'pick up seq pick up ' + action = "pick up seq pick up " # We add many distractors to increase the probability # of ambiguous locations within the same room @@ -765,17 +837,18 @@ def __init__( ) def gen_mission(self): - mission_accepted = False all_objects_reachable = False while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 2, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 2, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj_a = self._rand_elem(objs) - while str(obj_a.type) == 'door': + while str(obj_a.type) == "door": obj_a = self._rand_elem(objs) instr_a = PickupInstr(ObjDesc(obj_a.type, obj_a.color)) obj_b = self._rand_elem(objs) @@ -788,19 +861,24 @@ def gen_mission(self): obj_b = self._rand_elem(objs) instr_b = GoToInstr(ObjDesc(obj_b.type, obj_b.color)) - type_instr = self._rand_elem(['Before', 'After']) + type_instr = self._rand_elem(["Before", "After"]) - if type_instr == 'Before': + if type_instr == "Before": self.instrs = BeforeInstr(instr_a, instr_b) else: self.instrs = AfterInstr(instr_b, instr_a) mission_accepted = not (self.exclude_substrings()) - def exclude_substrings(self): # True if contains excluded substring - list_exclude_combinaison = ["yellow box", "red key", "red door", "green ball", "grey door"] + list_exclude_combinaison = [ + "yellow box", + "red key", + "red door", + "green ball", + "grey door", + ] for sub_str in list_exclude_combinaison: if sub_str in self.instrs.surface(self): @@ -838,29 +916,33 @@ def _regen_grid(self): room = self.room_grid[j][i] x_l, y_l = (room.top[0] + 1, room.top[1] + 1) - x_m, y_m = (room.top[0] + room.size[0] - 1, room.top[1] + room.size[1] - 1) + x_m, y_m = ( + room.top[0] + room.size[0] - 1, + room.top[1] + room.size[1] - 1, + ) # Door positions, order is right, down, left, up if i < self.num_cols - 1: - room.neighbors[0] = self.room_grid[j][i+1] + room.neighbors[0] = self.room_grid[j][i + 1] room.door_pos[0] = (x_m, self._rand_int(y_l, y_m)) if j < self.num_rows - 1: - room.neighbors[1] = self.room_grid[j+1][i] + room.neighbors[1] = self.room_grid[j + 1][i] room.door_pos[1] = (self._rand_int(x_l, x_m), y_m) if i > 0: - room.neighbors[2] = self.room_grid[j][i-1] + room.neighbors[2] = self.room_grid[j][i - 1] room.door_pos[2] = room.neighbors[2].door_pos[0] if j > 0: - room.neighbors[3] = self.room_grid[j-1][i] + room.neighbors[3] = self.room_grid[j - 1][i] room.door_pos[3] = room.neighbors[3].door_pos[1] # The agent starts in the middle, facing right self.agent_pos = ( - (self.num_cols // 2) * (self.room_size-1) + (self.room_size // 2), - (self.num_rows // 2) * (self.room_size-1) + (self.room_size // 2) + (self.num_cols // 2) * (self.room_size - 1) + (self.room_size // 2), + (self.num_rows // 2) * (self.room_size - 1) + (self.room_size // 2), ) self.agent_dir = 0 + class Level_PickUpThenGoToLocal(LevelGen): """ In order to test generalisation we only give to the agent the instruction: @@ -875,19 +957,18 @@ class Level_PickUpThenGoToLocal(LevelGen): """ def __init__( - self, - room_size=8, - num_rows=1, - num_cols=1, - num_dists=8, - instr_kinds=['seq1'], - locations=False, - unblocking=False, - implicit_unlock=False, - **kwargs, + self, + room_size=8, + num_rows=1, + num_cols=1, + num_dists=8, + instr_kinds=["seq1"], + locations=False, + unblocking=False, + implicit_unlock=False, + **kwargs, ): - - action = 'pick up seq pick up ' + action = "pick up seq pick up " # We add many distractors to increase the probability # of ambiguous locations within the same room @@ -905,17 +986,18 @@ def __init__( ) def gen_mission(self): - mission_accepted = False all_objects_reachable = False while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 2, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 2, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj_a = self._rand_elem(objs) - while str(obj_a.type) == 'door': + while str(obj_a.type) == "door": obj_a = self._rand_elem(objs) instr_a = PickupInstr(ObjDesc(obj_a.type, obj_a.color)) obj_b = self._rand_elem(objs) @@ -932,10 +1014,15 @@ def gen_mission(self): mission_accepted = not (self.exclude_substrings()) - def exclude_substrings(self): # True if contains excluded substring - list_exclude_combinaison = ["yellow box", "red key", "red door", "green ball", "grey door"] + list_exclude_combinaison = [ + "yellow box", + "red key", + "red door", + "green ball", + "grey door", + ] for sub_str in list_exclude_combinaison: if sub_str in self.instrs.surface(self): @@ -973,26 +1060,29 @@ def _regen_grid(self): room = self.room_grid[j][i] x_l, y_l = (room.top[0] + 1, room.top[1] + 1) - x_m, y_m = (room.top[0] + room.size[0] - 1, room.top[1] + room.size[1] - 1) + x_m, y_m = ( + room.top[0] + room.size[0] - 1, + room.top[1] + room.size[1] - 1, + ) # Door positions, order is right, down, left, up if i < self.num_cols - 1: - room.neighbors[0] = self.room_grid[j][i+1] + room.neighbors[0] = self.room_grid[j][i + 1] room.door_pos[0] = (x_m, self._rand_int(y_l, y_m)) if j < self.num_rows - 1: - room.neighbors[1] = self.room_grid[j+1][i] + room.neighbors[1] = self.room_grid[j + 1][i] room.door_pos[1] = (self._rand_int(x_l, x_m), y_m) if i > 0: - room.neighbors[2] = self.room_grid[j][i-1] + room.neighbors[2] = self.room_grid[j][i - 1] room.door_pos[2] = room.neighbors[2].door_pos[0] if j > 0: - room.neighbors[3] = self.room_grid[j-1][i] + room.neighbors[3] = self.room_grid[j - 1][i] room.door_pos[3] = room.neighbors[3].door_pos[1] # The agent starts in the middle, facing right self.agent_pos = ( - (self.num_cols // 2) * (self.room_size-1) + (self.room_size // 2), - (self.num_rows // 2) * (self.room_size-1) + (self.room_size // 2) + (self.num_cols // 2) * (self.room_size - 1) + (self.room_size // 2), + (self.num_rows // 2) * (self.room_size - 1) + (self.room_size // 2), ) self.agent_dir = 0 @@ -1011,19 +1101,18 @@ class Level_GoToAfterPickUpLocal(LevelGen): """ def __init__( - self, - room_size=8, - num_rows=1, - num_cols=1, - num_dists=8, - instr_kinds=['seq1'], - locations=False, - unblocking=False, - implicit_unlock=False, - **kwargs, + self, + room_size=8, + num_rows=1, + num_cols=1, + num_dists=8, + instr_kinds=["seq1"], + locations=False, + unblocking=False, + implicit_unlock=False, + **kwargs, ): - - action = 'pick up seq pick up ' + action = "pick up seq pick up " # We add many distractors to increase the probability # of ambiguous locations within the same room @@ -1041,17 +1130,18 @@ def __init__( ) def gen_mission(self): - mission_accepted = False all_objects_reachable = False while not mission_accepted or not all_objects_reachable: self._regen_grid() self.place_agent() - objs = self.add_distractors(num_distractors=self.num_dists + 2, all_unique=False) + objs = self.add_distractors( + num_distractors=self.num_dists + 2, all_unique=False + ) all_objects_reachable = self.check_objs_reachable(raise_exc=False) obj_a = self._rand_elem(objs) - while str(obj_a.type) == 'door': + while str(obj_a.type) == "door": obj_a = self._rand_elem(objs) instr_a = PickupInstr(ObjDesc(obj_a.type, obj_a.color)) obj_b = self._rand_elem(objs) @@ -1068,10 +1158,15 @@ def gen_mission(self): mission_accepted = not (self.exclude_substrings()) - def exclude_substrings(self): # True if contains excluded substring - list_exclude_combinaison = ["yellow box", "red key", "red door", "green ball", "grey door"] + list_exclude_combinaison = [ + "yellow box", + "red key", + "red door", + "green ball", + "grey door", + ] for sub_str in list_exclude_combinaison: if sub_str in self.instrs.surface(self): @@ -1109,25 +1204,28 @@ def _regen_grid(self): room = self.room_grid[j][i] x_l, y_l = (room.top[0] + 1, room.top[1] + 1) - x_m, y_m = (room.top[0] + room.size[0] - 1, room.top[1] + room.size[1] - 1) + x_m, y_m = ( + room.top[0] + room.size[0] - 1, + room.top[1] + room.size[1] - 1, + ) # Door positions, order is right, down, left, up if i < self.num_cols - 1: - room.neighbors[0] = self.room_grid[j][i+1] + room.neighbors[0] = self.room_grid[j][i + 1] room.door_pos[0] = (x_m, self._rand_int(y_l, y_m)) if j < self.num_rows - 1: - room.neighbors[1] = self.room_grid[j+1][i] + room.neighbors[1] = self.room_grid[j + 1][i] room.door_pos[1] = (self._rand_int(x_l, x_m), y_m) if i > 0: - room.neighbors[2] = self.room_grid[j][i-1] + room.neighbors[2] = self.room_grid[j][i - 1] room.door_pos[2] = room.neighbors[2].door_pos[0] if j > 0: - room.neighbors[3] = self.room_grid[j-1][i] + room.neighbors[3] = self.room_grid[j - 1][i] room.door_pos[3] = room.neighbors[3].door_pos[1] # The agent starts in the middle, facing right self.agent_pos = ( - (self.num_cols // 2) * (self.room_size-1) + (self.room_size // 2), - (self.num_rows // 2) * (self.room_size-1) + (self.room_size // 2) + (self.num_cols // 2) * (self.room_size - 1) + (self.room_size // 2), + (self.num_rows // 2) * (self.room_size - 1) + (self.room_size // 2), ) self.agent_dir = 0 diff --git a/minigrid/minigrid_env.py b/minigrid/minigrid_env.py index d80a5309c..e4c0f1d3e 100755 --- a/minigrid/minigrid_env.py +++ b/minigrid/minigrid_env.py @@ -13,7 +13,13 @@ from gymnasium.core import ActType, ObsType from minigrid.core.actions import Actions -from minigrid.core.constants import COLOR_NAMES, DIR_TO_VEC, TILE_PIXELS, COLOR_TO_IDX, OBJECT_TO_IDX +from minigrid.core.constants import ( + COLOR_NAMES, + COLOR_TO_IDX, + DIR_TO_VEC, + OBJECT_TO_IDX, + TILE_PIXELS, +) from minigrid.core.grid import Grid from minigrid.core.mission import MissionSpace from minigrid.core.world_object import Point, WorldObj @@ -45,7 +51,7 @@ def __init__( highlight: bool = True, tile_size: int = TILE_PIXELS, agent_pov: bool = False, - language='english' + language="english", ): # Initialize mission self.mission = mission_space.sample() @@ -598,7 +604,7 @@ def step( self.render() obs = self.gen_obs() - + # add info Episodic Knowledge to minigrid move_forward = None if action == self.actions.forward: @@ -810,28 +816,54 @@ def gen_graph(self, move_forward=None): image = grid.encode(vis_mask) # (OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state) # State, 0: open, 1: closed, 2: locked - if self.language == 'english': - IDX_TO_STATE = {0: 'open', 1: 'closed', 2: 'locked'} + if self.language == "english": + IDX_TO_STATE = {0: "open", 1: "closed", 2: "locked"} IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys())) IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys())) - elif self.language == 'french': - IDX_TO_STATE = {0: 'ouverte', 1: 'fermée', 2: 'fermée à clef'} - IDX_TO_COLOR = {0: 'rouge', 1: 'verte', 2: 'bleue', 3: 'violette', 4: 'jaune', 5: 'grise'} - IDX_TO_OBJECT = {0: 'non visible', 1: 'vide', 2: 'mur', 3: 'sol', 4: 'porte', 5: 'clef', - 6: 'balle', 7: 'boîte', 8: 'but', 9: 'lave', 10: 'agent'} + elif self.language == "french": + IDX_TO_STATE = {0: "ouverte", 1: "fermée", 2: "fermée à clef"} + IDX_TO_COLOR = { + 0: "rouge", + 1: "verte", + 2: "bleue", + 3: "violette", + 4: "jaune", + 5: "grise", + } + IDX_TO_OBJECT = { + 0: "non visible", + 1: "vide", + 2: "mur", + 3: "sol", + 4: "porte", + 5: "clef", + 6: "balle", + 7: "boîte", + 8: "but", + 9: "lave", + 10: "agent", + } list_textual_descriptions = [] if self.carrying is not None: # print('carrying') - if self.language == 'english': - list_textual_descriptions.append("You carry a {} {}".format(self.carrying.color, self.carrying.type)) - elif self.language == 'french': - list_textual_descriptions.append("Tu portes une {} {}".format(self.carrying.type, self.carrying.color)) + if self.language == "english": + list_textual_descriptions.append( + f"You carry a {self.carrying.color} {self.carrying.type}" + ) + elif self.language == "french": + list_textual_descriptions.append( + "Tu portes une {} {}".format( + self.carrying.type, self.carrying.color + ) + ) # print('A agent position i: {}, j: {}'.format(self.agent_pos[0], self.agent_pos[1])) - agent_pos_vx, agent_pos_vy = self.get_view_coords(self.agent_pos[0], self.agent_pos[1]) + agent_pos_vx, agent_pos_vy = self.get_view_coords( + self.agent_pos[0], self.agent_pos[1] + ) # print('B agent position i: {}, j: {}'.format(agent_pos_vx, agent_pos_vy)) view_field_dictionary = dict() @@ -854,11 +886,14 @@ def gen_graph(self, move_forward=None): while j >= 0 and not object_seen: if image[agent_pos_vx][j][0] != 0 and image[agent_pos_vx][j][0] != 1: if image[agent_pos_vx][j][0] == 2: - if self.language == 'english': + if self.language == "english": + list_textual_descriptions.append( + f"You see a wall {agent_pos_vy - j} step{'s' if agent_pos_vy - j > 1 else ''} forward" + ) + elif self.language == "french": list_textual_descriptions.append( - f"You see a wall {agent_pos_vy - j} step{'s' if agent_pos_vy - j > 1 else ''} forward") - elif self.language == 'french': - list_textual_descriptions.append("Tu vois un mur à {} pas devant".format(agent_pos_vy - j)) + f"Tu vois un mur à {agent_pos_vy - j} pas devant" + ) object_seen = True else: object_seen = True @@ -869,11 +904,14 @@ def gen_graph(self, move_forward=None): while i >= 0 and not object_seen: if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1: if image[i][agent_pos_vy][0] == 2: - if self.language == 'english': + if self.language == "english": list_textual_descriptions.append( - f"You see a wall {agent_pos_vx - i} step{'s' if agent_pos_vx - i > 1 else ''} left") - elif self.language == 'french': - list_textual_descriptions.append("Tu vois un mur à {} pas à gauche".format(agent_pos_vx - i)) + f"You see a wall {agent_pos_vx - i} step{'s' if agent_pos_vx - i > 1 else ''} left" + ) + elif self.language == "french": + list_textual_descriptions.append( + f"Tu vois un mur à {agent_pos_vx - i} pas à gauche" + ) object_seen = True else: object_seen = True @@ -884,11 +922,14 @@ def gen_graph(self, move_forward=None): while i < image.shape[0] and not object_seen: if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1: if image[i][agent_pos_vy][0] == 2: - if self.language == 'english': + if self.language == "english": + list_textual_descriptions.append( + f"You see a wall {i - agent_pos_vx} step{'s' if i - agent_pos_vx > 1 else ''} right" + ) + elif self.language == "french": list_textual_descriptions.append( - f"You see a wall {i - agent_pos_vx} step{'s' if i - agent_pos_vx > 1 else ''} right") - elif self.language == 'french': - list_textual_descriptions.append("Tu vois un mur à {} pas à droite".format(i - agent_pos_vx)) + f"Tu vois un mur à {i - agent_pos_vx} pas à droite" + ) object_seen = True else: object_seen = True @@ -902,72 +943,92 @@ def gen_graph(self, move_forward=None): relative_position = dict() if i - agent_pos_vx > 0: - if self.language == 'english': + if self.language == "english": relative_position["x_axis"] = ("right", i - agent_pos_vx) - elif self.language == 'french': - relative_position["x_axis"] = ("à droite", i - agent_pos_vx) + elif self.language == "french": + relative_position["x_axis"] = ("à droite", i - agent_pos_vx) elif i - agent_pos_vx == 0: - if self.language == 'english': + if self.language == "english": relative_position["x_axis"] = ("face", 0) - elif self.language == 'french': + elif self.language == "french": relative_position["x_axis"] = ("en face", 0) else: - if self.language == 'english': + if self.language == "english": relative_position["x_axis"] = ("left", agent_pos_vx - i) - elif self.language == 'french': + elif self.language == "french": relative_position["x_axis"] = ("à gauche", agent_pos_vx - i) if agent_pos_vy - j > 0: - if self.language == 'english': + if self.language == "english": relative_position["y_axis"] = ("forward", agent_pos_vy - j) - elif self.language == 'french': + elif self.language == "french": relative_position["y_axis"] = ("devant", agent_pos_vy - j) elif agent_pos_vy - j == 0: - if self.language == 'english': + if self.language == "english": relative_position["y_axis"] = ("forward", 0) - elif self.language == 'french': + elif self.language == "french": relative_position["y_axis"] = ("devant", 0) distances = [] if relative_position["x_axis"][0] in ["face", "en face"]: - distances.append((relative_position["y_axis"][1], relative_position["y_axis"][0])) + distances.append( + ( + relative_position["y_axis"][1], + relative_position["y_axis"][0], + ) + ) elif relative_position["y_axis"][1] == 0: - distances.append((relative_position["x_axis"][1], relative_position["x_axis"][0])) + distances.append( + ( + relative_position["x_axis"][1], + relative_position["x_axis"][0], + ) + ) else: - distances.append((relative_position["x_axis"][1], relative_position["x_axis"][0])) - distances.append((relative_position["y_axis"][1], relative_position["y_axis"][0])) + distances.append( + ( + relative_position["x_axis"][1], + relative_position["x_axis"][0], + ) + ) + distances.append( + ( + relative_position["y_axis"][1], + relative_position["y_axis"][0], + ) + ) description = "" if object[0] != 4: # if it is not a door - if self.language == 'english': + if self.language == "english": description = f"You see a {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} " - elif self.language == 'french': + elif self.language == "french": description = f"Tu vois une {IDX_TO_OBJECT[object[0]]} {IDX_TO_COLOR[object[1]]} " else: if IDX_TO_STATE[object[2]] != 0: # if it is not open - if self.language == 'english': + if self.language == "english": description = f"You see a {IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} " - elif self.language == 'french': + elif self.language == "french": description = f"Tu vois une {IDX_TO_OBJECT[object[0]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_STATE[object[2]]} " else: - if self.language == 'english': + if self.language == "english": description = f"You see an {IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} " - elif self.language == 'french': + elif self.language == "french": description = f"Tu vois une {IDX_TO_OBJECT[object[0]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_STATE[object[2]]} " for _i, _distance in enumerate(distances): if _i > 0: - if self.language == 'english': + if self.language == "english": description += " and " - elif self.language == 'french': + elif self.language == "french": description += " et " - if self.language == 'english': + if self.language == "english": description += f"{_distance[0]} step{'s' if _distance[0] > 1 else ''} {_distance[1]}" - elif self.language == 'french': + elif self.language == "french": description += f"{_distance[0]} pas {_distance[1]}" list_textual_descriptions.append(description) - return {'descriptions': list_textual_descriptions} + return {"descriptions": list_textual_descriptions}