StandardAgentControl.py
import copy

from QLearning import QLearning


class StandardAgentControl:
    # Actions the agent can choose from; also handed to QLearning as its
    # action space.
    AVAILABLE_COMMAND = ["right", "up", "left", "down"]

    def __init__(self, cords, statesMatrix):
        # cords is a mutable [row, col] pair; keep an independent copy so
        # the agent can be reset to its starting cell after each episode.
        self.__startState = copy.deepcopy(cords)
        self.currentCords = cords
        self.totalScore = 0
        self.__bumpInWall = False
        self.__statesMatrix = statesMatrix
        self.__matrixSize = (len(statesMatrix), len(statesMatrix[0]))
        self.__learningClass = QLearning(self.__matrixSize[0],
                                         self.__matrixSize[1],
                                         self.AVAILABLE_COMMAND)
    def choose_action(self):
        # On a terminal cell ("win" or "hole") start a new episode by
        # resetting the agent to its starting cell; otherwise ask the
        # learner for an action, execute it, and hand back the reward.
        current_type = self.__statesMatrix[self.currentCords[0]][self.currentCords[1]].get_type()
        if current_type in ("win", "hole"):
            print("#### NEXT EPISODE ####\n")
            print(str(self.__startState))  # debug output: reset position
            self.currentCords = copy.deepcopy(self.__startState)
        else:
            action = self.__learningClass.choose_action(self.currentCords)
            self.__execute_action(action)
            self.__evaluate_consequence(self.currentCords[0], self.currentCords[1])
    def __execute_action(self, action):
        self.__bumpInWall = False
        if action == "right":
            self.__right()
        elif action == "up":
            self.__up()
        elif action == "left":
            self.__left()
        else:
            self.__down()
    # Movement helpers. Coordinates are [row, col], so "up"/"down" change
    # the row index and "left"/"right" change the column. A move that would
    # leave the grid or enter a wall cell is refused and recorded as a bump
    # so __evaluate_consequence can penalise it.
    def __right(self):
        dest_x = self.currentCords[0]
        dest_y = self.currentCords[1] + 1
        if self.__is_in_bound(dest_x, dest_y) and \
                not self.__is_a_wall(dest_x, dest_y):
            self.currentCords[1] += 1
        else:
            self.__bumpInWall = True

    def __up(self):
        dest_x = self.currentCords[0] - 1
        dest_y = self.currentCords[1]
        if self.__is_in_bound(dest_x, dest_y) and \
                not self.__is_a_wall(dest_x, dest_y):
            self.currentCords[0] -= 1
        else:
            self.__bumpInWall = True

    def __left(self):
        dest_x = self.currentCords[0]
        dest_y = self.currentCords[1] - 1
        if self.__is_in_bound(dest_x, dest_y) and \
                not self.__is_a_wall(dest_x, dest_y):
            self.currentCords[1] -= 1
        else:
            self.__bumpInWall = True

    def __down(self):
        dest_x = self.currentCords[0] + 1
        dest_y = self.currentCords[1]
        if self.__is_in_bound(dest_x, dest_y) and \
                not self.__is_a_wall(dest_x, dest_y):
            self.currentCords[0] += 1
        else:
            self.__bumpInWall = True
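    # Note: the four helpers above could be collapsed into one table-driven
    # move using a direction-to-delta mapping. A sketch (not used by this
    # class, shown only as an alternative design):
    #
    #     DELTAS = {"right": (0, 1), "up": (-1, 0),
    #               "left": (0, -1), "down": (1, 0)}
    #
    #     def __move(self, action):
    #         dx, dy = self.DELTAS[action]
    #         dest_x = self.currentCords[0] + dx
    #         dest_y = self.currentCords[1] + dy
    #         if self.__is_in_bound(dest_x, dest_y) and \
    #                 not self.__is_a_wall(dest_x, dest_y):
    #             self.currentCords[0], self.currentCords[1] = dest_x, dest_y
    #         else:
    #             self.__bumpInWall = True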
    def __is_in_bound(self, x, y):
        return 0 <= x < self.__matrixSize[0] and 0 <= y < self.__matrixSize[1]

    def __is_a_wall(self, x, y):
        return self.__statesMatrix[x][y].get_type() == "wall"
    def __evaluate_consequence(self, x, y):
        # Reward scheme: -1 for bumping into a wall or the grid edge,
        # +10 for reaching the goal, -5 for falling into a hole, and a
        # small step cost of -0.2 otherwise to keep learned paths short.
        if self.__bumpInWall:
            reward = -1
        elif self.__statesMatrix[x][y].get_type() == "win":
            reward = 10
        elif self.__statesMatrix[x][y].get_type() == "hole":
            reward = -5
        else:
            reward = -0.2
        self.__learningClass.take_reward(reward, self.currentCords)
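

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original file). It assumes the
# QLearning class imported above exposes choose_action(cords) and
# take_reward(reward, cords) exactly as StandardAgentControl uses them, and
# it stubs out the grid cells with a hypothetical _DemoState class whose only
# requirement is a get_type() method returning "wall", "hole", "win", or
# anything else for a normal cell.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    class _DemoState:
        def __init__(self, cell_type):
            self._type = cell_type

        def get_type(self):
            return self._type

    # A small 3x4 grid with a goal, a wall, and a hole.
    layout = [
        ["empty", "empty", "empty", "win"],
        ["empty", "wall", "empty", "hole"],
        ["empty", "empty", "empty", "empty"],
    ]
    grid = [[_DemoState(cell) for cell in row] for row in layout]

    # Start in the bottom-left corner and act for a fixed number of steps;
    # episodes restart automatically whenever a win/hole cell is reached.
    agent = StandardAgentControl([2, 0], grid)
    for _ in range(1000):
        agent.choose_action()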