-
Notifications
You must be signed in to change notification settings - Fork 0
/
QLearning.py
71 lines (62 loc) · 2.37 KB
/
QLearning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import AppConfig
from random import choice
class QLearning:
    """Tabular Q-value store for a ``rows`` x ``columns`` grid of states.

    ``matrix`` maps every ``(row, column)`` pair to a dict of
    ``{action: value}``; all values start at ``INITIAL_VALUE``.  The agent
    remembers the last ``(row, column, action)`` it picked in
    ``choose_action`` so that ``take_reward`` can update that entry.
    """

    INITIAL_VALUE = 0
    TOTAL_SCORE = 0

    def __init__(self, rows, columns, actions):
        """Build the value table.

        Args:
            rows: number of grid rows; states use row indices ``0..rows-1``.
            columns: number of grid columns, indexed the same way.
            actions: indexable sequence of hashable action identifiers.
        """
        self.matrix = {}
        # Last (row, column, action) chosen; empty until choose_action runs.
        self.__currentTurn = []
        self.__rows = rows
        self.__columns = columns
        self.actions = actions
        for row in range(rows):
            for column in range(columns):
                self.matrix[row, column] = {
                    act: self.INITIAL_VALUE for act in actions
                }

    def choose_action(self, currentState):
        """Pick an action with the highest stored value for ``currentState``.

        Ties are broken uniformly at random.  The chosen turn is recorded
        for the next ``take_reward`` call, and the candidates and the final
        choice are printed.

        Args:
            currentState: indexable pair whose first two items are the row
                and the column of the state.

        Returns:
            The selected element of ``self.actions``.

        Raises:
            IndexError: if ``self.actions`` is empty (nothing to choose).
        """
        x = currentState[0]
        y = currentState[1]
        q_values = self.matrix[x, y]

        # Take the real maximum instead of comparing against a magic floor,
        # so states whose values are all very negative still yield a choice.
        best_value = max(
            (q_values[action] for action in self.actions), default=None
        )
        best_actions = [
            index
            for index, action in enumerate(self.actions)
            if q_values[action] == best_value
        ]

        chosen_action = self.actions[choice(best_actions)]
        self.__currentTurn = [x, y, chosen_action]
        self.__print_choosen_moves(best_actions)
        # str() keeps this working for non-string actions (ints, tuples...).
        print("Choosed action: " + str(chosen_action))
        return chosen_action

    def take_reward(self, reward, nextState):
        """Update the value of the last chosen action and print the table.

        Must be called after ``choose_action``; before that no turn is
        recorded and indexing the empty record raises ``IndexError``.

        Args:
            reward: numeric reward received for the last chosen action.
            nextState: indexable pair (row, column) reached by that action.
        """
        x = self.__currentTurn[0]
        y = self.__currentTurn[1]
        action = self.__currentTurn[2]
        # NOTE(review): this overwrites the old estimate with
        # ALPHA * reward + GAMMA * max Q(next) instead of blending toward
        # the target as the textbook rule
        #   Q += ALPHA * (reward + GAMMA * max Q(next) - Q)
        # does.  The two agree only when ALPHA == 1.  Confirm the intent
        # before changing it, since that would alter the learned values.
        self.matrix[x, y][action] = (
            AppConfig.ALPHA * reward
            + AppConfig.GAMMA * self.__getMaxQ(nextState)
        )
        print("Reward for action: " + str(reward))
        self.__print_matrix()

    def __getMaxQ(self, state):
        """Return (and print) the largest stored value among the actions of ``state``."""
        x = state[0]
        y = state[1]
        q_values = self.matrix[x, y]
        # -99999 is the legacy result for an empty action list; it is kept
        # only for that degenerate case so existing behaviour is unchanged.
        max_q = max(
            (q_values[action] for action in self.actions), default=-99999
        )
        print(str(max_q) + " of " + str(state))
        return max_q

    def __print_choosen_moves(self, choosenMoves):
        """Print, on one line, the actions whose indices are in ``choosenMoves``."""
        listed = "".join(str(self.actions[move]) + " " for move in choosenMoves)
        print("Best actions: " + listed)

    def __print_matrix(self):
        """Print one line per state: its coordinates followed by its action values."""
        for row in range(self.__rows):
            for column in range(self.__columns):
                print(
                    "[" + str(row) + ", " + str(column) + "]"
                    + " " + str(self.matrix[row, column])
                )