-
Notifications
You must be signed in to change notification settings - Fork 0
/
tree_node.py
58 lines (48 loc) · 1.46 KB
/
tree_node.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import random
import numpy as np
class Tree_node:
    """Node of a search tree that tracks reward/visit statistics.

    Each node accumulates a total reward ``T`` and a visit count ``n`` and
    can score itself either greedily (mean reward) or with a UCB-style
    formula that adds an exploration bonus based on the parent's visits.
    """

    def __init__(self, state, C, parent, terminal):
        """Create a node with zeroed statistics.

        Args:
            state: Opaque state object associated with this node.
            C: Exploration constant stored on the node. Note that
                ``get_UCB`` uses its own ``C`` argument, not this value.
            parent: Parent node, or ``None`` for the root.
            terminal: Whether this node represents a terminal state.
        """
        self.state = state
        self.is_terminal = terminal
        self.T = 0.0  # accumulated reward
        self.n = 0.0  # visit count (kept as a float)
        self.C = C
        self.parent = parent
        self.children = []  # not modified by this class
        self.complete_tree = False  # not modified by this class
        # Prior estimate used by get_UCB while the node is unvisited.
        # NOTE(review): pre_counter is never incremented inside this class;
        # presumably callers update it directly -- confirm.
        self.pre_val = 0
        self.pre_counter = 0

    def inc_T(self, val: float) -> None:
        """Add ``val`` to the accumulated reward."""
        self.T += val

    def inc_n(self) -> None:
        """Record one more visit."""
        self.n += 1.0

    def get_n(self) -> float:
        """Return the visit count."""
        return self.n

    def get_T(self) -> float:
        """Return the accumulated reward."""
        return self.T

    def get_v(self) -> float:
        """Return the mean reward ``T / n``, or 1.0 for the root.

        For non-root nodes the caller must ensure the node has been
        visited, since this divides by the visit count.
        """
        if self.parent is None:
            return 1.0
        return self.T / self.n

    def inc_pre_val(self, reward: float) -> None:
        """Add ``reward`` to the prior value (``pre_counter`` is untouched)."""
        self.pre_val += reward

    def get_greedy(self) -> float:
        """Return the mean reward ``T / n`` and cache it in ``self.u``.

        The caller must ensure the node has been visited, since this
        divides by the visit count.
        """
        self.u = self.T / self.n
        return self.u

    def get_UCB(self, exploration: str, C: float) -> float:
        """Return this node's selection score.

        Args:
            exploration: ``"random"`` yields a uniform random score in
                [0, 1]; any other value selects the UCB formula.
            C: Exploration constant used in the bonus term (the stored
                ``self.C`` is not consulted here).

        Returns:
            * a uniform random number when ``exploration == "random"``;
            * 1.0 for the root node (no parent);
            * for an unvisited node, the exploration bonus computed as if
              the node had one visit, plus ``pre_val / pre_counter`` when
              ``pre_counter`` is positive;
            * otherwise ``T / n + C * sqrt(2 * ln(parent.n) / n)``, which is
              also cached in ``self.u``.
        """
        if exploration == "random":
            return random.uniform(0, 1)

        # The root has no parent to take visit counts from. Check this
        # before the unvisited branch, which dereferences self.parent.
        if self.parent is None:
            return 1.0

        log_term = 2 * np.log(self.parent.get_n())

        if self.n == 0:
            # Unvisited: use a visit count of 1 in the bonus to avoid
            # dividing by zero.
            bonus = C * np.sqrt(log_term / (1.0))
            if self.pre_counter > 0:
                return self.pre_val / self.pre_counter + bonus
            return bonus

        self.u = (self.T / self.n) + C * np.sqrt(log_term / (self.n))
        return self.u