# config.yaml

general:
  task: "rl"  # obs_gen, obs_infomax, cmd_gen, ap, sp, dgi, rl
  random_seed: 42
  use_this_many_data: -1  # -1 means all
  use_cuda: True  # disable this when running on a machine without CUDA
  visdom: False

  training:
    batch_size: 32  # for sp, change this to 64
    max_episode: 100000
    smoothing_eps: 0.1
    optimizer:
      step_rule: 'radam'  # adam, radam
      learning_rate: 0.001
      clip_grad_norm: 5
      learning_rate_warmup_until: 1000
    fix_parameters_keywords: []  # ["rgcns", "word_embedding", "node_embedding", "relation_embedding", "word_embedding_prj"]
    patience: 3  # >=1 to enable
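
    # A minimal sketch of how learning_rate_warmup_until is commonly applied
    # (an assumption about this codebase, not taken from it): the learning
    # rate is scaled up linearly until that episode, e.g.
    #   lr(e) = learning_rate * min(1.0, e / learning_rate_warmup_until)
    # so with the values above, lr reaches 0.001 at episode 1000.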

  evaluate:
    run_eval: True
    g_belief: False
    batch_size: 256  # for sp, change this to 64
    max_target_length: 200

  checkpoint:
    report_frequency: 5000  # episode
    save_frequency: 1000  # episode
    experiment_tag: 'exp_tag'
    load_pretrained: False  # during testing, enable this so that the agent loads your pretrained model
    load_from_tag: 'pretrain_model'
    load_parameter_keywords: ["rgcns", "word_embedding", "node_embedding", "relation_embedding", "word_embedding_prj"]
    load_graph_generation_model_from_tag: 'graph_generation_model_tag'
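
    # Example (a sketch; 'my_experiment' is a placeholder tag, not from the
    # source): to evaluate a previously trained agent, one would typically set
    #   load_pretrained: True
    #   load_from_tag: 'my_experiment'
    # so the weights saved under that experiment tag are restored before testing.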

  model:
    use_pretrained_embedding: True
    word_embedding_size: 300
    word_embedding_trainable: False
    node_embedding_size: 100
    node_embedding_trainable: True
    relation_embedding_size: 32
    relation_embedding_trainable: True
    embedding_dropout: 0.
    encoder_layers: 1
    decoder_layers: 1
    action_scorer_layers: 1
    encoder_conv_num: 5
    block_hidden_dim: 64
    gcn_hidden_dims: [64, 64, 64, 64, 64, 64]  # the last one should equal block_hidden_dim
    gcn_highway_connections: True
    gcn_num_bases: 3
    real_valued_graph: True
    n_heads: 1
    dropout: 0.
    attention_dropout: 0.
    block_dropout: 0.

# below are the five pretraining tasks (cmd_gen, obs_gen, ap, sp, dgi), followed by rl
cmd_gen:
  data_path: "cmd_gen.0.2"

obs_gen:
  data_path: "obs_gen.0.1"
  backprop_frequency: 5  # call loss.backward() and step the optimizer every this many time steps

ap:
  data_path: "ap.0.2"
  graph_type: "full"  # full, seen
  k_way_classification: -1  # -1 means all

sp:
  data_path: "sp.0.2"
  graph_type: "full"  # full, seen
  k_way_classification: -1  # -1 means all

dgi:
  data_path: "dgi.0.2"
  graph_type: "full"  # full, seen
  sample_bias_positive: 0.0
  sample_bias_negative: 0.0
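
# Usage note (a sketch based on the structure above): to run one of the
# pretraining tasks instead of rl, point general.task at it, e.g.
#   general:
#     task: "ap"
# and the matching block above supplies that task's data path and settings.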

rl:
  data_path: "rl.0.2"
  difficulty_level: 1  # 1-10
  training_size: 20  # 1, 20, 100
  fully_observable_graph: False  # use fully observable graph

  training:
    max_nb_steps_per_episode: 50  # after this many steps, a game is terminated
    learn_start_from_this_episode: 100
    target_net_update_frequency: 500  # sync the target net with the online net every this many episodes
    use_negative_reward: False

  evaluate:
    max_nb_steps_per_episode: 100

  replay:
    buffer_reward_threshold: 0.1  # a new sequence of transitions must be at least this many times better than the average reward in the current replay buffer; use -1 to disable
    accumulate_reward_from_final: False
    prioritized_replay_beta: 0.4
    prioritized_replay_eps: 0.000001
    count_reward_lambda: 0.0  # 0 to disable
    graph_reward_lambda: 0.0  # 0 to disable
    graph_reward_type: "triplets_difference"  # triplets_difference, triplets_increased
    discount_gamma_graph_reward: 0.9
    discount_gamma_count_reward: 0.5
    discount_gamma_game_reward: 0.9
    replay_memory_capacity: 500000  # adjust this depending on your RAM size
    replay_memory_priority_fraction: 0.6
    update_per_k_game_steps: 50
    replay_batch_size: 64
    multi_step: 3
    replay_sample_history_length: 8
    replay_sample_update_from: 4
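
  # A sketch of the standard multi-step return that multi_step: 3 implies
  # (assumed here; the repo's update rule may differ in details). With
  # gamma = discount_gamma_game_reward, the bootstrapped target is
  #   R_t = r_t + gamma * r_{t+1} + gamma^2 * r_{t+2}
  #         + gamma^3 * max_a Q_target(s_{t+3}, a)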

  epsilon_greedy:
    noisy_net: False  # if this is true, then epsilon greedy is disabled
    epsilon_anneal_episodes: 20000  # -1 if not annealing
    epsilon_anneal_from: 1.0
    epsilon_anneal_to: 0.1
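
  # Assuming the usual linear schedule (an assumption, not verified against
  # the code), epsilon at episode e would be
  #   eps(e) = max(epsilon_anneal_to,
  #                epsilon_anneal_from - e * (epsilon_anneal_from - epsilon_anneal_to) / epsilon_anneal_episodes)
  # i.e., it decays from 1.0 to 0.1 over the first 20000 episodes, then stays at 0.1.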

  model:
    enable_recurrent_memory: False
    enable_graph_input: True
    enable_text_input: True
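
# A minimal sketch of reading this file (assumes PyYAML; the repo's own
# loading code may differ), kept in comment form so this file stays valid YAML:
#
#   import yaml
#
#   with open("config.yaml") as f:
#       config = yaml.safe_load(f)
#
#   # nested sections are plain dicts, e.g.:
#   batch_size = config["general"]["training"]["batch_size"]  # 32
#   task = config["general"]["task"]                          # "rl"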