Updated method to create human advice matrix

ssm-lab · Apr 5, 2024 · 0d2ded8 · 0d2ded8
1 parent d85a547
commit 0d2ded8
Showing 1 changed file with 20 additions and 1 deletion.
diff --git a/src/runner.py b/src/runner.py
@@ -28,7 +28,26 @@ def get_human_input():
 
 def get_advice_matrix(human_input):
+    """Build the per-(state, action) advice matrix from the human's hints.
+
+    Each entry is a (b, d, u, a) record, initialised to (0, 0, 1, 1 / n_actions).
+    Returns the matrix; hint values are not applied yet (see TODO below).
+    """
     print("get advice from human input")
-    #TODO: consult human_advice()
+
+    n_states = ENVIRONMENT.observation_space.n
+    n_actions = ENVIRONMENT.action_space.n
+    advice_matrix = np.zeros((n_states, n_actions), dtype="f, f, f, f")
+    advice_matrix[:] = (0, 0, 1, 1 / n_actions)  # default for every pair
+
+    for hint in human_input.hints:
+        state_action_pairs = hint.cell.get_actions_to_me_from_all_neighbors()
+        print(state_action_pairs)
+        for sap in state_action_pairs:
+            print(sap[0]) # need the state not the cell
+            # TODO: once sap[0] can be mapped to a state index, store
+            # advice_matrix[state, sap[1].value] = (hint.b, hint.d, hint.u, 1 / n_actions)
+
+    return advice_matrix
 
 
 def update_policy(policy, ep_states, ep_actions, ep_probs, ep_returns, num_actions):