Skip to content

Commit

Permalink
Updated method to create human advice matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
dagenaik committed Apr 5, 2024
1 parent d85a547 commit 0d2ded8
Showing 1 changed file with 20 additions and 1 deletion.
21 changes: 20 additions & 1 deletion src/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,26 @@ def get_human_input():

def get_advice_matrix(human_input):
    """Build the per-(state, action) advice matrix from the human's hints.

    Every entry is a 4-field float32 record initialised to
    ``(0, 0, 1, 1 / num_actions)``.  The fields appear to be
    (b, d, u, a) in that order, judging by the hint attributes used below
    -- NOTE(review): presumably belief / disbelief / uncertainty / base rate;
    confirm against the hint definition.

    Args:
        human_input: Object exposing ``hints``; each hint must provide
            ``b``, ``d``, ``u`` and ``cell``.

    Returns:
        numpy.ndarray: Structured array of shape
        ``(ENVIRONMENT.observation_space.n, ENVIRONMENT.action_space.n)``.
        Hints are not applied to it yet (see the TODO in the loop), so it
        currently holds only the default record in every entry.
    """
    print("get advice from human input")
    # TODO: consult human_advice()

    num_states = ENVIRONMENT.observation_space.n
    num_actions = ENVIRONMENT.action_space.n
    # Same value for every entry and every hint, so compute it once.
    base_rate = 1 / num_actions

    advice_matrix = np.zeros((num_states, num_actions), dtype="f, f, f, f")
    # A tuple is treated as a single record and broadcast to all entries.
    advice_matrix[...] = (0, 0, 1, base_rate)

    for hint in human_input.hints:
        # Record this hint is meant to write into the matrix (b, d, u, a).
        opinion = (hint.b, hint.d, hint.u, base_rate)

        state_action_pairs = hint.cell.get_actions_to_me_from_all_neighbors()
        print(state_action_pairs)
        for sap in state_action_pairs:
            print(sap[0])  # need the state not the cell
            action = sap[1].value
            # TODO: map sap[0] (a cell) to its state index, then store the
            # hint with: advice_matrix[state, action] = opinion

    return advice_matrix


def update_policy(policy, ep_states, ep_actions, ep_probs, ep_returns, num_actions):
Expand Down

0 comments on commit 0d2ded8

Please sign in to comment.