payoffbuilder.py
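"""Builds the payoff table for the clean-vs-sloppy coding game.

For every pairing of developer strategies, this script runs repeated
simulation episodes and logs the mean and standard deviation of each
developer's payoff (issues delivered) and sloppiness ratio.
"""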
import itertools
import logging

import numpy as np
import tensorflow as tf
from tqdm import tqdm

import rlagent
import simmodel
import trainingdriver


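# Rule-based baseline strategies. The precise meaning of panic_threshold is
# defined in simmodel; from the names, the "last-minute-patcher" and
# "stressed-patcher" selectors are assumed to switch from clean to sloppy
# coding once their respective thresholds are hit.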
def load_heuristic_agents(logger):
    agent_map = {0: simmodel.BaseDeveloper(name="last-minute-patcher", logger=logger, panic_threshold=7,
                                           action_selector=simmodel.last_minute_patcher),
                 1: simmodel.BaseDeveloper(name="stressed-patcher", logger=logger, panic_threshold=2,
                                           action_selector=simmodel.stressed_patcher)}
    return agent_map


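# DeepQLearner is assumed to build its TensorFlow graph on instantiation;
# the variables it creates are then restored from the scenario checkpoint
# by tf.train.Saver in main() before any simulation step runs.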
def load_trained_agents(logger, scenario_name):
    agent_map = {}
    for agent_index in range(trainingdriver.NUMBER_AGENTS):
        agent = rlagent.DeepQLearner(
            name=trainingdriver.DEVELOPER_NAME_PREFIX + str(agent_index) + "_" + scenario_name,
            input_number=trainingdriver.INPUT_NUMBER,
            hidden_units=trainingdriver.HIDDEN_UNITS,
            logger=logger)
        agent_map[agent_index] = agent
    return agent_map


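# main() wires up a single scenario: it configures logging, selects either
# the heuristic or the trained agents (use_rl_agents flag), maps the
# CLEAN/SLOPPY actions to coding approaches, and hands everything to
# generate_payoff_values().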
def main():
    log_filename = "payoff_table_builder.log"
    logging_level = logging.INFO

    logger = logging.getLogger("Payoff-table-builder->")
    handler = logging.FileHandler(log_filename, mode='w')
    logger.addHandler(handler)
    logger.setLevel(logging_level)

    simulation_episodes = 100
    use_rl_agents = False

    # These are the values for the current scenario:
    # sloppy_rework_factor = 1.05
    # avg_resolution_time = trainingdriver.SCENARIO_AVG_RESOLUTION_TIME

    # These are the values for the worse scenario:
    # sloppy_rework_factor = 2
    # avg_resolution_time = trainingdriver.SCENARIO_AVG_RESOLUTION_TIME

    # These are the values for the better scenario:
    sloppy_rework_factor = 5
    avg_resolution_time = trainingdriver.SCENARIO_AVG_RESOLUTION_TIME

    scenario_name = "sloppy_code_impact_105"
    checkpoint_path = "results/sloppy_code_impact_105_5000_steps/" + scenario_name + trainingdriver.CHECKPOINT_SUFFIX

    scenario_approach_map = {simmodel.CLEAN_ACTION: trainingdriver.CLEAN_CODING_APPROACH,
                             simmodel.SLOPPY_ACTION: simmodel.CodingApproach(
                                 resolution_factor=trainingdriver.SLOPPY_RESOLUTION_FACTOR,
                                 rework_factor=sloppy_rework_factor,
                                 code_impact=trainingdriver.SLOPPY_CODE_IMPACT)}

    agent_map = load_heuristic_agents(logger)
    if use_rl_agents:
        agent_map = load_trained_agents(logger, scenario_name)

        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, checkpoint_path)
            logger.info("Restored: %s", checkpoint_path)

            generate_payoff_values(agent_map, avg_resolution_time, logger, scenario_approach_map, session,
                                   simulation_episodes)
    else:
        logger.info("Using only heuristic strategies!")
        generate_payoff_values(agent_map, avg_resolution_time, logger, scenario_approach_map, None,
                               simulation_episodes)


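# Every entry of the payoff table corresponds to an unordered strategy
# profile, so the loop below draws pairs (r=2) with replacement; for two
# strategies this yields the profiles (0, 0), (0, 1) and (1, 1), which is
# enough to fill a symmetric payoff table.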
def generate_payoff_values(agent_map, avg_resolution_time, logger, scenario_approach_map, session,
                           simulation_episodes):
    # We unpack a pair of strategy indices, so the pair size must be 2.
    for agent_index, opponent_index in itertools.combinations_with_replacement(range(trainingdriver.NUMBER_AGENTS),
                                                                               2):
        logger.info("Strategy Profile: DEV %d vs DEV %d", agent_index, opponent_index)

        simulation_environment = simmodel.SimulationEnvironment(logger=logger,
                                                                time_units=trainingdriver.SCENARIO_TIME_UNITS,
                                                                avg_resolution_time=avg_resolution_time,
                                                                prob_new_issue=trainingdriver.SCENARIO_PROB_NEW_ISSUE,
                                                                prob_rework=trainingdriver.SCENARIO_PROB_REWORK)
        developers = [simmodel.Developer(agent=agent_map[agent_index], approach_map=scenario_approach_map),
                      simmodel.Developer(agent=agent_map[opponent_index], approach_map=scenario_approach_map)]

        for episode_index in tqdm(range(1, simulation_episodes + 1)):
            simulation_environment.reset(developers)
            for time_step in range(simulation_environment.time_units):
                simulation_environment.step(developers=developers,
                                            session=session)

            logger.debug("Strategy Profile: DEV %d vs DEV %d -> Episode %d finished", agent_index,
                         opponent_index, episode_index)

            for developer in developers:
                developer.log_progress()

        for developer in developers:
            payoff_values = [performance_metric.issues_delivered for performance_metric in
                             developer.agent.metric_catalogue]
            sloppiness_values = [performance_metric.get_sloppy_ratio() for performance_metric in
                                 developer.agent.metric_catalogue]

            logger.info(
                "%s -> %d REPLICATIONS: Payoff (mean, std): %.2f %.2f Sloppiness (mean, std): %.2f %.2f",
                developer.name,
                simulation_episodes, np.mean(payoff_values), np.std(payoff_values), np.mean(sloppiness_values),
                np.std(sloppiness_values))


if __name__ == "__main__":
    main()
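# To reproduce a run (assuming the simmodel, rlagent and trainingdriver
# modules from this repository are on the path):
#
#   python payoffbuilder.py
#
# Output goes to payoff_table_builder.log, which is truncated on each run
# (the FileHandler is opened with mode='w').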