Rewrite ppo, update example scripts, clean up requirements

EvolutionGym · Jun 15, 2024 · 3bdbffb
1 parent bc6212e
commit 3bdbffb
Show file tree

Hide file tree

Showing 20 changed files with 543 additions and 955 deletions.
diff --git a/evogym/envs/base.py b/evogym/envs/base.py
@@ -392,15 +392,6 @@ def __init__(
 
         EvoGymBase.__init__(self, world=world, render_mode=render_mode, render_options=render_options)
         self.default_viewer.track_objects('robot')
-
-    def step(self, action):
-
-        action_copy = {}
-
-        for robot_name, a in action.items():
-            action_copy[robot_name] = a + 1
-
-        return super().step(action_copy)
 
     def pos_at_time(self, time):
         return super().pos_at_time(time)*self.VOXEL_SIZE

diff --git a/examples/bo/run.py b/examples/bo/run.py
@@ -1,9 +1,8 @@
-from distutils.command.config import config
 import os
 from re import X
 import shutil
-import random
 import numpy as np
+import argparse
 
 from GPyOpt.core.task.space import Design_space
 from GPyOpt.models import GPModel
@@ -13,17 +12,9 @@
 from GPyOpt.core.evaluators import ThompsonBatch
 from .optimizer import Objective, Optimization
 
-import sys
-curr_dir = os.path.dirname(os.path.abspath(__file__))
-root_dir = os.path.join(curr_dir, '..')
-external_dir = os.path.join(root_dir, 'externals')
-sys.path.insert(0, root_dir)
-sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail'))
-
+from ppo.run import run_ppo
 import evogym.envs
 from evogym import is_connected, has_actuator, get_full_connectivity
-from utils.algo_utils import TerminationCondition
-from ppo import run_ppo
 
 def get_robot_from_genome(genome, config):
     '''
@@ -36,6 +27,8 @@ def get_robot_from_genome(genome, config):
 
 def eval_genome_cost(genome, config, genome_id, generation):
     robot = get_robot_from_genome(genome, config)
+    args, env_name = config['args'], config['env_name']
+
     if not (is_connected(robot) and has_actuator(robot)):
         return 10
     else:
@@ -45,9 +38,7 @@ def eval_genome_cost(genome, config, genome_id, generation):
         save_path_controller = os.path.join(save_path_generation, 'controller')
         np.savez(save_path_structure, robot, connectivity)
         fitness = run_ppo(
-            structure=(robot, connectivity),
-            termination_condition=TerminationCondition(config['train_iters']),
-            saving_convention=(save_path_controller, genome_id),
+            args, robot, env_name, save_path_controller, f'{genome_id}', connectivity
         )
         cost = -fitness
         return cost
@@ -61,20 +52,23 @@ def eval_genome_constraint(genomes, config):
     return np.array(all_violation)
 
 def run_bo(
-        experiment_name,
-        structure_shape,
-        pop_size,
-        max_evaluations,
-        train_iters,
-        num_cores,
-    ):
-
-    save_path = os.path.join(root_dir, 'saved_data', experiment_name)
+    args: argparse.Namespace,
+):
+    exp_name, env_name, pop_size, structure_shape, max_evaluations, num_cores = (
+        args.exp_name,
+        args.env_name,
+        args.pop_size,
+        args.structure_shape,
+        args.max_evaluations,
+        args.num_cores,
+    )
+
+    save_path = os.path.join('saved_data', exp_name)
 
     try:
         os.makedirs(save_path)
     except:
-        print(f'THIS EXPERIMENT ({experiment_name}) ALREADY EXISTS')
+        print(f'THIS EXPERIMENT ({exp_name}) ALREADY EXISTS')
         print('Override? (y/n): ', end='')
         ans = input()
         if ans.lower() == 'y':
@@ -88,13 +82,13 @@ def run_bo(
     with open(save_path_metadata, 'w') as f:
         f.write(f'POP_SIZE: {pop_size}\n' \
             f'STRUCTURE_SHAPE: {structure_shape[0]} {structure_shape[1]}\n' \
-            f'MAX_EVALUATIONS: {max_evaluations}\n' \
-            f'TRAIN_ITERS: {train_iters}\n')
+            f'MAX_EVALUATIONS: {max_evaluations}\n')
 
     config = {
         'structure_shape': structure_shape,
-        'train_iters': train_iters,
         'save_path': save_path,
+        'args': args, # args for run_ppo
+        'env_name': env_name,
     }
 
     def constraint_func(genome): 

diff --git a/examples/cppn_neat/run.py b/examples/cppn_neat/run.py
@@ -1,26 +1,24 @@
 import os
 import shutil
-import random
 import numpy as np
 import torch
 import neat
+import argparse
 
 import sys
 curr_dir = os.path.dirname(os.path.abspath(__file__))
 root_dir = os.path.join(curr_dir, '..')
 external_dir = os.path.join(root_dir, 'externals')
 sys.path.insert(0, root_dir)
 sys.path.insert(1, os.path.join(external_dir, 'PyTorch-NEAT'))
-sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail'))
 
 from pytorch_neat.cppn import create_cppn
 from .parallel import ParallelEvaluator
 from .population import Population
 
-from utils.algo_utils import TerminationCondition
-from ppo import run_ppo
-from evogym import is_connected, has_actuator, get_full_connectivity, hashable
+from ppo.run import run_ppo
 import evogym.envs
+from evogym import is_connected, has_actuator, get_full_connectivity, hashable
 
 
 def get_cppn_input(structure_shape):
@@ -43,15 +41,16 @@ def get_robot_from_genome(genome, config):
 
 def eval_genome_fitness(genome, config, genome_id, generation):
     robot = get_robot_from_genome(genome, config)
+    args, env_name = config.extra_info['args'], config.extra_info['env_name']
+
     connectivity = get_full_connectivity(robot)
     save_path_generation = os.path.join(config.extra_info['save_path'], f'generation_{generation}')
     save_path_structure = os.path.join(save_path_generation, 'structure', f'{genome_id}')
     save_path_controller = os.path.join(save_path_generation, 'controller')
     np.savez(save_path_structure, robot, connectivity)
+
     fitness = run_ppo(
-        structure=(robot, connectivity),
-        termination_condition=TerminationCondition(config.extra_info['train_iters']),
-        saving_convention=(save_path_controller, genome_id),
+        args, robot, env_name, save_path_controller, f'{genome_id}', connectivity
     )
     return fitness
 
@@ -93,20 +92,23 @@ def post_evaluate(self, config, population, species, best_genome):
             f.write(out)
 
 def run_cppn_neat(
-        experiment_name,
-        structure_shape,
-        pop_size,
-        max_evaluations,
-        train_iters,
-        num_cores,
-    ):
+    args: argparse.Namespace
+):
+    exp_name, env_name, pop_size, structure_shape, max_evaluations, num_cores = (
+        args.exp_name,
+        args.env_name,
+        args.pop_size,
+        args.structure_shape,
+        args.max_evaluations,
+        args.num_cores,
+    )
 
-    save_path = os.path.join(root_dir, 'saved_data', experiment_name)
+    save_path = os.path.join('saved_data', exp_name)
 
     try:
         os.makedirs(save_path)
     except:
-        print(f'THIS EXPERIMENT ({experiment_name}) ALREADY EXISTS')
+        print(f'THIS EXPERIMENT ({exp_name}) ALREADY EXISTS')
         print('Override? (y/n): ', end='')
         ans = input()
         if ans.lower() == 'y':
@@ -120,8 +122,7 @@ def run_cppn_neat(
     with open(save_path_metadata, 'w') as f:
         f.write(f'POP_SIZE: {pop_size}\n' \
             f'STRUCTURE_SHAPE: {structure_shape[0]} {structure_shape[1]}\n' \
-            f'MAX_EVALUATIONS: {max_evaluations}\n' \
-            f'TRAIN_ITERS: {train_iters}\n')
+            f'MAX_EVALUATIONS: {max_evaluations}\n')
 
     structure_hashes = {}
 
@@ -134,9 +135,10 @@ def run_cppn_neat(
         config_path,
         extra_info={
             'structure_shape': structure_shape,
-            'train_iters': train_iters,
             'save_path': save_path,
             'structure_hashes': structure_hashes,
+            'args': args, # args for run_ppo
+            'env_name': env_name,
         },
         custom_config=[
             ('NEAT', 'pop_size', pop_size),

diff --git a/examples/ga/run.py b/examples/ga/run.py
@@ -3,34 +3,40 @@
 import shutil
 import random
 import math
+import argparse
+from typing import List
 
-import sys
-curr_dir = os.path.dirname(os.path.abspath(__file__))
-root_dir = os.path.join(curr_dir, '..')
-external_dir = os.path.join(root_dir, 'externals')
-sys.path.insert(0, root_dir)
-sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail'))
-
-from ppo import run_ppo
+from ppo.run import run_ppo
+import evogym.envs
 from evogym import sample_robot, hashable
 import utils.mp_group as mp
-from utils.algo_utils import get_percent_survival_evals, mutate, TerminationCondition, Structure
+from utils.algo_utils import get_percent_survival_evals, mutate, Structure
 
-def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_iters, num_cores):
+def run_ga(
+    args: argparse.Namespace,
+):
     print()
-
-    ### STARTUP: MANAGE DIRECTORIES ###
-    home_path = os.path.join(root_dir, "saved_data", experiment_name)
+
+    exp_name, env_name, pop_size, structure_shape, max_evaluations, num_cores = (
+        args.exp_name,
+        args.env_name,
+        args.pop_size,
+        args.structure_shape,
+        args.max_evaluations,
+        args.num_cores,
+    )
+
+    ### MANAGE DIRECTORIES ###
+    home_path = os.path.join("saved_data", exp_name)
     start_gen = 0
 
-    ### DEFINE TERMINATION CONDITION ###    
-    tc = TerminationCondition(train_iters)
+    ### DEFINE TERMINATION CONDITION ###
 
     is_continuing = False    
     try:
         os.makedirs(home_path)
     except:
-        print(f'THIS EXPERIMENT ({experiment_name}) ALREADY EXISTS')
+        print(f'THIS EXPERIMENT ({exp_name}) ALREADY EXISTS')
         print("Override? (y/n/c): ", end="")
         ans = input()
         if ans.lower() == "y":
@@ -46,22 +52,21 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it
 
     ### STORE META-DATA ##
     if not is_continuing:
-        temp_path = os.path.join(root_dir, "saved_data", experiment_name, "metadata.txt")
+        temp_path = os.path.join("saved_data", exp_name, "metadata.txt")
 
         try:
-            os.makedirs(os.path.join(root_dir, "saved_data", experiment_name))
+            os.makedirs(os.path.join("saved_data", exp_name))
         except:
             pass
 
         f = open(temp_path, "w")
         f.write(f'POP_SIZE: {pop_size}\n')
         f.write(f'STRUCTURE_SHAPE: {structure_shape[0]} {structure_shape[1]}\n')
         f.write(f'MAX_EVALUATIONS: {max_evaluations}\n')
-        f.write(f'TRAIN_ITERS: {train_iters}\n')
         f.close()
 
     else:
-        temp_path = os.path.join(root_dir, "saved_data", experiment_name, "metadata.txt")
+        temp_path = os.path.join("saved_data", exp_name, "metadata.txt")
         f = open(temp_path, "r")
         count = 0
         for line in f:
@@ -71,18 +76,15 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it
                 structure_shape = (int(line.split()[1]), int(line.split()[2]))
             if count == 2:
                 max_evaluations = int(line.split()[1])
-            if count == 3:
-                train_iters = int(line.split()[1])
-                tc.change_target(train_iters)
             count += 1
 
         print(f'Starting training with pop_size {pop_size}, shape ({structure_shape[0]}, {structure_shape[1]}), ' + 
-            f'max evals: {max_evaluations}, train iters {train_iters}.')
+            f'max evals: {max_evaluations}.')
 
         f.close()
 
     ### GENERATE // GET INITIAL POPULATION ###
-    structures = []
+    structures: List[Structure] = []
     population_structure_hashes = {}
     num_evaluations = 0
     generation = 0
@@ -103,7 +105,7 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it
     else:
         for g in range(start_gen+1):
             for i in range(pop_size):
-                save_path_structure = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(g), "structure", str(i) + ".npz")
+                save_path_structure = os.path.join("saved_data", exp_name, "generation_" + str(g), "structure", str(i) + ".npz")
                 np_data = np.load(save_path_structure)
                 structure_data = []
                 for key, value in np_data.items():
@@ -125,8 +127,8 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it
 
 
         ### MAKE GENERATION DIRECTORIES ###
-        save_path_structure = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation), "structure")
-        save_path_controller = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation), "controller")
+        save_path_structure = os.path.join("saved_data", exp_name, "generation_" + str(generation), "structure")
+        save_path_controller = os.path.join("saved_data", exp_name, "generation_" + str(generation), "controller")
 
         try:
             os.makedirs(save_path_structure)
@@ -150,19 +152,20 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it
         for structure in structures:
 
             if structure.is_survivor:
-                save_path_controller_part = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation), "controller",
-                    "robot_" + str(structure.label) + "_controller" + ".pt")
-                save_path_controller_part_old = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation-1), "controller",
-                    "robot_" + str(structure.prev_gen_label) + "_controller" + ".pt")
+                save_path_controller_part = os.path.join("saved_data", exp_name, "generation_" + str(generation), "controller",
+                    f"{structure.label}.zip")
+                save_path_controller_part_old = os.path.join("saved_data", exp_name, "generation_" + str(generation-1), "controller",
+                    f"{structure.prev_gen_label}.zip")
 
                 print(f'Skipping training for {save_path_controller_part}.\n')
                 try:
                     shutil.copy(save_path_controller_part_old, save_path_controller_part)
                 except:
                     print(f'Error coppying controller for {save_path_controller_part}.\n')
-            else:        
-                ppo_args = ((structure.body, structure.connections), tc, (save_path_controller, structure.label))
+            else:
+                ppo_args = (args, structure.body, env_name, save_path_controller, f'{structure.label}', structure.connections)
                 group.add_job(run_ppo, ppo_args, callback=structure.set_reward)
+
 
         group.run_jobs(num_cores)
 
@@ -177,7 +180,7 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it
         structures = sorted(structures, key=lambda structure: structure.fitness, reverse=True)
 
         #SAVE RANKING TO FILE
-        temp_path = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation), "output.txt")
+        temp_path = os.path.join("saved_data", exp_name, "generation_" + str(generation), "output.txt")
         f = open(temp_path, "w")
 
         out = ""

diff --git a/examples/ppo/__init__.py b/examples/ppo/__init__.py