Skip to content

Commit

Permalink
Rewrite ppo, update example scripts, clean up requirements
Browse files Browse the repository at this point in the history
  • Loading branch information
jagdeepsb committed Jun 15, 2024
1 parent bc6212e commit 3bdbffb
Show file tree
Hide file tree
Showing 20 changed files with 543 additions and 955 deletions.
9 changes: 0 additions & 9 deletions evogym/envs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,15 +392,6 @@ def __init__(

EvoGymBase.__init__(self, world=world, render_mode=render_mode, render_options=render_options)
self.default_viewer.track_objects('robot')

def step(self, action):
    """Add 1 to every robot's action, then delegate to the parent ``step``.

    ``action`` is a mapping from robot name to that robot's action value;
    a new mapping is built so the caller's dict is not modified.
    """
    shifted_actions = {robot_name: a + 1 for robot_name, a in action.items()}
    return super().step(shifted_actions)

def pos_at_time(self, time):
    """Return the parent's ``pos_at_time(time)`` result scaled by ``self.VOXEL_SIZE``."""
    unscaled_pos = super().pos_at_time(time)
    return unscaled_pos * self.VOXEL_SIZE
Expand Down
48 changes: 21 additions & 27 deletions examples/bo/run.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from distutils.command.config import config
import os
from re import X
import shutil
import random
import numpy as np
import argparse

from GPyOpt.core.task.space import Design_space
from GPyOpt.models import GPModel
Expand All @@ -13,17 +12,9 @@
from GPyOpt.core.evaluators import ThompsonBatch
from .optimizer import Objective, Optimization

import sys
curr_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.join(curr_dir, '..')
external_dir = os.path.join(root_dir, 'externals')
sys.path.insert(0, root_dir)
sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail'))

from ppo.run import run_ppo
import evogym.envs
from evogym import is_connected, has_actuator, get_full_connectivity
from utils.algo_utils import TerminationCondition
from ppo import run_ppo

def get_robot_from_genome(genome, config):
'''
Expand All @@ -36,6 +27,8 @@ def get_robot_from_genome(genome, config):

def eval_genome_cost(genome, config, genome_id, generation):
robot = get_robot_from_genome(genome, config)
args, env_name = config['args'], config['env_name']

if not (is_connected(robot) and has_actuator(robot)):
return 10
else:
Expand All @@ -45,9 +38,7 @@ def eval_genome_cost(genome, config, genome_id, generation):
save_path_controller = os.path.join(save_path_generation, 'controller')
np.savez(save_path_structure, robot, connectivity)
fitness = run_ppo(
structure=(robot, connectivity),
termination_condition=TerminationCondition(config['train_iters']),
saving_convention=(save_path_controller, genome_id),
args, robot, env_name, save_path_controller, f'{genome_id}', connectivity
)
cost = -fitness
return cost
Expand All @@ -61,20 +52,23 @@ def eval_genome_constraint(genomes, config):
return np.array(all_violation)

def run_bo(
experiment_name,
structure_shape,
pop_size,
max_evaluations,
train_iters,
num_cores,
):

save_path = os.path.join(root_dir, 'saved_data', experiment_name)
args: argparse.Namespace,
):
exp_name, env_name, pop_size, structure_shape, max_evaluations, num_cores = (
args.exp_name,
args.env_name,
args.pop_size,
args.structure_shape,
args.max_evaluations,
args.num_cores,
)

save_path = os.path.join('saved_data', exp_name)

try:
os.makedirs(save_path)
except:
print(f'THIS EXPERIMENT ({experiment_name}) ALREADY EXISTS')
print(f'THIS EXPERIMENT ({exp_name}) ALREADY EXISTS')
print('Override? (y/n): ', end='')
ans = input()
if ans.lower() == 'y':
Expand All @@ -88,13 +82,13 @@ def run_bo(
with open(save_path_metadata, 'w') as f:
f.write(f'POP_SIZE: {pop_size}\n' \
f'STRUCTURE_SHAPE: {structure_shape[0]} {structure_shape[1]}\n' \
f'MAX_EVALUATIONS: {max_evaluations}\n' \
f'TRAIN_ITERS: {train_iters}\n')
f'MAX_EVALUATIONS: {max_evaluations}\n')

config = {
'structure_shape': structure_shape,
'train_iters': train_iters,
'save_path': save_path,
'args': args, # args for run_ppo
'env_name': env_name,
}

def constraint_func(genome):
Expand Down
42 changes: 22 additions & 20 deletions examples/cppn_neat/run.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,24 @@
import os
import shutil
import random
import numpy as np
import torch
import neat
import argparse

import sys
curr_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.join(curr_dir, '..')
external_dir = os.path.join(root_dir, 'externals')
sys.path.insert(0, root_dir)
sys.path.insert(1, os.path.join(external_dir, 'PyTorch-NEAT'))
sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail'))

from pytorch_neat.cppn import create_cppn
from .parallel import ParallelEvaluator
from .population import Population

from utils.algo_utils import TerminationCondition
from ppo import run_ppo
from evogym import is_connected, has_actuator, get_full_connectivity, hashable
from ppo.run import run_ppo
import evogym.envs
from evogym import is_connected, has_actuator, get_full_connectivity, hashable


def get_cppn_input(structure_shape):
Expand All @@ -43,15 +41,16 @@ def get_robot_from_genome(genome, config):

def eval_genome_fitness(genome, config, genome_id, generation):
    """Evaluate one genome by training a PPO controller for its robot.

    The robot decoded from ``genome`` and its connectivity are saved under
    ``<save_path>/generation_<generation>/structure/<genome_id>`` and then
    passed to ``run_ppo``; the value ``run_ppo`` returns is the fitness.

    Args:
        genome: genome to decode via ``get_robot_from_genome``.
        config: configuration object whose ``extra_info`` dict provides
            ``'args'``, ``'env_name'`` and ``'save_path'``.
        genome_id: identifier used for the structure file name and passed
            (as a string) to ``run_ppo``.
        generation: generation index used in the output directory name.

    Returns:
        The fitness value returned by ``run_ppo``.
    """
    robot = get_robot_from_genome(genome, config)
    args, env_name = config.extra_info['args'], config.extra_info['env_name']

    connectivity = get_full_connectivity(robot)
    save_path_generation = os.path.join(config.extra_info['save_path'], f'generation_{generation}')
    save_path_structure = os.path.join(save_path_generation, 'structure', f'{genome_id}')
    save_path_controller = os.path.join(save_path_generation, 'controller')
    # NOTE(review): assumes the generation's 'structure' directory already
    # exists; np.savez does not create parent directories -- confirm in caller.
    np.savez(save_path_structure, robot, connectivity)

    # Single positional call form; the older keyword form
    # (structure= / termination_condition= / saving_convention=) must not be
    # mixed in, since positional arguments cannot follow keyword arguments.
    fitness = run_ppo(
        args, robot, env_name, save_path_controller, f'{genome_id}', connectivity
    )
    return fitness

Expand Down Expand Up @@ -93,20 +92,23 @@ def post_evaluate(self, config, population, species, best_genome):
f.write(out)

def run_cppn_neat(
experiment_name,
structure_shape,
pop_size,
max_evaluations,
train_iters,
num_cores,
):
args: argparse.Namespace
):
exp_name, env_name, pop_size, structure_shape, max_evaluations, num_cores = (
args.exp_name,
args.env_name,
args.pop_size,
args.structure_shape,
args.max_evaluations,
args.num_cores,
)

save_path = os.path.join(root_dir, 'saved_data', experiment_name)
save_path = os.path.join('saved_data', exp_name)

try:
os.makedirs(save_path)
except:
print(f'THIS EXPERIMENT ({experiment_name}) ALREADY EXISTS')
print(f'THIS EXPERIMENT ({exp_name}) ALREADY EXISTS')
print('Override? (y/n): ', end='')
ans = input()
if ans.lower() == 'y':
Expand All @@ -120,8 +122,7 @@ def run_cppn_neat(
with open(save_path_metadata, 'w') as f:
f.write(f'POP_SIZE: {pop_size}\n' \
f'STRUCTURE_SHAPE: {structure_shape[0]} {structure_shape[1]}\n' \
f'MAX_EVALUATIONS: {max_evaluations}\n' \
f'TRAIN_ITERS: {train_iters}\n')
f'MAX_EVALUATIONS: {max_evaluations}\n')

structure_hashes = {}

Expand All @@ -134,9 +135,10 @@ def run_cppn_neat(
config_path,
extra_info={
'structure_shape': structure_shape,
'train_iters': train_iters,
'save_path': save_path,
'structure_hashes': structure_hashes,
'args': args, # args for run_ppo
'env_name': env_name,
},
custom_config=[
('NEAT', 'pop_size', pop_size),
Expand Down
73 changes: 38 additions & 35 deletions examples/ga/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,40 @@
import shutil
import random
import math
import argparse
from typing import List

import sys
curr_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.join(curr_dir, '..')
external_dir = os.path.join(root_dir, 'externals')
sys.path.insert(0, root_dir)
sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail'))

from ppo import run_ppo
from ppo.run import run_ppo
import evogym.envs
from evogym import sample_robot, hashable
import utils.mp_group as mp
from utils.algo_utils import get_percent_survival_evals, mutate, TerminationCondition, Structure
from utils.algo_utils import get_percent_survival_evals, mutate, Structure

def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_iters, num_cores):
def run_ga(
args: argparse.Namespace,
):
print()

### STARTUP: MANAGE DIRECTORIES ###
home_path = os.path.join(root_dir, "saved_data", experiment_name)

exp_name, env_name, pop_size, structure_shape, max_evaluations, num_cores = (
args.exp_name,
args.env_name,
args.pop_size,
args.structure_shape,
args.max_evaluations,
args.num_cores,
)

### MANAGE DIRECTORIES ###
home_path = os.path.join("saved_data", exp_name)
start_gen = 0

### DEFINE TERMINATION CONDITION ###
tc = TerminationCondition(train_iters)
### DEFINE TERMINATION CONDITION ###

is_continuing = False
try:
os.makedirs(home_path)
except:
print(f'THIS EXPERIMENT ({experiment_name}) ALREADY EXISTS')
print(f'THIS EXPERIMENT ({exp_name}) ALREADY EXISTS')
print("Override? (y/n/c): ", end="")
ans = input()
if ans.lower() == "y":
Expand All @@ -46,22 +52,21 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it

### STORE META-DATA ##
if not is_continuing:
temp_path = os.path.join(root_dir, "saved_data", experiment_name, "metadata.txt")
temp_path = os.path.join("saved_data", exp_name, "metadata.txt")

try:
os.makedirs(os.path.join(root_dir, "saved_data", experiment_name))
os.makedirs(os.path.join("saved_data", exp_name))
except:
pass

f = open(temp_path, "w")
f.write(f'POP_SIZE: {pop_size}\n')
f.write(f'STRUCTURE_SHAPE: {structure_shape[0]} {structure_shape[1]}\n')
f.write(f'MAX_EVALUATIONS: {max_evaluations}\n')
f.write(f'TRAIN_ITERS: {train_iters}\n')
f.close()

else:
temp_path = os.path.join(root_dir, "saved_data", experiment_name, "metadata.txt")
temp_path = os.path.join("saved_data", exp_name, "metadata.txt")
f = open(temp_path, "r")
count = 0
for line in f:
Expand All @@ -71,18 +76,15 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it
structure_shape = (int(line.split()[1]), int(line.split()[2]))
if count == 2:
max_evaluations = int(line.split()[1])
if count == 3:
train_iters = int(line.split()[1])
tc.change_target(train_iters)
count += 1

print(f'Starting training with pop_size {pop_size}, shape ({structure_shape[0]}, {structure_shape[1]}), ' +
f'max evals: {max_evaluations}, train iters {train_iters}.')
f'max evals: {max_evaluations}.')

f.close()

### GENERATE // GET INITIAL POPULATION ###
structures = []
structures: List[Structure] = []
population_structure_hashes = {}
num_evaluations = 0
generation = 0
Expand All @@ -103,7 +105,7 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it
else:
for g in range(start_gen+1):
for i in range(pop_size):
save_path_structure = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(g), "structure", str(i) + ".npz")
save_path_structure = os.path.join("saved_data", exp_name, "generation_" + str(g), "structure", str(i) + ".npz")
np_data = np.load(save_path_structure)
structure_data = []
for key, value in np_data.items():
Expand All @@ -125,8 +127,8 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it


### MAKE GENERATION DIRECTORIES ###
save_path_structure = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation), "structure")
save_path_controller = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation), "controller")
save_path_structure = os.path.join("saved_data", exp_name, "generation_" + str(generation), "structure")
save_path_controller = os.path.join("saved_data", exp_name, "generation_" + str(generation), "controller")

try:
os.makedirs(save_path_structure)
Expand All @@ -150,19 +152,20 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it
for structure in structures:

if structure.is_survivor:
save_path_controller_part = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation), "controller",
"robot_" + str(structure.label) + "_controller" + ".pt")
save_path_controller_part_old = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation-1), "controller",
"robot_" + str(structure.prev_gen_label) + "_controller" + ".pt")
save_path_controller_part = os.path.join("saved_data", exp_name, "generation_" + str(generation), "controller",
f"{structure.label}.zip")
save_path_controller_part_old = os.path.join("saved_data", exp_name, "generation_" + str(generation-1), "controller",
f"{structure.prev_gen_label}.zip")

print(f'Skipping training for {save_path_controller_part}.\n')
try:
shutil.copy(save_path_controller_part_old, save_path_controller_part)
except:
print(f'Error coppying controller for {save_path_controller_part}.\n')
else:
ppo_args = ((structure.body, structure.connections), tc, (save_path_controller, structure.label))
else:
ppo_args = (args, structure.body, env_name, save_path_controller, f'{structure.label}', structure.connections)
group.add_job(run_ppo, ppo_args, callback=structure.set_reward)


group.run_jobs(num_cores)

Expand All @@ -177,7 +180,7 @@ def run_ga(experiment_name, structure_shape, pop_size, max_evaluations, train_it
structures = sorted(structures, key=lambda structure: structure.fitness, reverse=True)

#SAVE RANKING TO FILE
temp_path = os.path.join(root_dir, "saved_data", experiment_name, "generation_" + str(generation), "output.txt")
temp_path = os.path.join("saved_data", exp_name, "generation_" + str(generation), "output.txt")
f = open(temp_path, "w")

out = ""
Expand Down
2 changes: 0 additions & 2 deletions examples/ppo/__init__.py

This file was deleted.

Loading

0 comments on commit 3bdbffb

Please sign in to comment.