forked from jzhuo/genetic-sgd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
134 lines (111 loc) · 3.58 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""
This is the main function for the files.
It should contain functions that
* Load the dataset or datasets to be used
* Initialize the genetic algorithm and its population
* Train on the dataset
"""
import numpy as np
import pandas as pd
import genetic_algorithm
import neural_network
from sklearn.metrics import mean_squared_error
# Seed handed to NumPy's global random number generator below.
SEED = 42
# Candidate values for the "output_size" entry of the parameter grids.
OUTPUT_SIZE = [1]
# Candidate values for the "hidden_layer_size" entry of the parameter grids.
# Further sizes (10, 25, 50, 100, 250) are listed here but currently disabled.
HIDDEN_LAYER_SIZES = [5, 25]
# Candidate values for the "learning_rate" entry of the parameter grids.
LEARNING_RATES = [1e-3]
# Seed NumPy's global RNG once, at import time, before any other code runs.
np.random.seed(SEED)
def load_dataset(name):
    """Read a CSV dataset into a pandas DataFrame.

    Args:
        name: Path (or file-like object) handed straight to ``pd.read_csv``.

    Returns:
        The parsed DataFrame; the first CSV column becomes the index.
    """
    return pd.read_csv(filepath_or_buffer=name, index_col=0)
def calculate_input_size(data):
    """Return the number of input columns in ``data``.

    Args:
        data: Any object with a ``shape`` attribute whose second entry is the
            column count (e.g. a DataFrame or a 2-D array).

    Returns:
        The column count minus one.
    """
    column_count = data.shape[1]
    # One column is excluded from the count of inputs.
    return column_count - 1
def plot_comparison():
    """Placeholder for a comparison plot; currently does nothing.

    The function only imports matplotlib and returns ``None``. What should be
    plotted is not specified here (presumably the models' errors -- TODO
    confirm once implemented).
    """
    # Imported locally; ``plt`` is not used yet.
    import matplotlib.pyplot as plt
    pass
def cross_validate_on_data(estimator, param_grid, data):
    """Run a cross-validated grid search for ``estimator`` on ``data``.

    Args:
        estimator: Estimator object handed to ``GridSearchCV``.
        param_grid: Mapping of parameter names to lists of candidate values.
        data: Table that ``split_data`` can separate into features and target.

    Returns:
        The ``GridSearchCV`` instance after ``fit`` has been called on it.
    """
    from sklearn.model_selection import GridSearchCV

    # Fixed search settings: 3 folds, a single job, verbose progress output,
    # candidates ranked by negated mean squared error.
    search_settings = {
        "scoring": "neg_mean_squared_error",
        "cv": 3,
        "n_jobs": 1,
        "verbose": 4,
    }
    search = GridSearchCV(estimator, param_grid, **search_settings)

    features, targets = split_data(data)
    search.fit(features, targets)
    return search
def make_nn_grid(data):
    """Grid-search the plain neural network on ``data``.

    Args:
        data: Training table; its column count determines ``input_size`` and
            it is forwarded to ``cross_validate_on_data``.

    Returns:
        Whatever ``cross_validate_on_data`` returns for the neural network.
    """
    network = neural_network.NeuralNetwork()
    search_space = dict(
        input_size=[calculate_input_size(data)],
        hidden_layer_size=HIDDEN_LAYER_SIZES,
        output_size=OUTPUT_SIZE,
        learning_rate=LEARNING_RATES,
        epochs=[10],
        # Reminder: toggle verbosity here when debugging.
        verbose=[0],
    )
    print("\n\n***** Training Neural Networks *****\n\n")
    return cross_validate_on_data(network, search_space, data)
def make_ga_grid(data, hybrid):
    """Grid-search the genetic-algorithm estimator on ``data``.

    Builds a ``genetic_algorithm.GeneticAlgorithm`` estimator and its
    parameter grid, prints a banner naming the variant being trained, and
    delegates the search to ``cross_validate_on_data``.

    Args:
        data: Training table; its column count determines ``input_size`` and
            it is forwarded to ``cross_validate_on_data``.
        hybrid: Forwarded unchanged as the only candidate for the estimator's
            ``hybrid`` parameter. Any truthy value selects the "Hybrid GA"
            banner, so the banner always agrees with the flag that is passed.

    Returns:
        Whatever ``cross_validate_on_data`` returns for the genetic algorithm.
    """
    estimator = genetic_algorithm.GeneticAlgorithm()
    param_grid = {
        "hybrid": [hybrid],
        # Use the shared helper so both grids derive the input size the same way.
        "input_size": [calculate_input_size(data)],
        "hidden_layer_size": HIDDEN_LAYER_SIZES,
        "output_size": OUTPUT_SIZE,
        "population_size": [5],
        "selection_size": [3],
        "learning_rate": LEARNING_RATES,
        "epochs": [5],
        "generations": [2],
        "cases": [["mse", "l2", "l1"]],
        "verbose": [0],  # Reminder: toggle verbosity here when debugging.
    }
    # Truthiness rather than ``is True``: flags such as numpy booleans or 1
    # would otherwise print the plain-GA banner while training the hybrid.
    if hybrid:
        print("\n\n***** Training Hybrid GA *****\n\n")
    else:
        print("\n\n***** Training GA *****\n\n")
    return cross_validate_on_data(estimator, param_grid, data)
def split_data(data):
    """Separate a DataFrame into a feature matrix and a target vector.

    Args:
        data: DataFrame containing a column named "y".

    Returns:
        Tuple ``(X, y)``: ``X`` holds the values of every column except "y"
        (in their original order) and ``y`` holds the values of column "y".

    Raises:
        ValueError: If ``data`` has no "y" column.
    """
    feature_names = [*data.columns]
    # list.remove raises ValueError when "y" is absent.
    feature_names.remove("y")
    return data[feature_names].values, data["y"].values
def analyze_grids(grids, data):
    """Score each grid's best model on ``data`` and report the MSEs.

    Args:
        grids: Mapping with the keys "hybrid", "nn" and "ga". Each value must
            expose a ``best_estimator_`` attribute whose ``predict`` accepts
            the feature matrix produced by ``split_data``.
        data: Table with a "y" target column; all other columns are features.

    Returns:
        dict: Mean squared error of each best model, keyed "hybrid", "nn"
        and "ga".

    Raises:
        ValueError: If any two grids hand back the very same estimator object.
    """
    X, y = split_data(data)
    print("X.shape", X.shape)
    print("y.shape", y.shape)

    # (grid key, label used in the report), in the order results are printed.
    report_order = (
        ("hybrid", "Hybrid GA"),
        ("nn", "Neural Network"),
        ("ga", "GA"),
    )

    # Extract the best model from each grid.
    best_models = {key: grids[key].best_estimator_ for key, _ in report_order}

    # Sanity check by identity: "the same" means the same object, and this
    # does not depend on whatever ``__eq__`` the estimator classes define.
    if len({id(model) for model in best_models.values()}) != len(best_models):
        raise ValueError("The best estimators aren't supposed to be the same!")

    # Predict with each model and compute its mean squared error.
    mse_by_model = {
        key: mean_squared_error(y, model.predict(X))
        for key, model in best_models.items()
    }

    for key, label in report_order:
        print(label + " MSE:", mse_by_model[key])

    # Hand the scores back so callers can use them beyond the printout.
    return mse_by_model
if __name__ == "__main__":
    # Read the dataset and shuffle its rows (frac=1 keeps every row).
    shuffled = load_dataset("data/uball_0.0_50_200").sample(frac=1)

    # The first 80% of the shuffled rows train the models; the rest is held out.
    cutoff = int(0.8 * len(shuffled))
    train_rows, test_rows = shuffled[:cutoff], shuffled[cutoff:]

    # Fit the three grid searches in a fixed order: NN, hybrid GA, plain GA.
    fitted = {}
    fitted["nn"] = make_nn_grid(train_rows)
    fitted["hybrid"] = make_ga_grid(train_rows, True)
    fitted["ga"] = make_ga_grid(train_rows, False)

    # Compare the best model of each search on the held-out rows.
    analyze_grids(fitted, test_rows)