hyperparameter_selection.py
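"""
Hyperparameter selection script.

Runs a grid search over the undersampling proportion, learning rate and
regularization strength (lambda), scoring each combination with k-fold
cross-validation and reporting the one with the best mean F1 score.

Assumed usage (the paths below match the np.load calls in __main__): run from
the repository root with the processed datasets already present in data/, e.g.

    python hyperparameter_selection.py
"""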
import itertools
import numpy as np
from src import model
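
# Note: `model.cross_validation` (defined in src/model.py, not shown here) is assumed
# to run k-fold cross-validation with the given learning rate `lr`, regularization
# strength `lambda_`, and undersampling proportion `prop`, and to return the mean F1
# score across the folds; that is the only part of its interface this script relies on.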


def compute_best_hyperparams(
    x_train: np.ndarray, y_train: np.ndarray, hyperparams_grid: list
) -> None:
    """
    Grid-search the undersampling proportion, learning rate and regularization
    strength, scoring each combination with k-fold cross-validation and
    reporting the one with the best mean F1 score.
    """
    # Evaluate every hyperparameter combination with undersampling
    f1_undersampling = []
    for proportion, lr, lambda_ in hyperparams_grid:
        print(
            f"Hyperparams: undersampling proportion: {proportion}, lr: {lr}, lambda: {lambda_}"
        )
        mean_f1_score = model.cross_validation(
            y=y_train,
            x=x_train,
            k_fold=5,
            lr=lr,
            lambda_=lambda_,
            prop=proportion,
        )
        print(f"Mean F1 score: {mean_f1_score}")
        f1_undersampling.append(mean_f1_score)

    # Pick the combination with the highest mean F1 score
    best_undersampling = np.argmax(f1_undersampling)
    print(
        "The optimal set of hyperparams is:",
        hyperparams_grid[best_undersampling],
        "leading to an F1 score of",
        f1_undersampling[best_undersampling],
    )


if __name__ == "__main__":
    # Set the random seed so that results are deterministic
    np.random.seed(42)

    print("Reading processed datasets...")
    # Load the already-processed datasets
    # (x_test is loaded for completeness but is not used by the grid search)
    x_train = np.load("data/processed_x_train.npz")["arr_0"]
    x_test = np.load("data/processed_x_test.npz")["arr_0"]
    y_train = np.load("data/processed_y_train.npz")["arr_0"]
    print("Success!")

    # Grid search over the undersampling proportion and training hyperparameters
    print("Start grid search with the following candidate params:")
    prop = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]
    learning_rates = [0.1, 0.15, 0.2]
    lambdas = [0.1, 0.05, 0.01, 0.005, 0]
    print(f"Undersampling proportion: {prop}")
    print(f"Learning rate: {learning_rates}")
    print(f"Lambda: {lambdas}")
    print("======")
    hyperparams_grid = list(itertools.product(prop, learning_rates, lambdas))
    compute_best_hyperparams(x_train, y_train, hyperparams_grid)