Merge pull request #257 from alan-turing-institute/optimise
Optimise
mastoffel authored Oct 10, 2024
2 parents 983a012 + b68c497 · commit 3706802
Showing 17 changed files with 66 additions and 128 deletions.
8 changes: 0 additions & 8 deletions autoemulate/compare.py
@@ -1,16 +1,9 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.decomposition import PCA
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold
from sklearn.model_selection import PredefinedSplit
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
from tqdm.autonotebook import tqdm

@@ -27,7 +20,6 @@
from autoemulate.plotting import _plot_model
from autoemulate.printing import _print_setup
from autoemulate.save import ModelSerialiser
from autoemulate.utils import _ensure_2d
from autoemulate.utils import _get_full_model_name
from autoemulate.utils import _redirect_warnings
from autoemulate.utils import get_model_name
2 changes: 0 additions & 2 deletions autoemulate/cross_validate.py
@@ -5,8 +5,6 @@
import pandas as pd
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_validate
from sklearn.model_selection import PredefinedSplit
from sklearn.model_selection import train_test_split

from autoemulate.utils import get_model_name
from autoemulate.utils import get_model_params
61 changes: 30 additions & 31 deletions autoemulate/emulators/conditional_neural_process.py
@@ -9,7 +9,6 @@
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
from skopt.space import Real
from skorch import NeuralNetRegressor
from skorch.callbacks import EarlyStopping
from skorch.callbacks import GradientNormClipping
@@ -42,8 +41,10 @@ class ConditionalNeuralProcess(RegressorMixin, BaseEstimator):
The number of hidden units in the neural network layers.
latent_dim : int, default=64
The dimensionality of the latent space.
hidden_layers : int, default=3
The number of hidden layers in the neural network.
hidden_layers_enc : int, default=3
The number of hidden layers in the encoder.
hidden_layers_dec : int, default=3
The number of hidden layers in the decoder.
min_context_points : int, default=3
The minimum number of context points to use during training.
max_context_points : int, default=10
@@ -108,26 +109,28 @@ def __init__(
# architecture
hidden_dim=64,
latent_dim=64,
hidden_layers=3,
hidden_layers_enc=3,
hidden_layers_dec=3,
# data per episode
min_context_points=3,
max_context_points=10,
n_episode=32,
# training
max_epochs=100,
lr=1e-2,
lr=5e-3,
batch_size=16,
activation=nn.ReLU,
optimizer=torch.optim.AdamW,
normalize_y=True,
# misc
device=None,
device="cpu",
random_state=None,
attention=False,
):
self.hidden_dim = hidden_dim
self.latent_dim = latent_dim
self.hidden_layers = hidden_layers
self.hidden_layers_enc = hidden_layers_enc
self.hidden_layers_dec = hidden_layers_dec
self.min_context_points = min_context_points
self.max_context_points = max_context_points
self.n_episode = n_episode
@@ -184,7 +187,8 @@ def fit(self, X, y):
module__output_dim=self.output_dim_,
module__hidden_dim=self.hidden_dim,
module__latent_dim=self.latent_dim,
module__hidden_layers=self.hidden_layers,
module__hidden_layers_enc=self.hidden_layers_enc,
module__hidden_layers_dec=self.hidden_layers_dec,
module__activation=self.activation,
dataset__min_context_points=self.min_context_points,
dataset__max_context_points=self.max_context_points,
@@ -193,11 +197,7 @@
lr=self.lr,
batch_size=self.batch_size,
optimizer=self.optimizer,
device=self.device
if self.device is not None
else "cuda"
if torch.cuda.is_available()
else "cpu",
device=self.device,
dataset=CNPDataset, # special dataset to sample context and target sets
criterion=CNPLoss,
iterator_train__collate_fn=cnp_collate_fn, # special collate to different n in episodes
@@ -260,31 +260,30 @@ def predict(self, X, return_std=False):
def get_grid_params(search_type: str = "random"):
param_space = {
"max_epochs": [100, 200, 300],
"batch_size": [16, 32, 64],
"batch_size": [16, 32],
"hidden_dim": [32, 64, 128],
"latent_dim": [32, 64, 128],
"max_context_points": [10, 20, 30],
"hidden_layers": [1, 2, 3, 4, 5],
"max_context_points": [5, 10, 15],
"hidden_layers_enc": [2, 3, 4],
"hidden_layers_dec": [2, 3, 4],
"activation": [
nn.ReLU,
# nn.Tanh,
nn.GELU,
# nn.Sigmoid,
],
# ],
"optimizer": [torch.optim.AdamW, torch.optim.SGD], #
"optimizer": [torch.optim.AdamW], #
"lr": loguniform(5e-4, 1e-3, 5e-3, 1e-2),
}
# match search_type:
# case "random":
# param_space |= {
# "lr": loguniform(1e-4, 1e-2),
# }
# case "bayes":
# param_space |= {
# "lr": Real(1e-4, 1e-2, prior="log-uniform"),
# }
# case _:
# raise ValueError(f"Invalid search type: {search_type}")
# # match search_type:
# case "random":
# param_space |= {
# "lr": loguniform(1e-4, 1e-2),
# }
# case "bayes":
# param_space |= {
# "lr": Real(1e-4, 1e-2, prior="log-uniform"),
# }
# case _:
# raise ValueError(f"Invalid search type: {search_type}")

return param_space

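A minimal usage sketch of the revised estimator, assuming the import path below and that return_std=True yields a (mean, std) pair — neither is shown in this diff. It exercises the new encoder/decoder depth split, the lowered default learning rate, and the now-explicit device argument:

import numpy as np
from autoemulate.emulators.conditional_neural_process import ConditionalNeuralProcess

X = np.random.rand(200, 3)  # 200 samples, 3 inputs
y = np.random.rand(200, 2)  # 2 outputs

cnp = ConditionalNeuralProcess(
    hidden_layers_enc=3,  # replaces the former shared `hidden_layers`
    hidden_layers_dec=3,
    lr=5e-3,              # new default
    device="cpu",         # CUDA auto-detection removed; opt in explicitly
)
cnp.fit(X, y)
mean, std = cnp.predict(X, return_std=True)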
1 change: 0 additions & 1 deletion autoemulate/emulators/gaussian_process_sklearn.py
@@ -10,7 +10,6 @@
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
from skopt.space import Categorical
from skopt.space import Integer
from skopt.space import Real

from autoemulate.utils import _suppress_convergence_warnings
25 changes: 2 additions & 23 deletions autoemulate/emulators/gaussian_process_torch.py
@@ -1,30 +1,13 @@
from copy import deepcopy

import gpytorch
import numpy as np
import torch
from scipy.stats import loguniform
from scipy.stats import randint
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin
from sklearn.exceptions import DataConversionWarning
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing._data import _handle_zeros_in_scale
from sklearn.utils import check_array
from sklearn.utils import check_X_y
from sklearn.utils.validation import check_is_fitted
from skopt.space import Categorical
from skopt.space import Integer
from skopt.space import Real
from skorch.callbacks import Checkpoint
from skorch.callbacks import EarlyStopping
from skorch.callbacks import EpochScoring
from skorch.callbacks import LRScheduler
from skorch.callbacks import ProgressBar
from skorch.dataset import Dataset
from skorch.dataset import ValidSplit
from skorch.helper import predefined_split
from skorch.probabilistic import ExactGPRegressor

from autoemulate.emulators.gaussian_process_utils import EarlyStoppingCustom
@@ -59,7 +42,7 @@ def __init__(
max_epochs=50,
normalize_y=True,
# misc
device=None,
device="cpu",
random_state=None,
):
self.mean_module = mean_module
@@ -167,11 +150,7 @@ def fit(self, X, y):
),
],
verbose=0,
device=self.device
if self.device is not None
else "cuda"
if torch.cuda.is_available()
else "cpu",
device=self.device,
)
self.model_.fit(X, y)
self.is_fitted_ = True
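The same device change applies here: training no longer falls back to CUDA automatically, so GPU use must be requested explicitly. A sketch under the assumption that the class in this module is importable as GaussianProcessTorch (the diff shows only its constructor):

import torch
from autoemulate.emulators.gaussian_process_torch import GaussianProcessTorch

# Reproduce the old implicit behaviour explicitly.
device = "cuda" if torch.cuda.is_available() else "cpu"
gp = GaussianProcessTorch(max_epochs=50, normalize_y=True, device=device)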
2 changes: 1 addition & 1 deletion autoemulate/emulators/gradient_boosting.py
@@ -109,7 +109,7 @@ def get_grid_params(self, search_type="random"):
"min_samples_leaf": randint(1, 6),
"subsample": uniform(0.6, 0.4), # 0.4 is the range width (1.0 - 0.6)
"max_features": ["sqrt", "log2", None],
"ccp_alpha": loguniform(0.01, 0.1),
"ccp_alpha": loguniform(0.001, 0.1),
}

param_space_bayes = {
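Lowering the ccp_alpha bound from 0.01 to 0.001 is a larger change than it looks: loguniform samples uniformly in log space, so the new range adds a full decade of weak-pruning candidates. A quick illustration:

from scipy.stats import loguniform

# Five draws spread roughly evenly across [1e-3, 1e-1] on a log scale.
samples = loguniform(0.001, 0.1).rvs(size=5, random_state=0)
print(samples)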
4 changes: 2 additions & 2 deletions autoemulate/emulators/light_gbm.py
@@ -108,7 +108,7 @@ def predict(self, X):
def get_grid_params(self, search_type="random"):
"""Returns the grid parameters of the emulator."""
param_space_random = {
"boosting_type": ["gbdt", "dart"],
"boosting_type": ["gbdt"],
"num_leaves": randint(10, 100),
"max_depth": randint(-1, 12),
"learning_rate": loguniform(0.001, 0.1),
@@ -119,7 +119,7 @@ def get_grid_params(self, search_type="random"):
}

param_space_bayes = {
"boosting_type": Categorical(["gbdt", "dart"]),
"boosting_type": Categorical(["gbdt"]),
"num_leaves": Integer(10, 100),
"max_depth": Integer(-1, 12),
"learning_rate": Real(0.001, 0.1, prior="log-uniform"),
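For reference, the Bayesian space is built from skopt dimension objects, which scikit-optimize's BayesSearchCV consumes directly; a sketch of the narrowed space as it stands after this change:

from skopt.space import Categorical, Integer, Real

param_space_bayes = {
    "boosting_type": Categorical(["gbdt"]),  # "dart" dropped from the search
    "num_leaves": Integer(10, 100),
    "max_depth": Integer(-1, 12),
    "learning_rate": Real(0.001, 0.1, prior="log-uniform"),
}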
27 changes: 20 additions & 7 deletions autoemulate/emulators/neural_networks/cnp_module.py
@@ -13,11 +13,17 @@ class Encoder(nn.Module):
"""

def __init__(
self, input_dim, output_dim, hidden_dim, latent_dim, hidden_layers, activation
self,
input_dim,
output_dim,
hidden_dim,
latent_dim,
hidden_layers_enc,
activation,
):
super().__init__()
layers = [nn.Linear(input_dim + output_dim, hidden_dim), activation()]
for _ in range(hidden_layers):
for _ in range(hidden_layers_enc):
layers.extend([nn.Linear(hidden_dim, hidden_dim), activation()])
layers.append(nn.Linear(hidden_dim, latent_dim))
self.net = nn.Sequential(*layers)
@@ -53,11 +59,17 @@ def forward(self, x_context, y_context, context_mask=None):

class Decoder(nn.Module):
def __init__(
self, input_dim, latent_dim, hidden_dim, output_dim, hidden_layers, activation
self,
input_dim,
latent_dim,
hidden_dim,
output_dim,
hidden_layers_dec,
activation,
):
super().__init__()
layers = [nn.Linear(latent_dim + input_dim, hidden_dim), activation()]
for _ in range(hidden_layers):
for _ in range(hidden_layers_dec):
layers.extend([nn.Linear(hidden_dim, hidden_dim), activation()])
self.net = nn.Sequential(*layers)
self.mean_head = nn.Linear(hidden_dim, output_dim)
@@ -94,15 +106,16 @@ def __init__(
output_dim,
hidden_dim,
latent_dim,
hidden_layers,
hidden_layers_enc,
hidden_layers_dec,
activation=nn.ReLU,
):
super().__init__()
self.encoder = Encoder(
input_dim, output_dim, hidden_dim, latent_dim, hidden_layers, activation
input_dim, output_dim, hidden_dim, latent_dim, hidden_layers_enc, activation
)
self.decoder = Decoder(
input_dim, latent_dim, hidden_dim, output_dim, hidden_layers, activation
input_dim, latent_dim, hidden_dim, output_dim, hidden_layers_dec, activation
)

def forward(self, X_context, y_context, X_target=None, context_mask=None):
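A sketch of the updated module with independent encoder and decoder depths. The (batch, n_points, dim) tensor shapes and the mean/variance return are assumptions, since the forward body is not shown in full here:

import torch
from torch import nn
from autoemulate.emulators.neural_networks.cnp_module import CNP

cnp = CNP(
    input_dim=3,
    output_dim=2,
    hidden_dim=64,
    latent_dim=64,
    hidden_layers_enc=4,  # encoder and decoder depths now vary independently
    hidden_layers_dec=2,
    activation=nn.ReLU,
)
X_context = torch.rand(8, 10, 3)  # (batch, n_context, input_dim)
y_context = torch.rand(8, 10, 2)
X_target = torch.rand(8, 25, 3)
out = cnp(X_context, y_context, X_target)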
1 change: 0 additions & 1 deletion autoemulate/emulators/polynomials.py
@@ -8,7 +8,6 @@
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
from skopt.space import Categorical
from skopt.space import Integer


class SecondOrderPolynomial(BaseEstimator, RegressorMixin):
7 changes: 1 addition & 6 deletions autoemulate/emulators/radial_basis_functions.py
@@ -85,12 +85,7 @@ def predict(self, X):

def get_grid_params(self, search_type="random"):
"""Returns the grid parameters of the emulator."""
# param_space_random = {
# #"smoothing": uniform(0.0, 1.0),
# "kernel": ["linear", "thin_plate_spline", "cubic", "quintic", "multiquadric", "inverse_multiquadric", "gaussian"],
# #"epsilon": uniform(0.0, 1.0),
# "degree": randint(0, 5),
# }

param_space_random = [
{
"kernel": ["linear", "multiquadric"],
33 changes: 4 additions & 29 deletions autoemulate/emulators/random_forest.py
@@ -7,7 +7,6 @@
from sklearn.utils.validation import check_X_y
from skopt.space import Categorical
from skopt.space import Integer
from skopt.space import Real


class RandomForest(BaseEstimator, RegressorMixin):
@@ -100,18 +99,18 @@ def get_grid_params(self, search_type="random"):
"n_estimators": randint(50, 500),
"min_samples_split": randint(2, 20),
"min_samples_leaf": randint(1, 10),
"max_features": [None, "sqrt", "log2"],
"max_features": ["sqrt", "log2", None, 1.0],
"bootstrap": [True, False],
"oob_score": [True, False],
# # "max_depth": [None] + list(range(3, 20)), # None plus a range of depths
"max_samples": [None, 0.5, 0.75],
"max_depth": [None] + list(range(5, 30, 5)), # None plus a range of depths
"max_samples": [None, 0.5, 0.7, 0.9],
}

param_space_bayes = {
"n_estimators": Integer(50, 500),
"min_samples_split": Integer(2, 20),
"min_samples_leaf": Integer(1, 10),
"max_features": Categorical([None, "sqrt", "log2"]),
"max_features": ["sqrt", "log2", 1.0, None],
"bootstrap": Categorical([True, False]),
"oob_score": Categorical([True, False]),
# "max_depth": Categorical([None] + list(range(3, 20))), # None plus a range of depths
@@ -131,27 +130,3 @@ def model_name(self):

def _more_tags(self):
return {"multioutput": True}

# def score(self, X, y, metric):
# """Returns the score of the emulator.

# Parameters
# ----------
# X : array-like, shape (n_samples, n_features)
# Simulation input.
# y : array-like, shape (n_samples, n_outputs)
# Simulation output.
# metric : str
# Name of the metric to use, currently either rsme or r2.
# Returns
# -------
# metric : float
# Metric of the emulator.

# """
# predictions = self.predict(X)
# return metric(y, predictions)

# def _more_tags(self):
# return {'non_deterministic': True,
# 'multioutput': True}
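A sketch of how the widened random-search space might be consumed. A plain RandomForestRegressor stands in for the emulator here (an assumption); RandomizedSearchCV accepts the scipy distributions and lists above directly:

from scipy.stats import randint
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV

param_space = {
    "n_estimators": randint(50, 500),
    "max_features": ["sqrt", "log2", None, 1.0],
    "max_depth": [None] + list(range(5, 30, 5)),
    "max_samples": [None, 0.5, 0.7, 0.9],  # fractions require bootstrap=True (the default)
}
search = RandomizedSearchCV(RandomForestRegressor(), param_space, n_iter=20, cv=3)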
2 changes: 0 additions & 2 deletions autoemulate/emulators/support_vector_machines.py
@@ -134,8 +134,6 @@ def get_grid_params(self, search_type="random"):
"C": uniform(1.0, 3.0),
"epsilon": uniform(0.1, 0.3),
"shrinking": [True, False],
"cache_size": randint(200, 401),
"verbose": [False],
"max_iter": [-1],
}
