
Commit

Merge pull request #3357 from sambhavnoobcoder/CNN-Advance-2
Enhance CNN Model with L2 Regularization, K-Fold CV, and Ensemble Techniques
mdietze authored Aug 15, 2024
2 parents 5f7115b + 70bfdaf commit bb2cda9
Showing 1 changed file with 148 additions and 59 deletions.
207 changes: 148 additions & 59 deletions modules/assim.sequential/R/downscale_function.R
@@ -62,6 +62,38 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool)
return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data))
}

##' @title Create folds function
##' @name create_folds
##' @author Sambhav Dixit
##'
##' @param y Vector. A vector of outcome data or indices.
##' @param k Numeric. The number of folds to create.
##' @param list Logical. If TRUE, returns a list of fold indices. If FALSE, returns a vector.
##' @param returnTrain Logical. If TRUE, returns indices for training sets. If FALSE, returns indices for test sets.
##' @details This function creates k-fold indices for cross-validation. It can return either training or test set indices, and the output can be in list or vector format.
##'
##' @description This function generates k-fold indices for cross-validation, allowing for flexible output formats.
##'
##' @return A list of k elements (if list = TRUE), each containing indices for a fold, or a vector of indices (if list = FALSE).
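##'
##' @examples
##' # Illustrative sketch (not from the original commit): split 10 indices into
##' # 3 test-index folds, then request the complementary training indices instead.
##' test_folds <- create_folds(y = seq_len(10), k = 3, list = TRUE, returnTrain = FALSE)
##' train_folds <- create_folds(y = seq_len(10), k = 3, list = TRUE, returnTrain = TRUE)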

create_folds <- function(y, k, list = TRUE, returnTrain = FALSE) {
n <- length(y)
indices <- seq_len(n)
folds <- split(indices, cut(seq_len(n), breaks = k, labels = FALSE))

if (returnTrain) {
folds <- lapply(folds, function(x) indices[-x]) # Return training indices; otherwise the test indices are already what we want
}

if (!list) {
folds <- unlist(folds)
}

return(folds)
}

##' @title SDA Downscale Function
##' @name SDA_downscale
##' @author Joshua Ploshay, Sambhav Dixit
@@ -140,84 +172,141 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
predictions[[i]] <- stats::predict(models[[i]], test_data)
}
} else if (model_type == "cnn") {
# Define k_folds and num_bags
k_folds <- 5
num_bags <- 5
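# With k_folds = 5 and num_bags = 5, each carbon pool trains 5 x 5 = 25 CNN models,
# whose predictions are later averaged by cnn_ensemble_predict()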

# Reshape input data for CNN
x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))

for (i in seq_along(carbon_data)) {
# Define the CNN model architecture
# Dual batch normalization and dropout are used: the first set operates on the lower-level features extracted by the convolutional layer, while the second set works on the higher-level features learned by the dense layer.
model <- keras3::keras_model_sequential() |>
# 1D Convolutional layer: Extracts local features from input data
keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
# Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift
keras3::layer_batch_normalization() |>
# Dropout: Randomly sets some inputs to 0, reducing overfitting and improving generalization
keras3::layer_dropout(rate = 0.3) |>
# Flatten: Converts 3D output to 1D for dense layer input
keras3::layer_flatten() |>
# Dense layer: Learns complex combinations of features
keras3::layer_dense(units = 64, activation = 'relu') |>
# Second batch normalization: Further stabilizes learning in deeper layers
keras3::layer_batch_normalization() |>
# Second dropout: Additional regularization to prevent overfitting in final layers
keras3::layer_dropout(rate = 0.3) |>
# Output layer: Single neuron for regression prediction
keras3::layer_dense(units = 1)
all_models <- list()

# Learning rate scheduler
lr_schedule <- keras3::learning_rate_schedule_exponential_decay(
initial_learning_rate = 0.001,
decay_steps = 1000,
decay_rate = 0.9
)
# Create k-fold indices
fold_indices <- create_folds(y = seq_len(nrow(x_train)), k = k_folds, list = TRUE, returnTrain = FALSE)

# Compile the model
model |> keras3::compile(
loss = 'mean_squared_error',
optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
metrics = c('mean_absolute_error')
)

# Early stopping callback
early_stopping <- keras3::callback_early_stopping(
monitor = 'val_loss',
patience = 10,
restore_best_weights = TRUE
)
# Initialise operations for each fold
for (fold in 1:k_folds) {
cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds))

# Split data into training and validation sets for this fold
train_indices <- setdiff(seq_len(nrow(x_train)), fold_indices[[fold]])
val_indices <- fold_indices[[fold]]

x_train_fold <- x_train[train_indices, , drop = FALSE]
y_train_fold <- y_train[train_indices, i]
x_val_fold <- x_train[val_indices, , drop = FALSE]
y_val_fold <- y_train[val_indices, i]

# Create bagged models for this fold
fold_models <- list()
for (bag in 1:num_bags) {
# Create bootstrap sample
bootstrap_indices <- sample(1:nrow(x_train_fold), size = nrow(x_train_fold), replace = TRUE)
x_train_bag <- x_train_fold[bootstrap_indices, ]
y_train_bag <- y_train_fold[bootstrap_indices]

# Define the CNN model architecture
# Dual batch normalization and dropout are used: each set normalizes and regularizes the higher-level features learned by the preceding dense layer
model <- keras3::keras_model_sequential() |>
# Reshape layer: converts each input row into the (covariates, 1, 1) shape expected by the convolutional layer
keras3::layer_reshape(target_shape = c(ncol(x_train), 1, 1), input_shape = ncol(x_train)) |>
# 2D convolutional layer with a (3, 1) kernel: convolves along the covariate dimension (effectively a 1D convolution), extracting local features from the input data
keras3::layer_conv_2d(
filters = 32,
kernel_size = c(3, 1),
activation = 'relu',
padding = 'same'
) |>
# Flatten: Converts 3D output to 1D for dense layer input
keras3::layer_flatten() |>
# Dense layer: Learns complex combinations of features
keras3::layer_dense(
units = 64,
activation = 'relu',
kernel_regularizer = keras3::regularizer_l2(0.01)
) |>
# Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift
keras3::layer_batch_normalization() |>
# Dropout: Randomly sets some inputs to 0, reducing overfitting and improving generalization
keras3::layer_dropout(rate = 0.3) |>
# Dense layer: Learns complex combinations of features
keras3::layer_dense(
units = 32,
activation = 'relu',
kernel_regularizer = keras3::regularizer_l2(0.01)
) |>
# Batch normalization: Further stabilizes learning in deeper layers
keras3::layer_batch_normalization() |>
# Dropout: Additional regularization to prevent overfitting in final layer
keras3::layer_dropout(rate = 0.3) |>
# Output layer: Single neuron for regression prediction
keras3::layer_dense(
units = 1,
kernel_regularizer = keras3::regularizer_l2(0.01)
)

# Learning rate scheduler
lr_schedule <- keras3::learning_rate_schedule_exponential_decay(
initial_learning_rate = 0.001,
decay_steps = 1000,
decay_rate = 0.9
)

# Early stopping callback
early_stopping <- keras3::callback_early_stopping(
monitor = 'loss',
patience = 10,
restore_best_weights = TRUE
)

# Train the model
model |> keras3::fit(
x = x_train,
y = y_train[, i],
epochs = 500, # Increased max epochs
batch_size = 32,
validation_split = 0.2,
callbacks = list(early_stopping),
verbose = 0
)
# Compile the model
model |> keras3::compile(
loss = 'mean_squared_error',
optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
metrics = c('mean_absolute_error')
)

# Store the trained model
models[[i]] <- model
# Train the model
model |> keras3::fit(
x = x_train_bag,
y = y_train_bag,
epochs = 500,
batch_size = 32,
callbacks = list(early_stopping),
verbose = 0
)

# CNN predictions
cnn_predict <- function(model, newdata, scaling_params) {
# Store the trained model for this bag in the fold_models list
fold_models[[bag]] <- model
}

# Add fold models to all_models list
all_models <- c(all_models, fold_models)
}

# Store all models for this ensemble
models[[i]] <- all_models

# Use all models for predictions
cnn_ensemble_predict <- function(models, newdata, scaling_params) {
newdata <- scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
newdata <- keras3::array_reshape(newdata, c(nrow(newdata), 1, ncol(newdata)))
predictions <- stats::predict(model, newdata)
return(as.vector(predictions))
predictions <- sapply(models, function(m) stats::predict(m, newdata))
return(rowMeans(predictions))
}
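# Illustrative usage sketch (hypothetical names, not from the original commit):
# pass the full list of fitted models for this carbon pool together with the
# training-set scaling parameters (the mean and sd used by scale() above), e.g.
#   held_out <- x_data[-sample, , drop = FALSE]
#   preds <- cnn_ensemble_predict(models[[i]], held_out, scaling_params)
# The ensemble estimate is the per-row mean across all k_folds * num_bags models.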

# Create a prediction raster from covariates
prediction_rast <- terra::rast(covariates)

# Generate spatial predictions using the trained ensemble of models
maps[[i]] <- terra::predict(prediction_rast, model = models[[i]],
fun = cnn_predict,
fun = cnn_ensemble_predict,
scaling_params = scaling_params)

# Make predictions on held-out test data
predictions[[i]] <- cnn_predict(models[[i]], x_data[-sample, ], scaling_params)
predictions[[i]] <- cnn_ensemble_predict(models[[i]], x_data[-sample, ], scaling_params)

}
} else {
stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.")
