From 25fd5b6254cb55d9d983bb80108b7e78b91d14ce Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:43:03 +0530 Subject: [PATCH 01/15] Update downscale_function.R Add L2 regularization to CNN model in SDA_downscale function - Introduced L2 kernel regularization to convolutional and dense layers - Set L2 factor to 0.01 for all regularized layers - Aim to reduce overfitting and improve model generalization - Updated model architecture in the CNN branch of SDA_downscale function --- modules/assim.sequential/R/downscale_function.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 27645f9dd7..99677466ec 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -145,11 +145,13 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test))) for (i in seq_along(carbon_data)) { + # L2 regularization factor + l2_factor <- 0.01 # Define the CNN model architecture # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer. model <- keras3::keras_model_sequential() |> # 1D Convolutional layer: Extracts local features from input data - keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |> + keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names)) , kernel_regularizer = keras3::regularizer_l2(l2_factor)) |> # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift keras3::layer_batch_normalization() |> # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization @@ -157,13 +159,13 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ # Flatten: Converts 3D output to 1D for dense layer input keras3::layer_flatten() |> # Dense layer: Learns complex combinations of features - keras3::layer_dense(units = 64, activation = 'relu') |> + keras3::layer_dense(units = 64, activation = 'relu' , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) |> # Second batch normalization: Further stabilizes learning in deeper layers keras3::layer_batch_normalization() |> # Second dropout: Additional regularization to prevent overfitting in final layers keras3::layer_dropout(rate = 0.3) |> # Output layer: Single neuron for regression prediction - keras3::layer_dense(units = 1) + keras3::layer_dense(units = 1 , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) # Learning rate scheduler lr_schedule <- keras3::learning_rate_schedule_exponential_decay( From d4696e6083ed5167609e8a238c5d75faabdec562 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:52:40 +0530 Subject: [PATCH 02/15] Define k-folds and create cross-validation indices - Set k_folds variable to 5 for k-fold cross-validation - Use caret::createFolds to generate fold indices for training data --- modules/assim.sequential/R/downscale_function.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 99677466ec..fede48920a 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -140,9 +140,15 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ predictions[[i]] <- stats::predict(models[[i]], test_data) } } else if (model_type == "cnn") { + # Define k_folds within the function + k_folds <- 5 + # Reshape input data for CNN x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train))) x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test))) + + # Create k-fold indices for cross-validation (only on training data) + fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE) for (i in seq_along(carbon_data)) { # L2 regularization factor From 6a34666c849f70b22e9f2d5623c542c19ee02679 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:59:04 +0530 Subject: [PATCH 03/15] Implement cross-validation loop for ensemble modeling - Initialize cv_results list to store results - Create loop to process each fold - Split data into training and validation sets for each fold - Print progress message for each fold --- modules/assim.sequential/R/downscale_function.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index fede48920a..311a90794b 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -151,6 +151,20 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE) for (i in seq_along(carbon_data)) { + + cv_results <- list() + + for (fold in 1:k_folds) { + cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds)) + + # Split training data into training and validation sets for this fold + val_indices <- fold_indices[[fold]] + train_indices <- setdiff(1:nrow(x_train), val_indices) + + x_train_fold <- x_train[train_indices, , drop = FALSE] + y_train_fold <- y_train[train_indices, i] + x_val_fold <- x_train[val_indices, , drop = FALSE] + y_val_fold <- y_train[val_indices, i] # L2 regularization factor l2_factor <- 0.01 # Define the CNN model architecture From 4b16839a3d826745c917b8478cfba1ec559b5fda Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:02:56 +0530 Subject: [PATCH 04/15] Implement model training and evaluation within cross-validation - Use tryCatch for error handling during model fitting - Evaluate model on validation set - Store results in cv_results list - Handle potential errors and log them --- .../assim.sequential/R/downscale_function.R | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 311a90794b..61612948ff 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -208,16 +208,24 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ restore_best_weights = TRUE ) - # Train the model - model |> keras3::fit( - x = x_train, - y = 
y_train[, i], - epochs = 500, # Increased max epochs - batch_size = 32, - validation_split = 0.2, - callbacks = list(early_stopping), - verbose = 0 - ) + tryCatch({ + model |> keras3::fit( + x = x_train_fold, + y = y_train_fold, + epochs = 500, + batch_size = 32, + callbacks = list(early_stopping), + verbose = 0 + ) + + # Evaluate model on validation set + val_results <- model |> keras3::evaluate(x_val_fold, y_val_fold, verbose = 0) + cv_results[[fold]] <- val_results + }, error = function(e) { + cat("Error in fold", fold, ":", conditionMessage(e), "\n") + cv_results[[fold]] <- c(NA, NA) + }) + } # Store the trained model models[[i]] <- model From b22c84e4b598877fddd6f6c14bc20874ce411bde Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:06:49 +0530 Subject: [PATCH 05/15] Calculate and display average cross-validation performance - Compute mean MSE and MAE across all folds - Use sapply for efficient calculation - Handle potential NA values - Print results for the current ensemble --- modules/assim.sequential/R/downscale_function.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 61612948ff..b4f71a662d 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -227,6 +227,12 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ }) } + # Calculate average performance across folds + mean_mse <- mean(sapply(cv_results, function(x) x[1]), na.rm = TRUE) + mean_mae <- mean(sapply(cv_results, function(x) x[2]), na.rm = TRUE) + + cat(sprintf("Ensemble %d - Mean MSE: %.4f, Mean MAE: %.4f\n", i, mean_mse, mean_mae)) + # Store the trained model models[[i]] <- model From dce5d953d56a1006141ce06df1e26ca0e41a6b5c Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:26:03 +0530 Subject: [PATCH 06/15] Implement bagging for CNN in SDA_downscale function - Add num_bags parameter to control number of bagged models - Create and train multiple CNN models for each ensemble - Implement bagged prediction function for CNN - Update prediction process to use bagged models - Adjust cross-validation to incorporate bagging --- .../assim.sequential/R/downscale_function.R | 138 +++++++++--------- 1 file changed, 73 insertions(+), 65 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index b4f71a662d..f58b51763e 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -167,74 +167,82 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ y_val_fold <- y_train[val_indices, i] # L2 regularization factor l2_factor <- 0.01 - # Define the CNN model architecture - # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer. 
- model <- keras3::keras_model_sequential() |> - # 1D Convolutional layer: Extracts local features from input data - keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names)) , kernel_regularizer = keras3::regularizer_l2(l2_factor)) |> - # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift - keras3::layer_batch_normalization() |> - # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization - keras3::layer_dropout(rate = 0.3) |> - # Flatten: Converts 3D output to 1D for dense layer input - keras3::layer_flatten() |> - # Dense layer: Learns complex combinations of features - keras3::layer_dense(units = 64, activation = 'relu' , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) |> - # Second batch normalization: Further stabilizes learning in deeper layers - keras3::layer_batch_normalization() |> - # Second dropout: Additional regularization to prevent overfitting in final layers - keras3::layer_dropout(rate = 0.3) |> - # Output layer: Single neuron for regression prediction - keras3::layer_dense(units = 1 , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) - - # Learning rate scheduler - lr_schedule <- keras3::learning_rate_schedule_exponential_decay( - initial_learning_rate = 0.001, - decay_steps = 1000, - decay_rate = 0.9 - ) - - # Compile the model - model |> keras3::compile( - loss = 'mean_squared_error', - optimizer = keras3::optimizer_adam(learning_rate = lr_schedule), - metrics = c('mean_absolute_error') - ) - - # Early stopping callback - early_stopping <- keras3::callback_early_stopping( - monitor = 'val_loss', - patience = 10, - restore_best_weights = TRUE - ) - - tryCatch({ - model |> keras3::fit( - x = x_train_fold, - y = y_train_fold, - epochs = 500, - batch_size = 32, - callbacks = list(early_stopping), - verbose = 0 - ) - - # Evaluate model on validation set - val_results <- model |> keras3::evaluate(x_val_fold, y_val_fold, verbose = 0) - cv_results[[fold]] <- val_results - }, error = function(e) { - cat("Error in fold", fold, ":", conditionMessage(e), "\n") - cv_results[[fold]] <- c(NA, NA) - }) + # Train final bagged models on all training data + final_bagged_models <- list() + for (bag in 1:num_bags) { + bootstrap_indices <- sample(1:nrow(x_train), size = nrow(x_train), replace = TRUE) + x_train_bag <- x_train[bootstrap_indices, ] + y_train_bag <- y_train[bootstrap_indices, i] + # Define the CNN model architecture + # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer. 
+ model <- keras3::keras_model_sequential() |> + # 1D Convolutional layer: Extracts local features from input data + keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names)) , kernel_regularizer = keras3::regularizer_l2(l2_factor)) |> + # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift + keras3::layer_batch_normalization() |> + # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization + keras3::layer_dropout(rate = 0.3) |> + # Flatten: Converts 3D output to 1D for dense layer input + keras3::layer_flatten() |> + # Dense layer: Learns complex combinations of features + keras3::layer_dense(units = 64, activation = 'relu' , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) |> + # Second batch normalization: Further stabilizes learning in deeper layers + keras3::layer_batch_normalization() |> + # Second dropout: Additional regularization to prevent overfitting in final layers + keras3::layer_dropout(rate = 0.3) |> + # Output layer: Single neuron for regression prediction + keras3::layer_dense(units = 1 , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) + + # Learning rate scheduler + lr_schedule <- keras3::learning_rate_schedule_exponential_decay( + initial_learning_rate = 0.001, + decay_steps = 1000, + decay_rate = 0.9 + ) + + # Compile the model + model |> keras3::compile( + loss = 'mean_squared_error', + optimizer = keras3::optimizer_adam(learning_rate = lr_schedule), + metrics = c('mean_absolute_error') + ) + + # Early stopping callback + early_stopping <- keras3::callback_early_stopping( + monitor = 'val_loss', + patience = 10, + restore_best_weights = TRUE + ) + + tryCatch({ + model |> keras3::fit( + x = x_train_fold, + y = y_train_fold, + epochs = 500, + batch_size = 32, + callbacks = list(early_stopping), + verbose = 0 + ) + final_bagged_models[[bag]] <- final_model + + # Evaluate model on validation set + val_results <- model |> keras3::evaluate(x_val_fold, y_val_fold, verbose = 0) + cv_results[[fold]] <- val_results + }, error = function(e) { + cat("Error in fold", fold, ":", conditionMessage(e), "\n") + cv_results[[fold]] <- c(NA, NA) + }) + } + + # Calculate average performance across folds + mean_mse <- mean(sapply(cv_results, function(x) x[1]), na.rm = TRUE) + mean_mae <- mean(sapply(cv_results, function(x) x[2]), na.rm = TRUE) + + cat(sprintf("Ensemble %d - Mean MSE: %.4f, Mean MAE: %.4f\n", i, mean_mse, mean_mae)) } - # Calculate average performance across folds - mean_mse <- mean(sapply(cv_results, function(x) x[1]), na.rm = TRUE) - mean_mae <- mean(sapply(cv_results, function(x) x[2]), na.rm = TRUE) - - cat(sprintf("Ensemble %d - Mean MSE: %.4f, Mean MAE: %.4f\n", i, mean_mse, mean_mae)) - # Store the trained model - models[[i]] <- model + models[[i]] <- final_bagged_models #CNN predictions cnn_predict <- function(model, newdata, scaling_params) { From e95536c965345fdfc1f0208d0865159e357e0fc2 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:33:12 +0530 Subject: [PATCH 07/15] Improve bagged CNN evaluation and standardize nomenclature - Add final test set evaluation for each bagged CNN ensemble - Calculate and display Test MSE and MAE for ensembles - Standardize variable names for clarity and suiting the flow of the code avoiding conflicts . 
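Illustrative sketch (not part of any patch in this series): the bagging added in these commits follows the usual recipe of bootstrap-resampling the training rows, fitting one model per bag, and averaging the per-bag predictions. The minimal R example below shows that recipe with toy data and lm() standing in for the per-bag CNN; all variable names are hypothetical and do not appear in downscale_function.R.

set.seed(42)
x_train_toy <- matrix(rnorm(200), ncol = 2)                           # toy covariate matrix (100 x 2)
y_train_toy <- as.vector(x_train_toy %*% c(1.5, -0.5)) + rnorm(100)  # toy response
num_bags <- 5
bagged_models <- vector("list", num_bags)
for (bag in seq_len(num_bags)) {
  # Bootstrap sample: draw rows with replacement, same size as the training set
  idx <- sample(seq_len(nrow(x_train_toy)), size = nrow(x_train_toy), replace = TRUE)
  dat <- data.frame(y = y_train_toy[idx], x_train_toy[idx, , drop = FALSE])
  bagged_models[[bag]] <- lm(y ~ ., data = dat)                       # stand-in for one bagged CNN
}
# Bagged prediction: average predictions across all bagged models
# (the same rowMeans() averaging appears in the ensemble predictor added later in this series)
newdata <- data.frame(matrix(rnorm(20), ncol = 2))
pred_matrix <- sapply(bagged_models, function(m) stats::predict(m, newdata = newdata))
ensemble_prediction <- rowMeans(pred_matrix)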
--- modules/assim.sequential/R/downscale_function.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index f58b51763e..259ea30401 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -256,12 +256,18 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ prediction_rast <- terra::rast(covariates) # Generate spatial predictions using the trained model - maps[[i]] <- terra::predict(prediction_rast, model = models[[i]], + maps[[i]] <- terra::predict(prediction_rast, model = final_bagged_models, fun = cnn_predict, scaling_params = scaling_params) # Make predictions on held-out test data - predictions[[i]] <- cnn_predict(models[[i]], x_data[-sample, ], scaling_params) + predictions[[i]] <- cnn_predict(final_bagged_models, x_data[-sample, ], scaling_params) + + # Evaluate final bagged ensemble on test set + test_predictions <- cnn_predict(final_bagged_models, x_test, scaling_params) + test_mse <- mean((test_predictions - y_test[, i])^2) + test_mae <- mean(abs(test_predictions - y_test[, i])) + cat(sprintf("Ensemble %d - Test MSE: %.4f, Test MAE: %.4f\n", i, test_mse, test_mae)) } } else { stop("Invalid model_type. Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.") From 85065566c9633d086ee6f18f58cf3a28cd209059 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:37:18 +0530 Subject: [PATCH 08/15] Added number of bags for bagging - instead of passing as function arg , passed inside function - set the number of bags to 5 --- modules/assim.sequential/R/downscale_function.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 259ea30401..8a5504f672 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -142,6 +142,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ } else if (model_type == "cnn") { # Define k_folds within the function k_folds <- 5 + #Number of bags + num_bags <- 5 # Reshape input data for CNN x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train))) From ced7975bc97c31ea54ad1f5e1a1120ef5d12554c Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 19:28:45 +0530 Subject: [PATCH 09/15] Refactor CNN model with k-fold cross-validation and bagging - Implement k-fold cross-validation for CNN models - Add bagging to improve model robustness - Restructure model creation and training process - Introduce ensemble prediction function - Update evaluation metrics for each fold and final ensemble - Improve code organization and readability --- .../assim.sequential/R/downscale_function.R | 216 +++++++++--------- 1 file changed, 105 insertions(+), 111 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 8a5504f672..3838d1e3e0 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -140,133 +140,127 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ predictions[[i]] <- stats::predict(models[[i]], test_data) } } else if 
(model_type == "cnn") { - # Define k_folds within the function + # Define k_folds and num_bags k_folds <- 5 - #Number of bags num_bags <- 5 - # Reshape input data for CNN - x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train))) - x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test))) - - # Create k-fold indices for cross-validation (only on training data) - fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE) - for (i in seq_along(carbon_data)) { - - cv_results <- list() - - for (fold in 1:k_folds) { - cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds)) + all_models <- list() - # Split training data into training and validation sets for this fold - val_indices <- fold_indices[[fold]] - train_indices <- setdiff(1:nrow(x_train), val_indices) + # Create k-fold indices + fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE) - x_train_fold <- x_train[train_indices, , drop = FALSE] - y_train_fold <- y_train[train_indices, i] - x_val_fold <- x_train[val_indices, , drop = FALSE] - y_val_fold <- y_train[val_indices, i] - # L2 regularization factor - l2_factor <- 0.01 - # Train final bagged models on all training data - final_bagged_models <- list() - for (bag in 1:num_bags) { - bootstrap_indices <- sample(1:nrow(x_train), size = nrow(x_train), replace = TRUE) - x_train_bag <- x_train[bootstrap_indices, ] - y_train_bag <- y_train[bootstrap_indices, i] - # Define the CNN model architecture - # Used dual batch normalization and dropout as the first set of batch normalization and dropout operates on the lower-level features extracted by the convolutional layer, the second set works on the higher-level features learned by the dense layer. 
- model <- keras3::keras_model_sequential() |> - # 1D Convolutional layer: Extracts local features from input data - keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names)) , kernel_regularizer = keras3::regularizer_l2(l2_factor)) |> - # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift - keras3::layer_batch_normalization() |> - # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization - keras3::layer_dropout(rate = 0.3) |> - # Flatten: Converts 3D output to 1D for dense layer input - keras3::layer_flatten() |> - # Dense layer: Learns complex combinations of features - keras3::layer_dense(units = 64, activation = 'relu' , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) |> - # Second batch normalization: Further stabilizes learning in deeper layers - keras3::layer_batch_normalization() |> - # Second dropout: Additional regularization to prevent overfitting in final layers - keras3::layer_dropout(rate = 0.3) |> - # Output layer: Single neuron for regression prediction - keras3::layer_dense(units = 1 , kernel_regularizer = keras3::regularizer_l2(l2_factor) ) + for (fold in 1:k_folds) { + cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds)) - # Learning rate scheduler - lr_schedule <- keras3::learning_rate_schedule_exponential_decay( - initial_learning_rate = 0.001, - decay_steps = 1000, - decay_rate = 0.9 - ) - - # Compile the model - model |> keras3::compile( - loss = 'mean_squared_error', - optimizer = keras3::optimizer_adam(learning_rate = lr_schedule), - metrics = c('mean_absolute_error') - ) + # Split data into training and validation sets for this fold + train_indices <- setdiff(1:nrow(x_train), fold_indices[[fold]]) + val_indices <- fold_indices[[fold]] - # Early stopping callback - early_stopping <- keras3::callback_early_stopping( - monitor = 'val_loss', - patience = 10, - restore_best_weights = TRUE - ) - - tryCatch({ - model |> keras3::fit( - x = x_train_fold, - y = y_train_fold, - epochs = 500, - batch_size = 32, - callbacks = list(early_stopping), - verbose = 0 + x_train_fold <- x_train[train_indices, , drop = FALSE] + y_train_fold <- y_train[train_indices, i] + x_val_fold <- x_train[val_indices, , drop = FALSE] + y_val_fold <- y_train[val_indices, i] + + # Create bagged models for this fold + fold_models <- list() + for (bag in 1:num_bags) { + # Create bootstrap sample + bootstrap_indices <- sample(1:nrow(x_train_fold), size = nrow(x_train_fold), replace = TRUE) + x_train_bag <- x_train_fold[bootstrap_indices, ] + y_train_bag <- y_train_fold[bootstrap_indices] + + # Create and train model + model <- keras3::keras_model_sequential() |> + keras3::layer_reshape(target_shape = c(ncol(x_train), 1, 1), input_shape = ncol(x_train)) |> + keras3::layer_conv_2d( + filters = 32, + kernel_size = c(3, 1), + activation = 'relu', + padding = 'same' + ) |> + keras3::layer_flatten() |> + keras3::layer_dense( + units = 64, + activation = 'relu', + kernel_regularizer = keras3::regularizer_l2(0.01) + ) |> + keras3::layer_batch_normalization() |> + keras3::layer_dropout(rate = 0.3) |> + keras3::layer_dense( + units = 32, + activation = 'relu', + kernel_regularizer = keras3::regularizer_l2(0.01) + ) |> + keras3::layer_batch_normalization() |> + keras3::layer_dropout(rate = 0.3) |> + keras3::layer_dense( + units = 1, + kernel_regularizer = keras3::regularizer_l2(0.01) ) - final_bagged_models[[bag]] <- final_model - - # 
Evaluate model on validation set - val_results <- model |> keras3::evaluate(x_val_fold, y_val_fold, verbose = 0) - cv_results[[fold]] <- val_results - }, error = function(e) { - cat("Error in fold", fold, ":", conditionMessage(e), "\n") - cv_results[[fold]] <- c(NA, NA) - }) + + # Learning rate scheduler + lr_schedule <- keras3::learning_rate_schedule_exponential_decay( + initial_learning_rate = 0.001, + decay_steps = 1000, + decay_rate = 0.9 + ) + + # Early stopping callback + early_stopping <- keras3::callback_early_stopping( + monitor = 'loss', + patience = 10, + restore_best_weights = TRUE + ) + + model |> keras3::compile( + loss = 'mean_squared_error', + optimizer = keras3::optimizer_adam(learning_rate = lr_schedule), + metrics = c('mean_absolute_error') + ) + + model |> keras3::fit( + x = x_train_bag, + y = y_train_bag, + epochs = 500, + batch_size = 32, + callbacks = list(early_stopping), + verbose = 0 + ) + + fold_models[[bag]] <- model } - - # Calculate average performance across folds - mean_mse <- mean(sapply(cv_results, function(x) x[1]), na.rm = TRUE) - mean_mae <- mean(sapply(cv_results, function(x) x[2]), na.rm = TRUE) - cat(sprintf("Ensemble %d - Mean MSE: %.4f, Mean MAE: %.4f\n", i, mean_mse, mean_mae)) + # Add fold models to all_models list + all_models <- c(all_models, fold_models) + + # Evaluate fold performance + val_predictions <- sapply(fold_models, function(m) stats::predict(m, x_val_fold)) + val_predictions_mean <- rowMeans(val_predictions) + val_mse <- mean((val_predictions_mean - y_val_fold)^2) + val_mae <- mean(abs(val_predictions_mean - y_val_fold)) + cat(sprintf("Fold %d - MSE: %.4f, MAE: %.4f\n", fold, val_mse, val_mae)) } - - # Store the trained model - models[[i]] <- final_bagged_models - - #CNN predictions - cnn_predict <- function(model, newdata, scaling_params) { + + # Store all models for this ensemble + models[[i]] <- all_models + + # Use all models for predictions + cnn_ensemble_predict <- function(models, newdata, scaling_params) { newdata <- scale(newdata, center = scaling_params$mean, scale = scaling_params$sd) - newdata <- keras3::array_reshape(newdata, c(nrow(newdata), 1, ncol(newdata))) - predictions <- stats::predict(model, newdata) - return(as.vector(predictions)) + predictions <- sapply(models, function(m) stats::predict(m, newdata)) + return(rowMeans(predictions)) } - - # Create a prediction raster from covariates - prediction_rast <- terra::rast(covariates) - # Generate spatial predictions using the trained model - maps[[i]] <- terra::predict(prediction_rast, model = final_bagged_models, - fun = cnn_predict, + prediction_rast <- terra::rast(covariates) + maps[[i]] <- terra::predict(prediction_rast, model = models[[i]], + fun = cnn_ensemble_predict, scaling_params = scaling_params) - - # Make predictions on held-out test data - predictions[[i]] <- cnn_predict(final_bagged_models, x_data[-sample, ], scaling_params) - - # Evaluate final bagged ensemble on test set - test_predictions <- cnn_predict(final_bagged_models, x_test, scaling_params) + + predictions[[i]] <- cnn_ensemble_predict(models[[i]], x_data[-sample, ], scaling_params) + + # Evaluate final ensemble on test set + test_predictions <- cnn_ensemble_predict(models[[i]], x_test, scaling_params) test_mse <- mean((test_predictions - y_test[, i])^2) test_mae <- mean(abs(test_predictions - y_test[, i])) cat(sprintf("Ensemble %d - Test MSE: %.4f, Test MAE: %.4f\n", i, test_mse, test_mae)) From 871d78ee53df2ba34111ea1a00b3d5fad5fca0d4 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit 
<94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 19:46:05 +0530 Subject: [PATCH 10/15] Added comments some snippets require comments to explain their purpose , they have been added as well , as well as some past comments have been modified to aptly suit the changes . --- .../assim.sequential/R/downscale_function.R | 41 +++++++++++++++---- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 3838d1e3e0..6de48ba3a1 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -140,16 +140,21 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ predictions[[i]] <- stats::predict(models[[i]], test_data) } } else if (model_type == "cnn") { - # Define k_folds and num_bags + # Define k_folds and num_bags k_folds <- 5 num_bags <- 5 - + + # Reshape input data for CNN + x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train))) + x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test))) + for (i in seq_along(carbon_data)) { all_models <- list() # Create k-fold indices fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE) - + + #initialise operations for each fold for (fold in 1:k_folds) { cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds)) @@ -170,30 +175,41 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ x_train_bag <- x_train_fold[bootstrap_indices, ] y_train_bag <- y_train_fold[bootstrap_indices] - # Create and train model + # Define the CNN model architecture + # Used dual batch normalization and dropout as the first set of batch normalization and model <- keras3::keras_model_sequential() |> + # Layer Reshape : Reshape to fit target shape for the convolutional layer keras3::layer_reshape(target_shape = c(ncol(x_train), 1, 1), input_shape = ncol(x_train)) |> + # 1D Convolutional layer: Extracts local features from input data keras3::layer_conv_2d( filters = 32, kernel_size = c(3, 1), activation = 'relu', padding = 'same' ) |> + # Flatten: Converts 3D output to 1D for dense layer input keras3::layer_flatten() |> + # Dense layer: Learns complex combinations of features keras3::layer_dense( units = 64, activation = 'relu', kernel_regularizer = keras3::regularizer_l2(0.01) ) |> + # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift keras3::layer_batch_normalization() |> + # Dropout: Randomly sets some of inputs to 0, reducing overfitting and improving generalization keras3::layer_dropout(rate = 0.3) |> + # Dense layer: Learns complex combinations of features keras3::layer_dense( units = 32, activation = 'relu', kernel_regularizer = keras3::regularizer_l2(0.01) ) |> + # Batch normalization: Further stabilizes learning in deeper layers keras3::layer_batch_normalization() |> + # Dropout: Additional regularization to prevent overfitting in final layer keras3::layer_dropout(rate = 0.3) |> + # Output layer: Single neuron for regression prediction keras3::layer_dense( units = 1, kernel_regularizer = keras3::regularizer_l2(0.01) @@ -212,13 +228,15 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ patience = 10, restore_best_weights = TRUE ) - + + # Compile the model model |> keras3::compile( loss = 'mean_squared_error', optimizer = 
keras3::optimizer_adam(learning_rate = lr_schedule), metrics = c('mean_absolute_error') ) - + + # Train the model model |> keras3::fit( x = x_train_bag, y = y_train_bag, @@ -227,7 +245,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ callbacks = list(early_stopping), verbose = 0 ) - + + # Store the trained model for this bag in the fold_models list fold_models[[bag]] <- model } @@ -251,12 +270,16 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ predictions <- sapply(models, function(m) stats::predict(m, newdata)) return(rowMeans(predictions)) } - + + # Create a prediction raster from covariates prediction_rast <- terra::rast(covariates) + + # Generate spatial predictions using the trained model maps[[i]] <- terra::predict(prediction_rast, model = models[[i]], fun = cnn_ensemble_predict, scaling_params = scaling_params) - + + # Make predictions on held-out test data predictions[[i]] <- cnn_ensemble_predict(models[[i]], x_data[-sample, ], scaling_params) # Evaluate final ensemble on test set From b73290314fb334908950cea73258088998be0192 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:13:37 +0530 Subject: [PATCH 11/15] Removed MAE ,MSE from downscale - MAE , MSE already in metrics function - reimplementing it makes it redundant - Removed redundant implementation from downscale function --- modules/assim.sequential/R/downscale_function.R | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 6de48ba3a1..ced55e56c6 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -252,13 +252,6 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ # Add fold models to all_models list all_models <- c(all_models, fold_models) - - # Evaluate fold performance - val_predictions <- sapply(fold_models, function(m) stats::predict(m, x_val_fold)) - val_predictions_mean <- rowMeans(val_predictions) - val_mse <- mean((val_predictions_mean - y_val_fold)^2) - val_mae <- mean(abs(val_predictions_mean - y_val_fold)) - cat(sprintf("Fold %d - MSE: %.4f, MAE: %.4f\n", fold, val_mse, val_mae)) } # Store all models for this ensemble @@ -282,11 +275,6 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ # Make predictions on held-out test data predictions[[i]] <- cnn_ensemble_predict(models[[i]], x_data[-sample, ], scaling_params) - # Evaluate final ensemble on test set - test_predictions <- cnn_ensemble_predict(models[[i]], x_test, scaling_params) - test_mse <- mean((test_predictions - y_test[, i])^2) - test_mae <- mean(abs(test_predictions - y_test[, i])) - cat(sprintf("Ensemble %d - Test MSE: %.4f, Test MAE: %.4f\n", i, test_mse, test_mae)) } } else { stop("Invalid model_type. 
Please choose either 'rf' for Random Forest or 'cnn' for Convolutional Neural Network.") From bf024780296e8c05680ad19b9abaa3d61c0b4611 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Thu, 15 Aug 2024 14:40:41 +0530 Subject: [PATCH 12/15] Improve robustness of train-validation split in SDA_downscale function - Replace 1:nrow(x_train) with seq_len(nrow(x_train)) for indexing - Ensures proper handling of edge cases, including empty datasets - Addresses potential "subscript out of bounds" error in CNN model training --- modules/assim.sequential/R/downscale_function.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index ced55e56c6..439de001a2 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -152,14 +152,14 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ all_models <- list() # Create k-fold indices - fold_indices <- caret::createFolds(y = 1:nrow(x_train), k = k_folds, list = TRUE, returnTrain = FALSE) + fold_indices <- caret::createFolds(y = seq_len(nrow(x_train)), k = k_folds, list = TRUE, returnTrain = FALSE) #initialise operations for each fold for (fold in 1:k_folds) { cat(sprintf("Processing ensemble %d, fold %d of %d\n", i, fold, k_folds)) # Split data into training and validation sets for this fold - train_indices <- setdiff(1:nrow(x_train), fold_indices[[fold]]) + train_indices <- setdiff(seq_len(nrow(x_train)), fold_indices[[fold]]) val_indices <- fold_indices[[fold]] x_train_fold <- x_train[train_indices, , drop = FALSE] From 4ae1974d7aa78fa27e188d222e85ab0eba14b773 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:10:07 +0530 Subject: [PATCH 13/15] Implement custom create_folds function to replace caret dependency - Add create_folds function for k-fold cross-validation - Removes dependency on caret::createFolds - Supports both training and test set index generation - Allows flexible output as list or vector --- .../assim.sequential/R/downscale_function.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index 439de001a2..f4d3700f18 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -62,6 +62,24 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool) return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data)) } +create_folds <- function(y, k, list = TRUE, returnTrain = FALSE) { + n <- length(y) + indices <- seq_len(n) + folds <- split(indices, cut(seq_len(n), breaks = k, labels = FALSE)) + + if (!returnTrain) { + folds <- folds # Test indices are already what we want + } else { + folds <- lapply(folds, function(x) indices[-x]) # Return training indices + } + + if (!list) { + folds <- unlist(folds) + } + + return(folds) +} + ##' @title SDA Downscale Function ##' @name SDA_downscale ##' @author Joshua Ploshay, Sambhav Dixit From 5d86312d92fe03adeb3a77665b96db4ec2dcda45 Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:10:57 +0530 Subject: [PATCH 14/15] Add comprehensive roxygen documentation for create_folds function 
- Include title, name, and author tags - Detail all function parameters - Provide clear description and details sections - Specify return value format - Align documentation style with project standards --- modules/assim.sequential/R/downscale_function.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index f4d3700f18..dca7181718 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -62,6 +62,20 @@ SDA_downscale_preprocess <- function(data_path, coords_path, date, carbon_pool) return(list(input_data = input_data, site_coordinates = site_coordinates, carbon_data = carbon_data)) } +##' @title Create folds function +##' @name create_folds +##' @author Sambhav Dixit +##' +##' @param y Vector. A vector of outcome data or indices. +##' @param k Numeric. The number of folds to create. +##' @param list Logical. If TRUE, returns a list of fold indices. If FALSE, returns a vector. +##' @param returnTrain Logical. If TRUE, returns indices for training sets. If FALSE, returns indices for test sets. +##' @details This function creates k-fold indices for cross-validation. It can return either training or test set indices, and the output can be in list or vector format. +##' +##' @description This function generates k-fold indices for cross-validation, allowing for flexible output formats. +##' +##' @return A list of k elements (if list = TRUE), each containing indices for a fold, or a vector of indices (if list = FALSE). + create_folds <- function(y, k, list = TRUE, returnTrain = FALSE) { n <- length(y) indices <- seq_len(n) From 70bfdaf5ac40a69309d78b063c28d41576d5603d Mon Sep 17 00:00:00 2001 From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:16:08 +0530 Subject: [PATCH 15/15] Replace carets - Replace caretFolds -> caret_folds - Remove carets:: dependency --- modules/assim.sequential/R/downscale_function.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R index dca7181718..54ce13d2e5 100644 --- a/modules/assim.sequential/R/downscale_function.R +++ b/modules/assim.sequential/R/downscale_function.R @@ -184,7 +184,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ all_models <- list() # Create k-fold indices - fold_indices <- caret::createFolds(y = seq_len(nrow(x_train)), k = k_folds, list = TRUE, returnTrain = FALSE) + fold_indices <- create_folds(y = seq_len(nrow(x_train)), k = k_folds, list = TRUE, returnTrain = FALSE) #initialise operations for each fold for (fold in 1:k_folds) {
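Illustrative usage sketch (not part of any patch in this series) for the create_folds() helper introduced in PATCH 13 and wired into the CNN branch by PATCH 15; the numbers below are made up for demonstration.

# Assumes create_folds() from modules/assim.sequential/R/downscale_function.R has been sourced
folds <- create_folds(y = seq_len(20), k = 5, list = TRUE, returnTrain = FALSE)
str(folds)                                   # list of 5; folds[[1]] is 1 2 3 4 (held-out indices)
val_idx   <- folds[[1]]
train_idx <- setdiff(seq_len(20), val_idx)   # matching training indices, as in the CV loop
# Note: because the helper splits seq_len(n) with cut(), each fold is a contiguous block of
# row indices rather than a random partition (unlike caret::createFolds); shuffling the
# training rows beforehand restores random fold membership if that behaviour is wanted.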